foundry-mcp 0.7.0__py3-none-any.whl → 0.8.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. foundry_mcp/cli/__init__.py +0 -13
  2. foundry_mcp/cli/commands/session.py +1 -8
  3. foundry_mcp/cli/context.py +39 -0
  4. foundry_mcp/config.py +381 -7
  5. foundry_mcp/core/batch_operations.py +1196 -0
  6. foundry_mcp/core/discovery.py +1 -1
  7. foundry_mcp/core/llm_config.py +8 -0
  8. foundry_mcp/core/naming.py +25 -2
  9. foundry_mcp/core/prometheus.py +0 -13
  10. foundry_mcp/core/providers/__init__.py +12 -0
  11. foundry_mcp/core/providers/base.py +39 -0
  12. foundry_mcp/core/providers/claude.py +45 -1
  13. foundry_mcp/core/providers/codex.py +64 -3
  14. foundry_mcp/core/providers/cursor_agent.py +22 -3
  15. foundry_mcp/core/providers/detectors.py +34 -7
  16. foundry_mcp/core/providers/gemini.py +63 -1
  17. foundry_mcp/core/providers/opencode.py +95 -71
  18. foundry_mcp/core/providers/package-lock.json +4 -4
  19. foundry_mcp/core/providers/package.json +1 -1
  20. foundry_mcp/core/providers/validation.py +128 -0
  21. foundry_mcp/core/research/memory.py +103 -0
  22. foundry_mcp/core/research/models.py +783 -0
  23. foundry_mcp/core/research/providers/__init__.py +40 -0
  24. foundry_mcp/core/research/providers/base.py +242 -0
  25. foundry_mcp/core/research/providers/google.py +507 -0
  26. foundry_mcp/core/research/providers/perplexity.py +442 -0
  27. foundry_mcp/core/research/providers/semantic_scholar.py +544 -0
  28. foundry_mcp/core/research/providers/tavily.py +383 -0
  29. foundry_mcp/core/research/workflows/__init__.py +5 -2
  30. foundry_mcp/core/research/workflows/base.py +106 -12
  31. foundry_mcp/core/research/workflows/consensus.py +160 -17
  32. foundry_mcp/core/research/workflows/deep_research.py +4020 -0
  33. foundry_mcp/core/responses.py +240 -0
  34. foundry_mcp/core/spec.py +1 -0
  35. foundry_mcp/core/task.py +141 -12
  36. foundry_mcp/core/validation.py +6 -1
  37. foundry_mcp/server.py +0 -52
  38. foundry_mcp/tools/unified/__init__.py +37 -18
  39. foundry_mcp/tools/unified/authoring.py +0 -33
  40. foundry_mcp/tools/unified/environment.py +202 -29
  41. foundry_mcp/tools/unified/plan.py +20 -1
  42. foundry_mcp/tools/unified/provider.py +0 -40
  43. foundry_mcp/tools/unified/research.py +644 -19
  44. foundry_mcp/tools/unified/review.py +5 -2
  45. foundry_mcp/tools/unified/review_helpers.py +16 -1
  46. foundry_mcp/tools/unified/server.py +9 -24
  47. foundry_mcp/tools/unified/task.py +528 -9
  48. {foundry_mcp-0.7.0.dist-info → foundry_mcp-0.8.10.dist-info}/METADATA +2 -1
  49. {foundry_mcp-0.7.0.dist-info → foundry_mcp-0.8.10.dist-info}/RECORD +52 -46
  50. foundry_mcp/cli/flags.py +0 -266
  51. foundry_mcp/core/feature_flags.py +0 -592
  52. {foundry_mcp-0.7.0.dist-info → foundry_mcp-0.8.10.dist-info}/WHEEL +0 -0
  53. {foundry_mcp-0.7.0.dist-info → foundry_mcp-0.8.10.dist-info}/entry_points.txt +0 -0
  54. {foundry_mcp-0.7.0.dist-info → foundry_mcp-0.8.10.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,4020 @@
1
+ """Deep Research workflow with async background execution.
2
+
3
+ Provides multi-phase iterative research through query decomposition,
4
+ parallel source gathering, content analysis, and synthesized reporting.
5
+
6
+ Key Features:
7
+ - Background execution via asyncio.create_task()
8
+ - Immediate research_id return on start
9
+ - Status polling while running
10
+ - Task lifecycle tracking with cancellation support
11
+ - Multi-agent supervisor orchestration hooks
12
+
13
+ Inspired by:
14
+ - open_deep_research: Multi-agent supervision with think-tool pauses
15
+ - Claude-Deep-Research: Dual-source search with link following
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import asyncio
21
+ import atexit
22
+ import json
23
+ import logging
24
+ import re
25
+ import sys
26
+ import time
27
+ import traceback
28
+ from dataclasses import dataclass, field as dataclass_field
29
+ from datetime import datetime
30
+ from enum import Enum
31
+ from pathlib import Path
32
+ from typing import Any, Callable, Optional
33
+ from uuid import uuid4
34
+ from weakref import WeakValueDictionary
35
+
36
+ from foundry_mcp.config import ResearchConfig
37
+ from foundry_mcp.core.research.memory import ResearchMemory
38
+ from foundry_mcp.core.research.models import (
39
+ ConfidenceLevel,
40
+ DeepResearchPhase,
41
+ DeepResearchState,
42
+ DOMAIN_TIERS,
43
+ PhaseMetrics,
44
+ ResearchMode,
45
+ ResearchSource,
46
+ SourceQuality,
47
+ )
48
+ from foundry_mcp.core.error_collection import ErrorRecord
49
+ from foundry_mcp.core.error_store import FileErrorStore
50
+ from foundry_mcp.core.providers import ContextWindowError
51
+ from foundry_mcp.core.research.providers import (
52
+ SearchProvider,
53
+ SearchProviderError,
54
+ GoogleSearchProvider,
55
+ PerplexitySearchProvider,
56
+ SemanticScholarProvider,
57
+ TavilySearchProvider,
58
+ )
59
+ from foundry_mcp.core.research.workflows.base import ResearchWorkflowBase, WorkflowResult
60
+
61
+ logger = logging.getLogger(__name__)
62
+
63
+
64
+ # =============================================================================
65
+ # Crash Handler Infrastructure
66
+ # =============================================================================
67
+
68
# Track active research sessions for crash recovery
_active_research_sessions: dict[str, "DeepResearchState"] = {}

# Hook that was installed before this module replaced ``sys.excepthook``.
# The crash handler chains to it so that an application-level hook (error
# reporter, debugger, ...) keeps working after this module is imported.
_previous_excepthook = sys.excepthook
if getattr(_previous_excepthook, "__module__", None) == __name__:
    # Module executed again (e.g. reload): do not chain to an older copy of
    # our own handler, fall back to the interpreter default instead.
    _previous_excepthook = sys.__excepthook__


def _crash_handler(exc_type: type, exc_value: BaseException, exc_tb: Any) -> None:
    """Report an uncaught exception and leave crash markers for active sessions.

    Installed as ``sys.excepthook``. The handler:

    1. Prints the exception, the ids of the registered sessions and the full
       traceback to stderr.
    2. For every registered session, sets ``metadata["crash"]`` and
       ``metadata["crash_error"]`` and writes the traceback to
       ``~/.foundry-mcp/research/deep_research/<research_id>.crash``.
       Failures here are ignored on purpose (best effort).
    3. Delegates to the hook that was installed before this module was
       imported, falling back to ``sys.__excepthook__``.

    Args:
        exc_type: Class of the uncaught exception.
        exc_value: The exception instance.
        exc_tb: Traceback object associated with the exception.
    """
    tb_str = "".join(traceback.format_exception(exc_type, exc_value, exc_tb))

    # Work on a snapshot so a concurrent registration/removal cannot raise
    # "dictionary changed size during iteration" inside the crash handler.
    sessions = list(_active_research_sessions.items())

    # Always write to stderr for visibility
    print(
        f"\n{'='*60}\n"
        f"DEEP RESEARCH CRASH HANDLER\n"
        f"{'='*60}\n"
        f"Exception: {exc_type.__name__}: {exc_value}\n"
        f"Active sessions: {[research_id for research_id, _ in sessions]}\n"
        f"Traceback:\n{tb_str}"
        f"{'='*60}\n",
        file=sys.stderr,
        flush=True,
    )

    # Try to save crash markers for active research sessions
    for research_id, state in sessions:
        try:
            state.metadata["crash"] = True
            state.metadata["crash_error"] = str(exc_value)
            # Write crash marker file
            crash_path = (
                Path.home()
                / ".foundry-mcp"
                / "research"
                / "deep_research"
                / f"{research_id}.crash"
            )
            crash_path.parent.mkdir(parents=True, exist_ok=True)
            # Explicit encoding: tracebacks may contain non-ASCII text and the
            # platform default encoding is not guaranteed to handle it.
            crash_path.write_text(tb_str, encoding="utf-8")
        except Exception:
            pass  # Best effort - don't fail the crash handler

    # Chain to the previously installed hook rather than unconditionally to
    # the interpreter default, so pre-existing hooks are not bypassed.
    previous = _previous_excepthook
    if previous is None or previous is _crash_handler:
        previous = sys.__excepthook__
    previous(exc_type, exc_value, exc_tb)


# Install crash handler
sys.excepthook = _crash_handler
117
+
118
+
119
@atexit.register
def _cleanup_on_exit() -> None:
    """Flag every unfinished registered session as interrupted at exit.

    Registered with ``atexit``. A session counts as unfinished when its
    ``completed_at`` is ``None``; for those, ``metadata["interrupted"]`` is
    set to ``True``. Only the in-memory state objects are touched here.
    """
    unfinished = (
        session
        for session in _active_research_sessions.values()
        if session.completed_at is None
    )
    for session in unfinished:
        session.metadata["interrupted"] = True
125
+
126
+
127
+ # =============================================================================
128
+ # Domain-Based Source Quality Assessment
129
+ # =============================================================================
130
+
131
+
132
def _extract_domain(url: str) -> Optional[str]:
    """Extract the lowercase domain from a URL, without a leading ``www.``.

    Port numbers and ``user:password@`` credentials are not part of the
    result. URLs without a scheme (``arxiv.org/abs/1``) and protocol-relative
    URLs (``//arxiv.org/abs/1``) are accepted.

    Args:
        url: Full URL string

    Returns:
        Domain string (e.g., "arxiv.org") or None if extraction fails
    """
    if not url:
        return None

    from urllib.parse import urlparse

    try:
        candidate = url.strip()
        if candidate.startswith("//"):
            # Protocol-relative URL: only the scheme is missing.
            candidate = "https:" + candidate
        elif not re.match(r"[A-Za-z][A-Za-z0-9+.\-]*://", candidate):
            # Only a scheme at the very start counts; a "://" further along
            # (e.g. inside a query string) must not suppress the default.
            candidate = "https://" + candidate
        # ``hostname`` (unlike ``netloc``) excludes credentials and port and
        # is already lowercased.
        host = urlparse(candidate).hostname
    except (AttributeError, TypeError, ValueError):
        # Non-string input or a URL that urlparse rejects.
        return None

    if not host:
        return None
    # Remove www. prefix
    if host.startswith("www."):
        host = host[4:]
    return host or None
157
+
158
+
159
def _extract_hostname(url: str) -> Optional[str]:
    """Extract the full lowercase hostname from a URL (keeps subdomains like www.).

    Port numbers and ``user:password@`` credentials are not part of the
    result. URLs without a scheme and protocol-relative URLs (``//host/...``)
    are accepted.

    Args:
        url: Full URL string

    Returns:
        Full hostname (e.g., "www.arxiv.org", "docs.python.org") or None
    """
    if not url:
        return None

    from urllib.parse import urlparse

    try:
        candidate = url.strip()
        if candidate.startswith("//"):
            # Protocol-relative URL: only the scheme is missing.
            candidate = "https:" + candidate
        elif not re.match(r"[A-Za-z][A-Za-z0-9+.\-]*://", candidate):
            # Only a scheme at the very start counts; a "://" inside the
            # path or query must not suppress the default scheme.
            candidate = "https://" + candidate
        # ``hostname`` strips credentials and port and lowercases the host.
        host = urlparse(candidate).hostname
    except (AttributeError, TypeError, ValueError):
        # Non-string input or a URL that urlparse rejects.
        return None

    return host or None
179
+
180
+
181
def _domain_matches_pattern(domain: str, pattern: str) -> bool:
    """Check if domain matches a pattern (supports wildcards).

    Matching is case-insensitive.

    Patterns:
    - "arxiv.org" - exact match, or any subdomain of it
    - "*.edu" - matches stanford.edu, mit.edu, etc.
    - "docs.*" - matches docs.python.org, docs.microsoft.com, etc.
    - anything else containing "*" - the literal pieces between the
      wildcards must occur in the domain in that order (unanchored), e.g.
      "*wiki*" matches mywiki.com and "docs.*.org" matches docs.python.org

    Args:
        domain: Domain to check (e.g., "stanford.edu")
        pattern: Pattern to match (e.g., "*.edu")

    Returns:
        True if domain matches pattern
    """
    pattern = pattern.lower()
    domain = domain.lower()

    if "*" not in pattern:
        # Exact match or subdomain match
        return domain == pattern or domain.endswith("." + pattern)

    # The suffix/prefix shortcuts are only valid when the remainder is a
    # plain literal; otherwise a "*" would be compared literally against the
    # domain and the pattern could never match.
    if pattern.startswith("*.") and "*" not in pattern[2:]:
        # Suffix pattern: *.edu matches stanford.edu
        suffix = pattern[2:]
        return domain == suffix or domain.endswith("." + suffix)

    if pattern.endswith(".*") and "*" not in pattern[:-2]:
        # Prefix pattern: docs.* matches docs.python.org
        prefix = pattern[:-2]
        return domain == prefix or domain.startswith(prefix + ".")

    # General wildcard (treat as contains): every literal fragment has to
    # appear in order, with anything (or nothing) where a "*" stood.
    fragments = [re.escape(part) for part in pattern.split("*") if part]
    return re.search(".*".join(fragments), domain, flags=re.DOTALL) is not None
215
+
216
+
217
def get_domain_quality(url: str, mode: ResearchMode) -> SourceQuality:
    """Classify a source URL by looking its domain up in the mode's tier lists.

    The tier table for ``mode.value`` is used, falling back to the
    ``"general"`` table. The "high" patterns are checked before the "low"
    patterns, so a domain matching both is rated HIGH.

    Args:
        url: Source URL
        mode: Research mode whose domain tiers should be applied

    Returns:
        UNKNOWN when no domain can be extracted, HIGH or LOW when a tier
        pattern matches, MEDIUM otherwise
    """
    domain = _extract_domain(url)
    if not domain:
        return SourceQuality.UNKNOWN

    tiers = DOMAIN_TIERS.get(mode.value, DOMAIN_TIERS["general"])

    # Order matters: high-priority patterns win over low-priority ones.
    ranked_tiers = (("high", SourceQuality.HIGH), ("low", SourceQuality.LOW))
    for tier_name, quality in ranked_tiers:
        patterns = tiers.get(tier_name, [])
        if any(_domain_matches_pattern(domain, pattern) for pattern in patterns):
            return quality

    # No tier claimed the domain.
    return SourceQuality.MEDIUM
245
+
246
+
247
def _normalize_title(title: str) -> str:
    """Reduce a title to a canonical form used for duplicate detection.

    The title is lowercased, every character that is neither a word
    character nor whitespace is dropped, and runs of whitespace collapse to
    a single space with the ends trimmed. This lets the same paper found via
    different sources (e.g., arXiv vs OpenReview) compare equal.

    Args:
        title: Source title to normalize

    Returns:
        Normalized title string for comparison ("" for an empty title)
    """
    if not title:
        return ""
    stripped_of_punctuation = re.sub(r"[^\w\s]", "", title.lower())
    return re.sub(r"\s+", " ", stripped_of_punctuation).strip()
266
+
267
+
268
+ # =============================================================================
269
+ # Task Lifecycle
270
+ # =============================================================================
271
+
272
+
273
class TaskStatus(str, Enum):
    """Lifecycle states of a background research task.

    Members are plain strings as well as enum members:

    - PENDING: created but not started
    - RUNNING: currently executing
    - COMPLETED: finished successfully
    - FAILED: finished with error
    - CANCELLED: cancelled by user
    - TIMEOUT: exceeded timeout limit
    """

    PENDING = "pending"
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
    TIMEOUT = "timeout"
282
+
283
+
284
class AgentRole(str, Enum):
    """Specialist roles that take part in the multi-agent research workflow.

    Role overview:

    - SUPERVISOR: drives phase transitions, evaluates quality gates and
      chooses between another iteration and completion; reflects in
      think-tool pauses between phases to adjust strategy.
    - PLANNER: breaks the original query into focused sub-queries, writes
      the research brief and picks the key themes to explore.
    - GATHERER: runs searches across providers in parallel, deals with rate
      limiting, deduplicates sources and validates their quality.
    - ANALYZER: extracts findings from sources, assesses evidence quality,
      spots contradictions and rates source reliability.
    - SYNTHESIZER: writes coherent report sections with a logical flow,
      integrates findings and produces the final synthesis.
    - REFINER: finds and prioritizes knowledge gaps, generates follow-up
      queries and determines whether another iteration is needed.
    """

    SUPERVISOR = "supervisor"
    PLANNER = "planner"
    GATHERER = "gatherer"
    ANALYZER = "analyzer"
    SYNTHESIZER = "synthesizer"
    REFINER = "refiner"
309
+
310
+
311
# Specialist agent responsible for each workflow phase. Lookups for a phase
# that is not listed here have to supply their own default.
PHASE_TO_AGENT: dict[DeepResearchPhase, AgentRole] = dict(
    (
        (DeepResearchPhase.PLANNING, AgentRole.PLANNER),
        (DeepResearchPhase.GATHERING, AgentRole.GATHERER),
        (DeepResearchPhase.ANALYSIS, AgentRole.ANALYZER),
        (DeepResearchPhase.SYNTHESIS, AgentRole.SYNTHESIZER),
        (DeepResearchPhase.REFINEMENT, AgentRole.REFINER),
    )
)
319
+
320
+
321
@dataclass
class AgentDecision:
    """A single decision taken by an agent while the workflow runs.

    Kept for traceability and debugging. A record holds the deciding agent,
    the action taken, the rationale, the inputs handed to the agent, the
    outputs it produced (if any) and a timestamp used for ordering.

    Handoff Protocol:
    - Inputs: the context passed to the agent (query, state summary, etc.)
    - Outputs: the results produced (sub-queries, findings, report sections)
    - The supervisor evaluates outputs before proceeding to the next phase
    """

    # Agent that made the decision
    agent: AgentRole
    # Action name, e.g. "decompose_query", "evaluate_phase", "decide_iteration"
    action: str
    # Why this decision was made
    rationale: str
    # Context provided to the agent
    inputs: dict[str, Any]
    # Results produced, when there are any
    outputs: Optional[dict[str, Any]] = None
    # Creation time; defaults to datetime.utcnow() (naive datetime)
    timestamp: datetime = dataclass_field(default_factory=datetime.utcnow)

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-friendly dict: agent as its value, timestamp as ISO text."""
        payload: dict[str, Any] = {"agent": self.agent.value}
        for attribute in ("action", "rationale", "inputs", "outputs"):
            payload[attribute] = getattr(self, attribute)
        payload["timestamp"] = self.timestamp.isoformat()
        return payload
356
+
357
+
358
class BackgroundTask:
    """Tracks a background research task.

    Wraps an asyncio.Task with additional metadata for lifecycle
    management, cancellation support, and timeout handling.
    """

    def __init__(
        self,
        research_id: str,
        task: asyncio.Task[WorkflowResult],
        timeout: Optional[float] = None,
    ) -> None:
        """Initialize background task.

        The task is recorded as RUNNING and its start time is taken from
        ``time.time()`` at construction.

        Args:
            research_id: ID of the research session
            task: The asyncio task running the workflow
            timeout: Optional timeout in seconds
        """
        self.research_id = research_id
        self.task = task
        self.timeout = timeout
        self.status = TaskStatus.RUNNING
        self.started_at = time.time()
        self.completed_at: Optional[float] = None
        self.error: Optional[str] = None
        self.result: Optional[WorkflowResult] = None

    def _end_time(self) -> float:
        """Return the completion time, or the current time while unfinished."""
        if self.completed_at is not None:
            return self.completed_at
        return time.time()

    @property
    def elapsed_ms(self) -> float:
        """Get elapsed time in milliseconds (frozen once the task has ended)."""
        return (self._end_time() - self.started_at) * 1000

    @property
    def is_timed_out(self) -> bool:
        """Check if task has exceeded timeout.

        The duration is measured up to ``completed_at`` once the task has
        ended, so a task that finished within its timeout does not start
        reporting a timeout later on. Always False when no timeout is set.
        """
        if self.timeout is None:
            return False
        return (self._end_time() - self.started_at) > self.timeout

    def cancel(self) -> bool:
        """Cancel the task.

        The status switches to CANCELLED as soon as cancellation is
        requested.

        Returns:
            True if cancellation was requested, False if already done
        """
        if self.task.done():
            return False
        self.task.cancel()
        self.status = TaskStatus.CANCELLED
        self.completed_at = time.time()
        return True

    def mark_completed(self, result: WorkflowResult) -> None:
        """Mark task as completed with result.

        The status becomes COMPLETED or FAILED depending on
        ``result.success``; for a failed result ``result.error`` is copied to
        ``self.error``.
        """
        self.status = TaskStatus.COMPLETED if result.success else TaskStatus.FAILED
        self.result = result
        self.completed_at = time.time()
        if not result.success:
            self.error = result.error

    def mark_timeout(self) -> None:
        """Mark task as timed out and cancel the underlying asyncio task."""
        self.status = TaskStatus.TIMEOUT
        self.completed_at = time.time()
        self.error = f"Task exceeded timeout of {self.timeout}s"
        self.task.cancel()
427
+
428
+
429
+ # =============================================================================
430
+ # Supervisor Hooks (Multi-Agent Orchestration)
431
+ # =============================================================================
432
+
433
+
434
class SupervisorHooks:
    """Callback registry for multi-agent supervisor orchestration.

    External orchestrators register callbacks here to run code at key
    workflow points (think-tool pauses, agent handoffs, custom routing).
    Each event holds at most one callback; registering again replaces it.
    A callback that raises is logged and otherwise ignored.
    """

    def __init__(self) -> None:
        """Start with no callbacks registered."""
        self._on_phase_start: Optional[Callable[[DeepResearchState], None]] = None
        self._on_phase_complete: Optional[Callable[[DeepResearchState], None]] = None
        self._on_think_pause: Optional[Callable[[DeepResearchState, str], str]] = None
        self._on_agent_handoff: Optional[Callable[[str, dict], dict]] = None

    @staticmethod
    def _invoke(hook: Optional[Callable[..., Any]], failure_format: str, *args: Any) -> Any:
        """Call ``hook`` with ``args``; log with ``failure_format`` if it raises.

        Returns the hook's result, or None when no hook is set or it failed.
        """
        if not hook:
            return None
        try:
            return hook(*args)
        except Exception as exc:
            logger.error(failure_format, exc)
        return None

    def on_phase_start(self, callback: Callable[[DeepResearchState], None]) -> None:
        """Set the callback invoked when a phase starts."""
        self._on_phase_start = callback

    def on_phase_complete(self, callback: Callable[[DeepResearchState], None]) -> None:
        """Set the callback invoked when a phase completes."""
        self._on_phase_complete = callback

    def on_think_pause(self, callback: Callable[[DeepResearchState, str], str]) -> None:
        """Set the callback for think-tool pauses.

        It is called with the current state and a reflection prompt and
        should return guidance for the next step.
        """
        self._on_think_pause = callback

    def on_agent_handoff(self, callback: Callable[[str, dict], dict]) -> None:
        """Set the callback for agent handoffs.

        It is called with the target agent name and a context dict and
        should return the agent's response.
        """
        self._on_agent_handoff = callback

    def emit_phase_start(self, state: DeepResearchState) -> None:
        """Notify the phase-start callback, if any; its result is discarded."""
        self._invoke(self._on_phase_start, "Phase start hook failed: %s", state)

    def emit_phase_complete(self, state: DeepResearchState) -> None:
        """Notify the phase-complete callback, if any; its result is discarded."""
        self._invoke(self._on_phase_complete, "Phase complete hook failed: %s", state)

    def think_pause(self, state: DeepResearchState, prompt: str) -> Optional[str]:
        """Run the think-pause callback; None if unset or it raised."""
        return self._invoke(
            self._on_think_pause, "Think pause hook failed: %s", state, prompt
        )

    def agent_handoff(self, agent: str, context: dict) -> Optional[dict]:
        """Run the agent-handoff callback; None if unset or it raised."""
        return self._invoke(
            self._on_agent_handoff, "Agent handoff hook failed: %s", agent, context
        )
506
+
507
+
508
+ # =============================================================================
509
+ # Supervisor Orchestrator
510
+ # =============================================================================
511
+
512
+
513
class SupervisorOrchestrator:
    """Coordinates specialist agents and manages phase transitions.

    Responsibilities:
    1. Choose the specialist agent to dispatch for each phase
    2. Evaluate the quality of a completed phase before moving on
    3. Supply reflection prompts for think-tool pauses between phases
    4. Record every decision for traceability
    5. Decide between another iteration and completion

    Decisions accumulate in memory until ``record_to_state`` copies them into
    the research state's metadata.

    Phase Dispatch Flow:
    ```
    SUPERVISOR -> evaluate context -> dispatch to PLANNER
               -> think pause (evaluate planning quality)
               -> dispatch to GATHERER
               -> think pause (evaluate source quality)
               -> dispatch to ANALYZER
               -> think pause (evaluate findings)
               -> dispatch to SYNTHESIZER
               -> think pause (evaluate report)
               -> decide: complete OR dispatch to REFINER
    ```
    """

    def __init__(self) -> None:
        """Create an orchestrator with an empty decision log."""
        self._decisions: list[AgentDecision] = []

    def _remember(self, decision: AgentDecision) -> AgentDecision:
        """Append ``decision`` to the pending log and hand it back."""
        self._decisions.append(decision)
        return decision

    def dispatch_to_agent(
        self,
        state: DeepResearchState,
        phase: DeepResearchPhase,
    ) -> AgentDecision:
        """Record the dispatch of a phase to its specialist agent.

        The agent comes from ``PHASE_TO_AGENT``; a phase without an entry is
        assigned to the supervisor.

        Args:
            state: Current research state
            phase: The phase to execute

        Returns:
            AgentDecision recording the dispatch
        """
        specialist = PHASE_TO_AGENT.get(phase, AgentRole.SUPERVISOR)
        handoff_inputs = self._build_agent_inputs(state, phase)
        return self._remember(
            AgentDecision(
                agent=specialist,
                action=f"execute_{phase.value}",
                rationale=f"Phase {phase.value} requires {specialist.value} specialist",
                inputs=handoff_inputs,
            )
        )

    def _build_agent_inputs(
        self,
        state: DeepResearchState,
        phase: DeepResearchPhase,
    ) -> dict[str, Any]:
        """Assemble the handoff context for the specialist of ``phase``.

        Every phase receives the research id, original query, phase name and
        iteration. On top of that:
        - PLANNING: system prompt, maximum number of sub-queries
        - GATHERING: pending sub-queries, source types, per-query source cap
        - ANALYSIS: source count, number of high-quality sources
        - SYNTHESIS: finding count, gap count, whether a brief exists
        - REFINEMENT: unresolved gaps, remaining iterations, draft presence
        """
        common = {
            "research_id": state.id,
            "original_query": state.original_query,
            "current_phase": phase.value,
            "iteration": state.iteration,
        }

        if phase == DeepResearchPhase.PLANNING:
            extras = {
                "system_prompt": state.system_prompt,
                "max_sub_queries": state.max_sub_queries,
            }
        elif phase == DeepResearchPhase.GATHERING:
            extras = {
                "sub_queries": [q.query for q in state.pending_sub_queries()],
                "source_types": [st.value for st in state.source_types],
                "max_sources_per_query": state.max_sources_per_query,
            }
        elif phase == DeepResearchPhase.ANALYSIS:
            high_quality = [s for s in state.sources if s.quality == SourceQuality.HIGH]
            extras = {
                "source_count": len(state.sources),
                "high_quality_sources": len(high_quality),
            }
        elif phase == DeepResearchPhase.SYNTHESIS:
            extras = {
                "finding_count": len(state.findings),
                "gap_count": len(state.gaps),
                "has_research_brief": state.research_brief is not None,
            }
        elif phase == DeepResearchPhase.REFINEMENT:
            extras = {
                "gaps": [g.description for g in state.gaps if not g.resolved],
                "remaining_iterations": state.max_iterations - state.iteration,
                "has_report_draft": state.report is not None,
            }
        else:
            # Unknown phase: only the shared context applies.
            return common

        return {**common, **extras}

    def evaluate_phase_completion(
        self,
        state: DeepResearchState,
        phase: DeepResearchPhase,
    ) -> AgentDecision:
        """Record the supervisor's evaluation of a phase that just finished.

        This is the think-tool pause in which the supervisor looks at the
        phase outputs and decides whether to proceed.

        Args:
            state: Current research state (after phase execution)
            phase: The phase that just completed

        Returns:
            AgentDecision with evaluation and proceed/retry rationale
        """
        verdict = self._evaluate_phase_quality(state, phase)
        return self._remember(
            AgentDecision(
                agent=AgentRole.SUPERVISOR,
                action="evaluate_phase",
                rationale=verdict["rationale"],
                inputs={
                    "phase": phase.value,
                    "iteration": state.iteration,
                },
                outputs=verdict,
            )
        )

    def _evaluate_phase_quality(
        self,
        state: DeepResearchState,
        phase: DeepResearchPhase,
    ) -> dict[str, Any]:
        """Compute metrics and a proceed/retry recommendation for ``phase``.

        Phases without a dedicated check are reported as acceptable.
        """
        if phase == DeepResearchPhase.PLANNING:
            return self._assess_planning(state)
        if phase == DeepResearchPhase.GATHERING:
            return self._assess_gathering(state)
        if phase == DeepResearchPhase.ANALYSIS:
            return self._assess_analysis(state)
        if phase == DeepResearchPhase.SYNTHESIS:
            return self._assess_synthesis(state)
        if phase == DeepResearchPhase.REFINEMENT:
            return self._assess_refinement(state)
        return {"rationale": f"Phase {phase.value} completed", "quality_ok": True}

    @staticmethod
    def _assess_planning(state: DeepResearchState) -> dict[str, Any]:
        """Planning is acceptable with at least 2 sub-queries."""
        sub_query_count = len(state.sub_queries)
        quality_ok = sub_query_count >= 2
        return {
            "sub_query_count": sub_query_count,
            "has_research_brief": state.research_brief is not None,
            "quality_ok": quality_ok,
            "rationale": (
                f"Planning produced {sub_query_count} sub-queries. "
                f"{'Sufficient' if quality_ok else 'Insufficient'} for gathering."
            ),
        }

    @staticmethod
    def _assess_gathering(state: DeepResearchState) -> dict[str, Any]:
        """Gathering is acceptable with at least 3 sources."""
        source_count = len(state.sources)
        quality_ok = source_count >= 3
        return {
            "source_count": source_count,
            "quality_ok": quality_ok,
            "rationale": (
                f"Gathering collected {source_count} sources. "
                f"{'Sufficient' if quality_ok else 'May need more sources'}."
            ),
        }

    @staticmethod
    def _assess_analysis(state: DeepResearchState) -> dict[str, Any]:
        """Analysis is acceptable with at least 2 findings."""
        finding_count = len(state.findings)
        high_confidence = sum(
            1 for f in state.findings if f.confidence == ConfidenceLevel.HIGH
        )
        quality_ok = finding_count >= 2
        return {
            "finding_count": finding_count,
            "high_confidence_count": high_confidence,
            "quality_ok": quality_ok,
            "rationale": (
                f"Analysis extracted {finding_count} findings "
                f"({high_confidence} high confidence). "
                f"{'Ready for synthesis' if quality_ok else 'May need more analysis'}."
            ),
        }

    @staticmethod
    def _assess_synthesis(state: DeepResearchState) -> dict[str, Any]:
        """Synthesis is acceptable with a report longer than 100 characters."""
        has_report = state.report is not None
        report_length = len(state.report) if state.report else 0
        quality_ok = has_report and report_length > 100
        return {
            "has_report": has_report,
            "report_length": report_length,
            "quality_ok": quality_ok,
            "rationale": (
                f"Synthesis {'produced' if has_report else 'failed to produce'} report "
                f"({report_length} chars). "
                f"{'Complete' if quality_ok else 'May need refinement'}."
            ),
        }

    @staticmethod
    def _assess_refinement(state: DeepResearchState) -> dict[str, Any]:
        """Refinement iterates while unresolved gaps and iterations remain."""
        unaddressed_gaps = sum(1 for g in state.gaps if not g.resolved)
        can_iterate = state.iteration < state.max_iterations
        should_iterate = unaddressed_gaps > 0 and can_iterate
        return {
            "unaddressed_gaps": unaddressed_gaps,
            "iteration": state.iteration,
            "max_iterations": state.max_iterations,
            "should_iterate": should_iterate,
            "rationale": (
                f"Refinement found {unaddressed_gaps} gaps. "
                f"{'Will iterate' if should_iterate else 'Completing'} "
                f"(iteration {state.iteration}/{state.max_iterations})."
            ),
        }

    def decide_iteration(self, state: DeepResearchState) -> AgentDecision:
        """Record the supervisor's choice between iterating and completing.

        Called after synthesis. Another iteration is chosen when unresolved
        gaps exist and ``state.iteration`` is below ``state.max_iterations``.

        Args:
            state: Current research state

        Returns:
            AgentDecision with iterate vs complete decision
        """
        unaddressed_gaps = [g for g in state.gaps if not g.resolved]
        gap_total = len(unaddressed_gaps)
        can_iterate = state.iteration < state.max_iterations
        should_iterate = gap_total > 0 and can_iterate

        if should_iterate:
            verb = "Iterating"
            next_phase = DeepResearchPhase.REFINEMENT.value
        else:
            verb = "Completing"
            next_phase = "COMPLETED"

        return self._remember(
            AgentDecision(
                agent=AgentRole.SUPERVISOR,
                action="decide_iteration",
                rationale=(
                    f"{verb}: "
                    f"{gap_total} gaps, "
                    f"iteration {state.iteration}/{state.max_iterations}"
                ),
                inputs={
                    "gap_count": gap_total,
                    "iteration": state.iteration,
                    "max_iterations": state.max_iterations,
                },
                outputs={
                    "should_iterate": should_iterate,
                    "next_phase": next_phase,
                },
            )
        )

    def record_to_state(self, state: DeepResearchState) -> None:
        """Move all pending decisions into ``state.metadata["agent_decisions"]``.

        The decisions are appended as dicts and the pending log is emptied.

        Args:
            state: Research state to update
        """
        recorded = state.metadata.setdefault("agent_decisions", [])
        recorded.extend(decision.to_dict() for decision in self._decisions)
        self._decisions.clear()

    def get_reflection_prompt(self, state: DeepResearchState, phase: DeepResearchPhase) -> str:
        """Build the reflection prompt for the supervisor think pause.

        Args:
            state: Current research state
            phase: Phase that just completed

        Returns:
            Prompt for supervisor reflection; a generic prompt for a phase
            without a dedicated one
        """
        gap_total = len(state.gaps)
        resolved_total = len([g for g in state.gaps if g.resolved])

        by_phase = {
            DeepResearchPhase.PLANNING: (
                f"Planning complete. Generated {len(state.sub_queries)} sub-queries. "
                f"Research brief: {bool(state.research_brief)}. "
                "Evaluate: Are sub-queries comprehensive? Any gaps in coverage?"
            ),
            DeepResearchPhase.GATHERING: (
                f"Gathering complete. Collected {len(state.sources)} sources. "
                "Evaluate: Is source diversity sufficient? Quality distribution?"
            ),
            DeepResearchPhase.ANALYSIS: (
                f"Analysis complete. Extracted {len(state.findings)} findings, "
                f"identified {gap_total} gaps. "
                "Evaluate: Are findings well-supported? Critical gaps?"
            ),
            DeepResearchPhase.SYNTHESIS: (
                f"Synthesis complete. Report: {len(state.report or '')} chars. "
                f"Iteration {state.iteration}/{state.max_iterations}. "
                "Evaluate: Report quality? Need refinement?"
            ),
            DeepResearchPhase.REFINEMENT: (
                "Refinement complete. Gaps addressed: "
                f"{resolved_total}/{gap_total}. "
                "Evaluate: Continue iterating or finalize?"
            ),
        }
        fallback = f"Phase {phase.value} complete. Evaluate progress."
        return by_phase.get(phase, fallback)
838
+
839
+
840
+ # =============================================================================
841
+ # Deep Research Workflow
842
+ # =============================================================================
843
+
844
+
845
+ class DeepResearchWorkflow(ResearchWorkflowBase):
846
+ """Multi-phase deep research workflow with background execution.
847
+
848
+ Supports:
849
+ - Async execution with immediate research_id return
850
+ - Status polling while research runs in background
851
+ - Cancellation and timeout handling
852
+ - Multi-agent supervisor hooks
853
+ - Session persistence for resume capability
854
+
855
+ Workflow Phases:
856
+ 1. PLANNING - Decompose query into sub-queries
857
+ 2. GATHERING - Execute sub-queries in parallel
858
+ 3. ANALYSIS - Extract findings and assess quality
859
+ 4. SYNTHESIS - Generate comprehensive report
860
+ 5. REFINEMENT - Identify gaps and iterate if needed
861
+ """
862
+
863
+ # Class-level task registry for background task tracking
864
+ _tasks: WeakValueDictionary[str, BackgroundTask] = WeakValueDictionary()
865
+
866
def __init__(
    self,
    config: ResearchConfig,
    memory: Optional[ResearchMemory] = None,
    hooks: Optional[SupervisorHooks] = None,
) -> None:
    """Set up the deep research workflow.

    Args:
        config: Research configuration
        memory: Optional memory instance for persistence
        hooks: Optional supervisor hooks for orchestration; a fresh
            ``SupervisorHooks`` is used when none (or a falsy value) is given
    """
    super().__init__(config, memory)
    # Per-instance collaborators: hooks, a new orchestrator, and an
    # initially empty search-provider map keyed by string.
    self.hooks = hooks if hooks else SupervisorHooks()
    self.orchestrator = SupervisorOrchestrator()
    self._search_providers: dict[str, SearchProvider] = {}
883
+
884
+ def _audit_enabled(self) -> bool:
885
+ """Return True if audit artifacts are enabled."""
886
+ return bool(getattr(self.config, "deep_research_audit_artifacts", True))
887
+
888
+ def _audit_path(self, research_id: str) -> Path:
889
+ """Resolve audit artifact path for a research session."""
890
+ storage_path = self.config.get_storage_path()
891
+ return storage_path / "deep_research" / f"{research_id}.audit.jsonl"
892
+
893
+ def _write_audit_event(
894
+ self,
895
+ state: Optional[DeepResearchState],
896
+ event_type: str,
897
+ data: Optional[dict[str, Any]] = None,
898
+ level: str = "info",
899
+ ) -> None:
900
+ """Write a JSONL audit event for deep research observability."""
901
+ if not self._audit_enabled():
902
+ return
903
+
904
+ research_id = state.id if state else None
905
+ payload = {
906
+ "timestamp": datetime.utcnow().isoformat() + "Z",
907
+ "event_id": uuid4().hex,
908
+ "event_type": event_type,
909
+ "level": level,
910
+ "research_id": research_id,
911
+ "phase": state.phase.value if state else None,
912
+ "iteration": state.iteration if state else None,
913
+ "data": data or {},
914
+ }
915
+
916
+ try:
917
+ if research_id is None:
918
+ return
919
+ path = self._audit_path(research_id)
920
+ path.parent.mkdir(parents=True, exist_ok=True)
921
+ with path.open("a", encoding="utf-8") as handle:
922
+ handle.write(json.dumps(payload, ensure_ascii=True))
923
+ handle.write("\n")
924
+ except Exception as exc:
925
+ logger.error("Failed to write audit event: %s", exc)
926
+ # Fallback to stderr for crash visibility
927
+ print(
928
+ f"AUDIT_FALLBACK: {event_type} for {research_id} - {exc}",
929
+ file=sys.stderr,
930
+ flush=True,
931
+ )
932
+
933
def _record_workflow_error(
    self,
    error: Exception,
    state: DeepResearchState,
    context: str,
) -> None:
    """Persist a workflow failure to the file-backed error store.

    The record is written under ``~/.foundry-mcp/errors``. This is best
    effort: any problem while building or storing the record is logged and
    swallowed so it never masks the original failure.

    Args:
        error: The exception that occurred
        state: Current research state
        context: Context string (e.g., "background_task", "orchestrator")
    """
    try:
        store = FileErrorStore(Path.home() / ".foundry-mcp" / "errors")
        exception_name = type(error).__name__
        source_label = f"deep-research:{context}"
        entry = ErrorRecord(
            id=f"err_{uuid4().hex[:12]}",
            fingerprint=f"deep-research:{context}:{exception_name}",
            error_code="WORKFLOW_ERROR",
            error_type="internal",
            tool_name=source_label,
            correlation_id=state.id,
            message=str(error),
            exception_type=exception_name,
            # Formats the exception currently being handled by the caller.
            stack_trace=traceback.format_exc(),
            input_summary={
                "research_id": state.id,
                "phase": state.phase.value,
                "iteration": state.iteration,
            },
        )
        store.append(entry)
    except Exception as store_err:
        logger.error("Failed to record error to store: %s", store_err)
967
+
968
+ def _safe_orchestrator_transition(
969
+ self,
970
+ state: DeepResearchState,
971
+ phase: DeepResearchPhase,
972
+ ) -> None:
973
+ """Safely execute orchestrator phase transition with error logging.
974
+
975
+ This wraps orchestrator calls with exception handling to ensure any
976
+ failures are properly logged and recorded before re-raising.
977
+
978
+ Args:
979
+ state: Current research state
980
+ phase: The phase that just completed
981
+
982
+ Raises:
983
+ Exception: Re-raises any exception after logging
984
+ """
985
+ try:
986
+ self.orchestrator.evaluate_phase_completion(state, phase)
987
+ prompt = self.orchestrator.get_reflection_prompt(state, phase)
988
+ self.hooks.think_pause(state, prompt)
989
+ self.orchestrator.record_to_state(state)
990
+ state.advance_phase()
991
+ except Exception as exc:
992
+ logger.exception(
993
+ "Orchestrator transition failed for phase %s, research %s: %s",
994
+ phase.value,
995
+ state.id,
996
+ exc,
997
+ )
998
+ self._write_audit_event(
999
+ state,
1000
+ "orchestrator_error",
1001
+ data={
1002
+ "phase": phase.value,
1003
+ "error": str(exc),
1004
+ "traceback": traceback.format_exc(),
1005
+ },
1006
+ level="error",
1007
+ )
1008
+ self._record_workflow_error(exc, state, f"orchestrator_{phase.value}")
1009
+ raise # Re-raise to be caught by workflow exception handler
1010
+
1011
+ # =========================================================================
1012
+ # Public API
1013
+ # =========================================================================
1014
+
1015
def execute(
    self,
    query: Optional[str] = None,
    research_id: Optional[str] = None,
    action: str = "start",
    provider_id: Optional[str] = None,
    system_prompt: Optional[str] = None,
    max_iterations: int = 3,
    max_sub_queries: int = 5,
    max_sources_per_query: int = 5,
    follow_links: bool = True,
    timeout_per_operation: float = 120.0,
    max_concurrent: int = 3,
    background: bool = False,
    task_timeout: Optional[float] = None,
    **kwargs: Any,
) -> WorkflowResult:
    """Dispatch a deep research request to the handler for ``action``.

    Actions:
    - start: Begin new research session
    - continue: Resume existing session
    - status: Get current status
    - report: Get final report
    - cancel: Cancel running task

    Args:
        query: Research query (required for 'start')
        research_id: Session ID (required for continue/status/report/cancel)
        action: One of 'start', 'continue', 'status', 'report', 'cancel'
        provider_id: Provider for LLM operations
        system_prompt: Optional custom system prompt
        max_iterations: Maximum refinement iterations (default: 3)
        max_sub_queries: Maximum sub-queries to generate (default: 5)
        max_sources_per_query: Maximum sources per query (default: 5)
        follow_links: Whether to extract content from URLs (default: True)
        timeout_per_operation: Timeout per operation in seconds (default: 120)
        max_concurrent: Maximum concurrent operations (default: 3)
        background: Run in background, return immediately (default: False)
        task_timeout: Overall timeout for background task (optional)
        **kwargs: Accepted for call compatibility; not used here

    Returns:
        WorkflowResult with research state or error
    """
    # The read-only actions need nothing but the session id.
    if action == "status":
        return self._get_status(research_id=research_id)
    if action == "report":
        return self._get_report(research_id=research_id)
    if action == "cancel":
        return self._cancel_research(research_id=research_id)

    if action == "continue":
        return self._continue_research(
            research_id=research_id,
            provider_id=provider_id,
            timeout_per_operation=timeout_per_operation,
            max_concurrent=max_concurrent,
        )

    if action == "start":
        return self._start_research(
            query=query,
            provider_id=provider_id,
            system_prompt=system_prompt,
            max_iterations=max_iterations,
            max_sub_queries=max_sub_queries,
            max_sources_per_query=max_sources_per_query,
            follow_links=follow_links,
            timeout_per_operation=timeout_per_operation,
            max_concurrent=max_concurrent,
            background=background,
            task_timeout=task_timeout,
        )

    return WorkflowResult(
        success=False,
        content="",
        error=f"Unknown action '{action}'. Use: start, continue, status, report, cancel",
    )
1092
+
1093
+ # =========================================================================
1094
+ # Background Task Management
1095
+ # =========================================================================
1096
+
1097
def _start_background_task(
    self,
    state: DeepResearchState,
    provider_id: Optional[str],
    timeout_per_operation: float,
    max_concurrent: int,
    task_timeout: Optional[float],
) -> WorkflowResult:
    """Start research as a background asyncio task on the running loop.

    Returns immediately with the research id. The workflow itself runs in
    a task that is registered in the class-level task registry and in the
    active-session map until it finishes.

    Args:
        state: Research state to execute
        provider_id: Provider passed through to the workflow
        timeout_per_operation: Per-operation timeout passed to the workflow
        max_concurrent: Concurrency limit passed to the workflow
        task_timeout: Optional overall timeout (seconds) for the task

    Returns:
        A successful WorkflowResult carrying the research id, or a failed
        one when no event loop is running in the calling thread.
    """
    # asyncio.create_task() needs a *running* loop. Creating and installing
    # a fresh loop does not make it running, so fail with a clear result
    # instead of raising RuntimeError and leaving a stray loop behind.
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        reason = "Background execution requires a running asyncio event loop"
        self._write_audit_event(
            state,
            "background_task_failed",
            data={"error": reason},
            level="error",
        )
        return WorkflowResult(
            success=False,
            content="",
            error=reason,
            metadata={"research_id": state.id},
        )

    async def run_workflow() -> WorkflowResult:
        """Execute the full workflow asynchronously."""
        try:
            coro = self._execute_workflow_async(
                state=state,
                provider_id=provider_id,
                timeout_per_operation=timeout_per_operation,
                max_concurrent=max_concurrent,
            )
            if task_timeout:
                return await asyncio.wait_for(coro, timeout=task_timeout)
            return await coro
        except asyncio.CancelledError:
            # Cancellation is converted into a result so the cancelled flag
            # can be persisted; on_complete maps it back to CANCELLED.
            state.metadata["cancelled"] = True
            self.memory.save_deep_research(state)
            self._write_audit_event(
                state,
                "workflow_cancelled",
                data={"cancelled": True},
                level="warning",
            )
            return WorkflowResult(
                success=False,
                content="",
                error="Research was cancelled",
                metadata={"research_id": state.id, "cancelled": True},
            )
        except asyncio.TimeoutError:
            state.metadata["timeout"] = True
            state.metadata["abort_phase"] = state.phase.value
            state.metadata["abort_iteration"] = state.iteration
            self.memory.save_deep_research(state)
            self._write_audit_event(
                state,
                "workflow_timeout",
                data={
                    "timeout_seconds": task_timeout,
                    "abort_phase": state.phase.value,
                    "abort_iteration": state.iteration,
                },
                level="warning",
            )
            return WorkflowResult(
                success=False,
                content="",
                error=f"Research timed out after {task_timeout}s",
                metadata={"research_id": state.id, "timeout": True},
            )
        except Exception as exc:
            logger.exception("Background workflow failed: %s", exc)
            self._write_audit_event(
                state,
                "workflow_error",
                data={"error": str(exc)},
                level="error",
            )
            return WorkflowResult(
                success=False,
                content="",
                error=str(exc),
                metadata={"research_id": state.id},
            )

    # Create and register the task
    task = asyncio.create_task(run_workflow())
    bg_task = BackgroundTask(
        research_id=state.id,
        task=task,
        timeout=task_timeout,
    )
    self._tasks[state.id] = bg_task

    # Register session for crash handler visibility
    _active_research_sessions[state.id] = state

    self._write_audit_event(
        state,
        "background_task_started",
        data={
            "task_timeout": task_timeout,
            "timeout_per_operation": timeout_per_operation,
            "max_concurrent": max_concurrent,
        },
    )

    # Set up completion callback
    def on_complete(t: asyncio.Task) -> None:
        try:
            result = t.result()
            outcome = result.metadata or {}
            if outcome.get("timeout"):
                terminal_status = TaskStatus.TIMEOUT
            elif outcome.get("cancelled"):
                # run_workflow returns a result on cancellation rather than
                # re-raising, so the flag is the only signal available here.
                terminal_status = TaskStatus.CANCELLED
            else:
                terminal_status = None

            if terminal_status is None:
                bg_task.mark_completed(result)
            else:
                bg_task.status = terminal_status
                bg_task.result = result
                bg_task.completed_at = time.time()
                bg_task.error = result.error
        except asyncio.CancelledError:
            bg_task.status = TaskStatus.CANCELLED
            bg_task.completed_at = time.time()
        except Exception as exc:
            # Log the exception with full traceback
            logger.exception(
                "Background task failed for research %s: %s",
                state.id, exc
            )
            bg_task.status = TaskStatus.FAILED
            bg_task.error = str(exc)
            bg_task.completed_at = time.time()
            # Record to error store and audit (best effort)
            try:
                self._record_workflow_error(exc, state, "background_task")
                self._write_audit_event(
                    state,
                    "background_task_failed",
                    data={
                        "error": str(exc),
                        "traceback": traceback.format_exc(),
                    },
                    level="error",
                )
            except Exception:
                pass  # Already logged above
        finally:
            # Unregister from active sessions
            _active_research_sessions.pop(state.id, None)

    task.add_done_callback(on_complete)

    return WorkflowResult(
        success=True,
        content=f"Research started in background: {state.id}",
        metadata={
            "research_id": state.id,
            "background": True,
            "phase": state.phase.value,
        },
    )
1254
+
1255
def get_background_task(self, research_id: str) -> Optional[BackgroundTask]:
    """Look up the registered background task for ``research_id``.

    Returns ``None`` when the registry has no entry for that id.
    """
    registry = self._tasks
    return registry.get(research_id)
1258
+
1259
+ # =========================================================================
1260
+ # Action Handlers
1261
+ # =========================================================================
1262
+
1263
def _start_research(
    self,
    query: Optional[str],
    provider_id: Optional[str],
    system_prompt: Optional[str],
    max_iterations: int,
    max_sub_queries: int,
    max_sources_per_query: int,
    follow_links: bool,
    timeout_per_operation: float,
    max_concurrent: int,
    background: bool,
    task_timeout: Optional[float],
) -> WorkflowResult:
    """Start a new deep research session.

    Builds and saves the initial state, writes a ``workflow_start`` audit
    event, then either hands off to the background task runner or runs the
    workflow to completion and returns its result.

    Args:
        query: Research query; a falsy value yields a failed result
        provider_id: Explicit provider; overrides every per-phase provider
            and clears the per-phase models when given
        system_prompt: Optional custom system prompt
        max_iterations: Maximum refinement iterations
        max_sub_queries: Maximum sub-queries to generate
        max_sources_per_query: Maximum sources per query
        follow_links: Whether to extract content from URLs
        timeout_per_operation: Timeout per operation in seconds
        max_concurrent: Maximum concurrent operations
        background: Run as a background task and return immediately
        task_timeout: Overall timeout for the background task

    Returns:
        WorkflowResult from the workflow, the background start, or an error
    """
    if not query:
        return WorkflowResult(
            success=False,
            content="",
            error="Query is required to start research",
        )

    # Resolve per-phase providers and models from config
    # Supports ProviderSpec format: "[cli]gemini:pro" -> (provider_id, model)
    planning_pid, planning_model = self.config.resolve_phase_provider("planning")
    analysis_pid, analysis_model = self.config.resolve_phase_provider("analysis")
    synthesis_pid, synthesis_model = self.config.resolve_phase_provider("synthesis")
    refinement_pid, refinement_model = self.config.resolve_phase_provider("refinement")

    # Create initial state with per-phase provider configuration
    state = DeepResearchState(
        original_query=query,
        max_iterations=max_iterations,
        max_sub_queries=max_sub_queries,
        max_sources_per_query=max_sources_per_query,
        follow_links=follow_links,
        research_mode=ResearchMode(self.config.deep_research_mode),
        system_prompt=system_prompt,
        # Per-phase providers: explicit provider_id overrides config
        planning_provider=provider_id or planning_pid,
        analysis_provider=provider_id or analysis_pid,
        synthesis_provider=provider_id or synthesis_pid,
        refinement_provider=provider_id or refinement_pid,
        # Per-phase models from ProviderSpec (only used if provider_id not overridden)
        planning_model=None if provider_id else planning_model,
        analysis_model=None if provider_id else analysis_model,
        synthesis_model=None if provider_id else synthesis_model,
        refinement_model=None if provider_id else refinement_model,
    )

    # Save initial state
    self.memory.save_deep_research(state)
    self._write_audit_event(
        state,
        "workflow_start",
        data={
            "query": state.original_query,
            "config": {
                "max_iterations": max_iterations,
                "max_sub_queries": max_sub_queries,
                "max_sources_per_query": max_sources_per_query,
                "follow_links": follow_links,
                "timeout_per_operation": timeout_per_operation,
                "max_concurrent": max_concurrent,
            },
            "provider_id": provider_id,
            "background": background,
            "task_timeout": task_timeout,
        },
    )

    if background:
        return self._start_background_task(
            state=state,
            provider_id=provider_id,
            timeout_per_operation=timeout_per_operation,
            max_concurrent=max_concurrent,
            task_timeout=task_timeout,
        )

    # Synchronous execution
    def run_blocking() -> WorkflowResult:
        """Run the workflow to completion on a fresh event loop."""
        return asyncio.run(
            self._execute_workflow_async(
                state=state,
                provider_id=provider_id,
                timeout_per_operation=timeout_per_operation,
                max_concurrent=max_concurrent,
            )
        )

    # Only the loop probe sits inside the try, so a RuntimeError raised by
    # the workflow itself can no longer trigger a second full run.
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread: run directly.
        return run_blocking()

    # A loop is already running here and asyncio.run() cannot nest, so
    # execute on a worker thread and block until it finishes.
    import concurrent.futures

    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
        return executor.submit(run_blocking).result()
1378
+
1379
def _continue_research(
    self,
    research_id: Optional[str],
    provider_id: Optional[str],
    timeout_per_operation: float,
    max_concurrent: int,
) -> WorkflowResult:
    """Continue an existing research session from its current phase.

    Args:
        research_id: Session to resume; a falsy value yields a failed result
        provider_id: Provider passed through to the workflow
        timeout_per_operation: Timeout per operation in seconds
        max_concurrent: Maximum concurrent operations

    Returns:
        The stored report (or a placeholder message) when the session is
        already completed, an error result when the session is unknown,
        otherwise the result of running the remaining workflow.
    """
    if not research_id:
        return WorkflowResult(
            success=False,
            content="",
            error="research_id is required to continue research",
        )

    # Load existing state
    state = self.memory.load_deep_research(research_id)
    if state is None:
        return WorkflowResult(
            success=False,
            content="",
            error=f"Research session '{research_id}' not found",
        )

    if state.completed_at is not None:
        return WorkflowResult(
            success=True,
            content=state.report or "Research already completed",
            metadata={
                "research_id": state.id,
                "phase": state.phase.value,
                "is_complete": True,
            },
        )

    # Continue from current phase
    def run_blocking() -> WorkflowResult:
        """Run the remaining workflow to completion on a fresh event loop."""
        return asyncio.run(
            self._execute_workflow_async(
                state=state,
                provider_id=provider_id,
                timeout_per_operation=timeout_per_operation,
                max_concurrent=max_concurrent,
            )
        )

    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread: run directly.
        return run_blocking()

    # Called from inside a running loop: asyncio.run() and
    # run_until_complete() both refuse to nest, so run on a worker thread
    # and block until it finishes (same strategy as _start_research).
    import concurrent.futures

    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
        return executor.submit(run_blocking).result()
1438
+
1439
def _get_status(self, research_id: Optional[str]) -> WorkflowResult:
    """Report progress for a research session.

    A registered background task takes precedence: its task status is
    returned, enriched with progress counters from the persisted state when
    that can be loaded. Otherwise the persisted state alone is summarized.

    Args:
        research_id: Session to inspect; a falsy value yields a failed result

    Returns:
        WorkflowResult whose metadata carries the status fields, or an
        error result when the session cannot be found
    """
    if not research_id:
        return WorkflowResult(
            success=False,
            content="",
            error="research_id is required",
        )

    # Check background task first
    live_task = self.get_background_task(research_id)
    if live_task:
        # Also load persisted state to get progress metrics
        persisted = self.memory.load_deep_research(research_id)
        details: dict[str, Any] = {
            "research_id": research_id,
            "task_status": live_task.status.value,
            "elapsed_ms": live_task.elapsed_ms,
            "is_complete": live_task.task.done(),
        }
        # Include progress from persisted state if available
        if persisted:
            details["original_query"] = persisted.original_query
            details["phase"] = persisted.phase.value
            details["iteration"] = persisted.iteration
            details["max_iterations"] = persisted.max_iterations
            details["sub_queries_total"] = len(persisted.sub_queries)
            details["sub_queries_completed"] = len(persisted.completed_sub_queries())
            details["source_count"] = len(persisted.sources)
            details["finding_count"] = len(persisted.findings)
            details["gap_count"] = len(persisted.unresolved_gaps())
            details["total_tokens_used"] = persisted.total_tokens_used
        return WorkflowResult(
            success=True,
            content=f"Task status: {live_task.status.value}",
            metadata=details,
        )

    # Fall back to persisted state (task completed or not running)
    state = self.memory.load_deep_research(research_id)
    if state is None:
        return WorkflowResult(
            success=False,
            content="",
            error=f"Research session '{research_id}' not found",
        )

    # Human-readable summary, one field per line.
    summary = [
        f"Research ID: {state.id}",
        f"Query: {state.original_query}",
        f"Phase: {state.phase.value}",
        f"Iteration: {state.iteration}/{state.max_iterations}",
        f"Sub-queries: {len(state.completed_sub_queries())}/{len(state.sub_queries)} completed",
        f"Sources: {len(state.sources)} examined",
        f"Findings: {len(state.findings)}",
        f"Gaps: {len(state.unresolved_gaps())} unresolved",
        f"Status: {'Completed' if state.completed_at else 'In Progress'}",
    ]
    for flag_key, flag_line in (("timeout", "Timeout: True"), ("cancelled", "Cancelled: True")):
        if state.metadata.get(flag_key):
            summary.append(flag_line)

    details = {
        "research_id": state.id,
        "original_query": state.original_query,
        "phase": state.phase.value,
        "iteration": state.iteration,
        "max_iterations": state.max_iterations,
        "sub_queries_total": len(state.sub_queries),
        "sub_queries_completed": len(state.completed_sub_queries()),
        "source_count": len(state.sources),
        "finding_count": len(state.findings),
        "gap_count": len(state.unresolved_gaps()),
        "is_complete": state.completed_at is not None,
        "total_tokens_used": state.total_tokens_used,
        "total_duration_ms": state.total_duration_ms,
        "timed_out": bool(state.metadata.get("timeout")),
        "cancelled": bool(state.metadata.get("cancelled")),
    }
    return WorkflowResult(
        success=True,
        content="\n".join(summary),
        metadata=details,
    )
1525
+
1526
def _get_report(self, research_id: Optional[str]) -> WorkflowResult:
    """Return the stored report of a research session.

    Args:
        research_id: Session whose report is requested

    Returns:
        A successful WorkflowResult with the report text and summary
        metadata, or a failed one when the id is missing, the session is
        unknown, or no report has been stored yet
    """
    def failure(message: str) -> WorkflowResult:
        return WorkflowResult(success=False, content="", error=message)

    if not research_id:
        return failure("research_id is required")

    state = self.memory.load_deep_research(research_id)
    if state is None:
        return failure(f"Research session '{research_id}' not found")

    report_text = state.report
    if not report_text:
        return failure("Research report not yet generated")

    summary = {
        "research_id": state.id,
        "original_query": state.original_query,
        "source_count": len(state.sources),
        "finding_count": len(state.findings),
        "iteration": state.iteration,
        "is_complete": state.completed_at is not None,
    }
    return WorkflowResult(success=True, content=report_text, metadata=summary)
1562
+
1563
def _cancel_research(self, research_id: Optional[str]) -> WorkflowResult:
    """Request cancellation of a registered background research task.

    Args:
        research_id: Session whose task should be cancelled

    Returns:
        A successful WorkflowResult when the task accepted the cancel
        request; a failed one when the id is missing, no task is
        registered, or the task reports it could not be cancelled
    """
    if not research_id:
        return WorkflowResult(
            success=False,
            content="",
            error="research_id is required",
        )

    running = self.get_background_task(research_id)
    if running is None:
        return WorkflowResult(
            success=False,
            content="",
            error=f"No running task found for '{research_id}'",
        )

    if not running.cancel():
        return WorkflowResult(
            success=False,
            content="",
            error=f"Task '{research_id}' already completed",
        )

    # The audit event is only written when the persisted state can be loaded.
    persisted = self.memory.load_deep_research(research_id)
    if persisted:
        self._write_audit_event(
            persisted,
            "workflow_cancelled",
            data={"cancelled": True},
            level="warning",
        )
    return WorkflowResult(
        success=True,
        content=f"Research '{research_id}' cancelled",
        metadata={"research_id": research_id, "cancelled": True},
    )
1600
+
1601
+ # =========================================================================
1602
+ # Async Workflow Execution
1603
+ # =========================================================================
1604
+
1605
+ async def _execute_workflow_async(
1606
+ self,
1607
+ state: DeepResearchState,
1608
+ provider_id: Optional[str],
1609
+ timeout_per_operation: float,
1610
+ max_concurrent: int,
1611
+ ) -> WorkflowResult:
1612
+ """Execute the full workflow asynchronously.
1613
+
1614
+ This is the main async entry point that orchestrates all phases.
1615
+ """
1616
+ start_time = time.perf_counter()
1617
+
1618
+ try:
1619
+ # Phase execution based on current state
1620
+ if state.phase == DeepResearchPhase.PLANNING:
1621
+ phase_started = time.perf_counter()
1622
+ self.hooks.emit_phase_start(state)
1623
+ self._write_audit_event(
1624
+ state,
1625
+ "phase_start",
1626
+ data={"phase": state.phase.value},
1627
+ )
1628
+ result = await self._execute_planning_async(
1629
+ state=state,
1630
+ provider_id=state.planning_provider,
1631
+ timeout=self.config.get_phase_timeout("planning"),
1632
+ )
1633
+ if not result.success:
1634
+ self._write_audit_event(
1635
+ state,
1636
+ "phase_error",
1637
+ data={"phase": state.phase.value, "error": result.error},
1638
+ level="error",
1639
+ )
1640
+ self.memory.save_deep_research(state)
1641
+ return result
1642
+ self.hooks.emit_phase_complete(state)
1643
+ self._write_audit_event(
1644
+ state,
1645
+ "phase_complete",
1646
+ data={
1647
+ "phase": state.phase.value,
1648
+ "duration_ms": (time.perf_counter() - phase_started) * 1000,
1649
+ },
1650
+ )
1651
+ # Think pause: supervisor evaluates planning quality
1652
+ self._safe_orchestrator_transition(state, DeepResearchPhase.PLANNING)
1653
+
1654
+ if state.phase == DeepResearchPhase.GATHERING:
1655
+ phase_started = time.perf_counter()
1656
+ self.hooks.emit_phase_start(state)
1657
+ self._write_audit_event(
1658
+ state,
1659
+ "phase_start",
1660
+ data={"phase": state.phase.value},
1661
+ )
1662
+ result = await self._execute_gathering_async(
1663
+ state=state,
1664
+ provider_id=provider_id,
1665
+ timeout=timeout_per_operation,
1666
+ max_concurrent=max_concurrent,
1667
+ )
1668
+ if not result.success:
1669
+ self._write_audit_event(
1670
+ state,
1671
+ "phase_error",
1672
+ data={"phase": state.phase.value, "error": result.error},
1673
+ level="error",
1674
+ )
1675
+ self.memory.save_deep_research(state)
1676
+ return result
1677
+ self.hooks.emit_phase_complete(state)
1678
+ self._write_audit_event(
1679
+ state,
1680
+ "phase_complete",
1681
+ data={
1682
+ "phase": state.phase.value,
1683
+ "duration_ms": (time.perf_counter() - phase_started) * 1000,
1684
+ },
1685
+ )
1686
+ # Think pause: supervisor evaluates gathering quality
1687
+ self._safe_orchestrator_transition(state, DeepResearchPhase.GATHERING)
1688
+
1689
+ if state.phase == DeepResearchPhase.ANALYSIS:
1690
+ phase_started = time.perf_counter()
1691
+ self.hooks.emit_phase_start(state)
1692
+ self._write_audit_event(
1693
+ state,
1694
+ "phase_start",
1695
+ data={"phase": state.phase.value},
1696
+ )
1697
+ result = await self._execute_analysis_async(
1698
+ state=state,
1699
+ provider_id=state.analysis_provider,
1700
+ timeout=self.config.get_phase_timeout("analysis"),
1701
+ )
1702
+ if not result.success:
1703
+ self._write_audit_event(
1704
+ state,
1705
+ "phase_error",
1706
+ data={"phase": state.phase.value, "error": result.error},
1707
+ level="error",
1708
+ )
1709
+ self.memory.save_deep_research(state)
1710
+ return result
1711
+ self.hooks.emit_phase_complete(state)
1712
+ self._write_audit_event(
1713
+ state,
1714
+ "phase_complete",
1715
+ data={
1716
+ "phase": state.phase.value,
1717
+ "duration_ms": (time.perf_counter() - phase_started) * 1000,
1718
+ },
1719
+ )
1720
+ # Think pause: supervisor evaluates analysis quality
1721
+ self._safe_orchestrator_transition(state, DeepResearchPhase.ANALYSIS)
1722
+
1723
+ if state.phase == DeepResearchPhase.SYNTHESIS:
1724
+ phase_started = time.perf_counter()
1725
+ self.hooks.emit_phase_start(state)
1726
+ self._write_audit_event(
1727
+ state,
1728
+ "phase_start",
1729
+ data={"phase": state.phase.value},
1730
+ )
1731
+ result = await self._execute_synthesis_async(
1732
+ state=state,
1733
+ provider_id=state.synthesis_provider,
1734
+ timeout=self.config.get_phase_timeout("synthesis"),
1735
+ )
1736
+ if not result.success:
1737
+ self._write_audit_event(
1738
+ state,
1739
+ "phase_error",
1740
+ data={"phase": state.phase.value, "error": result.error},
1741
+ level="error",
1742
+ )
1743
+ self.memory.save_deep_research(state)
1744
+ return result
1745
+ self.hooks.emit_phase_complete(state)
1746
+ self._write_audit_event(
1747
+ state,
1748
+ "phase_complete",
1749
+ data={
1750
+ "phase": state.phase.value,
1751
+ "duration_ms": (time.perf_counter() - phase_started) * 1000,
1752
+ },
1753
+ )
1754
+ # Think pause: supervisor evaluates synthesis and decides iteration
1755
+ try:
1756
+ self.orchestrator.evaluate_phase_completion(state, DeepResearchPhase.SYNTHESIS)
1757
+ self.orchestrator.decide_iteration(state)
1758
+ prompt = self.orchestrator.get_reflection_prompt(state, DeepResearchPhase.SYNTHESIS)
1759
+ self.hooks.think_pause(state, prompt)
1760
+ self.orchestrator.record_to_state(state)
1761
+ except Exception as exc:
1762
+ logger.exception(
1763
+ "Orchestrator transition failed for synthesis, research %s: %s",
1764
+ state.id,
1765
+ exc,
1766
+ )
1767
+ self._write_audit_event(
1768
+ state,
1769
+ "orchestrator_error",
1770
+ data={
1771
+ "phase": "synthesis",
1772
+ "error": str(exc),
1773
+ "traceback": traceback.format_exc(),
1774
+ },
1775
+ level="error",
1776
+ )
1777
+ self._record_workflow_error(exc, state, "orchestrator_synthesis")
1778
+ raise
1779
+
1780
+ # Check if refinement needed
1781
+ if state.should_continue_refinement():
1782
+ state.phase = DeepResearchPhase.REFINEMENT
1783
+ else:
1784
+ state.mark_completed(report=result.content)
1785
+
1786
+ # Handle refinement phase
1787
+ if state.phase == DeepResearchPhase.REFINEMENT:
1788
+ phase_started = time.perf_counter()
1789
+ self.hooks.emit_phase_start(state)
1790
+ self._write_audit_event(
1791
+ state,
1792
+ "phase_start",
1793
+ data={"phase": state.phase.value},
1794
+ )
1795
+ # Generate follow-up queries from gaps
1796
+ await self._execute_refinement_async(
1797
+ state=state,
1798
+ provider_id=state.refinement_provider,
1799
+ timeout=self.config.get_phase_timeout("refinement"),
1800
+ )
1801
+ self.hooks.emit_phase_complete(state)
1802
+ self._write_audit_event(
1803
+ state,
1804
+ "phase_complete",
1805
+ data={
1806
+ "phase": state.phase.value,
1807
+ "duration_ms": (time.perf_counter() - phase_started) * 1000,
1808
+ },
1809
+ )
1810
+
1811
+ if state.should_continue_refinement():
1812
+ state.start_new_iteration()
1813
+ # Recursively continue workflow
1814
+ return await self._execute_workflow_async(
1815
+ state=state,
1816
+ provider_id=provider_id,
1817
+ timeout_per_operation=timeout_per_operation,
1818
+ max_concurrent=max_concurrent,
1819
+ )
1820
+ else:
1821
+ state.mark_completed(report=state.report)
1822
+
1823
+ # Calculate duration
1824
+ duration_ms = (time.perf_counter() - start_time) * 1000
1825
+ state.total_duration_ms += duration_ms
1826
+
1827
+ # Save final state
1828
+ self.memory.save_deep_research(state)
1829
+ self._write_audit_event(
1830
+ state,
1831
+ "workflow_complete",
1832
+ data={
1833
+ "success": True,
1834
+ "phase": state.phase.value,
1835
+ "iteration": state.iteration,
1836
+ "sub_query_count": len(state.sub_queries),
1837
+ "source_count": len(state.sources),
1838
+ "finding_count": len(state.findings),
1839
+ "gap_count": len(state.unresolved_gaps()),
1840
+ "report_length": len(state.report or ""),
1841
+ # Existing totals
1842
+ "total_tokens_used": state.total_tokens_used,
1843
+ "total_duration_ms": state.total_duration_ms,
1844
+ # Token breakdown totals
1845
+ "total_input_tokens": sum(
1846
+ m.input_tokens for m in state.phase_metrics
1847
+ ),
1848
+ "total_output_tokens": sum(
1849
+ m.output_tokens for m in state.phase_metrics
1850
+ ),
1851
+ "total_cached_tokens": sum(
1852
+ m.cached_tokens for m in state.phase_metrics
1853
+ ),
1854
+ # Per-phase metrics
1855
+ "phase_metrics": [
1856
+ {
1857
+ "phase": m.phase,
1858
+ "duration_ms": m.duration_ms,
1859
+ "input_tokens": m.input_tokens,
1860
+ "output_tokens": m.output_tokens,
1861
+ "cached_tokens": m.cached_tokens,
1862
+ "provider_id": m.provider_id,
1863
+ "model_used": m.model_used,
1864
+ }
1865
+ for m in state.phase_metrics
1866
+ ],
1867
+ # Search provider stats
1868
+ "search_provider_stats": state.search_provider_stats,
1869
+ "total_search_queries": sum(state.search_provider_stats.values()),
1870
+ # Source hostnames
1871
+ "source_hostnames": sorted(
1872
+ set(
1873
+ h
1874
+ for s in state.sources
1875
+ if s.url and (h := _extract_hostname(s.url))
1876
+ )
1877
+ ),
1878
+ # Research mode
1879
+ "research_mode": state.research_mode.value,
1880
+ },
1881
+ )
1882
+
1883
+ return WorkflowResult(
1884
+ success=True,
1885
+ content=state.report or "Research completed",
1886
+ provider_id=provider_id,
1887
+ tokens_used=state.total_tokens_used,
1888
+ duration_ms=duration_ms,
1889
+ metadata={
1890
+ "research_id": state.id,
1891
+ "phase": state.phase.value,
1892
+ "iteration": state.iteration,
1893
+ "sub_query_count": len(state.sub_queries),
1894
+ "source_count": len(state.sources),
1895
+ "finding_count": len(state.findings),
1896
+ "gap_count": len(state.unresolved_gaps()),
1897
+ "is_complete": state.completed_at is not None,
1898
+ },
1899
+ )
1900
+
1901
+ except Exception as exc:
1902
+ tb_str = traceback.format_exc()
1903
+ logger.exception(
1904
+ "Workflow execution failed at phase %s, iteration %d: %s",
1905
+ state.phase.value,
1906
+ state.iteration,
1907
+ exc,
1908
+ )
1909
+ self.memory.save_deep_research(state)
1910
+ self._write_audit_event(
1911
+ state,
1912
+ "workflow_error",
1913
+ data={
1914
+ "error": str(exc),
1915
+ "traceback": tb_str,
1916
+ "phase": state.phase.value,
1917
+ "iteration": state.iteration,
1918
+ },
1919
+ level="error",
1920
+ )
1921
+ self._record_workflow_error(exc, state, "workflow_execution")
1922
+ return WorkflowResult(
1923
+ success=False,
1924
+ content="",
1925
+ error=str(exc),
1926
+ metadata={
1927
+ "research_id": state.id,
1928
+ "phase": state.phase.value,
1929
+ "iteration": state.iteration,
1930
+ },
1931
+ )
1932
+
1933
+ # =========================================================================
1934
+ # Phase Implementations
1935
+ # =========================================================================
1936
+
1937
async def _execute_planning_async(
    self,
    state: DeepResearchState,
    provider_id: Optional[str],
    timeout: float,
) -> WorkflowResult:
    """Run the planning phase, turning the research query into sub-queries.

    Builds the planning prompts, calls the LLM provider, records token
    usage and phase metrics on ``state``, and stores the parsed research
    brief and sub-queries. When the response cannot be parsed, the
    original query is registered as the only sub-query.

    Args:
        state: Current research state; updated in place.
        provider_id: LLM provider to use; ``state.planning_provider`` is
            used when this is falsy.
        timeout: Request timeout in seconds, passed to the provider call.

    Returns:
        WorkflowResult describing the planning outcome. A failed provider
        result is returned unchanged; a ``ContextWindowError`` is turned
        into a failed result carrying ``error_type`` metadata.
    """
    logger.info("Starting planning phase for query: %s", state.original_query[:100])

    # Prompts are kept in locals because they are also written to the audit log.
    system_prompt = self._build_planning_system_prompt(state)
    user_prompt = self._build_planning_user_prompt(state)

    try:
        llm_result = self._execute_provider(
            prompt=user_prompt,
            provider_id=provider_id or state.planning_provider,
            model=state.planning_model,
            system_prompt=system_prompt,
            timeout=timeout,
            temperature=0.7,  # Some creativity for diverse sub-queries
        )
    except ContextWindowError as ctx_err:
        logger.error(
            "Planning phase context window exceeded: prompt_tokens=%s, "
            "max_tokens=%s, truncation_needed=%s, provider=%s",
            ctx_err.prompt_tokens,
            ctx_err.max_tokens,
            ctx_err.truncation_needed,
            ctx_err.provider,
        )
        overflow_metadata = {
            "research_id": state.id,
            "phase": "planning",
            "error_type": "context_window_exceeded",
            "prompt_tokens": ctx_err.prompt_tokens,
            "max_tokens": ctx_err.max_tokens,
            "truncation_needed": ctx_err.truncation_needed,
        }
        return WorkflowResult(
            success=False,
            content="",
            error=str(ctx_err),
            metadata=overflow_metadata,
        )

    if not llm_result.success:
        logger.error("Planning phase LLM call failed: %s", llm_result.error)
        return llm_result

    # Accumulate token usage on the research state
    if llm_result.tokens_used:
        state.total_tokens_used += llm_result.tokens_used

    # Per-phase metrics feed the audit trail; missing values default to zero
    planning_metrics = PhaseMetrics(
        phase="planning",
        duration_ms=llm_result.duration_ms or 0.0,
        input_tokens=llm_result.input_tokens or 0,
        output_tokens=llm_result.output_tokens or 0,
        cached_tokens=llm_result.cached_tokens or 0,
        provider_id=llm_result.provider_id,
        model_used=llm_result.model_used,
    )
    state.phase_metrics.append(planning_metrics)

    plan = self._parse_planning_response(llm_result.content, state)

    if plan["success"]:
        state.research_brief = plan["research_brief"]
        for entry in plan["sub_queries"]:
            state.add_sub_query(
                query=entry["query"],
                rationale=entry.get("rationale"),
                priority=entry.get("priority", 1),
            )
    else:
        logger.warning("Failed to parse planning response, using fallback")
        # Fallback: research the original query directly as one sub-query
        state.research_brief = f"Direct research on: {state.original_query}"
        state.add_sub_query(
            query=state.original_query,
            rationale="Original query used directly due to parsing failure",
            priority=1,
        )

    # Persist the state, then record the full exchange in the audit log
    self.memory.save_deep_research(state)
    audit_payload = {
        "provider_id": llm_result.provider_id,
        "model_used": llm_result.model_used,
        "tokens_used": llm_result.tokens_used,
        "duration_ms": llm_result.duration_ms,
        "system_prompt": system_prompt,
        "user_prompt": user_prompt,
        "raw_response": llm_result.content,
        "parse_success": plan["success"],
        "research_brief": state.research_brief,
        "sub_queries": [
            {
                "id": item.id,
                "query": item.query,
                "rationale": item.rationale,
                "priority": item.priority,
            }
            for item in state.sub_queries
        ],
    }
    self._write_audit_event(state, "planning_result", data=audit_payload)

    logger.info(
        "Planning phase complete: %d sub-queries generated",
        len(state.sub_queries),
    )

    return WorkflowResult(
        success=True,
        content=state.research_brief or "Planning complete",
        provider_id=llm_result.provider_id,
        model_used=llm_result.model_used,
        tokens_used=llm_result.tokens_used,
        duration_ms=llm_result.duration_ms,
        metadata={
            "research_id": state.id,
            "sub_query_count": len(state.sub_queries),
            "research_brief": state.research_brief,
        },
    )
2085
+
2086
def _build_planning_system_prompt(self, state: DeepResearchState) -> str:
    """Return the fixed system prompt used for query decomposition.

    The prompt tells the model to answer with a JSON object containing a
    ``research_brief`` string and a ``sub_queries`` list.

    Args:
        state: Current research state (not read by this method)

    Returns:
        System prompt string
    """
    prompt_lines = (
        "You are a research planning assistant. Your task is to analyze a research query and decompose it into focused sub-queries that can be researched independently.",
        "",
        "Your response MUST be valid JSON with this exact structure:",
        "{",
        '"research_brief": "A 2-3 sentence summary of the research approach and what aspects will be investigated",',
        '"sub_queries": [',
        "{",
        '"query": "A specific, focused search query",',
        '"rationale": "Why this sub-query is important for the research",',
        '"priority": 1',
        "}",
        "]",
        "}",
        "",
        "Guidelines:",
        "- Generate 2-5 sub-queries (aim for 3-4 typically)",
        "- Each sub-query should focus on a distinct aspect of the research",
        "- Queries should be specific enough to yield relevant search results",
        "- Priority 1 is highest (most important), higher numbers are lower priority",
        "- Avoid overlapping queries - each should cover unique ground",
        "- Consider different angles: definition, examples, comparisons, recent developments, expert opinions",
        "",
        "IMPORTANT: Return ONLY valid JSON, no markdown formatting or extra text.",
    )
    return "\n".join(prompt_lines)
2118
+
2119
def _build_planning_user_prompt(self, state: DeepResearchState) -> str:
    """Return the user prompt asking the model to decompose the query.

    The prompt embeds ``state.original_query`` and ``state.max_sub_queries``.
    When ``state.system_prompt`` is truthy it is appended as additional
    context.

    Args:
        state: Current research state

    Returns:
        User prompt string
    """
    lines = [
        f"Research Query: {state.original_query}",
        "",
        f"Please decompose this research query into {state.max_sub_queries} or fewer focused sub-queries.",
        "",
        "Consider:",
        "1. What are the key aspects that need investigation?",
        "2. What background information would help understand this topic?",
        "3. What specific questions would lead to comprehensive coverage?",
        "4. What different perspectives or sources might be valuable?",
        "",
        "Generate the research plan as JSON.",
    ]

    # Caller-supplied context goes last, separated by a blank line
    if state.system_prompt:
        lines.extend(["", f"Additional context: {state.system_prompt}"])

    return "\n".join(lines)
2145
+
2146
def _parse_planning_response(
    self,
    content: str,
    state: DeepResearchState,
) -> dict[str, Any]:
    """Parse LLM response into structured planning data.

    Extracts a JSON object from the response via ``self._extract_json``
    and validates the sub-queries in it. Malformed entries are skipped
    rather than raising, so callers can rely on the ``success`` flag.

    Args:
        content: Raw LLM response content
        state: Current research state (for max_sub_queries limit)

    Returns:
        Dict with 'success', 'research_brief', and 'sub_queries' keys.
        'success' is True only when at least one valid sub-query was found.
    """
    result: dict[str, Any] = {
        "success": False,
        "research_brief": None,
        "sub_queries": [],
    }

    if not content:
        return result

    # Try to extract JSON from the response
    json_str = self._extract_json(content)
    if not json_str:
        logger.warning("No JSON found in planning response")
        return result

    try:
        data = json.loads(json_str)
    except json.JSONDecodeError as e:
        logger.error("Failed to parse JSON from planning response: %s", e)
        return result

    # Anything other than a JSON object has no .get(); treat it as a parse failure
    if not isinstance(data, dict):
        logger.warning("Planning response JSON is not an object")
        return result

    # Extract research brief
    result["research_brief"] = data.get("research_brief", "")

    # Extract and validate sub-queries
    raw_queries = data.get("sub_queries", [])
    if not isinstance(raw_queries, list):
        logger.warning("sub_queries is not a list")
        return result

    accepted = result["sub_queries"]
    for i, sq in enumerate(raw_queries):
        # Limit to max_sub_queries
        if len(accepted) >= state.max_sub_queries:
            break

        if not isinstance(sq, dict):
            continue

        # A null or non-string query cannot be searched; skip it instead of
        # failing on .strip()
        raw_query = sq.get("query")
        if not isinstance(raw_query, str):
            continue
        query = raw_query.strip()
        if not query:
            continue

        # Models sometimes return priorities such as "high" or null; fall
        # back to the positional default rather than raising
        default_priority = i + 1
        try:
            priority = int(sq.get("priority", default_priority))
        except (TypeError, ValueError, OverflowError):
            priority = default_priority

        accepted.append({
            "query": query,
            "rationale": sq.get("rationale", ""),
            "priority": min(max(priority, 1), 10),  # clamp to 1..10
        })

    # Mark success if we got at least one sub-query
    result["success"] = len(accepted) > 0

    return result
2214
+
2215
def _extract_json(self, content: str) -> Optional[str]:
    """Extract JSON object from content that may contain other text.

    Handles cases where JSON is wrapped in markdown code blocks
    or mixed with explanatory text. The returned text is not validated
    as JSON; callers are expected to parse it.

    Args:
        content: Raw content that may contain JSON

    Returns:
        Extracted JSON string or None if not found
    """
    # First, try to find JSON in code blocks
    code_block_pattern = r'```(?:json)?\s*([\s\S]*?)```'
    for candidate in re.findall(code_block_pattern, content):
        candidate = candidate.strip()
        if candidate.startswith('{'):
            return candidate

    # Otherwise look for a raw object starting at the first '{'
    brace_start = content.find('{')
    if brace_start == -1:
        return None

    # Find the matching closing brace. Braces inside double-quoted string
    # literals are ignored so a value such as "}" does not end the object
    # early.
    depth = 0
    in_string = False
    escaped = False
    for i in range(brace_start, len(content)):
        char = content[i]
        if in_string:
            if escaped:
                escaped = False  # this character was escaped by a backslash
            elif char == '\\':
                escaped = True
            elif char == '"':
                in_string = False
            continue
        if char == '"':
            in_string = True
        elif char == '{':
            depth += 1
        elif char == '}':
            depth -= 1
            if depth == 0:
                return content[brace_start:i + 1]

    # Unbalanced braces or an unterminated string
    return None
2252
+
2253
async def _execute_gathering_async(
    self,
    state: DeepResearchState,
    provider_id: Optional[str],
    timeout: float,
    max_concurrent: int,
) -> WorkflowResult:
    """Execute gathering phase: parallel sub-query execution.

    This phase:
    1. Gets all pending sub-queries from the state
    2. Runs them concurrently, bounded by a semaphore, against every
       available search provider
    3. Collects sources, skipping duplicates by URL and by normalized title
    4. Marks sub-queries as completed/failed

    Args:
        state: Current research state with sub-queries; updated in place
        provider_id: LLM provider (not used in gathering)
        timeout: Request timeout in seconds (not referenced by this
            method; it is not forwarded to the search providers here)
        max_concurrent: Maximum number of sub-queries executed at once

    Returns:
        WorkflowResult with gathering outcome. It is a failure when no
        search provider could be created, or when no sources were added
        and every sub-query failed.
    """
    pending_queries = state.pending_sub_queries()
    if not pending_queries:
        logger.warning("No pending sub-queries for gathering phase")
        return WorkflowResult(
            success=True,
            content="No sub-queries to execute",
            metadata={"research_id": state.id, "source_count": 0},
        )

    logger.info(
        "Starting gathering phase: %d sub-queries, max_concurrent=%d",
        len(pending_queries),
        max_concurrent,
    )

    provider_names = getattr(
        self.config,
        "deep_research_providers",
        ["tavily", "google", "semantic_scholar"],
    )
    available_providers: list[SearchProvider] = []
    unavailable_providers: list[str] = []

    for name in provider_names:
        provider = self._get_search_provider(name)
        if provider is None:
            unavailable_providers.append(name)
            continue
        available_providers.append(provider)

    if not available_providers:
        return WorkflowResult(
            success=False,
            content="",
            error=(
                "No search providers available. Configure API keys for "
                "Tavily, Google, or Semantic Scholar."
            ),
        )

    # Semaphore for concurrency control
    semaphore = asyncio.Semaphore(max_concurrent)

    # Track collected sources for deduplication (shared by all sub-queries)
    seen_urls: set[str] = set()
    seen_titles: dict[str, str] = {}  # normalized_title -> first source URL
    total_sources_added = 0
    failed_queries = 0

    async def execute_sub_query(sub_query) -> tuple[int, Optional[str]]:
        """Execute a single sub-query and return (sources_added, error)."""
        async with semaphore:
            sub_query.status = "executing"

            provider_errors: list[str] = []
            added = 0

            for provider in available_providers:
                provider_name = provider.get_provider_name()
                try:
                    sources = await provider.search(
                        query=sub_query.query,
                        max_results=state.max_sources_per_query,
                        sub_query_id=sub_query.id,
                        include_raw_content=state.follow_links,
                    )

                    # Count what this provider contributed after deduplication
                    provider_added = 0
                    for source in sources:
                        # URL-based deduplication
                        if source.url and source.url in seen_urls:
                            continue  # Skip duplicate URL

                        # Title-based deduplication (same paper from different domains);
                        # short titles are too generic to compare
                        normalized_title = _normalize_title(source.title)
                        if normalized_title and len(normalized_title) > 20:
                            if normalized_title in seen_titles:
                                logger.debug(
                                    "Skipping duplicate by title: %s (already have %s)",
                                    source.url,
                                    seen_titles[normalized_title],
                                )
                                continue  # Skip duplicate title
                            seen_titles[normalized_title] = source.url or ""

                        if source.url:
                            seen_urls.add(source.url)
                            # Apply domain-based quality scoring
                            if source.quality == SourceQuality.UNKNOWN:
                                source.quality = get_domain_quality(
                                    source.url, state.research_mode
                                )

                        # Add source to state
                        state.sources.append(source)
                        state.total_sources_examined += 1
                        sub_query.source_ids.append(source.id)
                        added += 1
                        provider_added += 1

                    self._write_audit_event(
                        state,
                        "gathering_provider_result",
                        data={
                            "provider": provider_name,
                            "sub_query_id": sub_query.id,
                            "sub_query": sub_query.query,
                            # Sources kept after deduplication
                            "sources_added": provider_added,
                            # Raw number of results the provider returned
                            "sources_returned": len(sources),
                        },
                    )
                    # Track search provider query count
                    state.search_provider_stats[provider_name] = (
                        state.search_provider_stats.get(provider_name, 0) + 1
                    )
                except (SearchProviderError, Exception) as e:
                    # Provider-specific and unexpected errors are handled the
                    # same way: record them and move on to the next provider
                    provider_errors.append(f"{provider_name}: {e}")
                    self._write_audit_event(
                        state,
                        "gathering_provider_result",
                        data={
                            "provider": provider_name,
                            "sub_query_id": sub_query.id,
                            "sub_query": sub_query.query,
                            "sources_added": 0,
                            "error": str(e),
                        },
                        level="warning",
                    )

            if added > 0:
                sub_query.mark_completed(
                    findings=f"Found {added} sources"
                )
                logger.debug(
                    "Sub-query '%s' completed: %d sources",
                    sub_query.query[:50],
                    added,
                )
                return added, None

            error_summary = "; ".join(provider_errors) or "No sources found"
            sub_query.mark_failed(error_summary)
            logger.warning(
                "Sub-query '%s' failed: %s",
                sub_query.query[:50],
                error_summary,
            )
            return 0, error_summary

    # Execute all sub-queries concurrently; exceptions are returned, not raised
    tasks = [execute_sub_query(sq) for sq in pending_queries]
    results = await asyncio.gather(*tasks, return_exceptions=True)

    # Aggregate results
    for result in results:
        if isinstance(result, Exception):
            failed_queries += 1
            logger.error("Task exception: %s", result)
        else:
            added, error = result
            total_sources_added += added
            if error:
                failed_queries += 1

    # Update state timestamp (naive UTC, same value the previous code produced)
    from datetime import datetime

    state.updated_at = datetime.utcnow()

    providers_used = [p.get_provider_name() for p in available_providers]

    # Save state
    self.memory.save_deep_research(state)
    self._write_audit_event(
        state,
        "gathering_result",
        data={
            "source_count": total_sources_added,
            "queries_executed": len(pending_queries),
            "queries_failed": failed_queries,
            "unique_urls": len(seen_urls),
            "providers_used": providers_used,
            "providers_unavailable": unavailable_providers,
        },
    )

    # Determine success
    success = total_sources_added > 0 or failed_queries < len(pending_queries)

    logger.info(
        "Gathering phase complete: %d sources from %d queries (%d failed)",
        total_sources_added,
        len(pending_queries),
        failed_queries,
    )

    return WorkflowResult(
        success=success,
        content=f"Gathered {total_sources_added} sources from {len(pending_queries)} sub-queries",
        metadata={
            "research_id": state.id,
            "source_count": total_sources_added,
            "queries_executed": len(pending_queries),
            "queries_failed": failed_queries,
            "unique_urls": len(seen_urls),
            "providers_used": list(providers_used),
            "providers_unavailable": unavailable_providers,
        },
    )
2495
+
2496
def _get_search_provider(self, provider_name: str) -> Optional[SearchProvider]:
    """Return the search provider for ``provider_name``, creating it on first use.

    Instances are stored in ``self._search_providers`` and reused on later
    calls. Construction errors are logged and reported as ``None``.

    Args:
        provider_name: Name of the provider (e.g., "tavily")

    Returns:
        SearchProvider instance or None if unavailable
    """
    registry = self._search_providers
    if provider_name in registry:
        return registry[provider_name]

    # Lazy factories: a provider class is only touched when it is requested
    factories = {
        "tavily": lambda: TavilySearchProvider(),
        "perplexity": lambda: PerplexitySearchProvider(),
        "google": lambda: GoogleSearchProvider(),
        "semantic_scholar": lambda: SemanticScholarProvider(),
    }

    try:
        make_provider = factories.get(provider_name)
        if make_provider is None:
            logger.warning("Unknown search provider: %s", provider_name)
            return None
        instance = make_provider()
        registry[provider_name] = instance
        return instance
    except ValueError as e:
        # API key not configured
        logger.error("Failed to initialize %s provider: %s", provider_name, e)
        return None
    except Exception as e:
        logger.error("Error initializing %s provider: %s", provider_name, e)
        return None
2535
+
2536
async def _execute_analysis_async(
    self,
    state: DeepResearchState,
    provider_id: Optional[str],
    timeout: float,
) -> WorkflowResult:
    """Run the analysis phase, extracting findings and gaps from sources.

    Builds the analysis prompts from ``state``, calls the LLM provider,
    records token usage and phase metrics, then stores the parsed
    findings, gaps and source-quality updates on ``state``.

    Args:
        state: Current research state with gathered sources; updated in place
        provider_id: LLM provider to use; ``state.analysis_provider`` is
            used when this is falsy
        timeout: Request timeout in seconds, passed to the provider call

    Returns:
        WorkflowResult with analysis outcome. Having no sources, or an
        unparseable response, still yields a successful result with zero
        findings; a ``ContextWindowError`` yields a failed result.
    """
    if not state.sources:
        logger.warning("No sources to analyze")
        return WorkflowResult(
            success=True,
            content="No sources to analyze",
            metadata={"research_id": state.id, "finding_count": 0},
        )

    logger.info(
        "Starting analysis phase: %d sources to analyze",
        len(state.sources),
    )

    # Prompts are kept in locals because they are also written to the audit log
    system_prompt = self._build_analysis_system_prompt(state)
    user_prompt = self._build_analysis_user_prompt(state)

    try:
        llm_result = self._execute_provider(
            prompt=user_prompt,
            provider_id=provider_id or state.analysis_provider,
            model=state.analysis_model,
            system_prompt=system_prompt,
            timeout=timeout,
            temperature=0.3,  # Lower temperature for analytical tasks
        )
    except ContextWindowError as ctx_err:
        logger.error(
            "Analysis phase context window exceeded: prompt_tokens=%s, "
            "max_tokens=%s, truncation_needed=%s, provider=%s, source_count=%d",
            ctx_err.prompt_tokens,
            ctx_err.max_tokens,
            ctx_err.truncation_needed,
            ctx_err.provider,
            len(state.sources),
        )
        overflow_metadata = {
            "research_id": state.id,
            "phase": "analysis",
            "error_type": "context_window_exceeded",
            "prompt_tokens": ctx_err.prompt_tokens,
            "max_tokens": ctx_err.max_tokens,
            "truncation_needed": ctx_err.truncation_needed,
            "source_count": len(state.sources),
            "guidance": "Try reducing max_sources_per_query or processing sources in batches",
        }
        return WorkflowResult(
            success=False,
            content="",
            error=str(ctx_err),
            metadata=overflow_metadata,
        )

    if not llm_result.success:
        logger.error("Analysis phase LLM call failed: %s", llm_result.error)
        return llm_result

    # Accumulate token usage on the research state
    if llm_result.tokens_used:
        state.total_tokens_used += llm_result.tokens_used

    # Per-phase metrics feed the audit trail; missing values default to zero
    analysis_metrics = PhaseMetrics(
        phase="analysis",
        duration_ms=llm_result.duration_ms or 0.0,
        input_tokens=llm_result.input_tokens or 0,
        output_tokens=llm_result.output_tokens or 0,
        cached_tokens=llm_result.cached_tokens or 0,
        provider_id=llm_result.provider_id,
        model_used=llm_result.model_used,
    )
    state.phase_metrics.append(analysis_metrics)

    parsed = self._parse_analysis_response(llm_result.content, state)

    # Fields shared by the success and failure audit events
    exchange_record = {
        "provider_id": llm_result.provider_id,
        "model_used": llm_result.model_used,
        "tokens_used": llm_result.tokens_used,
        "duration_ms": llm_result.duration_ms,
        "system_prompt": system_prompt,
        "user_prompt": user_prompt,
        "raw_response": llm_result.content,
    }

    if not parsed["success"]:
        logger.warning("Failed to parse analysis response")
        self._write_audit_event(
            state,
            "analysis_result",
            data={
                **exchange_record,
                "parse_success": False,
                "findings": [],
                "gaps": [],
                "quality_updates": [],
            },
            level="warning",
        )
        # The phase still counts as successful, just without findings
        return WorkflowResult(
            success=True,
            content="Analysis completed but no findings extracted",
            metadata={
                "research_id": state.id,
                "finding_count": 0,
                "parse_error": True,
            },
        )

    # Record findings on the state
    for finding in parsed["findings"]:
        state.add_finding(
            content=finding["content"],
            confidence=finding["confidence"],
            source_ids=finding.get("source_ids", []),
            category=finding.get("category"),
        )

    # Record knowledge gaps on the state
    for gap in parsed["gaps"]:
        state.add_gap(
            description=gap["description"],
            suggested_queries=gap.get("suggested_queries", []),
            priority=gap.get("priority", 1),
        )

    # Apply the model's source-quality assessments
    for update in parsed.get("quality_updates", []):
        target = state.get_source(update["source_id"])
        if not target:
            continue
        try:
            target.quality = SourceQuality(update["quality"])
        except ValueError:
            pass  # Invalid quality value, skip

    # Persist the state, then record the full exchange in the audit log
    self.memory.save_deep_research(state)
    self._write_audit_event(
        state,
        "analysis_result",
        data={
            **exchange_record,
            "parse_success": True,
            "findings": parsed["findings"],
            "gaps": parsed["gaps"],
            "quality_updates": parsed.get("quality_updates", []),
        },
    )

    finding_total = len(parsed["findings"])
    gap_total = len(parsed["gaps"])

    logger.info(
        "Analysis phase complete: %d findings, %d gaps identified",
        finding_total,
        gap_total,
    )

    return WorkflowResult(
        success=True,
        content=f"Extracted {finding_total} findings and identified {gap_total} gaps",
        provider_id=llm_result.provider_id,
        model_used=llm_result.model_used,
        tokens_used=llm_result.tokens_used,
        duration_ms=llm_result.duration_ms,
        metadata={
            "research_id": state.id,
            "finding_count": finding_total,
            "gap_count": gap_total,
            "source_count": len(state.sources),
        },
    )
2733
+
2734
def _build_analysis_system_prompt(self, state: DeepResearchState) -> str:
    """Return the fixed system prompt used for source analysis.

    The prompt tells the model to answer with a JSON object containing
    ``findings``, ``gaps`` and ``quality_updates`` lists.

    Args:
        state: Current research state (not read by this method)

    Returns:
        System prompt string
    """
    prompt_lines = (
        "You are a research analyst. Your task is to analyze research sources and extract key findings, assess their quality, and identify knowledge gaps.",
        "",
        "Your response MUST be valid JSON with this exact structure:",
        "{",
        '"findings": [',
        "{",
        '"content": "A clear, specific finding or insight extracted from the sources",',
        '"confidence": "low|medium|high",',
        '"source_ids": ["src-xxx", "src-yyy"],',
        '"category": "optional category/theme"',
        "}",
        "],",
        '"gaps": [',
        "{",
        '"description": "Description of missing information or unanswered question",',
        '"suggested_queries": ["follow-up query 1", "follow-up query 2"],',
        '"priority": 1',
        "}",
        "],",
        '"quality_updates": [',
        "{",
        '"source_id": "src-xxx",',
        '"quality": "low|medium|high"',
        "}",
        "]",
        "}",
        "",
        "Guidelines for findings:",
        "- Extract 2-5 key findings from the sources",
        "- Each finding should be a specific, actionable insight",
        '- Confidence levels: "low" (single weak source), "medium" (multiple sources or one authoritative), "high" (multiple authoritative sources agree)',
        "- Include source_ids that support each finding",
        "- Categorize findings by theme when applicable",
        "",
        "Guidelines for gaps:",
        "- Identify 1-3 knowledge gaps or unanswered questions",
        "- Provide specific follow-up queries that could fill each gap",
        "- Priority 1 is most important, higher numbers are lower priority",
        "",
        "Guidelines for quality_updates:",
        "- Assess source quality based on authority, relevance, and recency",
        '- "low" = questionable reliability, "medium" = generally reliable, "high" = authoritative',
        "",
        "IMPORTANT: Return ONLY valid JSON, no markdown formatting or extra text.",
    )
    return "\n".join(prompt_lines)
2787
+
2788
+ def _build_analysis_user_prompt(self, state: DeepResearchState) -> str:
2789
+ """Build user prompt with source summaries for analysis.
2790
+
2791
+ Args:
2792
+ state: Current research state
2793
+
2794
+ Returns:
2795
+ User prompt string
2796
+ """
2797
+ prompt_parts = [
2798
+ f"Original Research Query: {state.original_query}",
2799
+ "",
2800
+ "Research Brief:",
2801
+ state.research_brief or "Direct research on the query",
2802
+ "",
2803
+ "Sources to Analyze:",
2804
+ "",
2805
+ ]
2806
+
2807
+ # Add source summaries
2808
+ for i, source in enumerate(state.sources[:20], 1): # Limit to 20 sources
2809
+ prompt_parts.append(f"Source {i} (ID: {source.id}):")
2810
+ prompt_parts.append(f" Title: {source.title}")
2811
+ if source.url:
2812
+ prompt_parts.append(f" URL: {source.url}")
2813
+ if source.snippet:
2814
+ # Truncate long snippets
2815
+ snippet = source.snippet[:500] + "..." if len(source.snippet) > 500 else source.snippet
2816
+ prompt_parts.append(f" Snippet: {snippet}")
2817
+ if source.content:
2818
+ # Truncate long content
2819
+ content = source.content[:1000] + "..." if len(source.content) > 1000 else source.content
2820
+ prompt_parts.append(f" Content: {content}")
2821
+ prompt_parts.append("")
2822
+
2823
+ prompt_parts.extend([
2824
+ "Please analyze these sources and:",
2825
+ "1. Extract 2-5 key findings relevant to the research query",
2826
+ "2. Assess confidence levels based on source agreement and authority",
2827
+ "3. Identify any knowledge gaps or unanswered questions",
2828
+ "4. Assess the quality of each source",
2829
+ "",
2830
+ "Return your analysis as JSON.",
2831
+ ])
2832
+
2833
+ return "\n".join(prompt_parts)
2834
+
2835
+ def _parse_analysis_response(
2836
+ self,
2837
+ content: str,
2838
+ state: DeepResearchState,
2839
+ ) -> dict[str, Any]:
2840
+ """Parse LLM response into structured analysis data.
2841
+
2842
+ Args:
2843
+ content: Raw LLM response content
2844
+ state: Current research state
2845
+
2846
+ Returns:
2847
+ Dict with 'success', 'findings', 'gaps', and 'quality_updates' keys
2848
+ """
2849
+ result = {
2850
+ "success": False,
2851
+ "findings": [],
2852
+ "gaps": [],
2853
+ "quality_updates": [],
2854
+ }
2855
+
2856
+ if not content:
2857
+ return result
2858
+
2859
+ # Try to extract JSON from the response
2860
+ json_str = self._extract_json(content)
2861
+ if not json_str:
2862
+ logger.warning("No JSON found in analysis response")
2863
+ return result
2864
+
2865
+ try:
2866
+ data = json.loads(json_str)
2867
+ except json.JSONDecodeError as e:
2868
+ logger.error("Failed to parse JSON from analysis response: %s", e)
2869
+ return result
2870
+
2871
+ # Parse findings
2872
+ raw_findings = data.get("findings", [])
2873
+ if isinstance(raw_findings, list):
2874
+ for f in raw_findings:
2875
+ if not isinstance(f, dict):
2876
+ continue
2877
+ content_text = f.get("content", "").strip()
2878
+ if not content_text:
2879
+ continue
2880
+
2881
+ # Map confidence string to enum
2882
+ confidence_str = f.get("confidence", "medium").lower()
2883
+ confidence_map = {
2884
+ "low": ConfidenceLevel.LOW,
2885
+ "medium": ConfidenceLevel.MEDIUM,
2886
+ "high": ConfidenceLevel.HIGH,
2887
+ "confirmed": ConfidenceLevel.CONFIRMED,
2888
+ "speculation": ConfidenceLevel.SPECULATION,
2889
+ }
2890
+ confidence = confidence_map.get(confidence_str, ConfidenceLevel.MEDIUM)
2891
+
2892
+ result["findings"].append({
2893
+ "content": content_text,
2894
+ "confidence": confidence,
2895
+ "source_ids": f.get("source_ids", []),
2896
+ "category": f.get("category"),
2897
+ })
2898
+
2899
+ # Parse gaps
2900
+ raw_gaps = data.get("gaps", [])
2901
+ if isinstance(raw_gaps, list):
2902
+ for g in raw_gaps:
2903
+ if not isinstance(g, dict):
2904
+ continue
2905
+ description = g.get("description", "").strip()
2906
+ if not description:
2907
+ continue
2908
+
2909
+ result["gaps"].append({
2910
+ "description": description,
2911
+ "suggested_queries": g.get("suggested_queries", []),
2912
+ "priority": min(max(int(g.get("priority", 1)), 1), 10),
2913
+ })
2914
+
2915
+ # Parse quality updates
2916
+ raw_quality = data.get("quality_updates", [])
2917
+ if isinstance(raw_quality, list):
2918
+ for q in raw_quality:
2919
+ if not isinstance(q, dict):
2920
+ continue
2921
+ source_id = q.get("source_id", "").strip()
2922
+ quality = q.get("quality", "").lower()
2923
+ if source_id and quality in ("low", "medium", "high", "unknown"):
2924
+ result["quality_updates"].append({
2925
+ "source_id": source_id,
2926
+ "quality": quality,
2927
+ })
2928
+
2929
+ # Mark success if we got at least one finding
2930
+ result["success"] = len(result["findings"]) > 0
2931
+
2932
+ return result
2933
+
2934
async def _execute_synthesis_async(
    self,
    state: DeepResearchState,
    provider_id: Optional[str],
    timeout: float,
) -> WorkflowResult:
    """Run the synthesis phase: turn analyzed findings into a markdown report.

    When ``state.findings`` is empty no LLM call is made: a placeholder
    report from ``_generate_empty_report`` is stored in ``state.report``,
    an audit event is written, and a successful result is returned.

    Otherwise the synthesis prompts are built and passed to
    ``_execute_provider``. On success, token usage and phase metrics are
    recorded on ``state``, the report is extracted from the response
    (falling back to the raw response text), stored in ``state.report``,
    the state is saved, and an audit event is written.

    Args:
        state: Current research state with findings from analysis
        provider_id: LLM provider to use; ``state.synthesis_provider`` is
            used when this is falsy
        timeout: Request timeout in seconds

    Returns:
        WorkflowResult with synthesis outcome. A ContextWindowError from
        the provider call is converted into a failed result; a failed
        provider result is returned unchanged.
    """
    # Guard: nothing to synthesize, so produce a minimal report instead
    if not state.findings:
        logger.warning("No findings to synthesize")
        state.report = self._generate_empty_report(state)
        empty_audit = {
            "provider_id": None,
            "model_used": None,
            "tokens_used": None,
            "duration_ms": None,
            "system_prompt": None,
            "user_prompt": None,
            "raw_response": None,
            "report": state.report,
            "empty_report": True,
        }
        self._write_audit_event(
            state,
            "synthesis_result",
            data=empty_audit,
            level="warning",
        )
        empty_metadata = {
            "research_id": state.id,
            "finding_count": 0,
            "empty_report": True,
        }
        return WorkflowResult(
            success=True,
            content=state.report,
            metadata=empty_metadata,
        )

    logger.info(
        "Starting synthesis phase: %d findings, %d sources",
        len(state.findings),
        len(state.sources),
    )

    # Prompts are kept in locals because the audit event records them too
    system_prompt = self._build_synthesis_system_prompt(state)
    user_prompt = self._build_synthesis_user_prompt(state)

    # Provider call; an oversized prompt is reported as a failed result
    try:
        llm_result = self._execute_provider(
            prompt=user_prompt,
            provider_id=provider_id or state.synthesis_provider,
            model=state.synthesis_model,
            system_prompt=system_prompt,
            timeout=timeout,
            temperature=0.5,  # Balanced for coherent but varied writing
        )
    except ContextWindowError as exc:
        logger.error(
            "Synthesis phase context window exceeded: prompt_tokens=%s, "
            "max_tokens=%s, truncation_needed=%s, provider=%s, finding_count=%d",
            exc.prompt_tokens,
            exc.max_tokens,
            exc.truncation_needed,
            exc.provider,
            len(state.findings),
        )
        overflow_metadata = {
            "research_id": state.id,
            "phase": "synthesis",
            "error_type": "context_window_exceeded",
            "prompt_tokens": exc.prompt_tokens,
            "max_tokens": exc.max_tokens,
            "truncation_needed": exc.truncation_needed,
            "finding_count": len(state.findings),
            "guidance": "Try reducing the number of findings or source content included",
        }
        return WorkflowResult(
            success=False,
            content="",
            error=str(exc),
            metadata=overflow_metadata,
        )

    if not llm_result.success:
        logger.error("Synthesis phase LLM call failed: %s", llm_result.error)
        return llm_result

    # Accumulate token usage on the session
    if llm_result.tokens_used:
        state.total_tokens_used += llm_result.tokens_used

    # Per-phase metrics for the audit trail; missing numbers are stored as 0
    synthesis_metrics = PhaseMetrics(
        phase="synthesis",
        duration_ms=llm_result.duration_ms or 0.0,
        input_tokens=llm_result.input_tokens or 0,
        output_tokens=llm_result.output_tokens or 0,
        cached_tokens=llm_result.cached_tokens or 0,
        provider_id=llm_result.provider_id,
        model_used=llm_result.model_used,
    )
    state.phase_metrics.append(synthesis_metrics)

    # Pull the markdown out of the response, or keep the raw text
    extracted = self._extract_markdown_report(llm_result.content)
    if not extracted:
        logger.warning("Failed to extract report from synthesis response")
        extracted = llm_result.content
    state.report = extracted

    # Persist first, then audit
    self.memory.save_deep_research(state)
    audit_payload = {
        "provider_id": llm_result.provider_id,
        "model_used": llm_result.model_used,
        "tokens_used": llm_result.tokens_used,
        "duration_ms": llm_result.duration_ms,
        "system_prompt": system_prompt,
        "user_prompt": user_prompt,
        "raw_response": llm_result.content,
        "report": state.report,
        "report_length": len(state.report),
    }
    self._write_audit_event(
        state,
        "synthesis_result",
        data=audit_payload,
    )

    logger.info(
        "Synthesis phase complete: report length %d chars",
        len(state.report),
    )

    outcome_metadata = {
        "research_id": state.id,
        "finding_count": len(state.findings),
        "source_count": len(state.sources),
        "report_length": len(state.report),
        "iteration": state.iteration,
    }
    return WorkflowResult(
        success=True,
        content=state.report,
        provider_id=llm_result.provider_id,
        model_used=llm_result.model_used,
        tokens_used=llm_result.tokens_used,
        duration_ms=llm_result.duration_ms,
        metadata=outcome_metadata,
    )
3107
+
3108
+ def _build_synthesis_system_prompt(self, state: DeepResearchState) -> str:
3109
+ """Build system prompt for report synthesis.
3110
+
3111
+ Args:
3112
+ state: Current research state
3113
+
3114
+ Returns:
3115
+ System prompt string
3116
+ """
3117
+ return """You are a research synthesizer. Your task is to create a comprehensive, well-structured research report from analyzed findings.
3118
+
3119
+ Generate a markdown-formatted report with the following structure:
3120
+
3121
+ # Research Report: [Topic]
3122
+
3123
+ ## Executive Summary
3124
+ A 2-3 paragraph overview of the key insights and conclusions.
3125
+
3126
+ ## Key Findings
3127
+
3128
+ ### [Theme/Category 1]
3129
+ - Finding with supporting evidence and source citations [Source ID]
3130
+ - Related findings grouped together
3131
+
3132
+ ### [Theme/Category 2]
3133
+ - Continue for each major theme...
3134
+
3135
+ ## Analysis
3136
+
3137
+ ### Supporting Evidence
3138
+ Discussion of well-supported findings with high confidence.
3139
+
3140
+ ### Conflicting Information
3141
+ Note any contradictions or disagreements between sources (if present).
3142
+
3143
+ ### Limitations
3144
+ Acknowledge gaps in the research and areas needing further investigation.
3145
+
3146
+ ## Sources
3147
+ List sources as markdown links with their IDs: **[src-xxx]** [Title](URL)
3148
+
3149
+ ## Conclusions
3150
+ Actionable insights and recommendations based on the findings.
3151
+
3152
+ ---
3153
+
3154
+ Guidelines:
3155
+ - Organize findings thematically rather than listing them sequentially
3156
+ - Cite source IDs in brackets when referencing specific information [src-xxx]
3157
+ - Distinguish between high-confidence findings (well-supported) and lower-confidence insights
3158
+ - Be specific and actionable in conclusions
3159
+ - Keep the report focused on the original research query
3160
+ - Use clear, professional language
3161
+ - Include all relevant findings - don't omit information
3162
+
3163
+ IMPORTANT: Return ONLY the markdown report, no preamble or meta-commentary."""
3164
+
3165
+ def _build_synthesis_user_prompt(self, state: DeepResearchState) -> str:
3166
+ """Build user prompt with findings and sources for synthesis.
3167
+
3168
+ Args:
3169
+ state: Current research state
3170
+
3171
+ Returns:
3172
+ User prompt string
3173
+ """
3174
+ prompt_parts = [
3175
+ f"# Research Query\n{state.original_query}",
3176
+ "",
3177
+ f"## Research Brief\n{state.research_brief or 'Direct research on the query'}",
3178
+ "",
3179
+ "## Findings to Synthesize",
3180
+ "",
3181
+ ]
3182
+
3183
+ # Group findings by category if available
3184
+ categorized: dict[str, list] = {}
3185
+ uncategorized = []
3186
+
3187
+ for finding in state.findings:
3188
+ category = finding.category or "General"
3189
+ if category not in categorized:
3190
+ categorized[category] = []
3191
+ categorized[category].append(finding)
3192
+
3193
+ # Add findings by category
3194
+ for category, findings in categorized.items():
3195
+ prompt_parts.append(f"### {category}")
3196
+ for f in findings:
3197
+ confidence_label = f.confidence.value if hasattr(f.confidence, 'value') else str(f.confidence)
3198
+ source_refs = ", ".join(f.source_ids) if f.source_ids else "no sources"
3199
+ prompt_parts.append(f"- [{confidence_label.upper()}] {f.content}")
3200
+ prompt_parts.append(f" Sources: {source_refs}")
3201
+ prompt_parts.append("")
3202
+
3203
+ # Add knowledge gaps
3204
+ if state.gaps:
3205
+ prompt_parts.append("## Knowledge Gaps Identified")
3206
+ for gap in state.gaps:
3207
+ status = "addressed" if gap.resolved else "unresolved"
3208
+ prompt_parts.append(f"- [{status}] {gap.description}")
3209
+ prompt_parts.append("")
3210
+
3211
+ # Add source reference list
3212
+ prompt_parts.append("## Source Reference")
3213
+ for source in state.sources[:30]: # Limit to 30 for context window
3214
+ quality = source.quality.value if hasattr(source.quality, 'value') else str(source.quality)
3215
+ prompt_parts.append(f"- {source.id}: {source.title} [{quality}]")
3216
+ if source.url:
3217
+ prompt_parts.append(f" URL: {source.url}")
3218
+ prompt_parts.append("")
3219
+
3220
+ # Add synthesis instructions
3221
+ prompt_parts.extend([
3222
+ "## Instructions",
3223
+ f"Generate a comprehensive research report addressing the query: '{state.original_query}'",
3224
+ "",
3225
+ f"This is iteration {state.iteration} of {state.max_iterations}.",
3226
+ f"Total findings: {len(state.findings)}",
3227
+ f"Total sources: {len(state.sources)}",
3228
+ f"Unresolved gaps: {len(state.unresolved_gaps())}",
3229
+ "",
3230
+ "Create a well-structured markdown report following the format specified.",
3231
+ ])
3232
+
3233
+ return "\n".join(prompt_parts)
3234
+
3235
+ def _extract_markdown_report(self, content: str) -> Optional[str]:
3236
+ """Extract markdown report from LLM response.
3237
+
3238
+ The response should be pure markdown, but this handles cases where
3239
+ the LLM wraps it in code blocks or adds preamble.
3240
+
3241
+ Args:
3242
+ content: Raw LLM response content
3243
+
3244
+ Returns:
3245
+ Extracted markdown report or None if extraction fails
3246
+ """
3247
+ if not content:
3248
+ return None
3249
+
3250
+ # If content starts with markdown heading, it's likely clean
3251
+ if content.strip().startswith("#"):
3252
+ return content.strip()
3253
+
3254
+ # Check for markdown code block wrapper
3255
+ if "```markdown" in content or "```md" in content:
3256
+ # Extract content between code blocks
3257
+ pattern = r'```(?:markdown|md)?\s*([\s\S]*?)```'
3258
+ matches = re.findall(pattern, content)
3259
+ if matches:
3260
+ return matches[0].strip()
3261
+
3262
+ # Check for generic code block
3263
+ if "```" in content:
3264
+ pattern = r'```\s*([\s\S]*?)```'
3265
+ matches = re.findall(pattern, content)
3266
+ for match in matches:
3267
+ # Check if it looks like markdown (has headings)
3268
+ if match.strip().startswith("#") or "##" in match:
3269
+ return match.strip()
3270
+
3271
+ # Look for first heading and take everything from there
3272
+ heading_match = re.search(r'^(#[^\n]+)', content, re.MULTILINE)
3273
+ if heading_match:
3274
+ start_pos = heading_match.start()
3275
+ return content[start_pos:].strip()
3276
+
3277
+ # If nothing else works, return the trimmed content
3278
+ return content.strip() if len(content.strip()) > 50 else None
3279
+
3280
+ def _generate_empty_report(self, state: DeepResearchState) -> str:
3281
+ """Generate a minimal report when no findings are available.
3282
+
3283
+ Args:
3284
+ state: Current research state
3285
+
3286
+ Returns:
3287
+ Minimal markdown report
3288
+ """
3289
+ return f"""# Research Report
3290
+
3291
+ ## Executive Summary
3292
+
3293
+ Research was conducted on the query: "{state.original_query}"
3294
+
3295
+ Unfortunately, the analysis phase did not yield extractable findings from the gathered sources. This may indicate:
3296
+ - The sources lacked relevant information
3297
+ - The query may need refinement
3298
+ - Additional research iterations may be needed
3299
+
3300
+ ## Research Query
3301
+
3302
+ {state.original_query}
3303
+
3304
+ ## Research Brief
3305
+
3306
+ {state.research_brief or "No research brief generated."}
3307
+
3308
+ ## Sources Examined
3309
+
3310
+ {len(state.sources)} source(s) were examined during this research session.
3311
+
3312
+ ## Recommendations
3313
+
3314
+ 1. Consider refining the research query for more specific results
3315
+ 2. Try additional research iterations if available
3316
+ 3. Review the gathered sources manually for relevant information
3317
+
3318
+ ---
3319
+
3320
+ *Report generated with no extractable findings. Iteration {state.iteration}/{state.max_iterations}.*
3321
+ """
3322
+
3323
async def _execute_refinement_async(
    self,
    state: DeepResearchState,
    provider_id: Optional[str],
    timeout: float,
) -> WorkflowResult:
    """Execute refinement phase: analyze gaps and generate follow-up queries.

    This phase:
    1. Returns early (successfully) when ``state.iteration`` has reached
       ``state.max_iterations`` or when there are no unresolved gaps
    2. Asks the LLM to assess the unresolved gaps
    3. Marks every gap the response lists in ``addressed_gap_ids`` as resolved
    4. Takes follow-up queries from the response, or from the gaps' own
       suggested queries when the response yields none
    5. Adds up to ``state.max_sub_queries`` of them as new sub-queries,
       saves the state, and writes an audit event

    Args:
        state: Current research state with report and gaps
        provider_id: LLM provider to use; ``state.refinement_provider`` is
            used when this is falsy
        timeout: Request timeout in seconds

    Returns:
        WorkflowResult with refinement outcome. A ContextWindowError from
        the provider call is converted into a failed result; a failed
        provider result is returned unchanged.
    """
    # Snapshot taken up front; the counts reported below refer to this list
    unresolved_gaps = state.unresolved_gaps()

    # Check iteration limit
    if state.iteration >= state.max_iterations:
        logger.info(
            "Refinement: max iterations (%d) reached, no further refinement",
            state.max_iterations,
        )
        self._write_audit_event(
            state,
            "refinement_result",
            data={
                "reason": "max_iterations_reached",
                "unresolved_gaps": len(unresolved_gaps),
                "iteration": state.iteration,
            },
            level="warning",
        )
        return WorkflowResult(
            success=True,
            content="Max iterations reached, refinement complete",
            metadata={
                "research_id": state.id,
                "iteration": state.iteration,
                "max_iterations": state.max_iterations,
                "unresolved_gaps": len(unresolved_gaps),
                "reason": "max_iterations_reached",
            },
        )

    if not unresolved_gaps:
        logger.info("Refinement: no unresolved gaps, research complete")
        self._write_audit_event(
            state,
            "refinement_result",
            data={
                "reason": "no_gaps",
                "unresolved_gaps": 0,
                "iteration": state.iteration,
            },
        )
        return WorkflowResult(
            success=True,
            content="No unresolved gaps, research complete",
            metadata={
                "research_id": state.id,
                "iteration": state.iteration,
                "reason": "no_gaps",
            },
        )

    logger.info(
        "Starting refinement phase: %d unresolved gaps, iteration %d/%d",
        len(unresolved_gaps),
        state.iteration,
        state.max_iterations,
    )

    # Build the refinement prompt
    system_prompt = self._build_refinement_system_prompt(state)
    user_prompt = self._build_refinement_user_prompt(state)

    # Execute LLM call with context window error handling
    try:
        result = self._execute_provider(
            prompt=user_prompt,
            provider_id=provider_id or state.refinement_provider,
            model=state.refinement_model,
            system_prompt=system_prompt,
            timeout=timeout,
            temperature=0.4,  # Lower temperature for focused analysis
        )
    except ContextWindowError as e:
        logger.error(
            "Refinement phase context window exceeded: prompt_tokens=%s, "
            "max_tokens=%s, gap_count=%d",
            e.prompt_tokens,
            e.max_tokens,
            len(unresolved_gaps),
        )
        return WorkflowResult(
            success=False,
            content="",
            error=str(e),
            metadata={
                "research_id": state.id,
                "phase": "refinement",
                "error_type": "context_window_exceeded",
                "prompt_tokens": e.prompt_tokens,
                "max_tokens": e.max_tokens,
            },
        )

    if not result.success:
        logger.error("Refinement phase LLM call failed: %s", result.error)
        return result

    # Track token usage
    if result.tokens_used:
        state.total_tokens_used += result.tokens_used

    # Track phase metrics for audit
    state.phase_metrics.append(
        PhaseMetrics(
            phase="refinement",
            duration_ms=result.duration_ms or 0.0,
            input_tokens=result.input_tokens or 0,
            output_tokens=result.output_tokens or 0,
            cached_tokens=result.cached_tokens or 0,
            provider_id=result.provider_id,
            model_used=result.model_used,
        )
    )

    # Parse the response
    parsed = self._parse_refinement_response(result.content, state)

    # Mark gaps as resolved if specified. This runs before queries are
    # chosen and regardless of parsed["success"]: that flag only means
    # "at least one follow-up query was returned", so a valid response that
    # declares gaps addressed and proposes no queries must still be honored,
    # and the fallback below must not generate queries for those gaps.
    for gap_id in parsed.get("addressed_gap_ids", []):
        gap = state.get_gap(gap_id)
        if gap:
            gap.resolved = True

    if parsed["success"]:
        follow_up_queries = parsed["follow_up_queries"]
    else:
        logger.warning("Failed to parse refinement response, using existing gap suggestions")
        # Fallback: use existing gap suggestions as follow-up queries
        follow_up_queries = self._extract_fallback_queries(state)

    # Convert follow-up queries to new sub-queries for next iteration
    new_sub_queries = 0
    for query_data in follow_up_queries[:state.max_sub_queries]:
        # Add as new sub-query
        state.add_sub_query(
            query=query_data["query"],
            rationale=query_data.get("rationale", "Follow-up from gap analysis"),
            priority=query_data.get("priority", 1),
        )
        new_sub_queries += 1

    # Save state
    self.memory.save_deep_research(state)
    self._write_audit_event(
        state,
        "refinement_result",
        data={
            "provider_id": result.provider_id,
            "model_used": result.model_used,
            "tokens_used": result.tokens_used,
            "duration_ms": result.duration_ms,
            "system_prompt": system_prompt,
            "user_prompt": user_prompt,
            "raw_response": result.content,
            "parse_success": parsed["success"],
            "gap_analysis": parsed.get("gap_analysis", []),
            "follow_up_queries": follow_up_queries,
            "addressed_gap_ids": parsed.get("addressed_gap_ids", []),
            "should_iterate": parsed.get("should_iterate", True),
        },
    )

    logger.info(
        "Refinement phase complete: %d follow-up queries generated",
        new_sub_queries,
    )

    return WorkflowResult(
        success=True,
        content=f"Generated {new_sub_queries} follow-up queries from {len(unresolved_gaps)} gaps",
        provider_id=result.provider_id,
        model_used=result.model_used,
        tokens_used=result.tokens_used,
        duration_ms=result.duration_ms,
        metadata={
            "research_id": state.id,
            "iteration": state.iteration,
            "unresolved_gaps": len(unresolved_gaps),
            "follow_up_queries": new_sub_queries,
            "gaps_addressed": len(parsed.get("addressed_gap_ids", [])),
        },
    )
3528
+
3529
+ def _build_refinement_system_prompt(self, state: DeepResearchState) -> str:
3530
+ """Build system prompt for gap analysis and refinement.
3531
+
3532
+ Args:
3533
+ state: Current research state
3534
+
3535
+ Returns:
3536
+ System prompt string
3537
+ """
3538
+ return """You are a research refiner. Your task is to analyze knowledge gaps identified during research and generate focused follow-up queries to address them.
3539
+
3540
+ Your response MUST be valid JSON with this exact structure:
3541
+ {
3542
+ "gap_analysis": [
3543
+ {
3544
+ "gap_id": "gap-xxx",
3545
+ "severity": "critical|moderate|minor",
3546
+ "addressable": true,
3547
+ "rationale": "Why this gap matters and whether it can be addressed"
3548
+ }
3549
+ ],
3550
+ "follow_up_queries": [
3551
+ {
3552
+ "query": "A specific, focused search query to address the gap",
3553
+ "target_gap_id": "gap-xxx",
3554
+ "rationale": "How this query will fill the gap",
3555
+ "priority": 1
3556
+ }
3557
+ ],
3558
+ "addressed_gap_ids": ["gap-xxx"],
3559
+ "iteration_recommendation": {
3560
+ "should_iterate": true,
3561
+ "rationale": "Why iteration is or isn't recommended"
3562
+ }
3563
+ }
3564
+
3565
+ Guidelines:
3566
+ - Assess each gap's severity: "critical" (blocks conclusions), "moderate" (affects confidence), "minor" (nice to have)
3567
+ - Only mark gaps as addressable if follow-up research can realistically fill them
3568
+ - Generate 1-3 highly focused follow-up queries per addressable gap
3569
+ - Priority 1 is highest priority
3570
+ - Mark gaps as addressed if the current report already covers them adequately
3571
+ - Recommend iteration only if there are addressable critical/moderate gaps AND value exceeds research cost
3572
+
3573
+ IMPORTANT: Return ONLY valid JSON, no markdown formatting or extra text."""
3574
+
3575
+ def _build_refinement_user_prompt(self, state: DeepResearchState) -> str:
3576
+ """Build user prompt with gaps and report context for refinement.
3577
+
3578
+ Args:
3579
+ state: Current research state
3580
+
3581
+ Returns:
3582
+ User prompt string
3583
+ """
3584
+ prompt_parts = [
3585
+ f"# Research Query\n{state.original_query}",
3586
+ "",
3587
+ f"## Research Status",
3588
+ f"- Iteration: {state.iteration}/{state.max_iterations}",
3589
+ f"- Sources examined: {len(state.sources)}",
3590
+ f"- Findings extracted: {len(state.findings)}",
3591
+ f"- Unresolved gaps: {len(state.unresolved_gaps())}",
3592
+ "",
3593
+ ]
3594
+
3595
+ # Add report summary (truncated for context window)
3596
+ if state.report:
3597
+ report_excerpt = state.report[:2000]
3598
+ if len(state.report) > 2000:
3599
+ report_excerpt += "\n\n[Report truncated...]"
3600
+ prompt_parts.append("## Current Report Summary")
3601
+ prompt_parts.append(report_excerpt)
3602
+ prompt_parts.append("")
3603
+
3604
+ # Add unresolved gaps
3605
+ prompt_parts.append("## Unresolved Knowledge Gaps")
3606
+ for gap in state.unresolved_gaps():
3607
+ prompt_parts.append(f"\n### Gap: {gap.id}")
3608
+ prompt_parts.append(f"Description: {gap.description}")
3609
+ prompt_parts.append(f"Priority: {gap.priority}")
3610
+ if gap.suggested_queries:
3611
+ prompt_parts.append("Suggested queries from analysis:")
3612
+ for sq in gap.suggested_queries[:3]:
3613
+ prompt_parts.append(f" - {sq}")
3614
+ prompt_parts.append("")
3615
+
3616
+ # Add high-confidence findings for context
3617
+ high_conf_findings = [
3618
+ f for f in state.findings
3619
+ if hasattr(f.confidence, 'value') and f.confidence.value in ('high', 'confirmed')
3620
+ ]
3621
+ if high_conf_findings:
3622
+ prompt_parts.append("## High-Confidence Findings Already Established")
3623
+ for f in high_conf_findings[:5]:
3624
+ prompt_parts.append(f"- {f.content[:200]}")
3625
+ prompt_parts.append("")
3626
+
3627
+ # Add instructions
3628
+ prompt_parts.extend([
3629
+ "## Instructions",
3630
+ "1. Analyze each gap for severity and addressability",
3631
+ "2. Generate focused follow-up queries for addressable gaps",
3632
+ "3. Mark any gaps that are actually addressed by existing findings",
3633
+ "4. Recommend whether iteration is worthwhile given remaining gaps",
3634
+ "",
3635
+ "Return your analysis as JSON.",
3636
+ ])
3637
+
3638
+ return "\n".join(prompt_parts)
3639
+
3640
+ def _parse_refinement_response(
3641
+ self,
3642
+ content: str,
3643
+ state: DeepResearchState,
3644
+ ) -> dict[str, Any]:
3645
+ """Parse LLM response into structured refinement data.
3646
+
3647
+ Args:
3648
+ content: Raw LLM response content
3649
+ state: Current research state
3650
+
3651
+ Returns:
3652
+ Dict with 'success', 'follow_up_queries', 'addressed_gap_ids', etc.
3653
+ """
3654
+ result = {
3655
+ "success": False,
3656
+ "gap_analysis": [],
3657
+ "follow_up_queries": [],
3658
+ "addressed_gap_ids": [],
3659
+ "should_iterate": True,
3660
+ }
3661
+
3662
+ if not content:
3663
+ return result
3664
+
3665
+ # Try to extract JSON from the response
3666
+ json_str = self._extract_json(content)
3667
+ if not json_str:
3668
+ logger.warning("No JSON found in refinement response")
3669
+ return result
3670
+
3671
+ try:
3672
+ data = json.loads(json_str)
3673
+ except json.JSONDecodeError as e:
3674
+ logger.error("Failed to parse JSON from refinement response: %s", e)
3675
+ return result
3676
+
3677
+ # Parse gap analysis
3678
+ raw_analysis = data.get("gap_analysis", [])
3679
+ if isinstance(raw_analysis, list):
3680
+ for ga in raw_analysis:
3681
+ if not isinstance(ga, dict):
3682
+ continue
3683
+ result["gap_analysis"].append({
3684
+ "gap_id": ga.get("gap_id", ""),
3685
+ "severity": ga.get("severity", "moderate"),
3686
+ "addressable": ga.get("addressable", True),
3687
+ "rationale": ga.get("rationale", ""),
3688
+ })
3689
+
3690
+ # Parse follow-up queries
3691
+ raw_queries = data.get("follow_up_queries", [])
3692
+ if isinstance(raw_queries, list):
3693
+ for fq in raw_queries:
3694
+ if not isinstance(fq, dict):
3695
+ continue
3696
+ query = fq.get("query", "").strip()
3697
+ if not query:
3698
+ continue
3699
+ result["follow_up_queries"].append({
3700
+ "query": query,
3701
+ "target_gap_id": fq.get("target_gap_id", ""),
3702
+ "rationale": fq.get("rationale", ""),
3703
+ "priority": min(max(int(fq.get("priority", 1)), 1), 10),
3704
+ })
3705
+
3706
+ # Parse addressed gaps
3707
+ raw_addressed = data.get("addressed_gap_ids", [])
3708
+ if isinstance(raw_addressed, list):
3709
+ result["addressed_gap_ids"] = [
3710
+ gid for gid in raw_addressed if isinstance(gid, str)
3711
+ ]
3712
+
3713
+ # Parse iteration recommendation
3714
+ iter_rec = data.get("iteration_recommendation", {})
3715
+ if isinstance(iter_rec, dict):
3716
+ result["should_iterate"] = iter_rec.get("should_iterate", True)
3717
+
3718
+ # Mark success if we got at least one follow-up query
3719
+ result["success"] = len(result["follow_up_queries"]) > 0
3720
+
3721
+ return result
3722
+
3723
+ def _extract_fallback_queries(self, state: DeepResearchState) -> list[dict[str, Any]]:
3724
+ """Extract follow-up queries from existing gap suggestions as fallback.
3725
+
3726
+ Used when LLM parsing fails but we still want to progress.
3727
+
3728
+ Args:
3729
+ state: Current research state with gaps
3730
+
3731
+ Returns:
3732
+ List of follow-up query dictionaries
3733
+ """
3734
+ queries = []
3735
+ for gap in state.unresolved_gaps():
3736
+ for i, sq in enumerate(gap.suggested_queries[:2]): # Max 2 per gap
3737
+ queries.append({
3738
+ "query": sq,
3739
+ "target_gap_id": gap.id,
3740
+ "rationale": f"Suggested query from gap: {gap.description[:50]}",
3741
+ "priority": gap.priority,
3742
+ })
3743
+ return queries[:state.max_sub_queries] # Respect limit
3744
+
3745
+ # =========================================================================
3746
+ # Utility Methods
3747
+ # =========================================================================
3748
+
3749
def list_sessions(
    self,
    limit: int = 50,
    cursor: Optional[str] = None,
    completed_only: bool = False,
) -> list[dict[str, Any]]:
    """Return summary dictionaries for stored deep research sessions.

    The lookup itself is delegated to ``self.memory.list_deep_research``;
    this method only flattens each returned session into a plain dict.

    Args:
        limit: Maximum sessions to return
        cursor: Pagination cursor (research_id to start after)
        completed_only: Only return completed sessions

    Returns:
        One summary dict per session with id, query, phase, iteration,
        source/finding counts, completion flag and ISO timestamps
    """
    stored = self.memory.list_deep_research(
        limit=limit,
        cursor=cursor,
        completed_only=completed_only,
    )

    summaries: list[dict[str, Any]] = []
    for session in stored:
        summary = {
            "id": session.id,
            "query": session.original_query,
            "phase": session.phase.value,
            "iteration": session.iteration,
            "source_count": len(session.sources),
            "finding_count": len(session.findings),
            # A session counts as complete once it carries a completion timestamp.
            "is_complete": session.completed_at is not None,
            "created_at": session.created_at.isoformat(),
            "updated_at": session.updated_at.isoformat(),
        }
        summaries.append(summary)
    return summaries
3785
+
3786
def delete_session(self, research_id: str) -> bool:
    """Remove a persisted research session.

    Pure delegate to ``self.memory.delete_deep_research``.

    Args:
        research_id: ID of session to delete

    Returns:
        True if deleted, False if not found
    """
    was_deleted = self.memory.delete_deep_research(research_id)
    return was_deleted
3796
+
3797
def resume_research(
    self,
    research_id: str,
    provider_id: Optional[str] = None,
    timeout_per_operation: float = 120.0,
    max_concurrent: int = 3,
) -> WorkflowResult:
    """Resume an interrupted deep research workflow from persisted state.

    Loads the DeepResearchState from persistence, validates it with
    ``_validate_state_for_resume`` and re-enters
    ``_execute_workflow_async`` with the loaded state. Sessions that are
    missing, already completed or fail validation return early without
    running the workflow.

    This is a synchronous entry point. If an event loop is already running
    in the calling thread, the workflow is run on a fresh loop in a worker
    thread; otherwise it is run with ``asyncio.run`` in the current thread.

    Args:
        research_id: ID of the research session to resume
        provider_id: Optional provider override for LLM operations
        timeout_per_operation: Timeout per operation in seconds
        max_concurrent: Maximum concurrent operations

    Returns:
        WorkflowResult with resumed research outcome or error. On an
        actual resume, ``metadata["resumed"]`` is True and
        ``metadata["resumed_from_phase"]`` holds the phase the session was
        in when it was loaded.
    """
    logger.info("Attempting to resume research session: %s", research_id)

    # Load existing state
    state = self.memory.load_deep_research(research_id)

    if state is None:
        logger.warning("Research session '%s' not found in persistence", research_id)
        return WorkflowResult(
            success=False,
            content="",
            error=f"Research session '{research_id}' not found. It may have expired or been deleted.",
            metadata={"research_id": research_id, "error_type": "not_found"},
        )

    # Nothing to resume: hand back the stored report instead of re-running.
    if state.completed_at is not None:
        logger.info(
            "Research session '%s' already completed at %s",
            research_id,
            state.completed_at.isoformat(),
        )
        return WorkflowResult(
            success=True,
            content=state.report or "Research already completed",
            metadata={
                "research_id": state.id,
                "phase": state.phase.value,
                "is_complete": True,
                "completed_at": state.completed_at.isoformat(),
                "resumed": False,
            },
        )

    # Validate state integrity
    validation_result = self._validate_state_for_resume(state)
    if not validation_result["valid"]:
        logger.error(
            "Research session '%s' failed validation: %s",
            research_id,
            validation_result["error"],
        )
        return WorkflowResult(
            success=False,
            content="",
            error=validation_result["error"],
            metadata={
                "research_id": research_id,
                "error_type": "validation_failed",
                "phase": state.phase.value,
                "issues": validation_result.get("issues", []),
            },
        )

    # Log resumption context
    logger.info(
        "Resuming research '%s': phase=%s, iteration=%d/%d, "
        "sub_queries=%d (completed=%d), sources=%d, findings=%d, gaps=%d",
        research_id,
        state.phase.value,
        state.iteration,
        state.max_iterations,
        len(state.sub_queries),
        len(state.completed_sub_queries()),
        len(state.sources),
        len(state.findings),
        len(state.unresolved_gaps()),
    )

    # Remember the phase before execution. The workflow receives this same
    # state object and may advance its phase, so reading it afterwards
    # would report where the run ended, not where it resumed from.
    resumed_from_phase = state.phase.value

    def _workflow_coroutine():
        # Single place that builds the coroutine for either execution path.
        return self._execute_workflow_async(
            state=state,
            provider_id=provider_id,
            timeout_per_operation=timeout_per_operation,
            max_concurrent=max_concurrent,
        )

    # Only the loop probe sits inside the try: a RuntimeError raised by the
    # workflow itself must propagate instead of triggering a second run.
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread, so we may drive one ourselves.
        result = asyncio.run(_workflow_coroutine())
    else:
        # A loop is already running here and cannot be re-entered; run the
        # workflow on its own loop in a worker thread and block for it.
        import concurrent.futures

        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
            future = executor.submit(asyncio.run, _workflow_coroutine())
            result = future.result()

    # Add resumption metadata
    if result.metadata is None:
        result.metadata = {}
    result.metadata["resumed"] = True
    result.metadata["resumed_from_phase"] = resumed_from_phase

    return result
3929
+
3930
+ def _validate_state_for_resume(self, state: DeepResearchState) -> dict[str, Any]:
3931
+ """Validate a DeepResearchState for safe resumption.
3932
+
3933
+ Checks for common corruption issues and missing required data.
3934
+
3935
+ Args:
3936
+ state: The state to validate
3937
+
3938
+ Returns:
3939
+ Dict with 'valid' bool and 'error'/'issues' if invalid
3940
+ """
3941
+ issues = []
3942
+
3943
+ # Check required fields
3944
+ if not state.original_query:
3945
+ issues.append("Missing original_query")
3946
+
3947
+ if not state.id:
3948
+ issues.append("Missing research ID")
3949
+
3950
+ # Phase-specific validation
3951
+ if state.phase.value in ("gathering", "analysis", "synthesis", "refinement"):
3952
+ # These phases require sub-queries from planning
3953
+ if not state.sub_queries:
3954
+ issues.append(f"No sub-queries found for {state.phase.value} phase")
3955
+
3956
+ if state.phase.value in ("analysis", "synthesis"):
3957
+ # These phases require sources from gathering
3958
+ if not state.sources and state.phase.value == "analysis":
3959
+ # Only warn for analysis - synthesis can work with findings
3960
+ issues.append("No sources found for analysis phase")
3961
+
3962
+ if state.phase.value == "synthesis":
3963
+ # Synthesis requires findings from analysis
3964
+ if not state.findings:
3965
+ issues.append("No findings found for synthesis phase")
3966
+
3967
+ # Check for corrupted collections (None instead of empty list)
3968
+ if state.sub_queries is None:
3969
+ issues.append("Corrupted sub_queries collection (null)")
3970
+ if state.sources is None:
3971
+ issues.append("Corrupted sources collection (null)")
3972
+ if state.findings is None:
3973
+ issues.append("Corrupted findings collection (null)")
3974
+ if state.gaps is None:
3975
+ issues.append("Corrupted gaps collection (null)")
3976
+
3977
+ if issues:
3978
+ return {
3979
+ "valid": False,
3980
+ "error": f"State validation failed: {'; '.join(issues)}",
3981
+ "issues": issues,
3982
+ }
3983
+
3984
+ return {"valid": True}
3985
+
3986
def list_resumable_sessions(self) -> list[dict[str, Any]]:
    """List in-progress research sessions together with resumption context.

    Fetches sessions through
    ``self.memory.list_deep_research(completed_only=False)``, skips the
    completed ones and runs ``_validate_state_for_resume`` on the rest.

    Sessions that fail validation are still listed, with ``can_resume``
    False and their ``issues``. A missing query or a null collection (both
    conditions the validator reports) is summarised as an empty query or a
    count of 0, so one corrupted session does not abort the whole listing.

    Returns:
        List of session summaries with resumption context
    """
    resumable: list[dict[str, Any]] = []
    for state in self.memory.list_deep_research(completed_only=False):
        if state.completed_at is not None:
            continue  # Skip completed

        validation = self._validate_state_for_resume(state)

        # original_query may be missing on a corrupted state; show it as empty.
        query = state.original_query or ""
        query_preview = query[:100] + ("..." if len(query) > 100 else "")

        # NOTE(review): presumably completed_sub_queries() and
        # unresolved_gaps() iterate sub_queries / gaps, so they are skipped
        # when the backing collection is null - confirm against
        # DeepResearchState.
        completed = state.completed_sub_queries() if state.sub_queries is not None else []
        open_gaps = state.unresolved_gaps() if state.gaps is not None else []

        resumable.append({
            "id": state.id,
            "query": query_preview,
            "phase": state.phase.value,
            "iteration": state.iteration,
            "max_iterations": state.max_iterations,
            "sub_queries": len(state.sub_queries or []),
            "completed_queries": len(completed),
            "sources": len(state.sources or []),
            "findings": len(state.findings or []),
            "gaps": len(open_gaps),
            "can_resume": validation["valid"],
            "issues": validation.get("issues", []),
            "created_at": state.created_at.isoformat(),
            "updated_at": state.updated_at.isoformat(),
        })

    return resumable