foundry-mcp 0.7.0__py3-none-any.whl → 0.8.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- foundry_mcp/cli/__init__.py +0 -13
- foundry_mcp/cli/commands/session.py +1 -8
- foundry_mcp/cli/context.py +39 -0
- foundry_mcp/config.py +381 -7
- foundry_mcp/core/batch_operations.py +1196 -0
- foundry_mcp/core/discovery.py +1 -1
- foundry_mcp/core/llm_config.py +8 -0
- foundry_mcp/core/naming.py +25 -2
- foundry_mcp/core/prometheus.py +0 -13
- foundry_mcp/core/providers/__init__.py +12 -0
- foundry_mcp/core/providers/base.py +39 -0
- foundry_mcp/core/providers/claude.py +45 -1
- foundry_mcp/core/providers/codex.py +64 -3
- foundry_mcp/core/providers/cursor_agent.py +22 -3
- foundry_mcp/core/providers/detectors.py +34 -7
- foundry_mcp/core/providers/gemini.py +63 -1
- foundry_mcp/core/providers/opencode.py +95 -71
- foundry_mcp/core/providers/package-lock.json +4 -4
- foundry_mcp/core/providers/package.json +1 -1
- foundry_mcp/core/providers/validation.py +128 -0
- foundry_mcp/core/research/memory.py +103 -0
- foundry_mcp/core/research/models.py +783 -0
- foundry_mcp/core/research/providers/__init__.py +40 -0
- foundry_mcp/core/research/providers/base.py +242 -0
- foundry_mcp/core/research/providers/google.py +507 -0
- foundry_mcp/core/research/providers/perplexity.py +442 -0
- foundry_mcp/core/research/providers/semantic_scholar.py +544 -0
- foundry_mcp/core/research/providers/tavily.py +383 -0
- foundry_mcp/core/research/workflows/__init__.py +5 -2
- foundry_mcp/core/research/workflows/base.py +106 -12
- foundry_mcp/core/research/workflows/consensus.py +160 -17
- foundry_mcp/core/research/workflows/deep_research.py +4020 -0
- foundry_mcp/core/responses.py +240 -0
- foundry_mcp/core/spec.py +1 -0
- foundry_mcp/core/task.py +141 -12
- foundry_mcp/core/validation.py +6 -1
- foundry_mcp/server.py +0 -52
- foundry_mcp/tools/unified/__init__.py +37 -18
- foundry_mcp/tools/unified/authoring.py +0 -33
- foundry_mcp/tools/unified/environment.py +202 -29
- foundry_mcp/tools/unified/plan.py +20 -1
- foundry_mcp/tools/unified/provider.py +0 -40
- foundry_mcp/tools/unified/research.py +644 -19
- foundry_mcp/tools/unified/review.py +5 -2
- foundry_mcp/tools/unified/review_helpers.py +16 -1
- foundry_mcp/tools/unified/server.py +9 -24
- foundry_mcp/tools/unified/task.py +528 -9
- {foundry_mcp-0.7.0.dist-info → foundry_mcp-0.8.10.dist-info}/METADATA +2 -1
- {foundry_mcp-0.7.0.dist-info → foundry_mcp-0.8.10.dist-info}/RECORD +52 -46
- foundry_mcp/cli/flags.py +0 -266
- foundry_mcp/core/feature_flags.py +0 -592
- {foundry_mcp-0.7.0.dist-info → foundry_mcp-0.8.10.dist-info}/WHEEL +0 -0
- {foundry_mcp-0.7.0.dist-info → foundry_mcp-0.8.10.dist-info}/entry_points.txt +0 -0
- {foundry_mcp-0.7.0.dist-info → foundry_mcp-0.8.10.dist-info}/licenses/LICENSE +0 -0
|
@@ -25,6 +25,7 @@ class WorkflowType(str, Enum):
|
|
|
25
25
|
CONSENSUS = "consensus"
|
|
26
26
|
THINKDEEP = "thinkdeep"
|
|
27
27
|
IDEATE = "ideate"
|
|
28
|
+
DEEP_RESEARCH = "deep_research"
|
|
28
29
|
|
|
29
30
|
|
|
30
31
|
class ConfidenceLevel(str, Enum):
|
|
@@ -435,3 +436,785 @@ class ConsensusState(BaseModel):
|
|
|
435
436
|
self.completed_at = datetime.utcnow()
|
|
436
437
|
if synthesis:
|
|
437
438
|
self.synthesis = synthesis
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
class DeepResearchConfig(BaseModel):
    """Configuration for DEEP_RESEARCH workflow execution.

    Groups deep research parameters into a single config object to reduce
    parameter sprawl in the MCP tool interface. All fields have sensible
    defaults that can be overridden at the tool level.

    Note: Provider configuration is handled via ResearchConfig TOML settings,
    not through this config object. This is intentional - providers should be
    configured at the server level, not per-request.
    """

    # Bounds (ge/le) are enforced by pydantic at construction time.
    max_iterations: int = Field(
        default=3,
        ge=1,
        le=10,
        description="Maximum refinement iterations before forced completion",
    )
    max_sub_queries: int = Field(
        default=5,
        ge=1,
        le=20,
        description="Maximum sub-queries for query decomposition",
    )
    max_sources_per_query: int = Field(
        default=5,
        ge=1,
        le=50,
        description="Maximum sources to gather per sub-query",
    )
    follow_links: bool = Field(
        default=True,
        description="Whether to follow URLs and extract full content",
    )
    timeout_per_operation: float = Field(
        default=30.0,
        ge=1.0,
        le=300.0,
        description="Timeout in seconds for each search/fetch operation",
    )
    max_concurrent: int = Field(
        default=3,
        ge=1,
        le=10,
        description="Maximum concurrent operations (search, fetch)",
    )

    @classmethod
    def from_defaults(cls) -> "DeepResearchConfig":
        """Create config with all default values.

        Returns:
            DeepResearchConfig with sensible defaults
        """
        return cls()

    def merge_overrides(self, **overrides: Any) -> "DeepResearchConfig":
        """Create a new config with specified overrides applied.

        The receiver is not mutated; a fresh instance is returned so the
        base config can be reused across requests.

        Args:
            **overrides: Field values to override (None values are ignored)

        Returns:
            New DeepResearchConfig with overrides applied

        Raises:
            pydantic.ValidationError: if an override violates a field bound.
        """
        current = self.model_dump()
        for key, value in overrides.items():
            # None means "keep default"; keys that are not declared fields
            # are silently ignored rather than raising.
            if value is not None and key in current:
                current[key] = value
        return DeepResearchConfig(**current)
|
|
511
|
+
|
|
512
|
+
|
|
513
|
+
# =============================================================================
|
|
514
|
+
# DEEP RESEARCH Models (Multi-phase iterative research)
|
|
515
|
+
# =============================================================================
|
|
516
|
+
|
|
517
|
+
|
|
518
|
+
class DeepResearchPhase(str, Enum):
    """Enumerates the sequential stages of the DEEP_RESEARCH workflow.

    A research session moves through five stages:

    1. PLANNING   - decompose the original query into focused sub-queries
    2. GATHERING  - run sub-queries in parallel and collect candidate sources
    3. ANALYSIS   - extract findings and judge source quality
    4. SYNTHESIS  - merge findings into a comprehensive report
    5. REFINEMENT - detect gaps and, if needed, loop back for more research

    NOTE: member order is load-bearing - advance_phase() walks the members
    in the order they are declared here, so do not reorder them.
    """

    PLANNING = "planning"
    GATHERING = "gathering"
    ANALYSIS = "analysis"
    SYNTHESIS = "synthesis"
    REFINEMENT = "refinement"
|
|
537
|
+
|
|
538
|
+
|
|
539
|
+
class PhaseMetrics(BaseModel):
    """Metrics for a single phase execution.

    Tracks timing, token usage, and provider information for each phase
    of the deep research workflow. Used for audit and cost tracking.
    """

    # phase is stored as a free-form string rather than DeepResearchPhase;
    # the description lists the expected values.
    phase: str = Field(..., description="Phase name (planning, analysis, etc.)")
    duration_ms: float = Field(default=0.0, description="Phase duration in milliseconds")
    input_tokens: int = Field(default=0, description="Tokens consumed by the prompt")
    output_tokens: int = Field(default=0, description="Tokens generated in the response")
    cached_tokens: int = Field(default=0, description="Tokens served from cache")
    provider_id: Optional[str] = Field(default=None, description="Provider used for this phase")
    model_used: Optional[str] = Field(default=None, description="Model used for this phase")
|
|
553
|
+
|
|
554
|
+
|
|
555
|
+
class SourceType(str, Enum):
    """Kinds of research sources the workflow can discover.

    Wired up in V1:
    - WEB: general web search hits (via Tavily/Google)
    - ACADEMIC: papers and journals (via Semantic Scholar)

    Reserved placeholders for future providers:
    - EXPERT: expert profiles and interviews
    - CODE: code repositories and examples (e.g. GitHub search)

    Other extensions sketched in the design (news articles, technical
    documentation) have no members yet.
    """

    WEB = "web"
    ACADEMIC = "academic"
    EXPERT = "expert"  # reserved: expert profiles, interviews
    CODE = "code"  # reserved: GitHub / code search
|
|
573
|
+
|
|
574
|
+
|
|
575
|
+
class SourceQuality(str, Enum):
    """Quality grade assigned to a research source.

    Grades are set during the ANALYSIS phase by weighing:
    - authority and credibility of the source
    - recency and relevance of the content
    - citation counts and peer-review status (academic sources)
    - domain reputation (web sources)
    """

    UNKNOWN = "unknown"  # not yet assessed
    LOW = "low"  # questionable reliability
    MEDIUM = "medium"  # generally reliable
    HIGH = "high"  # authoritative source
|
|
589
|
+
|
|
590
|
+
|
|
591
|
+
class ResearchMode(str, Enum):
    """Controls how sources are prioritized during research.

    The selected mode picks a domain-tier heuristic:
    - GENERAL: balanced, no domain preferences (the default)
    - ACADEMIC: favors journals, publishers, and preprint servers
    - TECHNICAL: favors official docs, arxiv, and code repositories
    """

    GENERAL = "general"
    ACADEMIC = "academic"
    TECHNICAL = "technical"
|
|
603
|
+
|
|
604
|
+
|
|
605
|
+
# Domain tier lists for source quality assessment by research mode.
# Keys mirror ResearchMode values ("academic", "technical", "general");
# each mode maps tier name ("high"/"low") to a list of domain patterns.
# Patterns support wildcards: "*.edu" matches any .edu domain
DOMAIN_TIERS: dict[str, dict[str, list[str]]] = {
    "academic": {
        "high": [
            # Aggregators & indexes
            "scholar.google.com",
            "semanticscholar.org",
            "pubmed.gov",
            "ncbi.nlm.nih.gov",
            "jstor.org",
            # Major publishers
            "springer.com",
            "link.springer.com",
            "sciencedirect.com",
            "elsevier.com",
            "wiley.com",
            "onlinelibrary.wiley.com",
            "tandfonline.com",  # Taylor & Francis
            "sagepub.com",
            "nature.com",
            "science.org",  # AAAS/Science
            "frontiersin.org",
            "plos.org",
            "journals.plos.org",
            "mdpi.com",
            "oup.com",
            "academic.oup.com",  # Oxford
            "cambridge.org",
            # Preprints & open access
            "arxiv.org",
            "biorxiv.org",
            "medrxiv.org",
            "psyarxiv.com",
            "ssrn.com",
            # Field-specific
            "apa.org",
            "psycnet.apa.org",  # Psychology
            "aclanthology.org",  # Computational linguistics
            # CS/Tech academic
            "acm.org",
            "dl.acm.org",
            "ieee.org",
            "ieeexplore.ieee.org",
            # Institutional patterns
            "*.edu",
            "*.ac.uk",
            "*.edu.au",
        ],
        "low": [
            "reddit.com",
            "quora.com",
            "medium.com",
            "linkedin.com",
            "twitter.com",
            "x.com",
            "facebook.com",
            "pinterest.com",
            "instagram.com",
            "tiktok.com",
            "youtube.com",  # Can have good content but inconsistent
        ],
    },
    "technical": {
        "high": [
            # Preprints (technical papers)
            "arxiv.org",
            # Official documentation patterns
            "docs.*",
            "developer.*",
            "*.dev",
            "devdocs.io",
            # Code & technical resources
            "github.com",
            "stackoverflow.com",
            "stackexchange.com",
            # Language/framework official sites
            "python.org",
            "docs.python.org",
            "nodejs.org",
            "rust-lang.org",
            "doc.rust-lang.org",
            "go.dev",
            "typescriptlang.org",
            "react.dev",
            "vuejs.org",
            "angular.io",
            # Cloud providers
            "aws.amazon.com",
            "cloud.google.com",
            "docs.microsoft.com",
            "learn.microsoft.com",
            "azure.microsoft.com",
            # Tech company engineering blogs
            "engineering.fb.com",
            "netflixtechblog.com",
            "uber.com/blog/engineering",  # NOTE(review): path-qualified, not a bare domain — confirm the matcher handles it
            "blog.google",
            # Academic (relevant for technical research)
            "acm.org",
            "dl.acm.org",
            "ieee.org",
            "ieeexplore.ieee.org",
        ],
        "low": [
            "reddit.com",
            "quora.com",
            "linkedin.com",
            "twitter.com",
            "x.com",
            "facebook.com",
            "pinterest.com",
        ],
    },
    "general": {
        "high": [],  # No domain preferences
        "low": [
            # Still deprioritize social media
            "pinterest.com",
            "facebook.com",
            "instagram.com",
            "tiktok.com",
        ],
    },
}
|
|
730
|
+
|
|
731
|
+
|
|
732
|
+
class SubQuery(BaseModel):
    """A decomposed sub-query for focused research.

    During the PLANNING phase, the original research query is decomposed
    into multiple focused sub-queries. Each sub-query targets a specific
    aspect of the research question and can be executed independently
    during the GATHERING phase.

    Status transitions:
    - pending -> executing -> completed (success path)
    - pending -> executing -> failed (error path)
    """

    # Short random id; 8 hex chars keeps collision risk negligible per session.
    id: str = Field(default_factory=lambda: f"subq-{uuid4().hex[:8]}")
    query: str = Field(..., description="The focused sub-query text")
    rationale: Optional[str] = Field(
        default=None,
        description="Why this sub-query was generated and what aspect it covers",
    )
    priority: int = Field(
        default=1,
        description="Execution priority (1=highest, larger=lower priority)",
    )
    # Free-form string status; valid values are listed in the description.
    status: str = Field(
        default="pending",
        description="Current status: pending, executing, completed, failed",
    )
    source_ids: list[str] = Field(
        default_factory=list,
        description="IDs of ResearchSource objects found for this query",
    )
    findings_summary: Optional[str] = Field(
        default=None,
        description="Brief summary of what was found from this sub-query",
    )
    # NOTE(review): datetime.utcnow() yields naive timestamps and is
    # deprecated since Python 3.12 — confirm before migrating to aware UTC.
    created_at: datetime = Field(default_factory=datetime.utcnow)
    completed_at: Optional[datetime] = Field(default=None)
    error: Optional[str] = Field(
        default=None,
        description="Error message if status is 'failed'",
    )
    metadata: dict[str, Any] = Field(default_factory=dict)

    def mark_completed(self, findings: Optional[str] = None) -> None:
        """Mark this sub-query as successfully completed.

        Sets status/completed_at; findings_summary is only overwritten when
        a non-empty summary is supplied.

        Args:
            findings: Optional summary of findings from this sub-query
        """
        self.status = "completed"
        self.completed_at = datetime.utcnow()
        if findings:
            self.findings_summary = findings

    def mark_failed(self, error: str) -> None:
        """Mark this sub-query as failed with an error message.

        Args:
            error: Description of why the sub-query failed
        """
        self.status = "failed"
        self.completed_at = datetime.utcnow()
        self.error = error
|
|
795
|
+
|
|
796
|
+
|
|
797
|
+
class ResearchSource(BaseModel):
    """A source discovered during research.

    Sources are collected during the GATHERING phase when sub-queries
    are executed against search providers. Each source represents a
    piece of external content (web page, paper, etc.) that may contain
    relevant information for the research query.

    Quality is assessed during the ANALYSIS phase based on source
    authority, content relevance, and other factors.
    """

    id: str = Field(default_factory=lambda: f"src-{uuid4().hex[:8]}")
    url: Optional[str] = Field(
        default=None,
        description="URL of the source (may be None for non-web sources)",
    )
    title: str = Field(..., description="Title or headline of the source")
    source_type: SourceType = Field(
        default=SourceType.WEB,
        description="Type of source (web, academic, etc.)",
    )
    # Starts UNKNOWN; upgraded during ANALYSIS.
    quality: SourceQuality = Field(
        default=SourceQuality.UNKNOWN,
        description="Assessed quality level of this source",
    )
    snippet: Optional[str] = Field(
        default=None,
        description="Brief excerpt or description from the source",
    )
    content: Optional[str] = Field(
        default=None,
        description="Full extracted content (if follow_links enabled)",
    )
    # Back-reference to the SubQuery that surfaced this source.
    sub_query_id: Optional[str] = Field(
        default=None,
        description="ID of the SubQuery that discovered this source",
    )
    discovered_at: datetime = Field(default_factory=datetime.utcnow)
    metadata: dict[str, Any] = Field(default_factory=dict)
|
|
837
|
+
|
|
838
|
+
|
|
839
|
+
class ResearchFinding(BaseModel):
    """A key finding extracted from research sources.

    Findings are extracted during the ANALYSIS phase by examining
    source content and identifying key insights. Each finding has
    an associated confidence level and links back to supporting sources.

    Findings are organized by category/theme during synthesis to
    create a structured report.
    """

    id: str = Field(default_factory=lambda: f"find-{uuid4().hex[:8]}")
    content: str = Field(..., description="The key finding or insight")
    confidence: ConfidenceLevel = Field(
        default=ConfidenceLevel.MEDIUM,
        description="Confidence level in this finding",
    )
    # IDs reference ResearchSource.id; integrity is not enforced here.
    source_ids: list[str] = Field(
        default_factory=list,
        description="IDs of ResearchSource objects supporting this finding",
    )
    sub_query_id: Optional[str] = Field(
        default=None,
        description="ID of SubQuery that produced this finding",
    )
    category: Optional[str] = Field(
        default=None,
        description="Theme or category for organizing findings",
    )
    created_at: datetime = Field(default_factory=datetime.utcnow)
    metadata: dict[str, Any] = Field(default_factory=dict)
|
|
870
|
+
|
|
871
|
+
|
|
872
|
+
class ResearchGap(BaseModel):
    """An identified gap in the research requiring follow-up.

    Gaps are identified during the ANALYSIS and SYNTHESIS phases when
    the research reveals missing information or unanswered questions.
    Each gap includes suggested follow-up queries that can be used
    in subsequent refinement iterations.

    Gaps drive the REFINEMENT phase: if unresolved gaps exist and
    max_iterations hasn't been reached, the workflow loops back
    to GATHERING with new sub-queries derived from gap suggestions.
    """

    id: str = Field(default_factory=lambda: f"gap-{uuid4().hex[:8]}")
    description: str = Field(
        ...,
        description="Description of the knowledge gap or missing information",
    )
    suggested_queries: list[str] = Field(
        default_factory=list,
        description="Follow-up queries that could fill this gap",
    )
    priority: int = Field(
        default=1,
        description="Priority for follow-up (1=highest, larger=lower priority)",
    )
    resolved: bool = Field(
        default=False,
        description="Whether this gap has been addressed in a refinement iteration",
    )
    resolution_notes: Optional[str] = Field(
        default=None,
        description="Notes on how the gap was resolved",
    )
    created_at: datetime = Field(default_factory=datetime.utcnow)
|
|
907
|
+
|
|
908
|
+
|
|
909
|
+
class DeepResearchState(BaseModel):
    """Main state model for a deep research session.

    Manages the entire lifecycle of a multi-phase research workflow:
    - Tracks the current phase and iteration
    - Contains all sub-queries, sources, findings, and gaps
    - Provides helper methods for state manipulation
    - Handles phase advancement and refinement iteration logic

    The state is persisted to enable session resume capability.
    """

    id: str = Field(default_factory=lambda: f"deepres-{uuid4().hex[:12]}")
    original_query: str = Field(..., description="The original research query")
    research_brief: Optional[str] = Field(
        default=None,
        description="Expanded research plan generated in PLANNING phase",
    )
    phase: DeepResearchPhase = Field(
        default=DeepResearchPhase.PLANNING,
        description="Current workflow phase",
    )
    iteration: int = Field(
        default=1,
        description="Current refinement iteration (1-based)",
    )
    max_iterations: int = Field(
        default=3,
        description="Maximum refinement iterations before forced completion",
    )

    # Collections
    sub_queries: list[SubQuery] = Field(default_factory=list)
    sources: list[ResearchSource] = Field(default_factory=list)
    findings: list[ResearchFinding] = Field(default_factory=list)
    gaps: list[ResearchGap] = Field(default_factory=list)

    # Final output
    report: Optional[str] = Field(
        default=None,
        description="Final synthesized research report",
    )
    report_sections: dict[str, str] = Field(
        default_factory=dict,
        description="Named sections of the report for structured access",
    )

    # Execution tracking
    total_sources_examined: int = Field(default=0)
    total_tokens_used: int = Field(default=0)
    total_duration_ms: float = Field(default=0.0)

    # Per-phase metrics for audit
    phase_metrics: list[PhaseMetrics] = Field(
        default_factory=list,
        description="Metrics for each executed phase (timing, tokens, provider)",
    )
    # Search provider query counts (provider_name -> query_count)
    search_provider_stats: dict[str, int] = Field(
        default_factory=dict,
        description="Count of queries executed per search provider",
    )

    # Configuration
    # NOTE(review): these duplicate DeepResearchConfig fields — presumably
    # copied in at session start; confirm they stay in sync with the config.
    source_types: list[SourceType] = Field(
        default_factory=lambda: [SourceType.WEB, SourceType.ACADEMIC],
    )
    max_sources_per_query: int = Field(default=5)
    max_sub_queries: int = Field(default=5)
    follow_links: bool = Field(
        default=True,
        description="Whether to follow URLs and extract full content",
    )
    research_mode: ResearchMode = Field(
        default=ResearchMode.GENERAL,
        description="Research mode for source prioritization",
    )

    # Timestamps
    # NOTE(review): datetime.utcnow() yields naive timestamps and is
    # deprecated since Python 3.12 — confirm before migrating to aware UTC.
    created_at: datetime = Field(default_factory=datetime.utcnow)
    updated_at: datetime = Field(default_factory=datetime.utcnow)
    completed_at: Optional[datetime] = Field(default=None)

    # Provider tracking (per-phase LLM provider configuration)
    # Supports ProviderSpec format: "[cli]gemini:pro" or simple names: "gemini"
    planning_provider: Optional[str] = Field(default=None)
    analysis_provider: Optional[str] = Field(default=None)
    synthesis_provider: Optional[str] = Field(default=None)
    refinement_provider: Optional[str] = Field(default=None)
    # Per-phase model overrides (from ProviderSpec parsing)
    planning_model: Optional[str] = Field(default=None)
    analysis_model: Optional[str] = Field(default=None)
    synthesis_model: Optional[str] = Field(default=None)
    refinement_model: Optional[str] = Field(default=None)

    system_prompt: Optional[str] = Field(default=None)
    metadata: dict[str, Any] = Field(default_factory=dict)

    # =========================================================================
    # Collection Management Methods
    # =========================================================================

    def add_sub_query(
        self,
        query: str,
        rationale: Optional[str] = None,
        priority: int = 1,
    ) -> SubQuery:
        """Add a new sub-query for research.

        Args:
            query: The focused sub-query text
            rationale: Why this sub-query was generated
            priority: Execution priority (1=highest)

        Returns:
            The created SubQuery instance
        """
        sub_query = SubQuery(query=query, rationale=rationale, priority=priority)
        self.sub_queries.append(sub_query)
        self.updated_at = datetime.utcnow()
        return sub_query

    def get_sub_query(self, sub_query_id: str) -> Optional[SubQuery]:
        """Get a sub-query by ID.

        Linear scan; returns None when no sub-query matches.
        """
        for sq in self.sub_queries:
            if sq.id == sub_query_id:
                return sq
        return None

    def get_source(self, source_id: str) -> Optional[ResearchSource]:
        """Get a source by ID (linear scan; None if absent)."""
        for source in self.sources:
            if source.id == source_id:
                return source
        return None

    def get_gap(self, gap_id: str) -> Optional[ResearchGap]:
        """Get a gap by ID (linear scan; None if absent)."""
        for gap in self.gaps:
            if gap.id == gap_id:
                return gap
        return None

    def add_source(
        self,
        title: str,
        url: Optional[str] = None,
        source_type: SourceType = SourceType.WEB,
        snippet: Optional[str] = None,
        sub_query_id: Optional[str] = None,
        **kwargs: Any,
    ) -> ResearchSource:
        """Add a discovered source.

        Also increments total_sources_examined and touches updated_at.

        Args:
            title: Source title
            url: Source URL (optional)
            source_type: Type of source
            snippet: Brief excerpt
            sub_query_id: ID of sub-query that found this
            **kwargs: Additional fields

        Returns:
            The created ResearchSource instance
        """
        source = ResearchSource(
            title=title,
            url=url,
            source_type=source_type,
            snippet=snippet,
            sub_query_id=sub_query_id,
            **kwargs,
        )
        self.sources.append(source)
        self.total_sources_examined += 1
        self.updated_at = datetime.utcnow()
        return source

    def add_finding(
        self,
        content: str,
        confidence: ConfidenceLevel = ConfidenceLevel.MEDIUM,
        source_ids: Optional[list[str]] = None,
        sub_query_id: Optional[str] = None,
        category: Optional[str] = None,
    ) -> ResearchFinding:
        """Add a research finding.

        Args:
            content: The finding content
            confidence: Confidence level
            source_ids: Supporting source IDs
            sub_query_id: Originating sub-query ID
            category: Theme/category

        Returns:
            The created ResearchFinding instance
        """
        finding = ResearchFinding(
            content=content,
            confidence=confidence,
            source_ids=source_ids or [],
            sub_query_id=sub_query_id,
            category=category,
        )
        self.findings.append(finding)
        self.updated_at = datetime.utcnow()
        return finding

    def add_gap(
        self,
        description: str,
        suggested_queries: Optional[list[str]] = None,
        priority: int = 1,
    ) -> ResearchGap:
        """Add an identified research gap.

        Args:
            description: What information is missing
            suggested_queries: Follow-up queries to fill the gap
            priority: Priority for follow-up (1=highest)

        Returns:
            The created ResearchGap instance
        """
        gap = ResearchGap(
            description=description,
            suggested_queries=suggested_queries or [],
            priority=priority,
        )
        self.gaps.append(gap)
        self.updated_at = datetime.utcnow()
        return gap

    # =========================================================================
    # Query Helpers
    # =========================================================================

    def pending_sub_queries(self) -> list[SubQuery]:
        """Get sub-queries that haven't been executed yet."""
        return [sq for sq in self.sub_queries if sq.status == "pending"]

    def completed_sub_queries(self) -> list[SubQuery]:
        """Get successfully completed sub-queries."""
        return [sq for sq in self.sub_queries if sq.status == "completed"]

    def unresolved_gaps(self) -> list[ResearchGap]:
        """Get gaps that haven't been resolved yet."""
        return [g for g in self.gaps if not g.resolved]

    # =========================================================================
    # Phase Management
    # =========================================================================

    def advance_phase(self) -> DeepResearchPhase:
        """Advance to the next research phase.

        Phases advance in order: PLANNING -> GATHERING -> ANALYSIS ->
        SYNTHESIS -> REFINEMENT. Does nothing if already at REFINEMENT.

        Returns:
            The new phase after advancement
        """
        # Relies on DeepResearchPhase declaration order.
        phase_order = list(DeepResearchPhase)
        current_index = phase_order.index(self.phase)
        if current_index < len(phase_order) - 1:
            self.phase = phase_order[current_index + 1]
            self.updated_at = datetime.utcnow()
        return self.phase

    def should_continue_refinement(self) -> bool:
        """Check if another refinement iteration should occur.

        Returns True if:
        - Current iteration < max_iterations AND
        - There are unresolved gaps

        Returns:
            True if refinement should continue, False otherwise
        """
        if self.iteration >= self.max_iterations:
            return False
        if not self.unresolved_gaps():
            return False
        return True

    def start_new_iteration(self) -> int:
        """Start a new refinement iteration.

        Increments iteration counter and resets phase to GATHERING
        to begin collecting sources for the new sub-queries.

        Returns:
            The new iteration number
        """
        self.iteration += 1
        self.phase = DeepResearchPhase.GATHERING
        self.updated_at = datetime.utcnow()
        return self.iteration

    def mark_completed(self, report: Optional[str] = None) -> None:
        """Mark the research session as completed.

        NOTE(review): this pins phase back to SYNTHESIS even when called
        from REFINEMENT — presumably so resumed sessions land on the report
        phase; confirm this is intentional.

        Args:
            report: Optional final report content
        """
        self.phase = DeepResearchPhase.SYNTHESIS
        self.completed_at = datetime.utcnow()
        self.updated_at = datetime.utcnow()
        # Empty-string reports are treated as "no report" by this check.
        if report:
            self.report = report