emdash-core 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- emdash_core/__init__.py +3 -0
- emdash_core/agent/__init__.py +37 -0
- emdash_core/agent/agents.py +225 -0
- emdash_core/agent/code_reviewer.py +476 -0
- emdash_core/agent/compaction.py +143 -0
- emdash_core/agent/context_manager.py +140 -0
- emdash_core/agent/events.py +338 -0
- emdash_core/agent/handlers.py +224 -0
- emdash_core/agent/inprocess_subagent.py +377 -0
- emdash_core/agent/mcp/__init__.py +50 -0
- emdash_core/agent/mcp/client.py +346 -0
- emdash_core/agent/mcp/config.py +302 -0
- emdash_core/agent/mcp/manager.py +496 -0
- emdash_core/agent/mcp/tool_factory.py +213 -0
- emdash_core/agent/prompts/__init__.py +38 -0
- emdash_core/agent/prompts/main_agent.py +104 -0
- emdash_core/agent/prompts/subagents.py +131 -0
- emdash_core/agent/prompts/workflow.py +136 -0
- emdash_core/agent/providers/__init__.py +34 -0
- emdash_core/agent/providers/base.py +143 -0
- emdash_core/agent/providers/factory.py +80 -0
- emdash_core/agent/providers/models.py +220 -0
- emdash_core/agent/providers/openai_provider.py +463 -0
- emdash_core/agent/providers/transformers_provider.py +217 -0
- emdash_core/agent/research/__init__.py +81 -0
- emdash_core/agent/research/agent.py +143 -0
- emdash_core/agent/research/controller.py +254 -0
- emdash_core/agent/research/critic.py +428 -0
- emdash_core/agent/research/macros.py +469 -0
- emdash_core/agent/research/planner.py +449 -0
- emdash_core/agent/research/researcher.py +436 -0
- emdash_core/agent/research/state.py +523 -0
- emdash_core/agent/research/synthesizer.py +594 -0
- emdash_core/agent/reviewer_profile.py +475 -0
- emdash_core/agent/rules.py +123 -0
- emdash_core/agent/runner.py +601 -0
- emdash_core/agent/session.py +262 -0
- emdash_core/agent/spec_schema.py +66 -0
- emdash_core/agent/specification.py +479 -0
- emdash_core/agent/subagent.py +397 -0
- emdash_core/agent/subagent_prompts.py +13 -0
- emdash_core/agent/toolkit.py +482 -0
- emdash_core/agent/toolkits/__init__.py +64 -0
- emdash_core/agent/toolkits/base.py +96 -0
- emdash_core/agent/toolkits/explore.py +47 -0
- emdash_core/agent/toolkits/plan.py +55 -0
- emdash_core/agent/tools/__init__.py +141 -0
- emdash_core/agent/tools/analytics.py +436 -0
- emdash_core/agent/tools/base.py +131 -0
- emdash_core/agent/tools/coding.py +484 -0
- emdash_core/agent/tools/github_mcp.py +592 -0
- emdash_core/agent/tools/history.py +13 -0
- emdash_core/agent/tools/modes.py +153 -0
- emdash_core/agent/tools/plan.py +206 -0
- emdash_core/agent/tools/plan_write.py +135 -0
- emdash_core/agent/tools/search.py +412 -0
- emdash_core/agent/tools/spec.py +341 -0
- emdash_core/agent/tools/task.py +262 -0
- emdash_core/agent/tools/task_output.py +204 -0
- emdash_core/agent/tools/tasks.py +454 -0
- emdash_core/agent/tools/traversal.py +588 -0
- emdash_core/agent/tools/web.py +179 -0
- emdash_core/analytics/__init__.py +5 -0
- emdash_core/analytics/engine.py +1286 -0
- emdash_core/api/__init__.py +5 -0
- emdash_core/api/agent.py +308 -0
- emdash_core/api/agents.py +154 -0
- emdash_core/api/analyze.py +264 -0
- emdash_core/api/auth.py +173 -0
- emdash_core/api/context.py +77 -0
- emdash_core/api/db.py +121 -0
- emdash_core/api/embed.py +131 -0
- emdash_core/api/feature.py +143 -0
- emdash_core/api/health.py +93 -0
- emdash_core/api/index.py +162 -0
- emdash_core/api/plan.py +110 -0
- emdash_core/api/projectmd.py +210 -0
- emdash_core/api/query.py +320 -0
- emdash_core/api/research.py +122 -0
- emdash_core/api/review.py +161 -0
- emdash_core/api/router.py +76 -0
- emdash_core/api/rules.py +116 -0
- emdash_core/api/search.py +119 -0
- emdash_core/api/spec.py +99 -0
- emdash_core/api/swarm.py +223 -0
- emdash_core/api/tasks.py +109 -0
- emdash_core/api/team.py +120 -0
- emdash_core/auth/__init__.py +17 -0
- emdash_core/auth/github.py +389 -0
- emdash_core/config.py +74 -0
- emdash_core/context/__init__.py +52 -0
- emdash_core/context/models.py +50 -0
- emdash_core/context/providers/__init__.py +11 -0
- emdash_core/context/providers/base.py +74 -0
- emdash_core/context/providers/explored_areas.py +183 -0
- emdash_core/context/providers/touched_areas.py +360 -0
- emdash_core/context/registry.py +73 -0
- emdash_core/context/reranker.py +199 -0
- emdash_core/context/service.py +260 -0
- emdash_core/context/session.py +352 -0
- emdash_core/core/__init__.py +104 -0
- emdash_core/core/config.py +454 -0
- emdash_core/core/exceptions.py +55 -0
- emdash_core/core/models.py +265 -0
- emdash_core/core/review_config.py +57 -0
- emdash_core/db/__init__.py +67 -0
- emdash_core/db/auth.py +134 -0
- emdash_core/db/models.py +91 -0
- emdash_core/db/provider.py +222 -0
- emdash_core/db/providers/__init__.py +5 -0
- emdash_core/db/providers/supabase.py +452 -0
- emdash_core/embeddings/__init__.py +24 -0
- emdash_core/embeddings/indexer.py +534 -0
- emdash_core/embeddings/models.py +192 -0
- emdash_core/embeddings/providers/__init__.py +7 -0
- emdash_core/embeddings/providers/base.py +112 -0
- emdash_core/embeddings/providers/fireworks.py +141 -0
- emdash_core/embeddings/providers/openai.py +104 -0
- emdash_core/embeddings/registry.py +146 -0
- emdash_core/embeddings/service.py +215 -0
- emdash_core/graph/__init__.py +26 -0
- emdash_core/graph/builder.py +134 -0
- emdash_core/graph/connection.py +692 -0
- emdash_core/graph/schema.py +416 -0
- emdash_core/graph/writer.py +667 -0
- emdash_core/ingestion/__init__.py +7 -0
- emdash_core/ingestion/change_detector.py +150 -0
- emdash_core/ingestion/git/__init__.py +5 -0
- emdash_core/ingestion/git/commit_analyzer.py +196 -0
- emdash_core/ingestion/github/__init__.py +6 -0
- emdash_core/ingestion/github/pr_fetcher.py +296 -0
- emdash_core/ingestion/github/task_extractor.py +100 -0
- emdash_core/ingestion/orchestrator.py +540 -0
- emdash_core/ingestion/parsers/__init__.py +10 -0
- emdash_core/ingestion/parsers/base_parser.py +66 -0
- emdash_core/ingestion/parsers/call_graph_builder.py +121 -0
- emdash_core/ingestion/parsers/class_extractor.py +154 -0
- emdash_core/ingestion/parsers/function_extractor.py +202 -0
- emdash_core/ingestion/parsers/import_analyzer.py +119 -0
- emdash_core/ingestion/parsers/python_parser.py +123 -0
- emdash_core/ingestion/parsers/registry.py +72 -0
- emdash_core/ingestion/parsers/ts_ast_parser.js +313 -0
- emdash_core/ingestion/parsers/typescript_parser.py +278 -0
- emdash_core/ingestion/repository.py +346 -0
- emdash_core/models/__init__.py +38 -0
- emdash_core/models/agent.py +68 -0
- emdash_core/models/index.py +77 -0
- emdash_core/models/query.py +113 -0
- emdash_core/planning/__init__.py +7 -0
- emdash_core/planning/agent_api.py +413 -0
- emdash_core/planning/context_builder.py +265 -0
- emdash_core/planning/feature_context.py +232 -0
- emdash_core/planning/feature_expander.py +646 -0
- emdash_core/planning/llm_explainer.py +198 -0
- emdash_core/planning/similarity.py +509 -0
- emdash_core/planning/team_focus.py +821 -0
- emdash_core/server.py +153 -0
- emdash_core/sse/__init__.py +5 -0
- emdash_core/sse/stream.py +196 -0
- emdash_core/swarm/__init__.py +17 -0
- emdash_core/swarm/merge_agent.py +383 -0
- emdash_core/swarm/session_manager.py +274 -0
- emdash_core/swarm/swarm_runner.py +226 -0
- emdash_core/swarm/task_definition.py +137 -0
- emdash_core/swarm/worker_spawner.py +319 -0
- emdash_core/swarm/worktree_manager.py +278 -0
- emdash_core/templates/__init__.py +10 -0
- emdash_core/templates/defaults/agent-builder.md.template +82 -0
- emdash_core/templates/defaults/focus.md.template +115 -0
- emdash_core/templates/defaults/pr-review-enhanced.md.template +309 -0
- emdash_core/templates/defaults/pr-review.md.template +80 -0
- emdash_core/templates/defaults/project.md.template +85 -0
- emdash_core/templates/defaults/research_critic.md.template +112 -0
- emdash_core/templates/defaults/research_planner.md.template +85 -0
- emdash_core/templates/defaults/research_synthesizer.md.template +128 -0
- emdash_core/templates/defaults/reviewer.md.template +81 -0
- emdash_core/templates/defaults/spec.md.template +41 -0
- emdash_core/templates/defaults/tasks.md.template +78 -0
- emdash_core/templates/loader.py +296 -0
- emdash_core/utils/__init__.py +45 -0
- emdash_core/utils/git.py +84 -0
- emdash_core/utils/image.py +502 -0
- emdash_core/utils/logger.py +51 -0
- emdash_core-0.1.7.dist-info/METADATA +35 -0
- emdash_core-0.1.7.dist-info/RECORD +187 -0
- emdash_core-0.1.7.dist-info/WHEEL +4 -0
- emdash_core-0.1.7.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,523 @@
|
|
|
1
|
+
"""State dataclasses for the Deep Research Agent.
|
|
2
|
+
|
|
3
|
+
This module defines all data structures used throughout the research process,
|
|
4
|
+
including evidence tracking, claims, gaps, and the overall research state.
|
|
5
|
+
|
|
6
|
+
Team Values Enforcement:
|
|
7
|
+
- V1: Truth over fluency - Claims must have evidence_ids
|
|
8
|
+
- V2: Evidence-first - EvidenceItem tracks all tool outputs
|
|
9
|
+
- V3: Reviewer-first - ResearchPlan includes required sections
|
|
10
|
+
- V4: Cost awareness - Budgets tracked in ResearchState
|
|
11
|
+
- V5: Actionable outcomes - Gap and ResearchQuestion structures
|
|
12
|
+
- V6: Team alignment - Deliverable categories match team workflows
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from datetime import datetime
|
|
17
|
+
from typing import Literal, Optional, Any
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class EvidenceItem:
    """A single, machine-checkable record of one tool invocation.

    Claims cite evidence through the ``id`` of these records, and
    ``output_ref`` points back at the raw tool output so that a finding
    can be reproduced.

    Attributes:
        id: Unique identifier (e.g., "E12").
        tool: Name of the tool that produced the evidence.
        input: Exact arguments that were passed to the tool.
        output_ref: Pointer to the raw output (for reproducibility).
        summary: Human-readable summary, 1-3 lines.
        entities: File paths, PR IDs, symbols, node IDs found.
        timestamp: When the evidence was collected. Defaults to the
            ISO-8601 string of ``datetime.now()`` taken at construction.
    """

    id: str
    tool: str
    input: dict
    output_ref: str
    summary: str
    entities: list[str] = field(default_factory=list)
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())

    def to_dict(self) -> dict:
        """Return the record as a plain dict keyed by field name.

        Values are passed through as-is (no copying), in field order.
        """
        keys = (
            "id",
            "tool",
            "input",
            "output_ref",
            "summary",
            "entities",
            "timestamp",
        )
        return {key: getattr(self, key) for key in keys}
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@dataclass
class Claim:
    """A grounded statement backed by evidence.

    Claims are the core output of research. Every claim MUST reference
    at least one evidence ID; this is enforced by ``__post_init__``.

    Confidence levels:
    - 0: Speculation (should be avoided)
    - 1: Single source, may have assumptions
    - 2: Multiple sources corroborate (requires 2+ distinct evidence_ids)
    - 3: Strong evidence, no assumptions

    Attributes:
        id: Unique identifier (e.g., "C7")
        statement: The claim being made
        evidence_ids: References to EvidenceItems (MUST NOT be empty)
        confidence: Discrete confidence level 0-3
        assumptions: Explicit assumptions (caps confidence at 1)
        counterevidence_ids: Evidence that contradicts this claim

    Raises:
        ValueError: If evidence_ids is empty, if assumptions are combined
            with confidence > 1, or if confidence >= 2 is backed by fewer
            than two distinct evidence IDs.
    """

    id: str
    statement: str
    evidence_ids: list[str]
    confidence: Literal[0, 1, 2, 3]
    assumptions: list[str] = field(default_factory=list)
    counterevidence_ids: list[str] = field(default_factory=list)

    def __post_init__(self):
        """Enforce team value V1: Truth over fluency."""
        if not self.evidence_ids:
            raise ValueError(
                f"Claim {self.id} must have at least one evidence_id. "
                "Team value V1: No claim without evidence."
            )
        if self.assumptions and self.confidence > 1:
            raise ValueError(
                f"Claim {self.id} has assumptions but confidence > 1. "
                "Claims with assumptions cannot exceed confidence 1."
            )
        if self.confidence >= 2:
            # Count distinct IDs: citing the same evidence item twice is
            # still a single source and must not unlock confidence 2+.
            distinct_sources = len(set(self.evidence_ids))
            if distinct_sources < 2:
                raise ValueError(
                    f"Claim {self.id} has confidence >= 2 but only {distinct_sources} evidence. "
                    "Confidence 2+ requires evidence from 2+ distinct sources."
                )

    def to_dict(self) -> dict:
        """Convert to dictionary for serialization."""
        return {
            "id": self.id,
            "statement": self.statement,
            "evidence_ids": self.evidence_ids,
            "confidence": self.confidence,
            "assumptions": self.assumptions,
            "counterevidence_ids": self.counterevidence_ids,
        }
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
@dataclass
class Gap:
    """An open question the research could not settle.

    Recording gaps explicitly is how the process acknowledges its own
    limits (V1: Truth over fluency).

    Attributes:
        question: The unanswered question.
        reason: Why it could not be answered.
        suggested_tools: Tools that might help answer it.
    """

    question: str
    reason: str
    suggested_tools: list[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return the gap as a plain dict for serialization."""
        return dict(
            question=self.question,
            reason=self.reason,
            suggested_tools=self.suggested_tools,
        )
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
@dataclass
class ResearchQuestion:
    """One entry of the research plan.

    Each question carries a priority (P0/P1/P2), maps to a team
    deliverable, and lists success criteria for the Critic to evaluate.

    Attributes:
        qid: Unique question identifier (e.g., "Q1").
        question: The research question itself.
        priority: P0 (must answer), P1 (should answer), P2 (nice to have).
        success_criteria: Checkable criteria for completion.
        suggested_tools: Tool macros or sequences to use.
        deliverable: Team workflow category this maps to.
    """

    qid: str
    question: str
    priority: Literal["P0", "P1", "P2"]
    success_criteria: list[str]
    suggested_tools: list[str]
    deliverable: Literal["Design", "Implementation", "Testing", "Review", "Ops"]

    def to_dict(self) -> dict:
        """Return the question as a plain dict, keys in field order."""
        keys = (
            "qid",
            "question",
            "priority",
            "success_criteria",
            "suggested_tools",
            "deliverable",
        )
        return {key: getattr(self, key) for key in keys}
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
@dataclass
class ResearchPlan:
    """What the Planner agent produces.

    The research goal is broken down into prioritized questions. The
    plan also carries budgets for cost awareness (V4) and the sections
    a reviewer-first report must contain (V3).

    Attributes:
        goal: The original research goal.
        questions: Prioritized research questions.
        max_iterations: Maximum number of research-critique loops.
        budgets: Resource limits {tool_calls, tokens, time_s}.
        required_sections: Sections that must appear in the final report.
        team_values_checklist: Values to check during critique.
    """

    goal: str
    questions: list[ResearchQuestion]
    max_iterations: int
    budgets: dict  # {tool_calls: int, tokens: int, time_s: int}
    required_sections: list[str]
    team_values_checklist: list[str]

    def _questions_with_priority(self, level: str) -> list[ResearchQuestion]:
        """Return the questions whose priority equals ``level``, in plan order."""
        return [entry for entry in self.questions if entry.priority == level]

    def get_p0_questions(self) -> list[ResearchQuestion]:
        """Return every P0 (must answer) question."""
        return self._questions_with_priority("P0")

    def get_p1_questions(self) -> list[ResearchQuestion]:
        """Return every P1 (should answer) question."""
        return self._questions_with_priority("P1")

    def get_p2_questions(self) -> list[ResearchQuestion]:
        """Return every P2 (nice to have) question."""
        return self._questions_with_priority("P2")

    def to_dict(self) -> dict:
        """Return the plan as a plain dict; questions are serialized too."""
        serialized_questions = [entry.to_dict() for entry in self.questions]
        return {
            "goal": self.goal,
            "questions": serialized_questions,
            "max_iterations": self.max_iterations,
            "budgets": self.budgets,
            "required_sections": self.required_sections,
            "team_values_checklist": self.team_values_checklist,
        }
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
@dataclass
class FollowUpQuestion:
    """A question the Critic wants investigated next.

    Attributes:
        question: The follow-up question.
        why: Reason this follow-up is needed.
        suggested_tools: Tools to use for answering.
        qid: Optional link to the original question; ``None`` when the
            follow-up is not tied to one.
    """

    question: str
    why: str
    suggested_tools: list[str] = field(default_factory=list)
    qid: Optional[str] = None

    def to_dict(self) -> dict:
        """Return the follow-up as a plain dict, with ``qid`` listed first."""
        return dict(
            qid=self.qid,
            question=self.question,
            why=self.why,
            suggested_tools=self.suggested_tools,
        )
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
@dataclass
class Contradiction:
    """Two claims that were found to conflict with each other.

    Attributes:
        claim_a: ID of the first claim.
        claim_b: ID of the second claim.
        note: Explanation of how the two contradict.
    """

    claim_a: str
    claim_b: str
    note: str

    def to_dict(self) -> dict:
        """Return the contradiction as a plain dict for serialization."""
        return dict(claim_a=self.claim_a, claim_b=self.claim_b, note=self.note)
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
@dataclass
class ValuesViolation:
    """A breach of a team value, as reported by the Critic.

    Attributes:
        value: Which value was violated (V1-V6).
        issue: Description of the violation.
        affected_claims: Claims that violate this value.
    """

    value: str
    issue: str
    affected_claims: list[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return the violation as a plain dict for serialization."""
        return dict(
            value=self.value,
            issue=self.issue,
            affected_claims=self.affected_claims,
        )
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
@dataclass
class CritiqueScores:
    """The Critic agent's per-dimension ratings.

    Every dimension uses the same 1-5 scale:
    - 1: Poor
    - 2: Below expectations
    - 3: Acceptable
    - 4: Good
    - 5: Excellent

    Attributes:
        coverage: Were all questions addressed?
        evidence: Are claims backed by tool results?
        depth: Is analysis thorough or superficial?
        coherence: Do findings connect logically?
        team_alignment: Does output follow team values/workflows?
    """

    coverage: Literal[1, 2, 3, 4, 5]
    evidence: Literal[1, 2, 3, 4, 5]
    depth: Literal[1, 2, 3, 4, 5]
    coherence: Literal[1, 2, 3, 4, 5]
    team_alignment: Literal[1, 2, 3, 4, 5]

    def average(self) -> float:
        """Return the mean of the five dimension scores."""
        dimension_total = (
            self.coverage
            + self.evidence
            + self.depth
            + self.coherence
            + self.team_alignment
        )
        return dimension_total / 5.0

    def to_dict(self) -> dict:
        """Return the scores as a plain dict keyed by dimension name."""
        dimensions = ("coverage", "evidence", "depth", "coherence", "team_alignment")
        return {name: getattr(self, name) for name in dimensions}
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
@dataclass
class Critique:
    """What the Critic agent produces.

    The Critic judges both research completeness and adherence to team
    values.

    Decision meanings:
    - APPROVE: All P0 criteria met, scores pass thresholds
    - CONTINUE: Progress made, follow-ups needed
    - REJECT: Values violated, must fix issues
    - ESCALATE: Need more powerful model/budget

    Hard rules checked at construction time (see ``__post_init__``):
    - evidence < 3 -> APPROVE forbidden
    - team_alignment < 4 -> APPROVE forbidden

    Hard rules of the process that this class does not check itself:
    - All P0 questions must meet success criteria for APPROVE
    - Any contradiction must be resolved or listed

    Attributes:
        decision: The Critic's decision.
        scores: Scores across all dimensions.
        must_fix: Blocking issues that must be fixed.
        follow_up_questions: Questions to investigate next.
        risky_claims: Claims that need more evidence.
        contradictions: Detected contradictions between claims.
        values_violations: Team value violations found.

    Raises:
        ValueError: If the decision is APPROVE while the evidence score
            is below 3 or the team_alignment score is below 4.
    """

    decision: Literal["APPROVE", "CONTINUE", "REJECT", "ESCALATE"]
    scores: CritiqueScores
    must_fix: list[str] = field(default_factory=list)
    follow_up_questions: list[FollowUpQuestion] = field(default_factory=list)
    risky_claims: list[str] = field(default_factory=list)
    contradictions: list[Contradiction] = field(default_factory=list)
    values_violations: list[ValuesViolation] = field(default_factory=list)

    def __post_init__(self):
        """Reject an APPROVE decision whose scores miss the thresholds."""
        # Only approvals are gated; every other decision is accepted as-is.
        if self.decision != "APPROVE":
            return
        if self.scores.evidence < 3:
            raise ValueError(
                "Cannot APPROVE with evidence score < 3. "
                "Team value V2: Evidence-first."
            )
        if self.scores.team_alignment < 4:
            raise ValueError(
                "Cannot APPROVE with team_alignment score < 4. "
                "Team value V6: Team alignment."
            )

    def to_dict(self) -> dict:
        """Return the critique as a plain dict; nested records are serialized too."""
        score_dict = self.scores.to_dict()
        follow_ups = [item.to_dict() for item in self.follow_up_questions]
        conflicts = [item.to_dict() for item in self.contradictions]
        violations = [item.to_dict() for item in self.values_violations]
        return {
            "decision": self.decision,
            "scores": score_dict,
            "must_fix": self.must_fix,
            "follow_up_questions": follow_ups,
            "risky_claims": self.risky_claims,
            "contradictions": conflicts,
            "values_violations": violations,
        }
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
@dataclass
class IterationResult:
    """Everything produced by one research-critique cycle.

    Kept for history tracking and for the final synthesis.

    Attributes:
        iteration: Iteration number (0-indexed).
        evidence: Evidence collected in this iteration.
        claims: Claims proposed in this iteration.
        gaps: Gaps identified in this iteration.
        critique: The Critic's evaluation.
        model_tier: Model tier used for this iteration; defaults to "fast".
    """

    iteration: int
    evidence: list[EvidenceItem]
    claims: list[Claim]
    gaps: list[Gap]
    critique: Critique
    model_tier: str = "fast"

    def to_dict(self) -> dict:
        """Return the iteration as a plain dict; nested records are serialized too."""
        evidence_dicts = [item.to_dict() for item in self.evidence]
        claim_dicts = [item.to_dict() for item in self.claims]
        gap_dicts = [item.to_dict() for item in self.gaps]
        return {
            "iteration": self.iteration,
            "evidence": evidence_dicts,
            "claims": claim_dicts,
            "gaps": gap_dicts,
            "critique": self.critique.to_dict(),
            "model_tier": self.model_tier,
        }
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
@dataclass
class ResearchState:
    """Full state of the research process.

    Tracks everything needed for the multi-loop research cycle,
    including history, budgets, and model tiers.

    Attributes:
        plan: The research plan from the Planner
        history: Results from all iterations
        context: Shared context updated each iteration
        iteration: Current iteration number
        remaining_budget: Remaining resource budgets; when left empty it
            is initialized from a copy of ``plan.budgets``
        current_model_tier: Current model tier (fast/standard/powerful)
        question_queue: Question IDs still to be answered; when left
            empty it is initialized from the plan, P0 first, then P1, P2
        answered_questions: Question IDs that have been answered
    """

    plan: ResearchPlan
    history: list[IterationResult] = field(default_factory=list)
    context: dict = field(default_factory=dict)
    iteration: int = 0
    remaining_budget: dict = field(default_factory=dict)
    current_model_tier: Literal["fast", "standard", "powerful"] = "fast"
    question_queue: list[str] = field(default_factory=list)  # qids
    answered_questions: set[str] = field(default_factory=set)  # qids

    def __post_init__(self):
        """Fill in the remaining budget and question queue from the plan."""
        if not self.plan:
            return
        if not self.remaining_budget:
            # Copy so that spending budget never mutates the plan itself.
            self.remaining_budget = dict(self.plan.budgets)
        if not self.question_queue:
            # Queue order: P0 first, then P1, then P2.
            prioritized = (
                self.plan.get_p0_questions()
                + self.plan.get_p1_questions()
                + self.plan.get_p2_questions()
            )
            self.question_queue = [q.qid for q in prioritized]

    def get_all_evidence(self) -> list[EvidenceItem]:
        """Get all evidence from all iterations, in history order."""
        return [item for result in self.history for item in result.evidence]

    def get_all_claims(self) -> list[Claim]:
        """Get all claims from all iterations, in history order."""
        return [claim for result in self.history for claim in result.claims]

    def get_all_gaps(self) -> list[Gap]:
        """Get all gaps from all iterations, in history order."""
        return [gap for result in self.history for gap in result.gaps]

    def get_question_by_qid(self, qid: str) -> Optional[ResearchQuestion]:
        """Return the first plan question whose ID is ``qid``, or None."""
        return next((q for q in self.plan.questions if q.qid == qid), None)

    def is_approved(self) -> bool:
        """Check whether the most recent iteration was approved.

        Returns False when no iteration has been recorded yet.
        """
        if not self.history:
            return False
        return self.history[-1].critique.decision == "APPROVE"

    def budget_used_percent(self) -> float:
        """Calculate percentage of tool call budget used.

        The total comes from ``plan.budgets["tool_calls"]`` (100 when the
        key is absent); a missing remaining value counts as nothing spent.
        A total of zero is reported as 100.0 instead of dividing by zero:
        a zero budget leaves nothing to spend, so it is treated as
        exhausted.
        """
        total = self.plan.budgets.get("tool_calls", 100)
        if total == 0:
            return 100.0
        remaining = self.remaining_budget.get("tool_calls", total)
        return ((total - remaining) / total) * 100

    def to_dict(self) -> dict:
        """Convert to dictionary for serialization.

        ``answered_questions`` is emitted as a sorted list so that the
        output is deterministic regardless of set iteration order.
        """
        return {
            "plan": self.plan.to_dict(),
            "history": [h.to_dict() for h in self.history],
            "context": self.context,
            "iteration": self.iteration,
            "remaining_budget": self.remaining_budget,
            "current_model_tier": self.current_model_tier,
            "question_queue": self.question_queue,
            "answered_questions": sorted(self.answered_questions),
        }
|