haiku.rag 0.10.2__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of haiku.rag might be problematic. Click here for more details.
- haiku/rag/app.py +15 -16
- haiku/rag/research/__init__.py +8 -0
- haiku/rag/research/common.py +71 -6
- haiku/rag/research/dependencies.py +179 -11
- haiku/rag/research/graph.py +5 -3
- haiku/rag/research/models.py +134 -1
- haiku/rag/research/nodes/analysis.py +181 -0
- haiku/rag/research/nodes/plan.py +16 -9
- haiku/rag/research/nodes/search.py +14 -11
- haiku/rag/research/nodes/synthesize.py +7 -3
- haiku/rag/research/prompts.py +67 -28
- haiku/rag/research/state.py +11 -4
- haiku/rag/research/stream.py +177 -0
- {haiku_rag-0.10.2.dist-info → haiku_rag-0.11.0.dist-info}/METADATA +32 -13
- {haiku_rag-0.10.2.dist-info → haiku_rag-0.11.0.dist-info}/RECORD +18 -17
- haiku/rag/research/nodes/evaluate.py +0 -80
- {haiku_rag-0.10.2.dist-info → haiku_rag-0.11.0.dist-info}/WHEEL +0 -0
- {haiku_rag-0.10.2.dist-info → haiku_rag-0.11.0.dist-info}/entry_points.txt +0 -0
- {haiku_rag-0.10.2.dist-info → haiku_rag-0.11.0.dist-info}/licenses/LICENSE +0 -0
haiku/rag/app.py
CHANGED
|
@@ -18,6 +18,7 @@ from haiku.rag.research.graph import (
|
|
|
18
18
|
ResearchState,
|
|
19
19
|
build_research_graph,
|
|
20
20
|
)
|
|
21
|
+
from haiku.rag.research.stream import stream_research_graph
|
|
21
22
|
from haiku.rag.store.models.chunk import Chunk
|
|
22
23
|
from haiku.rag.store.models.document import Document
|
|
23
24
|
|
|
@@ -221,9 +222,9 @@ class HaikuRAGApp:
|
|
|
221
222
|
self.console.print()
|
|
222
223
|
|
|
223
224
|
graph = build_research_graph()
|
|
225
|
+
context = ResearchContext(original_question=question)
|
|
224
226
|
state = ResearchState(
|
|
225
|
-
|
|
226
|
-
context=ResearchContext(original_question=question),
|
|
227
|
+
context=context,
|
|
227
228
|
max_iterations=max_iterations,
|
|
228
229
|
confidence_threshold=confidence_threshold,
|
|
229
230
|
max_concurrency=max_concurrency,
|
|
@@ -236,22 +237,20 @@ class HaikuRAGApp:
|
|
|
236
237
|
provider=Config.RESEARCH_PROVIDER or Config.QA_PROVIDER,
|
|
237
238
|
model=Config.RESEARCH_MODEL or Config.QA_MODEL,
|
|
238
239
|
)
|
|
239
|
-
# Prefer graph.run; fall back to iter if unavailable
|
|
240
240
|
report = None
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
if run.result:
|
|
252
|
-
report = run.result.output
|
|
241
|
+
async for event in stream_research_graph(graph, start, state, deps):
|
|
242
|
+
if event.type == "report":
|
|
243
|
+
report = event.report
|
|
244
|
+
break
|
|
245
|
+
if event.type == "error":
|
|
246
|
+
self.console.print(
|
|
247
|
+
f"[red]Error during research: {event.message}[/red]"
|
|
248
|
+
)
|
|
249
|
+
return
|
|
250
|
+
|
|
253
251
|
if report is None:
|
|
254
|
-
|
|
252
|
+
self.console.print("[red]Research did not produce a report.[/red]")
|
|
253
|
+
return
|
|
255
254
|
|
|
256
255
|
# Display the report
|
|
257
256
|
self.console.print("[bold green]Research Report[/bold green]")
|
haiku/rag/research/__init__.py
CHANGED
|
@@ -6,6 +6,11 @@ from haiku.rag.research.graph import (
|
|
|
6
6
|
build_research_graph,
|
|
7
7
|
)
|
|
8
8
|
from haiku.rag.research.models import EvaluationResult, ResearchReport, SearchAnswer
|
|
9
|
+
from haiku.rag.research.stream import (
|
|
10
|
+
ResearchStateSnapshot,
|
|
11
|
+
ResearchStreamEvent,
|
|
12
|
+
stream_research_graph,
|
|
13
|
+
)
|
|
9
14
|
|
|
10
15
|
__all__ = [
|
|
11
16
|
"ResearchDependencies",
|
|
@@ -17,4 +22,7 @@ __all__ = [
|
|
|
17
22
|
"ResearchState",
|
|
18
23
|
"PlanNode",
|
|
19
24
|
"build_research_graph",
|
|
25
|
+
"stream_research_graph",
|
|
26
|
+
"ResearchStreamEvent",
|
|
27
|
+
"ResearchStateSnapshot",
|
|
20
28
|
]
|
haiku/rag/research/common.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any
|
|
1
|
+
from typing import TYPE_CHECKING, Any
|
|
2
2
|
|
|
3
3
|
from pydantic_ai import format_as_xml
|
|
4
4
|
from pydantic_ai.models.openai import OpenAIChatModel
|
|
@@ -7,6 +7,10 @@ from pydantic_ai.providers.openai import OpenAIProvider
|
|
|
7
7
|
|
|
8
8
|
from haiku.rag.config import Config
|
|
9
9
|
from haiku.rag.research.dependencies import ResearchContext
|
|
10
|
+
from haiku.rag.research.models import InsightAnalysis
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING: # pragma: no cover
|
|
13
|
+
from haiku.rag.research.state import ResearchDeps, ResearchState
|
|
10
14
|
|
|
11
15
|
|
|
12
16
|
def get_model(provider: str, model: str) -> Any:
|
|
@@ -27,9 +31,8 @@ def get_model(provider: str, model: str) -> Any:
|
|
|
27
31
|
return f"{provider}:{model}"
|
|
28
32
|
|
|
29
33
|
|
|
30
|
-
def log(
|
|
31
|
-
|
|
32
|
-
console.print(msg)
|
|
34
|
+
def log(deps: "ResearchDeps", state: "ResearchState", msg: str) -> None:
|
|
35
|
+
deps.emit_log(msg, state)
|
|
33
36
|
|
|
34
37
|
|
|
35
38
|
def format_context_for_prompt(context: ResearchContext) -> str:
|
|
@@ -47,7 +50,69 @@ def format_context_for_prompt(context: ResearchContext) -> str:
|
|
|
47
50
|
}
|
|
48
51
|
for qa in context.qa_responses
|
|
49
52
|
],
|
|
50
|
-
"insights":
|
|
51
|
-
|
|
53
|
+
"insights": [
|
|
54
|
+
{
|
|
55
|
+
"id": insight.id,
|
|
56
|
+
"summary": insight.summary,
|
|
57
|
+
"status": insight.status.value,
|
|
58
|
+
"supporting_sources": insight.supporting_sources,
|
|
59
|
+
"originating_questions": insight.originating_questions,
|
|
60
|
+
"notes": insight.notes,
|
|
61
|
+
}
|
|
62
|
+
for insight in context.insights
|
|
63
|
+
],
|
|
64
|
+
"gaps": [
|
|
65
|
+
{
|
|
66
|
+
"id": gap.id,
|
|
67
|
+
"description": gap.description,
|
|
68
|
+
"severity": gap.severity.value,
|
|
69
|
+
"blocking": gap.blocking,
|
|
70
|
+
"resolved": gap.resolved,
|
|
71
|
+
"resolved_by": gap.resolved_by,
|
|
72
|
+
"supporting_sources": gap.supporting_sources,
|
|
73
|
+
"notes": gap.notes,
|
|
74
|
+
}
|
|
75
|
+
for gap in context.gaps
|
|
76
|
+
],
|
|
52
77
|
}
|
|
53
78
|
return format_as_xml(context_data, root_tag="research_context")
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def format_analysis_for_prompt(
|
|
82
|
+
analysis: InsightAnalysis | None,
|
|
83
|
+
) -> str:
|
|
84
|
+
"""Format the latest insight analysis as XML for prompts."""
|
|
85
|
+
|
|
86
|
+
if analysis is None:
|
|
87
|
+
return "<latest_analysis />"
|
|
88
|
+
|
|
89
|
+
data = {
|
|
90
|
+
"commentary": analysis.commentary,
|
|
91
|
+
"highlights": [
|
|
92
|
+
{
|
|
93
|
+
"id": insight.id,
|
|
94
|
+
"summary": insight.summary,
|
|
95
|
+
"status": insight.status.value,
|
|
96
|
+
"supporting_sources": insight.supporting_sources,
|
|
97
|
+
"originating_questions": insight.originating_questions,
|
|
98
|
+
"notes": insight.notes,
|
|
99
|
+
}
|
|
100
|
+
for insight in analysis.highlights
|
|
101
|
+
],
|
|
102
|
+
"gap_assessments": [
|
|
103
|
+
{
|
|
104
|
+
"id": gap.id,
|
|
105
|
+
"description": gap.description,
|
|
106
|
+
"severity": gap.severity.value,
|
|
107
|
+
"blocking": gap.blocking,
|
|
108
|
+
"resolved": gap.resolved,
|
|
109
|
+
"resolved_by": gap.resolved_by,
|
|
110
|
+
"supporting_sources": gap.supporting_sources,
|
|
111
|
+
"notes": gap.notes,
|
|
112
|
+
}
|
|
113
|
+
for gap in analysis.gap_assessments
|
|
114
|
+
],
|
|
115
|
+
"resolved_gaps": analysis.resolved_gaps,
|
|
116
|
+
"new_questions": analysis.new_questions,
|
|
117
|
+
}
|
|
118
|
+
return format_as_xml(data, root_tag="latest_analysis")
|
haiku/rag/research/dependencies.py
CHANGED
|
@@ -1,8 +1,16 @@
|
|
|
1
|
+
from collections.abc import Iterable
|
|
2
|
+
|
|
1
3
|
from pydantic import BaseModel, Field
|
|
2
4
|
from rich.console import Console
|
|
3
5
|
|
|
4
6
|
from haiku.rag.client import HaikuRAG
|
|
5
|
-
from haiku.rag.research.models import
|
|
7
|
+
from haiku.rag.research.models import (
|
|
8
|
+
GapRecord,
|
|
9
|
+
InsightAnalysis,
|
|
10
|
+
InsightRecord,
|
|
11
|
+
SearchAnswer,
|
|
12
|
+
)
|
|
13
|
+
from haiku.rag.research.stream import ResearchStream
|
|
6
14
|
|
|
7
15
|
|
|
8
16
|
class ResearchContext(BaseModel):
|
|
@@ -15,10 +23,10 @@ class ResearchContext(BaseModel):
|
|
|
15
23
|
qa_responses: list[SearchAnswer] = Field(
|
|
16
24
|
default_factory=list, description="Structured QA pairs used during research"
|
|
17
25
|
)
|
|
18
|
-
insights: list[
|
|
26
|
+
insights: list[InsightRecord] = Field(
|
|
19
27
|
default_factory=list, description="Key insights discovered"
|
|
20
28
|
)
|
|
21
|
-
gaps: list[
|
|
29
|
+
gaps: list[GapRecord] = Field(
|
|
22
30
|
default_factory=list, description="Identified information gaps"
|
|
23
31
|
)
|
|
24
32
|
|
|
@@ -26,15 +34,147 @@ class ResearchContext(BaseModel):
|
|
|
26
34
|
"""Add a structured QA response (minimal context already included)."""
|
|
27
35
|
self.qa_responses.append(qa)
|
|
28
36
|
|
|
29
|
-
def
|
|
30
|
-
"""
|
|
31
|
-
|
|
32
|
-
|
|
37
|
+
def upsert_insights(self, records: Iterable[InsightRecord]) -> list[InsightRecord]:
|
|
38
|
+
"""Merge one or more insights into the shared context with deduplication."""
|
|
39
|
+
|
|
40
|
+
merged: list[InsightRecord] = []
|
|
41
|
+
for record in records:
|
|
42
|
+
candidate = InsightRecord.model_validate(record)
|
|
43
|
+
existing = next(
|
|
44
|
+
(ins for ins in self.insights if ins.id == candidate.id), None
|
|
45
|
+
)
|
|
46
|
+
if not existing:
|
|
47
|
+
existing = next(
|
|
48
|
+
(ins for ins in self.insights if ins.summary == candidate.summary),
|
|
49
|
+
None,
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
if existing:
|
|
53
|
+
existing.summary = candidate.summary
|
|
54
|
+
existing.status = candidate.status
|
|
55
|
+
if candidate.notes:
|
|
56
|
+
existing.notes = candidate.notes
|
|
57
|
+
existing.supporting_sources = _merge_unique(
|
|
58
|
+
existing.supporting_sources, candidate.supporting_sources
|
|
59
|
+
)
|
|
60
|
+
existing.originating_questions = _merge_unique(
|
|
61
|
+
existing.originating_questions, candidate.originating_questions
|
|
62
|
+
)
|
|
63
|
+
merged.append(existing)
|
|
64
|
+
else:
|
|
65
|
+
candidate = candidate.model_copy(deep=True)
|
|
66
|
+
if candidate.id is None: # pragma: no cover - defensive
|
|
67
|
+
raise ValueError(
|
|
68
|
+
"InsightRecord.id must be populated after validation"
|
|
69
|
+
)
|
|
70
|
+
candidate_id: str = candidate.id
|
|
71
|
+
candidate.id = self._allocate_insight_id(candidate_id)
|
|
72
|
+
self.insights.append(candidate)
|
|
73
|
+
merged.append(candidate)
|
|
74
|
+
|
|
75
|
+
return merged
|
|
76
|
+
|
|
77
|
+
def upsert_gaps(self, records: Iterable[GapRecord]) -> list[GapRecord]:
|
|
78
|
+
"""Merge one or more gap records into the shared context with deduplication."""
|
|
79
|
+
|
|
80
|
+
merged: list[GapRecord] = []
|
|
81
|
+
for record in records:
|
|
82
|
+
candidate = GapRecord.model_validate(record)
|
|
83
|
+
existing = next((gap for gap in self.gaps if gap.id == candidate.id), None)
|
|
84
|
+
if not existing:
|
|
85
|
+
existing = next(
|
|
86
|
+
(
|
|
87
|
+
gap
|
|
88
|
+
for gap in self.gaps
|
|
89
|
+
if gap.description == candidate.description
|
|
90
|
+
),
|
|
91
|
+
None,
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
if existing:
|
|
95
|
+
existing.description = candidate.description
|
|
96
|
+
existing.severity = candidate.severity
|
|
97
|
+
existing.blocking = candidate.blocking
|
|
98
|
+
existing.resolved = candidate.resolved
|
|
99
|
+
if candidate.notes:
|
|
100
|
+
existing.notes = candidate.notes
|
|
101
|
+
existing.supporting_sources = _merge_unique(
|
|
102
|
+
existing.supporting_sources, candidate.supporting_sources
|
|
103
|
+
)
|
|
104
|
+
existing.resolved_by = _merge_unique(
|
|
105
|
+
existing.resolved_by, candidate.resolved_by
|
|
106
|
+
)
|
|
107
|
+
merged.append(existing)
|
|
108
|
+
else:
|
|
109
|
+
candidate = candidate.model_copy(deep=True)
|
|
110
|
+
if candidate.id is None: # pragma: no cover - defensive
|
|
111
|
+
raise ValueError("GapRecord.id must be populated after validation")
|
|
112
|
+
candidate_id: str = candidate.id
|
|
113
|
+
candidate.id = self._allocate_gap_id(candidate_id)
|
|
114
|
+
self.gaps.append(candidate)
|
|
115
|
+
merged.append(candidate)
|
|
116
|
+
|
|
117
|
+
return merged
|
|
118
|
+
|
|
119
|
+
def mark_gap_resolved(
|
|
120
|
+
self, identifier: str, resolved_by: Iterable[str] | None = None
|
|
121
|
+
) -> GapRecord | None:
|
|
122
|
+
"""Mark a gap as resolved by identifier (id or description)."""
|
|
123
|
+
|
|
124
|
+
gap = self._find_gap(identifier)
|
|
125
|
+
if gap is None:
|
|
126
|
+
return None
|
|
127
|
+
|
|
128
|
+
gap.resolved = True
|
|
129
|
+
gap.blocking = False
|
|
130
|
+
if resolved_by:
|
|
131
|
+
gap.resolved_by = _merge_unique(gap.resolved_by, list(resolved_by))
|
|
132
|
+
return gap
|
|
33
133
|
|
|
34
|
-
def
|
|
35
|
-
"""
|
|
36
|
-
|
|
37
|
-
|
|
134
|
+
def integrate_analysis(self, analysis: InsightAnalysis) -> None:
|
|
135
|
+
"""Apply an analysis result to the shared context."""
|
|
136
|
+
|
|
137
|
+
merged_insights: list[InsightRecord] = []
|
|
138
|
+
if analysis.highlights:
|
|
139
|
+
merged_insights = self.upsert_insights(analysis.highlights)
|
|
140
|
+
analysis.highlights = merged_insights
|
|
141
|
+
if analysis.gap_assessments:
|
|
142
|
+
merged_gaps = self.upsert_gaps(analysis.gap_assessments)
|
|
143
|
+
analysis.gap_assessments = merged_gaps
|
|
144
|
+
if analysis.resolved_gaps:
|
|
145
|
+
resolved_by_list = (
|
|
146
|
+
[ins.id for ins in merged_insights if ins.id is not None]
|
|
147
|
+
if merged_insights
|
|
148
|
+
else None
|
|
149
|
+
)
|
|
150
|
+
for resolved in analysis.resolved_gaps:
|
|
151
|
+
self.mark_gap_resolved(resolved, resolved_by=resolved_by_list)
|
|
152
|
+
for question in analysis.new_questions:
|
|
153
|
+
if question not in self.sub_questions:
|
|
154
|
+
self.sub_questions.append(question)
|
|
155
|
+
|
|
156
|
+
def _allocate_insight_id(self, candidate_id: str) -> str:
|
|
157
|
+
taken: set[str] = set()
|
|
158
|
+
for ins in self.insights:
|
|
159
|
+
if ins.id is not None:
|
|
160
|
+
taken.add(ins.id)
|
|
161
|
+
return _allocate_sequential_id(candidate_id, taken)
|
|
162
|
+
|
|
163
|
+
def _allocate_gap_id(self, candidate_id: str) -> str:
|
|
164
|
+
taken: set[str] = set()
|
|
165
|
+
for gap in self.gaps:
|
|
166
|
+
if gap.id is not None:
|
|
167
|
+
taken.add(gap.id)
|
|
168
|
+
return _allocate_sequential_id(candidate_id, taken)
|
|
169
|
+
|
|
170
|
+
def _find_gap(self, identifier: str) -> GapRecord | None:
|
|
171
|
+
normalized = identifier.lower().strip()
|
|
172
|
+
for gap in self.gaps:
|
|
173
|
+
if gap.id is not None and gap.id == normalized:
|
|
174
|
+
return gap
|
|
175
|
+
if gap.description.lower().strip() == normalized:
|
|
176
|
+
return gap
|
|
177
|
+
return None
|
|
38
178
|
|
|
39
179
|
|
|
40
180
|
class ResearchDependencies(BaseModel):
|
|
@@ -45,3 +185,31 @@ class ResearchDependencies(BaseModel):
|
|
|
45
185
|
client: HaikuRAG = Field(description="RAG client for document operations")
|
|
46
186
|
context: ResearchContext = Field(description="Shared research context")
|
|
47
187
|
console: Console | None = None
|
|
188
|
+
stream: ResearchStream | None = Field(
|
|
189
|
+
default=None, description="Optional research event stream"
|
|
190
|
+
)
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def _merge_unique(existing: list[str], incoming: Iterable[str]) -> list[str]:
|
|
194
|
+
"""Merge two iterables preserving order while removing duplicates."""
|
|
195
|
+
|
|
196
|
+
merged = list(existing)
|
|
197
|
+
seen = {item for item in existing if item}
|
|
198
|
+
for item in incoming:
|
|
199
|
+
if item and item not in seen:
|
|
200
|
+
merged.append(item)
|
|
201
|
+
seen.add(item)
|
|
202
|
+
return merged
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def _allocate_sequential_id(candidate: str, taken: set[str]) -> str:
|
|
206
|
+
slug = candidate
|
|
207
|
+
if slug not in taken:
|
|
208
|
+
return slug
|
|
209
|
+
base = slug
|
|
210
|
+
counter = 2
|
|
211
|
+
while True:
|
|
212
|
+
slug = f"{base}-{counter}"
|
|
213
|
+
if slug not in taken:
|
|
214
|
+
return slug
|
|
215
|
+
counter += 1
|
haiku/rag/research/graph.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from pydantic_graph import Graph
|
|
2
2
|
|
|
3
3
|
from haiku.rag.research.models import ResearchReport
|
|
4
|
-
from haiku.rag.research.nodes.
|
|
4
|
+
from haiku.rag.research.nodes.analysis import AnalyzeInsightsNode, DecisionNode
|
|
5
5
|
from haiku.rag.research.nodes.plan import PlanNode
|
|
6
6
|
from haiku.rag.research.nodes.search import SearchDispatchNode
|
|
7
7
|
from haiku.rag.research.nodes.synthesize import SynthesizeNode
|
|
@@ -10,7 +10,8 @@ from haiku.rag.research.state import ResearchDeps, ResearchState
|
|
|
10
10
|
__all__ = [
|
|
11
11
|
"PlanNode",
|
|
12
12
|
"SearchDispatchNode",
|
|
13
|
-
"
|
|
13
|
+
"AnalyzeInsightsNode",
|
|
14
|
+
"DecisionNode",
|
|
14
15
|
"SynthesizeNode",
|
|
15
16
|
"ResearchState",
|
|
16
17
|
"ResearchDeps",
|
|
@@ -23,7 +24,8 @@ def build_research_graph() -> Graph[ResearchState, ResearchDeps, ResearchReport]
|
|
|
23
24
|
nodes=[
|
|
24
25
|
PlanNode,
|
|
25
26
|
SearchDispatchNode,
|
|
26
|
-
|
|
27
|
+
AnalyzeInsightsNode,
|
|
28
|
+
DecisionNode,
|
|
27
29
|
SynthesizeNode,
|
|
28
30
|
]
|
|
29
31
|
)
|
haiku/rag/research/models.py
CHANGED
|
@@ -1,4 +1,134 @@
|
|
|
1
|
-
|
|
1
|
+
import re
|
|
2
|
+
from enum import Enum
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel, Field, model_validator
|
|
5
|
+
|
|
6
|
+
_SLUG_RE = re.compile(r"[^a-z0-9]+")
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _make_slug(text: str, prefix: str) -> str:
|
|
10
|
+
"""Generate a lowercase slug with the given prefix as fallback."""
|
|
11
|
+
|
|
12
|
+
base = _SLUG_RE.sub("-", text.lower()).strip("-")
|
|
13
|
+
if not base:
|
|
14
|
+
base = prefix
|
|
15
|
+
# Trim overly long slugs but keep enough entropy for readability
|
|
16
|
+
return base[:48]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class InsightStatus(str, Enum):
|
|
20
|
+
OPEN = "open"
|
|
21
|
+
VALIDATED = "validated"
|
|
22
|
+
TENTATIVE = "tentative"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class GapSeverity(str, Enum):
|
|
26
|
+
LOW = "low"
|
|
27
|
+
MEDIUM = "medium"
|
|
28
|
+
HIGH = "high"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class InsightRecord(BaseModel):
|
|
32
|
+
"""Structured insight with provenance and lifecycle metadata."""
|
|
33
|
+
|
|
34
|
+
id: str | None = Field(
|
|
35
|
+
default=None,
|
|
36
|
+
description="Stable slug identifier for the insight (auto-generated if omitted)",
|
|
37
|
+
)
|
|
38
|
+
summary: str = Field(description="Concise description of the insight")
|
|
39
|
+
status: InsightStatus = Field(
|
|
40
|
+
default=InsightStatus.OPEN,
|
|
41
|
+
description="Lifecycle status for the insight",
|
|
42
|
+
)
|
|
43
|
+
supporting_sources: list[str] = Field(
|
|
44
|
+
default_factory=list,
|
|
45
|
+
description="Source identifiers backing the insight",
|
|
46
|
+
)
|
|
47
|
+
originating_questions: list[str] = Field(
|
|
48
|
+
default_factory=list,
|
|
49
|
+
description="Research sub-questions that produced this insight",
|
|
50
|
+
)
|
|
51
|
+
notes: str | None = Field(
|
|
52
|
+
default=None,
|
|
53
|
+
description="Optional elaboration or caveats for the insight",
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
@model_validator(mode="after")
|
|
57
|
+
def _set_defaults(self) -> "InsightRecord":
|
|
58
|
+
if not self.id:
|
|
59
|
+
self.id = _make_slug(self.summary, "insight")
|
|
60
|
+
self.id = self.id.lower()
|
|
61
|
+
self.supporting_sources = list(dict.fromkeys(self.supporting_sources))
|
|
62
|
+
self.originating_questions = list(dict.fromkeys(self.originating_questions))
|
|
63
|
+
return self
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class GapRecord(BaseModel):
|
|
67
|
+
"""Structured representation of an identified research gap."""
|
|
68
|
+
|
|
69
|
+
id: str | None = Field(
|
|
70
|
+
default=None,
|
|
71
|
+
description="Stable slug identifier for the gap (auto-generated if omitted)",
|
|
72
|
+
)
|
|
73
|
+
description: str = Field(description="Concrete statement of what is missing")
|
|
74
|
+
severity: GapSeverity = Field(
|
|
75
|
+
default=GapSeverity.MEDIUM,
|
|
76
|
+
description="Severity of the gap for answering the main question",
|
|
77
|
+
)
|
|
78
|
+
blocking: bool = Field(
|
|
79
|
+
default=True,
|
|
80
|
+
description="Whether this gap blocks a confident answer",
|
|
81
|
+
)
|
|
82
|
+
resolved: bool = Field(
|
|
83
|
+
default=False,
|
|
84
|
+
description="Flag indicating if the gap has been resolved",
|
|
85
|
+
)
|
|
86
|
+
resolved_by: list[str] = Field(
|
|
87
|
+
default_factory=list,
|
|
88
|
+
description="Insight IDs or notes explaining how the gap was closed",
|
|
89
|
+
)
|
|
90
|
+
supporting_sources: list[str] = Field(
|
|
91
|
+
default_factory=list,
|
|
92
|
+
description="Sources confirming the gap status (e.g., evidence of absence)",
|
|
93
|
+
)
|
|
94
|
+
notes: str | None = Field(
|
|
95
|
+
default=None,
|
|
96
|
+
description="Optional clarification about the gap or follow-up actions",
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
@model_validator(mode="after")
|
|
100
|
+
def _set_defaults(self) -> "GapRecord":
|
|
101
|
+
if not self.id:
|
|
102
|
+
self.id = _make_slug(self.description, "gap")
|
|
103
|
+
self.id = self.id.lower()
|
|
104
|
+
self.resolved_by = list(dict.fromkeys(self.resolved_by))
|
|
105
|
+
self.supporting_sources = list(dict.fromkeys(self.supporting_sources))
|
|
106
|
+
return self
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class InsightAnalysis(BaseModel):
|
|
110
|
+
"""Output of the insight aggregation agent."""
|
|
111
|
+
|
|
112
|
+
highlights: list[InsightRecord] = Field(
|
|
113
|
+
default_factory=list,
|
|
114
|
+
description="New or updated insights discovered this iteration",
|
|
115
|
+
)
|
|
116
|
+
gap_assessments: list[GapRecord] = Field(
|
|
117
|
+
default_factory=list,
|
|
118
|
+
description="New or updated gap records based on current evidence",
|
|
119
|
+
)
|
|
120
|
+
resolved_gaps: list[str] = Field(
|
|
121
|
+
default_factory=list,
|
|
122
|
+
description="Gap identifiers or descriptions considered resolved",
|
|
123
|
+
)
|
|
124
|
+
new_questions: list[str] = Field(
|
|
125
|
+
default_factory=list,
|
|
126
|
+
max_length=3,
|
|
127
|
+
description="Up to three follow-up sub-questions to pursue next",
|
|
128
|
+
)
|
|
129
|
+
commentary: str = Field(
|
|
130
|
+
description="Short narrative summary of the incremental findings",
|
|
131
|
+
)
|
|
2
132
|
|
|
3
133
|
|
|
4
134
|
class ResearchPlan(BaseModel):
|
|
@@ -37,6 +167,9 @@ class EvaluationResult(BaseModel):
|
|
|
37
167
|
max_length=3,
|
|
38
168
|
default=[],
|
|
39
169
|
)
|
|
170
|
+
gaps: list[str] = Field(
|
|
171
|
+
description="Concrete information gaps that remain", default_factory=list
|
|
172
|
+
)
|
|
40
173
|
confidence_score: float = Field(
|
|
41
174
|
description="Confidence level in the completeness of research (0-1)",
|
|
42
175
|
ge=0.0,
|