haiku.rag-slim 0.16.0__py3-none-any.whl → 0.24.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of haiku.rag-slim might be problematic. Click here for more details.
- haiku/rag/app.py +430 -72
- haiku/rag/chunkers/__init__.py +31 -0
- haiku/rag/chunkers/base.py +31 -0
- haiku/rag/chunkers/docling_local.py +164 -0
- haiku/rag/chunkers/docling_serve.py +179 -0
- haiku/rag/cli.py +207 -24
- haiku/rag/cli_chat.py +489 -0
- haiku/rag/client.py +1251 -266
- haiku/rag/config/__init__.py +16 -10
- haiku/rag/config/loader.py +5 -44
- haiku/rag/config/models.py +126 -17
- haiku/rag/converters/__init__.py +31 -0
- haiku/rag/converters/base.py +63 -0
- haiku/rag/converters/docling_local.py +193 -0
- haiku/rag/converters/docling_serve.py +229 -0
- haiku/rag/converters/text_utils.py +237 -0
- haiku/rag/embeddings/__init__.py +123 -24
- haiku/rag/embeddings/voyageai.py +175 -20
- haiku/rag/graph/__init__.py +0 -11
- haiku/rag/graph/agui/__init__.py +8 -2
- haiku/rag/graph/agui/cli_renderer.py +1 -1
- haiku/rag/graph/agui/emitter.py +219 -31
- haiku/rag/graph/agui/server.py +20 -62
- haiku/rag/graph/agui/stream.py +1 -2
- haiku/rag/graph/research/__init__.py +5 -2
- haiku/rag/graph/research/dependencies.py +12 -126
- haiku/rag/graph/research/graph.py +390 -135
- haiku/rag/graph/research/models.py +91 -112
- haiku/rag/graph/research/prompts.py +99 -91
- haiku/rag/graph/research/state.py +35 -27
- haiku/rag/inspector/__init__.py +8 -0
- haiku/rag/inspector/app.py +259 -0
- haiku/rag/inspector/widgets/__init__.py +6 -0
- haiku/rag/inspector/widgets/chunk_list.py +100 -0
- haiku/rag/inspector/widgets/context_modal.py +89 -0
- haiku/rag/inspector/widgets/detail_view.py +130 -0
- haiku/rag/inspector/widgets/document_list.py +75 -0
- haiku/rag/inspector/widgets/info_modal.py +209 -0
- haiku/rag/inspector/widgets/search_modal.py +183 -0
- haiku/rag/inspector/widgets/visual_modal.py +126 -0
- haiku/rag/mcp.py +106 -102
- haiku/rag/monitor.py +33 -9
- haiku/rag/providers/__init__.py +5 -0
- haiku/rag/providers/docling_serve.py +108 -0
- haiku/rag/qa/__init__.py +12 -10
- haiku/rag/qa/agent.py +43 -61
- haiku/rag/qa/prompts.py +35 -57
- haiku/rag/reranking/__init__.py +9 -6
- haiku/rag/reranking/base.py +1 -1
- haiku/rag/reranking/cohere.py +5 -4
- haiku/rag/reranking/mxbai.py +5 -2
- haiku/rag/reranking/vllm.py +3 -4
- haiku/rag/reranking/zeroentropy.py +6 -5
- haiku/rag/store/__init__.py +2 -1
- haiku/rag/store/engine.py +242 -42
- haiku/rag/store/exceptions.py +4 -0
- haiku/rag/store/models/__init__.py +8 -2
- haiku/rag/store/models/chunk.py +190 -0
- haiku/rag/store/models/document.py +46 -0
- haiku/rag/store/repositories/chunk.py +141 -121
- haiku/rag/store/repositories/document.py +25 -84
- haiku/rag/store/repositories/settings.py +11 -14
- haiku/rag/store/upgrades/__init__.py +19 -3
- haiku/rag/store/upgrades/v0_10_1.py +1 -1
- haiku/rag/store/upgrades/v0_19_6.py +65 -0
- haiku/rag/store/upgrades/v0_20_0.py +68 -0
- haiku/rag/store/upgrades/v0_23_1.py +100 -0
- haiku/rag/store/upgrades/v0_9_3.py +3 -3
- haiku/rag/utils.py +371 -146
- {haiku_rag_slim-0.16.0.dist-info → haiku_rag_slim-0.24.0.dist-info}/METADATA +15 -12
- haiku_rag_slim-0.24.0.dist-info/RECORD +78 -0
- {haiku_rag_slim-0.16.0.dist-info → haiku_rag_slim-0.24.0.dist-info}/WHEEL +1 -1
- haiku/rag/chunker.py +0 -65
- haiku/rag/embeddings/base.py +0 -25
- haiku/rag/embeddings/ollama.py +0 -28
- haiku/rag/embeddings/openai.py +0 -26
- haiku/rag/embeddings/vllm.py +0 -29
- haiku/rag/graph/agui/events.py +0 -254
- haiku/rag/graph/common/__init__.py +0 -5
- haiku/rag/graph/common/models.py +0 -42
- haiku/rag/graph/common/nodes.py +0 -265
- haiku/rag/graph/common/prompts.py +0 -46
- haiku/rag/graph/common/utils.py +0 -44
- haiku/rag/graph/deep_qa/__init__.py +0 -1
- haiku/rag/graph/deep_qa/dependencies.py +0 -27
- haiku/rag/graph/deep_qa/graph.py +0 -243
- haiku/rag/graph/deep_qa/models.py +0 -20
- haiku/rag/graph/deep_qa/prompts.py +0 -59
- haiku/rag/graph/deep_qa/state.py +0 -56
- haiku/rag/graph/research/common.py +0 -87
- haiku/rag/reader.py +0 -135
- haiku_rag_slim-0.16.0.dist-info/RECORD +0 -71
- {haiku_rag_slim-0.16.0.dist-info → haiku_rag_slim-0.24.0.dist-info}/entry_points.txt +0 -0
- {haiku_rag_slim-0.16.0.dist-info → haiku_rag_slim-0.24.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
# pyright: reportPossiblyUnboundVariable=false
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
|
+
|
|
6
|
+
from haiku.rag.client import HaikuRAG
|
|
7
|
+
from haiku.rag.config import get_config
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from textual.app import ComposeResult
|
|
11
|
+
|
|
12
|
+
try:
|
|
13
|
+
from textual.app import App
|
|
14
|
+
from textual.binding import Binding
|
|
15
|
+
from textual.screen import Screen
|
|
16
|
+
from textual.widgets import Footer, Header
|
|
17
|
+
|
|
18
|
+
from haiku.rag.inspector.widgets.chunk_list import ChunkList
|
|
19
|
+
from haiku.rag.inspector.widgets.detail_view import DetailView
|
|
20
|
+
from haiku.rag.inspector.widgets.document_list import DocumentList
|
|
21
|
+
from haiku.rag.inspector.widgets.search_modal import SearchModal
|
|
22
|
+
|
|
23
|
+
TEXTUAL_AVAILABLE = True
|
|
24
|
+
except ImportError:
|
|
25
|
+
TEXTUAL_AVAILABLE = False
|
|
26
|
+
App = object # type: ignore
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class InspectorApp(App):  # type: ignore[misc]  # pragma: no cover
    """Textual TUI for inspecting LanceDB data.

    The screen is a 2x2 grid: the document list fills the left column, the
    chunk list and the detail view share the right column. Modals (search,
    info, visual grounding, agent context) are pushed on top of that screen.

    The class body deliberately avoids evaluating any name that only exists
    when the guarded ``textual`` / widget imports succeeded (bindings are plain
    tuples, annotations that mention those names are quoted). That keeps this
    module importable when the import guard fell back to ``App = object``;
    the failure is then reported when the app is instantiated.
    """

    TITLE = "haiku.rag DB Inspector"

    CSS = """
    Screen {
        layout: grid;
        grid-size: 2 2;
        grid-columns: 1fr 2fr;
        grid-rows: 1fr 1fr;
    }

    #document-list {
        column-span: 1;
        row-span: 2;
        border: solid $primary;
    }

    #chunk-list {
        column-span: 1;
        row-span: 1;
        border: solid $secondary;
    }

    #detail-view {
        column-span: 1;
        row-span: 1;
        border: solid $accent;
    }

    ListItem {
        overflow: hidden;
    }

    ListItem Static {
        overflow: hidden;
        text-overflow: ellipsis;
    }
    """

    # (key, action, description) tuples; Textual converts them to Binding
    # objects, which are shown in the footer by default.
    BINDINGS = [
        ("q", "quit", "Quit"),
        ("/", "search", "Search"),
        ("i", "show_info", "Info"),
        ("v", "show_visual", "Visual"),
        ("c", "show_context", "Context"),
    ]

    def __init__(
        self, db_path: Path, read_only: bool = False, before: datetime | None = None
    ):
        """Create the inspector application.

        Args:
            db_path: Path to the LanceDB database to inspect.
            read_only: Whether to open the database in read-only mode.
            before: Query the database as it existed before this datetime.

        Raises:
            ImportError: If the guarded textual/widget imports at module load
                failed, so no usable ``App`` base class is available.
        """
        if not TEXTUAL_AVAILABLE:
            raise ImportError(
                "The inspector requires the 'textual' package and the inspector "
                "widgets, but importing them failed."
            )
        super().__init__()
        self.db_path = db_path
        self.read_only = read_only
        self.before = before
        # Stays None until the client has been entered successfully in on_mount.
        self.client: HaikuRAG | None = None

    def compose(self) -> "ComposeResult":
        """Compose the UI layout."""
        yield Header()
        yield DocumentList(id="document-list")
        yield ChunkList(id="chunk-list")
        yield DetailView(id="detail-view")
        yield Footer()

    async def on_mount(self) -> None:
        """Open the database client and load the first batch of documents."""
        client = HaikuRAG(
            db_path=self.db_path,
            config=get_config(),
            read_only=self.read_only,
            before=self.before,
        )
        await client.__aenter__()
        # Publish the client only after it was entered: every handler gates on
        # ``self.client`` and on_unmount must not exit a client that never
        # finished entering.
        self.client = client

        doc_list = self.query_one(DocumentList)
        await doc_list.load_documents(client)

        doc_list.list_view.focus()

    async def on_unmount(self) -> None:
        """Close the database client, if one was opened."""
        client, self.client = self.client, None
        if client is not None:
            await client.__aexit__(None, None, None)

    def _select_chunk(self, chunk_list: "ChunkList", chunk_id: str) -> None:
        """Highlight and focus the loaded chunk with ``chunk_id``.

        Only chunks already present in ``chunk_list.chunks`` are considered;
        nothing happens when the id is not among them.
        """
        for idx, c in enumerate(chunk_list.chunks):
            if c.id == chunk_id:
                chunk_list.list_view.index = idx
                chunk_list.list_view.focus()
                break

    def _selected_chunk(self):
        """Return the chunk highlighted in the chunk list, or None if there is none."""
        chunk_list = self.query_one(ChunkList)
        idx = chunk_list.list_view.index
        if idx is None or not 0 <= idx < len(chunk_list.chunks):
            return None
        return chunk_list.chunks[idx]

    async def _dismiss_modals(self) -> None:
        """Dismiss all modal screens, returning to the main screen."""
        while len(self.screen_stack) > 1:
            self.pop_screen()

    async def _switch_modal(self, screen: "Screen") -> None:
        """Switch to a new modal, dismissing any existing modals first."""
        await self._dismiss_modals()
        await self.push_screen(screen)

    async def action_search(self) -> None:
        """Open search modal."""
        if self.client:
            await self._switch_modal(SearchModal(self.client))

    async def action_show_info(self) -> None:
        """Show database info modal."""
        if self.client:
            from haiku.rag.inspector.widgets.info_modal import InfoModal

            await self._switch_modal(InfoModal(self.client, self.db_path))

    async def on_search_modal_chunk_selected(
        self, message: "SearchModal.ChunkSelected"
    ) -> None:
        """Handle chunk selection from search modal.

        Navigates to the chunk's document, loads that document's chunks and
        then selects the chunk in the chunk list.

        Args:
            message: Message containing the chunk picked in the search modal
        """
        if not self.client:
            return

        chunk = message.chunk
        if not chunk.document_id:
            return

        # Only navigate when the owning document can still be fetched.
        doc = await self.client.document_repository.get_by_id(chunk.document_id)
        if not doc:
            return

        doc_list = self.query_one(DocumentList)
        chunk_list = self.query_one(ChunkList)

        # Find and select the document (among the documents loaded so far)
        for idx, d in enumerate(doc_list.documents):
            if d.id == chunk.document_id:
                doc_list.list_view.index = idx
                break

        # Load chunks for this document
        await chunk_list.load_chunks_for_document(self.client, chunk.document_id)

        # Wait a tick for the ListView to process the new items
        self.call_after_refresh(self._select_chunk, chunk_list, chunk.id)

    async def on_document_list_document_selected(
        self, message: "DocumentList.DocumentSelected"
    ) -> None:
        """Handle document selection from document list.

        Args:
            message: Message containing selected document
        """
        if not self.client:
            return

        # Show document details
        detail_view = self.query_one(DetailView)
        await detail_view.show_document(message.document)

        # Load chunks for this document
        if message.document.id:
            chunk_list = self.query_one(ChunkList)
            await chunk_list.load_chunks_for_document(self.client, message.document.id)

    async def on_chunk_list_chunk_selected(
        self, message: "ChunkList.ChunkSelected"
    ) -> None:
        """Handle chunk selection from chunk list.

        Args:
            message: Message containing selected chunk
        """
        # Show chunk details
        detail_view = self.query_one(DetailView)
        await detail_view.show_chunk(message.chunk)

    async def action_show_visual(self) -> None:
        """Show visual grounding for the currently selected chunk."""
        if not self.client:
            return

        chunk = self._selected_chunk()
        if chunk is None:
            return

        # Imported lazily, only when the modal is actually requested.
        from haiku.rag.inspector.widgets.visual_modal import VisualGroundingModal

        await self._switch_modal(VisualGroundingModal(chunk=chunk, client=self.client))

    async def action_show_context(self) -> None:
        """Show how the currently selected chunk would be formatted for agents."""
        if not self.client:
            return

        chunk = self._selected_chunk()
        if chunk is None:
            return

        # Imported lazily, only when the modal is actually requested.
        from haiku.rag.inspector.widgets.context_modal import ContextModal

        await self._switch_modal(ContextModal(chunk=chunk, client=self.client))
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def run_inspector(
    db_path: Path | None = None,
    read_only: bool = False,
    before: datetime | None = None,
) -> None:  # pragma: no cover
    """Launch the inspector TUI and block until it exits.

    Args:
        db_path: Location of the LanceDB database. When omitted, the database
            named "haiku.rag.lancedb" inside the configured storage data
            directory is used.
        read_only: Open the database in read-only mode.
        before: Inspect the database as it existed before this datetime.
    """
    # The configuration is loaded unconditionally, as in the original flow.
    settings = get_config()
    target = (
        db_path
        if db_path is not None
        else settings.storage.data_dir / "haiku.rag.lancedb"
    )

    InspectorApp(target, read_only=read_only, before=before).run()
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
from haiku.rag.inspector.widgets.chunk_list import ChunkList
|
|
2
|
+
from haiku.rag.inspector.widgets.detail_view import DetailView
|
|
3
|
+
from haiku.rag.inspector.widgets.document_list import DocumentList
|
|
4
|
+
from haiku.rag.inspector.widgets.visual_modal import VisualGroundingModal
|
|
5
|
+
|
|
6
|
+
__all__ = ["ChunkList", "DetailView", "DocumentList", "VisualGroundingModal"]
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
from textual import on
|
|
2
|
+
from textual.app import ComposeResult
|
|
3
|
+
from textual.containers import VerticalScroll
|
|
4
|
+
from textual.message import Message
|
|
5
|
+
from textual.widgets import ListItem, ListView, Static
|
|
6
|
+
|
|
7
|
+
from haiku.rag.client import HaikuRAG
|
|
8
|
+
from haiku.rag.store.models import Chunk
|
|
9
|
+
|
|
10
|
+
BATCH_SIZE = 50
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ChunkList(VerticalScroll):  # pragma: no cover
    """Widget for displaying and browsing the chunks of one document.

    Chunks are fetched in batches of ``BATCH_SIZE``; further batches are
    loaded on demand when the highlight gets close to the end of the list.
    """

    can_focus = False

    # Load the next batch once the highlight is within this many rows of the end.
    _PREFETCH_MARGIN = 10

    class ChunkSelected(Message):
        """Message sent when a chunk is selected."""

        def __init__(self, chunk: Chunk) -> None:
            super().__init__()
            self.chunk = chunk

    def __init__(self, **kwargs) -> None:
        super().__init__(**kwargs)
        self.chunks: list[Chunk] = []
        self.list_view = ListView()
        self.has_more: bool = False
        self._client: HaikuRAG | None = None
        self._document_id: str | None = None
        self._loading: bool = False
        self._total_chunks: int = 0
        # Bumped on every load_chunks_for_document call so that a load_more
        # started for an earlier document can tell it has been superseded.
        self._generation: int = 0

    def compose(self) -> ComposeResult:
        """Compose the chunk list."""
        yield Static("[bold]Chunks[/bold]", classes="title")
        yield self.list_view

    async def _append_chunks(self, chunks: list[Chunk], generation: int) -> None:
        """Append one list row per chunk, stopping if the load was superseded.

        Each row shows the chunk order and the first line of its content.
        """
        for chunk in chunks:
            if generation != self._generation:
                return
            first_line = chunk.content.split("\n", 1)[0]
            # markup=False: chunk text is arbitrary document content and must
            # be shown literally rather than parsed as console markup.
            await self.list_view.append(
                ListItem(Static(f"[{chunk.order}] {first_line}", markup=False))
            )

    async def load_chunks_for_document(
        self, client: HaikuRAG, document_id: str
    ) -> None:
        """Load initial batch of chunks for a specific document.

        Replaces whatever the list currently shows.

        Args:
            client: Client whose chunk repository is queried.
            document_id: Id of the document whose chunks are listed.
        """
        self._generation += 1
        generation = self._generation
        self._client = client
        self._document_id = document_id
        self._total_chunks = await client.chunk_repository.count_by_document_id(
            document_id
        )

        self.chunks = await client.chunk_repository.get_by_document_id(
            document_id, limit=BATCH_SIZE, offset=0
        )
        self.has_more = len(self.chunks) < self._total_chunks

        await self.list_view.clear()
        await self._append_chunks(self.chunks, generation)

    async def load_more(self) -> None:
        """Load the next batch of chunks.

        Does nothing when there is nothing more to load, a load is already in
        progress, or no document has been loaded yet. A batch that arrives
        after another document was loaded is discarded (best effort).
        """
        if (
            not self.has_more
            or self._loading
            or not self._client
            or not self._document_id
        ):
            return

        generation = self._generation
        self._loading = True
        try:
            offset = len(self.chunks)
            new_chunks = await self._client.chunk_repository.get_by_document_id(
                self._document_id, limit=BATCH_SIZE, offset=offset
            )
            if generation != self._generation:
                # The list now shows another load; these chunks are stale.
                return
            self.has_more = (offset + len(new_chunks)) < self._total_chunks
            self.chunks.extend(new_chunks)

            await self._append_chunks(new_chunks, generation)
        finally:
            # Always release the flag, otherwise one failed fetch would
            # disable loading further batches for good.
            self._loading = False

    @on(ListView.Highlighted)
    @on(ListView.Selected)
    async def handle_chunk_selection(
        self, event: ListView.Highlighted | ListView.Selected
    ) -> None:
        """Handle chunk selection (arrow keys or Enter)."""
        if event.list_view != self.list_view:
            return
        idx = event.list_view.index
        if idx is not None and 0 <= idx < len(self.chunks):
            self.post_message(self.ChunkSelected(self.chunks[idx]))
            # Infinite scroll: load more when near the end
            if self.has_more and idx >= len(self.chunks) - self._PREFETCH_MARGIN:
                await self.load_more()
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
from typing import TYPE_CHECKING
|
|
2
|
+
|
|
3
|
+
from textual.app import ComposeResult
|
|
4
|
+
from textual.binding import Binding
|
|
5
|
+
from textual.containers import VerticalScroll
|
|
6
|
+
from textual.screen import Screen
|
|
7
|
+
from textual.widgets import Markdown, Static
|
|
8
|
+
|
|
9
|
+
from haiku.rag.store.models import SearchResult
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from haiku.rag.client import HaikuRAG
|
|
13
|
+
from haiku.rag.store.models import Chunk
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ContextModal(Screen):  # pragma: no cover
    """Screen that previews a chunk in the form handed to agents.

    The chunk is wrapped in a ``SearchResult``, passed through the client's
    context expansion and rendered with ``format_for_agent``.
    """

    BINDINGS = [
        Binding("escape", "dismiss", "Close", show=True),
        Binding("c", "dismiss", "Close", show=True),
    ]

    CSS = """
    ContextModal {
        background: $surface;
        layout: vertical;
    }

    #context-header {
        dock: top;
        height: auto;
        padding: 1;
    }

    #context-content {
        height: 1fr;
        width: 100%;
        padding: 1;
    }

    #context-content Markdown {
        width: 100%;
    }
    """

    def __init__(self, chunk: "Chunk", client: "HaikuRAG"):
        super().__init__()
        self.chunk = chunk
        self.client = client
        # Placeholder text, replaced once on_mount has built the preview.
        self._content_widget = Markdown("Loading...")

    def compose(self) -> ComposeResult:
        """Lay out the header above a scrollable Markdown body."""
        yield Static("[bold]Agent Context Format[/bold]", id="context-header")
        with VerticalScroll(id="context-content"):
            yield self._content_widget

    async def on_mount(self) -> None:
        """Build the agent-facing rendering of the chunk and display it."""
        source = self.chunk
        meta = source.get_chunk_metadata()

        # Wrap the chunk in a SearchResult; the score is a fixed 0.0 because
        # no search produced this result.
        seed = SearchResult(
            content=source.content,
            score=0.0,
            chunk_id=source.id,
            document_id=source.document_id,
            document_uri=source.document_uri,
            document_title=source.document_title,
            doc_item_refs=meta.doc_item_refs,
            page_numbers=meta.page_numbers,
            headings=meta.headings,
            labels=meta.labels,
        )

        # Run the client's context expansion; fall back to the unexpanded
        # result when nothing comes back.
        expansion = await self.client.expand_context([seed])
        chosen = expansion[0] if expansion else seed

        rendered = chosen.format_for_agent()
        intro = (
            "*This is how the chunk appears to agents after context expansion:*"
            "\n\n---\n\n"
        )

        await self._content_widget.update(f"{intro}{rendered}")

    async def action_dismiss(self, result=None) -> None:
        """Close this screen by popping it off the app's screen stack."""
        self.app.pop_screen()
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
from typing import Protocol
|
|
2
|
+
|
|
3
|
+
from textual.app import ComposeResult
|
|
4
|
+
from textual.containers import VerticalScroll
|
|
5
|
+
from textual.widgets import Markdown, Static
|
|
6
|
+
|
|
7
|
+
from haiku.rag.store.models import Chunk, Document, SearchResult
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ProvenanceData(Protocol):
|
|
11
|
+
"""Protocol for objects that have provenance metadata."""
|
|
12
|
+
|
|
13
|
+
page_numbers: list[int]
|
|
14
|
+
headings: list[str] | None
|
|
15
|
+
labels: list[str]
|
|
16
|
+
doc_item_refs: list[str]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class DetailView(VerticalScroll):  # pragma: no cover
    """Widget for displaying detailed content of documents or chunks.

    A title line sits above a Markdown body. The body lists the available
    metadata fields, a horizontal rule, and then the raw content.
    """

    can_focus = True

    def __init__(self, **kwargs) -> None:
        super().__init__(**kwargs)
        self.title_widget = Static("[bold]Detail View[/bold]", classes="title")
        self.content_widget = Markdown("")
        self.content_widget.can_focus = True

    def compose(self) -> ComposeResult:
        """Compose the title above the Markdown body."""
        yield self.title_widget
        yield self.content_widget

    def _format_provenance(self, prov: ProvenanceData) -> list[str]:
        """Format provenance metadata as markdown lines.

        Empty or missing fields are skipped. At most the first five document
        item references are listed; the remainder is summarised as a count.
        """
        parts: list[str] = []
        if prov.page_numbers:
            pages_str = ", ".join(str(p) for p in prov.page_numbers)
            parts.append(f"**Page(s):** {pages_str}")
        if prov.headings:
            headings_str = " > ".join(prov.headings)
            parts.append(f"**Section:** {headings_str}")
        if prov.labels:
            labels_str = ", ".join(prov.labels)
            parts.append(f"**Labels:** {labels_str}")
        if prov.doc_item_refs:
            refs_str = ", ".join(prov.doc_item_refs[:5])
            if len(prov.doc_item_refs) > 5:
                refs_str += f" ... (+{len(prov.doc_item_refs) - 5} more)"
            parts.append(f"**DocItem Refs:** `{refs_str}`")
        return parts

    async def _render_chunk(
        self,
        chunk: Chunk,
        *,
        document_title: str | None,
        document_uri: str | None,
        provenance: ProvenanceData,
        score: float | None = None,
    ) -> None:
        """Render a chunk into the title and body widgets.

        Shared by show_chunk and show_search_result, which differ only in
        where the document title/URI and provenance come from and in whether
        a score line is shown.
        """
        self.title_widget.update(f"[bold]Chunk {chunk.order}[/bold]")

        content_parts: list[str] = []
        if chunk.id:
            content_parts.append(f"**ID:** `{chunk.id}`")
        if chunk.document_id:
            content_parts.append(f"**Document ID:** `{chunk.document_id}`")
        if document_title:
            content_parts.append(f"**Document Title:** {document_title}")
        if document_uri:
            content_parts.append(f"**Document URI:** `{document_uri}`")
        content_parts.append(f"**Order:** {chunk.order}")
        if score is not None:
            content_parts.append(f"**Score:** {score:.4f}")

        content_parts.extend(self._format_provenance(provenance))

        if chunk.embedding:
            content_parts.append(f"**Embedding:** {len(chunk.embedding)} dimensions")

        content_parts.append("\n---\n")
        content_parts.append(chunk.content)

        await self.content_widget.update("\n\n".join(content_parts))

    async def show_document(self, document: Document) -> None:
        """Display document details.

        Args:
            document: Document whose metadata and content are rendered
        """
        title = document.title or document.uri or "Untitled Document"
        self.title_widget.update(f"[bold]Document: {title}[/bold]")

        content_parts: list[str] = []
        if document.id:
            content_parts.append(f"**ID:** `{document.id}`")
        if document.uri:
            content_parts.append(f"**URI:** `{document.uri}`")
        if document.metadata:
            metadata_str = "\n".join(
                f"  - {k}: {v}" for k, v in document.metadata.items()
            )
            content_parts.append(f"**Metadata:**\n{metadata_str}")
        if document.created_at:
            content_parts.append(f"**Created:** {document.created_at}")
        if document.updated_at:
            content_parts.append(f"**Updated:** {document.updated_at}")

        content_parts.append("\n---\n")
        content_parts.append(document.content)

        await self.content_widget.update("\n\n".join(content_parts))

    async def show_chunk(self, chunk: Chunk) -> None:
        """Display chunk details.

        Document title/URI and provenance are taken from the chunk itself.

        Args:
            chunk: Chunk to render
        """
        await self._render_chunk(
            chunk,
            document_title=chunk.document_title,
            document_uri=chunk.document_uri,
            provenance=chunk.get_chunk_metadata(),
        )

    async def show_search_result(
        self, chunk: Chunk, search_result: SearchResult
    ) -> None:
        """Display chunk details with search result metadata.

        Document title/URI, provenance and the score line come from the
        search result; id, order, embedding size and content from the chunk.

        Args:
            chunk: Chunk to render
            search_result: Search result that refers to the chunk
        """
        await self._render_chunk(
            chunk,
            document_title=search_result.document_title,
            document_uri=search_result.document_uri,
            provenance=search_result,
            score=search_result.score,
        )
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
from textual import on
|
|
2
|
+
from textual.app import ComposeResult
|
|
3
|
+
from textual.containers import VerticalScroll
|
|
4
|
+
from textual.message import Message
|
|
5
|
+
from textual.widgets import ListItem, ListView, Static
|
|
6
|
+
|
|
7
|
+
from haiku.rag.client import HaikuRAG
|
|
8
|
+
from haiku.rag.store.models import Document
|
|
9
|
+
|
|
10
|
+
BATCH_SIZE = 50
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class DocumentList(VerticalScroll):  # pragma: no cover
    """Widget for displaying and browsing documents.

    Documents are fetched in batches of ``BATCH_SIZE``; further batches are
    loaded on demand when the highlight gets close to the end of the list.
    """

    can_focus = False

    # Load the next batch once the highlight is within this many rows of the end.
    _PREFETCH_MARGIN = 10

    class DocumentSelected(Message):
        """Message sent when a document is selected."""

        def __init__(self, document: Document) -> None:
            super().__init__()
            self.document = document

    def __init__(self, **kwargs) -> None:
        super().__init__(**kwargs)
        self.documents: list[Document] = []
        self.list_view = ListView()
        self.has_more: bool = True
        self._client: HaikuRAG | None = None
        self._loading: bool = False

    def compose(self) -> ComposeResult:
        """Compose the document list."""
        yield Static("[bold]Documents[/bold]", classes="title")
        yield self.list_view

    async def _append_documents(self, documents: list[Document]) -> None:
        """Append one list row per document.

        The row label is the title, else the URI, else the id, else a fixed
        placeholder so a document lacking all three is not listed as "None".
        """
        for doc in documents:
            title = doc.title or doc.uri or doc.id or "Untitled Document"
            # markup=False: titles and URIs are arbitrary text and must be
            # shown literally rather than parsed as console markup.
            await self.list_view.append(ListItem(Static(f"{title}", markup=False)))

    async def load_documents(self, client: HaikuRAG) -> None:
        """Load initial batch of documents from the database.

        Args:
            client: Client used to list documents; it is remembered for
                later on-demand loading.
        """
        self._client = client
        self.documents = await client.list_documents(limit=BATCH_SIZE, offset=0)
        # A full batch means there may be more; a short batch means the end.
        self.has_more = len(self.documents) >= BATCH_SIZE
        await self.list_view.clear()
        await self._append_documents(self.documents)

    async def load_more(self, client: HaikuRAG) -> None:
        """Load the next batch of documents.

        Does nothing when the end was reached or a load is already running.

        Args:
            client: Client used to list documents.
        """
        if not self.has_more or self._loading:
            return
        self._loading = True
        try:
            offset = len(self.documents)
            new_docs = await client.list_documents(limit=BATCH_SIZE, offset=offset)
            self.has_more = len(new_docs) >= BATCH_SIZE
            self.documents.extend(new_docs)
            await self._append_documents(new_docs)
        finally:
            # Always release the flag, otherwise one failed fetch would
            # disable loading further batches for good.
            self._loading = False

    @on(ListView.Highlighted)
    @on(ListView.Selected)
    async def handle_document_selection(
        self, event: ListView.Highlighted | ListView.Selected
    ) -> None:
        """Handle document selection (arrow keys or Enter)."""
        if event.list_view != self.list_view:
            return
        idx = event.list_view.index
        if idx is not None and 0 <= idx < len(self.documents):
            self.post_message(self.DocumentSelected(self.documents[idx]))
            # Infinite scroll: load more when near the end
            if (
                self._client
                and self.has_more
                and idx >= len(self.documents) - self._PREFETCH_MARGIN
            ):
                await self.load_more(self._client)
|