haiku.rag-slim 0.16.0__py3-none-any.whl → 0.24.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of haiku.rag-slim might be problematic. Click here for more details.

Files changed (94) hide show
  1. haiku/rag/app.py +430 -72
  2. haiku/rag/chunkers/__init__.py +31 -0
  3. haiku/rag/chunkers/base.py +31 -0
  4. haiku/rag/chunkers/docling_local.py +164 -0
  5. haiku/rag/chunkers/docling_serve.py +179 -0
  6. haiku/rag/cli.py +207 -24
  7. haiku/rag/cli_chat.py +489 -0
  8. haiku/rag/client.py +1251 -266
  9. haiku/rag/config/__init__.py +16 -10
  10. haiku/rag/config/loader.py +5 -44
  11. haiku/rag/config/models.py +126 -17
  12. haiku/rag/converters/__init__.py +31 -0
  13. haiku/rag/converters/base.py +63 -0
  14. haiku/rag/converters/docling_local.py +193 -0
  15. haiku/rag/converters/docling_serve.py +229 -0
  16. haiku/rag/converters/text_utils.py +237 -0
  17. haiku/rag/embeddings/__init__.py +123 -24
  18. haiku/rag/embeddings/voyageai.py +175 -20
  19. haiku/rag/graph/__init__.py +0 -11
  20. haiku/rag/graph/agui/__init__.py +8 -2
  21. haiku/rag/graph/agui/cli_renderer.py +1 -1
  22. haiku/rag/graph/agui/emitter.py +219 -31
  23. haiku/rag/graph/agui/server.py +20 -62
  24. haiku/rag/graph/agui/stream.py +1 -2
  25. haiku/rag/graph/research/__init__.py +5 -2
  26. haiku/rag/graph/research/dependencies.py +12 -126
  27. haiku/rag/graph/research/graph.py +390 -135
  28. haiku/rag/graph/research/models.py +91 -112
  29. haiku/rag/graph/research/prompts.py +99 -91
  30. haiku/rag/graph/research/state.py +35 -27
  31. haiku/rag/inspector/__init__.py +8 -0
  32. haiku/rag/inspector/app.py +259 -0
  33. haiku/rag/inspector/widgets/__init__.py +6 -0
  34. haiku/rag/inspector/widgets/chunk_list.py +100 -0
  35. haiku/rag/inspector/widgets/context_modal.py +89 -0
  36. haiku/rag/inspector/widgets/detail_view.py +130 -0
  37. haiku/rag/inspector/widgets/document_list.py +75 -0
  38. haiku/rag/inspector/widgets/info_modal.py +209 -0
  39. haiku/rag/inspector/widgets/search_modal.py +183 -0
  40. haiku/rag/inspector/widgets/visual_modal.py +126 -0
  41. haiku/rag/mcp.py +106 -102
  42. haiku/rag/monitor.py +33 -9
  43. haiku/rag/providers/__init__.py +5 -0
  44. haiku/rag/providers/docling_serve.py +108 -0
  45. haiku/rag/qa/__init__.py +12 -10
  46. haiku/rag/qa/agent.py +43 -61
  47. haiku/rag/qa/prompts.py +35 -57
  48. haiku/rag/reranking/__init__.py +9 -6
  49. haiku/rag/reranking/base.py +1 -1
  50. haiku/rag/reranking/cohere.py +5 -4
  51. haiku/rag/reranking/mxbai.py +5 -2
  52. haiku/rag/reranking/vllm.py +3 -4
  53. haiku/rag/reranking/zeroentropy.py +6 -5
  54. haiku/rag/store/__init__.py +2 -1
  55. haiku/rag/store/engine.py +242 -42
  56. haiku/rag/store/exceptions.py +4 -0
  57. haiku/rag/store/models/__init__.py +8 -2
  58. haiku/rag/store/models/chunk.py +190 -0
  59. haiku/rag/store/models/document.py +46 -0
  60. haiku/rag/store/repositories/chunk.py +141 -121
  61. haiku/rag/store/repositories/document.py +25 -84
  62. haiku/rag/store/repositories/settings.py +11 -14
  63. haiku/rag/store/upgrades/__init__.py +19 -3
  64. haiku/rag/store/upgrades/v0_10_1.py +1 -1
  65. haiku/rag/store/upgrades/v0_19_6.py +65 -0
  66. haiku/rag/store/upgrades/v0_20_0.py +68 -0
  67. haiku/rag/store/upgrades/v0_23_1.py +100 -0
  68. haiku/rag/store/upgrades/v0_9_3.py +3 -3
  69. haiku/rag/utils.py +371 -146
  70. {haiku_rag_slim-0.16.0.dist-info → haiku_rag_slim-0.24.0.dist-info}/METADATA +15 -12
  71. haiku_rag_slim-0.24.0.dist-info/RECORD +78 -0
  72. {haiku_rag_slim-0.16.0.dist-info → haiku_rag_slim-0.24.0.dist-info}/WHEEL +1 -1
  73. haiku/rag/chunker.py +0 -65
  74. haiku/rag/embeddings/base.py +0 -25
  75. haiku/rag/embeddings/ollama.py +0 -28
  76. haiku/rag/embeddings/openai.py +0 -26
  77. haiku/rag/embeddings/vllm.py +0 -29
  78. haiku/rag/graph/agui/events.py +0 -254
  79. haiku/rag/graph/common/__init__.py +0 -5
  80. haiku/rag/graph/common/models.py +0 -42
  81. haiku/rag/graph/common/nodes.py +0 -265
  82. haiku/rag/graph/common/prompts.py +0 -46
  83. haiku/rag/graph/common/utils.py +0 -44
  84. haiku/rag/graph/deep_qa/__init__.py +0 -1
  85. haiku/rag/graph/deep_qa/dependencies.py +0 -27
  86. haiku/rag/graph/deep_qa/graph.py +0 -243
  87. haiku/rag/graph/deep_qa/models.py +0 -20
  88. haiku/rag/graph/deep_qa/prompts.py +0 -59
  89. haiku/rag/graph/deep_qa/state.py +0 -56
  90. haiku/rag/graph/research/common.py +0 -87
  91. haiku/rag/reader.py +0 -135
  92. haiku_rag_slim-0.16.0.dist-info/RECORD +0 -71
  93. {haiku_rag_slim-0.16.0.dist-info → haiku_rag_slim-0.24.0.dist-info}/entry_points.txt +0 -0
  94. {haiku_rag_slim-0.16.0.dist-info → haiku_rag_slim-0.24.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,259 @@
1
+ # pyright: reportPossiblyUnboundVariable=false
2
+ from datetime import datetime
3
+ from pathlib import Path
4
+ from typing import TYPE_CHECKING
5
+
6
+ from haiku.rag.client import HaikuRAG
7
+ from haiku.rag.config import get_config
8
+
9
+ if TYPE_CHECKING:
10
+ from textual.app import ComposeResult
11
+
12
+ try:
13
+ from textual.app import App
14
+ from textual.binding import Binding
15
+ from textual.screen import Screen
16
+ from textual.widgets import Footer, Header
17
+
18
+ from haiku.rag.inspector.widgets.chunk_list import ChunkList
19
+ from haiku.rag.inspector.widgets.detail_view import DetailView
20
+ from haiku.rag.inspector.widgets.document_list import DocumentList
21
+ from haiku.rag.inspector.widgets.search_modal import SearchModal
22
+
23
+ TEXTUAL_AVAILABLE = True
24
+ except ImportError:
25
+ TEXTUAL_AVAILABLE = False
26
+ App = object # type: ignore
27
+
28
+
29
class InspectorApp(App):  # type: ignore[misc]  # pragma: no cover
    """Textual TUI for inspecting LanceDB data.

    The main screen is a 2x2 grid holding a document list, a chunk list and
    a detail view. Search, info, visual-grounding and context screens are
    pushed on top of it as modals. A ``HaikuRAG`` client is opened when the
    app mounts and closed again when it unmounts.

    The module falls back to ``App = object`` when textual cannot be
    imported. To keep that fallback importable, nothing evaluated while the
    class body runs may require a textual name: bindings are only built when
    textual is available, and annotations naming textual-dependent classes
    are written as strings.
    """

    TITLE = "haiku.rag DB Inspector"

    CSS = """
    Screen {
        layout: grid;
        grid-size: 2 2;
        grid-columns: 1fr 2fr;
        grid-rows: 1fr 1fr;
    }

    #document-list {
        column-span: 1;
        row-span: 2;
        border: solid $primary;
    }

    #chunk-list {
        column-span: 1;
        row-span: 1;
        border: solid $secondary;
    }

    #detail-view {
        column-span: 1;
        row-span: 1;
        border: solid $accent;
    }

    ListItem {
        overflow: hidden;
    }

    ListItem Static {
        overflow: hidden;
        text-overflow: ellipsis;
    }
    """

    # ``Binding`` is unbound when the textual import failed, so the list is
    # only built when the import succeeded.
    BINDINGS = (
        [
            Binding("q", "quit", "Quit", show=True),
            Binding("/", "search", "Search", show=True),
            Binding("i", "show_info", "Info", show=True),
            Binding("v", "show_visual", "Visual", show=True),
            Binding("c", "show_context", "Context", show=True),
        ]
        if TEXTUAL_AVAILABLE
        else []
    )

    def __init__(
        self, db_path: Path, read_only: bool = False, before: datetime | None = None
    ):
        """Store the connection settings; the client itself is opened on mount.

        Args:
            db_path: Path passed to ``HaikuRAG`` as ``db_path``.
            read_only: Passed to ``HaikuRAG`` as ``read_only``.
            before: Passed to ``HaikuRAG`` as ``before``.

        Raises:
            ImportError: If textual could not be imported.
        """
        if not TEXTUAL_AVAILABLE:
            # Without textual the base class is plain ``object``; fail here
            # with a clear message instead of a later AttributeError.
            raise ImportError(
                "The inspector requires the 'textual' package, which is not installed."
            )
        super().__init__()
        self.db_path = db_path
        self.read_only = read_only
        self.before = before
        self.client: HaikuRAG | None = None

    def compose(self) -> "ComposeResult":
        """Compose the UI layout."""
        yield Header()
        yield DocumentList(id="document-list")
        yield ChunkList(id="chunk-list")
        yield DetailView(id="detail-view")
        yield Footer()

    async def on_mount(self) -> None:
        """Open the client and load the first batch of documents."""
        client = HaikuRAG(
            db_path=self.db_path,
            config=get_config(),
            read_only=self.read_only,
            before=self.before,
        )
        await client.__aenter__()
        # Publish the client only once it has been entered: the actions use
        # ``self.client`` as their readiness check, and ``on_unmount`` must
        # not exit a client that never finished entering.
        self.client = client

        # Load initial documents
        doc_list = self.query_one(DocumentList)
        await doc_list.load_documents(client)

        doc_list.list_view.focus()

    async def on_unmount(self) -> None:
        """Close the client, if one was opened."""
        # Drop the reference first so a repeated unmount cannot exit twice.
        client, self.client = self.client, None
        if client:
            await client.__aexit__(None, None, None)

    def _select_chunk(self, chunk_list: "ChunkList", chunk_id: str) -> None:
        """Highlight and focus the chunk with ``chunk_id`` if it is loaded."""
        for idx, c in enumerate(chunk_list.chunks):
            if c.id == chunk_id:
                chunk_list.list_view.index = idx
                chunk_list.list_view.focus()
                break

    def _selected_chunk(self):
        """Return the chunk highlighted in the chunk list, or ``None``.

        ``None`` is returned when nothing is highlighted or when the
        highlighted index lies beyond the loaded chunks.
        """
        chunk_list = self.query_one(ChunkList)
        idx = chunk_list.list_view.index
        if idx is None or idx >= len(chunk_list.chunks):
            return None
        return chunk_list.chunks[idx]

    async def _dismiss_modals(self) -> None:
        """Dismiss all modal screens, returning to the main screen."""
        while len(self.screen_stack) > 1:
            self.pop_screen()

    async def _switch_modal(self, screen: "Screen") -> None:
        """Switch to a new modal, dismissing any existing modals first."""
        await self._dismiss_modals()
        await self.push_screen(screen)

    async def action_search(self) -> None:
        """Open search modal."""
        if self.client:
            await self._switch_modal(SearchModal(self.client))

    async def action_show_info(self) -> None:
        """Show database info modal."""
        if self.client:
            from haiku.rag.inspector.widgets.info_modal import InfoModal

            await self._switch_modal(InfoModal(self.client, self.db_path))

    async def on_search_modal_chunk_selected(
        self, message: "SearchModal.ChunkSelected"
    ) -> None:
        """Handle chunk selection from search modal.

        Highlights the chunk's document in the document list (when it is
        among the loaded documents), loads that document's chunks, and then
        schedules selection of the chunk itself.
        """
        client = self.client
        if not client:
            return

        chunk = message.chunk
        if not chunk.document_id:
            return

        # Only navigate when the document can still be fetched.
        doc = await client.document_repository.get_by_id(chunk.document_id)
        if not doc:
            return

        doc_list = self.query_one(DocumentList)
        chunk_list = self.query_one(ChunkList)

        # Find and select the document
        for idx, d in enumerate(doc_list.documents):
            if d.id == chunk.document_id:
                doc_list.list_view.index = idx
                break

        # Load chunks for this document
        await chunk_list.load_chunks_for_document(client, chunk.document_id)

        # Wait a tick for the ListView to process the new items
        self.call_after_refresh(self._select_chunk, chunk_list, chunk.id)

    async def on_document_list_document_selected(
        self, message: "DocumentList.DocumentSelected"
    ) -> None:
        """Handle document selection from document list.

        Args:
            message: Message containing selected document
        """
        client = self.client
        if not client:
            return

        # Show document details
        detail_view = self.query_one(DetailView)
        await detail_view.show_document(message.document)

        # Load chunks for this document
        if message.document.id:
            chunk_list = self.query_one(ChunkList)
            await chunk_list.load_chunks_for_document(client, message.document.id)

    async def on_chunk_list_chunk_selected(
        self, message: "ChunkList.ChunkSelected"
    ) -> None:
        """Handle chunk selection from chunk list.

        Args:
            message: Message containing selected chunk
        """
        # Show chunk details
        detail_view = self.query_one(DetailView)
        await detail_view.show_chunk(message.chunk)

    async def action_show_visual(self) -> None:
        """Show visual grounding for the currently selected chunk."""
        if not self.client:
            return

        chunk = self._selected_chunk()
        if chunk is None:
            return

        from haiku.rag.inspector.widgets.visual_modal import VisualGroundingModal

        await self._switch_modal(VisualGroundingModal(chunk=chunk, client=self.client))

    async def action_show_context(self) -> None:
        """Show how the currently selected chunk would be formatted for agents."""
        if not self.client:
            return

        chunk = self._selected_chunk()
        if chunk is None:
            return

        from haiku.rag.inspector.widgets.context_modal import ContextModal

        await self._switch_modal(ContextModal(chunk=chunk, client=self.client))
240
+
241
+
242
def run_inspector(
    db_path: Path | None = None,
    read_only: bool = False,
    before: datetime | None = None,
) -> None:  # pragma: no cover
    """Build an ``InspectorApp`` and run it.

    Args:
        db_path: Path to the LanceDB database. When None, the path
            ``<config.storage.data_dir>/haiku.rag.lancedb`` is used.
        read_only: Whether to open the database in read-only mode.
        before: Query database as it existed before this datetime.
    """
    config = get_config()
    target = (
        db_path
        if db_path is not None
        else config.storage.data_dir / "haiku.rag.lancedb"
    )
    InspectorApp(target, read_only=read_only, before=before).run()
@@ -0,0 +1,6 @@
1
+ from haiku.rag.inspector.widgets.chunk_list import ChunkList
2
+ from haiku.rag.inspector.widgets.detail_view import DetailView
3
+ from haiku.rag.inspector.widgets.document_list import DocumentList
4
+ from haiku.rag.inspector.widgets.visual_modal import VisualGroundingModal
5
+
6
# Names re-exported from this package; each is imported above.
__all__ = ["ChunkList", "DetailView", "DocumentList", "VisualGroundingModal"]
@@ -0,0 +1,100 @@
1
+ from textual import on
2
+ from textual.app import ComposeResult
3
+ from textual.containers import VerticalScroll
4
+ from textual.message import Message
5
+ from textual.widgets import ListItem, ListView, Static
6
+
7
+ from haiku.rag.client import HaikuRAG
8
+ from haiku.rag.store.models import Chunk
9
+
10
# Page size: passed as ``limit`` on every chunk fetch made by this module.
BATCH_SIZE = 50
11
+
12
+
13
class ChunkList(VerticalScroll):  # pragma: no cover
    """Widget for displaying and browsing the chunks of one document.

    Chunks are fetched in pages of ``BATCH_SIZE``. ``chunks`` holds the
    loaded chunks in the same order as the items of ``list_view``, so a list
    index can be used directly to look up the chunk.
    """

    can_focus = False

    class ChunkSelected(Message):
        """Message sent when a chunk is selected."""

        def __init__(self, chunk: Chunk) -> None:
            super().__init__()
            self.chunk = chunk

    def __init__(self, **kwargs) -> None:
        super().__init__(**kwargs)
        self.chunks: list[Chunk] = []
        self.list_view = ListView()
        self.has_more: bool = False
        self._client: HaikuRAG | None = None
        self._document_id: str | None = None
        self._loading: bool = False
        self._total_chunks: int = 0

    def compose(self) -> ComposeResult:
        """Compose the chunk list."""
        yield Static("[bold]Chunks[/bold]", classes="title")
        yield self.list_view

    @staticmethod
    def _chunk_item(chunk: Chunk) -> ListItem:
        """Build the list row for ``chunk``: its order and first content line."""
        first_line = chunk.content.split("\n")[0]
        return ListItem(Static(f"[{chunk.order}] {first_line}"))

    async def load_chunks_for_document(
        self, client: HaikuRAG, document_id: str
    ) -> None:
        """Replace the list with the first page of chunks of ``document_id``.

        Args:
            client: Client whose ``chunk_repository`` is queried; it is kept
                for later ``load_more`` calls.
            document_id: Document whose chunks are shown.
        """
        self._client = client
        self._document_id = document_id
        self._total_chunks = await client.chunk_repository.count_by_document_id(
            document_id
        )

        self.chunks = await client.chunk_repository.get_by_document_id(
            document_id, limit=BATCH_SIZE, offset=0
        )
        self.has_more = len(self.chunks) < self._total_chunks

        await self.list_view.clear()
        for chunk in self.chunks:
            await self.list_view.append(self._chunk_item(chunk))

    async def load_more(self) -> None:
        """Load the next page of chunks for the current document.

        Does nothing when everything is loaded, when another page load is
        already running, or when no document has been loaded yet.
        """
        if (
            not self.has_more
            or self._loading
            or not self._client
            or not self._document_id
        ):
            return

        client = self._client
        document_id = self._document_id
        self._loading = True
        try:
            offset = len(self.chunks)
            new_chunks = await client.chunk_repository.get_by_document_id(
                document_id, limit=BATCH_SIZE, offset=offset
            )
            if document_id != self._document_id:
                # Another document was loaded while this page was being
                # fetched; appending it would mix two documents' chunks.
                return
            self.has_more = (offset + len(new_chunks)) < self._total_chunks
            self.chunks.extend(new_chunks)

            for chunk in new_chunks:
                await self.list_view.append(self._chunk_item(chunk))
        finally:
            # Always release the flag; otherwise one failed fetch would
            # block every later page load.
            self._loading = False

    @on(ListView.Highlighted)
    @on(ListView.Selected)
    async def handle_chunk_selection(
        self, event: ListView.Highlighted | ListView.Selected
    ) -> None:
        """Handle chunk selection (arrow keys or Enter)."""
        if event.list_view != self.list_view:
            return
        idx = event.list_view.index
        if idx is not None and 0 <= idx < len(self.chunks):
            self.post_message(self.ChunkSelected(self.chunks[idx]))
            # Infinite scroll: load more when near the end
            if self.has_more and idx >= len(self.chunks) - 10:
                await self.load_more()
@@ -0,0 +1,89 @@
1
+ from typing import TYPE_CHECKING
2
+
3
+ from textual.app import ComposeResult
4
+ from textual.binding import Binding
5
+ from textual.containers import VerticalScroll
6
+ from textual.screen import Screen
7
+ from textual.widgets import Markdown, Static
8
+
9
+ from haiku.rag.store.models import SearchResult
10
+
11
+ if TYPE_CHECKING:
12
+ from haiku.rag.client import HaikuRAG
13
+ from haiku.rag.store.models import Chunk
14
+
15
+
16
class ContextModal(Screen):  # pragma: no cover
    """Screen showing one chunk in the form ``format_for_agent`` produces.

    On mount the chunk is wrapped in a ``SearchResult``, passed through the
    client's ``expand_context``, and the formatted result is rendered as
    markdown.
    """

    BINDINGS = [
        Binding("escape", "dismiss", "Close", show=True),
        Binding("c", "dismiss", "Close", show=True),
    ]

    CSS = """
    ContextModal {
        background: $surface;
        layout: vertical;
    }

    #context-header {
        dock: top;
        height: auto;
        padding: 1;
    }

    #context-content {
        height: 1fr;
        width: 100%;
        padding: 1;
    }

    #context-content Markdown {
        width: 100%;
    }
    """

    def __init__(self, chunk: "Chunk", client: "HaikuRAG"):
        super().__init__()
        self.chunk = chunk
        self.client = client
        # Placeholder text, replaced in on_mount once the context is ready.
        self._content_widget = Markdown("Loading...")

    def compose(self) -> ComposeResult:
        """Yield the header line and the scrollable markdown body."""
        yield Static("[bold]Agent Context Format[/bold]", id="context-header")
        with VerticalScroll(id="context-content"):
            yield self._content_widget

    async def on_mount(self) -> None:
        """Expand the chunk's context and show the agent-facing rendering."""
        source = self.chunk
        meta = source.get_chunk_metadata()

        # Wrap the chunk in a SearchResult; a raw chunk has no score, so 0.0.
        seed = SearchResult(
            content=source.content,
            score=0.0,
            chunk_id=source.id,
            document_id=source.document_id,
            document_uri=source.document_uri,
            document_title=source.document_title,
            doc_item_refs=meta.doc_item_refs,
            page_numbers=meta.page_numbers,
            headings=meta.headings,
            labels=meta.labels,
        )

        # Fall back to the unexpanded result when expansion returns nothing.
        results = await self.client.expand_context([seed])
        if results:
            shown = results[0]
        else:
            shown = seed

        rendered = shown.format_for_agent()
        body = (
            "*This is how the chunk appears to agents after context expansion:*"
            f"\n\n---\n\n{rendered}"
        )

        await self._content_widget.update(body)

    async def action_dismiss(self, result=None) -> None:
        """Close this screen by popping it from the app's screen stack."""
        self.app.pop_screen()
@@ -0,0 +1,130 @@
1
+ from typing import Protocol
2
+
3
+ from textual.app import ComposeResult
4
+ from textual.containers import VerticalScroll
5
+ from textual.widgets import Markdown, Static
6
+
7
+ from haiku.rag.store.models import Chunk, Document, SearchResult
8
+
9
+
10
+ class ProvenanceData(Protocol):
11
+ """Protocol for objects that have provenance metadata."""
12
+
13
+ page_numbers: list[int]
14
+ headings: list[str] | None
15
+ labels: list[str]
16
+ doc_item_refs: list[str]
17
+
18
+
19
class DetailView(VerticalScroll):  # pragma: no cover
    """Scrollable pane rendering one document or one chunk as markdown.

    A ``Static`` widget shows the title and a ``Markdown`` widget shows the
    metadata lines followed by the full content.
    """

    can_focus = True

    def __init__(self, **kwargs) -> None:
        super().__init__(**kwargs)
        self.title_widget = Static("[bold]Detail View[/bold]", classes="title")
        self.content_widget = Markdown("")
        self.content_widget.can_focus = True

    def compose(self) -> ComposeResult:
        """Yield the title line followed by the markdown body."""
        yield self.title_widget
        yield self.content_widget

    def _format_provenance(self, prov: ProvenanceData) -> list[str]:
        """Return one markdown line per non-empty provenance field."""
        lines: list[str] = []

        pages = prov.page_numbers
        if pages:
            lines.append("**Page(s):** " + ", ".join(map(str, pages)))

        headings = prov.headings
        if headings:
            lines.append("**Section:** " + " > ".join(headings))

        labels = prov.labels
        if labels:
            lines.append("**Labels:** " + ", ".join(labels))

        refs = prov.doc_item_refs
        if refs:
            # List at most five refs and summarise the remainder as a count.
            listed = ", ".join(refs[:5])
            hidden = len(refs) - 5
            if hidden > 0:
                listed += f" ... (+{hidden} more)"
            lines.append(f"**DocItem Refs:** `{listed}`")

        return lines

    async def _render_chunk(
        self,
        chunk: Chunk,
        document_title,
        document_uri,
        provenance: ProvenanceData,
        score_line: str | None = None,
    ) -> None:
        """Render the body shared by ``show_chunk`` and ``show_search_result``.

        Args:
            chunk: Chunk whose id, document id, order, embedding and content
                are shown.
            document_title: Title to display; the line is omitted when falsy.
            document_uri: URI to display; the line is omitted when falsy.
            provenance: Source of the provenance lines.
            score_line: Extra line placed right after the order line, if any.
        """
        lines: list[str] = []
        if chunk.id:
            lines.append(f"**ID:** `{chunk.id}`")
        if chunk.document_id:
            lines.append(f"**Document ID:** `{chunk.document_id}`")
        if document_title:
            lines.append(f"**Document Title:** {document_title}")
        if document_uri:
            lines.append(f"**Document URI:** `{document_uri}`")
        lines.append(f"**Order:** {chunk.order}")
        if score_line is not None:
            lines.append(score_line)

        lines += self._format_provenance(provenance)

        if chunk.embedding:
            lines.append(f"**Embedding:** {len(chunk.embedding)} dimensions")

        lines += ["\n---\n", chunk.content]
        await self.content_widget.update("\n\n".join(lines))

    async def show_document(self, document: Document) -> None:
        """Show a document's metadata lines followed by its content."""
        heading = document.title or document.uri or "Untitled Document"
        self.title_widget.update(f"[bold]Document: {heading}[/bold]")

        lines: list[str] = []
        if document.id:
            lines.append(f"**ID:** `{document.id}`")
        if document.uri:
            lines.append(f"**URI:** `{document.uri}`")
        if document.metadata:
            entries = [f" - {k}: {v}" for k, v in document.metadata.items()]
            lines.append("**Metadata:**\n" + "\n".join(entries))
        if document.created_at:
            lines.append(f"**Created:** {document.created_at}")
        if document.updated_at:
            lines.append(f"**Updated:** {document.updated_at}")

        lines += ["\n---\n", document.content]
        await self.content_widget.update("\n\n".join(lines))

    async def show_chunk(self, chunk: Chunk) -> None:
        """Show a chunk using its own title, URI and chunk metadata."""
        self.title_widget.update(f"[bold]Chunk {chunk.order}[/bold]")
        await self._render_chunk(
            chunk,
            chunk.document_title,
            chunk.document_uri,
            chunk.get_chunk_metadata(),
        )

    async def show_search_result(
        self, chunk: Chunk, search_result: SearchResult
    ) -> None:
        """Show a chunk with the title, URI, score and provenance of a search hit."""
        self.title_widget.update(f"[bold]Chunk {chunk.order}[/bold]")
        await self._render_chunk(
            chunk,
            search_result.document_title,
            search_result.document_uri,
            search_result,
            score_line=f"**Score:** {search_result.score:.4f}",
        )
@@ -0,0 +1,75 @@
1
+ from textual import on
2
+ from textual.app import ComposeResult
3
+ from textual.containers import VerticalScroll
4
+ from textual.message import Message
5
+ from textual.widgets import ListItem, ListView, Static
6
+
7
+ from haiku.rag.client import HaikuRAG
8
+ from haiku.rag.store.models import Document
9
+
10
# Page size: passed as ``limit`` to ``list_documents`` and compared against
# the returned page length to decide whether more documents may exist.
BATCH_SIZE = 50
11
+
12
+
13
class DocumentList(VerticalScroll):  # pragma: no cover
    """Widget for displaying and browsing documents.

    Documents are fetched in pages of ``BATCH_SIZE``. ``documents`` holds the
    loaded documents in the same order as the items of ``list_view``, so a
    list index can be used directly to look up the document.
    """

    can_focus = False

    class DocumentSelected(Message):
        """Message sent when a document is selected."""

        def __init__(self, document: Document) -> None:
            super().__init__()
            self.document = document

    def __init__(self, **kwargs) -> None:
        super().__init__(**kwargs)
        self.documents: list[Document] = []
        self.list_view = ListView()
        self.has_more: bool = True
        self._client: HaikuRAG | None = None
        self._loading: bool = False

    def compose(self) -> ComposeResult:
        """Compose the document list."""
        yield Static("[bold]Documents[/bold]", classes="title")
        yield self.list_view

    @staticmethod
    def _document_item(doc: Document) -> ListItem:
        """Build the list row for ``doc``: its title, else URI, else id."""
        title = doc.title or doc.uri or doc.id
        return ListItem(Static(f"{title}"))

    async def load_documents(self, client: HaikuRAG) -> None:
        """Replace the list with the first page of documents.

        Args:
            client: Client whose ``list_documents`` is called; it is kept
                for later paging triggered by list navigation.
        """
        self._client = client
        self.documents = await client.list_documents(limit=BATCH_SIZE, offset=0)
        # A full page means there may be more documents to fetch.
        self.has_more = len(self.documents) >= BATCH_SIZE
        await self.list_view.clear()
        for doc in self.documents:
            await self.list_view.append(self._document_item(doc))

    async def load_more(self, client: HaikuRAG) -> None:
        """Load the next page of documents.

        Does nothing when the last page was short (nothing more to fetch) or
        when another page load is already running.

        Args:
            client: Client whose ``list_documents`` is called.
        """
        if not self.has_more or self._loading:
            return
        self._loading = True
        try:
            offset = len(self.documents)
            new_docs = await client.list_documents(limit=BATCH_SIZE, offset=offset)
            self.has_more = len(new_docs) >= BATCH_SIZE
            self.documents.extend(new_docs)
            for doc in new_docs:
                await self.list_view.append(self._document_item(doc))
        finally:
            # Always release the flag; otherwise one failed fetch would
            # block every later page load.
            self._loading = False

    @on(ListView.Highlighted)
    @on(ListView.Selected)
    async def handle_document_selection(
        self, event: ListView.Highlighted | ListView.Selected
    ) -> None:
        """Handle document selection (arrow keys or Enter)."""
        if event.list_view != self.list_view:
            return
        idx = event.list_view.index
        if idx is not None and 0 <= idx < len(self.documents):
            self.post_message(self.DocumentSelected(self.documents[idx]))
            # Infinite scroll: load more when near the end
            if self._client and self.has_more and idx >= len(self.documents) - 10:
                await self.load_more(self._client)