chunksmith-cli 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. chunksmith_cli-0.4.0/LICENSE.vectify +21 -0
  2. chunksmith_cli-0.4.0/PKG-INFO +61 -0
  3. chunksmith_cli-0.4.0/README.md +44 -0
  4. chunksmith_cli-0.4.0/pyproject.toml +43 -0
  5. chunksmith_cli-0.4.0/setup.cfg +4 -0
  6. chunksmith_cli-0.4.0/src/chunksmith_cli/__init__.py +3 -0
  7. chunksmith_cli-0.4.0/src/chunksmith_cli/__main__.py +41 -0
  8. chunksmith_cli-0.4.0/src/chunksmith_cli/agent/__init__.py +1 -0
  9. chunksmith_cli-0.4.0/src/chunksmith_cli/agent/agent_display.py +160 -0
  10. chunksmith_cli-0.4.0/src/chunksmith_cli/agent/agent_session.py +294 -0
  11. chunksmith_cli-0.4.0/src/chunksmith_cli/agent/agent_stream.py +152 -0
  12. chunksmith_cli-0.4.0/src/chunksmith_cli/agent/agent_wizard.py +5 -0
  13. chunksmith_cli-0.4.0/src/chunksmith_cli/agent_display.py +6 -0
  14. chunksmith_cli-0.4.0/src/chunksmith_cli/agent_session.py +6 -0
  15. chunksmith_cli-0.4.0/src/chunksmith_cli/agent_stream.py +6 -0
  16. chunksmith_cli-0.4.0/src/chunksmith_cli/agent_wizard.py +6 -0
  17. chunksmith_cli-0.4.0/src/chunksmith_cli/assets/chunksmith_logo.png +0 -0
  18. chunksmith_cli-0.4.0/src/chunksmith_cli/branding.py +6 -0
  19. chunksmith_cli-0.4.0/src/chunksmith_cli/config.py +6 -0
  20. chunksmith_cli-0.4.0/src/chunksmith_cli/core/__init__.py +21 -0
  21. chunksmith_cli-0.4.0/src/chunksmith_cli/core/artifact_layout.py +60 -0
  22. chunksmith_cli-0.4.0/src/chunksmith_cli/core/branding.py +137 -0
  23. chunksmith_cli-0.4.0/src/chunksmith_cli/core/config.py +32 -0
  24. chunksmith_cli-0.4.0/src/chunksmith_cli/core/media_preview.py +72 -0
  25. chunksmith_cli-0.4.0/src/chunksmith_cli/core/menu.py +110 -0
  26. chunksmith_cli-0.4.0/src/chunksmith_cli/core/menus.py +55 -0
  27. chunksmith_cli-0.4.0/src/chunksmith_cli/core/panels.py +24 -0
  28. chunksmith_cli-0.4.0/src/chunksmith_cli/core/paths.py +869 -0
  29. chunksmith_cli-0.4.0/src/chunksmith_cli/core/prefs_mapper.py +97 -0
  30. chunksmith_cli-0.4.0/src/chunksmith_cli/core/saved_catalog.py +180 -0
  31. chunksmith_cli-0.4.0/src/chunksmith_cli/core/theme.py +38 -0
  32. chunksmith_cli-0.4.0/src/chunksmith_cli/elements_json_prompt.py +6 -0
  33. chunksmith_cli-0.4.0/src/chunksmith_cli/json_view.py +6 -0
  34. chunksmith_cli-0.4.0/src/chunksmith_cli/media_preview.py +6 -0
  35. chunksmith_cli-0.4.0/src/chunksmith_cli/menu.py +6 -0
  36. chunksmith_cli-0.4.0/src/chunksmith_cli/menus.py +55 -0
  37. chunksmith_cli-0.4.0/src/chunksmith_cli/multi_indexing_wizard.py +3 -0
  38. chunksmith_cli-0.4.0/src/chunksmith_cli/outline_browser.py +6 -0
  39. chunksmith_cli-0.4.0/src/chunksmith_cli/panels.py +6 -0
  40. chunksmith_cli-0.4.0/src/chunksmith_cli/partition_prefs.py +74 -0
  41. chunksmith_cli-0.4.0/src/chunksmith_cli/paths.py +6 -0
  42. chunksmith_cli-0.4.0/src/chunksmith_cli/pdf_prompt.py +6 -0
  43. chunksmith_cli-0.4.0/src/chunksmith_cli/pipelines/__init__.py +1 -0
  44. chunksmith_cli-0.4.0/src/chunksmith_cli/pipelines/mapping_validation.py +31 -0
  45. chunksmith_cli-0.4.0/src/chunksmith_cli/pipelines/multi_indexing_config.py +35 -0
  46. chunksmith_cli-0.4.0/src/chunksmith_cli/pipelines/multi_indexing_prompts.py +375 -0
  47. chunksmith_cli-0.4.0/src/chunksmith_cli/pipelines/multi_indexing_runtime.py +38 -0
  48. chunksmith_cli-0.4.0/src/chunksmith_cli/pipelines/multi_indexing_storage.py +157 -0
  49. chunksmith_cli-0.4.0/src/chunksmith_cli/pipelines/multi_indexing_wizard.py +218 -0
  50. chunksmith_cli-0.4.0/src/chunksmith_cli/pipelines/pageindex_wizard.py +140 -0
  51. chunksmith_cli-0.4.0/src/chunksmith_cli/pipelines/run_multi.py +21 -0
  52. chunksmith_cli-0.4.0/src/chunksmith_cli/prompts/__init__.py +1 -0
  53. chunksmith_cli-0.4.0/src/chunksmith_cli/prompts/elements_json_prompt.py +49 -0
  54. chunksmith_cli-0.4.0/src/chunksmith_cli/prompts/pdf_prompt.py +37 -0
  55. chunksmith_cli-0.4.0/src/chunksmith_cli/saved_catalog.py +6 -0
  56. chunksmith_cli-0.4.0/src/chunksmith_cli/theme.py +6 -0
  57. chunksmith_cli-0.4.0/src/chunksmith_cli/tree_view.py +6 -0
  58. chunksmith_cli-0.4.0/src/chunksmith_cli/view_session.py +6 -0
  59. chunksmith_cli-0.4.0/src/chunksmith_cli/views/__init__.py +1 -0
  60. chunksmith_cli-0.4.0/src/chunksmith_cli/views/json_view.py +11 -0
  61. chunksmith_cli-0.4.0/src/chunksmith_cli/views/outline_browser.py +247 -0
  62. chunksmith_cli-0.4.0/src/chunksmith_cli/views/tree_view.py +59 -0
  63. chunksmith_cli-0.4.0/src/chunksmith_cli/views/view_session.py +32 -0
  64. chunksmith_cli-0.4.0/src/chunksmith_cli/wizard.py +357 -0
  65. chunksmith_cli-0.4.0/src/chunksmith_cli.egg-info/PKG-INFO +61 -0
  66. chunksmith_cli-0.4.0/src/chunksmith_cli.egg-info/SOURCES.txt +72 -0
  67. chunksmith_cli-0.4.0/src/chunksmith_cli.egg-info/dependency_links.txt +1 -0
  68. chunksmith_cli-0.4.0/src/chunksmith_cli.egg-info/entry_points.txt +2 -0
  69. chunksmith_cli-0.4.0/src/chunksmith_cli.egg-info/requires.txt +5 -0
  70. chunksmith_cli-0.4.0/src/chunksmith_cli.egg-info/top_level.txt +1 -0
  71. chunksmith_cli-0.4.0/tests/test_cli_entry.py +9 -0
  72. chunksmith_cli-0.4.0/tests/test_pageindex_settings.py +13 -0
  73. chunksmith_cli-0.4.0/tests/test_prefs_mapper.py +48 -0
  74. chunksmith_cli-0.4.0/tests/test_run_multi.py +22 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 ChunkSmith
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,61 @@
1
+ Metadata-Version: 2.4
2
+ Name: chunksmith-cli
3
+ Version: 0.4.0
4
+ Summary: ChunkSmith CLI — multi-indexing (JSON/TOON) and PageIndexer pipelines.
5
+ License: MIT
6
+ Project-URL: Homepage, https://github.com/AnshulParate2004/ChunkSmith
7
+ Project-URL: Repository, https://github.com/AnshulParate2004/ChunkSmith
8
+ Requires-Python: >=3.10
9
+ Description-Content-Type: text/markdown
10
+ License-File: LICENSE.vectify
11
+ Requires-Dist: python-dotenv>=1.0.0
12
+ Requires-Dist: rich>=13.0.0
13
+ Requires-Dist: chunksmith-core<0.4,>=0.3.0
14
+ Requires-Dist: chunksmith-multimodal[llm,pdf,toon]<0.4,>=0.3.0
15
+ Requires-Dist: chunksmith-pageindex[llm,pdf]<0.4,>=0.3.0
16
+ Dynamic: license-file
17
+
18
+ # chunksmith-cli (Python)
19
+
20
+ PyPI package **`chunksmith-cli`** v0.4.0 — Rich terminal for ChunkSmith indexing.
21
+
22
+ Part of the **ChunkSmith** monorepo (`packages/chunksmith-cli`).
23
+
24
+ ## Dependencies (workspace)
25
+
26
+ | Layer | Packages |
27
+ |-------|----------|
28
+ | CLI | `chunksmith-core`, `chunksmith-multimodal`, `chunksmith-pageindex`, `rich` |
29
+ | Agent Q&A (separate install) | `chunksmith-agent[langchain]` |
30
+
31
+ ## Development
32
+
33
+ From repo root:
34
+
35
+ ```bash
36
+ cd ChunkSmith
37
+ uv sync
38
+ chunksmith
39
+ ```
40
+
41
+ From this package directory only (requires workspace root lock):
42
+
43
+ ```bash
44
+ cd ../..
45
+ uv sync
46
+ uv run chunksmith
47
+ ```
48
+
49
+ ## Tests
50
+
51
+ ```bash
52
+ cd ../..
53
+ uv run pytest packages/chunksmith-cli/tests/ -q
54
+ ```
55
+
56
+ ## PyPI install
57
+
58
+ ```bash
59
+ pip install chunksmith-cli
60
+ pip install "chunksmith-agent[langchain]"
61
+ ```
@@ -0,0 +1,44 @@
1
+ # chunksmith-cli (Python)
2
+
3
+ PyPI package **`chunksmith-cli`** v0.4.0 — Rich terminal for ChunkSmith indexing.
4
+
5
+ Part of the **ChunkSmith** monorepo (`packages/chunksmith-cli`).
6
+
7
+ ## Dependencies (workspace)
8
+
9
+ | Layer | Packages |
10
+ |-------|----------|
11
+ | CLI | `chunksmith-core`, `chunksmith-multimodal`, `chunksmith-pageindex`, `rich` |
12
+ | Agent Q&A (separate install) | `chunksmith-agent[langchain]` |
13
+
14
+ ## Development
15
+
16
+ From repo root:
17
+
18
+ ```bash
19
+ cd ChunkSmith
20
+ uv sync
21
+ chunksmith
22
+ ```
23
+
24
+ From this package directory only (requires workspace root lock):
25
+
26
+ ```bash
27
+ cd ../..
28
+ uv sync
29
+ uv run chunksmith
30
+ ```
31
+
32
+ ## Tests
33
+
34
+ ```bash
35
+ cd ../..
36
+ uv run pytest packages/chunksmith-cli/tests/ -q
37
+ ```
38
+
39
+ ## PyPI install
40
+
41
+ ```bash
42
+ pip install chunksmith-cli
43
+ pip install "chunksmith-agent[langchain]"
44
+ ```
@@ -0,0 +1,43 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "chunksmith-cli"
7
+ version = "0.4.0"
8
+ description = "ChunkSmith CLI — multi-indexing (JSON/TOON) and PageIndexer pipelines."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = { text = "MIT" }
12
+ dependencies = [
13
+ "python-dotenv>=1.0.0",
14
+ "rich>=13.0.0",
15
+ "chunksmith-core>=0.3.0,<0.4",
16
+ "chunksmith-multimodal[pdf,toon,llm]>=0.3.0,<0.4",
17
+ "chunksmith-pageindex[pdf,llm]>=0.3.0,<0.4",
18
+ ]
19
+
20
+ [dependency-groups]
21
+ dev = ["pytest>=8.0.0"]
22
+
23
+ [project.scripts]
24
+ chunksmith = "chunksmith_cli.__main__:main"
25
+
26
+ [project.urls]
27
+ Homepage = "https://github.com/AnshulParate2004/ChunkSmith"
28
+ Repository = "https://github.com/AnshulParate2004/ChunkSmith"
29
+
30
+ [tool.setuptools]
31
+ package-dir = { "" = "src" }
32
+
33
+ [tool.setuptools.packages.find]
34
+ where = ["src"]
35
+ include = ["chunksmith_cli*"]
36
+ exclude = ["tests*"]
37
+
38
+ [tool.setuptools.package-data]
39
+ chunksmith_cli = ["assets/*"]
40
+
41
+ [tool.pytest.ini_options]
42
+ testpaths = ["tests"]
43
+ pythonpath = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,3 @@
1
+ """Interactive CLI: ChunkSmith PDF (pageindex), multimodal (Unstructured JSON + PDF), or saved JSON view."""
2
+
3
+ __all__: list[str] = []
@@ -0,0 +1,41 @@
1
+ """ChunkSmith interactive CLI — ``python -m chunksmith_cli`` or ``chunksmith``."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+
7
+
8
def main() -> int:
    """Run the ChunkSmith CLI and return a process exit code.

    ``--version`` / ``-V`` as the first argument prints the version and
    returns 0 without importing the interactive modules. Otherwise the
    interactive wizard runs; Ctrl-C is reported and mapped to exit code 130.
    """
    cli_args = sys.argv[1:]
    if cli_args and cli_args[0] in {"--version", "-V"}:
        from chunksmith_cli.theme import VERSION

        print(f"chunksmith-cli {VERSION}")
        return 0

    # Imported lazily so the version fast path above stays cheap.
    from chunksmith_cli.branding import ensure_utf8_stdio
    from chunksmith_cli.theme import make_console
    from chunksmith_cli.wizard import run_interactive

    ensure_utf8_stdio()
    console = make_console()

    try:
        exit_code = run_interactive(console)
    except KeyboardInterrupt:
        console.print("\n[dim]Interrupted. Goodbye.[/dim]")
        exit_code = 130
    return exit_code
27
+
28
+
29
def __version__() -> str:
    """Return the ``VERSION`` value exported by ``chunksmith_cli.theme``."""
    from chunksmith_cli.theme import VERSION

    return VERSION
33
+
34
+
35
if __name__ == "__main__":
    # main() handles --version / -V itself (prints the version, returns 0),
    # so the script entry point only needs to forward its exit code.
    raise SystemExit(main())
@@ -0,0 +1 @@
1
+ """LangChain agent over saved ChunkSmith indexes."""
@@ -0,0 +1,160 @@
1
+ """Render tables and figures from agent events in the terminal."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ import re
7
+ import sys
8
+ from html import unescape
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ from rich.align import Align
13
+ from rich.console import Console
14
+ from rich.markup import escape
15
+ from rich.panel import Panel
16
+
17
+ from chunksmith_cli.config import open_images_in_viewer, show_images_in_terminal, show_tables_in_terminal
18
+
19
+ try:
20
+ from rich.image import Image as RichImage
21
+ except ImportError: # pragma: no cover
22
+ RichImage = None # type: ignore[misc, assignment]
23
+
24
+
25
def html_table_preview(html: str, *, max_len: int = 2400) -> str:
    """Return a plain-text preview of HTML table markup for terminal display.

    ``<br>`` and the end of each table row become line breaks, cell ends
    become column separators, remaining tags are dropped and HTML entities
    are decoded. Runs of horizontal whitespace collapse to one space and
    three or more consecutive newlines collapse to two.

    Args:
        html: Table markup; ``None`` or an empty string is treated as empty.
        max_len: Maximum number of characters kept before truncation.

    Returns:
        The preview text, followed by a newline and an ellipsis when it was
        truncated, or ``"(empty table)"`` when nothing remains.
    """
    text = re.sub(r"<br\s*/?>", "\n", html or "", flags=re.IGNORECASE)
    # A closing row tag ends the preview line. Trailing whitespace after it is
    # swallowed so pretty-printed markup does not produce blank lines between rows.
    text = re.sub(r"</tr\s*>\s*", "\n", text, flags=re.IGNORECASE)
    # A closing cell tag only separates columns (collapsed to one space below).
    text = re.sub(r"</t[dh]\s*>", "\t", text, flags=re.IGNORECASE)
    text = re.sub(r"<[^>]+>", "", text)
    text = unescape(text)
    text = re.sub(r"[ \t\f\v]+", " ", text)
    text = re.sub(r" *\n *", "\n", text)
    text = re.sub(r"\n{3,}", "\n\n", text).strip()
    if len(text) > max_len:
        return text[:max_len] + "\n…"
    return text or "(empty table)"
37
+
38
+
39
def print_table(console: Console, payload: dict[str, Any]) -> None:
    """Print one table payload as a yellow panel.

    Reads ``node_id``, ``page_number`` and ``html`` from ``payload``; a
    missing or falsy id/page is shown as ``?``. The HTML is reduced to a
    text preview and markup-escaped before printing.
    """
    node_label = escape(str(payload.get("node_id") or "?"))
    page_label = escape(str(payload.get("page_number") or "?"))
    body = html_table_preview(str(payload.get("html") or ""))
    heading = f"[bold yellow]Table[/bold yellow] · node {node_label} · page {page_label}"
    console.print(Panel(escape(body), title=heading, border_style="yellow"))
45
+
46
+
47
+ def _open_image_file(path: Path) -> bool:
48
+ """Open image in the system default viewer (Windows/macOS/Linux)."""
49
+ if not path.is_file():
50
+ return False
51
+ try:
52
+ if sys.platform == "win32":
53
+ os.startfile(str(path)) # noqa: S606
54
+ elif sys.platform == "darwin":
55
+ os.system(f'open "{path}"') # noqa: S605
56
+ else:
57
+ os.system(f'xdg-open "{path}"') # noqa: S605
58
+ return True
59
+ except OSError:
60
+ return False
61
+
62
+
63
def print_figure(console: Console, payload: dict[str, Any]) -> None:
    """Show one figure payload, inline when possible, otherwise as a path panel.

    Reads ``path``, ``page_number`` and ``node_id`` from ``payload``. When the
    file exists and ``RichImage`` was importable, an inline render is
    attempted; any failure there falls back to a panel that prints the path
    together with hints. The default viewer is launched only when
    ``open_images_in_viewer()`` is true.
    """
    path_str = str(payload.get("path") or "").strip()
    page = payload.get("page_number")
    node = payload.get("node_id")
    caption = f"node {node} · page {page}"
    file_path = Path(path_str) if path_str else None

    shown_inline = False
    if file_path and file_path.is_file() and RichImage is not None:
        try:
            width = min(72, max(40, console.width - 4))
            picture = RichImage.from_file(str(file_path), width=width)
            console.print(
                Panel(
                    Align.center(picture),
                    title=f"[bold cyan]Figure[/bold cyan] · {escape(caption)}",
                    border_style="cyan",
                )
            )
        except Exception:
            # Best effort: any rendering problem means "use the path panel".
            shown_inline = False
        else:
            shown_inline = True

    if shown_inline:
        console.print(f"[dim]{escape(path_str)}[/dim]\n")
        if open_images_in_viewer():
            _open_image_file(file_path)
        return

    notes = [
        "[dim]Inline image preview is not supported in this terminal (common on Windows PowerShell).[/dim]\n"
    ]
    if file_path and file_path.is_file():
        on_windows = sys.platform == "win32"
        if on_windows:
            notes.append(f'[dim]Open manually:[/dim] start "" {escape(path_str)}\n')
        if open_images_in_viewer():
            if _open_image_file(file_path):
                notes.append("[green]Opened in your default image viewer.[/green]\n")
        elif on_windows:
            # Auto-open is off: tell Windows users how to enable it.
            notes.append(
                "[dim]Tip: set[/dim] [bold]CHUNKSMITH_CLI_OPEN_IMAGES=1[/bold] "
                "[dim]to auto-open figures in Photos.[/dim]\n"
            )
    else:
        notes.append("[yellow]Image file not found on disk.[/yellow]\n")
    hint = "".join(notes)

    console.print(
        Panel(
            f"{hint}\n[bold]{escape(path_str or '(no image path)')}[/bold]",
            title=f"[bold cyan]Figure file[/bold cyan] · {escape(caption)}",
            border_style="cyan",
        )
    )
    console.print()
115
+
116
+
117
def print_tables_summary(console: Console, count: int) -> None:
    """Print the number of tables in context; print nothing when ``count`` is falsy."""
    if not count:
        return
    console.print(f"[dim]Tables in context:[/dim] {count}\n")
120
+
121
+
122
def print_figures_summary(console: Console, count: int) -> None:
    """Print the number of figures in context; print nothing when ``count`` is falsy."""
    if not count:
        return
    console.print(f"[dim]Figures in context:[/dim] {count}\n")
125
+
126
+
127
def print_media_mentions(console: Console, payload: dict[str, Any]) -> None:
    """Print the media hint and the figures/tables mentioned in the answer text.

    Reads ``hint``, ``figures`` and ``tables`` from ``payload``. The hint is
    printed whenever it is non-empty. Each list is printed (first 12 entries)
    only when its display toggle is on, and a trailing blank line is printed
    when either list is non-empty, even if its panel was suppressed.
    """
    hint = str(payload.get("hint") or "").strip()
    if hint:
        console.print(Panel(escape(hint), title="[bold yellow]Media[/bold yellow]", border_style="yellow"))

    # (payload key, display toggle, panel title, border colour), in print order.
    sections = (
        ("figures", show_images_in_terminal, "[bold cyan]Figures mentioned in text[/bold cyan]", "cyan"),
        ("tables", show_tables_in_terminal, "[bold yellow]Tables mentioned in text[/bold yellow]", "yellow"),
    )
    any_mentions = False
    for key, is_enabled, heading, colour in sections:
        entries = payload.get(key) or []
        if not entries:
            continue
        any_mentions = True
        if not is_enabled():
            continue
        bullets = [
            f"• [dim]node {escape(str(item.get('node_id') or '?'))}[/dim] — {escape(str(item.get('caption') or ''))}"
            for item in entries[:12]
        ]
        console.print(Panel("\n".join(bullets), title=heading, border_style=colour))

    if any_mentions:
        console.print()
@@ -0,0 +1,294 @@
1
+ """Agent CLI session state and actions."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+ from typing import Any, Callable
8
+
9
+ from rich.console import Console
10
+ from rich.markup import escape
11
+ from rich.prompt import Confirm, Prompt
12
+
13
+ from chunksmith_cli.agent_stream import handle_agent_event
14
+ from chunksmith_cli.menu import prompt_menu
15
+ from chunksmith_cli.menus import AGENT_MENU
16
+ from chunksmith_cli.paths import (
17
+ default_project_logs_dir,
18
+ ensure_cli_storage,
19
+ resolve_agent_index_input,
20
+ )
21
+
22
+
23
def _index_media_counts(doc_index: Any) -> tuple[int, int]:
    """Return ``index_media_counts(doc_index)`` from ``chunksmith_agent``.

    The import is local, so ``chunksmith_agent`` is only loaded when this is
    called. Callers in this module unpack the result as (tables, figures).
    """
    from chunksmith_agent.index_context import index_media_counts

    counts = index_media_counts(doc_index)
    return counts
27
+
28
+
29
def _media_loaded_line(doc_index: Any) -> str:
    """Build the table/figure counts fragment shown on the Loaded / Ready line."""
    table_count, figure_count = _index_media_counts(doc_index)
    if not (table_count or figure_count):
        return "[dim]· 0 tables · 0 figures (text-only index)[/dim]"
    tables_part = f"[dim]·[/dim] [bold cyan]{table_count}[/bold cyan] tables loaded "
    figures_part = f"[dim]·[/dim] [bold cyan]{figure_count}[/bold cyan] figures loaded"
    return tables_part + figures_part
38
+
39
+
40
def _print_index_media_hint(console: Console, doc_index: Any) -> None:
    """Print a panel summarising the tables and figures loaded with an index.

    When the index reports at least one table or figure, a green panel lists
    the inventory lines returned by ``index_media_inventory`` and notes how
    many items are not listed. Otherwise a yellow "text-only" panel explains
    how to get an index that includes tables and images.

    Args:
        console: Rich console to print to.
        doc_index: Document index passed through to the ``chunksmith_agent``
            helpers; its concrete type is not visible from this module.
    """
    from rich.panel import Panel

    from chunksmith_agent.index_context import index_media_inventory

    n_tables, n_images = _index_media_counts(doc_index)
    table_lines, figure_lines = index_media_inventory(doc_index)
    # Items counted but not present in the inventory lines.
    extra_tables = max(0, n_tables - len(table_lines))
    extra_figures = max(0, n_images - len(figure_lines))

    if not (n_tables or n_images):
        console.print(
            Panel(
                "[bold]Tables loaded:[/bold] 0\n"
                "[bold]Figures loaded:[/bold] 0\n\n"
                # Wording fixed: the original read "index with load a ...".
                "[dim]This JSON has outline/text only. For tables and images, "
                "load a [bold]*_canonical_bundle.json[/bold] or use [bold]chunking → Multimodal index[/bold].[/dim]",
                title="[bold yellow]Text-only index[/bold yellow]",
                border_style="yellow",
            )
        )
        console.print()
        return

    body_parts = [
        f"[bold yellow]Tables loaded:[/bold yellow] [bold]{n_tables}[/bold]",
    ]
    if table_lines:
        body_parts.append("\n".join(escape(line) for line in table_lines))
        if extra_tables:
            body_parts.append(f"[dim]… and {extra_tables} more table(s)[/dim]")
    else:
        body_parts.append("[dim](none)[/dim]")

    body_parts.append(f"\n[bold cyan]Figures loaded:[/bold cyan] [bold]{n_images}[/bold]")
    if figure_lines:
        body_parts.append("\n".join(escape(line) for line in figure_lines))
        if extra_figures:
            body_parts.append(f"[dim]… and {extra_figures} more figure(s)[/dim]")
    else:
        body_parts.append("[dim](none)[/dim]")

    body_parts.append(
        "\n[dim]In chat, tables/figures show after search (sections in Sources, up to 3 each). "
        "Windows PowerShell often shows [bold]paths only[/bold], not inline pictures — set "
        "[bold]CHUNKSMITH_CLI_OPEN_IMAGES=1[/bold] to open PNGs in Photos.[/dim]"
    )
    console.print(
        Panel(
            "\n".join(body_parts),
            title="[bold green]Loaded from JSON[/bold green]",
            border_style="green",
        )
    )
    console.print()
94
+
95
+
96
+ @dataclass
97
+ class AgentCliState:
98
+ agent: Any | None = None
99
+ loaded_name: str | None = None
100
+
101
+ @property
102
+ def is_ready(self) -> bool:
103
+ return self.agent is not None
104
+
105
+ def status_line(self) -> str:
106
+ if not self.is_ready:
107
+ return "[dim]No document loaded yet — pick option 1 to open a saved index.[/dim]"
108
+ n = len(self.agent.index.media_by_node)
109
+ media = _media_loaded_line(self.agent.index)
110
+ return (
111
+ f"[green]Ready:[/green] [bold]{escape(self.loaded_name or 'index')}[/bold] [dim]({n} nodes)[/dim] {media}"
112
+ )
113
+
114
+
115
def _find_saved_indexes(storage: Path) -> list[dict[str, str]]:
    """List saved indexes found in CLI storage and the project logs folder.

    ``storage`` is scanned for ``*_pageindex.json`` files first, then
    ``<logs>/json`` (when ``default_project_logs_dir()`` returns a directory
    that has one) for ``*_canonical_bundle.json`` files. Each group is sorted
    by file name in reverse order. The first row seen for a stem wins.

    Args:
        storage: Directory holding the CLI's saved ``*_pageindex.json`` files.

    Returns:
        Rows with the keys ``stem``, ``pageindex``, ``canonical`` and
        ``source``; a missing companion file is recorded as ``""``.
    """
    rows: list[dict[str, str]] = []
    seen: set[str] = set()

    def _add(*, stem: str, pageindex: str, canonical: str, source: str) -> None:
        # Keep only the first row per stem so storage entries take precedence.
        if stem in seen:
            return
        seen.add(stem)
        rows.append(
            {
                "stem": stem,
                "pageindex": pageindex,
                "canonical": canonical,
                "source": source,
            }
        )

    for p in sorted(storage.glob("*_pageindex.json"), reverse=True):
        # removesuffix strips only the trailing marker; str.replace would also
        # remove the same text occurring earlier in the file name.
        stem = p.name.removesuffix("_pageindex.json")
        cb = storage / f"{stem}_canonical_bundle.json"
        _add(
            stem=stem,
            pageindex=str(p),
            canonical=str(cb) if cb.is_file() else "",
            source="cli/data",
        )

    logs = default_project_logs_dir()
    if logs is not None:
        json_dir = logs / "json"
        if json_dir.is_dir():
            for cb in sorted(json_dir.glob("*_canonical_bundle.json"), reverse=True):
                stem = cb.name.removesuffix("_canonical_bundle.json")
                pi = json_dir / f"{stem}_pageindex.json"
                _add(
                    stem=stem,
                    pageindex=str(pi) if pi.is_file() else "",
                    canonical=str(cb),
                    source="logs",
                )
    return rows
156
+
157
+
158
def _load_saved(console: Console, state: AgentCliState, storage: Path) -> None:
    """Prompt for a saved index, load it into ``state`` and optionally start chatting.

    The user may type a path, artifact folder or stem (resolved through
    ``resolve_agent_index_input``), or submit an empty line and pick a
    numbered entry from the recent-index list. An empty line with no saved
    indexes cancels. Load errors are printed and leave ``state`` unchanged.

    Args:
        console: Rich console used for prompts and output.
        state: Session state that receives the new agent and its label.
        storage: CLI storage directory searched for saved indexes.
    """
    from chunksmith_agent import ChunkSmithAgent
    from chunksmith_agent.index_builder import build_document_index_from_saved

    rows = _find_saved_indexes(storage)
    logs_hint = ""
    logs = default_project_logs_dir()
    if logs is not None:
        logs_hint = f"\n [cyan]{logs}[/cyan] [dim](artifact folder → newest bundle + images)[/dim]"
    console.print(
        f"\n[bold]Load saved index[/bold]\n"
        f"[dim]Enter a path, artifact folder, or stem. Examples:[/dim]\n"
        f" [cyan]{storage / 'mydoc_20260517T120000Z_pageindex.json'}[/cyan]\n"
        f" [cyan]logs[/cyan] or [cyan]logs/json/mydoc_*_canonical_bundle.json[/cyan]"
        f"{logs_hint}\n"
        f" [cyan]mydoc_20260517T120000Z[/cyan]\n"
        f"[dim]Empty line = pick from list · quotes and ~ supported[/dim]\n"
    )
    if rows:
        console.print("[dim]Recent indexes:[/dim]")
        for i, r in enumerate(rows[:8], start=1):
            tag = f" [dim]({r['source']})[/dim]" if r.get("source") else ""
            console.print(f" [cyan]{i:>2}[/] {escape(r['stem'])}{tag}")

    while True:
        raw = Prompt.ask(
            "[bold]Index path or stem[/bold]",
            default="",
            show_default=False,
        ).strip()

        if not raw and rows:
            pick = Prompt.ask("[bold]Or pick number[/bold]", default="1")
            try:
                position = int(pick)
            except ValueError:
                position = 0
            # Reject 0 and negatives explicitly: rows[-1] would otherwise
            # silently select an entry from the end of the list.
            if not 1 <= position <= len(rows):
                console.print("[red]Invalid number.[/red]")
                continue
            row = rows[position - 1]
            # An empty string means "no such file"; Path("") would point at
            # the current directory instead.
            resolved_pageindex = Path(row["pageindex"]) if row["pageindex"] else None
            resolved_canonical = Path(row["canonical"]) if row["canonical"] else None
            # List rows carry no artifact root. Previously this path read
            # ``resolved.artifact_root`` with ``resolved`` unset, so every
            # list pick ended in "Load failed".
            # NOTE(review): assumes the builder accepts None here — confirm.
            resolved_artifact_root = None
            label = row["stem"]
            break

        if not raw:
            console.print("[dim]Cancelled.[/dim]")
            return

        resolved = resolve_agent_index_input(raw, storage=storage)
        if resolved is None:
            console.print(
                "[yellow]Could not find a valid index at that path.[/yellow]\n"
                "[dim]Use an artifact folder (``logs`` with ``json/`` + ``image/``), a "
                "``*_canonical_bundle.json``, ``*_pageindex.json``, or a stem.[/dim]\n"
            )
            continue

        resolved_pageindex = resolved.pageindex_path
        resolved_canonical = resolved.canonical_bundle_path
        resolved_artifact_root = resolved.artifact_root
        label = resolved.label
        break

    try:
        doc_index = build_document_index_from_saved(
            pageindex_path=resolved_pageindex,
            canonical_bundle_path=resolved_canonical,
            artifact_root=resolved_artifact_root,
        )
    except Exception as e:
        # Boundary of the interactive command: report and return to the menu.
        console.print(f"[bold red]Load failed:[/bold red] {e}")
        return

    state.agent = ChunkSmithAgent(doc_index)
    state.agent.reset_conversation()
    state.loaded_name = label
    console.print(f"\n[bold green]Loaded.[/bold green] {state.status_line()}\n")
    _print_index_media_hint(console, doc_index)
    if Confirm.ask("[bold]Start chatting now?[/bold]", default=True):
        _chat_loop(console, state)
236
+
237
+
238
def _chat_loop(console: Console, state: AgentCliState) -> None:
    """Run the question/answer loop against the loaded agent.

    Returns immediately with a notice when no agent is loaded. Empty input is
    ignored; ``exit``, ``quit``, ``q`` or ``back`` (case-insensitive) leaves
    the loop. Every other line is sent to ``state.agent.ask_events`` and the
    emitted events are rendered through ``handle_agent_event``.
    """
    if not state.is_ready:
        console.print("[yellow]Load a document first (option 1 — open saved index).[/yellow]")
        return

    def _render_event(name, event_payload):
        return handle_agent_event(console, name, event_payload)

    leave_words = ("exit", "quit", "q", "back")
    console.print(
        f"\n[dim]Chatting about[/dim] [bold]{escape(state.loaded_name or 'document')}[/bold]. "
        "[dim]Type[/dim] [bold]exit[/bold] [dim]to return to the agent menu.[/dim]\n"
    )
    while True:
        question = Prompt.ask("[bold #00b4a6]You[/]").strip()
        if not question:
            continue
        if question.lower() in leave_words:
            console.print("[dim]Back to agent menu.[/dim]\n")
            return
        console.print()
        events = state.agent.ask_events(
            question,
            event_sink=_render_event,
            emit_image_events=True,
            emit_table_events=True,
        )
        # Only the side effects of event_sink matter; drain the iterator.
        for _ in events:
            pass
        console.print()
263
+
264
+
265
# Dispatch table for the agent menu: maps a menu choice key to the handler that
# run_agent_session invokes. Every handler is called as (console, state, storage);
# the chat entry ignores ``storage``. The lambdas look up their target functions
# at call time rather than when this table is built.
_AGENT_ACTIONS: dict[str, Callable[[Console, AgentCliState, Path], None]] = {
    "1": lambda c, s, st: _load_saved(c, s, st),
    "2": lambda c, s, st: _chat_loop(c, s),
}
269
+
270
+
271
def run_agent_session(console: Console) -> int:
    """Run the agent menu loop until the user chooses exit or back.

    Each pass prints the current status line, shows the agent menu (defaulting
    to chat once an index is loaded, otherwise to loading one) and dispatches
    the choice through ``_AGENT_ACTIONS``. Unknown choices are ignored.

    Returns:
        Always 0.
    """
    state = AgentCliState()
    storage = ensure_cli_storage()

    while True:
        console.print()
        console.print(state.status_line())
        if state.is_ready:
            suggested = "2"
        else:
            suggested = "1"
        choice = prompt_menu(
            console,
            title="ChunkSmith Agent",
            options=AGENT_MENU,
            prompt="What next",
            default_key=suggested,
            allow_back=True,
            hint="Option 2 (chat) needs a loaded index — use option 1 first.",
        )
        if choice in ("exit", "back"):
            return 0

        handler = _AGENT_ACTIONS.get(choice)
        if handler:
            handler(console, state, storage)