chunksmith-cli 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chunksmith_cli-0.4.0/LICENSE.vectify +21 -0
- chunksmith_cli-0.4.0/PKG-INFO +61 -0
- chunksmith_cli-0.4.0/README.md +44 -0
- chunksmith_cli-0.4.0/pyproject.toml +43 -0
- chunksmith_cli-0.4.0/setup.cfg +4 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/__init__.py +3 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/__main__.py +41 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/agent/__init__.py +1 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/agent/agent_display.py +160 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/agent/agent_session.py +294 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/agent/agent_stream.py +152 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/agent/agent_wizard.py +5 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/agent_display.py +6 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/agent_session.py +6 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/agent_stream.py +6 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/agent_wizard.py +6 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/assets/chunksmith_logo.png +0 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/branding.py +6 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/config.py +6 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/core/__init__.py +21 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/core/artifact_layout.py +60 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/core/branding.py +137 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/core/config.py +32 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/core/media_preview.py +72 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/core/menu.py +110 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/core/menus.py +55 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/core/panels.py +24 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/core/paths.py +869 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/core/prefs_mapper.py +97 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/core/saved_catalog.py +180 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/core/theme.py +38 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/elements_json_prompt.py +6 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/json_view.py +6 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/media_preview.py +6 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/menu.py +6 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/menus.py +55 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/multi_indexing_wizard.py +3 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/outline_browser.py +6 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/panels.py +6 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/partition_prefs.py +74 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/paths.py +6 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/pdf_prompt.py +6 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/pipelines/__init__.py +1 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/pipelines/mapping_validation.py +31 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/pipelines/multi_indexing_config.py +35 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/pipelines/multi_indexing_prompts.py +375 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/pipelines/multi_indexing_runtime.py +38 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/pipelines/multi_indexing_storage.py +157 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/pipelines/multi_indexing_wizard.py +218 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/pipelines/pageindex_wizard.py +140 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/pipelines/run_multi.py +21 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/prompts/__init__.py +1 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/prompts/elements_json_prompt.py +49 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/prompts/pdf_prompt.py +37 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/saved_catalog.py +6 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/theme.py +6 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/tree_view.py +6 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/view_session.py +6 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/views/__init__.py +1 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/views/json_view.py +11 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/views/outline_browser.py +247 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/views/tree_view.py +59 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/views/view_session.py +32 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli/wizard.py +357 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli.egg-info/PKG-INFO +61 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli.egg-info/SOURCES.txt +72 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli.egg-info/dependency_links.txt +1 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli.egg-info/entry_points.txt +2 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli.egg-info/requires.txt +5 -0
- chunksmith_cli-0.4.0/src/chunksmith_cli.egg-info/top_level.txt +1 -0
- chunksmith_cli-0.4.0/tests/test_cli_entry.py +9 -0
- chunksmith_cli-0.4.0/tests/test_pageindex_settings.py +13 -0
- chunksmith_cli-0.4.0/tests/test_prefs_mapper.py +48 -0
- chunksmith_cli-0.4.0/tests/test_run_multi.py +22 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 ChunkSmith
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: chunksmith-cli
|
|
3
|
+
Version: 0.4.0
|
|
4
|
+
Summary: ChunkSmith CLI — multi-indexing (JSON/TOON) and PageIndexer pipelines.
|
|
5
|
+
License: MIT
|
|
6
|
+
Project-URL: Homepage, https://github.com/AnshulParate2004/ChunkSmith
|
|
7
|
+
Project-URL: Repository, https://github.com/AnshulParate2004/ChunkSmith
|
|
8
|
+
Requires-Python: >=3.10
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
License-File: LICENSE.vectify
|
|
11
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
12
|
+
Requires-Dist: rich>=13.0.0
|
|
13
|
+
Requires-Dist: chunksmith-core<0.4,>=0.3.0
|
|
14
|
+
Requires-Dist: chunksmith-multimodal[llm,pdf,toon]<0.4,>=0.3.0
|
|
15
|
+
Requires-Dist: chunksmith-pageindex[llm,pdf]<0.4,>=0.3.0
|
|
16
|
+
Dynamic: license-file
|
|
17
|
+
|
|
18
|
+
# chunksmith-cli (Python)
|
|
19
|
+
|
|
20
|
+
PyPI package **`chunksmith-cli`** v0.4.0 — Rich terminal for ChunkSmith indexing.
|
|
21
|
+
|
|
22
|
+
Part of the **ChunkSmith** monorepo (`packages/chunksmith-cli`).
|
|
23
|
+
|
|
24
|
+
## Dependencies (workspace)
|
|
25
|
+
|
|
26
|
+
| Layer | Packages |
|
|
27
|
+
|-------|----------|
|
|
28
|
+
| CLI | `chunksmith-core`, `chunksmith-multimodal`, `chunksmith-pageindex`, `rich` |
|
|
29
|
+
| Agent Q&A (separate install) | `chunksmith-agent[langchain]` |
|
|
30
|
+
|
|
31
|
+
## Development
|
|
32
|
+
|
|
33
|
+
From repo root:
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
cd ChunkSmith
|
|
37
|
+
uv sync
|
|
38
|
+
chunksmith
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
From this package directory only (requires workspace root lock):
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
cd ../..
|
|
45
|
+
uv sync
|
|
46
|
+
uv run chunksmith
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Tests
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
cd ../..
|
|
53
|
+
uv run pytest packages/chunksmith-cli/tests/ -q
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## PyPI install
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
pip install chunksmith-cli
|
|
60
|
+
pip install "chunksmith-agent[langchain]"
|
|
61
|
+
```
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# chunksmith-cli (Python)
|
|
2
|
+
|
|
3
|
+
PyPI package **`chunksmith-cli`** v0.4.0 — Rich terminal for ChunkSmith indexing.
|
|
4
|
+
|
|
5
|
+
Part of the **ChunkSmith** monorepo (`packages/chunksmith-cli`).
|
|
6
|
+
|
|
7
|
+
## Dependencies (workspace)
|
|
8
|
+
|
|
9
|
+
| Layer | Packages |
|
|
10
|
+
|-------|----------|
|
|
11
|
+
| CLI | `chunksmith-core`, `chunksmith-multimodal`, `chunksmith-pageindex`, `rich` |
|
|
12
|
+
| Agent Q&A (separate install) | `chunksmith-agent[langchain]` |
|
|
13
|
+
|
|
14
|
+
## Development
|
|
15
|
+
|
|
16
|
+
From repo root:
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
cd ChunkSmith
|
|
20
|
+
uv sync
|
|
21
|
+
chunksmith
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
From this package directory only (requires workspace root lock):
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
cd ../..
|
|
28
|
+
uv sync
|
|
29
|
+
uv run chunksmith
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Tests
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
cd ../..
|
|
36
|
+
uv run pytest packages/chunksmith-cli/tests/ -q
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## PyPI install
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
pip install chunksmith-cli
|
|
43
|
+
pip install "chunksmith-agent[langchain]"
|
|
44
|
+
```
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "chunksmith-cli"
|
|
7
|
+
version = "0.4.0"
|
|
8
|
+
description = "ChunkSmith CLI — multi-indexing (JSON/TOON) and PageIndexer pipelines."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
dependencies = [
|
|
13
|
+
"python-dotenv>=1.0.0",
|
|
14
|
+
"rich>=13.0.0",
|
|
15
|
+
"chunksmith-core>=0.3.0,<0.4",
|
|
16
|
+
"chunksmith-multimodal[pdf,toon,llm]>=0.3.0,<0.4",
|
|
17
|
+
"chunksmith-pageindex[pdf,llm]>=0.3.0,<0.4",
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
[dependency-groups]
|
|
21
|
+
dev = ["pytest>=8.0.0"]
|
|
22
|
+
|
|
23
|
+
[project.scripts]
|
|
24
|
+
chunksmith = "chunksmith_cli.__main__:main"
|
|
25
|
+
|
|
26
|
+
[project.urls]
|
|
27
|
+
Homepage = "https://github.com/AnshulParate2004/ChunkSmith"
|
|
28
|
+
Repository = "https://github.com/AnshulParate2004/ChunkSmith"
|
|
29
|
+
|
|
30
|
+
[tool.setuptools]
|
|
31
|
+
package-dir = { "" = "src" }
|
|
32
|
+
|
|
33
|
+
[tool.setuptools.packages.find]
|
|
34
|
+
where = ["src"]
|
|
35
|
+
include = ["chunksmith_cli*"]
|
|
36
|
+
exclude = ["tests*"]
|
|
37
|
+
|
|
38
|
+
[tool.setuptools.package-data]
|
|
39
|
+
chunksmith_cli = ["assets/*"]
|
|
40
|
+
|
|
41
|
+
[tool.pytest.ini_options]
|
|
42
|
+
testpaths = ["tests"]
|
|
43
|
+
pythonpath = ["src"]
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""ChunkSmith interactive CLI — ``python -m chunksmith_cli`` or ``chunksmith``."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import sys
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def main() -> int:
    """Run the ChunkSmith CLI and return a process exit code.

    When the first argument is ``--version`` or ``-V`` the version is printed
    and 0 is returned before the wizard modules are imported. Otherwise the
    interactive wizard runs and its result is returned; Ctrl-C yields 130.
    """
    wants_version = len(sys.argv) > 1 and sys.argv[1] in ("--version", "-V")
    if wants_version:
        from chunksmith_cli.theme import VERSION

        print(f"chunksmith-cli {VERSION}")
        return 0

    # Deferred imports: only needed on the interactive path.
    from chunksmith_cli.branding import ensure_utf8_stdio
    from chunksmith_cli.theme import make_console
    from chunksmith_cli.wizard import run_interactive

    ensure_utf8_stdio()
    console = make_console()

    try:
        exit_code = run_interactive(console)
    except KeyboardInterrupt:
        console.print("\n[dim]Interrupted. Goodbye.[/dim]")
        return 130
    return exit_code
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def __version__() -> str:
    """Return the ``VERSION`` value exported by ``chunksmith_cli.theme``."""
    # Imported at call time, like the other theme lookups in this module.
    from chunksmith_cli.theme import VERSION

    return VERSION
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
if __name__ == "__main__":
    # main() handles --version / -V itself and returns 0 for it, so a second
    # copy of that check here would only be duplicated logic that can drift.
    raise SystemExit(main())
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""LangChain agent over saved ChunkSmith indexes."""
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
"""Render tables and figures from agent events in the terminal."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import re
|
|
7
|
+
import sys
|
|
8
|
+
from html import unescape
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from rich.align import Align
|
|
13
|
+
from rich.console import Console
|
|
14
|
+
from rich.markup import escape
|
|
15
|
+
from rich.panel import Panel
|
|
16
|
+
|
|
17
|
+
from chunksmith_cli.config import open_images_in_viewer, show_images_in_terminal, show_tables_in_terminal
|
|
18
|
+
|
|
19
|
+
try:
|
|
20
|
+
from rich.image import Image as RichImage
|
|
21
|
+
except ImportError: # pragma: no cover
|
|
22
|
+
RichImage = None # type: ignore[misc, assignment]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def html_table_preview(html: str, *, max_len: int = 2400) -> str:
    """Return a plain-text preview of HTML table markup for terminal display.

    ``<br>`` tags and row ends (``</tr>``) become newlines, cell ends
    (``</td>`` / ``</th>``) become single-space separators, every other tag is
    dropped and HTML entities are decoded afterwards.

    Args:
        html: Table markup; ``None`` or an empty string is treated as empty.
        max_len: Maximum number of characters kept before an ellipsis line
            is appended.

    Returns:
        The cleaned text, truncated to ``max_len`` characters plus ``"\\n…"``
        when longer, or ``"(empty table)"`` when nothing remains.
    """
    text = re.sub(r"<br\s*/?>", "\n", html or "", flags=re.IGNORECASE)
    # End each row with a newline so rows stay on separate lines; previously
    # row ends were turned into tabs and collapsed, merging all rows together.
    text = re.sub(r"</tr>", "\n", text, flags=re.IGNORECASE)
    text = re.sub(r"</t[dh]>", "\t", text, flags=re.IGNORECASE)
    text = re.sub(r"<[^>]+>", "", text)
    # Decode entities only after tags are stripped so "&lt;b&gt;" stays text.
    text = unescape(text)
    text = re.sub(r"[ \t\f\v]+", " ", text)
    text = re.sub(r" *\n *", "\n", text)
    text = re.sub(r"\n{3,}", "\n\n", text).strip()
    if len(text) > max_len:
        return text[:max_len] + "\n…"
    return text or "(empty table)"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def print_table(console: Console, payload: dict[str, Any]) -> None:
    """Print one table payload as a yellow-bordered panel.

    Reads ``node_id``, ``page_number`` and ``html`` from *payload*. A missing
    or falsy node id / page number is shown as ``?``.
    """
    body = escape(html_table_preview(str(payload.get("html") or "")))
    node_label = escape(str(payload.get("node_id") or "?"))
    page_label = escape(str(payload.get("page_number") or "?"))
    heading = f"[bold yellow]Table[/bold yellow] · node {node_label} · page {page_label}"
    console.print(Panel(body, title=heading, border_style="yellow"))
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _open_image_file(path: Path) -> bool:
|
|
48
|
+
"""Open image in the system default viewer (Windows/macOS/Linux)."""
|
|
49
|
+
if not path.is_file():
|
|
50
|
+
return False
|
|
51
|
+
try:
|
|
52
|
+
if sys.platform == "win32":
|
|
53
|
+
os.startfile(str(path)) # noqa: S606
|
|
54
|
+
elif sys.platform == "darwin":
|
|
55
|
+
os.system(f'open "{path}"') # noqa: S605
|
|
56
|
+
else:
|
|
57
|
+
os.system(f'xdg-open "{path}"') # noqa: S605
|
|
58
|
+
return True
|
|
59
|
+
except OSError:
|
|
60
|
+
return False
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def print_figure(console: Console, payload: dict[str, Any]) -> None:
    """Show a figure payload inline when possible, otherwise print its path.

    Reads ``path``, ``page_number`` and ``node_id`` from *payload*. When the
    file exists and ``RichImage`` is available, an inline render is attempted;
    any failure there falls back to a panel with the file path and hints. The
    system viewer is only launched when ``open_images_in_viewer()`` is true.
    """
    node_id = payload.get("node_id")
    page = payload.get("page_number")
    caption = f"node {node_id} · page {page}"
    path_str = str(payload.get("path") or "").strip()
    file_path = Path(path_str) if path_str else None

    shown_inline = False
    if file_path and file_path.is_file() and RichImage is not None:
        try:
            picture = RichImage.from_file(str(file_path), width=min(72, max(40, console.width - 4)))
            frame = Panel(
                Align.center(picture),
                title=f"[bold cyan]Figure[/bold cyan] · {escape(caption)}",
                border_style="cyan",
            )
            console.print(frame)
        except Exception:
            # Best effort: any render failure falls through to the path panel.
            shown_inline = False
        else:
            shown_inline = True

    if shown_inline:
        console.print(f"[dim]{escape(path_str)}[/dim]\n")
        if open_images_in_viewer():
            _open_image_file(file_path)
        return

    hint_parts = [
        "[dim]Inline image preview is not supported in this terminal (common on Windows PowerShell).[/dim]\n"
    ]
    if not (file_path and file_path.is_file()):
        hint_parts.append("[yellow]Image file not found on disk.[/yellow]\n")
    else:
        on_windows = sys.platform == "win32"
        if on_windows:
            hint_parts.append(f'[dim]Open manually:[/dim] start "" {escape(path_str)}\n')
        if open_images_in_viewer():
            if _open_image_file(file_path):
                hint_parts.append("[green]Opened in your default image viewer.[/green]\n")
        elif on_windows:
            # Auto-open is off: tell Windows users how to enable it.
            hint_parts.append(
                "[dim]Tip: set[/dim] [bold]CHUNKSMITH_CLI_OPEN_IMAGES=1[/bold] "
                "[dim]to auto-open figures in Photos.[/dim]\n"
            )
    hint = "".join(hint_parts)

    console.print(
        Panel(
            f"{hint}\n[bold]{escape(path_str or '(no image path)')}[/bold]",
            title=f"[bold cyan]Figure file[/bold cyan] · {escape(caption)}",
            border_style="cyan",
        )
    )
    console.print()
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def print_tables_summary(console: Console, count: int) -> None:
    """Print a dim one-line table count; prints nothing when *count* is falsy."""
    if not count:
        return
    console.print(f"[dim]Tables in context:[/dim] {count}\n")
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def print_figures_summary(console: Console, count: int) -> None:
    """Print a dim one-line figure count; prints nothing when *count* is falsy."""
    if not count:
        return
    console.print(f"[dim]Figures in context:[/dim] {count}\n")
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def print_media_mentions(console: Console, payload: dict[str, Any]) -> None:
    """Print the media hint plus lists of figures/tables mentioned in text.

    Reads ``hint``, ``figures`` and ``tables`` from *payload*. At most 12
    entries per list are shown; the figure list is gated by
    ``show_images_in_terminal()`` and the table list by
    ``show_tables_in_terminal()``.
    """

    def _bullets(items: Any) -> str:
        # One bullet per entry: node id (or "?") followed by its caption.
        return "\n".join(
            f"• [dim]node {escape(str(item.get('node_id') or '?'))}[/dim] — {escape(str(item.get('caption') or ''))}"
            for item in items[:12]
        )

    hint = str(payload.get("hint") or "").strip()
    if hint:
        console.print(Panel(escape(hint), title="[bold yellow]Media[/bold yellow]", border_style="yellow"))

    figures = payload.get("figures") or []
    if figures and show_images_in_terminal():
        console.print(
            Panel(
                _bullets(figures),
                title="[bold cyan]Figures mentioned in text[/bold cyan]",
                border_style="cyan",
            )
        )

    tables = payload.get("tables") or []
    if tables and show_tables_in_terminal():
        console.print(
            Panel(
                _bullets(tables),
                title="[bold yellow]Tables mentioned in text[/bold yellow]",
                border_style="yellow",
            )
        )

    # Trailing blank line whenever the payload carried any media at all.
    if figures or tables:
        console.print()
|
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
"""Agent CLI session state and actions."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, Callable
|
|
8
|
+
|
|
9
|
+
from rich.console import Console
|
|
10
|
+
from rich.markup import escape
|
|
11
|
+
from rich.prompt import Confirm, Prompt
|
|
12
|
+
|
|
13
|
+
from chunksmith_cli.agent_stream import handle_agent_event
|
|
14
|
+
from chunksmith_cli.menu import prompt_menu
|
|
15
|
+
from chunksmith_cli.menus import AGENT_MENU
|
|
16
|
+
from chunksmith_cli.paths import (
|
|
17
|
+
default_project_logs_dir,
|
|
18
|
+
ensure_cli_storage,
|
|
19
|
+
resolve_agent_index_input,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _index_media_counts(doc_index: Any) -> tuple[int, int]:
    """Return ``chunksmith_agent``'s media counts for *doc_index*.

    Callers in this module unpack the result as ``(tables, figures)``.
    """
    # Imported at call time so this module loads without chunksmith_agent.
    from chunksmith_agent.index_context import index_media_counts as _count_media

    counts = _count_media(doc_index)
    return counts
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _media_loaded_line(doc_index: Any) -> str:
    """Return the Rich-markup media counts used on the Loaded / Ready line."""
    table_count, figure_count = _index_media_counts(doc_index)
    if not (table_count or figure_count):
        return "[dim]· 0 tables · 0 figures (text-only index)[/dim]"
    tables_part = f"[dim]·[/dim] [bold cyan]{table_count}[/bold cyan] tables loaded "
    figures_part = f"[dim]·[/dim] [bold cyan]{figure_count}[/bold cyan] figures loaded"
    return tables_part + figures_part
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _print_index_media_hint(console: Console, doc_index: Any) -> None:
    """Print a panel summarising the tables and figures in a loaded index.

    With at least one table or figure, a green "Loaded from JSON" panel lists
    the inventory lines returned by ``index_media_inventory`` and notes how
    many entries are not listed. With none, a yellow "Text-only index" panel
    explains how to get tables and images.
    """
    from rich.panel import Panel

    from chunksmith_agent.index_context import index_media_inventory

    n_tables, n_images = _index_media_counts(doc_index)
    table_lines, figure_lines = index_media_inventory(doc_index)

    if not (n_tables or n_images):
        console.print(
            Panel(
                "[bold]Tables loaded:[/bold] 0\n"
                "[bold]Figures loaded:[/bold] 0\n\n"
                # Reworded: the previous text read "index with load a ...".
                "[dim]This JSON has outline/text only. For tables and images, "
                "load a [bold]*_canonical_bundle.json[/bold] or index with "
                "[bold]chunking → Multimodal index[/bold].[/dim]",
                title="[bold yellow]Text-only index[/bold yellow]",
                border_style="yellow",
            )
        )
        console.print()
        return

    def _section(lines: Any, total: int, noun: str) -> list[str]:
        # Listed inventory lines plus a "… and N more" note, or "(none)".
        if not lines:
            return ["[dim](none)[/dim]"]
        parts = ["\n".join(escape(line) for line in lines)]
        extra = max(0, total - len(lines))
        if extra:
            parts.append(f"[dim]… and {extra} more {noun}(s)[/dim]")
        return parts

    body_parts = [f"[bold yellow]Tables loaded:[/bold yellow] [bold]{n_tables}[/bold]"]
    body_parts.extend(_section(table_lines, n_tables, "table"))
    body_parts.append(f"\n[bold cyan]Figures loaded:[/bold cyan] [bold]{n_images}[/bold]")
    body_parts.extend(_section(figure_lines, n_images, "figure"))
    body_parts.append(
        "\n[dim]In chat, tables/figures show after search (sections in Sources, up to 3 each). "
        "Windows PowerShell often shows [bold]paths only[/bold], not inline pictures — set "
        "[bold]CHUNKSMITH_CLI_OPEN_IMAGES=1[/bold] to open PNGs in Photos.[/dim]"
    )
    console.print(
        Panel(
            "\n".join(body_parts),
            title="[bold green]Loaded from JSON[/bold green]",
            border_style="green",
        )
    )
    console.print()
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
@dataclass
class AgentCliState:
    """Mutable state shared by the agent menu actions."""

    # Loaded agent object; None until an index has been opened.
    agent: Any | None = None
    # Label shown for the loaded index.
    loaded_name: str | None = None

    @property
    def is_ready(self) -> bool:
        """Whether an agent has been attached."""
        return self.agent is not None

    def status_line(self) -> str:
        """Return the Rich-markup status line printed above the agent menu."""
        if not self.is_ready:
            return "[dim]No document loaded yet — pick option 1 to open a saved index.[/dim]"
        index = self.agent.index
        node_total = len(index.media_by_node)
        media_part = _media_loaded_line(index)
        name = escape(self.loaded_name or 'index')
        return f"[green]Ready:[/green] [bold]{name}[/bold] [dim]({node_total} nodes)[/dim] {media_part}"
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _find_saved_indexes(storage: Path) -> list[dict[str, str]]:
    """List saved indexes found in CLI storage and the project logs folder.

    Args:
        storage: Directory scanned for ``*_pageindex.json`` files.

    Returns:
        One dict per unique stem with the keys ``stem``, ``pageindex``,
        ``canonical`` and ``source``. ``pageindex`` / ``canonical`` are path
        strings, or ``""`` when that file does not exist. Storage entries
        come first and win over log entries that share a stem; each group is
        sorted by file name in descending order.
    """
    rows: list[dict[str, str]] = []
    seen: set[str] = set()

    def _add(*, stem: str, pageindex: str, canonical: str, source: str) -> None:
        # First occurrence of a stem wins; later duplicates are ignored.
        if stem in seen:
            return
        seen.add(stem)
        rows.append(
            {
                "stem": stem,
                "pageindex": pageindex,
                "canonical": canonical,
                "source": source,
            }
        )

    for p in sorted(storage.glob("*_pageindex.json"), reverse=True):
        # removesuffix only strips the trailing marker; str.replace would also
        # eat the same text if it appeared earlier in the file name.
        stem = p.name.removesuffix("_pageindex.json")
        cb = storage / f"{stem}_canonical_bundle.json"
        _add(
            stem=stem,
            pageindex=str(p),
            canonical=str(cb) if cb.is_file() else "",
            source="cli/data",
        )

    logs = default_project_logs_dir()
    if logs is not None:
        json_dir = logs / "json"
        if json_dir.is_dir():
            for cb in sorted(json_dir.glob("*_canonical_bundle.json"), reverse=True):
                stem = cb.name.removesuffix("_canonical_bundle.json")
                pi = json_dir / f"{stem}_pageindex.json"
                _add(
                    stem=stem,
                    pageindex=str(pi) if pi.is_file() else "",
                    canonical=str(cb),
                    source="logs",
                )
    return rows
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _load_saved(console: Console, state: AgentCliState, storage: Path) -> None:
    """Prompt for a saved index, load it into *state*, and offer to chat.

    The user may type a path, artifact folder or stem, or submit an empty
    line to pick a numbered entry from the discovered indexes. Both routes go
    through ``resolve_agent_index_input`` so the page index, canonical bundle
    and artifact root always come from the same resolved object.

    Args:
        console: Rich console used for all output.
        state: Session state; ``agent`` and ``loaded_name`` are set on success.
        storage: CLI storage directory searched for saved indexes.
    """
    from chunksmith_agent import ChunkSmithAgent
    from chunksmith_agent.index_builder import build_document_index_from_saved

    rows = _find_saved_indexes(storage)
    logs_hint = ""
    logs = default_project_logs_dir()
    if logs is not None:
        logs_hint = f"\n [cyan]{logs}[/cyan] [dim](artifact folder → newest bundle + images)[/dim]"
    console.print(
        f"\n[bold]Load saved index[/bold]\n"
        f"[dim]Enter a path, artifact folder, or stem. Examples:[/dim]\n"
        f" [cyan]{storage / 'mydoc_20260517T120000Z_pageindex.json'}[/cyan]\n"
        f" [cyan]logs[/cyan] or [cyan]logs/json/mydoc_*_canonical_bundle.json[/cyan]"
        f"{logs_hint}\n"
        f" [cyan]mydoc_20260517T120000Z[/cyan]\n"
        f"[dim]Empty line = pick from list · quotes and ~ supported[/dim]\n"
    )
    if rows:
        console.print("[dim]Recent indexes:[/dim]")
        for i, r in enumerate(rows[:8], start=1):
            tag = f" [dim]({r['source']})[/dim]" if r.get("source") else ""
            console.print(f" [cyan]{i:>2}[/] {escape(r['stem'])}{tag}")

    while True:
        raw = Prompt.ask(
            "[bold]Index path or stem[/bold]",
            default="",
            show_default=False,
        ).strip()
        picked_label = None

        if not raw:
            if not rows:
                console.print("[dim]Cancelled.[/dim]")
                return
            pick = Prompt.ask("[bold]Or pick number[/bold]", default="1")
            try:
                number = int(pick)
            except ValueError:
                number = 0
            # Reject 0 and negatives explicitly: plain list indexing would
            # silently wrap around and select an entry from the end.
            if not 1 <= number <= len(rows):
                console.print("[red]Invalid number.[/red]")
                continue
            row = rows[number - 1]
            # A row may lack one of the two files (empty string); use the one
            # that exists. NOTE(review): relies on the resolver accepting a
            # bundle or pageindex file path, as the on-screen help states.
            raw = row["canonical"] or row["pageindex"]
            picked_label = row["stem"]

        resolved = resolve_agent_index_input(raw, storage=storage)
        if resolved is None:
            console.print(
                "[yellow]Could not find a valid index at that path.[/yellow]\n"
                "[dim]Use an artifact folder (``logs`` with ``json/`` + ``image/``), a "
                "``*_canonical_bundle.json``, ``*_pageindex.json``, or a stem.[/dim]\n"
            )
            continue
        break

    label = picked_label or resolved.label

    try:
        doc_index = build_document_index_from_saved(
            pageindex_path=resolved.pageindex_path,
            canonical_bundle_path=resolved.canonical_bundle_path,
            artifact_root=resolved.artifact_root,
        )
    except Exception as e:
        # Escape the message: paths containing "[...]" would otherwise be
        # parsed as Rich markup.
        console.print(f"[bold red]Load failed:[/bold red] {escape(str(e))}")
        return

    state.agent = ChunkSmithAgent(doc_index)
    state.agent.reset_conversation()
    state.loaded_name = label
    console.print(f"\n[bold green]Loaded.[/bold green] {state.status_line()}\n")
    _print_index_media_hint(console, doc_index)
    if Confirm.ask("[bold]Start chatting now?[/bold]", default=True):
        _chat_loop(console, state)
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def _chat_loop(console: Console, state: AgentCliState) -> None:
    """Run the question/answer loop against the loaded agent.

    Returns immediately with a notice when no index is loaded. Empty input is
    ignored; ``exit``, ``quit``, ``q`` or ``back`` (any case) ends the loop.
    Agent events are rendered through ``handle_agent_event``.
    """
    if not state.is_ready:
        console.print("[yellow]Load a document first (option 1 — open saved index).[/yellow]")
        return

    leave_words = ("exit", "quit", "q", "back")

    def _sink(n, p):
        # Forward every agent event to the terminal renderer.
        return handle_agent_event(console, n, p)

    console.print(
        f"\n[dim]Chatting about[/dim] [bold]{escape(state.loaded_name or 'document')}[/bold]. "
        "[dim]Type[/dim] [bold]exit[/bold] [dim]to return to the agent menu.[/dim]\n"
    )
    while True:
        question = Prompt.ask("[bold #00b4a6]You[/]").strip()
        if not question:
            continue
        if question.lower() in leave_words:
            console.print("[dim]Back to agent menu.[/dim]\n")
            break
        console.print()
        events = state.agent.ask_events(
            question,
            event_sink=_sink,
            emit_image_events=True,
            emit_table_events=True,
        )
        # Output happens in the sink; the iterator only needs to be drained.
        for _ in events:
            pass
        console.print()
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
# Menu key -> handler. Every handler takes (console, state, storage) so the
# menu loop can call them uniformly; the chat action ignores ``storage``.
_AGENT_ACTIONS: dict[str, Callable[[Console, AgentCliState, Path], None]] = {
    "1": lambda console, state, storage: _load_saved(console, state, storage),
    "2": lambda console, state, _storage: _chat_loop(console, state),
}
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def run_agent_session(console: Console) -> int:
    """Run the agent menu loop until the user exits or goes back.

    Each pass prints the status line and shows the agent menu; the default
    choice is chat once an index is loaded, otherwise load. Always returns 0.
    """
    session = AgentCliState()
    storage = ensure_cli_storage()

    while True:
        console.print()
        console.print(session.status_line())
        default_key = "2" if session.is_ready else "1"
        choice = prompt_menu(
            console,
            title="ChunkSmith Agent",
            options=AGENT_MENU,
            prompt="What next",
            default_key=default_key,
            allow_back=True,
            hint="Option 2 (chat) needs a loaded index — use option 1 first.",
        )
        if choice in ("exit", "back"):
            return 0

        handler = _AGENT_ACTIONS.get(choice)
        if handler:
            handler(console, session, storage)
|