chunksmith-cli 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chunksmith_cli/__init__.py +3 -0
- chunksmith_cli/__main__.py +41 -0
- chunksmith_cli/agent/__init__.py +1 -0
- chunksmith_cli/agent/agent_display.py +160 -0
- chunksmith_cli/agent/agent_session.py +294 -0
- chunksmith_cli/agent/agent_stream.py +152 -0
- chunksmith_cli/agent/agent_wizard.py +5 -0
- chunksmith_cli/agent_display.py +6 -0
- chunksmith_cli/agent_session.py +6 -0
- chunksmith_cli/agent_stream.py +6 -0
- chunksmith_cli/agent_wizard.py +6 -0
- chunksmith_cli/assets/chunksmith_logo.png +0 -0
- chunksmith_cli/branding.py +6 -0
- chunksmith_cli/config.py +6 -0
- chunksmith_cli/core/__init__.py +21 -0
- chunksmith_cli/core/artifact_layout.py +60 -0
- chunksmith_cli/core/branding.py +137 -0
- chunksmith_cli/core/config.py +32 -0
- chunksmith_cli/core/media_preview.py +72 -0
- chunksmith_cli/core/menu.py +110 -0
- chunksmith_cli/core/menus.py +55 -0
- chunksmith_cli/core/panels.py +24 -0
- chunksmith_cli/core/paths.py +869 -0
- chunksmith_cli/core/prefs_mapper.py +97 -0
- chunksmith_cli/core/saved_catalog.py +180 -0
- chunksmith_cli/core/theme.py +38 -0
- chunksmith_cli/elements_json_prompt.py +6 -0
- chunksmith_cli/json_view.py +6 -0
- chunksmith_cli/media_preview.py +6 -0
- chunksmith_cli/menu.py +6 -0
- chunksmith_cli/menus.py +55 -0
- chunksmith_cli/multi_indexing_wizard.py +3 -0
- chunksmith_cli/outline_browser.py +6 -0
- chunksmith_cli/panels.py +6 -0
- chunksmith_cli/partition_prefs.py +74 -0
- chunksmith_cli/paths.py +6 -0
- chunksmith_cli/pdf_prompt.py +6 -0
- chunksmith_cli/pipelines/__init__.py +1 -0
- chunksmith_cli/pipelines/mapping_validation.py +31 -0
- chunksmith_cli/pipelines/multi_indexing_config.py +35 -0
- chunksmith_cli/pipelines/multi_indexing_prompts.py +375 -0
- chunksmith_cli/pipelines/multi_indexing_runtime.py +38 -0
- chunksmith_cli/pipelines/multi_indexing_storage.py +157 -0
- chunksmith_cli/pipelines/multi_indexing_wizard.py +218 -0
- chunksmith_cli/pipelines/pageindex_wizard.py +140 -0
- chunksmith_cli/pipelines/run_multi.py +21 -0
- chunksmith_cli/prompts/__init__.py +1 -0
- chunksmith_cli/prompts/elements_json_prompt.py +49 -0
- chunksmith_cli/prompts/pdf_prompt.py +37 -0
- chunksmith_cli/saved_catalog.py +6 -0
- chunksmith_cli/theme.py +6 -0
- chunksmith_cli/tree_view.py +6 -0
- chunksmith_cli/view_session.py +6 -0
- chunksmith_cli/views/__init__.py +1 -0
- chunksmith_cli/views/json_view.py +11 -0
- chunksmith_cli/views/outline_browser.py +247 -0
- chunksmith_cli/views/tree_view.py +59 -0
- chunksmith_cli/views/view_session.py +32 -0
- chunksmith_cli/wizard.py +357 -0
- chunksmith_cli-0.4.0.dist-info/METADATA +61 -0
- chunksmith_cli-0.4.0.dist-info/RECORD +65 -0
- chunksmith_cli-0.4.0.dist-info/WHEEL +5 -0
- chunksmith_cli-0.4.0.dist-info/entry_points.txt +2 -0
- chunksmith_cli-0.4.0.dist-info/licenses/LICENSE.vectify +21 -0
- chunksmith_cli-0.4.0.dist-info/top_level.txt +1 -0
|
Binary file
|
chunksmith_cli/config.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Shared CLI infrastructure: paths, theme, menus, catalogs."""
|
|
2
|
+
|
|
3
|
+
from chunksmith_cli.core.paths import (
|
|
4
|
+
cli_archive_stamp,
|
|
5
|
+
cli_data_dir,
|
|
6
|
+
cli_json_storage_dir,
|
|
7
|
+
cli_runs_dir,
|
|
8
|
+
ensure_cli_storage,
|
|
9
|
+
normalize_path_string,
|
|
10
|
+
resolve_pdf_candidate,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"cli_archive_stamp",
|
|
15
|
+
"cli_data_dir",
|
|
16
|
+
"cli_json_storage_dir",
|
|
17
|
+
"cli_runs_dir",
|
|
18
|
+
"ensure_cli_storage",
|
|
19
|
+
"normalize_path_string",
|
|
20
|
+
"resolve_pdf_candidate",
|
|
21
|
+
]
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Artifact folder layout for multi-indexing CLI runs."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
ARTIFACT_SUBDIR_JSON = "json"
|
|
9
|
+
ARTIFACT_SUBDIR_IMAGE = "image"
|
|
10
|
+
ARTIFACT_SUBDIR_PICKLE = "pickle"
|
|
11
|
+
ARTIFACT_SUBDIR_TEXT = "text"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass(frozen=True)
class ArtifactLayout:
    """Immutable record of the folders used for one run's artifacts.

    Stores the artifact root, the PDF stem used as a filename prefix, and one
    directory per artifact kind. Each ``*_path`` method only builds a path of
    the form ``<directory>/<pdf_stem>_<stamp><suffix>``; none of them touches
    the filesystem.
    """

    root: Path
    pdf_stem: str
    json_dir: Path
    image_dir: Path
    pickle_dir: Path
    text_dir: Path

    def _stamped(self, directory: Path, stamp: str, suffix: str) -> Path:
        """Join ``directory`` with ``<pdf_stem>_<stamp><suffix>``."""
        return directory / f"{self.pdf_stem}_{stamp}{suffix}"

    def pickle_path(self, *, stamp: str) -> Path:
        """Path of the ``.pkl`` file for ``stamp`` inside ``pickle_dir``."""
        return self._stamped(self.pickle_dir, stamp, ".pkl")

    def unstructured_json_path(self, *, stamp: str) -> Path:
        """Path of the ``_unstructured_elements.json`` file inside ``json_dir``."""
        return self._stamped(self.json_dir, stamp, "_unstructured_elements.json")

    def pageindex_json_path(self, *, stamp: str) -> Path:
        """Path of the ``_pageindex.json`` file inside ``json_dir``."""
        return self._stamped(self.json_dir, stamp, "_pageindex.json")

    def multi_index_json_path(self, *, stamp: str) -> Path:
        """Path of the ``_multi_index.json`` file inside ``json_dir``."""
        return self._stamped(self.json_dir, stamp, "_multi_index.json")

    def canonical_bundle_json_path(self, *, stamp: str) -> Path:
        """Path of the ``_canonical_bundle.json`` file inside ``json_dir``."""
        return self._stamped(self.json_dir, stamp, "_canonical_bundle.json")

    def coded_text_path(self, *, stamp: str) -> Path:
        """Path of the title-coded text file inside ``text_dir``."""
        # The "formate" spelling is part of the on-disk filename; keep it verbatim.
        return self._stamped(self.text_dir, stamp, "_title_coded_formate.txt")

    def compressed_tree_text_path(self, *, stamp: str) -> Path:
        """Path of the ``_compressed_tree.txt`` file inside ``text_dir``."""
        return self._stamped(self.text_dir, stamp, "_compressed_tree.txt")
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def ensure_artifact_layout(artifact_root: Path, pdf_stem: str) -> ArtifactLayout:
    """Create the artifact subfolders under ``artifact_root`` and describe them.

    The root is resolved to an absolute path first. The json, image, pickle
    and text subdirectories are then created (parents included, existing
    folders tolerated) and returned together as an ``ArtifactLayout``.
    """
    resolved_root = artifact_root.resolve()
    # Keyed by the ArtifactLayout field each folder populates; insertion order
    # is also the creation order.
    folders = {
        "json_dir": resolved_root / ARTIFACT_SUBDIR_JSON,
        "image_dir": resolved_root / ARTIFACT_SUBDIR_IMAGE,
        "pickle_dir": resolved_root / ARTIFACT_SUBDIR_PICKLE,
        "text_dir": resolved_root / ARTIFACT_SUBDIR_TEXT,
    }
    for folder in folders.values():
        folder.mkdir(parents=True, exist_ok=True)
    return ArtifactLayout(root=resolved_root, pdf_stem=pdf_stem, **folders)
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"""ChunkSmith CLI welcome banner (ASCII wordmark, no inline images)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import sys
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from rich import box
|
|
9
|
+
from rich.align import Align
|
|
10
|
+
from rich.console import Console, Group, RenderableType
|
|
11
|
+
from rich.panel import Panel
|
|
12
|
+
from rich.text import Text
|
|
13
|
+
|
|
14
|
+
from chunksmith_cli.theme import MUTED, NAVY, NAVY_LIGHT, TEAL, TEAL_BRIGHT, VERSION
|
|
15
|
+
|
|
16
|
+
_LOGO_PNG = Path(__file__).resolve().parent.parent / "assets" / "chunksmith_logo.png"
|
|
17
|
+
_TAGLINE = "Multimodal RAG · document chunking · outline indexing"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def logo_png_path() -> Path:
    """Return the module-level ``_LOGO_PNG`` path; existence is not checked."""
    bundled_logo = _LOGO_PNG
    return bundled_logo
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
_WORD_GAP = " "
|
|
25
|
+
|
|
26
|
+
# Block glyphs: CHUNK (navy) + gap + SMITH (teal)
|
|
27
|
+
_CHUNK_BLOCK = [
|
|
28
|
+
" #### # # # # # # # # ",
|
|
29
|
+
"# # # # # ## # # # ",
|
|
30
|
+
"# ###### # # # # # ### ",
|
|
31
|
+
"# # # # # # # # # # ",
|
|
32
|
+
" #### # # #### # # # # ",
|
|
33
|
+
]
|
|
34
|
+
_SMITH_BLOCK = [
|
|
35
|
+
" #### # # ###### ###### # # ",
|
|
36
|
+
"# ## ## ## ## # # ",
|
|
37
|
+
" #### # ## # ## ## ###### ",
|
|
38
|
+
" # # # ## ## # # ",
|
|
39
|
+
" #### # # ###### ## # # ",
|
|
40
|
+
]
|
|
41
|
+
_CHUNKSMITH_BLOCK = [c + _WORD_GAP + s for c, s in zip(_CHUNK_BLOCK, _SMITH_BLOCK, strict=True)]
|
|
42
|
+
_CHUNK_PART_LEN = len(_CHUNK_BLOCK[0]) + len(_WORD_GAP)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def ensure_utf8_stdio() -> None:
    """Switch stdout/stderr to UTF-8 on Windows so block glyphs render cleanly.

    Does nothing on other platforms. Streams without a callable
    ``reconfigure`` attribute are skipped, and any failure while
    reconfiguring is ignored.
    """
    if sys.platform == "win32":
        for stream in (sys.stdout, sys.stderr):
            reconfigure = getattr(stream, "reconfigure", None)
            if callable(reconfigure):
                try:
                    reconfigure(encoding="utf-8", errors="replace")
                except Exception:
                    # Best effort only: a stream that refuses keeps its encoding.
                    pass
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _console_supports_unicode(console: Console) -> bool:
    """Tell whether the console's output encoding can represent ``█``.

    The encoding is read from ``console.file.encoding`` and falls back to
    UTF-8 when that attribute is missing or empty. An unknown codec name
    counts as "not supported".
    """
    encoding = getattr(console.file, "encoding", None) or "utf-8"
    try:
        "█".encode(encoding)
    except (UnicodeEncodeError, LookupError):
        return False
    return True
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _logo_art(console: Console) -> Text:
    """Build the two-tone CHUNK / SMITH wordmark as a single ``Text``.

    ``#`` cells become ``█`` when the console can encode that glyph and stay
    as ``#`` otherwise. Each row is split at ``_CHUNK_PART_LEN``: the left
    part takes a navy shade and the right part a teal shade, with the shade
    alternating between even and odd rows.
    """
    render_blocks = _console_supports_unicode(console)
    art = Text()
    for index, template in enumerate(_CHUNKSMITH_BLOCK):
        rendered = template.replace("#", "█") if render_blocks else template
        if index > 0:
            art.append("\n")
        odd_row = index % 2
        chunk_colour = NAVY_LIGHT if odd_row else NAVY
        smith_colour = TEAL if odd_row else TEAL_BRIGHT
        art.append(rendered[:_CHUNK_PART_LEN], style=f"bold {chunk_colour}")
        art.append(rendered[_CHUNK_PART_LEN:], style=f"bold {smith_colour}")
    return art
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _brand_stack(*, subtitle: str | None, console: Console) -> RenderableType:
    """Stack the centred logo, tagline and version into one renderable group.

    A truthy ``subtitle`` is appended as a final centred italic line.
    """

    def centred(content: str, style: str) -> RenderableType:
        return Align.center(Text(content, style=style))

    stack: list[RenderableType] = [
        Align.center(_logo_art(console)),
        Text(""),
        centred(_TAGLINE, f"bold {NAVY_LIGHT}"),
        Text(""),
        centred(f"v{VERSION}", f"dim {MUTED}"),
    ]
    if subtitle:
        stack.append(centred(subtitle, f"italic {MUTED}"))
    return Group(*stack)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def print_chunksmith_banner(console: Console, *, subtitle: str | None = None) -> None:
    """Print the welcome panel (ASCII wordmark only) with a blank line on each side.

    ``subtitle`` is forwarded to the brand stack inside the panel; the panel's
    own border subtitle is always "Interactive CLI".
    """
    console.print()
    welcome = Panel(
        _brand_stack(subtitle=subtitle, console=console),
        border_style=TEAL,
        box=box.ROUNDED,
        padding=(1, 2),
        subtitle="[dim]Interactive CLI[/dim]",
        subtitle_align="center",
    )
    console.print(welcome)
    console.print()
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def print_session_hint(console: Console) -> None:
    """Print a one-panel reminder of the ``exit`` and ``back`` commands, then a blank line."""
    reminder = (
        "[dim]Choose a mode from the menu. Type [bold red]exit[/bold red] to quit · "
        "[bold cyan]back[/bold cyan] where shown to return.[/dim]"
    )
    hint_panel = Panel(
        reminder,
        border_style=NAVY_LIGHT,
        box=box.SIMPLE,
        padding=(0, 1),
    )
    console.print(hint_panel)
    console.print()
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def print_root_menu(console: Console) -> None:
    """Render the "Start here" menu table built from ``ROOT_MENU``."""
    # Imported at call time rather than at module import
    # (presumably to avoid an import cycle — TODO confirm).
    from chunksmith_cli.menu import render_menu_table
    from chunksmith_cli.menus import ROOT_MENU

    render_menu_table(
        console,
        options=ROOT_MENU,
        title="Start here",
    )
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def print_main_menu(console: Console) -> None:
    """Render the "Chunking" menu table built from ``CHUNKING_MENU``."""
    # Imported at call time rather than at module import
    # (presumably to avoid an import cycle — TODO confirm).
    from chunksmith_cli.menu import render_menu_table
    from chunksmith_cli.menus import CHUNKING_MENU

    render_menu_table(
        console,
        options=CHUNKING_MENU,
        title="Chunking",
    )
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""CLI display and behavior (environment overrides)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _env_truthy(name: str, *, default: bool = False) -> bool:
    """Interpret environment variable ``name`` as a boolean flag.

    An unset or blank variable yields ``default``. Otherwise the result is
    ``True`` only for ``1``, ``true``, ``yes`` or ``on`` (case-insensitive,
    surrounding whitespace ignored); every other value is ``False``.
    """
    value = str(os.environ.get(name, "")).strip()
    if value == "":
        return default
    return value.lower() in {"1", "true", "yes", "on"}
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def show_images_in_terminal() -> bool:
    """Report whether figure previews should be shown in agent chat.

    Controlled by ``CHUNKSMITH_CLI_SHOW_IMAGES``; on when the variable is
    unset or blank, so set it to ``0`` to disable.
    """
    flag_name = "CHUNKSMITH_CLI_SHOW_IMAGES"
    return _env_truthy(flag_name, default=True)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def open_images_in_viewer() -> bool:
    """Report whether each figure file should be opened in the OS default viewer.

    The original note names Photos as the viewer on Windows. Controlled by
    ``CHUNKSMITH_CLI_OPEN_IMAGES``; off unless set to a truthy value such as ``1``.
    """
    flag_name = "CHUNKSMITH_CLI_OPEN_IMAGES"
    return _env_truthy(flag_name, default=False)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def show_tables_in_terminal() -> bool:
    """Report whether table panels are allowed in chat.

    Controlled by ``CHUNKSMITH_CLI_SHOW_TABLES``; on when the variable is
    unset or blank, so set it to ``0`` to disable.
    """
    flag_name = "CHUNKSMITH_CLI_SHOW_TABLES"
    return _env_truthy(flag_name, default=True)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def show_verbose_pipeline_events() -> bool:
    """Report whether low-level pipeline event names are printed during indexing runs.

    Controlled by ``CHUNKSMITH_CLI_VERBOSE_EVENTS``; off unless set to a
    truthy value.
    """
    flag_name = "CHUNKSMITH_CLI_VERBOSE_EVENTS"
    return _env_truthy(flag_name, default=False)
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""Optional PNG export for ``media_by_node`` images (CLI terminals rarely render raw bitmaps inline)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import base64
|
|
6
|
+
import re
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from rich.console import Console
|
|
11
|
+
from rich.markup import escape
|
|
12
|
+
from rich.panel import Panel
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def export_media_images_to_folder(
    media_by_node: dict[str, Any],
    dest_dir: Path,
    *,
    max_files: int = 8,
) -> list[Path]:
    """
    Decode ``image_base64`` payloads from ``media_by_node`` and write them into ``dest_dir``.

    Every dict value of ``media_by_node`` may carry an ``"images"`` list. Each dict entry
    of that list with a non-blank string ``"image_base64"`` is base64-decoded and written
    as ``<slug>__p<page>__img<index>.png``. ``<slug>`` is the node id with every run of
    characters outside ``[a-zA-Z0-9_-]`` replaced by ``_`` and cut to 40 characters.
    ``<page>`` is the entry's ``"page_number"`` (default ``0``) cleaned the same way, so
    the name can never contain a path separator. ``<index>`` is the entry's position in
    the ``"images"`` list. The bytes are written as-is; no image validation is performed.

    Args:
        media_by_node: Mapping of node id to a media blob; non-dict blobs are skipped.
        dest_dir: Output folder, created (with parents) if missing.
        max_files: Upper bound on the number of files written.

    Returns:
        Paths written, in write order. May be fewer than ``max_files`` when payloads are
        missing, fail to decode, decode to nothing, or cannot be written.
    """
    dest_dir.mkdir(parents=True, exist_ok=True)
    written: list[Path] = []
    unsafe_chars = re.compile(r"[^a-zA-Z0-9_-]+")

    for node_id, blob in media_by_node.items():
        if len(written) >= max_files:
            break
        if not isinstance(blob, dict):
            continue
        images = blob.get("images")
        if not isinstance(images, (list, tuple)):
            # Missing or malformed "images": nothing to export for this node.
            continue
        slug = unsafe_chars.sub("_", str(node_id))[:40]
        for i, img in enumerate(images):
            if len(written) >= max_files:
                break
            if not isinstance(img, dict):
                continue
            b64 = img.get("image_base64")
            if not isinstance(b64, str) or not b64.strip():
                continue
            try:
                raw = base64.b64decode(b64, validate=False)
            except ValueError:
                # binascii.Error (bad padding) and non-ASCII input are both ValueErrors.
                continue
            if not raw:
                # Lenient decoding can yield zero bytes; an empty .png is useless.
                continue
            # page_number comes from stored data; sanitise it like the node id so it
            # cannot inject path separators into the filename.
            page = unsafe_chars.sub("_", str(img.get("page_number", 0)))
            out = dest_dir / f"{slug}__p{page}__img{i}.png"
            try:
                out.write_bytes(raw)
            except OSError:
                continue
            written.append(out)
    return written
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def offer_image_export_panel(console: Console, media_by_node: dict[str, Any], dest_dir: Path) -> None:
    """List up to 20 ``*.png`` files found in ``dest_dir`` inside a yellow panel.

    Meant to be called after ``export_media_images_to_folder``. Prints nothing
    when ``dest_dir`` is not a directory or holds no PNG files.
    ``media_by_node`` is accepted but not read here.
    """
    if not dest_dir.is_dir():
        return
    png_files = sorted(dest_dir.glob("*.png"))[:20]
    listing = "\n".join(f"- {escape(str(png))}" for png in png_files)
    if not listing.strip():
        return
    preview_panel = Panel(
        listing,
        title="[bold]Image previews (PNG on disk)[/bold]",
        border_style="yellow",
        expand=False,
    )
    console.print(preview_panel)
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"""Friendly menus: one table + one prompt loop (no scattered if/else)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Sequence
|
|
7
|
+
|
|
8
|
+
from rich import box
|
|
9
|
+
from rich.console import Console
|
|
10
|
+
from rich.prompt import Prompt
|
|
11
|
+
from rich.table import Table
|
|
12
|
+
|
|
13
|
+
from chunksmith_cli.theme import NAVY, TEAL, TEAL_BRIGHT, MUTED
|
|
14
|
+
|
|
15
|
+
_EXIT_KEYS = frozenset({"exit", "quit", "q"})
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass(frozen=True)
class MenuOption:
    """One selectable menu row: a primary ``key``, display text, and optional aliases."""

    key: str  # primary token the user types; also what prompt_menu returns
    title: str  # short label shown in the "What" column
    detail: str = ""  # longer text shown in the "Description" column
    aliases: tuple[str, ...] = ()  # alternative tokens accepted for this option

    def matches(self, raw: str) -> bool:
        """Return ``True`` when ``raw`` selects this option.

        Surrounding whitespace and letter case are ignored for the key and for
        every alias alike. ``None`` or empty input never matches a non-empty
        key or alias.
        """
        s = (raw or "").strip().lower()
        if s == self.key.lower():
            return True
        # The input is lowercased, so aliases must be lowercased too; otherwise
        # an alias declared with capitals could never match.
        return any(s == alias.lower() for alias in self.aliases)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def is_exit(raw: str) -> bool:
    """Return ``True`` when ``raw`` is one of the quit words in ``_EXIT_KEYS``.

    Letter case and surrounding whitespace are ignored; ``None`` or empty
    input is treated as an empty string.
    """
    word = (raw or "").strip().lower()
    return word in _EXIT_KEYS
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def render_menu_table(
    console: Console,
    *,
    title: str,
    options: Sequence[MenuOption],
    allow_back: bool = False,
    hint: str | None = None,
) -> None:
    """Print the menu table and the "Pick:" line; never prompts.

    One row is printed per option, plus a ``back`` row when ``allow_back`` is
    set. A truthy ``hint`` is printed dimmed under the table. The final line
    lists every option key, then ``back`` if allowed, then ``exit``.
    """
    menu = Table(
        title=f"[bold]{title}[/bold]",
        box=box.ROUNDED,
        border_style=TEAL,
        padding=(0, 1),
        show_header=True,
        header_style=f"bold {NAVY}",
    )
    menu.add_column("", style=f"bold {TEAL_BRIGHT}", width=10, justify="center")
    menu.add_column("What", style="bold white", min_width=22)
    menu.add_column("Description", style=MUTED)

    rows = [(opt.key, opt.title, opt.detail) for opt in options]
    if allow_back:
        rows.append(("back", "Go back", "Previous menu"))
    for row in rows:
        menu.add_row(*row)
    console.print(menu)

    if hint:
        console.print(f"[dim]{hint}[/dim]")

    picks = ", ".join(f"[cyan]{opt.key}[/cyan]" for opt in options)
    trailing = ["[cyan]back[/cyan]"] if allow_back else []
    trailing.append("[bold red]exit[/bold red]")
    # Each trailing label carries its own ", " prefix, exactly as if appended one by one.
    picks += "".join(f", {label}" for label in trailing)
    console.print(f"[dim]Pick:[/dim] {picks}\n")
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def prompt_menu(
    console: Console,
    *,
    title: str,
    options: Sequence[MenuOption],
    prompt: str = "Choice",
    default_key: str | None = None,
    allow_back: bool = False,
    hint: str | None = None,
) -> str:
    """
    Show a menu table and read until the input is valid.

    The table is rendered once; the prompt then repeats until the answer is an exit
    word, ``back`` (only when ``allow_back``), or matches an option's key or alias.
    Pressing Enter selects ``default_key``, which falls back to the first option's key.

    Returns an option ``key``, ``"exit"``, or ``"back"`` (when ``allow_back``).

    Raises ``IndexError`` when ``options`` is empty and no ``default_key`` is given.
    """
    default_key = default_key or options[0].key
    # Built once, outside the retry loop: everything the retry message advertises.
    accepted = sorted({o.key for o in options})
    if allow_back:
        # "back" is a valid answer here, so the retry message must say so.
        accepted.append("back")
    retry_message = f"[yellow]Not recognized.[/yellow] Try: {', '.join(accepted)} or exit."
    render_menu_table(console, title=title, options=options, allow_back=allow_back, hint=hint)

    while True:
        raw = Prompt.ask(f"[bold #00b4a6]{prompt}[/]", default=default_key, show_default=True)
        if is_exit(raw):
            return "exit"
        if allow_back and (raw or "").strip().lower() == "back":
            return "back"
        for opt in options:
            if opt.matches(raw):
                return opt.key
        console.print(retry_message)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def confirm_continue_session(console: Console) -> bool:
    """Ask whether to keep the session going.

    Returns ``True`` to go back to the main menu (any answer that is not an
    exit word, including plain Enter). Returns ``False`` to quit the CLI,
    after printing a goodbye line.
    """
    console.print()
    answer = Prompt.ask(
        "[dim]Press[/dim] [bold]Enter[/bold] [dim]for main menu · type[/dim] [bold red]exit[/bold red] [dim]to quit[/dim]",
        default="",
        show_default=False,
    )
    if not is_exit(answer):
        return True
    console.print("[dim]Goodbye.[/dim]")
    return False
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""Menu definitions for the interactive CLI."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from chunksmith_cli.menu import MenuOption
|
|
6
|
+
|
|
7
|
+
# Top-level menu: the three modes offered when the CLI starts.
ROOT_MENU: tuple[MenuOption, ...] = (
    MenuOption(
        key="chunking",
        title="Multi-indexing",
        detail="Unstructured → group-by-title → coded markup → LLM outline → mapper",
        aliases=("c", "index"),
    ),
    MenuOption(
        key="view",
        title="View saved runs",
        detail="Inspect outline JSON from saved runs",
        aliases=("v", "browse"),
    ),
    MenuOption(
        key="agent",
        title="Document Q&A",
        detail="Load a saved index and chat (legacy multimodal bundles)",
        aliases=("a", "chat"),
    ),
)
|
|
27
|
+
|
|
28
|
+
# Chunking sub-menu: the two indexing pipelines.
CHUNKING_MENU: tuple[MenuOption, ...] = (
    MenuOption(
        key="1",
        title="Multi-indexing",
        detail="Partition → group → coded markup → LLM outline → mapper (JSON or TOON)",
        aliases=("multi", "indexing"),
    ),
    MenuOption(
        key="2",
        title="PageIndexer",
        detail="PDF parser + LLM outline (no Unstructured partition)",
        aliases=("pageindex", "pi"),
    ),
)
|
|
42
|
+
|
|
43
|
+
# View sub-menu: read-only inspection of saved outlines.
VIEW_MENU: tuple[MenuOption, ...] = (
    MenuOption(
        key="1",
        title="View outlines",
        detail="Section tree, summary, raw JSON (read-only)",
        aliases=("outline", "json", "tree"),
    ),
)
|
|
51
|
+
|
|
52
|
+
# Agent sub-menu: load an index, then chat about it.
AGENT_MENU: tuple[MenuOption, ...] = (
    MenuOption(
        key="1",
        title="Open saved index",
        detail="Enter a path or stem from saved runs",
    ),
    MenuOption(
        key="2",
        title="Chat",
        detail="Ask questions about the loaded document",
    ),
)
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""Shared Rich panels for the CLI."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from rich.console import Console
|
|
8
|
+
from rich.markup import escape
|
|
9
|
+
from rich.panel import Panel
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def print_extraction_warnings_panel(console: Console, warns: Any) -> None:
    """Print the yellow PDF text-quality panel for a non-empty list of warnings.

    Anything that is not a list, and an empty list, prints nothing. Each
    warning is converted with ``str`` and markup-escaped, one per line.
    """
    if not isinstance(warns, list) or not warns:
        return
    escaped_lines = [escape(str(item)) for item in warns]
    quality_panel = Panel(
        "\n".join(escaped_lines),
        title="[bold yellow]PDF text quality (heuristic)[/bold yellow]",
        subtitle="[dim]Does not replace OCR; flags suspicious pages (from run or stored JSON).[/dim]",
        border_style="yellow",
    )
    console.print(quality_panel)
|