kbase-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kbase/__init__.py ADDED
@@ -0,0 +1,3 @@
1
# Version string of the kbase package; the only name exported by the package.
__version__ = "0.1.0"

__all__ = ["__version__"]
kbase/cli.py ADDED
@@ -0,0 +1,31 @@
1
+ import typer
2
+
3
+ from kbase import __version__
4
+ from kbase.commands import add_cmd
5
+ from kbase.commands import config_cmd
6
+ from kbase.commands.ask_cmd import ask_command
7
+ from kbase.commands.doctor_cmd import doctor_command
8
+ from kbase.commands import kb_cmd
9
+ from kbase.commands.init_cmd import init
10
+ from kbase.commands.status_cmd import status_command
11
+
12
# Root Typer application. Registration order is kept as-is so the generated
# help listing stays the same.
app = typer.Typer(help="KBase CLI")

app.command(name="init")(init)

# Sub-command groups, each backed by its own Typer app.
app.add_typer(kb_cmd.app, name="kb")
app.add_typer(add_cmd.app, name="add")
app.add_typer(config_cmd.app, name="config")

# Plain single commands.
app.command(name="ask")(ask_command)
app.command(name="status")(status_command)
app.command(name="doctor")(doctor_command)
20
+
21
+
22
@app.callback()
def main() -> None:
    """kbase root command group."""
    # Intentionally empty: the callback exists only so Typer treats the app
    # as a command group and uses the docstring above as its help text.
25
+
26
+
27
@app.command()
def version() -> None:
    """Print kbase version."""
    banner = f"kbase-cli {__version__}"
    typer.echo(banner)
31
+
@@ -0,0 +1,4 @@
1
+ from . import kb_cmd
2
+ from .init_cmd import init
3
+
4
+ __all__ = ["kb_cmd", "init"]
@@ -0,0 +1,98 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import time
5
+
6
+ import typer
7
+
8
+ from kbase.config import load_config
9
+ from kbase.domain.ingest_service import IngestService
10
+ from kbase.domain.kb_validator import ensure_kb_exists, validate_kb_name
11
+
12
+
13
# Typer sub-application for the "add" command group.
app = typer.Typer(help="Add files into a knowledge base.")

# Single ingest service instance created at import time and handed out by
# get_ingest_service().
_ingest_service = IngestService()
15
+
16
+
17
def get_ingest_service() -> IngestService:
    """Return the module-level ingest service instance."""
    service = _ingest_service
    return service
19
+
20
+
21
def _resolve_kb_name(kb: str | None) -> str:
    """Pick the knowledge base name to use and validate it.

    A non-blank ``kb`` wins; otherwise the ``active_kb`` from the loaded
    configuration is used. The chosen name is passed through
    ``validate_kb_name`` and its result is returned.
    """
    has_explicit_name = kb is not None and bool(kb.strip())
    candidate = kb if has_explicit_name else load_config().active_kb
    return validate_kb_name(candidate)
25
+
26
+
27
def _resolve_target_kb(kb: str | None) -> str:
    """Resolve the target knowledge base and make sure it exists.

    Any ``ValueError`` raised while resolving or checking the name is
    re-raised as ``typer.BadParameter`` attributed to the ``--kb`` option.
    """
    try:
        resolved_name = _resolve_kb_name(kb)
        return ensure_kb_exists(resolved_name)
    except ValueError as exc:
        raise typer.BadParameter(str(exc), param_hint="--kb") from exc
32
+
33
+
34
@app.command("file")
def add_file_command(
    path: str,
    kb: str | None = typer.Option(None, "--kb", help="Target knowledge base."),
    mineru_online: bool = typer.Option(
        False,
        "--mineru-online/--mineru-local",
        help="Use MinerU online API mode (token/url from env).",
    ),
) -> None:
    """Add a single file to knowledge base."""
    # The timer covers knowledge-base resolution as well as the ingest itself.
    started_at = time.perf_counter()
    kb_name = _resolve_target_kb(kb)
    typer.echo(f"开始入库文件(知识库:{kb_name})...")
    ingest = get_ingest_service()
    try:
        asyncio.run(ingest.add_file(kb_name, path, mineru_online=mineru_online))
    except Exception as exc:
        # Any ingest failure is reported and turned into exit code 1.
        elapsed = time.perf_counter() - started_at
        typer.echo(f"文件入库失败,耗时: {elapsed:.2f}s")
        typer.echo(f"Failed to add file: {exc}")
        raise typer.Exit(code=1) from exc
    else:
        elapsed = time.perf_counter() - started_at
        typer.echo("入库完成。")
        typer.echo(f"文件入库耗时: {elapsed:.2f}s")
        typer.echo(f'Added file "{path}" to kb "{kb_name}".')
60
+
61
+
62
@app.command("dir")
def add_dir_command(
    path: str,
    recursive: bool = typer.Option(True, "--recursive/--no-recursive", help="Scan sub-directories."),
    kb: str | None = typer.Option(None, "--kb", help="Target knowledge base."),
    glob: list[str] | None = typer.Option(None, "--glob", help="Glob pattern, can be provided multiple times."),
    mineru_online: bool = typer.Option(
        False,
        "--mineru-online/--mineru-local",
        help="Use MinerU online API mode (token/url from env).",
    ),
) -> None:
    """Add all files under a directory to knowledge base."""
    started_at = time.perf_counter()
    kb_name = _resolve_target_kb(kb)
    typer.echo(f"开始入库目录(知识库:{kb_name})...")
    ingest = get_ingest_service()
    try:
        result = asyncio.run(
            ingest.add_dir(kb_name, path, recursive, glob, mineru_online=mineru_online)
        )
    except Exception as exc:
        # Any ingest failure is reported and turned into exit code 1.
        elapsed = time.perf_counter() - started_at
        typer.echo(f"目录入库失败,耗时: {elapsed:.2f}s")
        typer.echo(f"Failed to add directory: {exc}")
        raise typer.Exit(code=1) from exc

    # Summary counters read from the service result; missing keys count as 0.
    total = int(result.get("total", 0))
    succeeded = int(result.get("succeeded", 0))
    skipped = int(result.get("skipped", 0))
    failed = result.get("failed", [])
    elapsed = time.perf_counter() - started_at

    typer.echo("目录入库完成。")
    typer.echo(f"目录入库耗时: {elapsed:.2f}s")
    typer.echo(f'Add summary for kb "{kb_name}": success={succeeded}, failed={len(failed)}, skipped={skipped}, total={total}')

    # One line per failed item, with placeholders for missing fields.
    for failure in failed:
        item_path = failure.get("path", "<unknown>")
        item_error = failure.get("error", "unknown error")
        typer.echo(f"- FAILED {item_path}: {item_error}")
@@ -0,0 +1,62 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import time
5
+
6
+ import typer
7
+
8
+ from kbase.config import load_config
9
+ from kbase.domain.kb_validator import ensure_kb_exists, validate_kb_name
10
+ from kbase.domain.query_service import QueryService
11
+
12
+
13
# Typer sub-application for the "ask" command.
app = typer.Typer(help="Ask questions against a knowledge base.")

# Single query service instance created at import time and handed out by
# get_query_service().
_query_service = QueryService()
15
+
16
+
17
def get_query_service() -> QueryService:
    """Return the module-level query service instance."""
    service = _query_service
    return service
19
+
20
+
21
def _resolve_kb_name(kb: str | None) -> str:
    """Pick the knowledge base name to use and validate it.

    A non-blank ``kb`` wins; otherwise the ``active_kb`` from the loaded
    configuration is used. The chosen name is passed through
    ``validate_kb_name`` and its result is returned.
    """
    has_explicit_name = kb is not None and bool(kb.strip())
    candidate = kb if has_explicit_name else load_config().active_kb
    return validate_kb_name(candidate)
25
+
26
+
27
def _resolve_target_kb(kb: str | None) -> str:
    """Resolve the target knowledge base and make sure it exists.

    Any ``ValueError`` raised while resolving or checking the name is
    re-raised as ``typer.BadParameter`` attributed to the ``--kb`` option.
    """
    try:
        resolved_name = _resolve_kb_name(kb)
        return ensure_kb_exists(resolved_name)
    except ValueError as exc:
        raise typer.BadParameter(str(exc), param_hint="--kb") from exc
32
+
33
+
34
def _resolve_query_mode(mode: str | None) -> str:
    """Return the query mode to use for a question.

    Args:
        mode: Value of the ``--mode`` option, or ``None`` when not given.

    Returns:
        ``mode`` with surrounding whitespace removed when it is non-blank,
        otherwise ``default_query_mode`` from the loaded configuration.
    """
    if mode is not None:
        # Return the stripped text: the blank check already ignores
        # surrounding whitespace, so the returned value should as well.
        cleaned = mode.strip()
        if cleaned:
            return cleaned
    return load_config().default_query_mode
38
+
39
+
40
@app.command("ask")
def ask_command(
    question: str,
    kb: str | None = typer.Option(None, "--kb", help="Target knowledge base."),
    mode: str | None = typer.Option(None, "--mode", help="Query mode, e.g. hybrid/local/global."),
) -> None:
    """Ask a question and print the answer."""
    # The timer covers option resolution as well as the query itself.
    started_at = time.perf_counter()
    kb_name = _resolve_target_kb(kb)
    query_mode = _resolve_query_mode(mode)
    typer.echo(f"开始检索并生成回答(知识库:{kb_name},模式:{query_mode})...")
    query_service = get_query_service()
    try:
        answer = asyncio.run(
            query_service.ask(kb_name=kb_name, question=question, mode=query_mode)
        )
    except Exception as exc:
        # Any query failure is reported and turned into exit code 1.
        elapsed = time.perf_counter() - started_at
        typer.echo(f"问答失败,耗时: {elapsed:.2f}s")
        typer.echo(f"Failed to ask question: {exc}")
        raise typer.Exit(code=1) from exc
    else:
        elapsed = time.perf_counter() - started_at
        typer.echo("问答完成。")
        typer.echo(f"问答耗时: {elapsed:.2f}s")
        typer.echo(answer)
@@ -0,0 +1,63 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from typing import Any
5
+
6
+ import typer
7
+
8
+ from kbase.config import load_config
9
+
10
+
11
# Typer sub-application for the "config" command group.
app = typer.Typer(help="Show effective configuration.")
12
+
13
+
14
def _mask_secret(value: str) -> str:
    """Return a display-safe rendering of a secret value.

    Args:
        value: The secret text; may be empty.

    Returns:
        ``"<empty>"`` for an empty value, a run of ``*`` of the same length
        for values shorter than 12 characters, and otherwise the first and
        last three characters joined by ``...``.
    """
    if not value:
        return "<empty>"
    # Showing 3 + 3 characters of a short secret would disclose most of it
    # (6 of 7 characters in the worst case), so only values long enough to
    # keep at least half hidden get a visible prefix and suffix.
    min_length_for_partial_reveal = 12
    if len(value) < min_length_for_partial_reveal:
        return "*" * len(value)
    return f"{value[:3]}...{value[-3:]}"
20
+
21
+
22
def _resolve_env(name: str, fallback: str | None = None) -> str:
    """Read environment variable ``name`` with a stripped fallback.

    Returns the stripped variable value when it is set and not blank;
    otherwise the stripped ``fallback`` (an empty string when ``fallback``
    is ``None`` or empty).
    """
    raw = os.environ.get(name)
    cleaned = raw.strip() if raw is not None else ""
    if cleaned:
        return cleaned
    return (fallback or "").strip()
27
+
28
+
29
def _build_effective_config() -> dict[str, Any]:
    """Collect the effective settings from the config file and environment.

    Secrets (LLM key, MinerU token, embedding key) are passed through
    ``_mask_secret``; missing base URLs are shown as ``"<missing>"``.
    The returned dict keeps the order in which entries are displayed.
    """
    cfg = load_config()

    # LLM credentials: LLM_* variables take precedence over OPENAI_*.
    llm_api_key = _resolve_env("LLM_API_KEY", _resolve_env("OPENAI_API_KEY"))
    llm_base_url = _resolve_env("LLM_BASE_URL", _resolve_env("OPENAI_BASE_URL"))

    # Embedding defaults depend on whether the provider is "online".
    embed_provider = _resolve_env("EMBED_PROVIDER", "online").lower()
    is_online = embed_provider == "online"
    default_model = "text-embedding-3-large" if is_online else "bge-m3"
    default_dim = "3072" if is_online else "1024"
    default_embed_key = llm_api_key if is_online else "ollama"

    embed_model = _resolve_env("EMBED_MODEL", default_model)
    embed_dim = _resolve_env("EMBED_DIM", default_dim)
    embed_base_url = _resolve_env("EMBED_BASE_URL", llm_base_url)
    embed_api_key = _resolve_env("EMBED_API_KEY", default_embed_key)

    mineru_token = _resolve_env("KBASE_MINERU_API_TOKEN", _resolve_env("MINERU_API_TOKEN"))

    effective: dict[str, Any] = {}
    effective["active_kb"] = cfg.active_kb
    effective["parser"] = cfg.parser
    effective["default_query_mode"] = cfg.default_query_mode
    effective["llm_api_key"] = _mask_secret(llm_api_key)
    effective["llm_base_url"] = llm_base_url or "<missing>"
    effective["llm_model"] = _resolve_env("LLM_MODEL", _resolve_env("KBASE_LLM_MODEL", "gpt-4o-mini"))
    effective["vision_model"] = _resolve_env("VISION_MODEL", _resolve_env("KBASE_VISION_MODEL", "gpt-4o-mini"))
    effective["mineru_api_token"] = _mask_secret(mineru_token)
    effective["embed_provider"] = embed_provider
    effective["embed_model"] = embed_model
    effective["embed_dim"] = embed_dim
    effective["embed_base_url"] = embed_base_url or "<missing>"
    effective["embed_api_key"] = _mask_secret(embed_api_key)
    return effective
55
+
56
+
57
@app.command("show")
def show_command() -> None:
    """Show effective configuration with masked secrets."""
    # One "key: value" line per setting, in the order the dict was built.
    for setting, shown in _build_effective_config().items():
        typer.echo(f"{setting}: {shown}")
63
+
@@ -0,0 +1,148 @@
1
+ from __future__ import annotations
2
+
3
+ import tempfile
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+ import os
7
+ from typing import Callable
8
+
9
+ import typer
10
+
11
+ from kbase.config import load_config
12
+ from kbase.domain.kb_validator import ensure_kb_exists
13
+ from kbase.infra.storage import kbase_home
14
+
15
+
16
@dataclass
class CheckResult:
    """Outcome of a single diagnostic check."""

    # One of "PASS", "WARN" or "FAIL".
    status: str
    # Short identifier of the check, printed next to the status.
    name: str
    # Human-readable explanation of the outcome.
    detail: str
21
+
22
+
23
def _run_check(name: str, fn: Callable[[], tuple[bool, str]], warn: bool = False) -> CheckResult:
    """Run one diagnostic callable and wrap its outcome in a CheckResult.

    ``fn`` returns ``(ok, detail)``. A truthy ``ok`` gives "PASS"; a falsy
    one gives "WARN" when ``warn`` is set and "FAIL" otherwise. An exception
    raised by ``fn`` gives "FAIL" with the exception text as detail.
    """
    try:
        passed, detail = fn()
    except Exception as exc:  # pragma: no cover
        return CheckResult("FAIL", name, str(exc))

    if passed:
        status = "PASS"
    elif warn:
        status = "WARN"
    else:
        status = "FAIL"
    return CheckResult(status, name, detail)
31
+
32
+
33
def doctor_command() -> None:
    """Run preflight diagnostics for environment and runtime dependencies."""
    results: list[CheckResult] = []

    # Boxes let later checks reuse objects produced by earlier ones.
    cfg_box: dict[str, object] = {}
    rag_box: dict[str, object] = {}

    # --- configuration -----------------------------------------------------
    def check_config() -> tuple[bool, str]:
        loaded = load_config()
        cfg_box["cfg"] = loaded
        summary = (
            f"active_kb={loaded.active_kb}, parser={loaded.parser}, "
            f"default_query_mode={loaded.default_query_mode}"
        )
        return True, summary

    results.append(_run_check("config", check_config))

    # --- storage home (a missing directory is only a warning) --------------
    def check_kbase_home() -> tuple[bool, str]:
        home = kbase_home()
        if home.exists():
            return True, f"KBASE_HOME={home} exists"
        return False, f"KBASE_HOME={home} not exists yet"

    results.append(_run_check("kbase_home", check_kbase_home, warn=True))

    # --- active knowledge base ---------------------------------------------
    def check_active_kb() -> tuple[bool, str]:
        loaded = cfg_box.get("cfg")
        if loaded is None:
            return False, "config unavailable"
        kb_name = getattr(loaded, "active_kb", "default")
        # ensure_kb_exists raising is reported as FAIL by _run_check.
        ensure_kb_exists(str(kb_name))
        return True, f'knowledge base "{kb_name}" exists'

    results.append(_run_check("active_kb", check_active_kb))

    # --- environment variables ---------------------------------------------
    def check_llm_key() -> tuple[bool, str]:
        api_key = os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY", "")
        if api_key:
            # Only the length is reported, never the key itself.
            return True, f"LLM API key present (length={len(api_key)})"
        return False, "LLM_API_KEY missing (or OPENAI_API_KEY)"

    results.append(_run_check("llm_api_key", check_llm_key))

    def check_llm_base_url() -> tuple[bool, str]:
        base_url = os.getenv("LLM_BASE_URL") or os.getenv("OPENAI_BASE_URL", "")
        if base_url:
            return True, f"LLM_BASE_URL={base_url}"
        return False, "LLM_BASE_URL missing (or OPENAI_BASE_URL)"

    results.append(_run_check("llm_base_url", check_llm_base_url))

    def check_mineru_token() -> tuple[bool, str]:
        token = os.getenv("KBASE_MINERU_API_TOKEN") or os.getenv("MINERU_API_TOKEN", "")
        if token:
            return True, f"MinerU token present (length={len(token)})"
        return False, "MINERU_API_TOKEN missing (or KBASE_MINERU_API_TOKEN)"

    results.append(_run_check("mineru_api_token", check_mineru_token))

    # --- raganything availability ------------------------------------------
    def check_raganything_import() -> tuple[bool, str]:
        # Try the top-level import first, then fall back to the submodules.
        try:
            from raganything import RAGAnything, RAGAnythingConfig  # type: ignore
        except Exception:
            from raganything.raganything import RAGAnything  # type: ignore
            from raganything.config import RAGAnythingConfig  # type: ignore
        rag_box["RAGAnything"] = RAGAnything
        rag_box["RAGAnythingConfig"] = RAGAnythingConfig
        return True, "raganything import ok"

    results.append(_run_check("raganything_import", check_raganything_import))

    # --- parser installation -----------------------------------------------
    def check_parser_installation() -> tuple[bool, str]:
        loaded = cfg_box.get("cfg")
        if loaded is None:
            return False, "config unavailable"
        parser = getattr(loaded, "parser", "mineru-cloud")
        rag_cls = rag_box.get("RAGAnything")
        rag_config_cls = rag_box.get("RAGAnythingConfig")
        if rag_cls is None or rag_config_cls is None:
            return False, "raganything unavailable"

        # A throwaway working directory keeps the probe out of real storage.
        with tempfile.TemporaryDirectory(prefix="kbase-doctor-") as temp_dir:
            try:
                config = rag_config_cls(working_dir=temp_dir, parser=parser, parse_method="auto")
                rag = rag_cls(
                    config=config,
                    lightrag_kwargs={"working_dir": temp_dir},
                )
            except Exception as exc:
                message = str(exc)
                unsupported_cloud = (
                    "Unsupported parser type" in message and "mineru-cloud" in message
                )
                if not unsupported_cloud:
                    raise
                return (
                    False,
                    'Current raganything package does not support "mineru-cloud". '
                    "Please install your patched raganything build.",
                )

            checker = getattr(rag, "check_parser_installation", None)
            if checker is None:
                return True, f'parser "{parser}" configured (no explicit checker method)'
            # The checker is first called without arguments; on TypeError it
            # is retried with the parser name.
            try:
                ok = bool(checker())
            except TypeError:
                ok = bool(checker(parser))
            outcome = "verified" if ok else "not available"
            return ok, f'parser "{parser}" installation {outcome}'

    results.append(_run_check("parser_installation", check_parser_installation))

    # --- report --------------------------------------------------------------
    for result in results:
        typer.echo(f"[{result.status}] {result.name}: {result.detail}")

    fail_count = sum(1 for result in results if result.status == "FAIL")
    warn_count = sum(1 for result in results if result.status == "WARN")
    typer.echo(f"\nDoctor summary: fail={fail_count}, warn={warn_count}, total={len(results)}")
    if fail_count > 0:
        typer.echo(
            "Suggested fixes: set MINERU_API_TOKEN, LLM_API_KEY, LLM_BASE_URL, "
            "ensure active kb exists, run `uv run kbase init`."
        )
        raise typer.Exit(code=1)
@@ -0,0 +1,13 @@
1
+ import typer
2
+
3
+ from kbase.domain.kb_manager import init_workspace
4
+
5
+
6
def init() -> None:
    """Initialize kbase workspace with default knowledge base."""
    try:
        init_workspace()
    except ValueError as exc:
        # Report the problem and exit with status 1.
        typer.echo(f"Init failed: {exc}")
        raise typer.Exit(code=1) from exc
    else:
        typer.echo("Workspace initialized with default knowledge base.")
@@ -0,0 +1,60 @@
1
+ import typer
2
+ import time
3
+
4
+ from kbase.domain.kb_manager import create_kb, list_kbs, use_kb
5
+
6
+
7
# Typer sub-application for the "kb" command group.
app = typer.Typer(help="Manage knowledge bases.")
8
+
9
+
10
@app.command("list")
def list_command() -> None:
    """List all knowledge bases."""
    started_at = time.perf_counter()
    typer.echo("开始列出知识库...")
    entries = list_kbs()

    if entries:
        # Each entry is a (name, is_active) pair; the active one is tagged.
        for kb_name, is_active in entries:
            suffix = " (active)" if is_active else ""
            typer.echo(f"- {kb_name}{suffix}")
        elapsed = time.perf_counter() - started_at
        typer.echo(f"列出知识库完成,耗时: {elapsed:.2f}s")
        return

    elapsed = time.perf_counter() - started_at
    typer.echo("No knowledge bases found.")
    typer.echo(f"耗时: {elapsed:.2f}s")
26
+
27
+
28
@app.command("create")
def create_command(name: str, use: bool = typer.Option(False, "--use", help="Switch to the kb after creation.")) -> None:
    """Create a knowledge base."""
    started_at = time.perf_counter()
    typer.echo(f'开始创建知识库 "{name}"...')
    try:
        create_kb(name, use=use)
    except ValueError as exc:
        # A ValueError from create_kb is reported as a parameter error.
        elapsed = time.perf_counter() - started_at
        typer.echo(f"创建知识库失败,耗时: {elapsed:.2f}s")
        raise typer.BadParameter(str(exc)) from exc

    elapsed = time.perf_counter() - started_at
    confirmation = (
        f'Created knowledge base "{name}" and set active.'
        if use
        else f'Created knowledge base "{name}".'
    )
    typer.echo(confirmation)
    typer.echo(f"创建知识库完成,耗时: {elapsed:.2f}s")
45
+
46
+
47
@app.command("use")
def use_command(name: str) -> None:
    """Switch active knowledge base."""
    started_at = time.perf_counter()
    typer.echo(f'开始切换知识库到 "{name}"...')
    try:
        use_kb(name)
    except ValueError as exc:
        # A ValueError from use_kb is reported as a parameter error.
        elapsed = time.perf_counter() - started_at
        typer.echo(f"切换知识库失败,耗时: {elapsed:.2f}s")
        raise typer.BadParameter(str(exc)) from exc
    else:
        elapsed = time.perf_counter() - started_at
        typer.echo(f'Active knowledge base set to "{name}".')
        typer.echo(f"切换知识库完成,耗时: {elapsed:.2f}s")
@@ -0,0 +1,105 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+ import typer
8
+
9
+ from kbase.config import load_config
10
+ from kbase.domain.kb_validator import ensure_kb_exists, validate_kb_name
11
+ from kbase.infra.storage import kb_dir
12
+
13
+
14
def _resolve_kb_name(kb: str | None) -> str:
    """Pick the knowledge base name to use and validate it.

    A non-blank ``kb`` wins; otherwise the ``active_kb`` from the loaded
    configuration is used. The chosen name is passed through
    ``validate_kb_name`` and its result is returned.
    """
    has_explicit_name = kb is not None and bool(kb.strip())
    candidate = kb if has_explicit_name else load_config().active_kb
    return validate_kb_name(candidate)
18
+
19
+
20
def _resolve_target_kb(kb: str | None) -> str:
    """Resolve the knowledge base name and pass it through ``ensure_kb_exists``."""
    resolved_name = _resolve_kb_name(kb)
    return ensure_kb_exists(resolved_name)
22
+
23
+
24
def _load_json_file(path: Path) -> dict[str, Any]:
    """Load a JSON object from ``path``.

    Returns an empty dict when ``path`` does not exist, is not a regular
    file, or holds JSON whose top-level value is not an object. Malformed
    JSON raises the decoder's exception.
    """
    if not (path.exists() and path.is_file()):
        return {}
    payload = json.loads(path.read_text(encoding="utf-8"))
    if isinstance(payload, dict):
        return payload
    return {}
30
+
31
+
32
def _kv_count(data: dict[str, Any]) -> int:
    """Count the keys of ``data`` whose text form does not start with ``_``."""
    visible_keys = [key for key in data if not str(key).startswith("_")]
    return len(visible_keys)
34
+
35
+
36
def _status_counts(doc_status: dict[str, Any]) -> tuple[int, int, int]:
    """Tally document states as ``(success, failed, pending)``.

    Only dict values are considered. Their ``status`` is compared
    case-insensitively: processed/success/done count as success,
    failed/error as failed, and anything else (including a missing status)
    as pending.
    """
    done_states = ("processed", "success", "done")
    error_states = ("failed", "error")

    states = [
        str(entry.get("status", "")).lower()
        for entry in doc_status.values()
        if isinstance(entry, dict)
    ]
    success = sum(1 for state in states if state in done_states)
    failed = sum(1 for state in states if state in error_states)
    pending = len(states) - success - failed
    return success, failed, pending
51
+
52
+
53
def _sum_chunks(doc_status: dict[str, Any]) -> int:
    """Add up ``chunks_count`` over all dict entries of ``doc_status``.

    Non-dict entries are skipped, a missing count is treated as 0, and
    counts that cannot be converted with ``int`` contribute nothing.
    """
    running_total = 0
    for entry in doc_status.values():
        if isinstance(entry, dict):
            try:
                count = int(entry.get("chunks_count", 0))
            except (TypeError, ValueError):
                count = 0
            running_total += count
    return running_total
63
+
64
+
65
def _latest_update(doc_status: dict[str, Any]) -> str:
    """Return the greatest ``updated_at`` value found in ``doc_status``.

    Args:
        doc_status: Mapping whose dict values may carry an ``updated_at``
            field. Non-dict values are ignored.

    Returns:
        The largest ``updated_at`` by string comparison, or ``"n/a"`` when
        no entry has a usable value.
        NOTE(review): string ordering matches chronological order only for
        uniformly formatted timestamps such as ISO 8601 — confirm the
        format written by the storage layer.
    """
    latest = ""
    for entry in doc_status.values():
        if not isinstance(entry, dict):
            continue
        raw = entry.get("updated_at")
        # A null/absent timestamp must be skipped before str(): str(None) is
        # "None", which compares greater than any digit-leading timestamp
        # and would be reported as the latest update.
        if raw is None:
            continue
        updated = str(raw)
        if updated > latest:
            latest = updated
    return latest or "n/a"
74
+
75
+
76
def status_command(kb: str | None = typer.Option(None, "--kb", help="Target knowledge base.")) -> None:
    """Show active knowledge base and storage/index statistics."""
    try:
        active_kb = validate_kb_name(load_config().active_kb)
        kb_name = _resolve_target_kb(kb)
        target_path = kb_dir(kb_name)

        # The three JSON stores read from the knowledge base directory.
        full_docs, doc_status, parse_cache = (
            _load_json_file(target_path / filename)
            for filename in (
                "kv_store_full_docs.json",
                "kv_store_doc_status.json",
                "kv_store_parse_cache.json",
            )
        )

        # Document count is the larger of the two stores' visible key counts.
        docs = max(_kv_count(full_docs), _kv_count(doc_status))
        chunks = _sum_chunks(doc_status)
        success, failed, pending = _status_counts(doc_status)
        cache_entries = _kv_count(parse_cache)
        if target_path.exists():
            file_count = sum(1 for entry in target_path.rglob("*") if entry.is_file())
        else:
            file_count = 0

        typer.echo(f"active_kb: {active_kb}")
        typer.echo(f"kb: {kb_name}")
        typer.echo(f"kb_path: {target_path}")
        typer.echo("stats:")
        numeric_stats = (
            ("docs", docs),
            ("chunks", chunks),
            ("doc_status_success", success),
            ("doc_status_failed", failed),
            ("doc_status_pending", pending),
            ("parse_cache_entries", cache_entries),
            ("storage_files", file_count),
        )
        for label, number in numeric_stats:
            typer.echo(f"  {label}: {number}")
        typer.echo(f"  last_updated: {_latest_update(doc_status)}")
    except Exception as exc:
        # Any failure above is reported as one line and exit code 1.
        typer.echo(f"Failed to get status: {exc}")
        raise typer.Exit(code=1) from exc
@@ -0,0 +1,4 @@
1
+ from kbase.config.loader import load_config
2
+ from kbase.config.model import AppConfig
3
+
4
+ __all__ = ["AppConfig", "load_config"]
kbase/config/loader.py ADDED
@@ -0,0 +1,42 @@
1
+ import os
2
+ from typing import Any
3
+
4
+ from kbase.config.model import AppConfig
5
+ from kbase.infra.storage import config_path
6
+
7
+ try:
8
+ import tomllib
9
+ except ModuleNotFoundError: # pragma: no cover
10
+ import tomli as tomllib
11
+
12
+
13
def _load_file_values() -> dict[str, Any]:
    """Read the TOML config file into a dict.

    Returns an empty dict when the file at ``config_path()`` does not
    exist. Raises ``ValueError`` when the file cannot be parsed as TOML or
    its root is not a table.
    """
    cfg_file = config_path()
    if not cfg_file.exists():
        return {}

    text = cfg_file.read_text(encoding="utf-8")
    try:
        parsed = tomllib.loads(text)
    except tomllib.TOMLDecodeError as exc:
        raise ValueError(f"Failed to parse config file at {cfg_file}: {exc}") from exc

    if isinstance(parsed, dict):
        return parsed
    raise ValueError(f"Invalid config format at {cfg_file}: root must be a table")
25
+
26
+
27
def _load_env_values() -> dict[str, str]:
    """Collect config overrides from ``KBASE_*`` environment variables.

    Only variables that are set to a non-empty string are included; the
    keys are the matching config field names.
    """
    env_names = (
        ("active_kb", "KBASE_ACTIVE_KB"),
        ("parser", "KBASE_PARSER"),
        ("default_query_mode", "KBASE_DEFAULT_QUERY_MODE"),
    )
    overrides: dict[str, str] = {}
    for field, env_name in env_names:
        value = os.getenv(env_name)
        if value is None or value == "":
            continue
        overrides[field] = value
    return overrides
34
+
35
+
36
def load_config() -> AppConfig:
    """Build the application config from the config file and environment.

    Environment values override file values; ``parser`` falls back to
    ``"mineru-cloud"`` when neither source provides it. The merged mapping
    is validated by ``AppConfig.model_validate``.
    """
    merged = dict(_load_file_values())
    merged.update(_load_env_values())
    merged.setdefault("parser", "mineru-cloud")
    return AppConfig.model_validate(merged)
kbase/config/model.py ADDED
@@ -0,0 +1,8 @@
1
+ from pydantic import BaseModel
2
+ from typing import Literal
3
+
4
+
5
class AppConfig(BaseModel):
    """Validated application settings."""

    # Name of the knowledge base used when none is given on the command line.
    active_kb: str = "default"
    # Document parser backend; restricted to the listed literals.
    parser: Literal[
        "mineru-cloud",
        "mineru",
        "docling",
        "paddleocr",
    ] = "mineru-cloud"
    # Query mode used when none is given on the command line.
    default_query_mode: Literal[
        "hybrid",
        "local",
        "global",
        "naive",
        "mix",
        "bypass",
    ] = "hybrid"
@@ -0,0 +1,3 @@
1
+ from .kb_manager import create_kb, init_workspace, list_kbs, use_kb
2
+
3
+ __all__ = ["init_workspace", "create_kb", "list_kbs", "use_kb"]