kbase-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kbase/__init__.py +3 -0
- kbase/cli.py +31 -0
- kbase/commands/__init__.py +4 -0
- kbase/commands/add_cmd.py +98 -0
- kbase/commands/ask_cmd.py +62 -0
- kbase/commands/config_cmd.py +63 -0
- kbase/commands/doctor_cmd.py +148 -0
- kbase/commands/init_cmd.py +13 -0
- kbase/commands/kb_cmd.py +60 -0
- kbase/commands/status_cmd.py +105 -0
- kbase/config/__init__.py +4 -0
- kbase/config/loader.py +42 -0
- kbase/config/model.py +8 -0
- kbase/domain/__init__.py +3 -0
- kbase/domain/ingest_service.py +29 -0
- kbase/domain/kb_manager.py +71 -0
- kbase/domain/kb_validator.py +26 -0
- kbase/domain/query_service.py +12 -0
- kbase/infra/__init__.py +3 -0
- kbase/infra/model_builders.py +145 -0
- kbase/infra/rag_adapter.py +402 -0
- kbase/infra/storage.py +17 -0
- kbase_cli-0.1.0.dist-info/METADATA +124 -0
- kbase_cli-0.1.0.dist-info/RECORD +26 -0
- kbase_cli-0.1.0.dist-info/WHEEL +4 -0
- kbase_cli-0.1.0.dist-info/entry_points.txt +2 -0
kbase/__init__.py
ADDED
kbase/cli.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import typer
|
|
2
|
+
|
|
3
|
+
from kbase import __version__
|
|
4
|
+
from kbase.commands import add_cmd
|
|
5
|
+
from kbase.commands import config_cmd
|
|
6
|
+
from kbase.commands.ask_cmd import ask_command
|
|
7
|
+
from kbase.commands.doctor_cmd import doctor_command
|
|
8
|
+
from kbase.commands import kb_cmd
|
|
9
|
+
from kbase.commands.init_cmd import init
|
|
10
|
+
from kbase.commands.status_cmd import status_command
|
|
11
|
+
|
|
12
|
+
# Root Typer application; every command and command group is attached below.
app = typer.Typer(help="KBase CLI")
# `init` is a plain function registered directly as a top-level command.
app.command("init")(init)
# Sub-applications mounted as command groups: `kb`, `add` and `config`.
app.add_typer(kb_cmd.app, name="kb")
app.add_typer(add_cmd.app, name="add")
app.add_typer(config_cmd.app, name="config")
# Remaining top-level commands, registered from their imported functions.
app.command("ask")(ask_command)
app.command("status")(status_command)
app.command("doctor")(doctor_command)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# Root callback for the application: it declares no options and does no work.
@app.callback()
def main() -> None:
    """kbase root command group."""
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@app.command()
def version() -> None:
    """Print kbase version."""
    # The banner is the distribution name followed by the package __version__.
    banner = f"kbase-cli {__version__}"
    typer.echo(banner)
|
|
31
|
+
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import time
|
|
5
|
+
|
|
6
|
+
import typer
|
|
7
|
+
|
|
8
|
+
from kbase.config import load_config
|
|
9
|
+
from kbase.domain.ingest_service import IngestService
|
|
10
|
+
from kbase.domain.kb_validator import ensure_kb_exists, validate_kb_name
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Typer sub-application holding the `file` and `dir` commands defined below.
app = typer.Typer(help="Add files into a knowledge base.")
# Single IngestService instance, created at import time and handed out by
# get_ingest_service().
_ingest_service = IngestService()
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def get_ingest_service() -> IngestService:
    """Return the module-level ``IngestService`` instance."""
    service = _ingest_service
    return service
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _resolve_kb_name(kb: str | None) -> str:
    """Return the validated knowledge-base name to operate on.

    An explicit, non-blank ``kb`` takes precedence; otherwise the
    ``active_kb`` value of the loaded configuration is used. Whichever
    candidate is chosen is passed through ``validate_kb_name``.
    """
    has_explicit_name = kb is not None and bool(kb.strip())
    candidate = kb if has_explicit_name else load_config().active_kb
    return validate_kb_name(candidate)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _resolve_target_kb(kb: str | None) -> str:
    """Resolve the target knowledge base and confirm it exists.

    Returns whatever ``ensure_kb_exists`` returns for the resolved name.

    Raises:
        typer.BadParameter: when resolution or the existence check raises
            ``ValueError``; the error is attributed to ``--kb``.
    """
    try:
        resolved = _resolve_kb_name(kb)
        return ensure_kb_exists(resolved)
    except ValueError as exc:
        raise typer.BadParameter(str(exc), param_hint="--kb") from exc
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@app.command("file")
def add_file_command(
    path: str,
    kb: str | None = typer.Option(None, "--kb", help="Target knowledge base."),
    mineru_online: bool = typer.Option(
        False,
        "--mineru-online/--mineru-local",
        help="Use MinerU online API mode (token/url from env).",
    ),
) -> None:
    """Add a single file to knowledge base."""
    # The timer starts before the knowledge base is resolved, so the reported
    # duration covers resolution as well as ingestion.
    start = time.perf_counter()
    kb_name = _resolve_target_kb(kb)
    typer.echo(f"开始入库文件(知识库:{kb_name})...")
    ingest = get_ingest_service()
    try:
        asyncio.run(ingest.add_file(kb_name, path, mineru_online=mineru_online))
    except Exception as exc:
        # Any ingestion failure is reported and mapped to exit code 1.
        elapsed = time.perf_counter() - start
        typer.echo(f"文件入库失败,耗时: {elapsed:.2f}s")
        typer.echo(f"Failed to add file: {exc}")
        raise typer.Exit(code=1) from exc
    else:
        elapsed = time.perf_counter() - start
        typer.echo("入库完成。")
        typer.echo(f"文件入库耗时: {elapsed:.2f}s")
        typer.echo(f'Added file "{path}" to kb "{kb_name}".')
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@app.command("dir")
def add_dir_command(
    path: str,
    recursive: bool = typer.Option(True, "--recursive/--no-recursive", help="Scan sub-directories."),
    kb: str | None = typer.Option(None, "--kb", help="Target knowledge base."),
    glob: list[str] | None = typer.Option(None, "--glob", help="Glob pattern, can be provided multiple times."),
    mineru_online: bool = typer.Option(
        False,
        "--mineru-online/--mineru-local",
        help="Use MinerU online API mode (token/url from env).",
    ),
) -> None:
    """Add all files under a directory to knowledge base."""
    # The timer starts before the knowledge base is resolved, so the reported
    # duration covers resolution as well as ingestion.
    start = time.perf_counter()
    kb_name = _resolve_target_kb(kb)
    typer.echo(f"开始入库目录(知识库:{kb_name})...")
    ingest = get_ingest_service()
    try:
        pending = ingest.add_dir(kb_name, path, recursive, glob, mineru_online=mineru_online)
        result = asyncio.run(pending)
    except Exception as exc:
        # Any ingestion failure is reported and mapped to exit code 1.
        elapsed = time.perf_counter() - start
        typer.echo(f"目录入库失败,耗时: {elapsed:.2f}s")
        typer.echo(f"Failed to add directory: {exc}")
        raise typer.Exit(code=1) from exc

    # The service result is read as a mapping; absent counters default to 0
    # and an absent failure list to empty.
    total = int(result.get("total", 0))
    succeeded = int(result.get("succeeded", 0))
    skipped = int(result.get("skipped", 0))
    failed = result.get("failed", [])
    elapsed = time.perf_counter() - start

    typer.echo("目录入库完成。")
    typer.echo(f"目录入库耗时: {elapsed:.2f}s")
    typer.echo(f'Add summary for kb "{kb_name}": success={succeeded}, failed={len(failed)}, skipped={skipped}, total={total}')
    # One line per failed item, with placeholders for missing fields.
    for entry in failed:
        item_path = entry.get("path", "<unknown>")
        item_error = entry.get("error", "unknown error")
        typer.echo(f"- FAILED {item_path}: {item_error}")
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import time
|
|
5
|
+
|
|
6
|
+
import typer
|
|
7
|
+
|
|
8
|
+
from kbase.config import load_config
|
|
9
|
+
from kbase.domain.kb_validator import ensure_kb_exists, validate_kb_name
|
|
10
|
+
from kbase.domain.query_service import QueryService
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Typer sub-application for this module; the `ask` command below is registered on it.
app = typer.Typer(help="Ask questions against a knowledge base.")
# Single QueryService instance, created at import time and handed out by
# get_query_service().
_query_service = QueryService()
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def get_query_service() -> QueryService:
    """Return the module-level ``QueryService`` instance."""
    service = _query_service
    return service
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _resolve_kb_name(kb: str | None) -> str:
    """Return the validated knowledge-base name to query.

    A non-blank ``kb`` argument is used as given; otherwise the name comes
    from ``load_config().active_kb``. The chosen name is always run through
    ``validate_kb_name``.
    """
    if kb is None or not kb.strip():
        return validate_kb_name(load_config().active_kb)
    return validate_kb_name(kb)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _resolve_target_kb(kb: str | None) -> str:
    """Resolve the knowledge base to query and confirm it exists.

    Returns whatever ``ensure_kb_exists`` returns for the resolved name.

    Raises:
        typer.BadParameter: when resolution or the existence check raises
            ``ValueError``; the error is attributed to ``--kb``.
    """
    try:
        resolved = _resolve_kb_name(kb)
        return ensure_kb_exists(resolved)
    except ValueError as exc:
        raise typer.BadParameter(str(exc), param_hint="--kb") from exc
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _resolve_query_mode(mode: str | None) -> str:
    """Return the query mode to use for a question.

    Args:
        mode: Value of ``--mode``; ``None`` or a blank string means
            "not provided".

    Returns:
        The provided mode with surrounding whitespace removed, or the
        ``default_query_mode`` from the loaded configuration when no mode
        was provided.
    """
    # Strip once and reuse the result: the blank check already ignores
    # surrounding whitespace, so the returned value must ignore it too.
    requested = mode.strip() if mode is not None else ""
    if requested:
        return requested
    return load_config().default_query_mode
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@app.command("ask")
def ask_command(
    question: str,
    kb: str | None = typer.Option(None, "--kb", help="Target knowledge base."),
    mode: str | None = typer.Option(None, "--mode", help="Query mode, e.g. hybrid/local/global."),
) -> None:
    """Ask a question and print the answer."""
    # The timer starts before option resolution, so the reported duration
    # covers resolving the knowledge base and the mode as well as the query.
    start = time.perf_counter()
    kb_name = _resolve_target_kb(kb)
    query_mode = _resolve_query_mode(mode)
    typer.echo(f"开始检索并生成回答(知识库:{kb_name},模式:{query_mode})...")
    querier = get_query_service()
    try:
        pending = querier.ask(kb_name=kb_name, question=question, mode=query_mode)
        answer = asyncio.run(pending)
    except Exception as exc:
        # Any query failure is reported and mapped to exit code 1.
        elapsed = time.perf_counter() - start
        typer.echo(f"问答失败,耗时: {elapsed:.2f}s")
        typer.echo(f"Failed to ask question: {exc}")
        raise typer.Exit(code=1) from exc
    else:
        elapsed = time.perf_counter() - start
        typer.echo("问答完成。")
        typer.echo(f"问答耗时: {elapsed:.2f}s")
        typer.echo(answer)
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
import typer
|
|
7
|
+
|
|
8
|
+
from kbase.config import load_config
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Typer sub-application holding the `show` command defined below.
app = typer.Typer(help="Show effective configuration.")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _mask_secret(value: str) -> str:
    """Return a display-safe rendering of a secret.

    Args:
        value: The secret text; may be empty.

    Returns:
        ``"<empty>"`` for an empty value, a run of ``*`` of the same length
        for secrets shorter than 12 characters, and otherwise the first and
        last three characters joined by ``...``.
    """
    visible_each_side = 3
    # Show the ends only when the six visible characters are at most half of
    # the secret; a shorter secret would otherwise be mostly printed in clear.
    min_length_for_partial = visible_each_side * 4
    if not value:
        return "<empty>"
    if len(value) < min_length_for_partial:
        return "*" * len(value)
    return f"{value[:visible_each_side]}...{value[-visible_each_side:]}"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _resolve_env(name: str, fallback: str | None = None) -> str:
    """Read environment variable ``name`` with a fallback.

    Returns the stripped environment value when it is set and non-blank;
    otherwise the stripped ``fallback`` (an empty string when ``fallback`` is
    ``None`` or empty).
    """
    from_env = (os.getenv(name) or "").strip()
    if from_env:
        return from_env
    return (fallback or "").strip()
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _build_effective_config() -> dict[str, Any]:
    """Collect the effective settings shown by ``config show``.

    Values come from the loaded configuration and from environment variables
    (with fallbacks). API keys and the MinerU token are passed through
    ``_mask_secret``; empty base URLs are rendered as ``"<missing>"``.
    """
    cfg = load_config()

    # LLM credentials: the LLM_* variables win over their OPENAI_* counterparts.
    llm_api_key = _resolve_env("LLM_API_KEY", _resolve_env("OPENAI_API_KEY"))
    llm_base_url = _resolve_env("LLM_BASE_URL", _resolve_env("OPENAI_BASE_URL"))

    # Embedding defaults depend on whether the provider is "online".
    embed_provider = _resolve_env("EMBED_PROVIDER", "online").lower()
    if embed_provider == "online":
        default_model, default_dim, default_embed_key = "text-embedding-3-large", "3072", llm_api_key
    else:
        default_model, default_dim, default_embed_key = "bge-m3", "1024", "ollama"
    embed_model = _resolve_env("EMBED_MODEL", default_model)
    embed_dim = _resolve_env("EMBED_DIM", default_dim)
    embed_base_url = _resolve_env("EMBED_BASE_URL", llm_base_url)
    embed_api_key = _resolve_env("EMBED_API_KEY", default_embed_key)

    mineru_token = _resolve_env("KBASE_MINERU_API_TOKEN", _resolve_env("MINERU_API_TOKEN"))
    llm_model = _resolve_env("LLM_MODEL", _resolve_env("KBASE_LLM_MODEL", "gpt-4o-mini"))
    vision_model = _resolve_env("VISION_MODEL", _resolve_env("KBASE_VISION_MODEL", "gpt-4o-mini"))

    # Insertion order is the order in which `show` prints the entries.
    effective: dict[str, Any] = {}
    effective["active_kb"] = cfg.active_kb
    effective["parser"] = cfg.parser
    effective["default_query_mode"] = cfg.default_query_mode
    effective["llm_api_key"] = _mask_secret(llm_api_key)
    effective["llm_base_url"] = llm_base_url or "<missing>"
    effective["llm_model"] = llm_model
    effective["vision_model"] = vision_model
    effective["mineru_api_token"] = _mask_secret(mineru_token)
    effective["embed_provider"] = embed_provider
    effective["embed_model"] = embed_model
    effective["embed_dim"] = embed_dim
    effective["embed_base_url"] = embed_base_url or "<missing>"
    effective["embed_api_key"] = _mask_secret(embed_api_key)
    return effective
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@app.command("show")
def show_command() -> None:
    """Show effective configuration with masked secrets."""
    # One "key: value" line per entry, in the order the mapping was built.
    for key, value in _build_effective_config().items():
        typer.echo(f"{key}: {value}")
|
|
63
|
+
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import tempfile
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
import os
|
|
7
|
+
from typing import Callable
|
|
8
|
+
|
|
9
|
+
import typer
|
|
10
|
+
|
|
11
|
+
from kbase.config import load_config
|
|
12
|
+
from kbase.domain.kb_validator import ensure_kb_exists
|
|
13
|
+
from kbase.infra.storage import kbase_home
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class CheckResult:
    """Outcome of one diagnostic check run by ``doctor``."""

    # One of "PASS", "WARN" or "FAIL".
    status: str
    # Identifier of the check, printed next to the status.
    name: str
    # Detail text returned by the check, or the message of the exception it raised.
    detail: str
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _run_check(name: str, fn: Callable[[], tuple[bool, str]], warn: bool = False) -> CheckResult:
    """Run one diagnostic callable and wrap its outcome in a ``CheckResult``.

    Args:
        name: Identifier stored on the result.
        fn: Zero-argument callable returning ``(ok, detail)``.
        warn: When true, an unsuccessful check is reported as ``WARN``
            instead of ``FAIL``.

    Returns:
        ``PASS`` when ``fn`` reports success, ``WARN``/``FAIL`` when it does
        not, and ``FAIL`` carrying the exception text when ``fn`` raises.
    """
    try:
        passed, message = fn()
    except Exception as exc:  # pragma: no cover
        return CheckResult("FAIL", name, str(exc))

    if passed:
        status = "PASS"
    elif warn:
        status = "WARN"
    else:
        status = "FAIL"
    return CheckResult(status, name, message)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def doctor_command() -> None:
    """Run preflight diagnostics for environment and runtime dependencies."""
    # Each check below is a closure returning (ok, detail). `_run_check`
    # converts that result, or any exception the closure raises, into a
    # CheckResult. Checks run in the order they are appended.
    results: list[CheckResult] = []

    # Holds the loaded config under "cfg" so later checks can reuse it; it
    # stays empty when load_config() raises.
    cfg_box: dict[str, object] = {}

    def check_config() -> tuple[bool, str]:
        cfg = load_config()
        cfg_box["cfg"] = cfg
        return True, f"active_kb={cfg.active_kb}, parser={cfg.parser}, default_query_mode={cfg.default_query_mode}"

    results.append(_run_check("config", check_config))

    def check_kbase_home() -> tuple[bool, str]:
        home = kbase_home()
        exists = home.exists()
        return (exists, f"KBASE_HOME={home} {'exists' if exists else 'not exists yet'}")

    # A missing home directory is only a warning (warn=True), not a failure.
    results.append(_run_check("kbase_home", check_kbase_home, warn=True))

    def check_active_kb() -> tuple[bool, str]:
        cfg = cfg_box.get("cfg")
        if cfg is None:
            return False, "config unavailable"
        kb_name = getattr(cfg, "active_kb", "default")
        # An exception raised here is turned into FAIL by _run_check.
        ensure_kb_exists(str(kb_name))
        return True, f'knowledge base "{kb_name}" exists'

    results.append(_run_check("active_kb", check_active_kb))

    def check_llm_key() -> tuple[bool, str]:
        api_key = os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY", "")
        if not api_key:
            return False, "LLM_API_KEY missing (or OPENAI_API_KEY)"
        # Only the length is reported, never the key itself.
        return True, f"LLM API key present (length={len(api_key)})"

    results.append(_run_check("llm_api_key", check_llm_key))

    def check_llm_base_url() -> tuple[bool, str]:
        base_url = os.getenv("LLM_BASE_URL") or os.getenv("OPENAI_BASE_URL", "")
        if not base_url:
            return False, "LLM_BASE_URL missing (or OPENAI_BASE_URL)"
        return True, f"LLM_BASE_URL={base_url}"

    results.append(_run_check("llm_base_url", check_llm_base_url))

    def check_mineru_token() -> tuple[bool, str]:
        token = os.getenv("KBASE_MINERU_API_TOKEN") or os.getenv("MINERU_API_TOKEN", "")
        if not token:
            return False, "MINERU_API_TOKEN missing (or KBASE_MINERU_API_TOKEN)"
        # Only the length is reported, never the token itself.
        return True, f"MinerU token present (length={len(token)})"

    results.append(_run_check("mineru_api_token", check_mineru_token))

    # Holds the imported RAGAnything classes so the parser check can reuse
    # them; it stays empty when the import check fails.
    rag_box: dict[str, object] = {}

    def check_raganything_import() -> tuple[bool, str]:
        # Try the package-level names first, then the submodule locations.
        try:
            from raganything import RAGAnything, RAGAnythingConfig  # type: ignore
        except Exception:
            from raganything.raganything import RAGAnything  # type: ignore
            from raganything.config import RAGAnythingConfig  # type: ignore
        rag_box["RAGAnything"] = RAGAnything
        rag_box["RAGAnythingConfig"] = RAGAnythingConfig
        return True, "raganything import ok"

    results.append(_run_check("raganything_import", check_raganything_import))

    def check_parser_installation() -> tuple[bool, str]:
        cfg = cfg_box.get("cfg")
        if cfg is None:
            return False, "config unavailable"
        parser = getattr(cfg, "parser", "mineru-cloud")
        RAGAnything = rag_box.get("RAGAnything")
        RAGAnythingConfig = rag_box.get("RAGAnythingConfig")
        if RAGAnything is None or RAGAnythingConfig is None:
            return False, "raganything unavailable"
        # A throwaway working directory is used to construct the RAG object.
        # NOTE(review): the nesting of the lines after the try/except is
        # reconstructed; confirm they belong inside the `with` block.
        with tempfile.TemporaryDirectory(prefix="kbase-doctor-") as temp_dir:
            try:
                config = RAGAnythingConfig(working_dir=temp_dir, parser=parser, parse_method="auto")
                rag = RAGAnything(
                    config=config,
                    lightrag_kwargs={"working_dir": temp_dir},
                )
            except Exception as exc:
                message = str(exc)
                # Give a targeted hint when the installed package rejects the
                # "mineru-cloud" parser; re-raise anything else for _run_check.
                if "Unsupported parser type" in message and "mineru-cloud" in message:
                    return (
                        False,
                        'Current raganything package does not support "mineru-cloud". '
                        "Please install your patched raganything build.",
                    )
                raise
            checker = getattr(rag, "check_parser_installation", None)
            if checker is None:
                return True, f'parser "{parser}" configured (no explicit checker method)'
            # Call the checker without arguments first; on TypeError retry
            # with the parser name.
            try:
                ok = bool(checker())
            except TypeError:
                ok = bool(checker(parser))
            return ok, f'parser "{parser}" installation {"verified" if ok else "not available"}'

    results.append(_run_check("parser_installation", check_parser_installation))

    for item in results:
        typer.echo(f"[{item.status}] {item.name}: {item.detail}")

    fail_count = sum(1 for i in results if i.status == "FAIL")
    warn_count = sum(1 for i in results if i.status == "WARN")
    typer.echo(f"\nDoctor summary: fail={fail_count}, warn={warn_count}, total={len(results)}")
    # Any FAIL prints the suggested fixes and exits with code 1; warnings alone do not.
    if fail_count > 0:
        typer.echo(
            "Suggested fixes: set MINERU_API_TOKEN, LLM_API_KEY, LLM_BASE_URL, "
            "ensure active kb exists, run `uv run kbase init`."
        )
        raise typer.Exit(code=1)
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import typer
|
|
2
|
+
|
|
3
|
+
from kbase.domain.kb_manager import init_workspace
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def init() -> None:
    """Initialize kbase workspace with default knowledge base."""
    try:
        init_workspace()
    except ValueError as exc:
        # A ValueError from workspace setup is reported and mapped to exit code 1.
        typer.echo(f"Init failed: {exc}")
        raise typer.Exit(code=1) from exc
    else:
        typer.echo("Workspace initialized with default knowledge base.")
|
kbase/commands/kb_cmd.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import typer
|
|
2
|
+
import time
|
|
3
|
+
|
|
4
|
+
from kbase.domain.kb_manager import create_kb, list_kbs, use_kb
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# Typer sub-application holding the `list`, `create` and `use` commands below.
app = typer.Typer(help="Manage knowledge bases.")
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@app.command("list")
def list_command() -> None:
    """List all knowledge bases."""
    start = time.perf_counter()
    typer.echo("开始列出知识库...")
    kbs = list_kbs()
    if kbs:
        # Each entry is unpacked as (name, active); the active one gets a marker.
        for name, active in kbs:
            marker = " (active)" if active else ""
            typer.echo(f"- {name}{marker}")
        elapsed = time.perf_counter() - start
        typer.echo(f"列出知识库完成,耗时: {elapsed:.2f}s")
    else:
        elapsed = time.perf_counter() - start
        typer.echo("No knowledge bases found.")
        typer.echo(f"耗时: {elapsed:.2f}s")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@app.command("create")
def create_command(
    name: str,
    use: bool = typer.Option(False, "--use", help="Switch to the kb after creation."),
) -> None:
    """Create a knowledge base."""
    start = time.perf_counter()
    typer.echo(f'开始创建知识库 "{name}"...')
    try:
        create_kb(name, use=use)
    except ValueError as exc:
        # A ValueError from creation is surfaced as a bad-parameter error.
        elapsed = time.perf_counter() - start
        typer.echo(f"创建知识库失败,耗时: {elapsed:.2f}s")
        raise typer.BadParameter(str(exc)) from exc

    elapsed = time.perf_counter() - start
    confirmation = (
        f'Created knowledge base "{name}" and set active.'
        if use
        else f'Created knowledge base "{name}".'
    )
    typer.echo(confirmation)
    typer.echo(f"创建知识库完成,耗时: {elapsed:.2f}s")
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@app.command("use")
def use_command(name: str) -> None:
    """Switch active knowledge base."""
    start = time.perf_counter()
    typer.echo(f'开始切换知识库到 "{name}"...')
    try:
        use_kb(name)
    except ValueError as exc:
        # A ValueError from the switch is surfaced as a bad-parameter error.
        elapsed = time.perf_counter() - start
        typer.echo(f"切换知识库失败,耗时: {elapsed:.2f}s")
        raise typer.BadParameter(str(exc)) from exc
    else:
        elapsed = time.perf_counter() - start
        typer.echo(f'Active knowledge base set to "{name}".')
        typer.echo(f"切换知识库完成,耗时: {elapsed:.2f}s")
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
import typer
|
|
8
|
+
|
|
9
|
+
from kbase.config import load_config
|
|
10
|
+
from kbase.domain.kb_validator import ensure_kb_exists, validate_kb_name
|
|
11
|
+
from kbase.infra.storage import kb_dir
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _resolve_kb_name(kb: str | None) -> str:
    """Return the validated knowledge-base name to report on.

    A non-blank ``kb`` argument is used as given; otherwise the name comes
    from ``load_config().active_kb``. The chosen name is always run through
    ``validate_kb_name``.
    """
    use_explicit = kb is not None and bool(kb.strip())
    if use_explicit:
        return validate_kb_name(kb)
    return validate_kb_name(load_config().active_kb)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _resolve_target_kb(kb: str | None) -> str:
    """Resolve the knowledge base to report on and confirm it exists.

    Returns whatever ``ensure_kb_exists`` returns; errors propagate to the
    caller unchanged.
    """
    resolved = _resolve_kb_name(kb)
    return ensure_kb_exists(resolved)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _load_json_file(path: Path) -> dict[str, Any]:
    """Load a JSON object from ``path``.

    Returns an empty dict when ``path`` is not an existing regular file or
    when the decoded top-level value is not an object. Decoding errors
    propagate to the caller.
    """
    # is_file() is already False for a path that does not exist.
    if not path.is_file():
        return {}
    with path.open("r", encoding="utf-8") as handle:
        payload = json.load(handle)
    if isinstance(payload, dict):
        return payload
    return {}
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _kv_count(data: dict[str, Any]) -> int:
    """Count the entries of ``data`` whose key does not start with ``_``."""
    visible_keys = [key for key in data if not str(key).startswith("_")]
    return len(visible_keys)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _status_counts(doc_status: dict[str, Any]) -> tuple[int, int, int]:
    """Tally document states as ``(success, failed, pending)``.

    Only dict entries are counted. The ``status`` field is compared
    case-insensitively: ``processed``/``success``/``done`` count as success,
    ``failed``/``error`` as failed, and anything else (including a missing
    status) as pending.
    """
    success_states = {"processed", "success", "done"}
    failure_states = {"failed", "error"}
    tally = {"success": 0, "failed": 0, "pending": 0}
    for entry in doc_status.values():
        if not isinstance(entry, dict):
            continue
        state = str(entry.get("status", "")).lower()
        if state in success_states:
            bucket = "success"
        elif state in failure_states:
            bucket = "failed"
        else:
            bucket = "pending"
        tally[bucket] += 1
    return tally["success"], tally["failed"], tally["pending"]
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _sum_chunks(doc_status: dict[str, Any]) -> int:
    """Sum the ``chunks_count`` field over all dict entries.

    A missing count contributes 0; a count that ``int()`` rejects with
    ``TypeError`` or ``ValueError`` is skipped, as are non-dict entries.
    """
    total = 0
    for entry in doc_status.values():
        if isinstance(entry, dict):
            try:
                total += int(entry.get("chunks_count", 0))
            except (TypeError, ValueError):
                pass
    return total
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _latest_update(doc_status: dict[str, Any]) -> str:
    """Return the greatest ``updated_at`` value found, or ``"n/a"``.

    Args:
        doc_status: Mapping of document id to a status record; non-dict
            records are ignored.

    Returns:
        The largest ``updated_at`` value after conversion to ``str``, or
        ``"n/a"`` when no record carries a usable value.
    """
    latest = ""
    for entry in doc_status.values():
        if not isinstance(entry, dict):
            continue
        raw = entry.get("updated_at")
        # A null timestamp must be skipped: str(None) is "None", which sorts
        # above any digit-leading timestamp and would be reported as latest.
        if raw is None:
            continue
        updated = str(raw)
        # Plain string comparison — assumes timestamps sort lexicographically
        # (e.g. ISO-8601); TODO confirm against the writer of this file.
        if updated > latest:
            latest = updated
    return latest or "n/a"
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def status_command(kb: str | None = typer.Option(None, "--kb", help="Target knowledge base.")) -> None:
    """Show active knowledge base and storage/index statistics."""
    # Resolution, file reading and printing all stay inside one try block, so
    # any failure is reported once and mapped to exit code 1.
    try:
        active_kb = validate_kb_name(load_config().active_kb)
        kb_name = _resolve_target_kb(kb)
        target_path = kb_dir(kb_name)

        # The three JSON stores read from the knowledge-base directory.
        full_docs = _load_json_file(target_path / "kv_store_full_docs.json")
        doc_status = _load_json_file(target_path / "kv_store_doc_status.json")
        parse_cache = _load_json_file(target_path / "kv_store_parse_cache.json")

        # The document count is the larger of the two per-document stores.
        docs = max(_kv_count(full_docs), _kv_count(doc_status))
        chunks = _sum_chunks(doc_status)
        success, failed, pending = _status_counts(doc_status)
        cache_entries = _kv_count(parse_cache)
        if target_path.exists():
            file_count = sum(1 for entry in target_path.rglob("*") if entry.is_file())
        else:
            file_count = 0

        typer.echo(f"active_kb: {active_kb}")
        typer.echo(f"kb: {kb_name}")
        typer.echo(f"kb_path: {target_path}")
        typer.echo("stats:")
        typer.echo(f"  docs: {docs}")
        typer.echo(f"  chunks: {chunks}")
        typer.echo(f"  doc_status_success: {success}")
        typer.echo(f"  doc_status_failed: {failed}")
        typer.echo(f"  doc_status_pending: {pending}")
        typer.echo(f"  parse_cache_entries: {cache_entries}")
        typer.echo(f"  storage_files: {file_count}")
        typer.echo(f"  last_updated: {_latest_update(doc_status)}")
    except Exception as exc:
        typer.echo(f"Failed to get status: {exc}")
        raise typer.Exit(code=1) from exc
|
kbase/config/__init__.py
ADDED
kbase/config/loader.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
from kbase.config.model import AppConfig
|
|
5
|
+
from kbase.infra.storage import config_path
|
|
6
|
+
|
|
7
|
+
try:
|
|
8
|
+
import tomllib
|
|
9
|
+
except ModuleNotFoundError: # pragma: no cover
|
|
10
|
+
import tomli as tomllib
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _load_file_values() -> dict[str, Any]:
    """Read settings from the TOML file at ``config_path()``.

    Returns an empty dict when the file does not exist.

    Raises:
        ValueError: when the file is not valid TOML or its root is not a
            table.
    """
    path = config_path()
    if not path.exists():
        return {}

    text = path.read_text(encoding="utf-8")
    try:
        parsed = tomllib.loads(text)
    except tomllib.TOMLDecodeError as exc:
        raise ValueError(f"Failed to parse config file at {path}: {exc}") from exc

    if isinstance(parsed, dict):
        return parsed
    raise ValueError(f"Invalid config format at {path}: root must be a table")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _load_env_values() -> dict[str, str]:
    """Collect configuration overrides from ``KBASE_*`` environment variables.

    Only variables that are set to a non-empty string are included; the keys
    are the corresponding config field names.
    """
    env_names = {
        "active_kb": "KBASE_ACTIVE_KB",
        "parser": "KBASE_PARSER",
        "default_query_mode": "KBASE_DEFAULT_QUERY_MODE",
    }
    overrides: dict[str, str] = {}
    for field, env_name in env_names.items():
        raw = os.getenv(env_name)
        # Unset (None) and empty values are both treated as "no override".
        if raw:
            overrides[field] = raw
    return overrides
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def load_config() -> AppConfig:
    """Build the application config from the config file and environment.

    Environment values override file values. ``parser`` falls back to
    ``"mineru-cloud"`` when neither source provides it. The merged mapping is
    validated with ``AppConfig.model_validate``.
    """
    merged = dict(_load_file_values())
    # Applied second so environment values win over file values.
    merged.update(_load_env_values())
    merged.setdefault("parser", "mineru-cloud")
    return AppConfig.model_validate(merged)
|
kbase/config/model.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
from pydantic import BaseModel
|
|
2
|
+
from typing import Literal
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class AppConfig(BaseModel):
    """Application settings, validated as a pydantic model."""

    # Knowledge base used when a command is not given one explicitly.
    active_kb: str = "default"
    # Document parser; restricted to the listed literals.
    parser: Literal["mineru-cloud", "mineru", "docling", "paddleocr"] = "mineru-cloud"
    # Query mode used when `ask` is run without `--mode`; restricted to the listed literals.
    default_query_mode: Literal["hybrid", "local", "global", "naive", "mix", "bypass"] = "hybrid"
|
kbase/domain/__init__.py
ADDED