kc-cli 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kc/__init__.py +5 -0
- kc/__main__.py +11 -0
- kc/artifacts/__init__.py +1 -0
- kc/artifacts/diff.py +76 -0
- kc/artifacts/frontmatter.py +26 -0
- kc/artifacts/markdown.py +116 -0
- kc/atomic_write.py +33 -0
- kc/cli.py +284 -0
- kc/commands/__init__.py +1 -0
- kc/commands/artifact.py +1190 -0
- kc/commands/citation.py +231 -0
- kc/commands/common.py +346 -0
- kc/commands/conformance.py +293 -0
- kc/commands/context.py +190 -0
- kc/commands/doctor.py +81 -0
- kc/commands/eval.py +133 -0
- kc/commands/export.py +97 -0
- kc/commands/guide.py +571 -0
- kc/commands/index.py +54 -0
- kc/commands/init.py +207 -0
- kc/commands/lint.py +238 -0
- kc/commands/source.py +464 -0
- kc/commands/status.py +52 -0
- kc/commands/task.py +260 -0
- kc/config.py +127 -0
- kc/embedding_models/potion-base-8M/README.md +97 -0
- kc/embedding_models/potion-base-8M/config.json +13 -0
- kc/embedding_models/potion-base-8M/model.safetensors +0 -0
- kc/embedding_models/potion-base-8M/modules.json +14 -0
- kc/embedding_models/potion-base-8M/tokenizer.json +1 -0
- kc/errors.py +141 -0
- kc/fingerprints.py +35 -0
- kc/ids.py +23 -0
- kc/locks.py +65 -0
- kc/models/__init__.py +17 -0
- kc/models/artifact.py +34 -0
- kc/models/citation.py +60 -0
- kc/models/context.py +23 -0
- kc/models/eval.py +21 -0
- kc/models/plan.py +37 -0
- kc/models/source.py +37 -0
- kc/models/source_range.py +29 -0
- kc/models/source_revision.py +19 -0
- kc/models/task.py +35 -0
- kc/output.py +838 -0
- kc/paths.py +126 -0
- kc/provenance/__init__.py +1 -0
- kc/provenance/citations.py +296 -0
- kc/search/__init__.py +1 -0
- kc/search/extract.py +268 -0
- kc/search/fts.py +284 -0
- kc/search/semantic.py +346 -0
- kc/store/__init__.py +1 -0
- kc/store/jsonl.py +55 -0
- kc/store/sqlite.py +444 -0
- kc/store/transaction.py +67 -0
- kc/templates/agents/skills/kc/SKILL.md +282 -0
- kc/templates/agents/skills/kc/agents/openai.yaml +5 -0
- kc/templates/agents/skills/kc/scripts/resolve_query_citations.py +134 -0
- kc/workspace.py +98 -0
- kc_cli-0.4.0.dist-info/METADATA +522 -0
- kc_cli-0.4.0.dist-info/RECORD +65 -0
- kc_cli-0.4.0.dist-info/WHEEL +4 -0
- kc_cli-0.4.0.dist-info/entry_points.txt +2 -0
- kc_cli-0.4.0.dist-info/licenses/LICENSE +21 -0
kc/commands/eval.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Annotated, Any
|
|
5
|
+
|
|
6
|
+
import typer
|
|
7
|
+
import yaml
|
|
8
|
+
|
|
9
|
+
from kc.atomic_write import atomic_write_text
|
|
10
|
+
from kc.commands.common import run
|
|
11
|
+
from kc.errors import KcError
|
|
12
|
+
from kc.models.eval import EvalPack
|
|
13
|
+
from kc.output import emit_success
|
|
14
|
+
from kc.paths import current_paths, repo_relative, resolve_repo_path
|
|
15
|
+
from kc.search.fts import ensure_index, search_ranges
|
|
16
|
+
from kc.store.transaction import mutation_transaction
|
|
17
|
+
|
|
18
|
+
# Typer sub-application for this module; the `run` command defined in this
# file is registered on it via `@app.command("run", ...)`.
app = typer.Typer(help="Run deterministic retrieval evaluation packs.")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@app.command("run", help="Run retrieval eval cases from a YAML pack.")
def run_eval(
    pack: Annotated[Path | None, typer.Option("--pack", help="Eval pack YAML.")] = None,
    out: Annotated[Path | None, typer.Option("--out", help="Write eval result JSON.")] = None,
) -> None:
    """Run every case of an eval pack through ``search_ranges`` and report metrics.

    Args:
        pack: Path to the eval pack YAML. Required; resolved against the
            workspace root.
        out: Optional path for the result JSON. When given, the result is
            written inside a ``mutation_transaction`` before being emitted.

    Raises (inside the ``run`` wrapper):
        KcError: ``KC_USAGE_ERROR`` when ``--pack`` is missing,
            ``KC_FILE_NOT_FOUND`` when the pack does not exist,
            ``KC_CONFIG_INVALID`` when it is not valid YAML or not a mapping,
            ``KC_ARTIFACT_SCHEMA_INVALID`` when it fails ``EvalPack``
            validation or when at least one case misses its recall threshold.
    """

    def _run() -> None:
        paths = current_paths()
        if pack is None:
            raise KcError(
                code="KC_USAGE_ERROR",
                message="Provide --pack.",
                details={"option": "--pack"},
            )
        pack_path = resolve_repo_path(pack, paths.root)
        ensure_index(paths.sqlite_path, paths.sources_jsonl, paths.ranges_jsonl)
        cases = _load_eval_cases(pack_path)

        results = []
        for case in cases:
            query = str(case.get("ask") or case.get("query") or "")
            found = search_ranges(
                paths.sqlite_path,
                query,
                domain=case.get("domain"),
                limit=int(_case_value(case, "limit", 10)),
            )
            results.append(_score_eval_case(case, query, found))

        failed = [item for item in results if not item["passed"]]
        if failed:
            # NOTE: the same error code as pack schema failures is reused here;
            # kept unchanged because callers may match on it.
            raise KcError(
                code="KC_ARTIFACT_SCHEMA_INVALID",
                message="Retrieval eval failed.",
                details={"failed": failed, "total": len(results)},
            )

        recalls = [float(item["recall_at_k"]) for item in results]
        reciprocal_ranks = [float(item["reciprocal_rank"]) for item in results]
        result = {
            "pack": repo_relative(pack_path),
            "total": len(results),
            "passed": sum(1 for item in results if item["passed"]),
            "metrics": {
                # An empty pack counts as perfect recall but zero MRR.
                "recall_at_k": sum(recalls) / len(results) if results else 1.0,
                "mrr": sum(reciprocal_ranks) / len(results) if results else 0.0,
            },
            "results": results,
        }
        if out is not None:
            out_path = resolve_repo_path(out, paths.root)
            with mutation_transaction(paths, "eval.run", [out_path]) as tx:
                import orjson

                atomic_write_text(out_path, orjson.dumps(result, option=orjson.OPT_INDENT_2).decode() + "\n")
                tx.commit({"out": repo_relative(out_path)})
            # Added after serialisation, so the written file itself has no "out" key.
            result["out"] = repo_relative(out_path)
        emit_success("eval.run", result)

    run("eval.run", _run)


def _load_eval_cases(pack_path: Path) -> list[dict[str, Any]]:
    """Read, parse and validate the eval pack at *pack_path*; return its cases as dicts."""
    if not pack_path.exists():
        raise KcError(
            code="KC_FILE_NOT_FOUND",
            message=f"Eval pack not found: {repo_relative(pack_path)}",
            details={"path": repo_relative(pack_path)},
        )
    try:
        data = yaml.safe_load(pack_path.read_text(encoding="utf-8"))
    except yaml.YAMLError as exc:
        # Report malformed YAML as a structured error like every other pack
        # problem, instead of leaking the raw parser exception.
        raise KcError(
            code="KC_CONFIG_INVALID",
            message=f"Eval pack is not valid YAML: {exc}",
            details={"path": repo_relative(pack_path)},
        ) from exc
    if data is None:
        # Only an empty document becomes an empty pack; other falsy documents
        # (``[]``, ``0``, ``false``) must still hit the mapping check below.
        data = {}
    if not isinstance(data, dict):
        raise KcError(
            code="KC_CONFIG_INVALID",
            message="Eval pack must be a YAML object.",
            details={"path": repo_relative(pack_path)},
        )
    try:
        eval_pack = EvalPack.model_validate(data)
    except Exception as exc:
        # Deliberately broad: the validation library's exception type is not
        # imported in this module, and any failure here means a bad pack.
        raise KcError(
            code="KC_ARTIFACT_SCHEMA_INVALID",
            message=f"Invalid eval pack: {exc}",
            details={"path": repo_relative(pack_path)},
        ) from exc
    return [case.model_dump(mode="json") for case in eval_pack.cases]


def _case_value(case: dict[str, Any], key: str, default: Any) -> Any:
    """Return ``case[key]``, falling back to *default* when it is missing or ``None``.

    NOTE(review): ``model_dump`` presumably emits unset optional fields as
    ``None`` rather than omitting them, in which case ``dict.get(key, default)``
    alone would never apply the default -- confirm against ``kc.models.eval``.
    """
    value = case.get(key)
    return default if value is None else value


def _score_eval_case(case: dict[str, Any], query: str, found: list[dict[str, Any]]) -> dict[str, Any]:
    """Score one case's search hits: recall over all expectations plus reciprocal rank."""
    expected_sources = set(_case_value(case, "expected_source_ids", []))
    expected_ranges = set(_case_value(case, "expected_range_ids", []))
    expected_citations = set(_case_value(case, "must_include_citation_tokens", []))

    found_sources = {item["source_id"] for item in found}
    found_ranges = [item["range_id"] for item in found]  # keeps the order search_ranges returned
    found_citations = {item["citation_token"] for item in found}

    # Recall pools source ids, range ids and citation tokens into one ratio.
    expected_total = len(expected_sources) + len(expected_ranges) + len(expected_citations)
    matched_total = (
        len(expected_sources & found_sources)
        + len(expected_ranges & set(found_ranges))
        + len(expected_citations & found_citations)
    )
    recall = 1.0 if expected_total == 0 else matched_total / expected_total

    # Reciprocal rank of the first expected range id among the hits; 0.0 if none.
    reciprocal_rank = next(
        (1.0 / position for position, range_id in enumerate(found_ranges, start=1) if range_id in expected_ranges),
        0.0,
    )

    min_recall = float(_case_value(case, "min_recall_at_k", 1.0))
    return {
        "id": case.get("id"),
        "query": query,
        "passed": recall >= min_recall,
        "expected_source_ids": sorted(expected_sources),
        "expected_range_ids": sorted(expected_ranges),
        "found_source_ids": sorted(found_sources),
        "found_range_ids": found_ranges,
        "recall_at_k": recall,
        "reciprocal_rank": reciprocal_rank,
        "results": found,
    }
|
kc/commands/export.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Annotated
|
|
5
|
+
|
|
6
|
+
import typer
|
|
7
|
+
|
|
8
|
+
from kc.atomic_write import atomic_write_text
|
|
9
|
+
from kc.commands.common import (
|
|
10
|
+
load_artifacts,
|
|
11
|
+
load_citation_edges,
|
|
12
|
+
load_ranges,
|
|
13
|
+
load_sources,
|
|
14
|
+
run,
|
|
15
|
+
validate_choice,
|
|
16
|
+
)
|
|
17
|
+
from kc.errors import KcError
|
|
18
|
+
from kc.output import emit_success
|
|
19
|
+
from kc.paths import current_paths, repo_relative, resolve_repo_path
|
|
20
|
+
from kc.store.transaction import mutation_transaction
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def register(app: typer.Typer) -> None:
    """Attach the ``export`` command to the given Typer application."""

    @app.command("export", help="Export registered knowledge as JSON, Markdown bundle, or llms.txt.")
    def export_command(
        export_format: Annotated[
            str,
            typer.Option("--format", help="jsonl, markdown-bundle, or llms-txt."),
        ] = "jsonl",
        out: Annotated[Path | None, typer.Option("--out", help="Optional output file.")] = None,
    ) -> None:
        # Renders the chosen format, optionally writes it to --out inside a
        # mutation transaction, then emits a summary (inline content only when
        # no file was written).
        def _run() -> None:
            # Format name -> renderer; insertion order is reused for the
            # "supported" list in the fallback error below.
            renderers = {
                "jsonl": _jsonl_export,
                "markdown-bundle": _markdown_bundle,
                "llms-txt": _llms_txt,
            }
            validate_choice(
                export_format,
                option="--format",
                supported=set(renderers),
            )
            render = renderers.get(export_format)
            if render is None:
                # Defensive fallback in case validate_choice lets an unknown value through.
                raise KcError(
                    code="KC_UNSUPPORTED_FEATURE",
                    message=f"Unsupported export format: {export_format}",
                    details={"supported": list(renderers)},
                )
            content = render()

            written_to = None
            if out:
                target = resolve_repo_path(out)
                paths = current_paths()
                with mutation_transaction(paths, "export", [target]) as tx:
                    atomic_write_text(target, content)
                    tx.commit({"out": repo_relative(target)})
                written_to = repo_relative(target)

            summary = {
                "format": export_format,
                "bytes": len(content.encode("utf-8")),
                "out": written_to,
                "content_location": "file" if out else "inline",
                "content": None if out else content,
            }
            emit_success("export", summary)

        run("export", _run)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _jsonl_export() -> str:
    """Serialise sources, ranges, artifacts and citation edges to one JSON document.

    Despite the ``jsonl`` name, the output is a single two-space-indented JSON
    object keyed by record kind, followed by a trailing newline.
    """
    import orjson

    def _dump_all(models):
        # JSON-mode dump of each model in the iterable.
        return [model.model_dump(mode="json") for model in models]

    # Key order is the order the sections appear in the output.
    payload = {}
    payload["sources"] = _dump_all(load_sources())
    payload["source_ranges"] = _dump_all(load_ranges())
    payload["artifacts"] = _dump_all(load_artifacts())
    payload["citation_edges"] = _dump_all(load_citation_edges())

    encoded = orjson.dumps(payload, option=orjson.OPT_INDENT_2)
    return encoded.decode() + "\n"
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _markdown_bundle() -> str:
    """Concatenate every existing Markdown artifact into one document.

    Each included artifact is preceded by an HTML comment naming its path.
    Artifacts whose file is missing or whose suffix is not ``.md`` /
    ``.markdown`` (case-insensitive) are skipped.
    """
    markdown_suffixes = {".md", ".markdown"}
    sections = ["# kc Markdown Bundle\n"]
    for artifact in load_artifacts():
        artifact_file = resolve_repo_path(artifact.path)
        if not artifact_file.exists():
            continue
        if artifact_file.suffix.lower() not in markdown_suffixes:
            continue
        sections.extend(
            [
                f"\n<!-- artifact: {artifact.path} -->\n",
                # utf-8-sig strips a leading BOM if the file has one.
                artifact_file.read_text(encoding="utf-8-sig"),
                "\n",
            ]
        )
    return "\n".join(sections)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _llms_txt() -> str:
    """Render an llms.txt-style index: a heading, a blank line, then one bullet per artifact."""
    entries = [f"- {artifact.title}: {artifact.path}" for artifact in load_artifacts()]
    return "\n".join(["# kc knowledge base", "", *entries]) + "\n"
|