kc-cli 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kc/__init__.py +5 -0
- kc/__main__.py +11 -0
- kc/artifacts/__init__.py +1 -0
- kc/artifacts/diff.py +76 -0
- kc/artifacts/frontmatter.py +26 -0
- kc/artifacts/markdown.py +116 -0
- kc/atomic_write.py +33 -0
- kc/cli.py +284 -0
- kc/commands/__init__.py +1 -0
- kc/commands/artifact.py +1190 -0
- kc/commands/citation.py +231 -0
- kc/commands/common.py +346 -0
- kc/commands/conformance.py +293 -0
- kc/commands/context.py +190 -0
- kc/commands/doctor.py +81 -0
- kc/commands/eval.py +133 -0
- kc/commands/export.py +97 -0
- kc/commands/guide.py +571 -0
- kc/commands/index.py +54 -0
- kc/commands/init.py +207 -0
- kc/commands/lint.py +238 -0
- kc/commands/source.py +464 -0
- kc/commands/status.py +52 -0
- kc/commands/task.py +260 -0
- kc/config.py +127 -0
- kc/embedding_models/potion-base-8M/README.md +97 -0
- kc/embedding_models/potion-base-8M/config.json +13 -0
- kc/embedding_models/potion-base-8M/model.safetensors +0 -0
- kc/embedding_models/potion-base-8M/modules.json +14 -0
- kc/embedding_models/potion-base-8M/tokenizer.json +1 -0
- kc/errors.py +141 -0
- kc/fingerprints.py +35 -0
- kc/ids.py +23 -0
- kc/locks.py +65 -0
- kc/models/__init__.py +17 -0
- kc/models/artifact.py +34 -0
- kc/models/citation.py +60 -0
- kc/models/context.py +23 -0
- kc/models/eval.py +21 -0
- kc/models/plan.py +37 -0
- kc/models/source.py +37 -0
- kc/models/source_range.py +29 -0
- kc/models/source_revision.py +19 -0
- kc/models/task.py +35 -0
- kc/output.py +838 -0
- kc/paths.py +126 -0
- kc/provenance/__init__.py +1 -0
- kc/provenance/citations.py +296 -0
- kc/search/__init__.py +1 -0
- kc/search/extract.py +268 -0
- kc/search/fts.py +284 -0
- kc/search/semantic.py +346 -0
- kc/store/__init__.py +1 -0
- kc/store/jsonl.py +55 -0
- kc/store/sqlite.py +444 -0
- kc/store/transaction.py +67 -0
- kc/templates/agents/skills/kc/SKILL.md +282 -0
- kc/templates/agents/skills/kc/agents/openai.yaml +5 -0
- kc/templates/agents/skills/kc/scripts/resolve_query_citations.py +134 -0
- kc/workspace.py +98 -0
- kc_cli-0.4.0.dist-info/METADATA +522 -0
- kc_cli-0.4.0.dist-info/RECORD +65 -0
- kc_cli-0.4.0.dist-info/WHEEL +4 -0
- kc_cli-0.4.0.dist-info/entry_points.txt +2 -0
- kc_cli-0.4.0.dist-info/licenses/LICENSE +21 -0
kc/commands/artifact.py
ADDED
|
@@ -0,0 +1,1190 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from datetime import UTC, datetime
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Annotated, Any
|
|
7
|
+
from urllib.parse import quote
|
|
8
|
+
|
|
9
|
+
import orjson
|
|
10
|
+
import typer
|
|
11
|
+
|
|
12
|
+
from kc.artifacts.diff import build_artifact_plan
|
|
13
|
+
from kc.artifacts.frontmatter import dump_frontmatter
|
|
14
|
+
from kc.artifacts.markdown import (
|
|
15
|
+
citation_coverage_issues,
|
|
16
|
+
markdown_body_line_offset,
|
|
17
|
+
markdown_title,
|
|
18
|
+
read_markdown_artifact,
|
|
19
|
+
required_section_names,
|
|
20
|
+
)
|
|
21
|
+
from kc.atomic_write import atomic_write_text, copy_snapshot
|
|
22
|
+
from kc.commands.common import (
|
|
23
|
+
artifact_by_path,
|
|
24
|
+
json_dumps,
|
|
25
|
+
load_artifacts,
|
|
26
|
+
load_citation_edges,
|
|
27
|
+
load_ranges,
|
|
28
|
+
load_sources,
|
|
29
|
+
now,
|
|
30
|
+
path_lock_name,
|
|
31
|
+
run,
|
|
32
|
+
save_artifacts,
|
|
33
|
+
save_citation_edges,
|
|
34
|
+
validate_choice,
|
|
35
|
+
)
|
|
36
|
+
from kc.config import load_config
|
|
37
|
+
from kc.errors import EXIT_PROVENANCE, EXIT_VALIDATION, KcError
|
|
38
|
+
from kc.fingerprints import raw_fingerprint
|
|
39
|
+
from kc.ids import new_id
|
|
40
|
+
from kc.locks import FileLock
|
|
41
|
+
from kc.models.artifact import ArtifactRecord, SourceRef
|
|
42
|
+
from kc.models.citation import ArtifactLocator, CitationEdgeRecord
|
|
43
|
+
from kc.models.plan import PlanRecord
|
|
44
|
+
from kc.output import emit, emit_success, envelope, is_llm_mode, warning
|
|
45
|
+
from kc.paths import current_paths, ensure_under_root, repo_relative, resolve_repo_path
|
|
46
|
+
from kc.provenance.citations import validate_citations
|
|
47
|
+
from kc.store.sqlite import get_idempotency, rebuild_index, save_idempotency, save_plan
|
|
48
|
+
from kc.store.transaction import mutation_transaction
|
|
49
|
+
|
|
50
|
+
# Typer sub-app mounted by the top-level CLI; groups all `kc artifact ...` subcommands.
app = typer.Typer(help="Create, validate, diff, and safely apply knowledge artifacts.")

# Artifact types accepted by `--type` and by the frontmatter `artifact_type` field.
ALLOWED_ARTIFACT_TYPES = {
    "knowledge_page",
    "glossary",
    "decision_note",
    "source_index",
    "log_entry",
    "eval_pack",
}
# Every lifecycle state an artifact may declare in its frontmatter.
ALLOWED_ARTIFACT_STATUSES = {"draft", "active", "deprecated", "superseded"}
# Statuses permitted for a freshly created artifact (`artifact new --status`).
ALLOWED_NEW_ARTIFACT_STATUSES = {"draft", "active"}
# Frontmatter keys every markdown artifact must carry; any missing key is
# reported as a KC_ARTIFACT_SCHEMA_INVALID validation error.
REQUIRED_MARKDOWN_FRONTMATTER = {
    "schema_version",
    "artifact_id",
    "title",
    "status",
    "domain",
    "artifact_type",
    "requires_citations",
    "source_refs",
}
# Legal status moves keyed by the currently registered status. Each status maps
# to itself so re-saving without a transition is always allowed; "deprecated"
# and "superseded" are terminal.
ALLOWED_STATUS_TRANSITIONS = {
    "draft": {"draft", "active", "deprecated", "superseded"},
    "active": {"active", "deprecated", "superseded"},
    "deprecated": {"deprecated"},
    "superseded": {"superseded"},
}
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _artifact_template(
    *,
    artifact_id: str,
    title: str,
    artifact_type: str,
    status: str,
    domain: list[str],
    source_ids: list[str],
) -> str:
    """Render the deterministic markdown skeleton for a brand-new artifact.

    Produces YAML frontmatter followed by the standard section layout with
    ``[kc:todo]`` placeholders, serialized via :func:`dump_frontmatter`.
    """
    # One empty-range source_refs entry per requested source id.
    refs = [{"source_id": sid, "ranges": []} for sid in source_ids]
    # Key order is preserved deliberately so the emitted frontmatter is stable.
    meta: dict[str, Any] = {
        "schema_version": "kc.knowledge_page.v1",
        "artifact_id": artifact_id,
        "title": title,
        "status": status,
        "domain": domain,
        "artifact_type": artifact_type,
        "requires_citations": True,
        "source_refs": refs,
        "last_validated_at": None,
    }
    skeleton = f"""# {title}

## Summary

[kc:todo] Add a source-backed summary.

## Source-backed facts

- [kc:todo] Add cited facts.

## Inferences

- [kc:todo] Add marked inferences only when needed.

## Open questions

- [kc:todo] Capture unresolved questions.

## Source notes

"""
    return dump_frontmatter(meta, skeleton)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
@app.command("new", help="Create a deterministic artifact skeleton; writes only with --yes.")
def new(
    path: Annotated[Path, typer.Option("--path", help="Artifact path.")],
    title: Annotated[str, typer.Option("--title", help="Artifact title.")],
    artifact_type: Annotated[
        str,
        typer.Option(
            "--type",
            help="Artifact type: knowledge_page, glossary, decision_note, source_index, log_entry, eval_pack.",
        ),
    ] = "knowledge_page",
    domain: Annotated[list[str] | None, typer.Option("--domain", help="Domain tag.")] = None,
    source_id: Annotated[
        list[str] | None, typer.Option("--source-id", help="Source reference.")
    ] = None,
    status: Annotated[str, typer.Option("--status", help="Artifact status: draft or active.")] = "draft",
    dry_run: Annotated[bool, typer.Option("--dry-run", help="Preview without writing.")] = False,
    yes: Annotated[bool, typer.Option("--yes", help="Write skeleton.")] = False,
) -> None:
    """Create a new artifact skeleton at --path.

    Without ``--yes`` the command is always a dry run: nothing is written and
    the rendered skeleton is echoed back as ``content_preview``. With ``--yes``
    the file is written atomically inside a mutation transaction.
    """

    def _run() -> None:
        target = resolve_repo_path(path)
        # Reject unsupported --type/--status values before doing any work.
        validate_choice(artifact_type, option="--type", supported=ALLOWED_ARTIFACT_TYPES)
        validate_choice(status, option="--status", supported=ALLOWED_NEW_ARTIFACT_STATUSES)
        # Omitting --yes forces a dry run even when --dry-run was not given.
        effective_dry_run = dry_run or not yes
        # Overwriting is only an error for a real write; a dry run over an
        # existing file is allowed so callers can preview.
        if target.exists() and not effective_dry_run:
            raise KcError(
                code="KC_FILE_EXISTS",
                message=f"Artifact already exists: {path}",
                details={"path": repo_relative(target)},
            )
        artifact_id = new_id("art")
        content = _artifact_template(
            artifact_id=artifact_id,
            title=title,
            artifact_type=artifact_type,
            status=status,
            domain=list(domain or []),
            source_ids=list(source_id or []),
        )
        if not effective_dry_run:
            paths = current_paths()
            # Atomic write under a transaction so a failed write cannot leave a
            # partially created artifact behind.
            with mutation_transaction(paths, "artifact.new", [target]) as tx:
                atomic_write_text(target, content)
                tx.commit({"path": repo_relative(target)})
        emit_success(
            "artifact.new",
            {
                "dry_run": effective_dry_run,
                "artifact_id": artifact_id,
                "path": repo_relative(target),
                "bytes": len(content.encode("utf-8")),
                # Only dry runs echo the full rendered skeleton back.
                "content_preview": content if effective_dry_run else None,
            },
            target={"path": repo_relative(target), "artifact_type": artifact_type},
        )

    run("artifact.new", _run)
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def validate_artifact_file(
    file: Path,
    *,
    allow_uncited: bool = False,
    schema: str | None = None,
) -> dict[str, Any]:
    """Validate one artifact file and return a structured result dict.

    Supports markdown (``.md``/``.markdown``) artifacts with YAML frontmatter
    and ``.json`` artifacts; any other suffix is a schema error. The result
    carries ``valid``, ``checks``, ``errors``, ``warnings``, the parsed
    ``frontmatter``, the collected ``citation_edges``, and the raw ``text`` /
    ``body`` (callers that emit the result strip the last two).

    Raises:
        KcError: with code KC_ARTIFACT_NOT_FOUND when the file does not exist.
    """
    paths = current_paths()
    target = resolve_repo_path(file)
    if not target.exists():
        raise KcError(
            code="KC_ARTIFACT_NOT_FOUND",
            message=f"Artifact not found: {file}",
            details={"path": repo_relative(target)},
        )
    checks: list[dict[str, Any]] = []
    errors: list[dict[str, Any]] = []
    warnings: list[dict[str, Any]] = []
    edges: list[CitationEdgeRecord] = []
    frontmatter: dict[str, Any] = {}
    body = ""
    # utf-8-sig tolerates a UTF-8 BOM at the start of the file.
    text = target.read_text(encoding="utf-8-sig")
    # Registered sources/ranges are the ground truth citations are checked against.
    sources = load_sources()
    ranges = load_ranges()
    source_ids = {source.source_id for source in sources}
    range_by_id = {source_range.range_id: source_range for source_range in ranges}
    # Existing registry record (if any) — needed for status-transition checks.
    existing = artifact_by_path(target)
    if target.suffix.lower() in {".md", ".markdown"}:
        # NOTE: read_markdown_artifact re-reads the file; `text` is replaced here.
        frontmatter, body, text = read_markdown_artifact(target)
        if not frontmatter:
            errors.append(
                {
                    "code": "KC_ARTIFACT_SCHEMA_INVALID",
                    "message": "Markdown artifact requires YAML frontmatter.",
                    "line": 1,
                }
            )
        missing_fields = sorted(REQUIRED_MARKDOWN_FRONTMATTER - set(frontmatter))
        if missing_fields:
            errors.append(
                {
                    "code": "KC_ARTIFACT_SCHEMA_INVALID",
                    "message": "Markdown artifact frontmatter is missing required fields.",
                    "details": {"missing_fields": missing_fields},
                }
            )
        declared_schema = frontmatter.get("schema_version")
        # --schema is an optional override: when given it must match exactly.
        if schema and declared_schema != schema:
            errors.append(
                {
                    "code": "KC_ARTIFACT_SCHEMA_INVALID",
                    "message": f"Artifact schema_version does not match --schema {schema}.",
                    "details": {"schema": schema, "actual": declared_schema},
                }
            )
        status = str(frontmatter.get("status", "draft"))
        artifact_type = str(frontmatter.get("artifact_type", "knowledge_page"))
        if artifact_type not in ALLOWED_ARTIFACT_TYPES:
            errors.append(
                {
                    "code": "KC_ARTIFACT_SCHEMA_INVALID",
                    "message": f"Unknown artifact_type: {artifact_type}",
                    "details": {"artifact_type": artifact_type},
                }
            )
        if status not in ALLOWED_ARTIFACT_STATUSES:
            errors.append(
                {
                    "code": "KC_ARTIFACT_STATUS_INVALID",
                    "message": f"Unknown artifact status: {status}",
                    "details": {"status": status},
                }
            )
        # An unknown registered status falls back to {itself}, i.e. only a
        # no-op transition is allowed.
        if existing and status not in ALLOWED_STATUS_TRANSITIONS.get(existing.status, {existing.status}):
            errors.append(
                {
                    "code": "KC_ARTIFACT_STATUS_INVALID",
                    "message": f"Invalid artifact status transition: {existing.status} -> {status}",
                    "details": {"from": existing.status, "to": status},
                }
            )
        errors.extend(_validate_source_refs(frontmatter.get("source_refs"), source_ids, range_by_id))
        requires_citations = bool(frontmatter.get("requires_citations", True))
        # Sections every markdown artifact must contain (case-normalized names
        # as produced by required_section_names).
        required_sections = {"summary", "source-backed facts", "open questions"}
        headings = required_section_names(body)
        missing_sections = sorted(required_sections - headings)
        if missing_sections:
            errors.append(
                {
                    "code": "KC_ARTIFACT_SCHEMA_INVALID",
                    "message": "Missing required sections.",
                    "details": {"missing_sections": missing_sections},
                }
            )
        # Coverage issues are reported with file-absolute line numbers, hence
        # the frontmatter line offset.
        errors.extend(
            citation_coverage_issues(
                body,
                status=status,
                requires_citations=requires_citations,
                allow_uncited=allow_uncited,
                line_offset=markdown_body_line_offset(text),
            )
        )
        # [kc:todo] markers are tolerated in drafts but flagged as a warning.
        if status == "draft" and "[kc:todo]" in body:
            warnings.append(
                warning(
                    "KC_ARTIFACT_TODO_MARKERS",
                    "[kc:todo] markers are valid only while the artifact remains draft.",
                    {"path": repo_relative(target), "status": status},
                )
            )
        # Resolve inline citation tokens against the registered sources/ranges.
        edges, citation_problems = validate_citations(
            repo_relative(target),
            text,
            sources_path=paths.sources_jsonl,
            ranges_path=paths.ranges_jsonl,
            citation_edges_path=paths.citation_edges_jsonl,
            artifact_id=frontmatter.get("artifact_id"),
        )
        errors.extend(citation_problems)
        checks.append(
            {
                "name": "markdown_frontmatter",
                "passed": bool(frontmatter),
                "schema_version": frontmatter.get("schema_version"),
            }
        )
        checks.append(
            {
                "name": "citation_tokens",
                "passed": not citation_problems,
                "citations": len(edges),
            }
        )
    elif target.suffix.lower() == ".json":
        try:
            data = json.loads(text)
        except json.JSONDecodeError as exc:
            errors.append(
                {
                    "code": "KC_JSON_INVALID",
                    "message": f"Invalid JSON artifact: {exc}",
                }
            )
            # Fall back to an empty object so the remaining checks still run.
            data = {}
        # In this branch `errors` can only hold the decode error at this point,
        # so "passed" reflects JSON parse success.
        checks.append({"name": "json_parse", "passed": not errors})
        if isinstance(data, dict):
            missing_json_fields = sorted(
                {"schema_version", "artifact_id", "title", "artifact_type", "status"} - set(data)
            )
            if missing_json_fields:
                errors.append(
                    {
                        "code": "KC_ARTIFACT_SCHEMA_INVALID",
                        "message": "JSON artifact is missing required fields.",
                        "details": {"missing_fields": missing_json_fields},
                    }
                )
        if schema and isinstance(data, dict) and data.get("schema_version") != schema:
            errors.append(
                {
                    "code": "KC_ARTIFACT_SCHEMA_INVALID",
                    "message": f"JSON artifact schema_version does not match --schema {schema}.",
                    "details": {"schema": schema, "actual": data.get("schema_version")},
                }
            )
        json_edges, json_problems = _validate_json_citations(
            data,
            artifact_path=repo_relative(target),
            artifact_id=data.get("artifact_id") if isinstance(data, dict) else None,
            sources_by_id={source.source_id: source for source in sources},
            ranges_by_id=range_by_id,
        )
        edges.extend(json_edges)
        errors.extend(json_problems)
        # Synthesize a frontmatter-shaped dict for JSON artifacts so callers
        # see a uniform result. (Note: in the first entry the `or` binds
        # tighter than the conditional, i.e. it parses as
        # `(schema or data.get(...)) if isinstance(data, dict) else schema`.)
        frontmatter = {
            "schema_version": schema or data.get("schema_version", "kc.json_artifact.v1")
            if isinstance(data, dict)
            else schema,
            "artifact_type": data.get("artifact_type", "glossary")
            if isinstance(data, dict)
            else "glossary",
            "status": data.get("status", "draft") if isinstance(data, dict) else "draft",
            "title": data.get("title", target.stem) if isinstance(data, dict) else target.stem,
            "domain": data.get("domain", []) if isinstance(data, dict) else [],
            "artifact_id": data.get("artifact_id") if isinstance(data, dict) else None,
        }
    else:
        errors.append(
            {
                "code": "KC_ARTIFACT_SCHEMA_INVALID",
                "message": f"Unsupported artifact file type: {target.suffix}",
            }
        )

    valid = not errors
    checks.append({"name": "artifact_valid", "passed": valid})
    return {
        "valid": valid,
        "path": repo_relative(target),
        "fingerprint": raw_fingerprint(target),
        "frontmatter": frontmatter,
        "checks": checks,
        "errors": errors,
        "warnings": warnings,
        "citation_edges": [edge.model_dump(mode="json") for edge in edges],
        "text": text,
        "body": body,
    }
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
def _validate_source_refs(
    raw_refs: Any,
    source_ids: set[str],
    range_by_id: dict[str, Any],
) -> list[dict[str, Any]]:
    """Check frontmatter ``source_refs`` against registered sources and ranges.

    Returns a list of error dicts (empty when everything resolves). A missing
    ``source_refs`` key (None) is not an error; a non-list value is.
    """
    if raw_refs is None:
        return []
    if not isinstance(raw_refs, list):
        return [
            {
                "code": "KC_ARTIFACT_SCHEMA_INVALID",
                "message": "source_refs must be a list.",
            }
        ]
    problems: list[dict[str, Any]] = []
    for entry in raw_refs:
        # Each entry must be an object; skip further checks for malformed ones.
        if not isinstance(entry, dict):
            problems.append(
                {
                    "code": "KC_ARTIFACT_SCHEMA_INVALID",
                    "message": "source_refs entries must be objects.",
                }
            )
            continue
        sid = entry.get("source_id")
        if sid not in source_ids:
            problems.append(
                {
                    "code": "KC_CITATION_SOURCE_MISSING",
                    "message": f"source_refs source does not exist: {sid}",
                    "source_id": sid,
                }
            )
        # "ranges" is preferred; "range_ids" is accepted as a legacy alias.
        declared = entry.get("ranges", entry.get("range_ids", [])) or []
        for raw_range in declared:
            rid = str(raw_range)
            resolved = _resolve_source_ref_range(sid, rid, range_by_id)
            if resolved is None:
                problems.append(
                    {
                        "code": "KC_CITATION_RANGE_MISSING",
                        "message": f"source_refs range does not exist: {rid}",
                        "source_id": sid,
                        "range_id": rid,
                    }
                )
            elif sid is not None and resolved.source_id != sid:
                problems.append(
                    {
                        "code": "KC_CITATION_RANGE_MISSING",
                        "message": f"source_refs range does not belong to source: {rid}",
                        "source_id": sid,
                        "range_id": rid,
                    }
                )
    return problems
|
|
448
|
+
|
|
449
|
+
|
|
450
|
+
def _source_ref_locator_aliases(range_record: Any) -> set[str]:
|
|
451
|
+
locator = range_record.locator
|
|
452
|
+
if locator.kind == "line_range":
|
|
453
|
+
return {f"L{locator.start_line}-L{locator.end_line}"}
|
|
454
|
+
if locator.kind == "json_pointer":
|
|
455
|
+
pointer = str(locator.pointer or "/")
|
|
456
|
+
return {f"JP:{pointer}", f"JP:{quote(pointer, safe='/~')}"}
|
|
457
|
+
if locator.kind == "csv_row_range":
|
|
458
|
+
return {f"CSV:R{locator.start_row}-R{locator.end_row}", f"R{locator.start_row}-R{locator.end_row}"}
|
|
459
|
+
return set()
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
def _resolve_source_ref_range(
    source_id: Any,
    value: str,
    range_by_id: dict[str, Any],
) -> Any | None:
    """Resolve a source_refs range reference to a range record, or None.

    An exact range_id match wins; otherwise the value is matched against each
    range's locator aliases, restricted to the given source when one is named.
    """
    hit = range_by_id.get(value)
    if hit is not None:
        return hit
    # Fall back to alias matching over ranges belonging to this source.
    candidates = (
        rec
        for rec in range_by_id.values()
        if source_id is None or rec.source_id == source_id
    )
    for rec in candidates:
        if value in _source_ref_locator_aliases(rec):
            return rec
    return None
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
def _validate_json_citations(
    data: Any,
    *,
    artifact_path: str,
    artifact_id: str | None,
    sources_by_id: dict[str, Any],
    ranges_by_id: dict[str, Any],
) -> tuple[list[CitationEdgeRecord], list[dict[str, Any]]]:
    """Walk a JSON artifact and validate every embedded ``citations`` list.

    Recursively visits the document; any dict node may carry a ``citations``
    list of ``{"source_id": ..., "range_id": ...}`` objects. Each citation
    becomes a CitationEdgeRecord whose status reflects the first failed check
    (missing_source, missing_range, locator_mismatch, stale_source, else
    valid); failures are also reported as problem dicts carrying the node's
    JSON Pointer.

    Returns:
        (edges, problems) — all edges (valid and not) plus the problem dicts.
    """
    edges: list[CitationEdgeRecord] = []
    problems: list[dict[str, Any]] = []
    # One timestamp shared by every edge produced in this pass.
    timestamp = now()

    def visit(value: Any, pointer: str) -> None:
        # `pointer` is the JSON Pointer of `value` ("" for the document root).
        if isinstance(value, dict):
            raw_citations = value.get("citations")
            if isinstance(raw_citations, list):
                for citation in raw_citations:
                    if not isinstance(citation, dict):
                        problems.append(
                            {
                                "code": "KC_CITATION_INVALID_TOKEN",
                                "message": "JSON artifact citation entries must be objects.",
                                "pointer": pointer,
                            }
                        )
                        continue
                    source_id = str(citation.get("source_id", ""))
                    range_id = citation.get("range_id")
                    range_id_str = str(range_id) if range_id is not None else ""
                    source = sources_by_id.get(source_id)
                    range_record = ranges_by_id.get(range_id_str)
                    # Checks are ordered; only the first failure sets status.
                    status = "valid"
                    if source is None:
                        status = "missing_source"
                        problems.append(
                            {
                                "code": "KC_CITATION_SOURCE_MISSING",
                                "message": f"JSON citation source does not exist: {source_id}",
                                "pointer": pointer,
                                "source_id": source_id,
                            }
                        )
                    elif range_record is None:
                        status = "missing_range"
                        problems.append(
                            {
                                "code": "KC_CITATION_RANGE_MISSING",
                                "message": f"JSON citation range does not exist: {range_id}",
                                "pointer": pointer,
                                "source_id": source_id,
                                "range_id": range_id_str,
                            }
                        )
                    elif range_record.source_id != source_id:
                        status = "locator_mismatch"
                        problems.append(
                            {
                                "code": "KC_CITATION_RANGE_MISSING",
                                "message": f"JSON citation range does not belong to source: {range_id}",
                                "pointer": pointer,
                                "source_id": source_id,
                                "range_id": range_id_str,
                            }
                        )
                    elif range_record.source_fingerprint != source.fingerprint:
                        # The range was captured against an older revision of
                        # the source file.
                        status = "stale_source"
                        problems.append(
                            {
                                "code": "KC_CITATION_STALE_SOURCE",
                                "message": f"JSON citation points to stale source fingerprint: {range_id}",
                                "pointer": pointer,
                                "source_id": source_id,
                                "range_id": range_id_str,
                            }
                        )
                    # An edge is recorded even for failed citations so callers
                    # can see the full citation graph with statuses.
                    edges.append(
                        CitationEdgeRecord(
                            edge_id=new_id("cite"),
                            artifact_id=artifact_id,
                            artifact_path=artifact_path,
                            # JSON artifacts have no meaningful line span.
                            artifact_locator=ArtifactLocator(start_line=1, end_line=1),
                            citation_token=f"json:{source_id}:{range_id_str}",
                            source_id=source_id,
                            range_id=range_id_str or None,
                            source_fingerprint_at_validation=source.fingerprint if source else None,
                            validated_at=timestamp,
                            status=status,  # type: ignore[arg-type]
                            metadata={"json_pointer": pointer or "/"},
                        )
                    )
            for key, child in value.items():
                # RFC 6901 JSON Pointer escaping: "~" -> "~0", "/" -> "~1".
                escaped = str(key).replace("~", "~0").replace("/", "~1")
                visit(child, f"{pointer}/{escaped}")
        elif isinstance(value, list):
            for index, child in enumerate(value):
                visit(child, f"{pointer}/{index}")

    visit(data, "")
    return edges, problems
|
|
577
|
+
|
|
578
|
+
|
|
579
|
+
@app.command("validate", help="Validate artifact schema, required sections, citations, and provenance.")
def validate(
    file: Annotated[Path, typer.Option("--file", help="Artifact file.")],
    schema: Annotated[str | None, typer.Option("--schema", help="Schema override.")] = None,
    allow_uncited: Annotated[
        bool, typer.Option("--allow-uncited", help="Allow kc:uncited markers for uncited paragraphs.")
    ] = False,
) -> None:
    """Validate one artifact and emit the result envelope.

    Citation problems (KC_CITATION_*) map to the provenance exit code; all
    other problems map to the validation exit code. The overall exit code is
    the maximum across all reported errors.
    """

    def _run() -> None:
        result = validate_artifact_file(file, allow_uncited=allow_uncited, schema=schema)
        if not result["valid"]:
            errors = []
            for item in result["errors"]:
                code = str(item.get("code", "KC_ARTIFACT_SCHEMA_INVALID"))
                # Citation failures are provenance errors; everything else is
                # a validation error.
                exit_code = (
                    EXIT_PROVENANCE if str(code).startswith("KC_CITATION") else EXIT_VALIDATION
                )
                errors.append(
                    KcError(
                        code=code,
                        message=item.get("message", "Artifact validation failed."),
                        details=item,
                        exit_code=exit_code,
                        suggested_action="fix artifact content or citations",
                    ).to_message()
                )
            # The worst (highest) per-error exit code wins.
            exit_code = max(error["exit_code"] for error in errors) if errors else EXIT_VALIDATION
            # NOTE(review): no return after emit — this assumes emit(...,
            # exit_code=...) terminates the command; confirm, otherwise
            # emit_success below would also fire on failure.
            emit(
                envelope(
                    "artifact.validate",
                    None,
                    ok=False,
                    target={"file": result["path"]},
                    warnings=result.get("warnings", []),
                    errors=errors,
                ),
                exit_code=exit_code,
            )
        # Strip the raw text/body from the success payload; they are internal.
        emit_success(
            "artifact.validate",
            {k: v for k, v in result.items() if k not in {"text", "body"}},
            target={"file": result["path"]},
            warnings=result.get("warnings", []),
        )

    run("artifact.validate", _run)
|
|
625
|
+
|
|
626
|
+
|
|
627
|
+
@app.command("diff", help="Build a structured apply plan and show artifact changes before mutation.")
def diff(
    file: Annotated[Path, typer.Option("--file", help="Artifact file.")],
    against: Annotated[str | None, typer.Option("--against", help="Comparison baseline: registry or HEAD.")] = None,
) -> None:
    """Build an apply plan and unified diff for an artifact without mutating.

    Validation is best-effort: if it raises, the plan is flagged with
    ``validation_errors`` and the error is attached to the plan metadata
    instead of failing the command.
    """

    def _run() -> None:
        target = resolve_repo_path(file)
        validate_choice(against, option="--against", supported={"registry", "HEAD"}, allow_none=True)
        if not target.exists():
            raise KcError(
                code="KC_ARTIFACT_NOT_FOUND",
                message=f"Artifact not found: {file}",
                details={"path": repo_relative(target)},
            )
        existing = artifact_by_path(target)
        # Prefer the last applied snapshot as the diff baseline when recorded.
        baseline_path = _last_applied_snapshot_path(existing)
        plan, diff_text, baseline = build_artifact_plan(
            target,
            registered_fingerprint=existing.fingerprint if existing else None,
            baseline_path=baseline_path,
        )
        # Fix: initialize up front instead of probing `"validation" in locals()`
        # afterwards — same behavior (no warnings when validation raised),
        # without fragile locals() introspection.
        validation: dict[str, Any] = {}
        try:
            validation = validate_artifact_file(target)
            plan = _enrich_plan(plan, target=target, validation=validation, existing=existing)
        except KcError as exc:
            # Validation failure does not abort the diff; it is recorded on
            # the plan so callers can see why apply would be risky.
            plan = plan.model_copy(
                update={
                    "risk_flags": sorted({*plan.risk_flags, "validation_errors"}),
                    "metadata": {
                        **plan.metadata,
                        "validation_error": exc.to_message(),
                    },
                }
            )
        baseline_result = dict(baseline)
        if baseline_result.get("path"):
            baseline_result["path"] = repo_relative(Path(str(baseline_result["path"])))
        emit_success(
            "artifact.diff",
            {
                "plan": plan.model_dump(mode="json"),
                "diff": diff_text,
                "baseline": baseline_result,
                "diff_path": None,
                "risk_flags": plan.risk_flags,
            },
            target={"file": repo_relative(target), "against": against or "registry"},
            warnings=validation.get("warnings", []),
        )

    run("artifact.diff", _run)
|
|
678
|
+
|
|
679
|
+
|
|
680
|
+
def _source_refs_from_edges(edges: list[CitationEdgeRecord]) -> list[SourceRef]:
    """Collapse the valid citation edges into per-source SourceRef entries.

    Edges with a non-"valid" status are ignored. Output is sorted by source id
    and each SourceRef carries its range ids in sorted order; sources cited
    without a range id still yield an entry with an empty range list.
    """
    grouped: dict[str, set[str]] = {}
    for edge in edges:
        if edge.status != "valid":
            continue
        bucket = grouped.setdefault(edge.source_id, set())
        if edge.range_id:
            bucket.add(edge.range_id)
    refs: list[SourceRef] = []
    for sid, rids in sorted(grouped.items()):
        refs.append(SourceRef(source_id=sid, range_ids=sorted(rids)))
    return refs
|
|
692
|
+
|
|
693
|
+
|
|
694
|
+
def _record_from_validation(
    target: Path, validation: dict[str, Any], existing: ArtifactRecord | None
) -> ArtifactRecord:
    """Build the registry ArtifactRecord for a just-validated artifact.

    Carries over the artifact id and created_at from `existing` when present;
    otherwise mints a fresh id and uses the current timestamp. Only edges with
    status "valid" contribute to source_refs.
    """
    frontmatter = validation.get("frontmatter") or {}
    body = validation.get("body") or ""
    # Shared timestamp for updated_at/last_validated_at (and created_at on new
    # records).
    timestamp = now()
    # Precedence: frontmatter id, then the registered id, then a new one.
    artifact_id = str(
        frontmatter.get("artifact_id") or (existing.artifact_id if existing else new_id("art"))
    )
    edges = [
        CitationEdgeRecord.model_validate(edge)
        for edge in validation.get("citation_edges", [])
        if edge.get("status") == "valid"
    ]
    return ArtifactRecord(
        artifact_id=artifact_id,
        path=repo_relative(target),
        artifact_type=str(frontmatter.get("artifact_type", "knowledge_page")),  # type: ignore[arg-type]
        title=markdown_title(frontmatter, body, target.stem),
        status=str(frontmatter.get("status", "draft")),  # type: ignore[arg-type]
        domain=list(frontmatter.get("domain", []) or []),
        fingerprint=validation["fingerprint"],
        created_at=existing.created_at if existing else timestamp,
        updated_at=timestamp,
        last_validated_at=timestamp,
        validation_status="passed",
        source_refs=_source_refs_from_edges(edges),
        metadata={"compiled_by": "external_agent", "agent_tool": "kc-cli"},
    )
|
|
723
|
+
|
|
724
|
+
|
|
725
|
+
def _last_applied_snapshot_path(existing: ArtifactRecord | None) -> Path | None:
    """Return the resolved path of the artifact's last applied snapshot.

    Returns None when there is no registered record, no string-valued
    "last_applied_snapshot" metadata entry, or the recorded path fails to
    resolve inside the repository.
    """
    if existing is None:
        return None
    recorded = existing.metadata.get("last_applied_snapshot")
    if isinstance(recorded, str):
        try:
            return resolve_repo_path(recorded)
        except KcError:
            # A snapshot path outside the repo is treated as absent.
            return None
    return None
|
|
735
|
+
|
|
736
|
+
|
|
737
|
+
def _enrich_plan(
    plan: PlanRecord,
    *,
    target: Path,
    validation: dict[str, Any],
    existing: ArtifactRecord | None,
) -> PlanRecord:
    """Return a copy of *plan* annotated with risk flags and apply metadata.

    Compares the freshly validated artifact state against the registered
    record (*existing*), derives risk flags, stamps each operation with
    registry-change details, and records fingerprints, citation-edge deltas,
    a knowledge-log preview, and the files a real apply would touch.
    """
    frontmatter = validation.get("frontmatter") or {}
    # Status after apply: frontmatter wins; otherwise keep the registered status (or "draft").
    new_status = str(frontmatter.get("status", existing.status if existing else "draft"))
    valid_edges = [
        edge
        for edge in validation.get("citation_edges", [])
        if isinstance(edge, dict) and edge.get("status") == "valid"
    ]
    # Total registered range references today, used to detect citation removal below.
    old_ref_count = (
        sum(len(ref.range_ids) for ref in existing.source_refs)
        if existing is not None
        else 0
    )
    risk_flags = set(plan.risk_flags)
    if existing and existing.status == "active":
        risk_flags.add("updates_active_artifact")
    if existing and existing.status != new_status:
        risk_flags.add("status_transition")
    if old_ref_count > len(valid_edges):
        risk_flags.add("removes_citations")
    if "[kc:uncited]" in str(validation.get("text", "")):
        risk_flags.add("adds_uncited_claim_markers")
    if any(
        isinstance(error, dict) and error.get("code") == "KC_CITATION_STALE_SOURCE"
        for error in validation.get("errors", [])
    ):
        risk_flags.add("stale_source_reference")

    # Any risk flag bumps every operation from "low" to "medium" risk.
    operations = [
        operation.model_copy(
            update={
                "risk": "medium" if risk_flags else "low",
                "details": {
                    **operation.details,
                    "registry_change": "update" if existing else "create",
                    "citation_edges_after": len(valid_edges),
                    "artifact_status_after": new_status,
                },
            }
        )
        for operation in plan.operations
    ]
    metadata = {
        **plan.metadata,
        "direct_edit_apply": True,
        "artifact_path": repo_relative(target),
        "registry_changes": {
            "artifact": "update" if existing else "create",
            "before_fingerprint": existing.fingerprint if existing else None,
            "after_fingerprint": validation.get("fingerprint"),
        },
        "citation_edge_changes": {
            "after": len(valid_edges),
            "registered_before": old_ref_count,
        },
        # Preview of the knowledge-log entry an apply would append.
        "log_preview": _log_entry_text(
            markdown_title(frontmatter, validation.get("body") or "", target.stem),
            repo_relative(target),
            validation.get("fingerprint"),
            plan.plan_id,
        ),
        # Every state file a real apply will write, for reviewer visibility.
        "changed_files": [
            repo_relative(current_paths().artifacts_jsonl),
            repo_relative(current_paths().citation_edges_jsonl),
            repo_relative(current_paths().log_path),
            repo_relative(current_paths().sqlite_path),
            repo_relative(current_paths().plans_dir / f"{plan.plan_id}.json"),
        ],
    }
    return plan.model_copy(
        update={
            "operations": operations,
            "risk_flags": sorted(risk_flags),
            "metadata": metadata,
        }
    )
|
|
819
|
+
def _target_from_plan_path(path: str) -> Path:
    """Resolve a plan operation path (absolute or repo-relative) under the repo root."""
    raw = Path(path)
    # Absolute paths are containment-checked; relative ones resolve against the repo root.
    return ensure_under_root(raw) if raw.is_absolute() else resolve_repo_path(raw)
|
|
824
|
+
|
|
825
|
+
|
|
826
|
+
def _load_plan_file(plan_file: Path) -> PlanRecord:
    """Load *plan_file* and return it as a validated PlanRecord.

    Raises KcError for a missing file, malformed JSON, a wrong or missing
    schema_version, or a payload that fails PlanRecord validation.
    """
    resolved = resolve_repo_path(plan_file)
    if not resolved.exists():
        raise KcError(
            code="KC_FILE_NOT_FOUND",
            message=f"Plan file not found: {plan_file}",
            details={"path": repo_relative(resolved)},
        )
    try:
        payload = orjson.loads(resolved.read_bytes())
    except orjson.JSONDecodeError as err:
        raise KcError(
            code="KC_JSON_INVALID",
            message=f"Invalid plan JSON: {err}",
            details={"path": repo_relative(resolved)},
        ) from err
    schema_ok = isinstance(payload, dict) and payload.get("schema_version") == "kc.plan.v1"
    if not schema_ok:
        raise KcError(
            code="KC_PLAN_PRECONDITION_FAILED",
            message="Plan file must use schema_version kc.plan.v1.",
            details={"path": repo_relative(resolved)},
        )
    try:
        return PlanRecord.model_validate(payload)
    except Exception as err:
        # Pydantic validation failures surface as a plan precondition error.
        raise KcError(
            code="KC_PLAN_PRECONDITION_FAILED",
            message=f"Invalid kc plan record: {err}",
            details={"path": repo_relative(resolved)},
        ) from err
|
|
856
|
+
|
|
857
|
+
|
|
858
|
+
def _plan_operation(plan: PlanRecord) -> tuple[str, Path]:
    """Return (operation path string, resolved target Path) for an artifact.apply plan.

    Raises KcError when the plan is for another command or does not contain
    exactly one operation.
    """
    if plan.command != "artifact.apply":
        raise KcError(
            code="KC_PLAN_PRECONDITION_FAILED",
            message=f"Plan command is not artifact.apply: {plan.command}",
            details={"plan_id": plan.plan_id, "command": plan.command},
        )
    if len(plan.operations) != 1:
        raise KcError(
            code="KC_PLAN_PRECONDITION_FAILED",
            message="Artifact apply plans must contain exactly one operation.",
            details={"plan_id": plan.plan_id, "operations": len(plan.operations)},
        )
    sole = plan.operations[0]
    return sole.path, _target_from_plan_path(sole.path)
|
|
873
|
+
|
|
874
|
+
|
|
875
|
+
def _enforce_plan_preconditions(
    plan: PlanRecord,
    target: Path,
    existing: ArtifactRecord | None,
    validation: dict[str, Any],
) -> None:
    """Raise KcError(KC_PLAN_PRECONDITION_FAILED) unless *plan* still applies to *target*.

    Checks, in order: the plan's single operation targets *target*; every
    declared file_exists=true precondition holds; the registered ("before")
    fingerprint matches the plan's expectation; the freshly validated
    ("after") fingerprint matches the plan's expectation.
    """
    operation_path, operation_target = _plan_operation(plan)
    if operation_target != target:
        raise KcError(
            code="KC_PLAN_PRECONDITION_FAILED",
            message="Plan operation path does not match the requested artifact.",
            details={
                "plan_id": plan.plan_id,
                "operation_path": operation_path,
                "target": repo_relative(target),
            },
        )
    for condition in plan.preconditions:
        # Only the file_exists/true precondition kind is enforced here.
        if condition.kind == "file_exists" and condition.expected == "true" and not target.exists():
            raise KcError(
                code="KC_PLAN_PRECONDITION_FAILED",
                message="Plan precondition failed: artifact file must exist.",
                details={"plan_id": plan.plan_id, "path": repo_relative(target)},
            )

    operation = plan.operations[0]
    # "before" compares against the registry record; None means not yet registered.
    actual_before = existing.fingerprint if existing else None
    if operation.before_fingerprint != actual_before:
        raise KcError(
            code="KC_PLAN_PRECONDITION_FAILED",
            message="Plan registry fingerprint precondition failed.",
            details={
                "plan_id": plan.plan_id,
                "expected": operation.before_fingerprint,
                "actual": actual_before,
                "path": repo_relative(target),
            },
        )
    # "after" compares against the fingerprint of the file as just validated.
    actual_after = str(validation["fingerprint"])
    if operation.after_fingerprint != actual_after:
        raise KcError(
            code="KC_PLAN_PRECONDITION_FAILED",
            message="Plan artifact fingerprint precondition failed.",
            details={
                "plan_id": plan.plan_id,
                "expected": operation.after_fingerprint,
                "actual": actual_after,
                "path": repo_relative(target),
            },
        )
|
|
925
|
+
|
|
926
|
+
|
|
927
|
+
def _save_plan_file(plan: PlanRecord) -> str:
    """Write the plan as JSON under plans_dir and return its repo-relative path."""
    destination = current_paths().plans_dir / f"{plan.plan_id}.json"
    serialized = json_dumps(plan.model_dump(mode="json")) + "\n"
    atomic_write_text(destination, serialized)
    return repo_relative(destination)
|
|
931
|
+
|
|
932
|
+
|
|
933
|
+
def _snapshot_kc_state(snapshot_dir: Path, paths: Any) -> list[dict[str, str]]:
    """Copy the mutable kc state files into ``snapshot_dir/.kc-state``.

    Returns one manifest entry (path, fingerprint, snapshot_path) per file
    that existed; missing state files are skipped silently.
    """
    state_root = snapshot_dir / ".kc-state"
    manifest: list[dict[str, str]] = []
    tracked = (paths.artifacts_jsonl, paths.citation_edges_jsonl, paths.log_path)
    for original in tracked:
        if not original.exists():
            continue
        copied = state_root / original.name
        copy_snapshot(original, copied)
        manifest.append(
            {
                "path": repo_relative(original),
                "fingerprint": raw_fingerprint(original),
                "snapshot_path": repo_relative(copied),
            }
        )
    return manifest
|
|
952
|
+
|
|
953
|
+
|
|
954
|
+
def _log_entry_text(title: str, path: str, fingerprint: str | None, plan_id: str) -> str:
|
|
955
|
+
return (
|
|
956
|
+
f"## {datetime.now(UTC).date().isoformat()} - {title}\n\n"
|
|
957
|
+
f"- Plan: {plan_id}\n"
|
|
958
|
+
f"- Artifact: {path}\n"
|
|
959
|
+
f"- Fingerprint: {fingerprint}\n\n"
|
|
960
|
+
)
|
|
961
|
+
|
|
962
|
+
|
|
963
|
+
@app.command("apply", help="Validate, lock, snapshot, register, and apply an artifact safely.")
def apply(
    file: Annotated[Path | None, typer.Option("--file", help="Artifact file.")] = None,
    plan_file: Annotated[Path | None, typer.Option("--plan", help="Plan JSON file.")] = None,
    dry_run: Annotated[bool, typer.Option("--dry-run", help="Preview without writing.")] = False,
    yes: Annotated[bool, typer.Option("--yes", help="Apply registry/citation changes.")] = False,
    skip_validate: Annotated[
        bool, typer.Option("--skip-validate", help="Skip validation.")
    ] = False,
    idempotency_key: Annotated[
        str | None, typer.Option("--idempotency-key", help="Safe retry key.")
    ] = None,
) -> None:
    """Apply an artifact: validate, plan, lock, snapshot, register, and index it.

    Without --yes the command behaves as a dry run. Either --file or --plan
    (or both, if they agree on the target) must be supplied.
    """
    def _run() -> None:
        paths = current_paths()
        cfg = load_config(paths.root)
        if file is None and plan_file is None:
            raise KcError(
                code="KC_ARTIFACT_NOT_FOUND",
                message="Provide --file or --plan.",
            )
        loaded_plan: PlanRecord | None = None
        # Resolve the target from --file when given, otherwise from the plan's operation.
        if file is not None:
            target = resolve_repo_path(file)
        else:
            if plan_file is None:
                raise KcError(
                    code="KC_ARTIFACT_NOT_FOUND",
                    message="Provide --file or --plan.",
                )
            loaded_plan = _load_plan_file(plan_file)
            _operation_path, target = _plan_operation(loaded_plan)
        # When both are given, the plan's operation must point at the same file.
        if file is not None and plan_file is not None:
            loaded_plan = _load_plan_file(plan_file)
            _operation_path, plan_target = _plan_operation(loaded_plan)
            if plan_target != target:
                raise KcError(
                    code="KC_PLAN_PRECONDITION_FAILED",
                    message="--file does not match --plan operation path.",
                    details={
                        "file": repo_relative(target),
                        "plan_target": repo_relative(plan_target),
                    },
                )
        # Without an explicit --yes, everything below stays read-only.
        effective_dry_run = dry_run or not yes
        previous = get_idempotency(paths.sqlite_path, idempotency_key) if idempotency_key else None
        if previous:
            # Replay is only safe if the file is byte-identical to the prior apply
            # and the key was used for the same target path.
            current_fingerprint = raw_fingerprint(target) if target.exists() else None
            previous_plan = previous.get("plan") if isinstance(previous.get("plan"), dict) else {}
            previous_ops = previous_plan.get("operations") if isinstance(previous_plan, dict) else []
            previous_op = previous_ops[0] if isinstance(previous_ops, list) and previous_ops else {}
            previous_after = previous_op.get("after_fingerprint") if isinstance(previous_op, dict) else None
            previous_path = previous_op.get("path") if isinstance(previous_op, dict) else None
            if previous_after != current_fingerprint or (
                previous_path and repo_relative(_target_from_plan_path(str(previous_path))) != repo_relative(target)
            ):
                raise KcError(
                    code="KC_PLAN_PRECONDITION_FAILED",
                    message="Idempotency key was already used for a different artifact state.",
                    details={
                        "key": idempotency_key,
                        "previous_path": previous_path,
                        "target": repo_relative(target),
                        "previous_fingerprint": previous_after,
                        "current_fingerprint": current_fingerprint,
                    },
                )
            previous["noop"] = True
            previous["idempotency"] = {"key": idempotency_key, "status": "replayed"}
            # NOTE(review): no return after this call; control flow here (and in the
            # dry-run branch below) relies on emit_success terminating the command
            # (e.g. raising typer.Exit) — confirm against kc.output.emit_success.
            emit_success("artifact.apply", previous, target={"file": repo_relative(target)})
        if skip_validate and is_llm_mode() and not cfg.allow_skip_validate_in_llm:
            raise KcError(
                code="KC_APPLY_NOT_VALIDATED",
                message="--skip-validate is blocked when LLM=true.",
                details={"allow_skip_validate_in_llm": False},
            )
        # With --skip-validate, synthesize a minimal "valid" payload from the raw file.
        validation = (
            {
                "valid": True,
                "fingerprint": raw_fingerprint(target),
                "frontmatter": {},
                "citation_edges": [],
                "warnings": [],
                "text": target.read_text(encoding="utf-8-sig"),
                "body": "",
            }
            if skip_validate
            else validate_artifact_file(target)
        )
        if not validation["valid"]:
            raise KcError(
                code="KC_APPLY_NOT_VALIDATED",
                message="Artifact does not validate; run kc artifact validate for details.",
                details={"path": repo_relative(target), "errors": validation["errors"]},
            )
        existing = artifact_by_path(target)
        if loaded_plan is not None:
            # Pre-supplied plan: enforce its preconditions, then reuse it (only the
            # diff from build_artifact_plan is kept; its generated plan is discarded).
            _enforce_plan_preconditions(loaded_plan, target, existing, validation)
            plan = loaded_plan.model_copy(
                update={
                    "mode": "dry_run" if effective_dry_run else "apply",
                    "idempotency_key": idempotency_key or loaded_plan.idempotency_key,
                }
            )
            _discarded_plan, diff_text, _baseline = build_artifact_plan(
                target,
                registered_fingerprint=existing.fingerprint if existing else None,
                baseline_path=_last_applied_snapshot_path(existing),
            )
        else:
            plan, diff_text, _baseline = build_artifact_plan(
                target,
                registered_fingerprint=existing.fingerprint if existing else None,
                baseline_path=_last_applied_snapshot_path(existing),
                mode="dry_run" if effective_dry_run else "apply",
                idempotency_key=idempotency_key,
            )
        plan = _enrich_plan(plan, target=target, validation=validation, existing=existing)
        if effective_dry_run:
            # Dry run: report the plan, diff, and validation (minus bulky text fields).
            emit_success(
                "artifact.apply",
                {
                    "dry_run": True,
                    "applied": False,
                    "plan": plan.model_dump(mode="json"),
                    "diff": diff_text,
                    "validation": {
                        k: v for k, v in validation.items() if k not in {"text", "body"}
                    },
                },
                target={"file": repo_relative(target)},
                warnings=validation.get("warnings", []),
            )

        # Real apply: transaction plus a per-file lock around all state mutation.
        with mutation_transaction(paths, "artifact.apply", [target]) as repo_tx, FileLock(
            paths.locks_dir, path_lock_name(target), "artifact.apply", repo_relative(target)
        ):
            # Guard against the file changing between validation and lock acquisition.
            locked_fingerprint = raw_fingerprint(target)
            if locked_fingerprint != validation["fingerprint"]:
                raise KcError(
                    code="KC_PLAN_PRECONDITION_FAILED",
                    message="Artifact changed after validation and before lock acquisition.",
                    details={
                        "path": repo_relative(target),
                        "validated_fingerprint": validation["fingerprint"],
                        "locked_fingerprint": locked_fingerprint,
                    },
                )
            artifact = _record_from_validation(target, validation, existing)
            # Replace any prior record for this path in the artifact registry.
            artifacts = [a for a in load_artifacts() if a.path != artifact.path]
            edges = [
                CitationEdgeRecord.model_validate(edge)
                for edge in validation.get("citation_edges", [])
            ]
            for idx, edge in enumerate(edges):
                edge.artifact_id = artifact.artifact_id
                edge.edge_id = edge.edge_id or new_id("cite")
                edges[idx] = edge
            # Replace this artifact's citation edges; keep everyone else's.
            all_edges = [e for e in load_citation_edges() if e.artifact_path != artifact.path]
            all_edges.extend(edges)
            snapshot_dir = (
                paths.snapshots_dir
                / f"{datetime.now(UTC).strftime('%Y%m%dT%H%M%SZ')}_{plan.plan_id}"
            )
            snapshot_path = snapshot_dir / Path(artifact.path).name
            copy_snapshot(target, snapshot_path)
            # Record the snapshot on the artifact so future applies can diff against it.
            artifact = artifact.model_copy(
                update={
                    "metadata": {
                        **artifact.metadata,
                        "last_applied_snapshot": repo_relative(snapshot_path),
                        "last_applied_plan_id": plan.plan_id,
                    }
                }
            )
            artifacts.append(artifact)
            # Snapshot the state files BEFORE rewriting them below.
            state_snapshots = _snapshot_kc_state(snapshot_dir, paths)
            save_artifacts(sorted(artifacts, key=lambda a: a.path))
            save_citation_edges(all_edges)
            if cfg.update_log:
                _append_log(paths.log_path, artifact, plan.plan_id)
            save_plan(paths.sqlite_path, plan)
            plan_path = _save_plan_file(plan)
            rebuild_index(
                paths.sqlite_path,
                load_sources(),
                load_ranges(),
                load_artifacts(),
                load_citation_edges(),
            )
            result = {
                "dry_run": False,
                "applied": True,
                "artifact": artifact.model_dump(mode="json"),
                "citation_edges": len(edges),
                "plan": plan.model_dump(mode="json"),
                "plan_path": plan_path,
                "snapshot": {
                    "schema_version": "kc.snapshot.v1",
                    "snapshot_id": new_id("snap"),
                    "plan_id": plan.plan_id,
                    "files": [
                        {
                            "path": artifact.path,
                            "fingerprint": validation["fingerprint"],
                            "snapshot_path": repo_relative(snapshot_path),
                        },
                        *state_snapshots,
                    ],
                },
            }
            # Persist the result under the idempotency key so retries replay as no-ops.
            if idempotency_key:
                save_idempotency(paths.sqlite_path, idempotency_key, plan.plan_id, result)
            repo_tx.commit({"path": repo_relative(target), "plan_id": plan.plan_id})
            emit_success(
                "artifact.apply",
                result,
                target={"file": repo_relative(target)},
                warnings=validation.get("warnings", []),
            )

    run("artifact.apply", _run)
|
|
1185
|
+
|
|
1186
|
+
|
|
1187
|
+
def _append_log(log_path: Path, artifact: ArtifactRecord, plan_id: str) -> None:
    """Append a knowledge-log entry for *artifact*, creating the log header if needed."""
    if log_path.exists():
        existing_text = log_path.read_text(encoding="utf-8")
    else:
        existing_text = "# Knowledge Log\n\n"
    entry = _log_entry_text(artifact.title, artifact.path, artifact.fingerprint, plan_id)
    atomic_write_text(log_path, f"{existing_text.rstrip()}\n\n{entry}")
|