kc-cli 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kc/__init__.py +5 -0
- kc/__main__.py +11 -0
- kc/artifacts/__init__.py +1 -0
- kc/artifacts/diff.py +76 -0
- kc/artifacts/frontmatter.py +26 -0
- kc/artifacts/markdown.py +116 -0
- kc/atomic_write.py +33 -0
- kc/cli.py +284 -0
- kc/commands/__init__.py +1 -0
- kc/commands/artifact.py +1190 -0
- kc/commands/citation.py +231 -0
- kc/commands/common.py +346 -0
- kc/commands/conformance.py +293 -0
- kc/commands/context.py +190 -0
- kc/commands/doctor.py +81 -0
- kc/commands/eval.py +133 -0
- kc/commands/export.py +97 -0
- kc/commands/guide.py +571 -0
- kc/commands/index.py +54 -0
- kc/commands/init.py +207 -0
- kc/commands/lint.py +238 -0
- kc/commands/source.py +464 -0
- kc/commands/status.py +52 -0
- kc/commands/task.py +260 -0
- kc/config.py +127 -0
- kc/embedding_models/potion-base-8M/README.md +97 -0
- kc/embedding_models/potion-base-8M/config.json +13 -0
- kc/embedding_models/potion-base-8M/model.safetensors +0 -0
- kc/embedding_models/potion-base-8M/modules.json +14 -0
- kc/embedding_models/potion-base-8M/tokenizer.json +1 -0
- kc/errors.py +141 -0
- kc/fingerprints.py +35 -0
- kc/ids.py +23 -0
- kc/locks.py +65 -0
- kc/models/__init__.py +17 -0
- kc/models/artifact.py +34 -0
- kc/models/citation.py +60 -0
- kc/models/context.py +23 -0
- kc/models/eval.py +21 -0
- kc/models/plan.py +37 -0
- kc/models/source.py +37 -0
- kc/models/source_range.py +29 -0
- kc/models/source_revision.py +19 -0
- kc/models/task.py +35 -0
- kc/output.py +838 -0
- kc/paths.py +126 -0
- kc/provenance/__init__.py +1 -0
- kc/provenance/citations.py +296 -0
- kc/search/__init__.py +1 -0
- kc/search/extract.py +268 -0
- kc/search/fts.py +284 -0
- kc/search/semantic.py +346 -0
- kc/store/__init__.py +1 -0
- kc/store/jsonl.py +55 -0
- kc/store/sqlite.py +444 -0
- kc/store/transaction.py +67 -0
- kc/templates/agents/skills/kc/SKILL.md +282 -0
- kc/templates/agents/skills/kc/agents/openai.yaml +5 -0
- kc/templates/agents/skills/kc/scripts/resolve_query_citations.py +134 -0
- kc/workspace.py +98 -0
- kc_cli-0.4.0.dist-info/METADATA +522 -0
- kc_cli-0.4.0.dist-info/RECORD +65 -0
- kc_cli-0.4.0.dist-info/WHEEL +4 -0
- kc_cli-0.4.0.dist-info/entry_points.txt +2 -0
- kc_cli-0.4.0.dist-info/licenses/LICENSE +21 -0
kc/commands/citation.py
ADDED
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Annotated
|
|
5
|
+
|
|
6
|
+
import typer
|
|
7
|
+
|
|
8
|
+
from kc.atomic_write import atomic_write_text
|
|
9
|
+
from kc.commands.common import load_artifacts, load_ranges, run
|
|
10
|
+
from kc.errors import EXIT_PROVENANCE, KcError
|
|
11
|
+
from kc.output import emit, emit_success, envelope
|
|
12
|
+
from kc.paths import current_paths, repo_relative, resolve_repo_path
|
|
13
|
+
from kc.provenance.citations import find_range_for_token, parse_markdown_citations, validate_citations
|
|
14
|
+
from kc.search.fts import citation_token
|
|
15
|
+
from kc.store.transaction import mutation_transaction
|
|
16
|
+
|
|
17
|
+
app = typer.Typer(help="Validate kc citation tokens and source-range provenance.")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@app.command("check", help="Check citations in one artifact or all registered artifacts.")
def check(
    file: Annotated[Path | None, typer.Option("--file", help="Artifact file.")] = None,
    all: Annotated[bool, typer.Option("--all", help="Check all registered artifacts.")] = False,
    fail_on_warning: Annotated[
        bool, typer.Option("--fail-on-warning", help="Fail on warnings.")
    ] = False,
) -> None:
    """Validate citation tokens in one artifact (--file) or every registered artifact (--all)."""
    # NOTE(review): parameter ``all`` shadows the builtin of the same name; the
    # CLI surface ("--all") would be unchanged by renaming the Python parameter.
    def _run() -> None:
        paths = current_paths()
        files: list[Path] = []
        if file:
            files.append(resolve_repo_path(file))
        if all:
            # NOTE(review): when --file and --all are both given and the file is
            # also registered, it appears twice here and is checked twice (no dedupe).
            files.extend(
                resolve_repo_path(artifact.path)
                for artifact in load_artifacts()
            )
        if not files:
            raise KcError(
                code="KC_USAGE_ERROR",
                message="Provide --file or --all.",
            )
        results = []
        problems = []
        for candidate in files:
            if not candidate.exists():
                raise KcError(
                    code="KC_ARTIFACT_NOT_FOUND",
                    message=f"Artifact not found: {repo_relative(candidate)}",
                    details={"path": repo_relative(candidate)},
                )
            # utf-8-sig transparently drops a leading BOM if one is present.
            text = candidate.read_text(encoding="utf-8-sig")
            edges, candidate_problems = validate_citations(
                repo_relative(candidate),
                text,
                sources_path=paths.sources_jsonl,
                ranges_path=paths.ranges_jsonl,
                citation_edges_path=paths.citation_edges_jsonl,
            )
            results.append(
                {
                    "path": repo_relative(candidate),
                    "citations": len(edges),
                    "valid": not candidate_problems,
                    "edges": [edge.model_dump(mode="json") for edge in edges],
                }
            )
            problems.extend(candidate_problems)
        result = {"valid": not problems, "files": results, "problems": problems}
        if problems:
            # One KcError message per problem, wrapped in a failure envelope.
            # NOTE(review): emit(...) is invoked with exit_code=EXIT_PROVENANCE
            # and presumably terminates the process here — otherwise the
            # emit_success call below would also run. TODO confirm in kc.output.
            emit(
                envelope(
                    "citation.check",
                    None,
                    ok=False,
                    target={"file": str(file) if file else None, "all": all},
                    errors=[
                        KcError(
                            code=str(p["code"]),
                            message=str(p["message"]),
                            details=p,
                            exit_code=EXIT_PROVENANCE,
                            suggested_action="fix citation token or register source range",
                        ).to_message()
                        for p in problems
                    ],
                ),
                exit_code=EXIT_PROVENANCE,
            )
        # NOTE(review): fail_on_warning is only echoed back in the payload; no
        # warning gating happens anywhere in this function.
        emit_success(
            "citation.check",
            result | {"fail_on_warning": fail_on_warning},
            target={"file": str(file) if file else None, "all": all},
        )

    run("citation.check", _run)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
@app.command("rewrite", help="Rewrite legacy locator citations to v2 range citations where exact ranges exist.")
def rewrite(
    file: Annotated[Path, typer.Option("--file", help="Artifact file.")],
    dry_run: Annotated[bool, typer.Option("--dry-run", help="Preview without writing.")] = False,
    yes: Annotated[bool, typer.Option("--yes", help="Write rewritten citations.")] = False,
) -> None:
    """Upgrade v1 citation tokens in an artifact to v2 range tokens.

    Only tokens whose exact source range is already registered are rewritten;
    anything else is reported as unresolved. The file is written only when
    --yes is given and --dry-run is not.
    """
    def _run() -> None:
        artifact_path = resolve_repo_path(file)
        if not artifact_path.exists():
            raise KcError(
                code="KC_ARTIFACT_NOT_FOUND",
                message=f"Artifact not found: {repo_relative(artifact_path)}",
                details={"path": repo_relative(artifact_path)},
            )
        known_ranges = load_ranges()
        original_text = artifact_path.read_text(encoding="utf-8-sig")
        updated_text = original_text
        change_log = []
        for citation in parse_markdown_citations(original_text):
            # Only legacy v1 tokens are eligible for rewriting.
            if citation.token_version != "v1":
                continue
            matched_range = find_range_for_token(citation, known_ranges)
            if matched_range is None:
                change_log.append(
                    {
                        "token": citation.token,
                        "line": citation.line,
                        "status": "unresolved",
                    }
                )
                continue
            new_token = citation_token(
                citation.source_id,
                matched_range.locator.model_dump(mode="json"),
                range_id=matched_range.range_id,
            )
            updated_text = updated_text.replace(citation.token, new_token)
            change_log.append(
                {
                    "token": citation.token,
                    "replacement": new_token,
                    "line": citation.line,
                    "range_id": matched_range.range_id,
                    "status": "rewritten",
                }
            )
        # Write only with explicit consent (--yes) and without --dry-run.
        preview_only = dry_run or not yes
        if not preview_only and updated_text != original_text:
            paths = current_paths()
            with mutation_transaction(paths, "citation.rewrite", [artifact_path]) as tx:
                atomic_write_text(artifact_path, updated_text)
                tx.commit({"path": repo_relative(artifact_path), "rewritten": True})
        status_totals = {"rewritten": 0, "unresolved": 0}
        for entry in change_log:
            status_totals[str(entry["status"])] += 1
        emit_success(
            "citation.rewrite",
            {
                "dry_run": preview_only,
                "path": repo_relative(artifact_path),
                "rewritten": status_totals["rewritten"],
                "unresolved": status_totals["unresolved"],
                "changes": change_log,
                "content_preview": updated_text if preview_only else None,
            },
            target={"file": repo_relative(artifact_path)},
        )

    run("citation.rewrite", _run)
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
@app.command("repair", help="Report deterministic citation repair candidates without inventing evidence.")
def repair(
    file: Annotated[Path, typer.Option("--file", help="Artifact file.")],
    dry_run: Annotated[bool, typer.Option("--dry-run", help="Preview repair candidates.")] = False,
    yes: Annotated[bool, typer.Option("--yes", help="Apply exact mechanical repairs.")] = False,
) -> None:
    """Scan an artifact's citations and report (or apply) mechanical repairs.

    v1 tokens whose exact source range is registered are rewritten to v2
    range tokens. Tokens with no matching range are reported as unresolved
    with up to five candidate range ids from the same source — never
    guessed. Writes happen only with --yes, without --dry-run, inside a
    mutation transaction.

    BUGFIX: ``dry_run`` previously defaulted to True while exposing only the
    single flag "--dry-run" (no negation), so it could never become False:
    ``effective_dry_run = dry_run or not yes`` was always True, ``--yes``
    could never apply anything, and ``"applied"`` was always false. The
    default is now False, matching ``citation rewrite``: plain runs still
    only report (``--yes`` is required to write), and "--dry-run" still
    forces a preview even when combined with ``--yes``.
    """
    def _run() -> None:
        target = resolve_repo_path(file)
        if not target.exists():
            raise KcError(
                code="KC_ARTIFACT_NOT_FOUND",
                message=f"Artifact not found: {repo_relative(target)}",
                details={"path": repo_relative(target)},
            )
        ranges = load_ranges()
        # utf-8-sig transparently drops a leading BOM if one is present.
        text = target.read_text(encoding="utf-8-sig")
        candidates = []
        repaired = text
        for parsed in parse_markdown_citations(text):
            source_range = find_range_for_token(parsed, ranges)
            if source_range is None:
                # No exact range: surface nearby ranges from the same source
                # instead of guessing — repairs must stay deterministic.
                same_source = [item for item in ranges if item.source_id == parsed.source_id]
                candidates.append(
                    {
                        "token": parsed.token,
                        "line": parsed.line,
                        "status": "unresolved",
                        "candidate_range_ids": [item.range_id for item in same_source[:5]],
                    }
                )
                continue
            if parsed.token_version == "v1":
                # Exact match for a legacy token: mechanical upgrade to v2.
                replacement = citation_token(
                    parsed.source_id,
                    source_range.locator.model_dump(mode="json"),
                    range_id=source_range.range_id,
                )
                repaired = repaired.replace(parsed.token, replacement)
                candidates.append(
                    {
                        "token": parsed.token,
                        "replacement": replacement,
                        "line": parsed.line,
                        "status": "mechanical_rewrite",
                    }
                )
        # Apply only with explicit consent (--yes) and without --dry-run.
        effective_dry_run = dry_run or not yes
        if not effective_dry_run and repaired != text:
            paths = current_paths()
            with mutation_transaction(paths, "citation.repair", [target]) as tx:
                atomic_write_text(target, repaired)
                tx.commit({"path": repo_relative(target), "repaired": True})
        emit_success(
            "citation.repair",
            {
                "dry_run": effective_dry_run,
                "path": repo_relative(target),
                "applied": not effective_dry_run,
                "candidates": candidates,
                "unresolved": sum(1 for item in candidates if item["status"] == "unresolved"),
            },
            target={"file": repo_relative(target)},
        )

    run("citation.repair", _run)
|
kc/commands/common.py
ADDED
|
@@ -0,0 +1,346 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
from collections.abc import Callable
|
|
6
|
+
from datetime import UTC, datetime
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
import orjson
|
|
11
|
+
|
|
12
|
+
from kc.errors import KcError
|
|
13
|
+
from kc.fingerprints import raw_fingerprint
|
|
14
|
+
from kc.models.artifact import ArtifactRecord
|
|
15
|
+
from kc.models.citation import CitationEdgeRecord
|
|
16
|
+
from kc.models.source import SourceRecord
|
|
17
|
+
from kc.models.source_range import SourceRangeRecord
|
|
18
|
+
from kc.models.source_revision import SourceRevisionRecord
|
|
19
|
+
from kc.output import emit_error, emit_unexpected, state
|
|
20
|
+
from kc.paths import current_paths, ensure_data_dir_exists, repo_relative, resolve_repo_path
|
|
21
|
+
from kc.store.jsonl import read_jsonl, write_jsonl
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def now() -> str:
|
|
25
|
+
return datetime.now(UTC).isoformat()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def run[T](command: str, func: Callable[[], T]) -> T:
    """Execute *func* on behalf of *command*, routing failures to the CLI emitters.

    SystemExit is re-raised untouched so already-handled exits propagate.
    KcError goes to emit_error; any other exception goes to emit_unexpected.
    NOTE(review): both emitters are expected to terminate the process (raise
    SystemExit) — the trailing AssertionError marks the fall-through as
    unreachable; confirm in kc.output.
    """
    try:
        return func()
    except SystemExit:
        raise
    except KcError as exc:
        emit_error(command, exc)
    except Exception as exc:
        emit_unexpected(command, exc)
    raise AssertionError("unreachable")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def require_json_format(command: str) -> None:
    """Raise KcError when the active output format is anything but JSON."""
    requested = state.format
    if requested == "json":
        return
    raise KcError(
        code="KC_UNSUPPORTED_FEATURE",
        message=f"Output format '{requested}' is not supported for {command}.",
        details={"requested": requested, "supported": ["json"]},
    )
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def validate_choice(
|
|
50
|
+
value: str | None,
|
|
51
|
+
*,
|
|
52
|
+
option: str,
|
|
53
|
+
supported: set[str],
|
|
54
|
+
allow_none: bool = False,
|
|
55
|
+
code: str = "KC_VALIDATION_INVALID_ARGUMENT",
|
|
56
|
+
) -> str | None:
|
|
57
|
+
if value is None and allow_none:
|
|
58
|
+
return None
|
|
59
|
+
if value not in supported:
|
|
60
|
+
raise KcError(
|
|
61
|
+
code=code,
|
|
62
|
+
message=f"Invalid {option}: {value}",
|
|
63
|
+
details={"option": option, "value": value, "supported": sorted(supported)},
|
|
64
|
+
)
|
|
65
|
+
return value
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def validate_positive_int(value: int, *, option: str) -> int:
    """Return *value* unchanged after checking it is at least 1."""
    if value >= 1:
        return value
    raise KcError(
        code="KC_VALIDATION_INVALID_ARGUMENT",
        message=f"{option} must be a positive integer.",
        details={"option": option, "value": value},
    )
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def parse_named_ints(
|
|
79
|
+
raw: str | None,
|
|
80
|
+
*,
|
|
81
|
+
option: str,
|
|
82
|
+
defaults: dict[str, int],
|
|
83
|
+
) -> dict[str, int]:
|
|
84
|
+
if raw is None:
|
|
85
|
+
return dict(defaults)
|
|
86
|
+
if not raw.strip():
|
|
87
|
+
raise KcError(
|
|
88
|
+
code="KC_CONFIG_INVALID",
|
|
89
|
+
message=f"{option} must use key=value entries.",
|
|
90
|
+
details={"option": option, "value": raw, "supported_keys": sorted(defaults)},
|
|
91
|
+
)
|
|
92
|
+
parsed = dict(defaults)
|
|
93
|
+
seen: set[str] = set()
|
|
94
|
+
for part in raw.split(","):
|
|
95
|
+
item = part.strip()
|
|
96
|
+
if not item or "=" not in item:
|
|
97
|
+
raise KcError(
|
|
98
|
+
code="KC_CONFIG_INVALID",
|
|
99
|
+
message=f"Malformed {option} entry: {part}",
|
|
100
|
+
details={"option": option, "value": raw, "supported_keys": sorted(defaults)},
|
|
101
|
+
)
|
|
102
|
+
key, value = item.split("=", 1)
|
|
103
|
+
key = key.strip()
|
|
104
|
+
if key not in parsed:
|
|
105
|
+
raise KcError(
|
|
106
|
+
code="KC_CONFIG_INVALID",
|
|
107
|
+
message=f"Unknown {option} key: {key}",
|
|
108
|
+
details={"option": option, "key": key, "supported_keys": sorted(defaults)},
|
|
109
|
+
)
|
|
110
|
+
if key in seen:
|
|
111
|
+
raise KcError(
|
|
112
|
+
code="KC_CONFIG_INVALID",
|
|
113
|
+
message=f"Duplicate {option} key: {key}",
|
|
114
|
+
details={"option": option, "key": key},
|
|
115
|
+
)
|
|
116
|
+
seen.add(key)
|
|
117
|
+
try:
|
|
118
|
+
parsed[key] = int(value.strip())
|
|
119
|
+
except ValueError as exc:
|
|
120
|
+
raise KcError(
|
|
121
|
+
code="KC_CONFIG_INVALID",
|
|
122
|
+
message=f"Invalid {option} value: {item}",
|
|
123
|
+
details={"option": option, "value": raw, "key": key},
|
|
124
|
+
) from exc
|
|
125
|
+
for key, value in parsed.items():
|
|
126
|
+
if value < 1:
|
|
127
|
+
raise KcError(
|
|
128
|
+
code="KC_CONFIG_INVALID",
|
|
129
|
+
message=f"{option} values must be positive: {key}={value}",
|
|
130
|
+
details={"option": option, "value": raw, "key": key, "parsed_value": value},
|
|
131
|
+
)
|
|
132
|
+
return parsed
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def parse_checks(raw: str, *, allowed: set[str], all_checks: set[str]) -> set[str]:
    """Expand a comma-separated ``--checks`` value into a set of check names.

    The literal name ``all`` expands to a copy of *all_checks*. Empty input
    or an unrecognized check name raises KcError.
    """
    selected = {piece.strip() for piece in raw.split(",") if piece.strip()}
    menu = sorted({*allowed, "all"})
    if not selected:
        raise KcError(
            code="KC_VALIDATION_INVALID_ARGUMENT",
            message="--checks must include at least one check name.",
            details={"option": "--checks", "supported": menu},
        )
    unrecognized = sorted(selected - allowed - {"all"})
    if unrecognized:
        raise KcError(
            code="KC_VALIDATION_INVALID_ARGUMENT",
            message=f"Unknown lint check: {unrecognized[0]}",
            details={"option": "--checks", "unknown": unrecognized, "supported": menu},
        )
    return set(all_checks) if "all" in selected else selected
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def load_sources() -> list[SourceRecord]:
    """Load all registered source records (creates the data dir if missing)."""
    ensure_data_dir_exists()
    return read_jsonl(current_paths().sources_jsonl, SourceRecord)


def save_sources(records: list[SourceRecord]) -> None:
    """Overwrite the sources JSONL file with *records*."""
    write_jsonl(current_paths().sources_jsonl, records)


def load_ranges() -> list[SourceRangeRecord]:
    """Load all source-range records (creates the data dir if missing)."""
    ensure_data_dir_exists()
    return read_jsonl(current_paths().ranges_jsonl, SourceRangeRecord)


def save_ranges(records: list[SourceRangeRecord]) -> None:
    """Overwrite the ranges JSONL file with *records*."""
    write_jsonl(current_paths().ranges_jsonl, records)


def load_source_revisions() -> list[SourceRevisionRecord]:
    """Load all source-revision records (creates the data dir if missing)."""
    ensure_data_dir_exists()
    return read_jsonl(current_paths().source_revisions_jsonl, SourceRevisionRecord)


def save_source_revisions(records: list[SourceRevisionRecord]) -> None:
    """Overwrite the source-revisions JSONL file with *records*."""
    write_jsonl(current_paths().source_revisions_jsonl, records)


def load_artifacts() -> list[ArtifactRecord]:
    """Load all registered artifact records (creates the data dir if missing)."""
    ensure_data_dir_exists()
    return read_jsonl(current_paths().artifacts_jsonl, ArtifactRecord)


def save_artifacts(records: list[ArtifactRecord]) -> None:
    """Overwrite the artifacts JSONL file with *records*."""
    write_jsonl(current_paths().artifacts_jsonl, records)


def load_citation_edges() -> list[CitationEdgeRecord]:
    """Load all citation-edge records (creates the data dir if missing)."""
    ensure_data_dir_exists()
    return read_jsonl(current_paths().citation_edges_jsonl, CitationEdgeRecord)


def save_citation_edges(records: list[CitationEdgeRecord]) -> None:
    """Overwrite the citation-edges JSONL file with *records*.

    NOTE(review): unlike the loaders, none of the save_* helpers call
    ensure_data_dir_exists(); they appear to assume a prior load created it.
    """
    write_jsonl(current_paths().citation_edges_jsonl, records)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def artifact_by_path(path: Path) -> ArtifactRecord | None:
    """Return the registered artifact matching *path*, or None.

    A record matches when its stored path equals the repo-relative form of
    *path*, or when both normalize to the same POSIX string.
    """
    rel = repo_relative(path)
    wanted_posix = path.as_posix()
    for record in load_artifacts():
        if record.path == rel:
            return record
        if Path(record.path).as_posix() == wanted_posix:
            return record
    return None
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def path_lock_name(path: Path) -> str:
    """Derive a stable lock name from *path* (16 hex chars of its SHA-256)."""
    encoded = path.as_posix().encode("utf-8")
    fingerprint = hashlib.sha256(encoded).hexdigest()[:16]
    return f"artifact-{fingerprint}"
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def json_dumps(data: Any) -> str:
    """Serialize *data* to a 2-space-indented JSON string via orjson."""
    rendered = orjson.dumps(data, option=orjson.OPT_INDENT_2)
    return rendered.decode()
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def parse_input_json(raw: str) -> dict[str, Any]:
    """Parse inline JSON or, with an ``@path`` prefix, JSON read from a file.

    The decoded value must be a JSON object; anything else (or malformed
    JSON, or a missing ``@`` file) raises KcError.
    """
    if raw.startswith("@"):
        location = Path(raw[1:])
        if not location.exists():
            raise KcError(
                code="KC_FILE_NOT_FOUND",
                message=f"Input file not found: {raw[1:]}",
                details={"path": location.as_posix()},
            )
        payload_text = location.read_text(encoding="utf-8")
    else:
        payload_text = raw
    try:
        decoded = json.loads(payload_text)
    except json.JSONDecodeError as exc:
        raise KcError(
            code="KC_JSON_INVALID",
            message=f"Invalid JSON input: {exc}",
            details={"input": raw[:120]},
        ) from exc
    if isinstance(decoded, dict):
        return decoded
    raise KcError(
        code="KC_EVENT_INVALID",
        message="Expected JSON object input.",
        details={"input_type": type(decoded).__name__},
    )
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def validate_payload_schema(payload: dict[str, Any], schema: dict[str, Any] | None) -> None:
|
|
247
|
+
if not schema:
|
|
248
|
+
return
|
|
249
|
+
if schema.get("type") == "object" and not isinstance(payload, dict):
|
|
250
|
+
raise KcError(
|
|
251
|
+
code="KC_EVENT_INVALID",
|
|
252
|
+
message="Event input must be a JSON object.",
|
|
253
|
+
details={"expected": "object", "actual": type(payload).__name__},
|
|
254
|
+
)
|
|
255
|
+
required = schema.get("required", [])
|
|
256
|
+
if isinstance(required, list):
|
|
257
|
+
missing = [str(key) for key in required if key not in payload]
|
|
258
|
+
if missing:
|
|
259
|
+
raise KcError(
|
|
260
|
+
code="KC_EVENT_INVALID",
|
|
261
|
+
message="Event input is missing required properties.",
|
|
262
|
+
details={"missing": missing, "schema": schema},
|
|
263
|
+
)
|
|
264
|
+
properties = schema.get("properties", {})
|
|
265
|
+
if not isinstance(properties, dict):
|
|
266
|
+
return
|
|
267
|
+
type_map = {
|
|
268
|
+
"string": str,
|
|
269
|
+
"integer": int,
|
|
270
|
+
"number": (int, float),
|
|
271
|
+
"boolean": bool,
|
|
272
|
+
"object": dict,
|
|
273
|
+
"array": list,
|
|
274
|
+
}
|
|
275
|
+
for key, definition in properties.items():
|
|
276
|
+
if key not in payload or not isinstance(definition, dict):
|
|
277
|
+
continue
|
|
278
|
+
raw_expected = definition.get("type")
|
|
279
|
+
if not isinstance(raw_expected, str):
|
|
280
|
+
continue
|
|
281
|
+
expected = raw_expected
|
|
282
|
+
expected_type = type_map.get(expected)
|
|
283
|
+
if expected_type is None:
|
|
284
|
+
continue
|
|
285
|
+
if expected == "integer":
|
|
286
|
+
valid = isinstance(payload[key], int) and not isinstance(payload[key], bool)
|
|
287
|
+
elif expected == "number":
|
|
288
|
+
valid = isinstance(payload[key], int | float) and not isinstance(payload[key], bool)
|
|
289
|
+
elif expected == "boolean":
|
|
290
|
+
valid = isinstance(payload[key], bool)
|
|
291
|
+
else:
|
|
292
|
+
valid = isinstance(payload[key], expected_type)
|
|
293
|
+
if not valid:
|
|
294
|
+
raise KcError(
|
|
295
|
+
code="KC_EVENT_INVALID",
|
|
296
|
+
message=f"Event input property has invalid type: {key}",
|
|
297
|
+
details={
|
|
298
|
+
"property": key,
|
|
299
|
+
"expected": expected,
|
|
300
|
+
"actual": type(payload[key]).__name__,
|
|
301
|
+
"schema": schema,
|
|
302
|
+
},
|
|
303
|
+
)
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def stale_source_warnings(
    results: list[dict[str, Any]],
    sources: list[SourceRecord] | None = None,
) -> list[dict[str, Any]]:
    """Annotate *results* with on-disk fingerprints and report stale sources.

    For every distinct ``source_id`` found in *results*, the source's
    ``metadata["original_path"]`` (when it is a string) is re-fingerprinted
    on disk and compared against the registered fingerprint. Side effect:
    each result dict whose source was checked gains a
    ``current_source_fingerprint`` key. Returns either an empty list or a
    single KC_SOURCE_STALE warning dict covering all stale sources.

    Args:
        results: search-result dicts; only their ``source_id`` keys are read.
        sources: optional pre-loaded records; defaults to load_sources().
    """
    if not results:
        return []
    sources_by_id = {source.source_id: source for source in (sources or load_sources())}
    stale: list[dict[str, Any]] = []
    current_by_source: dict[str, str | None] = {}
    # Sorted, de-duplicated ids keep the stale report order deterministic;
    # empty/missing source_id values are filtered out up front.
    for source_id in sorted({str(item.get("source_id", "")) for item in results if item.get("source_id")}):
        source = sources_by_id.get(source_id)
        if source is None:
            continue
        original = source.metadata.get("original_path")
        current_fingerprint = None
        if isinstance(original, str):
            path = resolve_repo_path(original)
            # A deleted file yields None, which compares as "changed" below.
            current_fingerprint = raw_fingerprint(path) if path.exists() else None
        # Sources without a string original_path keep None here, so they are
        # flagged stale whenever a non-None fingerprint is registered.
        current_by_source[source_id] = current_fingerprint
        if current_fingerprint != source.fingerprint:
            stale.append(
                {
                    "source_id": source_id,
                    "uri": source.uri,
                    "registered_fingerprint": source.fingerprint,
                    "current_fingerprint": current_fingerprint,
                }
            )
    # Mutates the caller's dicts: attach what is on disk right now.
    for item in results:
        source_id = str(item.get("source_id", ""))
        if source_id in current_by_source:
            item["current_source_fingerprint"] = current_by_source[source_id]
    if not stale:
        return []
    return [
        {
            "code": "KC_SOURCE_STALE",
            "message": "One or more returned source ranges come from stale registered sources.",
            "details": {"sources": stale},
        }
    ]
|