kc-cli 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kc/__init__.py +5 -0
- kc/__main__.py +11 -0
- kc/artifacts/__init__.py +1 -0
- kc/artifacts/diff.py +76 -0
- kc/artifacts/frontmatter.py +26 -0
- kc/artifacts/markdown.py +116 -0
- kc/atomic_write.py +33 -0
- kc/cli.py +284 -0
- kc/commands/__init__.py +1 -0
- kc/commands/artifact.py +1190 -0
- kc/commands/citation.py +231 -0
- kc/commands/common.py +346 -0
- kc/commands/conformance.py +293 -0
- kc/commands/context.py +190 -0
- kc/commands/doctor.py +81 -0
- kc/commands/eval.py +133 -0
- kc/commands/export.py +97 -0
- kc/commands/guide.py +571 -0
- kc/commands/index.py +54 -0
- kc/commands/init.py +207 -0
- kc/commands/lint.py +238 -0
- kc/commands/source.py +464 -0
- kc/commands/status.py +52 -0
- kc/commands/task.py +260 -0
- kc/config.py +127 -0
- kc/embedding_models/potion-base-8M/README.md +97 -0
- kc/embedding_models/potion-base-8M/config.json +13 -0
- kc/embedding_models/potion-base-8M/model.safetensors +0 -0
- kc/embedding_models/potion-base-8M/modules.json +14 -0
- kc/embedding_models/potion-base-8M/tokenizer.json +1 -0
- kc/errors.py +141 -0
- kc/fingerprints.py +35 -0
- kc/ids.py +23 -0
- kc/locks.py +65 -0
- kc/models/__init__.py +17 -0
- kc/models/artifact.py +34 -0
- kc/models/citation.py +60 -0
- kc/models/context.py +23 -0
- kc/models/eval.py +21 -0
- kc/models/plan.py +37 -0
- kc/models/source.py +37 -0
- kc/models/source_range.py +29 -0
- kc/models/source_revision.py +19 -0
- kc/models/task.py +35 -0
- kc/output.py +838 -0
- kc/paths.py +126 -0
- kc/provenance/__init__.py +1 -0
- kc/provenance/citations.py +296 -0
- kc/search/__init__.py +1 -0
- kc/search/extract.py +268 -0
- kc/search/fts.py +284 -0
- kc/search/semantic.py +346 -0
- kc/store/__init__.py +1 -0
- kc/store/jsonl.py +55 -0
- kc/store/sqlite.py +444 -0
- kc/store/transaction.py +67 -0
- kc/templates/agents/skills/kc/SKILL.md +282 -0
- kc/templates/agents/skills/kc/agents/openai.yaml +5 -0
- kc/templates/agents/skills/kc/scripts/resolve_query_citations.py +134 -0
- kc/workspace.py +98 -0
- kc_cli-0.4.0.dist-info/METADATA +522 -0
- kc_cli-0.4.0.dist-info/RECORD +65 -0
- kc_cli-0.4.0.dist-info/WHEEL +4 -0
- kc_cli-0.4.0.dist-info/entry_points.txt +2 -0
- kc_cli-0.4.0.dist-info/licenses/LICENSE +21 -0
kc/commands/source.py
ADDED
|
@@ -0,0 +1,464 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import shutil
|
|
4
|
+
from datetime import UTC, datetime
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Annotated
|
|
7
|
+
|
|
8
|
+
import typer
|
|
9
|
+
|
|
10
|
+
from kc.commands.common import (
|
|
11
|
+
load_artifacts,
|
|
12
|
+
load_citation_edges,
|
|
13
|
+
load_ranges,
|
|
14
|
+
load_source_revisions,
|
|
15
|
+
load_sources,
|
|
16
|
+
run,
|
|
17
|
+
save_ranges,
|
|
18
|
+
save_source_revisions,
|
|
19
|
+
save_sources,
|
|
20
|
+
stale_source_warnings,
|
|
21
|
+
validate_positive_int,
|
|
22
|
+
)
|
|
23
|
+
from kc.config import load_config
|
|
24
|
+
from kc.errors import KcError
|
|
25
|
+
from kc.fingerprints import normalized_fingerprint, raw_fingerprint
|
|
26
|
+
from kc.ids import stable_id
|
|
27
|
+
from kc.models.source import Authority, SourceRecord
|
|
28
|
+
from kc.models.source_range import SourceRangeRecord
|
|
29
|
+
from kc.models.source_revision import SourceRevisionRecord
|
|
30
|
+
from kc.output import emit_success, warning
|
|
31
|
+
from kc.paths import current_paths, repo_relative, resolve_repo_path
|
|
32
|
+
from kc.provenance.citations import find_range_for_token, parse_markdown_citations
|
|
33
|
+
from kc.search.extract import extract_ranges, guess_media_type, is_text_like
|
|
34
|
+
from kc.search.semantic import build_semantic_index
|
|
35
|
+
from kc.store.sqlite import rebuild_index
|
|
36
|
+
from kc.store.transaction import mutation_transaction
|
|
37
|
+
|
|
38
|
+
app = typer.Typer(help="Register, inspect, index, and search local source material.")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _optional_semantic_rebuild(paths, ranges: list[SourceRangeRecord]) -> dict | None:
    """Rebuild the semantic index, downgrading a missing model to a warning.

    Args:
        paths: Workspace paths object; only ``paths.sqlite_path`` is read here.
        ranges: Source ranges handed to ``build_semantic_index``.

    Returns:
        ``None`` when the rebuild completed, otherwise the warning payload
        produced by ``warning(...)`` for the ``KC_RETRIEVAL_MODEL_UNAVAILABLE``
        case.

    Raises:
        KcError: Any ``KcError`` whose code is not
            ``KC_RETRIEVAL_MODEL_UNAVAILABLE`` is re-raised unchanged.
    """
    try:
        build_semantic_index(paths.sqlite_path, ranges)
    except KcError as err:
        model_unavailable = err.code == "KC_RETRIEVAL_MODEL_UNAVAILABLE"
        if model_unavailable:
            # Only this specific failure is tolerated; the caller surfaces it
            # as a warning instead of aborting the command.
            return warning(
                "KC_RETRIEVAL_SEMANTIC_UNAVAILABLE",
                "Semantic index was not rebuilt; SQLite FTS search remains available.",
                {"reason": err.message},
            )
        raise
    else:
        return None
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _resolve_source(identifier: str) -> tuple[SourceRecord, Path]:
    """Look up a registered source by id or by path and locate its file.

    The identifier is first compared against every ``source_id``. If nothing
    matches, it is treated as a path, converted to a ``file:<repo-relative>``
    URI, and compared against every ``uri``.

    Args:
        identifier: Source ID or source path.

    Returns:
        The matching source record and the path resolved from its
        ``original_path`` metadata entry.

    Raises:
        KcError: ``KC_SOURCE_NOT_FOUND`` when no record matches, or when the
            matched record has no string ``original_path`` in its metadata.
    """
    paths = current_paths()
    known = load_sources()

    found = None
    for record in known:
        if record.source_id == identifier:
            found = record
            break

    if found is None:
        # Fall back to interpreting the identifier as a file path.
        relative = repo_relative(resolve_repo_path(identifier), paths.root)
        wanted_uri = f"file:{relative}"
        for record in known:
            if record.uri == wanted_uri:
                found = record
                break

    if found is None:
        raise KcError(
            code="KC_SOURCE_NOT_FOUND",
            message=f"Source not found: {identifier}",
            details={"identifier": identifier},
        )

    original_path = found.metadata.get("original_path")
    if isinstance(original_path, str):
        return found, resolve_repo_path(original_path, paths.root)

    raise KcError(
        code="KC_SOURCE_NOT_FOUND",
        message=f"Source does not have a local original path: {found.source_id}",
        details={"source_id": found.source_id},
    )
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _impacted_artifacts(
    source_id: str, new_ranges: list[SourceRangeRecord]
) -> list[dict[str, str | None]]:
    """List citation edges of a source that the new ranges would affect.

    Every citation edge whose ``source_id`` matches is re-checked against
    ``new_ranges`` and reported with one of three reasons:

    * ``invalid_token`` - the edge's citation token parses to nothing.
    * ``line_range_no_longer_resolves`` - no range in ``new_ranges`` is found
      for the first parsed token.
    * ``range_content_changed_at_locator`` - a range is found, the edge has a
      recorded ``range_id``, and the two ids differ.

    Edges that still resolve to the same range id, or that have no recorded
    range id, are not reported.

    Args:
        source_id: Source whose citation edges are examined.
        new_ranges: Freshly extracted ranges to resolve tokens against.

    Returns:
        One dict per impacted edge with the keys ``artifact_id``,
        ``artifact_path``, ``citation_token``, ``old_range_id`` and ``reason``.
    """
    impacts: list[dict[str, str | None]] = []
    for edge in load_citation_edges():
        if edge.source_id != source_id:
            continue

        parsed = parse_markdown_citations(edge.citation_token)
        if not parsed:
            reason = "invalid_token"
        else:
            # Resolve the token once and reuse the result for both checks.
            current = find_range_for_token(parsed[0], new_ranges)
            if current is None:
                reason = "line_range_no_longer_resolves"
            elif edge.range_id and edge.range_id != current.range_id:
                reason = "range_content_changed_at_locator"
            else:
                continue

        impacts.append(
            {
                "artifact_id": edge.artifact_id,
                "artifact_path": edge.artifact_path,
                "citation_token": edge.citation_token,
                "old_range_id": edge.range_id,
                "reason": reason,
            }
        )

    return impacts
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
@app.command("add", help="Register a local text/Markdown source, extract citation ranges, and update indexes.")
def add(
    file: Annotated[str, typer.Argument(help="Local file path to register.")],
    domain: Annotated[list[str] | None, typer.Option("--domain", help="Domain tag.")] = None,
    copy: Annotated[bool, typer.Option("--copy", help="Copy source into knowledge/raw.")] = False,
    dry_run: Annotated[bool, typer.Option("--dry-run", help="Preview without writing.")] = False,
    yes: Annotated[bool, typer.Option("--yes", help="Register the source.")] = False,
) -> None:
    """Register a local file as a source and index its extracted ranges.

    The source record, revision record and ranges are always computed. They
    are only persisted when ``--yes`` is given and ``--dry-run`` is not;
    otherwise the same result payload is emitted with ``dry_run`` set to true.

    Errors raised inside the command body (all ``KcError``):
        KC_UNSUPPORTED_FEATURE: ``file`` starts with ``http://`` or ``https://``.
        KC_FILE_NOT_FOUND: the resolved path does not exist.
        KC_SOURCE_UNSUPPORTED_MEDIA_TYPE: ``is_text_like`` rejects the file.
        KC_SOURCE_ALREADY_REGISTERED: a source with the same URI exists.
    """

    def _run() -> None:
        if file.startswith(("http://", "https://")):
            raise KcError(
                code="KC_UNSUPPORTED_FEATURE",
                message="HTTP source fetching is out of scope for v1. Register exported local files instead.",
                details={"uri": file},
            )
        paths = current_paths()
        source_path = resolve_repo_path(file, paths.root)
        if not source_path.exists():
            raise KcError(
                code="KC_FILE_NOT_FOUND",
                message=f"Source file not found: {file}",
                details={"path": repo_relative(source_path)},
            )
        media_type = guess_media_type(source_path)
        if not is_text_like(source_path, media_type):
            raise KcError(
                code="KC_SOURCE_UNSUPPORTED_MEDIA_TYPE",
                message=f"Unsupported media type for v1 extraction: {media_type}",
                details={"path": repo_relative(source_path), "media_type": media_type},
            )
        rel = repo_relative(source_path)
        uri = f"file:{rel}"
        sources = load_sources()
        # The URI is the identity of a source; refuse to register it twice.
        for existing in sources:
            if existing.uri == uri:
                raise KcError(
                    code="KC_SOURCE_ALREADY_REGISTERED",
                    message=f"Source already registered: {uri}",
                    details={"source_id": existing.source_id, "uri": uri},
                    suggested_action=f"refresh existing source with kc source refresh {existing.source_id} --dry-run",
                )
        raw_fp = raw_fingerprint(source_path)
        norm_fp = normalized_fingerprint(source_path)
        timestamp = datetime.now(UTC).isoformat()
        source_id = stable_id("src", uri)
        revision_id = stable_id("rev", source_id, raw_fp, norm_fp)
        source = SourceRecord(
            source_id=source_id,
            uri=uri,
            display_name=source_path.name,
            media_type=media_type,
            fingerprint=raw_fp,
            raw_fingerprint=raw_fp,
            normalized_fingerprint=norm_fp,
            registered_at=timestamp,
            domain=list(domain or []),
            authority=Authority(),
            metadata={"original_path": rel, "repo_relative": True},
            canonical_source_key=uri,
            current_revision_id=revision_id,
            first_registered_at=timestamp,
        )
        revision = SourceRevisionRecord(
            revision_id=revision_id,
            source_id=source.source_id,
            uri=uri,
            raw_fingerprint=raw_fp,
            normalized_fingerprint=norm_fp,
            media_type=media_type,
            extracted_at=timestamp,
            metadata={"original_path": rel},
        )
        ranges = extract_ranges(
            source_path,
            source.source_id,
            source.fingerprint,
            revision_id=revision.revision_id,
        )
        # Writing requires an explicit --yes and no --dry-run.
        effective_dry_run = dry_run or not yes
        copied_to: str | None = None
        semantic_warning: dict | None = None
        if not effective_dry_run:
            with mutation_transaction(paths, "source.add", [source_path]) as tx:
                if copy:
                    raw_dir = paths.data_dir / "raw"
                    raw_dir.mkdir(parents=True, exist_ok=True)
                    target = raw_dir / source_path.name
                    shutil.copy2(source_path, target)
                    copied_to = repo_relative(target)
                    source.immutability = "copied"
                    source.metadata["copied_to"] = copied_to
                save_sources([*sources, source])
                save_source_revisions([*load_source_revisions(), revision])
                save_ranges([*load_ranges(), *ranges])
                all_ranges = load_ranges()
                # Pass artifacts and citation edges exactly as `refresh` does,
                # so both commands rebuild the index from the same inputs.
                # NOTE(review): rebuild_index's defaults are not visible here;
                # confirm that omitting these arguments was not intentional.
                rebuild_index(
                    paths.sqlite_path,
                    load_sources(),
                    all_ranges,
                    load_artifacts(),
                    load_citation_edges(),
                )
                semantic_warning = _optional_semantic_rebuild(paths, all_ranges)
                tx.commit({"source_id": source.source_id, "ranges": len(ranges)})
        # Authority is always created as a default `Authority()` above, so
        # this warning is emitted on every run.
        warnings = [
            warning(
                "KC_AUTHORITY_UNKNOWN",
                "Source authority was not provided; artifacts based on this source should remain draft.",
                {"source_id": source.source_id},
            )
        ]
        if not ranges:
            warnings.append(
                warning(
                    "KC_SOURCE_NO_RANGES",
                    "Source registered with no extractable ranges.",
                    {"source_id": source.source_id, "path": rel},
                )
            )
        if semantic_warning is not None:
            warnings.append(semantic_warning)
        emit_success(
            "source.add",
            {
                "dry_run": effective_dry_run,
                "source_id": source.source_id,
                "uri": source.uri,
                "fingerprint": source.fingerprint,
                "normalized_fingerprint": source.normalized_fingerprint,
                "media_type": media_type,
                "ranges_extracted": len(ranges),
                "copied": bool(copied_to),
                "copied_to": copied_to,
                "authority": source.authority.model_dump(mode="json"),
            },
            warnings=warnings,
        )

    run("source.add", _run)
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
@app.command("inspect", help="Show source metadata, current fingerprint state, and optional extracted ranges.")
def inspect(
    identifier: Annotated[str, typer.Argument(help="Source ID or source path.")],
    ranges: Annotated[bool, typer.Option("--ranges", help="Include source ranges.")] = False,
) -> None:
    """Emit a registered source's record together with its staleness state.

    The source is matched by ``source_id`` first, then by the ``file:`` URI
    derived from ``identifier`` treated as a path. A ``KcError`` with code
    ``KC_SOURCE_NOT_FOUND`` is raised inside the command body when neither
    lookup matches.
    """

    def _run() -> None:
        known = load_sources()

        record = None
        for candidate in known:
            if candidate.source_id == identifier:
                record = candidate
                break

        if record is None:
            # Second attempt: interpret the identifier as a path.
            paths = current_paths()
            resolved = resolve_repo_path(identifier, paths.root)
            wanted_uri = f"file:{repo_relative(resolved, paths.root)}"
            for candidate in known:
                if candidate.uri == wanted_uri:
                    record = candidate
                    break

        if record is None:
            raise KcError(
                code="KC_SOURCE_NOT_FOUND",
                message=f"Source not found: {identifier}",
                details={"identifier": identifier},
            )

        live_fingerprint = None
        is_stale = False
        original_path = record.metadata.get("original_path")
        if isinstance(original_path, str):
            on_disk = resolve_repo_path(original_path)
            # NOTE(review): nesting reconstructed from a flattened listing; a
            # missing file is assumed to be the case that marks the source
            # stale - confirm against the original file.
            if not on_disk.exists():
                is_stale = True
            else:
                live_fingerprint = raw_fingerprint(on_disk)
                is_stale = live_fingerprint != record.fingerprint

        payload = {
            "source": record.model_dump(mode="json"),
            "current_fingerprint": live_fingerprint,
            "stale": is_stale,
        }
        if ranges:
            owned = []
            for item in load_ranges():
                if item.source_id == record.source_id:
                    owned.append(item.model_dump(mode="json"))
            payload["ranges"] = owned

        emit_success("source.inspect", payload, target={"identifier": identifier})

    run("source.inspect", _run)
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
@app.command("refresh", help="Refresh a registered local source, replace its ranges, and rebuild search indexes.")
def refresh(
    identifier: Annotated[str, typer.Argument(help="Source ID or source path.")],
    dry_run: Annotated[bool, typer.Option("--dry-run", help="Preview without writing.")] = False,
    yes: Annotated[bool, typer.Option("--yes", help="Refresh the source and ranges.")] = False,
) -> None:
    """Re-fingerprint a registered source and replace its extracted ranges.

    The refreshed source record, a new revision record, the new ranges and
    the list of impacted artifacts are always computed. They are only
    persisted, and the indexes only rebuilt, when ``--yes`` is given and
    ``--dry-run`` is not.

    Errors raised inside the command body (all ``KcError``):
        KC_SOURCE_NOT_FOUND: propagated from ``_resolve_source``.
        KC_FILE_NOT_FOUND: the source's file no longer exists.
        KC_SOURCE_UNSUPPORTED_MEDIA_TYPE: ``is_text_like`` rejects the file.
    """

    def _run() -> None:
        paths = current_paths()
        source, source_path = _resolve_source(identifier)
        if not source_path.exists():
            raise KcError(
                code="KC_FILE_NOT_FOUND",
                message=f"Source file not found: {repo_relative(source_path)}",
                details={"source_id": source.source_id, "path": repo_relative(source_path)},
            )
        media_type = guess_media_type(source_path)
        if not is_text_like(source_path, media_type):
            raise KcError(
                code="KC_SOURCE_UNSUPPORTED_MEDIA_TYPE",
                message=f"Unsupported media type for v1 extraction: {media_type}",
                details={"path": repo_relative(source_path), "media_type": media_type},
            )

        old_ranges = [item for item in load_ranges() if item.source_id == source.source_id]
        new_raw_fingerprint = raw_fingerprint(source_path)
        new_normalized_fingerprint = normalized_fingerprint(source_path)
        # NOTE(review): stable_id looks deterministic, so an unchanged file
        # presumably yields the same id as source.current_revision_id and a
        # second revision row with that id is appended below - confirm that
        # this is intended.
        revision_id = stable_id(
            "rev",
            source.source_id,
            new_raw_fingerprint,
            new_normalized_fingerprint,
        )
        refreshed_source = source.model_copy(
            update={
                "display_name": source_path.name,
                "media_type": media_type,
                "fingerprint": new_raw_fingerprint,
                "raw_fingerprint": new_raw_fingerprint,
                "normalized_fingerprint": new_normalized_fingerprint,
                "status": "active",
                "metadata": {
                    **source.metadata,
                    "original_path": repo_relative(source_path),
                    "repo_relative": True,
                },
                "canonical_source_key": source.canonical_source_key or source.uri,
                "current_revision_id": revision_id,
                "first_registered_at": source.first_registered_at or source.registered_at,
                "last_refreshed_at": datetime.now(UTC).isoformat(),
            }
        )
        revision = SourceRevisionRecord(
            revision_id=revision_id,
            source_id=source.source_id,
            uri=source.uri,
            raw_fingerprint=new_raw_fingerprint,
            normalized_fingerprint=new_normalized_fingerprint,
            media_type=media_type,
            extracted_at=refreshed_source.last_refreshed_at or datetime.now(UTC).isoformat(),
            previous_revision_id=source.current_revision_id,
            metadata={"original_path": repo_relative(source_path)},
        )
        new_ranges = extract_ranges(
            source_path,
            source.source_id,
            refreshed_source.fingerprint,
            revision_id=revision.revision_id,
        )
        impacts = _impacted_artifacts(source.source_id, new_ranges)
        # Writing requires an explicit --yes and no --dry-run.
        effective_dry_run = dry_run or not yes
        semantic_warning: dict | None = None
        if not effective_dry_run:
            with mutation_transaction(paths, "source.refresh", [source_path]) as tx:
                sources = [
                    refreshed_source if item.source_id == source.source_id else item
                    for item in load_sources()
                ]
                # Drop this source's previous ranges and append the new ones.
                ranges = [
                    item for item in load_ranges() if item.source_id != source.source_id
                ] + new_ranges
                save_sources(sources)
                # Previously active revisions of this source become superseded.
                existing_revisions = [
                    item.model_copy(update={"status": "superseded"})
                    if item.source_id == source.source_id and item.status == "active"
                    else item
                    for item in load_source_revisions()
                ]
                save_source_revisions([*existing_revisions, revision])
                save_ranges(ranges)
                rebuild_index(paths.sqlite_path, sources, ranges, load_artifacts(), load_citation_edges())
                semantic_warning = _optional_semantic_rebuild(paths, ranges)
                tx.commit({"source_id": source.source_id, "ranges": len(new_ranges)})

        emit_success(
            "source.refresh",
            {
                "dry_run": effective_dry_run,
                "source_id": source.source_id,
                "uri": source.uri,
                "old_fingerprint": source.fingerprint,
                "new_fingerprint": refreshed_source.fingerprint,
                "old_normalized_fingerprint": source.normalized_fingerprint,
                "new_normalized_fingerprint": refreshed_source.normalized_fingerprint,
                "media_type": media_type,
                "ranges_removed": len(old_ranges),
                "ranges_extracted": len(new_ranges),
                "impacted_artifacts": impacts,
                "index_rebuilt": not effective_dry_run,
                # A semantic warning means the semantic rebuild was skipped,
                # so it must not be reported as rebuilt.
                "semantic_index_rebuilt": not effective_dry_run and semantic_warning is None,
                "next_commands": [],
            },
            target={"identifier": identifier, "source_id": source.source_id},
            warnings=[semantic_warning] if semantic_warning is not None else [],
        )

    run("source.refresh", _run)
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
@app.command("search", help="Search source ranges with hybrid retrieval and return citation tokens.")
def search(
    query: Annotated[str, typer.Argument(help="Search query.")],
    domain: Annotated[str | None, typer.Option("--domain", help="Domain filter.")] = None,
    limit: Annotated[int, typer.Option("--limit", help="Maximum results; must be positive.")] = 10,
) -> None:
    """Run a source-range search and emit the results with any warnings.

    ``--limit`` is checked with ``validate_positive_int`` before anything
    else runs. The reported mode is whatever ``search_ranges`` wrote into the
    metadata dict under ``mode``, falling back to ``hybrid`` when that entry
    is absent or empty.
    """

    def _run() -> None:
        validate_positive_int(limit, option="--limit")
        paths = current_paths()
        # Imported inside the command body, as in the original.
        from kc.search.fts import ensure_index, search_ranges

        ensure_index(paths.sqlite_path, paths.sources_jsonl, paths.ranges_jsonl)
        config = load_config(paths.root)
        sources = load_sources()

        # search_ranges receives this dict and the code below reads it back.
        retrieval_metadata: dict[str, str | None] = {}
        all_ranges = load_ranges()
        results = search_ranges(
            paths.sqlite_path,
            query,
            domain=domain,
            limit=limit,
            rrf_k=config.rrf_k,
            ranges=all_ranges,
            metadata=retrieval_metadata,
        )

        mode = retrieval_metadata.get("mode") or "hybrid"
        warnings = stale_source_warnings(results, sources)
        if mode == "fts_fallback":
            fallback_reason = retrieval_metadata.get("semantic_unavailable_reason")
            warnings.append(
                warning(
                    "KC_RETRIEVAL_SEMANTIC_UNAVAILABLE",
                    "Semantic ranking is unavailable; results use SQLite FTS fallback.",
                    {"reason": fallback_reason},
                )
            )

        payload = {
            "query": query,
            "mode": mode,
            "total": len(results),
            "results": results,
        }
        target = {"query": query, "domain": domain, "limit": limit, "mode": mode}
        emit_success("source.search", payload, target=target, warnings=warnings)

    run("source.search", _run)
|
kc/commands/status.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import typer
|
|
4
|
+
|
|
5
|
+
from kc.commands.common import load_artifacts, load_ranges, load_sources, run
|
|
6
|
+
from kc.output import emit_success
|
|
7
|
+
from kc.paths import current_workspace
|
|
8
|
+
from kc.store.sqlite import index_status
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def register(app: typer.Typer) -> None:
    """Attach the ``status`` command to ``app``."""

    @app.command("status", help="Show workspace status and deterministic next commands.")
    def status() -> None:
        """Emit workspace location, record counts, index state and next commands."""

        def _run() -> None:
            workspace = current_workspace()
            paths = workspace.paths
            initialized = paths.config_path.exists() and paths.data_dir.exists()

            # Each store is only loaded when its JSONL file exists.
            sources = []
            if paths.sources_jsonl.exists():
                sources = load_sources()
            ranges = []
            if paths.ranges_jsonl.exists():
                ranges = load_ranges()
            artifacts = []
            if paths.artifacts_jsonl.exists():
                artifacts = load_artifacts()

            index = None
            if initialized:
                index = index_status(paths.sqlite_path, sources, ranges)

            # Suggest the first applicable step, checked in this order.
            if not initialized:
                next_commands = ["kc init --yes"]
            elif not sources:
                next_commands = ["kc source add <file> --domain <domain> --yes"]
            elif index and index.get("stale"):
                next_commands = ["kc index build"]
            else:
                next_commands = [
                    "kc source search '<query>'",
                    "kc context prepare --ask '<task>' --out .kc/context/<id>.json",
                ]

            workspace_info = {
                "root": workspace.root.as_posix(),
                "resolution_source": workspace.source,
                "project_id": workspace.config.project_id,
                "data_dir": paths.data_dir.as_posix(),
                "state_dir": paths.state_dir.as_posix(),
            }
            counts = {
                "sources": len(sources),
                "ranges": len(ranges),
                "artifacts": len(artifacts),
            }
            emit_success(
                "status",
                {
                    "initialized": initialized,
                    "workspace": workspace_info,
                    "counts": counts,
                    "index": index,
                    "next_commands": next_commands,
                },
            )

        run("status", _run)
|