kc-cli 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kc/__init__.py +5 -0
- kc/__main__.py +11 -0
- kc/artifacts/__init__.py +1 -0
- kc/artifacts/diff.py +76 -0
- kc/artifacts/frontmatter.py +26 -0
- kc/artifacts/markdown.py +116 -0
- kc/atomic_write.py +33 -0
- kc/cli.py +284 -0
- kc/commands/__init__.py +1 -0
- kc/commands/artifact.py +1190 -0
- kc/commands/citation.py +231 -0
- kc/commands/common.py +346 -0
- kc/commands/conformance.py +293 -0
- kc/commands/context.py +190 -0
- kc/commands/doctor.py +81 -0
- kc/commands/eval.py +133 -0
- kc/commands/export.py +97 -0
- kc/commands/guide.py +571 -0
- kc/commands/index.py +54 -0
- kc/commands/init.py +207 -0
- kc/commands/lint.py +238 -0
- kc/commands/source.py +464 -0
- kc/commands/status.py +52 -0
- kc/commands/task.py +260 -0
- kc/config.py +127 -0
- kc/embedding_models/potion-base-8M/README.md +97 -0
- kc/embedding_models/potion-base-8M/config.json +13 -0
- kc/embedding_models/potion-base-8M/model.safetensors +0 -0
- kc/embedding_models/potion-base-8M/modules.json +14 -0
- kc/embedding_models/potion-base-8M/tokenizer.json +1 -0
- kc/errors.py +141 -0
- kc/fingerprints.py +35 -0
- kc/ids.py +23 -0
- kc/locks.py +65 -0
- kc/models/__init__.py +17 -0
- kc/models/artifact.py +34 -0
- kc/models/citation.py +60 -0
- kc/models/context.py +23 -0
- kc/models/eval.py +21 -0
- kc/models/plan.py +37 -0
- kc/models/source.py +37 -0
- kc/models/source_range.py +29 -0
- kc/models/source_revision.py +19 -0
- kc/models/task.py +35 -0
- kc/output.py +838 -0
- kc/paths.py +126 -0
- kc/provenance/__init__.py +1 -0
- kc/provenance/citations.py +296 -0
- kc/search/__init__.py +1 -0
- kc/search/extract.py +268 -0
- kc/search/fts.py +284 -0
- kc/search/semantic.py +346 -0
- kc/store/__init__.py +1 -0
- kc/store/jsonl.py +55 -0
- kc/store/sqlite.py +444 -0
- kc/store/transaction.py +67 -0
- kc/templates/agents/skills/kc/SKILL.md +282 -0
- kc/templates/agents/skills/kc/agents/openai.yaml +5 -0
- kc/templates/agents/skills/kc/scripts/resolve_query_citations.py +134 -0
- kc/workspace.py +98 -0
- kc_cli-0.4.0.dist-info/METADATA +522 -0
- kc_cli-0.4.0.dist-info/RECORD +65 -0
- kc_cli-0.4.0.dist-info/WHEEL +4 -0
- kc_cli-0.4.0.dist-info/entry_points.txt +2 -0
- kc_cli-0.4.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: kc
|
|
3
|
+
description: Use kc to maintain repo-local knowledge workspaces, ingest local or snapshotted remote sources, register and refresh revisioned source ranges, answer natural-language queries with grounded citations to original sources, prepare durable context packs, write cited artifacts, validate exact kc citation tokens, rewrite or repair legacy citations, diff and apply artifacts safely, lint knowledge state, run retrieval evals, and manage durable task workflows.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
<!-- kc-managed-agent-skill:v1 -->
|
|
7
|
+
|
|
8
|
+
# kc
|
|
9
|
+
|
|
10
|
+
## Operating Rule
|
|
11
|
+
|
|
12
|
+
Use `kc` as the deterministic harness around knowledge work. Write semantic content yourself, and use `kc` for source registration, retrieval, query answering, context preparation, citation validation, safe apply, task state, linting, and exports. Treat all boundaries and rules in this skill as strict enforcement, not suggestions.
|
|
13
|
+
|
|
14
|
+
Run commands from the repository root, a subdirectory inside it, or with `kc --root <repo> ...`. Use `kc guide --section bootstrap` or `kc guide --section workflows` when you need the current command contract.
|
|
15
|
+
|
|
16
|
+
Important boundaries:
|
|
17
|
+
|
|
18
|
+
1. **Source management:** `kc source add` registers local files. For web or API documentation, first save a local source snapshot under `knowledge/raw/<domain>/`.
|
|
19
|
+
2. **Content generation:** Do not ask `kc` to summarize, classify, judge truth, or generate prose. You (the agent) write all semantic content and documentation-style responses; `kc` is a deterministic retrieval and validation harness only.
|
|
20
|
+
3. **Citations:** Do not hand-author citation line ranges. Use citation tokens returned by `kc source search`, `kc context prepare`, or `kc artifact validate`.
|
|
21
|
+
|
|
22
|
+
## Workspace Bootstrap And Diagnostics
|
|
23
|
+
|
|
24
|
+
Start by discovering the workspace and current state:
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
kc status
|
|
28
|
+
kc guide --section quickstart
|
|
29
|
+
kc guide --section agent_contract
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
Use `kc --root <repo> ...` when you are outside the workspace, and respect `KC_ROOT` when it is already set. `kc` resolves the root in this order: `--root`, `KC_ROOT`, the nearest ancestor directory containing `kc.toml`, the nearest ancestor directory containing `.git`, then the current directory.
|
|
33
|
+
|
|
34
|
+
Initialize or update the managed layout when needed:
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
kc init --dry-run
|
|
38
|
+
kc init --yes
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Before changing state, run the diagnostics that apply, in this order:
|
|
42
|
+
|
|
43
|
+
1. Run `kc doctor` when paths, indexes, locks, or semantic search behavior look wrong.
|
|
44
|
+
2. Run `kc lint` when checking citation or knowledge state consistency.
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
kc doctor
|
|
48
|
+
kc lint
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
If a command reports a `repo-write` lock or an operation under `.kc/operations/`, inspect status and doctor output before retrying. Do not delete locks, operation records, plans, snapshots, task files, or SQLite state unless the user explicitly asks.
|
|
52
|
+
|
|
53
|
+
## Common Workflow
|
|
54
|
+
|
|
55
|
+
1. Check or initialize the workspace:
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
kc status
|
|
59
|
+
kc init --yes
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
2. Register local source files before relying on them:
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
kc source add docs/policy.md --domain policy --dry-run
|
|
66
|
+
kc source add docs/policy.md --domain policy --yes
|
|
67
|
+
kc source inspect docs/policy.md --ranges
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
3. Gather evidence for the writing task. Use search queries that match the exact claim you need to make, and write a durable context pack for longer work:
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
kc source search "ownership responsibilities" --domain policy
|
|
74
|
+
kc context prepare --ask "Create an ownership page" --shape knowledge_page --grounding required --target knowledge/wiki/ownership.md --out .kc/context/ownership.json
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
4. Write or edit the artifact yourself. Do not ask `kc` to summarize, classify, judge truth, or generate prose.
|
|
78
|
+
|
|
79
|
+
5. Keep material claims grounded with returned v2 `[kc:src_...:rng_...]` citation tokens. Mark synthesis with `[kc:inference]` and unresolved draft work with `[kc:todo]`. If validation says a citation range is missing, search again for that exact claim and use the returned token; a visible source line is not necessarily an extracted kc range.
|
|
80
|
+
|
|
81
|
+
6. Validate, preview, and apply:
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
kc artifact validate --file knowledge/wiki/ownership.md
|
|
85
|
+
kc artifact diff --file knowledge/wiki/ownership.md
|
|
86
|
+
kc artifact apply --file knowledge/wiki/ownership.md --dry-run
|
|
87
|
+
kc artifact apply --file knowledge/wiki/ownership.md --yes
|
|
88
|
+
kc lint
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
7. For multi-step work, keep task state current:
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
kc task start --goal "Create ownership page" --target knowledge/wiki/ownership.md
|
|
95
|
+
kc task next --task-id task_01HX
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## Search And Context Packs
|
|
99
|
+
|
|
100
|
+
Use `kc source search` for quick evidence and `kc context prepare` when an external agent needs a complete work packet:
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
kc source search "approval owner responsibility" --domain policy --limit 8
|
|
104
|
+
kc context prepare --ask "Document approval ownership" --shape knowledge_page --grounding required --target knowledge/wiki/approval-ownership.md --out .kc/context/approval-ownership.json
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Read `result.mode` and warnings. If `KC_RETRIEVAL_SEMANTIC_UNAVAILABLE` appears, results are still usable FTS fallback results, but you should run `kc doctor` or `kc index build --clean` if semantic ranking is expected.
|
|
108
|
+
|
|
109
|
+
When a context pack exists, treat it as the working contract: use its candidate ranges, citation policy, validation commands, and next commands. Do not scrape human text output when JSON fields are available.
|
|
110
|
+
|
|
111
|
+
## Query Answering
|
|
112
|
+
|
|
113
|
+
For user questions over a kc corpus, answer directly instead of creating an artifact unless the user asks for a durable page.
|
|
114
|
+
|
|
115
|
+
1. Identify the likely domain and run one broad search:
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
kc source search "How do approvals work?" --domain codex --limit 8
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
2. For compound questions, split the prompt into exact claim-style searches. Search for the support you need, not just the user's wording:
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
kc source search "approval modes sandbox modes Codex" --domain codex --limit 6
|
|
125
|
+
kc source search "managed configuration approval policy sandbox mode" --domain codex --limit 6
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
3. Open the local source snapshot when snippets are too thin or adjacent context matters. Use the returned `display_name`, `source_id`, and line range to find the registered file in `knowledge/sources.jsonl`, then read only the needed lines.
|
|
129
|
+
|
|
130
|
+
4. Resolve citations to original sources before responding. Prefer the `Source URL:` in the snapshot metadata header. If no original URL exists, cite the registered local source path.
|
|
131
|
+
|
|
132
|
+
Use the bundled helper when search output is saved or piped:
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
kc source search "Codex app worktrees automations" --domain codex --limit 8 | python .agents/skills/kc/scripts/resolve_query_citations.py -
|
|
136
|
+
kc context prepare --ask "How do Codex app worktrees work?" --domain codex | python .agents/skills/kc/scripts/resolve_query_citations.py -
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
5. You (the agent) write the final answer as a documentation-style response:
|
|
140
|
+
- Put the direct answer first.
|
|
141
|
+
- Use short headings or bullets only when they improve scanability.
|
|
142
|
+
- Cite material claims inline with Markdown links to original source URLs.
|
|
143
|
+
- Do not show `[kc:src_...]` tokens to the user in transient query answers.
|
|
144
|
+
- Add a short "Not found" or "Unclear" note for unsupported parts instead of guessing.
|
|
145
|
+
|
|
146
|
+
Use kc citation tokens only in working notes or durable artifacts. For durable artifacts, keep the exact `[kc:src_...]` tokens and run the artifact validation workflow.
|
|
147
|
+
|
|
148
|
+
## Remote or Bulk Source Ingestion
|
|
149
|
+
|
|
150
|
+
For remote documentation, API pages, or large source sets:
|
|
151
|
+
|
|
152
|
+
1. Discover the authoritative source list first, such as a sitemap, index page, repository tree, or user-provided list.
|
|
153
|
+
2. Snapshot each source to `knowledge/raw/<domain>/...` with a short metadata header:
|
|
154
|
+
- source URL
|
|
155
|
+
- fetched UTC timestamp
|
|
156
|
+
- publisher or owner when known
|
|
157
|
+
- conversion method, if any
|
|
158
|
+
3. Use official Markdown or structured exports when available.
|
|
159
|
+
4. Use HTML conversion only as a fallback. If `markitdown` is available, use it for HTML fallback conversion, then trim site chrome before registration when the converted page starts with navigation noise.
|
|
160
|
+
5. Record a manifest for bulk ingests, including total discovered, downloaded, fallbacks, failures, and post-processing.
|
|
161
|
+
6. Dry-run all source registrations before mutating state. Keep logs under `.kc/logs/` for large batches.
|
|
162
|
+
7. Register with `kc source add ... --yes`, then run `kc index build --clean` and `kc lint`.
|
|
163
|
+
|
|
164
|
+
If you clean or post-process a file after registering it, refresh the source rather than adding it again:
|
|
165
|
+
|
|
166
|
+
```bash
|
|
167
|
+
kc source refresh knowledge/raw/codex/app.md --dry-run
|
|
168
|
+
kc source refresh knowledge/raw/codex/app.md --yes
|
|
169
|
+
kc index build --clean
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
## Source Maintenance
|
|
173
|
+
|
|
174
|
+
Inspect before re-adding a path:
|
|
175
|
+
|
|
176
|
+
```bash
|
|
177
|
+
kc source inspect docs/policy.md --ranges
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
Refresh changed registered sources instead of adding duplicates:
|
|
181
|
+
|
|
182
|
+
```bash
|
|
183
|
+
kc source refresh docs/policy.md --dry-run
|
|
184
|
+
kc source refresh docs/policy.md --yes
|
|
185
|
+
kc index build
|
|
186
|
+
kc lint
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
Source IDs are path-stable and source revisions track content changes. If a refresh changes the text at an existing locator, older locator-only citations may become invalid or stale. Run citation checks and use rewrite or repair rather than preserving stale citations.
|
|
190
|
+
|
|
191
|
+
## Citation and Artifact Rules
|
|
192
|
+
|
|
193
|
+
- Use only exact citation tokens returned by kc. Do not combine adjacent returned tokens into a wider line range.
|
|
194
|
+
- Prefer v2 range-aware tokens such as `[kc:src_...:rng_...:L12-L18]`.
|
|
195
|
+
- If one sentence needs multiple facts, cite each supporting range.
|
|
196
|
+
- If a material claim has no returned source range, omit it, mark it `[kc:todo]` while draft, or mark it `[kc:inference]` if it is explicit synthesis.
|
|
197
|
+
- Leave `source_refs: []` in frontmatter unless you know kc's structured object schema. `kc artifact apply` derives structured source refs from citation edges.
|
|
198
|
+
- Run both `kc artifact validate --file <path>` and `kc citation check --file <path>` before apply when editing cited artifacts.
|
|
199
|
+
- Treat `kc artifact diff` and `kc artifact apply --dry-run` output as the review surface before `--yes`.
|
|
200
|
+
|
|
201
|
+
Use citation maintenance commands for legacy or stale citations:
|
|
202
|
+
|
|
203
|
+
```bash
|
|
204
|
+
kc citation check --file knowledge/wiki/ownership.md
|
|
205
|
+
kc citation rewrite --file knowledge/wiki/ownership.md --dry-run
|
|
206
|
+
kc citation rewrite --file knowledge/wiki/ownership.md --yes
|
|
207
|
+
kc citation repair --file knowledge/wiki/ownership.md --dry-run
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
`kc citation rewrite` is for exact legacy-to-v2 rewrites. `kc citation repair` reports deterministic candidates and only applies exact mechanical repairs.
|
|
211
|
+
|
|
212
|
+
## Artifact Workflow
|
|
213
|
+
|
|
214
|
+
Create skeletons and apply only after validation:
|
|
215
|
+
|
|
216
|
+
```bash
|
|
217
|
+
kc artifact new --file knowledge/wiki/ownership.md --title "Ownership" --type knowledge_page
|
|
218
|
+
kc artifact validate --file knowledge/wiki/ownership.md
|
|
219
|
+
kc citation check --file knowledge/wiki/ownership.md
|
|
220
|
+
kc artifact diff --file knowledge/wiki/ownership.md
|
|
221
|
+
kc artifact apply --file knowledge/wiki/ownership.md --dry-run
|
|
222
|
+
kc artifact apply --file knowledge/wiki/ownership.md --yes
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
`kc artifact diff` compares against the last applied snapshot when one exists. If the baseline is unavailable, the result declares that instead of pretending there is a stable previous version.
|
|
226
|
+
|
|
227
|
+
## Task State
|
|
228
|
+
|
|
229
|
+
Use task commands for longer external-agent workflows or when the user asks for a durable knowledge update:
|
|
230
|
+
|
|
231
|
+
```bash
|
|
232
|
+
kc task start --goal "Create ownership page" --target knowledge/wiki/ownership.md
|
|
233
|
+
kc task status --task-id task_01HX
|
|
234
|
+
kc task next --task-id task_01HX
|
|
235
|
+
kc task resume --task-id task_01HX --event artifact_created --input @event.json
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
Start the task before source gathering when the work is multi-step. Use `kc task next` after each event; after the artifact validates and is applied, resume the task with the expected event payload so the task reaches `completed`.
|
|
239
|
+
|
|
240
|
+
Typical event payload files are small JSON objects:
|
|
241
|
+
|
|
242
|
+
```json
|
|
243
|
+
{"path": "knowledge/wiki/ownership.md", "valid": true}
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
Use `blocked_missing_source` or `blocked_validation_failed` with a `reason` field when the task cannot move forward without user input.
|
|
247
|
+
|
|
248
|
+
## Eval, Export, And Contract Checks
|
|
249
|
+
|
|
250
|
+
Run retrieval eval packs when changing source extraction, ranking, or command contracts:
|
|
251
|
+
|
|
252
|
+
```bash
|
|
253
|
+
kc eval run --pack knowledge/evals/basic.yaml --out .kc/evals/basic-result.json
|
|
254
|
+
kc conformance
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
Use exports only after lint and citation checks are clean:
|
|
258
|
+
|
|
259
|
+
```bash
|
|
260
|
+
kc export --format jsonl --out knowledge/exports/kc.jsonl
|
|
261
|
+
kc export --format llms_txt --out knowledge/exports/llms.txt
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
## Parallel Work
|
|
265
|
+
|
|
266
|
+
Use subagents only for sidecar work that does not mutate the shared kc registry, such as:
|
|
267
|
+
|
|
268
|
+
- checking coverage against a source list
|
|
269
|
+
- inspecting noisy converted snapshots
|
|
270
|
+
- reviewing whether a draft artifact misses obvious topics
|
|
271
|
+
|
|
272
|
+
Keep source registration, source refresh, index builds, artifact apply, citation rewrite, and task resume in the main thread so `.kc/state.sqlite`, `.kc/operations/`, and JSONL stores are updated predictably.
|
|
273
|
+
|
|
274
|
+
## Guardrails
|
|
275
|
+
|
|
276
|
+
- Keep `kc` provider-neutral and local-first; do not add LLM or model-provider behavior to CLI workflows.
|
|
277
|
+
- Use JSON output for automation and integrations.
|
|
278
|
+
- Treat stale-source warnings as blocking for durable knowledge updates unless the user explicitly acknowledges the warning by running `kc source refresh <path> --yes` or by providing written confirmation to proceed.
|
|
279
|
+
- Prefer dry-run before mutation, especially for source refresh and artifact apply.
|
|
280
|
+
- Do not revert or delete existing kc state, snapshots, or logs unless the user explicitly asks.
|
|
281
|
+
- Expect mutating commands to update `knowledge/*.jsonl`, `.kc/state.sqlite`, `.kc/operations/`, `.kc/plans/`, `.kc/snapshots/`, `.kc/context/`, and task files.
|
|
282
|
+
- In multi-user or collaborative environments, avoid concurrent mutations to `knowledge/*.jsonl` or `.kc/state.sqlite`. Coordinate with other users before running `kc source add`, `kc index build`, or `kc artifact apply` when shared state is possible.
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
# kc-managed-agent-skill:v1
|
|
2
|
+
interface:
|
|
3
|
+
display_name: "kc Knowledge"
|
|
4
|
+
short_description: "Query, ingest, and validate grounded knowledge"
|
|
5
|
+
default_prompt: "Use $kc to answer knowledge queries with original-source citations, ingest sources, prepare grounded context, and validate cited artifacts."
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# kc-managed-agent-skill:v1
|
|
3
|
+
"""Resolve kc search or context results to original source URLs.
|
|
4
|
+
|
|
5
|
+
Reads a kc JSON result from a file or stdin and prints compact JSON records
|
|
6
|
+
with source ids, line ranges, local snapshot paths, and original URLs parsed
|
|
7
|
+
from kc snapshot metadata headers.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import argparse
|
|
13
|
+
import json
|
|
14
|
+
import re
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
# One compiled pattern per "Key: value" line recognised in a snapshot
# metadata header. Each pattern captures the value with the surrounding
# whitespace trimmed; the label match is case-sensitive.
HEADER_KEYS = dict(
    source_url=re.compile(r"^\s*Source URL:\s*(.+?)\s*$"),
    markdown_url=re.compile(r"^\s*Markdown URL:\s*(.+?)\s*$"),
    publisher=re.compile(r"^\s*Publisher:\s*(.+?)\s*$"),
)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def read_json(path: str) -> dict[str, Any]:
    """Load a kc JSON result from *path*, or from stdin when *path* is ``"-"``.

    A leading byte order mark is tolerated on both inputs: files are opened
    with the ``utf-8-sig`` codec, and a decoded U+FEFF at the start of stdin
    is dropped before parsing.

    Args:
        path: Filesystem path of the JSON document, or ``"-"`` for stdin.

    Returns:
        The decoded JSON value.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the input is not valid JSON.
    """
    if path == "-":
        import sys

        # json rejects text that starts with a BOM, which some shells
        # prepend when piping; strip it so piped and saved results parse
        # the same way.
        return json.loads(sys.stdin.read().removeprefix("\ufeff"))
    with open(path, encoding="utf-8-sig") as handle:
        return json.load(handle)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def load_sources(repo_root: Path) -> dict[str, dict[str, Any]]:
    """Index the records of ``knowledge/sources.jsonl`` by their source id.

    Blank lines are skipped. When a source id occurs more than once, the
    record that appears last in the file wins.

    Args:
        repo_root: Directory that contains the ``knowledge`` folder.

    Returns:
        Mapping from ``source_id`` to the full decoded record.

    Raises:
        OSError: If the registry file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no ``source_id`` field.
    """
    registry_file = repo_root / "knowledge" / "sources.jsonl"
    with registry_file.open(encoding="utf-8") as stream:
        entries = (json.loads(raw) for raw in stream if raw.strip())
        return {entry["source_id"]: entry for entry in entries}
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def source_path(repo_root: Path, source: dict[str, Any]) -> Path | None:
    """Locate the local file behind a source record.

    ``metadata.original_path`` is preferred when present; otherwise a
    ``file:`` uri is used with its scheme prefix removed. Either value is
    joined onto *repo_root* and resolved.

    Args:
        repo_root: Repository root used as the base for relative paths.
        source: A source record; may be empty.

    Returns:
        The resolved path, or ``None`` when the record names no local file.
    """
    metadata = source.get("metadata") or {}
    original_path = metadata.get("original_path")
    if original_path:
        return (repo_root / original_path).resolve()

    # The key can be present with a null (or otherwise non-string) value,
    # in which case .get()'s default is not used; treat that as "no uri"
    # instead of failing on a missing str method.
    uri = source.get("uri") or ""
    if isinstance(uri, str) and uri.startswith("file:"):
        return (repo_root / uri.removeprefix("file:")).resolve()
    return None
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def parse_header(path: Path | None) -> dict[str, str]:
    """Collect metadata header values from the first 60 lines of a snapshot.

    Every scanned line is tested against each pattern in ``HEADER_KEYS``.
    When a key matches on more than one line, the later line wins.

    Args:
        path: Snapshot file to scan, or ``None``.

    Returns:
        Mapping of header key to captured value. Empty when *path* is
        ``None``, does not exist, or is not a regular file.
    """
    values: dict[str, str] = {}
    # is_file() rather than exists(): a directory would pass exists() and
    # then fail inside open().
    if not path or not path.is_file():
        return values

    # errors="replace" keeps one undecodable byte in a snapshot from
    # aborting the whole run; header lines themselves are plain text.
    with path.open(encoding="utf-8-sig", errors="replace") as handle:
        for index, line in enumerate(handle):
            # Only the top of the file is treated as header territory.
            if index >= 60:
                break
            for key, pattern in HEADER_KEYS.items():
                match = pattern.match(line)
                if match:
                    values[key] = match.group(1)
    return values
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def results(payload: dict[str, Any]) -> list[dict[str, Any]]:
    """Pull the list of result items out of a kc JSON payload.

    Lookup order: ``result.results``, ``result.candidate_ranges``, then the
    same two keys at the top level of *payload*. The first value that is a
    list is returned; a value of any other type is ignored.

    Args:
        payload: Decoded kc JSON output.

    Returns:
        The first list found, or an empty list when there is none.
    """
    nested = payload.get("result") or {}
    for container in (nested, payload):
        for field in ("results", "candidate_ranges"):
            found = container.get(field)
            if isinstance(found, list):
                return found
    return []
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def main() -> int:
    """Resolve every item of a kc JSON result and print the records as JSON.

    Command-line arguments:
        json_result: Path of the kc JSON result, or ``-`` for stdin.
        --repo-root: Repository root containing ``knowledge/sources.jsonl``
            (default ``.``).

    For each result item the matching source record is looked up by
    ``source_id``, its local snapshot is located, and the snapshot's
    metadata header is parsed. Items whose source is unknown are still
    emitted, with the source-derived fields set to ``None``.

    Returns:
        0 after the JSON array has been written to stdout.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("json_result", help="kc JSON result path, or '-' for stdin")
    parser.add_argument(
        "--repo-root",
        default=".",
        help="Repository root containing knowledge/sources.jsonl",
    )
    args = parser.parse_args()

    repo_root = Path(args.repo_root).resolve()
    payload = read_json(args.json_result)
    sources = load_sources(repo_root)

    resolved = []
    for item in results(payload):
        item_source_id = item.get("source_id")
        source = sources.get(item_source_id, {}) if isinstance(item_source_id, str) else {}
        path = source_path(repo_root, source)
        header = parse_header(path)
        locator = item.get("locator") or {}
        start = locator.get("start_line")
        end = locator.get("end_line")
        line_range = f"L{start}-L{end}" if start and end else None
        # Prefer the URL recorded in the snapshot header; fall back to the
        # registered uri when the header has none.
        original_url = header.get("source_url") or source.get("uri")

        local_snapshot = None
        if path is not None:
            try:
                local_snapshot = str(path.relative_to(repo_root))
            except ValueError:
                # The resolved snapshot can sit outside the repo root (an
                # absolute registration, or a symlink that resolve()
                # followed). Report the absolute path rather than aborting
                # the whole run.
                local_snapshot = str(path)

        resolved.append(
            {
                "display_name": item.get("display_name") or source.get("display_name"),
                "source_id": item_source_id,
                "line_range": line_range,
                "original_url": original_url,
                "markdown_url": header.get("markdown_url"),
                "publisher": header.get("publisher"),
                "local_snapshot": local_snapshot,
                "citation_token": item.get("citation_token"),
                "excerpt": item.get("excerpt"),
            }
        )

    print(json.dumps(resolved, indent=2, ensure_ascii=False))
    return 0
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
# Run the resolver only when this file is executed as a script; the value
# returned by main() is passed to SystemExit and becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
kc/workspace.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""Workspace root discovery and config-aware path construction."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Literal
|
|
9
|
+
|
|
10
|
+
from kc.config import KcConfig, load_config
|
|
11
|
+
from kc.errors import KcError
|
|
12
|
+
from kc.output import state
|
|
13
|
+
from kc.paths import KcPaths
|
|
14
|
+
|
|
15
|
+
# Label for the rule that selected the workspace root: an explicit override,
# the KC_ROOT environment variable, an ancestor directory containing
# kc.toml, an ancestor directory containing .git, or the starting directory.
WorkspaceSource = Literal["explicit", "env", "kc.toml", "git", "cwd"]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass(frozen=True)
class Workspace:
    """Immutable description of a resolved kc workspace."""

    # Workspace root directory.
    root: Path
    # Configuration loaded for that root.
    config: KcConfig
    # Paths object built from the root plus the data and state directories.
    paths: KcPaths
    # Which resolution rule selected the root.
    source: WorkspaceSource
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _ancestors(start: Path) -> list[Path]:
|
|
27
|
+
resolved = start.resolve()
|
|
28
|
+
if resolved.is_file():
|
|
29
|
+
resolved = resolved.parent
|
|
30
|
+
return [resolved, *resolved.parents]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _find_up(start: Path, name: str) -> Path | None:
    """Return the nearest directory at or above *start* that contains *name*.

    The entry may be a file or a directory. Returns ``None`` when no
    ancestor contains it.
    """
    return next(
        (directory for directory in _ancestors(start) if (directory / name).exists()),
        None,
    )
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _resolve_dir(root: Path, value: str) -> Path:
|
|
41
|
+
candidate = Path(value).expanduser()
|
|
42
|
+
if not candidate.is_absolute():
|
|
43
|
+
candidate = root / candidate
|
|
44
|
+
return candidate.resolve()
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def resolve_workspace(
    start: Path | None = None,
    *,
    root_override: Path | str | None = None,
    data_dir_override: str | None = None,
    state_dir_override: str | None = None,
    require_initialized: bool = False,
) -> Workspace:
    """Resolve the workspace root, load its config, and build its paths.

    The root is chosen by the first rule that applies:

    1. ``root_override``, or ``state.root_override`` when the argument is
       ``None`` ("explicit").
    2. The ``KC_ROOT`` environment variable ("env").
    3. The nearest directory at or above *start* containing ``kc.toml``.
    4. The nearest directory at or above *start* containing ``.git``.
    5. *start* itself, or its parent when *start* is not a directory ("cwd").

    Args:
        start: Where the upward search begins; defaults to the current
            working directory.
        root_override: Explicit workspace root.
        data_dir_override: Data directory; falls back to ``state.data_dir``
            and then to the configured value.
        state_dir_override: State directory; falls back to
            ``state.state_dir`` and then to the configured value.
        require_initialized: When true, a root without ``kc.toml`` is an
            error.

    Returns:
        The resolved ``Workspace``.

    Raises:
        KcError: With code ``KC_CONFIG_NOT_FOUND`` when
            ``require_initialized`` is set and the root has no ``kc.toml``.

    Side effects:
        Records the chosen root and resolution source on ``state``.
    """
    origin = (start or Path.cwd()).resolve()
    requested_root = state.root_override if root_override is None else root_override
    env_root = os.environ.get("KC_ROOT")

    source: WorkspaceSource
    if requested_root:
        root, source = Path(requested_root).expanduser().resolve(), "explicit"
    elif env_root:
        root, source = Path(env_root).expanduser().resolve(), "env"
    elif (config_root := _find_up(origin, "kc.toml")) is not None:
        root, source = config_root, "kc.toml"
    elif (git_root := _find_up(origin, ".git")) is not None:
        root, source = git_root, "git"
    else:
        root, source = (origin if origin.is_dir() else origin.parent), "cwd"

    config_file = root / "kc.toml"
    has_config = config_file.exists()
    if require_initialized and not has_config:
        raise KcError(
            code="KC_CONFIG_NOT_FOUND",
            message="kc.toml not found. Run kc init --yes first.",
            details={"path": str(config_file), "workspace_root": root.as_posix()},
            suggested_action=f"run kc --root {root.as_posix()} init --yes",
        )

    config = load_config(root, required=False)
    # Precedence for each directory: call argument, then the value held on
    # state, then the loaded config.
    data_dir = state.data_dir if data_dir_override is None else data_dir_override
    state_dir = state.state_dir if state_dir_override is None else state_dir_override
    paths = KcPaths(
        root=root,
        data_dir=_resolve_dir(root, data_dir or config.data_dir),
        state_dir=_resolve_dir(root, state_dir or config.state_dir),
    )

    state.workspace_root = root.as_posix()
    state.workspace_resolution_source = source
    return Workspace(root=root, config=config, paths=paths, source=source)
|