kc-beta 0.6.2 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/LICENSE +81 -0
  2. package/LICENSE-COMMERCIAL.md +125 -0
  3. package/README.md +21 -3
  4. package/package.json +14 -5
  5. package/src/agent/context-window.js +9 -12
  6. package/src/agent/context.js +14 -1
  7. package/src/agent/document-parser.js +169 -0
  8. package/src/agent/engine.js +382 -19
  9. package/src/agent/history/event-history.js +222 -0
  10. package/src/agent/llm-client.js +55 -0
  11. package/src/agent/message-utils.js +63 -0
  12. package/src/agent/pipelines/_milestone-derive.js +566 -0
  13. package/src/agent/pipelines/base.js +21 -0
  14. package/src/agent/pipelines/distillation.js +28 -15
  15. package/src/agent/pipelines/extraction.js +130 -36
  16. package/src/agent/pipelines/finalization.js +178 -11
  17. package/src/agent/pipelines/index.js +6 -1
  18. package/src/agent/pipelines/initializer.js +74 -8
  19. package/src/agent/pipelines/production-qc.js +31 -44
  20. package/src/agent/pipelines/skill-authoring.js +97 -80
  21. package/src/agent/pipelines/skill-testing.js +106 -23
  22. package/src/agent/retry.js +10 -2
  23. package/src/agent/scheduler.js +14 -2
  24. package/src/agent/session-state.js +18 -1
  25. package/src/agent/skill-loader.js +13 -7
  26. package/src/agent/skill-validator.js +19 -5
  27. package/src/agent/task-manager.js +61 -5
  28. package/src/agent/tools/document-chunk.js +21 -9
  29. package/src/agent/tools/phase-advance.js +37 -5
  30. package/src/agent/tools/release.js +51 -9
  31. package/src/agent/tools/rule-catalog.js +11 -1
  32. package/src/agent/tools/workspace-file.js +32 -0
  33. package/src/agent/workspace.js +39 -1
  34. package/src/cli/components.js +64 -14
  35. package/src/cli/index.js +62 -3
  36. package/src/cli/meme.js +26 -25
  37. package/src/config.js +65 -22
  38. package/src/model-tiers.json +24 -8
  39. package/src/providers.js +42 -0
  40. package/template/release/v1/README.md.tmpl +108 -0
  41. package/template/release/v1/catalog.json.tmpl +4 -0
  42. package/template/release/v1/kc_runtime/__init__.py +11 -0
  43. package/template/release/v1/kc_runtime/confidence.py +63 -0
  44. package/template/release/v1/kc_runtime/doc_parser.py +127 -0
  45. package/template/release/v1/manifest.json.tmpl +11 -0
  46. package/template/release/v1/render_dashboard.py +117 -0
  47. package/template/release/v1/run.py +212 -0
  48. package/template/release/v1/serve.sh +17 -0
  49. package/template/skills/en/meta-meta/work-decomposition/SKILL.md +326 -0
  50. package/template/skills/en/skill-creator/SKILL.md +1 -1
  51. package/template/skills/zh/meta-meta/work-decomposition/SKILL.md +321 -0
  52. package/template/skills/zh/skill-creator/SKILL.md +1 -1
@@ -0,0 +1,127 @@
1
+ """
2
+ Minimal document parser for the release runtime.
3
+
4
+ Strategy: try native Python parsers first (pypdf, python-docx),
5
+ fall back to LibreOffice CLI if natives unavailable AND lo is on
6
+ PATH, finally fall back to UTF-8 plaintext read. Each strategy
7
+ records what it tried via the result dict so workflows can decide
8
+ whether to trust the text.
9
+
10
+ This is a release-time helper — KC's CLI mode uses its own document
11
+ parsing pipeline (src/agent/document-parser.js + LibreOffice).
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ import shutil
18
+ import subprocess
19
+ from pathlib import Path
20
+
21
+
22
def preflight(doc: Path) -> dict:
    """
    Cheap readability check to run before any parsing is attempted.

    Returns a status dict. On rejection it is
    ``{"ok": False, "reason": <code>, "path": <str>}`` where ``reason`` is
    ``"not_found"``, ``"not_file"`` or ``"empty"``. On success it is
    ``{"ok": True, "path": <str>, "size_bytes": <int>}``.
    """
    location = str(doc)

    def _rejected(reason: str) -> dict:
        # Every rejection shares the same shape; only the reason code varies.
        return {"ok": False, "reason": reason, "path": location}

    if not doc.exists():
        return _rejected("not_found")
    if not doc.is_file():
        return _rejected("not_file")

    size = doc.stat().st_size
    if size == 0:
        return _rejected("empty")
    return {"ok": True, "path": location, "size_bytes": size}
35
+
36
+
37
def extract_text(doc: Path) -> dict:
    """
    Pull text out of a document without raising on unreadable input.

    Returns:
        { "text": "...", "via": "<strategy>", "ok": bool, "error"?: str }

    Strategies tried in order:
      1. Suffix-specific native parser (pypdf for .pdf, python-docx for .docx)
      2. LibreOffice CLI for .pdf/.doc/.docx/.odt/.rtf when a helper returns text
      3. Plaintext read as UTF-8, then GBK if the bytes are not valid UTF-8

    When every strategy fails, ``ok`` is False, ``via`` is ``"none"``,
    ``text`` is empty and ``error`` carries the last failure message. This
    includes files that cannot be opened at all (missing, directory,
    permission denied), which are reported in the dict rather than raised.
    """
    suffix = doc.suffix.lower()

    if suffix == ".pdf":
        text = _try_pypdf(doc)
        if text is not None:
            return {"text": text, "via": "pypdf", "ok": True}

    if suffix == ".docx":
        text = _try_python_docx(doc)
        if text is not None:
            return {"text": text, "via": "python-docx", "ok": True}

    # LibreOffice fallback for anything we couldn't parse natively
    if suffix in (".pdf", ".doc", ".docx", ".odt", ".rtf"):
        text = _try_libreoffice(doc)
        if text is not None:
            return {"text": text, "via": "libreoffice", "ok": True}

    # Plaintext fallback (covers .txt, .md, .csv, .json, etc.)
    try:
        text = doc.read_text(encoding="utf-8")
        return {"text": text, "via": "plaintext_utf8", "ok": True}
    except UnicodeDecodeError:
        pass  # not UTF-8: fall through to the GBK attempt below
    except OSError as exc:
        # The file cannot be opened at all; a second encoding will not help.
        return {"text": "", "via": "none", "ok": False, "error": str(exc)}

    try:
        text = doc.read_text(encoding="gbk")  # common in Chinese corpora
        return {"text": text, "via": "plaintext_gbk", "ok": True}
    except Exception as exc:  # deliberate best-effort: report, never raise
        return {"text": "", "via": "none", "ok": False, "error": str(exc)}
74
+
75
+
76
+ # --- internals ---
77
+
78
+
79
def _try_pypdf(doc: Path):
    """
    Extract text from a PDF with pypdf, one page per line group.

    Returns the page texts joined by newlines, or None when pypdf is not
    importable or anything goes wrong while reading the file.
    """
    try:
        from pypdf import PdfReader  # type: ignore
    except ImportError:
        return None

    try:
        page_texts = []
        for page in PdfReader(str(doc)).pages:
            # extract_text() may yield nothing for a page; keep an empty slot.
            page_texts.append(page.extract_text() or "")
        return "\n".join(page_texts)
    except Exception:
        return None
89
+
90
+
91
def _try_python_docx(doc: Path):
    """
    Extract text from a .docx file with python-docx.

    Collects every paragraph's text, then every non-empty table cell's text,
    and joins them with newlines. Returns None when python-docx is not
    importable or the document cannot be read.
    """
    try:
        import docx  # python-docx
    except ImportError:
        return None

    try:
        document = docx.Document(str(doc))
        chunks = [paragraph.text for paragraph in document.paragraphs]
        chunks.extend(
            cell.text
            for table in document.tables
            for row in table.rows
            for cell in row.cells
            if cell.text
        )
        return "\n".join(chunks)
    except Exception:
        return None
107
+
108
+
109
def _try_libreoffice(doc: Path):
    """
    Convert a document to text with the LibreOffice CLI.

    Looks for ``soffice`` (then ``libreoffice``) on PATH and runs
    ``--headless --convert-to txt`` into a private temporary directory that
    is removed before returning. Nothing is written next to the input
    document, so read-only input folders work and a stale conversion of
    another file with the same stem can never be picked up.

    Returns the converted text, or None when LibreOffice is not installed,
    the conversion fails or times out (60 s), or no output file is produced.
    """
    import tempfile  # local import: only this fallback needs it

    soffice = shutil.which("soffice") or shutil.which("libreoffice")
    if not soffice:
        return None
    try:
        with tempfile.TemporaryDirectory(prefix="kc-lo-out-") as out_dir:
            subprocess.run(
                [soffice, "--headless", "--convert-to", "txt", "--outdir", out_dir, str(doc)],
                capture_output=True,
                timeout=60,
                check=True,
            )
            txt_path = Path(out_dir) / (doc.stem + ".txt")
            if txt_path.exists():
                # Read before the context manager deletes the directory.
                return txt_path.read_text(encoding="utf-8")
    except Exception:  # deliberate best-effort: any failure means "no text"
        return None
    return None
@@ -0,0 +1,11 @@
1
+ {
2
+ "release_version": "v1",
3
+ "kc_version": "{{kc_version}}",
4
+ "generated_at": "{{generated_at}}",
5
+ "session_id": "{{session_id}}",
6
+ "rules_count": {{rule_count}},
7
+ "workflows_count": {{workflow_count}},
8
+ "workflows": {},
9
+ "calibration_source": "confidence_calibration.json",
10
+ "documentation": "README.md"
11
+ }
@@ -0,0 +1,117 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Render a single-page HTML dashboard from the run.py output directory.
4
+
5
+ Usage:
6
+ python3 render_dashboard.py [output/results dir]
7
+ python3 render_dashboard.py output/results/ > dashboard.html
8
+
9
+ Reads every <doc>.json under the given directory + summary.json, renders
10
+ a self-contained HTML page (inline CSS, no external assets) showing per-
11
+ document verdict tables + an overall summary. Designed to be opened
12
+ directly in a browser.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import json
18
+ import sys
19
+ from pathlib import Path
20
+ from html import escape
21
+
22
+
23
# Badge background colour (CSS hex) for each verdict label the dashboard
# knows about. Labels missing from this table get the fallback in _color().
VERDICT_COLORS = dict(
    PASS="#16a34a",
    FAIL="#dc2626",
    PARTIAL="#f59e0b",
    PARTIAL_PASS="#f59e0b",
    NOT_APPLICABLE="#6b7280",
    UNDETERMINED="#9ca3af",
    INCONCLUSIVE="#9ca3af",
    NO_WORKFLOW="#7c3aed",
    ERROR="#000000",
)


def _color(verdict: str) -> str:
    """Return the badge colour for ``verdict``, or ``#374151`` if it is not in the table."""
    try:
        return VERDICT_COLORS[verdict]
    except KeyError:
        return "#374151"
38
+
39
+
40
def _render_doc(doc_path: Path) -> str:
    """
    Render one per-document result file as an HTML ``<section>``.

    Reads the JSON at ``doc_path`` and emits one table row per entry of its
    ``"results"`` mapping (sorted by rule id) showing the rule id, verdict
    badge, confidence and reason. The heading is the file name of the
    ``"document"`` value, falling back to ``doc_path``'s stem.

    All values taken from the JSON are HTML-escaped. The reason is cut to
    200 characters before escaping so an entity such as ``&amp;`` can never
    be split. A missing, null or non-numeric confidence renders as 0.00
    instead of aborting the whole dashboard.
    """
    data = json.loads(doc_path.read_text(encoding="utf-8"))
    rows = []
    for rule_id, verdict in sorted(data.get("results", {}).items()):
        # Coerce to str: workflow output is free-form JSON, and both escape()
        # and the colour lookup need a string.
        v = str(verdict.get("verdict", "UNKNOWN"))
        try:
            conf = float(verdict.get("confidence") or 0.0)
        except (TypeError, ValueError):
            conf = 0.0
        reason = verdict.get("reason", "") or verdict.get("error_type", "")
        # Truncate first, escape second (see docstring).
        reason_html = escape(str(reason)[:200])
        rows.append(
            f"<tr>"
            f"<td><code>{escape(rule_id)}</code></td>"
            f"<td><span class='verdict' style='background:{_color(v)}'>{escape(v)}</span></td>"
            f"<td>{conf:.2f}</td>"
            f"<td>{reason_html}</td>"
            f"</tr>"
        )
    name = escape(Path(data.get("document", doc_path.stem)).name)
    return (
        f"<section><h2>{name}</h2><table><thead><tr>"
        f"<th>Rule</th><th>Verdict</th><th>Conf</th><th>Reason</th>"
        f"</tr></thead><tbody>{''.join(rows)}</tbody></table></section>"
    )
61
+
62
+
63
def render(results_dir: Path) -> str:
    """
    Build the complete dashboard page for a results directory.

    Every ``*.json`` file in ``results_dir`` except ``summary.json`` becomes
    one document section (in sorted path order). ``summary.json``, when
    present, supplies the totals and the per-verdict counts, listed from
    most to least frequent. Returns the HTML document as a string; nothing
    is written to disk.
    """
    summary_path = results_dir / "summary.json"
    doc_files = sorted(
        path for path in results_dir.glob("*.json") if path.name != "summary.json"
    )
    if summary_path.exists():
        summary = json.loads(summary_path.read_text(encoding="utf-8"))
    else:
        summary = {}

    # Highest count first.
    ranked = sorted(summary.get("by_verdict", {}).items(), key=lambda item: -item[1])
    summary_rows = "".join(
        f"<li><b>{escape(label)}</b>: {count}</li>" for label, count in ranked
    )

    body_sections = "\n".join(_render_doc(path) for path in doc_files)

    total_runs = summary.get("total_runs", 0)
    error_count = summary.get("errors", 0)
    verdict_items = summary_rows or '<li>No verdicts</li>'

    return f"""<!doctype html>
<html lang="en"><head><meta charset="utf-8">
<title>KC verification dashboard</title>
<style>
body {{ font: 14px system-ui, sans-serif; max-width: 1100px; margin: 2em auto; padding: 0 1em; color: #111; }}
h1 {{ font-size: 1.5em; }}
h2 {{ font-size: 1.1em; margin-top: 2em; border-bottom: 1px solid #e5e7eb; padding-bottom: .3em; }}
table {{ width: 100%; border-collapse: collapse; margin-top: .5em; }}
th, td {{ text-align: left; padding: .4em .6em; border-bottom: 1px solid #f3f4f6; vertical-align: top; }}
th {{ background: #f9fafb; font-weight: 600; }}
code {{ font-family: ui-monospace, monospace; }}
.verdict {{ display: inline-block; padding: 2px 8px; border-radius: 4px; color: white; font-weight: 600; font-size: .85em; }}
.summary {{ background: #f9fafb; padding: 1em; border-radius: 6px; }}
.summary ul {{ margin: 0; padding-left: 1.2em; }}
</style>
</head><body>
<h1>KC verification — release v1</h1>
<div class="summary">
<p><b>Total runs:</b> {total_runs} ·
<b>Errors:</b> {error_count} ·
<b>Documents:</b> {len(doc_files)}</p>
<ul>{verdict_items}</ul>
</div>
{body_sections}
</body></html>
"""
103
+
104
+
105
def main():
    """
    Command-line entry point: print the dashboard HTML to stdout.

    The results directory is the first command-line argument when given,
    otherwise ``output/results`` next to this script. Exits with status 1
    and a message on stderr when that directory does not exist.
    """
    cli_args = sys.argv[1:]
    if cli_args:
        results_dir = Path(cli_args[0]).resolve()
    else:
        results_dir = Path(__file__).resolve().parent / "output" / "results"

    if results_dir.is_dir():
        print(render(results_dir))
        return

    print(f"results directory not found: {results_dir}", file=sys.stderr)
    sys.exit(1)
114
+
115
+
116
+ if __name__ == "__main__":
117
+ main()
@@ -0,0 +1,212 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ KC release runner v1.
4
+
5
+ Entry point for a self-contained KC release bundle. Loads the manifest,
6
+ iterates rules, dispatches each rule's workflow against the supplied
7
+ input documents, writes per-document verdict JSONs to output/results/.
8
+
9
+ Usage:
10
+ python3 run.py <input_dir>
11
+ python3 run.py <input_dir> --rules R001,R005,R012
12
+ python3 run.py --doc <single_file> # single-doc smoke test
13
+
14
+ The bundle is shipped from KC's finalization phase. KC's run-in-CLI
15
+ mode is the source of truth; this is the ship-as-artifact form for
16
+ re-running verification on new document batches without the full KC
17
+ toolchain installed.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import argparse
23
+ import json
24
+ import os
25
+ import sys
26
+ import subprocess
27
+ from pathlib import Path
28
+
29
+ HERE = Path(__file__).resolve().parent
30
+
31
+ # Add kc_runtime to path so submodules import cleanly when run from any cwd.
32
+ sys.path.insert(0, str(HERE))
33
+
34
+ from kc_runtime import doc_parser, confidence # noqa: E402
35
+
36
+
37
def _load_json(path: Path, *, required: bool = False, default=None):
    """
    Load and parse a UTF-8 JSON file.

    Returns the parsed content when ``path`` exists. When it does not,
    returns ``default`` unless ``required`` is true, in which case the
    process exits via ``SystemExit`` with an explanatory message.
    """
    if path.exists():
        return json.loads(path.read_text(encoding="utf-8"))
    if not required:
        return default
    raise SystemExit(
        f"required file missing: {path}\n"
        "this release bundle was shipped without a complete manifest.\n"
        "re-run KC finalization or contact the bundle author."
    )
47
+
48
+
49
def _select_rules(catalog, rule_filter):
    """
    Narrow the rule catalog to the ids named in a comma-separated filter.

    An empty or missing ``rule_filter`` returns ``catalog`` unchanged.
    Otherwise returns a list of the catalog entries whose ``"id"`` is in the
    filter, in catalog order. Whitespace around each id is ignored and empty
    items are dropped, so ``"R001, R005"`` and ``"R001,R005,"`` both select
    R001 and R005.
    """
    if not rule_filter:
        return catalog
    wanted = {token.strip() for token in rule_filter.split(",")}
    wanted.discard("")  # tolerate trailing or doubled commas
    return [r for r in catalog if r.get("id") in wanted]
54
+
55
+
56
def _list_input_docs(input_dir: Path):
    """
    List the documents directly inside ``input_dir``.

    Returns the regular files whose names do not start with a dot, in sorted
    path order. Subdirectories are not descended into. Exits via
    ``SystemExit`` when ``input_dir`` is not a directory.
    """
    if not input_dir.is_dir():
        raise SystemExit(f"input_dir not a directory: {input_dir}")
    return [
        candidate
        for candidate in sorted(input_dir.iterdir())
        if candidate.is_file() and not candidate.name.startswith(".")
    ]
64
+
65
+
66
def _error_verdict(rule_id: str, error_type: str, reason=None) -> dict:
    """Build the runner's ERROR verdict; ``reason`` is included only when given."""
    verdict = {
        "rule_id": rule_id,
        "verdict": "ERROR",
        "confidence": 0.0,
        "error_type": error_type,
    }
    if reason is not None:
        verdict["reason"] = reason
    return verdict


def _run_workflow(rule_id: str, workflow_path: Path, doc_path: Path) -> dict:
    """
    Dispatch a single workflow against a single document.

    The workflow script is run with the current interpreter, receives the
    document path as its only argument, and has 180 seconds to finish.
    Workflow contract: the last non-blank stdout line is a JSON object, the
    verdict. ``rule_id`` is filled into that object when the workflow did
    not set it.

    Returns the workflow's verdict dict, or an ERROR verdict whose
    ``error_type`` is one of:
      - ``workflow_missing``: the script file does not exist
      - ``workflow_timeout``: the script exceeded the time limit
      - ``workflow_exit_nonzero``: non-zero exit; reason holds stderr/stdout
      - ``empty_workflow_output``: stdout had no non-blank line
      - ``workflow_output_not_json``: the last line is not valid JSON
      - ``workflow_output_not_object``: valid JSON, but not an object
    """
    if not workflow_path.exists():
        return _error_verdict(
            rule_id, "workflow_missing", f"workflow not found: {workflow_path.name}"
        )

    try:
        proc = subprocess.run(
            [sys.executable, str(workflow_path), str(doc_path)],
            capture_output=True,
            text=True,
            timeout=180,
        )
    except subprocess.TimeoutExpired:
        return _error_verdict(rule_id, "workflow_timeout")

    if proc.returncode != 0:
        return _error_verdict(
            rule_id,
            "workflow_exit_nonzero",
            (proc.stderr or proc.stdout or "").strip()[:500],
        )

    # Workflow contract: last stdout line is the verdict JSON.
    last = next(
        (line for line in reversed(proc.stdout.splitlines()) if line.strip()),
        None,
    )
    if not last:
        return _error_verdict(rule_id, "empty_workflow_output")

    try:
        verdict = json.loads(last)
    except json.JSONDecodeError as exc:
        return _error_verdict(rule_id, "workflow_output_not_json", str(exc))

    # A bare list, number or string parses fine but is not a verdict; report
    # it instead of letting setdefault() raise and abort the whole run.
    if not isinstance(verdict, dict):
        return _error_verdict(
            rule_id,
            "workflow_output_not_object",
            f"expected a JSON object, got {type(verdict).__name__}",
        )

    verdict.setdefault("rule_id", rule_id)
    return verdict
128
+
129
+
130
def _result_filename(doc: Path, taken: set) -> str:
    """Pick a result file name for ``doc`` that is not in ``taken``, and record it."""
    candidate = f"{doc.stem}.json"
    if candidate in taken:
        # Same stem as an earlier document (a.pdf vs a.docx) or as the
        # summary file: keep the extension to stay unique.
        candidate = f"{doc.name}.json"
    serial = 2
    while candidate in taken:
        candidate = f"{doc.name}.{serial}.json"
        serial += 1
    taken.add(candidate)
    return candidate


def main():
    """
    Command-line entry point of the release runner.

    Loads ``manifest.json`` (required), ``catalog.json`` and
    ``confidence_calibration.json`` (both optional) from the bundle
    directory, selects the rules to run, and for every input document runs
    each rule's workflow, calibrates the verdict and writes one result JSON
    per document into the output directory. A ``summary.json`` with run
    totals is written last and also printed to stdout.

    Rules without a workflow entry in the manifest are recorded as
    ``NO_WORKFLOW`` and are not counted in the summary.

    Result files are named ``<stem>.json``. When that name is already used
    by an earlier document of this run, or would be ``summary.json``, the
    full file name is used instead (``a.docx.json``) so that no result
    overwrites another.
    """
    parser = argparse.ArgumentParser(prog="run.py", description="KC release runner")
    parser.add_argument("input_dir", nargs="?", help="Directory of input documents")
    parser.add_argument("--doc", help="Single document path (smoke-test mode)")
    parser.add_argument("--rules", help="Comma-separated rule_ids to run (default: all)")
    parser.add_argument("--output-dir", default=str(HERE / "output" / "results"))
    args = parser.parse_args()

    if not args.input_dir and not args.doc:
        parser.error("either input_dir or --doc is required")

    manifest = _load_json(HERE / "manifest.json", required=True)
    catalog = _load_json(HERE / "catalog.json", required=False, default=[])
    historical = _load_json(
        HERE / "confidence_calibration.json",
        required=False,
        default={"historical_accuracy": {}},
    )

    rules = _select_rules(catalog, args.rules)
    if not rules:
        raise SystemExit("no rules to run (catalog empty or filter excluded all)")

    if args.doc:
        docs = [Path(args.doc).resolve()]
    else:
        docs = _list_input_docs(Path(args.input_dir).resolve())

    output_dir = Path(args.output_dir).resolve()
    output_dir.mkdir(parents=True, exist_ok=True)

    workflows = manifest.get("workflows", {})
    summary = {"total_runs": 0, "by_verdict": {}, "errors": 0}
    # Names already claimed in output_dir; summary.json is written at the end.
    taken_names = {"summary.json"}

    for doc in docs:
        # Lightweight readability check. Workflows still run and do their own
        # parsing; a rejected document is only reported on stderr so the
        # operator can see why its verdicts came back as errors.
        try:
            status = doc_parser.preflight(doc)
        except Exception as exc:
            print(f"[run.py] preflight failed for {doc.name}: {exc}", file=sys.stderr)
        else:
            if isinstance(status, dict) and not status.get("ok", True):
                print(
                    f"[run.py] preflight rejected {doc.name}: {status.get('reason')}",
                    file=sys.stderr,
                )

        results = {}
        for rule in rules:
            rule_id = rule.get("id")
            if not rule_id:
                continue
            wf_relpath = workflows.get(rule_id)
            if not wf_relpath:
                results[rule_id] = {
                    "rule_id": rule_id,
                    "verdict": "NO_WORKFLOW",
                    "confidence": 0.0,
                }
                continue
            wf_path = HERE / wf_relpath
            verdict = _run_workflow(rule_id, wf_path, doc)
            verdict = confidence.calibrate(verdict, historical)
            results[rule_id] = verdict
            summary["total_runs"] += 1
            v = verdict.get("verdict", "UNKNOWN")
            summary["by_verdict"][v] = summary["by_verdict"].get(v, 0) + 1
            if v == "ERROR":
                summary["errors"] += 1

        out_file = output_dir / _result_filename(doc, taken_names)
        out_file.write_text(
            json.dumps(
                {"document": str(doc), "results": results}, ensure_ascii=False, indent=2
            ),
            encoding="utf-8",
        )

    summary_path = output_dir / "summary.json"
    summary_path.write_text(
        json.dumps(summary, ensure_ascii=False, indent=2), encoding="utf-8"
    )
    print(json.dumps(summary, ensure_ascii=False, indent=2))
209
+
210
+
211
+ if __name__ == "__main__":
212
+ main()
@@ -0,0 +1,17 @@
1
#!/bin/sh
# Minimal local-preview server for the KC release dashboard.
# Renders dashboard.html if it is missing or older than
# output/results/summary.json, then serves this directory on PORT
# (default 8765).
#
# NOTE(review): http.server is started without --bind, so it is not
# restricted to localhost even though the printed URL says so. Confirm
# whether that is intended before running this on a shared network.

set -e
HERE="$(cd "$(dirname "$0")" && pwd)"
cd "$HERE"

PORT="${PORT:-8765}"

if [ ! -f dashboard.html ] || [ output/results/summary.json -nt dashboard.html ]; then
  echo "rendering dashboard.html..."
  # Render into a scratch file and move it into place only on success.
  # Redirecting straight into dashboard.html would leave an empty file
  # behind when the renderer fails, and the next run would serve it.
  tmp="dashboard.html.tmp.$$"
  if python3 render_dashboard.py output/results/ > "$tmp"; then
    mv "$tmp" dashboard.html
  else
    status=$?
    rm -f "$tmp"
    exit "$status"
  fi
fi

echo "serving on http://localhost:$PORT/dashboard.html"
exec python3 -m http.server "$PORT"