kc-beta 0.3.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/package.json +1 -1
  2. package/src/agent/confidence-scorer.js +8 -0
  3. package/src/agent/context.js +25 -0
  4. package/src/agent/corner-case-registry.js +5 -0
  5. package/src/agent/engine.js +514 -75
  6. package/src/agent/event-log.js +15 -2
  7. package/src/agent/history.js +91 -23
  8. package/src/agent/pipelines/initializer.js +3 -6
  9. package/src/agent/retry.js +9 -1
  10. package/src/agent/scheduler.js +276 -0
  11. package/src/agent/session-state.js +11 -2
  12. package/src/agent/task-manager.js +5 -0
  13. package/src/agent/tools/agent-tool.js +57 -14
  14. package/src/agent/tools/archive-file.js +94 -0
  15. package/src/agent/tools/copy-to-workspace.js +140 -0
  16. package/src/agent/tools/phase-advance.js +60 -0
  17. package/src/agent/tools/release.js +322 -0
  18. package/src/agent/tools/schedule-fetch.js +118 -0
  19. package/src/agent/tools/snapshot.js +101 -0
  20. package/src/agent/tools/workspace-file.js +10 -7
  21. package/src/agent/version-manager.js +29 -120
  22. package/src/agent/workspace.js +127 -4
  23. package/src/cli/components.js +4 -1
  24. package/src/cli/index.js +57 -4
  25. package/src/config.js +10 -1
  26. package/template/release-runtime/README.md.tmpl +84 -0
  27. package/template/release-runtime/kc_runtime/__init__.py +2 -0
  28. package/template/release-runtime/kc_runtime/confidence.py +93 -0
  29. package/template/release-runtime/kc_runtime/dashboard.py +208 -0
  30. package/template/release-runtime/render_dashboard.py +49 -0
  31. package/template/release-runtime/run.py +230 -0
  32. package/template/release-runtime/serve.sh +15 -0
  33. package/template/skills/en/meta/entity-extraction/SKILL.md +6 -0
  34. package/template/skills/en/meta-meta/bootstrap-workspace/SKILL.md +11 -0
  35. package/template/skills/en/meta-meta/quality-control/SKILL.md +13 -1
  36. package/template/skills/en/meta-meta/rule-extraction/SKILL.md +35 -0
  37. package/template/skills/en/meta-meta/rule-graph/SKILL.md +16 -0
  38. package/template/skills/en/meta-meta/skill-to-workflow/SKILL.md +8 -0
  39. package/template/skills/en/meta-meta/task-decomposition/SKILL.md +13 -0
  40. package/template/skills/en/meta-meta/version-control/SKILL.md +13 -0
  41. package/template/skills/zh/meta/entity-extraction/SKILL.md +6 -0
  42. package/template/skills/zh/meta-meta/bootstrap-workspace/SKILL.md +11 -0
  43. package/template/skills/zh/meta-meta/quality-control/SKILL.md +12 -0
  44. package/template/skills/zh/meta-meta/rule-extraction/SKILL.md +35 -0
  45. package/template/skills/zh/meta-meta/rule-graph/SKILL.md +16 -0
  46. package/template/skills/zh/meta-meta/skill-to-workflow/SKILL.md +8 -0
  47. package/template/skills/zh/meta-meta/task-decomposition/SKILL.md +16 -0
  48. package/template/skills/zh/meta-meta/version-control/SKILL.md +13 -0
  49. package/template/workspace.gitignore +22 -0
@@ -0,0 +1,208 @@
1
+ """
2
+ End-user dashboard renderer — Python port of DashboardRenderTool._renderHtml.
3
+
4
+ Takes a release-run result JSON (the output of run.py) and emits a static
5
+ HTML dashboard. Dark theme, two tabs (Summary + Per-Rule), no external
6
+ dependencies, no JS framework — vanilla JS for tab switching only.
7
+ """
8
+
9
+ import html as _html
10
+ import json
11
+ from datetime import datetime, timezone
12
+
13
+
14
def render(result, manifest):
    """
    Render a release-run result as a complete, static HTML dashboard.

    result: dict from run.py — keys: release, snapshot_tag, input,
        started_at, duration_ms, results: [{rule_id, value, confidence,
        confidence_band, extraction_method, exit_code, raw}]
    manifest: dict from the bundle's manifest.json (for header info)
    Returns: a complete HTML string.

    Missing or null fields are rendered as empty text / zero instead of
    raising, and every value interpolated into the page is HTML-escaped.
    """

    def esc(value):
        # html.escape() only accepts str; ids, labels and titles can be None
        # (or numbers) when they come from JSON.
        return _html.escape("" if value is None else str(value))

    def num(value):
        # Numeric fields are formatted with ":.3f" / ":.2f"; fall back to 0.0
        # for null or non-numeric input rather than failing the whole render.
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    def pick(*candidates):
        # First candidate that is not None, else "".
        for candidate in candidates:
            if candidate is not None:
                return candidate
        return ""

    def shown_value(entry):
        # Prefer the top-level value; only consult the raw payload when the
        # value is absent, so legitimate falsy values (0, False) still show.
        value = entry.get("value")
        if value is None:
            value = _value_from_raw(entry.get("raw"))
        return esc(_short(value))

    label = pick(manifest.get("label"), result.get("release"))
    snap_tag = pick(manifest.get("snapshot_tag"), result.get("snapshot_tag"))
    # .get("id") keeps a manifest rule without an id from raising KeyError.
    rule_titles = {r.get("id"): r.get("title") for r in manifest.get("rules") or []}
    results = result.get("results") or []
    generated_at = datetime.now(timezone.utc).isoformat()

    by_band = {"high": 0, "medium": 0, "low": 0}
    failed = 0
    summary_rows = []
    detail_blocks = []

    for r in results:
        rid = esc(r.get("rule_id"))
        title = esc(rule_titles.get(r.get("rule_id")))
        value = shown_value(r)
        conf = num(r.get("confidence"))
        band = str(r.get("confidence_band") or "low")
        method = esc(r.get("extraction_method") or "?")
        exit_code = r.get("exit_code", 0)
        succeeded = exit_code == 0

        # Aggregates for the metric cards.
        by_band[band] = by_band.get(band, 0) + 1
        if not succeeded:
            failed += 1

        # The band ends up inside a quoted attribute, so it must be escaped too.
        row_class = esc(f"band-{band}") if succeeded else "band-fail"
        status_icon = "✓" if succeeded else "✗"
        summary_rows.append(
            f"<tr class='{row_class}'>"
            f"<td>{status_icon}</td>"
            f"<td><code>{rid}</code></td>"
            f"<td>{title}</td>"
            f"<td>{value}</td>"
            f"<td>{conf:.3f}</td>"
            f"<td>{esc(band)}</td>"
            f"<td>{method}</td>"
            f"</tr>"
        )

        raw_json = json.dumps(r.get("raw") or {}, ensure_ascii=False, indent=2)
        detail_blocks.append(
            f"<div class='detail-card'>"
            f"<h3><code>{rid}</code> &middot; {title}</h3>"
            f"<dl>"
            f"<dt>Value</dt><dd>{value}</dd>"
            f"<dt>Confidence</dt><dd>{conf:.3f} ({esc(r.get('confidence_band') or '')})</dd>"
            f"<dt>Method</dt><dd>{method}</dd>"
            f"<dt>Exit code</dt><dd>{esc(exit_code)}</dd>"
            f"</dl>"
            f"<details><summary>Raw workflow output</summary>"
            f"<pre>{esc(raw_json)}</pre>"
            f"</details>"
            f"</div>"
        )

    return TEMPLATE.format(
        label=esc(label),
        snap_tag=esc(snap_tag),
        input_doc=esc(result.get("input")),
        started=esc(result.get("started_at")),
        duration_s=f"{num(result.get('duration_ms')) / 1000:.2f}",
        total=len(results),
        high=by_band["high"],
        medium=by_band["medium"],
        low=by_band["low"],
        failed=failed,
        summary_rows="\n".join(summary_rows) or "<tr><td colspan='7'>(no results)</td></tr>",
        detail_blocks="\n".join(detail_blocks) or "<p>(no results)</p>",
        generated_at=generated_at,
    )
100
+
101
+
102
+ def _short(s, n=80):
103
+ s = "" if s is None else str(s)
104
+ return s if len(s) <= n else s[: n - 1] + "…"
105
+
106
+
107
+ def _value_from_raw(raw):
108
+ if not isinstance(raw, dict):
109
+ return ""
110
+ for k in ("extracted_value", "value", "result"):
111
+ if k in raw:
112
+ return raw[k]
113
+ return ""
114
+
115
+
116
+ TEMPLATE = """<!DOCTYPE html>
117
+ <html lang="en">
118
+ <head>
119
+ <meta charset="UTF-8">
120
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
121
+ <title>KC Release {label} — Verification Result</title>
122
+ <style>
123
+ body {{ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
124
+ max-width: 1100px; margin: 0 auto; padding: 24px;
125
+ background: #0a0a0a; color: #e5e5e5; }}
126
+ h1 {{ color: #f4f4f5; font-size: 1.5em; margin-bottom: 4px; }}
127
+ .meta {{ color: #737373; font-size: 0.85em; margin-bottom: 24px; }}
128
+ .meta code {{ color: #a3a3a3; }}
129
+ .card {{ background: #171717; border: 1px solid #262626; border-radius: 8px;
130
+ padding: 16px; margin: 12px 0; }}
131
+ .metrics {{ display: flex; gap: 32px; flex-wrap: wrap; }}
132
+ .metric .value {{ font-size: 2em; font-weight: 600; }}
133
+ .metric .label {{ font-size: 0.8em; color: #737373; text-transform: uppercase; letter-spacing: .03em; }}
134
+ .v-high {{ color: #22c55e; }}
135
+ .v-med {{ color: #eab308; }}
136
+ .v-low {{ color: #f97316; }}
137
+ .v-fail {{ color: #ef4444; }}
138
+ .tabs {{ display: flex; gap: 0; border-bottom: 1px solid #262626; margin: 24px 0 12px; }}
139
+ .tab {{ padding: 8px 16px; cursor: pointer; color: #737373; border-bottom: 2px solid transparent; user-select: none; }}
140
+ .tab.active {{ color: #f4f4f5; border-bottom-color: #22c55e; }}
141
+ table {{ width: 100%; border-collapse: collapse; }}
142
+ th, td {{ text-align: left; padding: 8px 10px; border-bottom: 1px solid #262626; font-size: 0.92em; }}
143
+ th {{ color: #737373; font-weight: 500; font-size: 0.78em; text-transform: uppercase; letter-spacing: .04em; }}
144
+ td code {{ color: #a3a3a3; }}
145
+ tr.band-high td:nth-child(6) {{ color: #22c55e; }}
146
+ tr.band-medium td:nth-child(6) {{ color: #eab308; }}
147
+ tr.band-low td:nth-child(6) {{ color: #f97316; }}
148
+ tr.band-fail td:nth-child(6) {{ color: #ef4444; }}
149
+ .detail-card {{ background: #171717; border: 1px solid #262626; border-radius: 8px;
150
+ padding: 14px 18px; margin: 14px 0; }}
151
+ .detail-card h3 {{ margin: 0 0 10px; font-size: 1em; color: #e5e5e5; }}
152
+ .detail-card dl {{ display: grid; grid-template-columns: 100px 1fr; gap: 4px 16px; margin: 0; }}
153
+ .detail-card dt {{ color: #737373; font-size: 0.85em; }}
154
+ .detail-card dd {{ margin: 0; color: #e5e5e5; }}
155
+ details summary {{ cursor: pointer; color: #a3a3a3; font-size: 0.85em; margin-top: 8px; }}
156
+ pre {{ background: #0d0d0d; border: 1px solid #262626; border-radius: 4px;
157
+ padding: 10px; overflow-x: auto; font-size: 0.82em; color: #d4d4d4; }}
158
+ .footer {{ color: #525252; font-size: 0.78em; margin-top: 32px; text-align: center; }}
159
+ </style>
160
+ </head>
161
+ <body>
162
+ <h1>KC Release <code>{label}</code></h1>
163
+ <p class="meta">
164
+ Snapshot: <code>{snap_tag}</code> &middot;
165
+ Input: <code>{input_doc}</code> &middot;
166
+ Started: <code>{started}</code> &middot;
167
+ Duration: <code>{duration_s}s</code>
168
+ </p>
169
+
170
+ <div class="card metrics">
171
+ <div class="metric"><div class="value">{total}</div><div class="label">Rules run</div></div>
172
+ <div class="metric"><div class="value v-high">{high}</div><div class="label">High confidence</div></div>
173
+ <div class="metric"><div class="value v-med">{medium}</div><div class="label">Medium</div></div>
174
+ <div class="metric"><div class="value v-low">{low}</div><div class="label">Low</div></div>
175
+ <div class="metric"><div class="value v-fail">{failed}</div><div class="label">Failed</div></div>
176
+ </div>
177
+
178
+ <div class="tabs">
179
+ <div class="tab active" data-target="summary" onclick="kcShow('summary', this)">Summary</div>
180
+ <div class="tab" data-target="detail" onclick="kcShow('detail', this)">Per-rule detail</div>
181
+ </div>
182
+
183
+ <div id="summary" class="view">
184
+ <div class="card">
185
+ <table>
186
+ <tr><th></th><th>Rule</th><th>Title</th><th>Value</th><th>Conf.</th><th>Band</th><th>Method</th></tr>
187
+ {summary_rows}
188
+ </table>
189
+ </div>
190
+ </div>
191
+
192
+ <div id="detail" class="view" style="display:none">
193
+ {detail_blocks}
194
+ </div>
195
+
196
+ <p class="footer">Generated {generated_at} — KC Agent CLI</p>
197
+
198
+ <script>
199
+ function kcShow(id, tab) {{
200
+ document.querySelectorAll('.view').forEach(v => v.style.display = 'none');
201
+ document.getElementById(id).style.display = '';
202
+ document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
203
+ tab.classList.add('active');
204
+ }}
205
+ </script>
206
+ </body>
207
+ </html>
208
+ """
@@ -0,0 +1,49 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Re-render an HTML dashboard from an existing run.py result JSON.
4
+
5
+ Useful when run.py was invoked without --dashboard, or when the dashboard
6
+ template is updated and you want to re-render past results.
7
+
8
+ Usage:
9
+ python render_dashboard.py <result.json> [--output dashboard.html]
10
+ """
11
+
12
+ import argparse
13
+ import json
14
+ import sys
15
+ from pathlib import Path
16
+
17
+ HERE = Path(__file__).resolve().parent
18
+ sys.path.insert(0, str(HERE))
19
+
20
+ from kc_runtime import dashboard as kc_dash
21
+
22
+
23
def main():
    """Re-render the HTML dashboard for an existing run.py result file.

    Reads the result JSON named on the command line and the manifest.json
    next to this script, renders them with kc_runtime.dashboard, and writes
    the HTML to --output (default: the result path with a .html suffix).
    Exits with status 2 when an input file is missing or is not valid JSON.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("result", help="Path to a result.json produced by run.py")
    ap.add_argument("--output", "-o", help="HTML output path (default: alongside result)")
    args = ap.parse_args()

    result_path = Path(args.result).resolve()
    if not result_path.is_file():
        print(f"error: result file not found: {result_path}", file=sys.stderr)
        sys.exit(2)

    manifest_path = HERE / "manifest.json"
    if not manifest_path.is_file():
        print("error: manifest.json not found alongside this script", file=sys.stderr)
        sys.exit(2)

    loaded = []
    for path in (result_path, manifest_path):
        try:
            loaded.append(json.loads(path.read_text(encoding="utf-8")))
        except ValueError as e:
            # ValueError covers both json.JSONDecodeError and UnicodeDecodeError,
            # so a corrupt file gives a clean error instead of a traceback.
            print(f"error: invalid JSON in {path.name}: {e}", file=sys.stderr)
            sys.exit(2)
    result, manifest = loaded

    page = kc_dash.render(result, manifest)

    out_path = Path(args.output) if args.output else result_path.with_suffix(".html")
    out_path.write_text(page, encoding="utf-8")
    print(f"Wrote {out_path}")
46
+
47
+
48
+ if __name__ == "__main__":
49
+ main()
@@ -0,0 +1,230 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ KC release runner — standalone, no kc-beta dependency.
4
+
5
+ Loads the bundled release manifest, runs each rule's workflow against an
6
+ input document, scores confidence, aggregates results.
7
+
8
+ Usage:
9
+ python run.py <input-doc> [--rule R001] [--output result.json] [--dashboard]
10
+
11
+ Required env vars (same conventions as KC's .env):
12
+ LLM_API_KEY, LLM_BASE_URL
13
+ TIER1, TIER2, TIER3, TIER4 (any subset of model lists, comma-separated)
14
+
15
+ Workflows are invoked as `python <workflow_path> <input-doc>` and must emit
16
+ their result as a single JSON object on the last line of stdout.
17
+ """
18
+
19
+ import argparse
20
+ import json
21
+ import os
22
+ import subprocess
23
+ import sys
24
+ import time
25
+ from datetime import datetime, timezone
26
+ from pathlib import Path
27
+
28
+ # kc_runtime is bundled next to this file
29
+ HERE = Path(__file__).resolve().parent
30
+ sys.path.insert(0, str(HERE))
31
+
32
+ from kc_runtime import confidence as kc_conf
33
+ from kc_runtime import dashboard as kc_dash
34
+
35
+
36
def main():
    """Command-line entry point: run the release's rules against one document.

    Loads the bundled manifest and optional catalog / calibration /
    corner-case files, runs each selected rule through _run_one, then emits
    the aggregated JSON (to --output or stdout) and, with --dashboard, an HTML
    dashboard. Exits 1 if any rule reported a non-zero exit code, 0 otherwise;
    setup problems exit with status 2.
    """
    parser = argparse.ArgumentParser(description="Run a KC release on a document.")
    parser.add_argument("input", help="Path to the input document (PDF, DOCX, TXT, ...)")
    parser.add_argument("--rule", help="Run only this rule id (default: all rules in catalog)")
    parser.add_argument("--output", "-o", help="Write aggregated JSON here (default: stdout)")
    parser.add_argument(
        "--dashboard",
        action="store_true",
        help="Also emit an HTML dashboard next to the JSON output",
    )
    opts = parser.parse_args()

    doc_path = Path(opts.input).resolve()
    if not doc_path.is_file():
        _die(f"Input file not found: {doc_path}")

    # Bundle files live next to this script; only the manifest is mandatory.
    manifest = _load_json(HERE / "manifest.json", required=True)
    catalog = _load_json(HERE / "catalog.json", required=False) or []
    historical = _load_calibration(HERE / "confidence_calibration.json")
    corner_cases = _load_json(HERE / "corner_cases.json", required=False)

    selected = manifest.get("rules", [])
    if opts.rule:
        selected = [rule for rule in selected if rule.get("id") == opts.rule]
        if not selected:
            _die(f"No rule '{opts.rule}' in manifest")

    if not _check_env():
        sys.exit(2)

    started_at = datetime.now(timezone.utc).isoformat()
    clock_start = time.monotonic()

    outcomes = [
        _run_one(rule, doc_path, catalog, historical=historical, corner_cases=corner_cases)
        for rule in selected
    ]
    any_failure = any(outcome.get("exit_code", 0) != 0 for outcome in outcomes)

    aggregated = {
        "release": manifest.get("label"),
        "snapshot_tag": manifest.get("snapshot_tag"),
        "input": str(doc_path),
        "started_at": started_at,
        "duration_ms": int((time.monotonic() - clock_start) * 1000),
        "results": outcomes,
    }

    payload = json.dumps(aggregated, ensure_ascii=False, indent=2)
    if not opts.output:
        print(payload)
    else:
        json_path = Path(opts.output).resolve()
        json_path.write_text(payload, encoding="utf-8")
        # Status messages go to stderr so stdout stays reserved for the JSON.
        print(f"Wrote {json_path}", file=sys.stderr)

    if opts.dashboard:
        page = kc_dash.render(aggregated, manifest)
        if opts.output:
            html_path = Path(opts.output).with_suffix(".html")
        else:
            html_path = HERE / f"result_{int(time.time())}.html"
        html_path.write_text(page, encoding="utf-8")
        print(f"Dashboard: {html_path}", file=sys.stderr)

    sys.exit(1 if any_failure else 0)
103
+
104
+
105
def _run_one(rule, input_path, catalog, *, historical, corner_cases):
    """Run one rule's workflow script on the input document and score it.

    rule: manifest entry; its "id" and "workflow" (path relative to this
        script's directory) keys are read.
    input_path: path of the document, passed to the workflow as its argument.
    catalog: accepted for the caller's convenience; not used here.
    historical, corner_cases: forwarded unchanged to kc_conf.score.

    Returns a result dict with keys rule_id, value, confidence,
    confidence_band, extraction_method, exit_code and raw. Setup failures
    (missing workflow, timeout, interpreter not found) are reported through
    _error_result rather than raised.
    """
    rule_id = rule.get("id")
    workflow_rel = rule.get("workflow")
    if not workflow_rel:
        return _error_result(rule_id, "no workflow path in manifest")

    workflow_abs = (HERE / workflow_rel).resolve()
    if not workflow_abs.is_file():
        return _error_result(rule_id, f"workflow not found: {workflow_rel}")

    # Use the interpreter running this script: a bare "python" may be missing
    # (python3-only systems) or may be a different environment.
    interpreter = sys.executable or "python"
    try:
        proc = subprocess.run(
            [interpreter, str(workflow_abs), str(input_path)],
            capture_output=True, text=True, timeout=300,
        )
    except subprocess.TimeoutExpired:
        return _error_result(rule_id, "workflow timed out (300s)")
    except FileNotFoundError:
        return _error_result(rule_id, f"Python interpreter not found: {interpreter}")

    raw_stdout = (proc.stdout or "").strip()
    raw_data = _parse_last_json_line(raw_stdout)

    # The parsed payload is not guaranteed to be a JSON object; only read
    # metadata fields from it when it is one.
    fields = raw_data if isinstance(raw_data, dict) else {}
    extracted_value = _extract_value(raw_data)
    method = fields.get("extraction_method") or "llm"
    source_text = fields.get("raw_text") or ""

    conf = kc_conf.score(
        rule_id=rule_id,
        extracted_value=str(extracted_value),
        source_text=source_text,
        method=method,
        document=str(input_path),
        historical=historical,
        corner_cases=corner_cases,
    )

    return {
        "rule_id": rule_id,
        "value": extracted_value,
        "confidence": conf,
        "confidence_band": kc_conf.band(conf),
        "extraction_method": method,
        "exit_code": proc.returncode,
        # With no parseable output, keep the head of stderr for debugging.
        "raw": raw_data if raw_data is not None else {"stderr": (proc.stderr or "")[:2000]},
    }
151
+
152
+
153
+ def _error_result(rule_id, msg):
154
+ return {
155
+ "rule_id": rule_id,
156
+ "value": None,
157
+ "confidence": 0.0,
158
+ "confidence_band": "low",
159
+ "extraction_method": "fallback",
160
+ "exit_code": 2,
161
+ "raw": {"error": msg},
162
+ }
163
+
164
+
165
+ def _parse_last_json_line(text):
166
+ if not text:
167
+ return None
168
+ # Walk lines from the bottom, return the first that parses as a JSON object
169
+ for line in reversed(text.split("\n")):
170
+ line = line.strip()
171
+ if not line:
172
+ continue
173
+ if line[0] not in "{[":
174
+ continue
175
+ try:
176
+ return json.loads(line)
177
+ except json.JSONDecodeError:
178
+ continue
179
+ return None
180
+
181
+
182
+ def _extract_value(raw):
183
+ if not isinstance(raw, dict):
184
+ return None
185
+ for k in ("extracted_value", "value", "result"):
186
+ if k in raw:
187
+ return raw[k]
188
+ return None
189
+
190
+
191
def _load_json(path, *, required):
    """Read and parse a UTF-8 JSON file.

    path: pathlib.Path of the file to load.
    required: when True, a missing file is fatal (reported through _die);
        when False, a missing file yields None.

    Returns the parsed JSON value. A file that exists but cannot be decoded or
    parsed is always reported through _die.
    """
    if not path.is_file():
        if required:
            _die(f"Required file missing: {path.name}")
        return None
    try:
        return json.loads(path.read_text(encoding="utf-8"))
    except ValueError as e:
        # ValueError is the common base of json.JSONDecodeError and
        # UnicodeDecodeError, so a non-UTF-8 file is reported the same way.
        _die(f"Invalid JSON in {path.name}: {e}")
200
+
201
+
202
def _load_calibration(path):
    """Load the optional confidence-calibration file as a dict.

    Returns the "historical_accuracy" entry when the file has a non-empty one,
    otherwise the whole top-level object. A missing file, or one whose
    top-level JSON value is not an object, yields {}.
    """
    data = _load_json(path, required=False)
    if not isinstance(data, dict):
        # The file is optional; a list or scalar here is treated like "no
        # calibration" instead of failing on .get().
        return {}
    return data.get("historical_accuracy") or data
207
+
208
+
209
+ def _check_env():
210
+ missing = []
211
+ for k in ("LLM_API_KEY",):
212
+ if not os.environ.get(k):
213
+ missing.append(k)
214
+ tiers = [t for t in ("TIER1", "TIER2", "TIER3", "TIER4") if os.environ.get(t)]
215
+ if not tiers:
216
+ missing.append("at least one of TIER1..TIER4")
217
+ if missing:
218
+ print("Missing env vars: " + ", ".join(missing), file=sys.stderr)
219
+ print("Workflows in this release call worker LLMs and need these set.", file=sys.stderr)
220
+ return False
221
+ return True
222
+
223
+
224
+ def _die(msg):
225
+ print(f"error: {msg}", file=sys.stderr)
226
+ sys.exit(2)
227
+
228
+
229
+ if __name__ == "__main__":
230
+ main()
@@ -0,0 +1,15 @@
1
#!/bin/sh
# Serve this release directory locally so dashboards open in a browser.
# Generated HTML files (e.g. result_*.html, dashboard.html) become reachable
# at http://localhost:<port>/...
#
# Usage:
#   ./serve.sh          # default port 8080
#   ./serve.sh 9000     # custom port
#
# Stop with Ctrl-C.

PORT="${1:-8080}"
cd "$(dirname "$0")" || exit 1

# Prefer python3: some systems ship no bare `python` command, and the
# http.server module only exists in Python 3.
if command -v python3 >/dev/null 2>&1; then
  PYTHON=python3
elif command -v python >/dev/null 2>&1; then
  PYTHON=python
else
  echo "error: no python3 or python interpreter found on PATH" >&2
  exit 1
fi

echo "Serving $(pwd) on http://localhost:${PORT}/"
exec "$PYTHON" -m http.server "$PORT"
@@ -49,6 +49,12 @@ Many real verification tasks require semantic understanding — "is this descrip
49
49
 
50
50
  If a method's results fall below the accuracy threshold, try a different method or a more capable model. If regex works and meets accuracy — keep it, it's free. If regex produces results below threshold, escalate to worker LLM. If a cheap worker LLM isn't accurate enough, try a more capable tier. Record what works for each extraction type in AGENT.md for future reference.
51
51
 
52
+ ## Project Glossary
53
+
54
+ The project glossary (built and maintained by `rule-extraction`, stored at `rules/glossary.json`) is a useful resource when designing extraction. It records canonical names and known aliases for entities that appear across rules. Reading it before extracting helps keep entity names schema-aligned and avoids parallel labels for the same thing.
55
+
56
+ Whether the glossary becomes more than a naming convention — for instance, driving cheap pattern matching for entities with stable surface forms — is a per-project judgment. Apply the same cost-accuracy logic as elsewhere: whatever method meets the accuracy threshold for the task at hand.
57
+
52
58
  ## Schema Design
53
59
 
54
60
  Define the expected output for each extraction. Keep it simple and JIT:
@@ -61,6 +61,17 @@ After the conversation:
61
61
  5. Initialize version tracking (a `versions.json` manifest).
62
62
  6. Log the bootstrap conversation summary for future reference.
63
63
 
64
+ ## Scheduled Ingestion (Production)
65
+
66
+ Once a project is past bootstrap and into production, fresh documents often arrive on a regular cadence — daily regulator drops, hourly API pulls, batch uploads from upstream systems. Use the `schedule_fetch` tool to register ingestion jobs the OS scheduler runs while kc-beta is closed:
67
+
68
+ - Each job is a shell command (rsync, curl, custom script) that lands files in `$INPUT_DIR`.
69
+ - KC writes a wrapper script under `scripts/ingest/<job-id>.sh`; the user installs the script line into their crontab via `crontab -e`.
70
+ - Newly-arrived files are auto-prefixed with `<job-id>_<UTC-timestamp>_` so origin and arrival time are visible in the filename.
71
+ - View status with `/schedule` or `schedule_fetch list`. The tail of `logs/ingest.log` shows recent runs.
72
+
73
+ Discuss the cadence with the developer user during bootstrap — knowing the production input rhythm shapes how skills and workflows should be written (batch vs streaming, idempotency requirements, etc.).
74
+
64
75
  ## When to Re-Bootstrap
65
76
 
66
77
  Return to this skill when:
@@ -106,7 +106,19 @@ For production Input/ documents:
106
106
  4. Review the selected results (LLM-as-Judge or manual review by the developer user).
107
107
  5. Compute batch accuracy from reviewed results.
108
108
  6. Log batch QC report.
109
- 7. If accuracy is acceptable, finalize the batch. If not, trigger evolution loop.
109
+ 7. Move processed input docs to `input/archived/` via `archive_file` so the next session sees only fresh arrivals.
110
+ 8. If accuracy is acceptable, finalize the batch. If not, trigger evolution loop.
111
+
112
+ Production input often arrives on a schedule (see `bootstrap-workspace` → "Scheduled Ingestion"). Files in `input/` are auto-prefixed with `<job-id>_<UTC-timestamp>_` by the ingestion wrapper, so each batch carries provenance in its filenames. When a batch fails QC, the prefixes let you trace which scheduled run produced the bad data.
113
+
114
+ ## Two Dashboard Surfaces
115
+
116
+ There are two distinct dashboards in this system:
117
+
118
+ - **Developer dashboard** — `dashboard_render` tool, generated inside the workspace from `output/results/`, `logs/evolution/`, and `output/qc/`. For your audit and the developer user's day-to-day monitoring during BUILD and DISTILL.
119
+ - **End-user dashboard** — the `render_dashboard.py` script bundled inside a release (built via the `release` tool). For non-developer recipients of a packaged release. It renders results from a single `run.py` invocation; no workspace dependency.
120
+
121
+ When a release is built, point end users at the bundled dashboard, not the workspace one. The workspace dashboard remains your developer-facing surface.
110
122
 
111
123
  ## Developer User Involvement
112
124
 
@@ -104,6 +104,41 @@ Maintain a lightweight catalog of all extracted rules. This is your index, not t
104
104
 
105
105
  Format: a simple markdown table or JSON file. Do not over-engineer this. The catalog exists to give you and the developer user an overview of progress.
106
106
 
107
+ ## Project Glossary
108
+
109
+ Alongside the rule catalog, build a project glossary — a living vocabulary of the entities, terms, and patterns the verification system encounters. The glossary is what keeps entity names consistent across rules: without it, the same balance-sheet item might be named "注册资本", "registered capital", and "paid-in capital" by three different rule skills, breaking shared-entity matching and producing inconsistent extraction outputs.
110
+
111
+ The glossary is not frozen at the end of extraction. It is a living document. Update it when you discover new aliases in samples, when a worker LLM extraction reveals a variant phrasing, when corner cases surface unfamiliar terminology. Both the coding agent and any operator can edit it.
112
+
113
+ ### When to seed it
114
+
115
+ During rule extraction. As you decompose each rule, note the entities the rule references — capital ratios, signature pages, related-party transactions, dates, parties, monetary values. Seed the glossary with the canonical name and any aliases already visible in the source documents.
116
+
117
+ ### Storage and shape
118
+
119
+ Save as `rules/glossary.json` next to `catalog.json`. Each entry is small:
120
+
121
+ ```json
122
+ {
123
+ "canonical": "registered_capital",
124
+ "aliases": ["注册资本", "registered capital", "实收资本"],
125
+ "definition": "The capital amount registered with regulators",
126
+ "entity_type": "monetary_value",
127
+ "seen_in": ["rules/regulation_A.pdf:p12", "samples/annual_report_2024.pdf:p3"],
128
+ "status": "extracted"
129
+ }
130
+ ```
131
+
132
+ Status field tracks maturity: `extracted` (from rules), `validated` (confirmed in samples), `production` (used by deployed workflows). Add or drop fields as the project demands — same JIT philosophy as the rule schema.
133
+
134
+ ### How it integrates
135
+
136
+ - `rule-graph` consumes the glossary so `shares_entity` edges reference canonical labels rather than free-text strings.
137
+ - `entity-extraction` references the glossary for canonical names and known aliases when designing extraction logic.
138
+ - Skills authored under `skill-authoring` should use canonical names in their schemas.
139
+
140
+ How the glossary is used downstream is a per-project judgment. A mature glossary may enable cheap pattern-based matching for some entities; for others it just keeps naming consistent. Let the cost-accuracy logic in `entity-extraction` decide per case.
141
+
107
142
  ## Handling Ambiguity
108
143
 
109
144
  Regulations are often ambiguous. When you encounter ambiguity:
@@ -43,6 +43,22 @@ Two rules that can produce contradictory guidance. Regulation A requires disclos
43
43
 
44
44
  Edge cases that affect multiple rules. A document with an unusual structure (merged cells in a table, non-standard date format) may cause extraction failures across several rules. The graph links these rules to the shared corner case so a fix in one propagates awareness to others.
45
45
 
46
+ ## Project Glossary
47
+
48
+ The glossary (built and owned by `rule-extraction`, stored at `rules/glossary.json`) is the canonical-label registry that makes `shares_entity` edges meaningful. Without it, two rules can target the same entity under different names and the edge between them never gets drawn.
49
+
50
+ Edges that reference entities should use the glossary's canonical labels, not free-text strings copied from rule descriptions:
51
+
52
+ ```json
53
+ {"from": "R001", "to": "R004", "type": "shares_entity", "entity": "registered_capital"}
54
+ ```
55
+
56
+ Where `registered_capital` is the canonical name in `glossary.json`, with aliases like `注册资本` and `paid-in capital` recorded under it.
57
+
58
+ When the glossary is updated — new aliases discovered in samples, two entries merged, a definition refined — revisit affected `shares_entity` edges. New aliases may surface previously hidden cross-rule connections; merged entries collapse parallel edges into one.
59
+
60
+ The glossary is built and owned by rule-extraction; rule-graph just consumes it.
61
+
46
62
  ## Three Uses
47
63
 
48
64
  ### 1. Impact Analysis
@@ -135,6 +135,14 @@ The coding agent's skill-based results are the ground truth. For each document i
135
135
 
136
136
  Each iteration of a workflow is a new version file: `workflow_v1.py`, `workflow_v2.py`, etc. Track which version is active in `config.json`. See `version-control` skill for the full methodology.
137
137
 
138
+ ## Releasing Workflows
139
+
140
+ Once workflows hit accuracy threshold, they can be packaged for end users via the `release` tool. Each release is a self-contained directory under `output/releases/<slug>/` with the pinned workflows, a Python runner, a confidence scorer, an HTML dashboard generator, and a `serve.sh` helper. The bundle has no kc-beta dependency — anyone with Python and a worker LLM API key can run `python run.py <doc>` and produce verification results.
141
+
142
+ What to include is your call: either all rules in the catalog or a curated subset via the `include` parameter. You can also bundle 1-3 representative samples as `fixtures/` if you want the recipient to be able to dry-run without their own data.
143
+
144
+ The `release` tool snapshots the workspace first (git tag `snap/release-<slug>`), so the bundle is regenerable from git even if `output/releases/` is later cleaned. Decide when to release — there's no automation, no forced cadence. Typical triggers: workflows reach SKILL/WORKFLOW_ACCURACY thresholds, a stakeholder needs a hand-off, a production cron should run pinned versions instead of latest. Discuss with the developer user.
145
+
138
146
  ## Cost Tracking
139
147
 
140
148
  Track the cost of each workflow run:
@@ -96,6 +96,19 @@ Tags enable three capabilities that you cannot afford to lose:
96
96
 
97
97
  Tag format: a simple string field on every intermediate output. Example values: `regex`, `python_calc`, `llm_tier2`, `manual_review`. Be consistent within a project. Define the tag vocabulary once at project setup and enforce it across all skills and workflows.
98
98
 
99
+ ## Multi-agent coordination — keep it lock-free
100
+
101
+ When a task is large enough that you reach for `agent_tool` to spawn parallel sub-agents, partition by an independent unit (one rule per sub-agent, one document per sub-agent, etc.) so the sub-agents never need to coordinate through a shared mutable file.
102
+
103
+ Lesson from a peer-team failure: they tried equal-status agents claiming work via a shared coordination file with locks. Two predictable failures emerged. (1) Agents held locks too long or forgot to release them; even with locks working, twenty agents' throughput dropped to that of two or three because most time went to waiting. (2) Fragility — agents could fail while holding a lock, try to acquire a lock they already held, or update the coordination file without acquiring a lock at all.
104
+
105
+ KC's preferred patterns:
106
+
107
+ - **Single-dispatcher** — `TaskManager` hands tasks out one at a time to the conductor. No locks, no peer coordination. This is the default ralph-loop architecture.
108
+ - **Partition-by-unit** — when spawning sub-agents via `agent_tool`, give each one a non-overlapping slice (per-rule, per-document). Sub-agents write to their own `sub_agents/<taskId>/` for state, and to per-rule paths in `rule_skills/<id>/` or `workflows/<id>/` for shared artifacts. Block 11's git auto-commit serializes the shared writes; partition-by-rule keeps last-writer-wins from being a problem.
109
+
110
+ If two would-be sub-agents need to talk to each other to make progress, they should probably be one task (run sequentially) or a sequence (parent dispatches second after first finishes), not concurrent peers.
111
+
99
112
  ## Anti-Patterns
100
113
 
101
114
  Five failure modes recur across projects. Learn to recognize them early.