kc-beta 0.7.5 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. package/README.md +47 -0
  2. package/package.json +3 -2
  3. package/src/agent/context.js +17 -1
  4. package/src/agent/engine.js +467 -100
  5. package/src/agent/llm-client.js +24 -1
  6. package/src/agent/pipelines/_advance-hints.js +92 -0
  7. package/src/agent/pipelines/_milestone-derive.js +325 -20
  8. package/src/agent/pipelines/skill-authoring.js +49 -3
  9. package/src/agent/tools/agent-tool.js +2 -2
  10. package/src/agent/tools/consult-skill.js +15 -0
  11. package/src/agent/tools/dashboard-render.js +48 -1
  12. package/src/agent/tools/document-parse.js +31 -2
  13. package/src/agent/tools/phase-advance.js +17 -13
  14. package/src/agent/tools/release.js +343 -7
  15. package/src/agent/tools/sandbox-exec.js +65 -8
  16. package/src/agent/tools/worker-llm-call.js +95 -15
  17. package/src/agent/workspace.js +25 -4
  18. package/src/cli/components.js +4 -1
  19. package/src/cli/index.js +125 -8
  20. package/src/config.js +19 -2
  21. package/src/marathon/driver.js +217 -0
  22. package/src/marathon/prompts.js +93 -0
  23. package/template/.env.template +17 -1
  24. package/template/AGENT.md +2 -2
  25. package/template/skills/en/auto-model-selection/SKILL.md +55 -35
  26. package/template/skills/en/bootstrap-workspace/SKILL.md +27 -0
  27. package/template/skills/en/compliance-judgment/SKILL.md +14 -0
  28. package/template/skills/en/confidence-system/SKILL.md +30 -8
  29. package/template/skills/en/corner-case-management/SKILL.md +53 -33
  30. package/template/skills/en/cross-document-verification/SKILL.md +88 -83
  31. package/template/skills/en/dashboard-reporting/SKILL.md +91 -66
  32. package/template/skills/en/dashboard-reporting/scripts/generate_dashboard.py +1 -1
  33. package/template/skills/en/data-sensibility/SKILL.md +19 -12
  34. package/template/skills/en/document-chunking/SKILL.md +99 -15
  35. package/template/skills/en/entity-extraction/SKILL.md +14 -4
  36. package/template/skills/en/quality-control/SKILL.md +23 -0
  37. package/template/skills/en/rule-extraction/SKILL.md +92 -94
  38. package/template/skills/en/rule-extraction/references/chunking-strategies.md +7 -78
  39. package/template/skills/en/skill-authoring/SKILL.md +85 -2
  40. package/template/skills/en/skill-creator/SKILL.md +25 -3
  41. package/template/skills/en/skill-to-workflow/SKILL.md +73 -1
  42. package/template/skills/en/task-decomposition/SKILL.md +1 -1
  43. package/template/skills/en/tree-processing/SKILL.md +1 -1
  44. package/template/skills/en/version-control/SKILL.md +15 -0
  45. package/template/skills/en/work-decomposition/SKILL.md +52 -32
  46. package/template/skills/phase_skills.yaml +5 -0
  47. package/template/skills/zh/auto-model-selection/SKILL.md +54 -33
  48. package/template/skills/zh/bootstrap-workspace/SKILL.md +27 -0
  49. package/template/skills/zh/compliance-judgment/SKILL.md +51 -37
  50. package/template/skills/zh/compliance-judgment/references/output-format.md +62 -62
  51. package/template/skills/zh/confidence-system/SKILL.md +34 -9
  52. package/template/skills/zh/corner-case-management/SKILL.md +71 -104
  53. package/template/skills/zh/cross-document-verification/SKILL.md +90 -195
  54. package/template/skills/zh/cross-document-verification/references/contradiction-taxonomy.md +36 -36
  55. package/template/skills/zh/dashboard-reporting/SKILL.md +82 -232
  56. package/template/skills/zh/dashboard-reporting/scripts/generate_dashboard.py +1 -1
  57. package/template/skills/zh/data-sensibility/SKILL.md +13 -0
  58. package/template/skills/zh/document-chunking/SKILL.md +101 -18
  59. package/template/skills/zh/document-parsing/SKILL.md +65 -65
  60. package/template/skills/zh/document-parsing/references/parser-catalog.md +26 -26
  61. package/template/skills/zh/entity-extraction/SKILL.md +78 -68
  62. package/template/skills/zh/evolution-loop/references/convergence-guide.md +38 -38
  63. package/template/skills/zh/quality-control/SKILL.md +23 -0
  64. package/template/skills/zh/quality-control/references/qa-layers.md +65 -65
  65. package/template/skills/zh/quality-control/references/sampling-strategies.md +49 -49
  66. package/template/skills/zh/rule-extraction/SKILL.md +199 -188
  67. package/template/skills/zh/rule-extraction/references/chunking-strategies.md +5 -78
  68. package/template/skills/zh/skill-authoring/SKILL.md +136 -58
  69. package/template/skills/zh/skill-authoring/references/skill-format-spec.md +39 -39
  70. package/template/skills/zh/skill-creator/SKILL.md +215 -201
  71. package/template/skills/zh/skill-creator/references/schemas.md +60 -60
  72. package/template/skills/zh/skill-to-workflow/SKILL.md +73 -1
  73. package/template/skills/zh/skill-to-workflow/references/worker-llm-catalog.md +24 -24
  74. package/template/skills/zh/task-decomposition/SKILL.md +1 -1
  75. package/template/skills/zh/task-decomposition/references/decision-matrix.md +54 -54
  76. package/template/skills/zh/tree-processing/SKILL.md +67 -63
  77. package/template/skills/zh/version-control/SKILL.md +15 -0
  78. package/template/skills/zh/version-control/references/trace-id-spec.md +34 -34
  79. package/template/skills/zh/work-decomposition/SKILL.md +52 -30
  80. package/template/workflows/common/llm_client.py +168 -0
  81. package/template/workflows/common/utils.py +132 -0
@@ -0,0 +1,168 @@
1
+ """KC worker-LLM client (v0.8.1 P10-A canonical shim).
2
+
3
+ Distilled workflows use this module to call worker LLMs. Provider-agnostic:
4
+ reads connection info from workspace `.env` so the same workflow can run
5
+ against SiliconFlow, OpenAI, Anthropic, Aliyun, Volcanocloud, etc.
6
+
7
+ Two modes:
8
+ - Inside a KC session: the engine's `worker_llm_call` tool is preferred
9
+ for new code (it tracks cost, applies rate limiting, and writes to
10
+ events.jsonl). This shim is fine if the workflow needs to be
11
+ portable to standalone (no-KC) deployment.
12
+ - Standalone (deployed release bundle): this shim is the only LLM
13
+ access path. Every call writes a line to `output/llm_ledger.jsonl`
14
+ so post-hoc analysis can reconstruct cost and traffic.
15
+
16
+ Required `.env` fields:
17
+ LLM_API_KEY API key for the provider
18
+ LLM_BASE_URL Provider base URL (e.g. https://api.siliconflow.cn/v1)
19
+ TIER1..TIER4 Comma-separated model names per tier
20
+
21
+ Optional:
22
+ LLM_AUTH_TYPE "bearer" (default) | "x-api-key" (Anthropic native)
23
+ LLM_API_FORMAT "openai" (default) — only OpenAI-format chat completions
24
+ are supported by this shim. Use worker_llm_call for
25
+ non-OpenAI-format providers (e.g. Anthropic native).
26
+
27
+ If LLM_BASE_URL is missing, the shim raises explicitly — no silent
28
+ fallback to a hardcoded vendor URL. This avoids accidentally sending
29
+ traffic to siliconflow.cn from an OpenAI-configured workspace.
30
+
31
+ Migration aliases:
32
+ SILICONFLOW_API_KEY / SILICONFLOW_BASE_URL → used as fallbacks when the
33
+ canonical LLM_API_KEY / LLM_BASE_URL are missing (workspaces predating v0.8.1).
34
+ """
35
+ import json
36
+ import os
37
+ import time
38
+ import urllib.error
39
+ import urllib.request
40
+
41
+ _LEDGER_PATH = os.path.join("output", "llm_ledger.jsonl")
42
+
43
+
44
def call(tier="tier2", prompt="", system_prompt=None, max_tokens=4096, timeout_s=120):
    """Send a single-prompt chat-completions request to the configured worker LLM.

    Connection settings are resolved through `_env` (process environment
    first, then the workspace `.env`). Only the first model listed for the
    requested tier is used. Every successful call is appended to the ledger
    via `_write_ledger`.

    Args:
        tier: Name of the tier setting to read models from; it is
            upper-cased before lookup (e.g. "tier2" reads `TIER2`).
        prompt: User message content. Must be non-empty.
        system_prompt: Optional system message placed before the user message.
        max_tokens: Value sent as `max_tokens` in the request body.
        timeout_s: Timeout in seconds passed to `urllib.request.urlopen`.

    Returns:
        dict with keys `response`, `model_used`, `tier`, `tokens_in`,
        `tokens_out`. Token counts default to 0 when the provider omits
        `usage`.

    Raises:
        RuntimeError: empty prompt, missing or unsupported configuration,
            an HTTP error status from the provider (the first 500 chars of
            the error body are included), or a response body that is not a
            chat-completions JSON document.
        urllib.error.URLError: connection-level failures (DNS, refused
            connection, ...) propagate unchanged.
    """
    if not prompt:
        raise RuntimeError("call() requires a non-empty `prompt`")

    api_key = _env("LLM_API_KEY") or _env("SILICONFLOW_API_KEY")
    if not api_key:
        raise RuntimeError(
            "LLM_API_KEY not configured. Set it in .env or run `kc-beta onboard`."
        )

    base_url = _env("LLM_BASE_URL") or _env("SILICONFLOW_BASE_URL")
    if not base_url:
        raise RuntimeError(
            "LLM_BASE_URL not configured. Set the canonical name in .env "
            "(e.g. https://api.openai.com/v1 for OpenAI; "
            "https://api.siliconflow.cn/v1 for SiliconFlow). "
            "Run `kc-beta onboard` to configure interactively."
        )
    base_url = base_url.rstrip("/")

    auth_type = (_env("LLM_AUTH_TYPE") or "bearer").lower()
    api_format = (_env("LLM_API_FORMAT") or "openai").lower()
    if api_format != "openai":
        raise RuntimeError(
            f"LLM_API_FORMAT={api_format} not supported by this shim. "
            f"Only `openai` chat-completions wire format is implemented. "
            f"Use the engine's `worker_llm_call` tool for native non-OpenAI providers."
        )

    tier_models = _load_tier_models(tier)
    if not tier_models:
        raise RuntimeError(f"No models configured for {tier.upper()}; check .env TIER1-TIER4")
    model = tier_models[0]

    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})

    body = json.dumps(
        {"model": model, "messages": messages, "max_tokens": max_tokens}
    ).encode("utf-8")

    headers = {"Content-Type": "application/json"}
    if auth_type == "x-api-key":
        headers["x-api-key"] = api_key
        headers["anthropic-version"] = "2023-06-01"
    else:
        headers["Authorization"] = f"Bearer {api_key}"

    req = urllib.request.Request(f"{base_url}/chat/completions", data=body, headers=headers)
    # Monotonic clock: the recorded duration must not jump if the wall clock
    # is adjusted while the request is in flight.
    started = time.monotonic()
    try:
        with urllib.request.urlopen(req, timeout=timeout_s) as resp:
            raw = resp.read()
    except urllib.error.HTTPError as e:
        # Preserve the body for debugging — providers often return useful errors
        err_body = e.read().decode("utf-8", errors="replace")[:500] if e.fp else ""
        raise RuntimeError(f"LLM call HTTP {e.code} from {base_url}: {err_body}") from e

    # A 200 response is not guaranteed to be a chat-completions document
    # (proxies, HTML error pages, provider-specific error envelopes). Surface
    # that with context instead of a bare KeyError/IndexError.
    try:
        data = json.loads(raw)
        content = data["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as e:
        snippet = raw.decode("utf-8", errors="replace")[:500]
        raise RuntimeError(
            f"LLM call to {base_url} returned an unexpected response body: {snippet}"
        ) from e

    usage = data.get("usage") or {}
    result = {
        "response": content,
        "model_used": model,
        "tier": tier,
        "tokens_in": usage.get("prompt_tokens", 0),
        "tokens_out": usage.get("completion_tokens", 0),
    }
    _write_ledger({
        **result,
        "duration_s": round(time.monotonic() - started, 3),
        "ts": time.time(),
        "base_url": base_url,
        "auth_type": auth_type,
    })
    return result
124
+
125
+
126
+ def _env(key):
127
+ """Read `key` from process env first, then workspace .env file."""
128
+ v = os.environ.get(key)
129
+ if v:
130
+ return v
131
+ if os.path.exists(".env"):
132
+ try:
133
+ with open(".env", "r", encoding="utf-8") as f:
134
+ for raw in f:
135
+ line = raw.strip()
136
+ if not line or line.startswith("#"):
137
+ continue
138
+ if "=" not in line:
139
+ continue
140
+ k, val = line.split("=", 1)
141
+ if k.strip() == key:
142
+ val = val.strip()
143
+ if (val.startswith('"') and val.endswith('"')) or (
144
+ val.startswith("'") and val.endswith("'")
145
+ ):
146
+ val = val[1:-1]
147
+ return val
148
+ except OSError:
149
+ return None
150
+ return None
151
+
152
+
153
def _load_tier_models(tier):
    """Return the model names configured for `tier` as a list.

    The setting is read via `_env` under the upper-cased tier name and is
    treated as a comma-separated list; surrounding whitespace is trimmed and
    empty entries are discarded. An unset or empty setting yields `[]`.
    """
    configured = _env(tier.upper())
    if not configured:
        return []
    trimmed = (name.strip() for name in configured.split(","))
    return [name for name in trimmed if name]
156
+
157
+
158
def _write_ledger(record):
    """Append `record` as one JSON line to the ledger file at `_LEDGER_PATH`.

    The parent directory is created on demand. Any OSError raised while
    creating the directory or writing the file is swallowed on purpose.
    """
    ledger_dir = os.path.dirname(_LEDGER_PATH)
    try:
        os.makedirs(ledger_dir, exist_ok=True)
        with open(_LEDGER_PATH, "a", encoding="utf-8") as ledger:
            serialized = json.dumps(record, ensure_ascii=False)
            ledger.write(serialized + "\n")
    except OSError:
        # Ledger is best-effort; never break the workflow over a write failure.
        return
166
+
167
+
168
+ __all__ = ["call"]
@@ -0,0 +1,132 @@
1
+ """KC workflow helpers (v0.8.1 P10-B).
2
+
3
+ Common utilities for distilled workflows. Provider-agnostic, no
4
+ external dependencies. Reusable across rule check.py / workflow.py
5
+ files so that per-rule scripts stay focused on rule-specific logic.
6
+
7
+ Currently:
8
+ strip_annotations(text) — drop reviewer-annotation footers
9
+ from sample documents so per-rule
10
+ check.py regex doesn't false-positive
11
+ on the annotation itself
12
+
13
+ detect_report_type(text) — light-touch report-type classifier
14
+ (年报 / 半年报 / 季报 / 月报 / 周报 / 其他)
15
+ used by rules that gate on report type
16
+
17
+ make_result(rule_id, verdict, evidence, confidence, **kwargs)
18
+ — standardized result dict factory
19
+ """
20
+ import re
21
+
22
+
23
# Annotation prefixes that mark reviewer-added footers in sample docs.
# These should be stripped before keyword/regex matching so per-rule
# check.py doesn't match the annotation as if it were document content.
#
# Added based on the E2E #11 loan (贷款) v0.8 audit § 9: 4/14 spot-checks
# were false-positive PASS because samples contain `预期命中点: ...年化利率`
# footers that the rule's keyword regex matches.
_ANNOTATION_PREFIXES = (
    "预期命中点",
    "预期结果",
    "预期判定",
    "预期验证",
    "标注",
    "审核标注",
    "Expected",
    "expected",
    "EXPECTED",
    "Annotation",
    "annotation",
)


def strip_annotations(text, extra_prefixes=None):
    """Remove reviewer-annotation blocks from document text.

    A line starts an annotation block when, after optional leading
    whitespace, it begins with one of the recognized prefixes followed
    (optionally after whitespace) by a half-width `:` or a full-width
    colon (U+FF1A). That line is dropped.

    Following lines are treated as continuation and dropped while they
    start with a space, a tab, or a bullet character (`-`, `*`, `·`).
    The block ends at:
      - a blank line, which is dropped as well, or
      - any other line, which is kept as normal document content.

    Args:
        text: Document text. Falsy input (`None`, `""`) is returned as-is.
        extra_prefixes: Optional project-specific annotation labels; either
            a single string or an iterable of strings. Empty labels are
            ignored.

    Returns:
        The cleaned text. Input is never mutated.
    """
    if not text:
        return text

    if extra_prefixes is None:
        extras = ()
    elif isinstance(extra_prefixes, str):
        # A bare string is one label, not an iterable of one-char labels.
        extras = (extra_prefixes,)
    else:
        extras = tuple(extra_prefixes)
    # An empty label would turn the alternation into "match any line that
    # starts with a colon", so drop those.
    prefixes = tuple(p for p in _ANNOTATION_PREFIXES + extras if p)

    pattern = "|".join(re.escape(p) for p in prefixes)
    # \uff1a is the full-width colon; spelled as an escape so the character
    # survives editors or pipelines that normalize it to ASCII ':'.
    anno_start = re.compile(rf"^\s*(?:{pattern})\s*[:\uff1a]")
    continuation_markers = (" ", "\t", "-", "*", "·")

    out_lines = []
    in_anno_block = False
    for line in text.split("\n"):
        if anno_start.match(line):
            in_anno_block = True
            continue
        if in_anno_block:
            if line.strip() and line.startswith(continuation_markers):
                # Still inside the annotation block — drop.
                continue
            in_anno_block = False
            if not line.strip():
                # The blank separator that closes the block is dropped too.
                continue
        out_lines.append(line)
    return "\n".join(out_lines)
85
+
86
+
87
# Order matters: the first matching entry wins. The half-year patterns must
# come before the annual ones because "半年报" / "半年度报告" / "semi-annual
# report" all contain the corresponding annual keyword as a substring.
_REPORT_TYPE_PATTERNS = [
    (
        "半年报",
        re.compile(
            r"半年报|半年度报告|interim report"
            r"|semi[- ]?annual report|half[- ]year(?:ly)? report",
            re.IGNORECASE,
        ),
    ),
    ("年报", re.compile(r"年报|年度报告|annual report", re.IGNORECASE)),
    ("季报", re.compile(r"季报|季度报告|quarterly report", re.IGNORECASE)),
    ("月报", re.compile(r"月报|月度报告|monthly report", re.IGNORECASE)),
    ("周报", re.compile(r"周报|周度报告|weekly report", re.IGNORECASE)),
]


def detect_report_type(text):
    """Light-touch report-type classifier.

    Scans only the first 2000 characters of `text` (report-type identifiers
    usually appear in the title or cover page) and returns the label of the
    first entry in `_REPORT_TYPE_PATTERNS` whose pattern is found there.
    Used by rules that gate on report type (e.g. R02-06/R02-08 are
    NOT_APPLICABLE for 季报).

    Args:
        text: Document text; may be empty or None.

    Returns:
        One of "年报", "半年报", "季报", "月报", "周报", or "其他" when the
        text is empty or no pattern matches.
    """
    if not text:
        return "其他"
    head = text[:2000]
    for kind, pattern in _REPORT_TYPE_PATTERNS:
        if pattern.search(head):
            return kind
    return "其他"
111
+
112
+
113
def make_result(rule_id, verdict, evidence, confidence=0.7, **kwargs):
    """Assemble the standardized result dict for a rule check.

    Args:
        rule_id: Identifier of the rule that produced the result.
        verdict: "PASS" / "FAIL" / "WARNING" / "NOT_APPLICABLE".
        evidence: String explaining the verdict.
        confidence: Confidence score (0.0-1.0); stored as given.
        **kwargs: Any extra fields the rule needs (model_used, llm_calls,
            llm_tokens, comment, etc.); merged in after the four core keys.

    Returns:
        A new dict with `rule_id`, `verdict`, `evidence`, `confidence`
        followed by the extra fields.
    """
    return {
        "rule_id": rule_id,
        "verdict": verdict,
        "evidence": evidence,
        "confidence": confidence,
        **kwargs,
    }
130
+
131
+
132
+ __all__ = ["strip_annotations", "detect_report_type", "make_result"]