kc-beta 0.7.5 → 0.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +47 -0
- package/package.json +3 -2
- package/src/agent/context.js +17 -1
- package/src/agent/engine.js +467 -100
- package/src/agent/llm-client.js +24 -1
- package/src/agent/pipelines/_advance-hints.js +92 -0
- package/src/agent/pipelines/_milestone-derive.js +325 -20
- package/src/agent/pipelines/skill-authoring.js +49 -3
- package/src/agent/tools/agent-tool.js +2 -2
- package/src/agent/tools/consult-skill.js +15 -0
- package/src/agent/tools/dashboard-render.js +48 -1
- package/src/agent/tools/document-parse.js +31 -2
- package/src/agent/tools/phase-advance.js +17 -13
- package/src/agent/tools/release.js +343 -7
- package/src/agent/tools/sandbox-exec.js +65 -8
- package/src/agent/tools/worker-llm-call.js +95 -15
- package/src/agent/workspace.js +25 -4
- package/src/cli/components.js +4 -1
- package/src/cli/index.js +125 -8
- package/src/config.js +19 -2
- package/src/marathon/driver.js +217 -0
- package/src/marathon/prompts.js +93 -0
- package/template/.env.template +17 -1
- package/template/AGENT.md +2 -2
- package/template/skills/en/auto-model-selection/SKILL.md +55 -35
- package/template/skills/en/bootstrap-workspace/SKILL.md +27 -0
- package/template/skills/en/compliance-judgment/SKILL.md +14 -0
- package/template/skills/en/confidence-system/SKILL.md +30 -8
- package/template/skills/en/corner-case-management/SKILL.md +53 -33
- package/template/skills/en/cross-document-verification/SKILL.md +88 -83
- package/template/skills/en/dashboard-reporting/SKILL.md +91 -66
- package/template/skills/en/dashboard-reporting/scripts/generate_dashboard.py +1 -1
- package/template/skills/en/data-sensibility/SKILL.md +19 -12
- package/template/skills/en/document-chunking/SKILL.md +99 -15
- package/template/skills/en/entity-extraction/SKILL.md +14 -4
- package/template/skills/en/quality-control/SKILL.md +23 -0
- package/template/skills/en/rule-extraction/SKILL.md +92 -94
- package/template/skills/en/rule-extraction/references/chunking-strategies.md +7 -78
- package/template/skills/en/skill-authoring/SKILL.md +85 -2
- package/template/skills/en/skill-creator/SKILL.md +25 -3
- package/template/skills/en/skill-to-workflow/SKILL.md +73 -1
- package/template/skills/en/task-decomposition/SKILL.md +1 -1
- package/template/skills/en/tree-processing/SKILL.md +1 -1
- package/template/skills/en/version-control/SKILL.md +15 -0
- package/template/skills/en/work-decomposition/SKILL.md +52 -32
- package/template/skills/phase_skills.yaml +5 -0
- package/template/skills/zh/auto-model-selection/SKILL.md +54 -33
- package/template/skills/zh/bootstrap-workspace/SKILL.md +27 -0
- package/template/skills/zh/compliance-judgment/SKILL.md +51 -37
- package/template/skills/zh/compliance-judgment/references/output-format.md +62 -62
- package/template/skills/zh/confidence-system/SKILL.md +34 -9
- package/template/skills/zh/corner-case-management/SKILL.md +71 -104
- package/template/skills/zh/cross-document-verification/SKILL.md +90 -195
- package/template/skills/zh/cross-document-verification/references/contradiction-taxonomy.md +36 -36
- package/template/skills/zh/dashboard-reporting/SKILL.md +82 -232
- package/template/skills/zh/dashboard-reporting/scripts/generate_dashboard.py +1 -1
- package/template/skills/zh/data-sensibility/SKILL.md +13 -0
- package/template/skills/zh/document-chunking/SKILL.md +101 -18
- package/template/skills/zh/document-parsing/SKILL.md +65 -65
- package/template/skills/zh/document-parsing/references/parser-catalog.md +26 -26
- package/template/skills/zh/entity-extraction/SKILL.md +78 -68
- package/template/skills/zh/evolution-loop/references/convergence-guide.md +38 -38
- package/template/skills/zh/quality-control/SKILL.md +23 -0
- package/template/skills/zh/quality-control/references/qa-layers.md +65 -65
- package/template/skills/zh/quality-control/references/sampling-strategies.md +49 -49
- package/template/skills/zh/rule-extraction/SKILL.md +199 -188
- package/template/skills/zh/rule-extraction/references/chunking-strategies.md +5 -78
- package/template/skills/zh/skill-authoring/SKILL.md +136 -58
- package/template/skills/zh/skill-authoring/references/skill-format-spec.md +39 -39
- package/template/skills/zh/skill-creator/SKILL.md +215 -201
- package/template/skills/zh/skill-creator/references/schemas.md +60 -60
- package/template/skills/zh/skill-to-workflow/SKILL.md +73 -1
- package/template/skills/zh/skill-to-workflow/references/worker-llm-catalog.md +24 -24
- package/template/skills/zh/task-decomposition/SKILL.md +1 -1
- package/template/skills/zh/task-decomposition/references/decision-matrix.md +54 -54
- package/template/skills/zh/tree-processing/SKILL.md +67 -63
- package/template/skills/zh/version-control/SKILL.md +15 -0
- package/template/skills/zh/version-control/references/trace-id-spec.md +34 -34
- package/template/skills/zh/work-decomposition/SKILL.md +52 -30
- package/template/workflows/common/llm_client.py +168 -0
- package/template/workflows/common/utils.py +132 -0
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"""KC worker-LLM client (v0.8.1 P10-A canonical shim).
|
|
2
|
+
|
|
3
|
+
Distilled workflows use this module to call worker LLMs. Provider-agnostic:
|
|
4
|
+
reads connection info from workspace `.env` so the same workflow can run
|
|
5
|
+
against SiliconFlow, OpenAI, Anthropic, Aliyun, Volcanocloud, etc.
|
|
6
|
+
|
|
7
|
+
Two modes:
|
|
8
|
+
- Inside a KC session: the engine's `worker_llm_call` tool is preferred
|
|
9
|
+
for new code (it tracks cost, applies rate limiting, and writes to
|
|
10
|
+
events.jsonl). This shim is fine if the workflow needs to be
|
|
11
|
+
portable to standalone (no-KC) deployment.
|
|
12
|
+
- Standalone (deployed release bundle): this shim is the only LLM
|
|
13
|
+
access path. Every call writes a line to `output/llm_ledger.jsonl`
|
|
14
|
+
so post-hoc analysis can reconstruct cost and traffic.
|
|
15
|
+
|
|
16
|
+
Required `.env` fields:
|
|
17
|
+
LLM_API_KEY API key for the provider
|
|
18
|
+
LLM_BASE_URL Provider base URL (e.g. https://api.siliconflow.cn/v1)
|
|
19
|
+
TIER1..TIER4 Comma-separated model names per tier
|
|
20
|
+
|
|
21
|
+
Optional:
|
|
22
|
+
LLM_AUTH_TYPE "bearer" (default) | "x-api-key" (Anthropic native)
|
|
23
|
+
LLM_API_FORMAT "openai" (default) — only OpenAI-format chat completions
|
|
24
|
+
are supported by this shim. Use worker_llm_call for
|
|
25
|
+
non-OpenAI-format providers (e.g. Anthropic native).
|
|
26
|
+
|
|
27
|
+
If LLM_BASE_URL is missing, the shim raises explicitly — no silent
|
|
28
|
+
fallback to a hardcoded vendor URL. This avoids accidentally sending
|
|
29
|
+
traffic to siliconflow.cn from an OpenAI-configured workspace.
|
|
30
|
+
|
|
31
|
+
Migration aliases:
|
|
32
|
+
SILICONFLOW_API_KEY / SILICONFLOW_BASE_URL → read as fallbacks when the canonical LLM_API_KEY / LLM_BASE_URL
|
|
33
|
+
name is missing (for workspaces predating v0.8.1).
|
|
34
|
+
"""
|
|
35
|
+
import json
|
|
36
|
+
import os
|
|
37
|
+
import time
|
|
38
|
+
import urllib.error
|
|
39
|
+
import urllib.request
|
|
40
|
+
|
|
41
|
+
# Relative path of the JSONL call ledger appended to by `_write_ledger`;
# being relative, it resolves against the current working directory.
_LEDGER_PATH = os.path.join("output", "llm_ledger.jsonl")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def call(tier="tier2", prompt="", system_prompt=None, max_tokens=4096, timeout_s=120):
    """Send a single-prompt chat-completions request to the tier's first model.

    Configuration is read through `_env` (process environment first, then
    the `.env` file in the current working directory).

    Args:
        tier: Name of the tier variable to read models from ("tier1".."tier4");
            it is upper-cased before lookup.
        prompt: User message. Must be non-empty.
        system_prompt: Optional system message placed before the user message.
        max_tokens: Value sent as `max_tokens` in the request body.
        timeout_s: Timeout in seconds passed to `urllib.request.urlopen`.

    Returns:
        dict with keys `response`, `model_used`, `tier`, `tokens_in`,
        `tokens_out`. Token counts default to 0 when the provider omits
        `usage`.

    Raises:
        RuntimeError: empty prompt, missing API key or base URL, an
            `LLM_API_FORMAT` other than "openai", no models configured for
            the tier, an HTTP error status from the provider, or a response
            body that is not JSON / lacks `choices[0].message.content`.
        Other transport errors raised by `urlopen` (for example
        `urllib.error.URLError`) are not caught here and propagate unchanged.
    """
    if not prompt:
        raise RuntimeError("call() requires a non-empty `prompt`")

    # Canonical names win; the SILICONFLOW_* names are legacy fallbacks.
    api_key = _env("LLM_API_KEY") or _env("SILICONFLOW_API_KEY")
    if not api_key:
        raise RuntimeError(
            "LLM_API_KEY not configured. Set it in .env or run `kc-beta onboard`."
        )

    base_url = _env("LLM_BASE_URL") or _env("SILICONFLOW_BASE_URL")
    if not base_url:
        raise RuntimeError(
            "LLM_BASE_URL not configured. Set the canonical name in .env "
            "(e.g. https://api.openai.com/v1 for OpenAI; "
            "https://api.siliconflow.cn/v1 for SiliconFlow). "
            "Run `kc-beta onboard` to configure interactively."
        )
    base_url = base_url.rstrip("/")

    auth_type = (_env("LLM_AUTH_TYPE") or "bearer").lower()
    api_format = (_env("LLM_API_FORMAT") or "openai").lower()
    if api_format != "openai":
        raise RuntimeError(
            f"LLM_API_FORMAT={api_format} not supported by this shim. "
            f"Only `openai` chat-completions wire format is implemented. "
            f"Use the engine's `worker_llm_call` tool for native non-OpenAI providers."
        )

    tier_models = _load_tier_models(tier)
    if not tier_models:
        raise RuntimeError(f"No models configured for {tier.upper()}; check .env TIER1-TIER4")
    # Only the first configured model is used; later entries are not tried.
    model = tier_models[0]

    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})

    body = json.dumps(
        {"model": model, "messages": messages, "max_tokens": max_tokens}
    ).encode("utf-8")

    headers = {"Content-Type": "application/json"}
    if auth_type == "x-api-key":
        headers["x-api-key"] = api_key
        headers["anthropic-version"] = "2023-06-01"
    else:
        headers["Authorization"] = f"Bearer {api_key}"

    # Supplying `data` makes urllib issue a POST.
    req = urllib.request.Request(f"{base_url}/chat/completions", data=body, headers=headers)
    # Monotonic clock so the recorded duration is immune to wall-clock jumps.
    started = time.monotonic()
    try:
        with urllib.request.urlopen(req, timeout=timeout_s) as resp:
            raw = resp.read()
    except urllib.error.HTTPError as e:
        # Preserve the body for debugging — providers often return useful errors
        err_body = e.read().decode("utf-8", errors="replace")[:500] if e.fp else ""
        raise RuntimeError(f"LLM call HTTP {e.code} from {base_url}: {err_body}") from e
    duration_s = round(time.monotonic() - started, 3)

    # A 2xx status does not guarantee a well-formed completion (proxies and
    # gateways may answer with HTML or an error envelope), so surface that
    # as the same RuntimeError type used for every other failure here.
    try:
        data = json.loads(raw)
    except ValueError as e:
        snippet = raw.decode("utf-8", errors="replace")[:500]
        raise RuntimeError(
            f"LLM call to {base_url} returned a non-JSON body: {snippet}"
        ) from e
    try:
        content = data["choices"][0]["message"]["content"]
        usage = data.get("usage") or {}
    except (KeyError, IndexError, TypeError, AttributeError) as e:
        raise RuntimeError(
            f"LLM call to {base_url} returned an unexpected payload: {str(data)[:500]}"
        ) from e

    result = {
        "response": content,
        "model_used": model,
        "tier": tier,
        "tokens_in": usage.get("prompt_tokens", 0),
        "tokens_out": usage.get("completion_tokens", 0),
    }
    _write_ledger({
        **result,
        "duration_s": duration_s,
        "ts": time.time(),
        "base_url": base_url,
        "auth_type": auth_type,
    })
    return result
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _env(key):
|
|
127
|
+
"""Read `key` from process env first, then workspace .env file."""
|
|
128
|
+
v = os.environ.get(key)
|
|
129
|
+
if v:
|
|
130
|
+
return v
|
|
131
|
+
if os.path.exists(".env"):
|
|
132
|
+
try:
|
|
133
|
+
with open(".env", "r", encoding="utf-8") as f:
|
|
134
|
+
for raw in f:
|
|
135
|
+
line = raw.strip()
|
|
136
|
+
if not line or line.startswith("#"):
|
|
137
|
+
continue
|
|
138
|
+
if "=" not in line:
|
|
139
|
+
continue
|
|
140
|
+
k, val = line.split("=", 1)
|
|
141
|
+
if k.strip() == key:
|
|
142
|
+
val = val.strip()
|
|
143
|
+
if (val.startswith('"') and val.endswith('"')) or (
|
|
144
|
+
val.startswith("'") and val.endswith("'")
|
|
145
|
+
):
|
|
146
|
+
val = val[1:-1]
|
|
147
|
+
return val
|
|
148
|
+
except OSError:
|
|
149
|
+
return None
|
|
150
|
+
return None
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _load_tier_models(tier):
    """Return the model names configured for `tier` as a list.

    The upper-cased tier name is looked up via `_env`; its value is split on
    commas, each piece is trimmed, and empty pieces are discarded. An unset
    tier yields an empty list.
    """
    configured = _env(tier.upper()) or ""
    names = []
    for piece in configured.split(","):
        cleaned = piece.strip()
        if cleaned:
            names.append(cleaned)
    return names
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _write_ledger(record):
    """Append `record` as one JSON line to the ledger file, best-effort.

    The ledger's parent directory is created when absent. Any OSError from
    creating the directory, opening the file or writing is swallowed so that
    ledger problems never interrupt the calling workflow.
    """
    ledger_dir = os.path.dirname(_LEDGER_PATH)
    try:
        os.makedirs(ledger_dir, exist_ok=True)
        with open(_LEDGER_PATH, "a", encoding="utf-8") as sink:
            entry = json.dumps(record, ensure_ascii=False)
            sink.write(f"{entry}\n")
    except OSError:
        # Ledger is best-effort; never break the workflow over a write failure.
        return
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
# Public API of this module: only `call` is exported by `from ... import *`.
__all__ = ["call"]
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"""KC workflow helpers (v0.8.1 P10-B).
|
|
2
|
+
|
|
3
|
+
Common utilities for distilled workflows. Provider-agnostic, no
|
|
4
|
+
external dependencies. Reusable across rule check.py / workflow.py
|
|
5
|
+
files so that per-rule scripts stay focused on rule-specific logic.
|
|
6
|
+
|
|
7
|
+
Currently:
|
|
8
|
+
strip_annotations(text) — drop reviewer-annotation footers
|
|
9
|
+
from sample documents so per-rule
|
|
10
|
+
check.py regex doesn't false-positive
|
|
11
|
+
on the annotation itself
|
|
12
|
+
|
|
13
|
+
detect_report_type(text) — light-touch report-type classifier
|
|
14
|
+
(年报 / 半年报 / 季报 / 月报 / 周报 / 其他)
|
|
15
|
+
used by rules that gate on report type
|
|
16
|
+
|
|
17
|
+
make_result(rule_id, verdict, evidence, confidence, **kwargs)
|
|
18
|
+
— standardized result dict factory
|
|
19
|
+
"""
|
|
20
|
+
import re
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# Annotation prefixes that mark reviewer-added footers in sample docs.
|
|
24
|
+
# These should be stripped before keyword/regex matching so per-rule
|
|
25
|
+
# check.py doesn't match the annotation as if it were document content.
|
|
26
|
+
#
|
|
27
|
+
# Added based on E2E #11 贷款 v0.8 audit § 9: 4/14 spot-checks
|
|
28
|
+
# false-positive PASS because samples contain `预期命中点: ...年化利率`
|
|
29
|
+
# footers that the rule's keyword regex matches.
|
|
30
|
+
_ANNOTATION_PREFIXES = (
    "预期命中点",
    "预期结果",
    "预期判定",
    "预期验证",
    "标注",
    "审核标注",
    "Expected",
    "expected",
    "EXPECTED",
    "Annotation",
    "annotation",
)


def strip_annotations(text, extra_prefixes=None):
    """Remove reviewer-annotation blocks from document text.

    A line opens an annotation block when, after optional leading
    whitespace, it starts with a recognized prefix followed by a half-width
    `:` or a full-width colon (U+FF1A). That line is dropped, together with
    every following continuation line, i.e. a non-blank line beginning with
    a space, tab, `-`, `*` or `·`.

    The block ends at the first blank line (which is consumed) or at the
    first non-blank line that is not a continuation (which is kept).

    Args:
        text: Document text. Falsy input (None or "") is returned as-is.
        extra_prefixes: Optional project-specific annotation labels. Either
            an iterable of strings or a single string; empty labels are
            ignored.

    Returns:
        The cleaned text. The input is never mutated.
    """
    if not text:
        return text

    # A bare string would otherwise be split into one prefix per character.
    if isinstance(extra_prefixes, str):
        extra_prefixes = (extra_prefixes,)
    # Drop empty labels: an empty alternative would match any line that
    # merely starts with a colon.
    prefixes = _ANNOTATION_PREFIXES + tuple(p for p in (extra_prefixes or ()) if p)

    alternation = "|".join(re.escape(p) for p in prefixes)
    # The full-width colon is written as an escape so the pattern survives
    # tools that normalize U+FF1A to an ASCII colon.
    anno_start = re.compile(rf"^\s*(?:{alternation})\s*[:\uFF1A]")
    continuation_markers = (" ", "\t", "-", "*", "·")

    out_lines = []
    in_anno_block = False
    for line in text.split("\n"):
        if anno_start.match(line):
            in_anno_block = True
            continue
        if in_anno_block:
            is_blank = not line.strip()
            if not is_blank and line.startswith(continuation_markers):
                # Still inside the annotation block — drop.
                continue
            in_anno_block = False
            if is_blank:
                # The blank terminator belongs to the annotation block.
                continue
        out_lines.append(line)
    return "\n".join(out_lines)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
# (label, pattern) pairs tried in order; the first pattern that matches wins.
# The 年报 pattern uses negative lookbehinds so that it does not fire on the
# tail of 半年报 / 半年度报告 / "semi-annual report", which would otherwise
# make every half-year report classify as an annual report.
_REPORT_TYPE_PATTERNS = [
    (
        "年报",
        re.compile(
            r"(?<!半)年报|(?<!半)年度报告|(?<!semi)(?<!semi[- ])annual report",
            re.IGNORECASE,
        ),
    ),
    (
        "半年报",
        re.compile(
            r"半年报|半年度报告|interim report"
            r"|semi[- ]?annual report|half[- ]year(?:ly)? report",
            re.IGNORECASE,
        ),
    ),
    ("季报", re.compile(r"季报|季度报告|quarterly report", re.IGNORECASE)),
    ("月报", re.compile(r"月报|月度报告|monthly report", re.IGNORECASE)),
    ("周报", re.compile(r"周报|周度报告|weekly report", re.IGNORECASE)),
]


def detect_report_type(text):
    """Light-touch report-type classifier.

    Only the first 2000 characters of `text` are scanned. The patterns in
    `_REPORT_TYPE_PATTERNS` are tried in table order and the label of the
    first one that matches is returned, so when several report types are
    mentioned in the head, table order (not position in the text) decides.

    Returns:
        One of "年报", "半年报", "季报", "月报", "周报"; "其他" when `text`
        is empty/None or nothing matches.
    """
    if not text:
        return "其他"
    head = text[:2000]
    for kind, pattern in _REPORT_TYPE_PATTERNS:
        if pattern.search(head):
            return kind
    return "其他"
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def make_result(rule_id, verdict, evidence, confidence=0.7, **kwargs):
    """Assemble the standard result dict for one rule check.

    Args:
        rule_id: Identifier of the rule that produced the result.
        verdict: "PASS" / "FAIL" / "WARNING" / "NOT_APPLICABLE" (not
            validated here; stored as given).
        evidence: String explaining the verdict.
        confidence: Confidence score, 0.0-1.0; defaults to 0.7.
        **kwargs: Extra fields the rule needs (model_used, llm_calls,
            llm_tokens, comment, etc.), appended after the four core keys.

    Returns:
        A new dict with `rule_id`, `verdict`, `evidence`, `confidence`
        followed by the extra fields.
    """
    return {
        "rule_id": rule_id,
        "verdict": verdict,
        "evidence": evidence,
        "confidence": confidence,
        **kwargs,
    }
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
# Public API of this module; underscore-prefixed tables stay private.
__all__ = ["strip_annotations", "detect_report_type", "make_result"]
|