@kweaver-ai/kweaver-sdk 0.8.1 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214) hide show
  1. package/README.md +40 -52
  2. package/README.zh.md +41 -46
  3. package/dist/agent-providers/index.d.ts +7 -0
  4. package/dist/agent-providers/index.js +5 -0
  5. package/dist/agent-providers/prompt-template.d.ts +62 -0
  6. package/dist/agent-providers/prompt-template.js +105 -0
  7. package/dist/agent-providers/prompts/rubric-judge-v1.prompt.md +51 -0
  8. package/dist/agent-providers/prompts/within-trace-synthesizer-v1.prompt.md +60 -0
  9. package/dist/agent-providers/providers/claude-code-subprocess.d.ts +74 -0
  10. package/dist/agent-providers/providers/claude-code-subprocess.js +259 -0
  11. package/dist/agent-providers/providers/stub.d.ts +47 -0
  12. package/dist/agent-providers/providers/stub.js +77 -0
  13. package/dist/agent-providers/registry.d.ts +45 -0
  14. package/dist/agent-providers/registry.js +77 -0
  15. package/dist/agent-providers/types.d.ts +91 -0
  16. package/dist/agent-providers/types.js +25 -0
  17. package/dist/api/agent-chat.js +8 -6
  18. package/dist/api/context-loader.d.ts +1 -0
  19. package/dist/api/resources.d.ts +94 -0
  20. package/dist/api/resources.js +166 -0
  21. package/dist/api/semantic-search.d.ts +5 -0
  22. package/dist/api/semantic-search.js +5 -0
  23. package/dist/api/skills.d.ts +75 -2
  24. package/dist/api/skills.js +108 -12
  25. package/dist/api/trace.d.ts +5 -0
  26. package/dist/api/trace.js +4 -0
  27. package/dist/cli.js +109 -15
  28. package/dist/client.d.ts +3 -3
  29. package/dist/client.js +5 -5
  30. package/dist/commands/agent/mode.d.ts +6 -0
  31. package/dist/commands/agent/mode.js +75 -0
  32. package/dist/commands/agent-members.js +27 -11
  33. package/dist/commands/agent.js +469 -286
  34. package/dist/commands/auth.js +184 -71
  35. package/dist/commands/bkn-metric.js +37 -16
  36. package/dist/commands/bkn-ops.js +164 -86
  37. package/dist/commands/bkn-query.js +99 -31
  38. package/dist/commands/bkn-schema.d.ts +3 -3
  39. package/dist/commands/bkn-schema.js +127 -86
  40. package/dist/commands/bkn.js +153 -114
  41. package/dist/commands/call.js +23 -13
  42. package/dist/commands/config.js +22 -12
  43. package/dist/commands/context-loader.js +625 -49
  44. package/dist/commands/dataflow.js +14 -6
  45. package/dist/commands/ds.js +52 -30
  46. package/dist/commands/explore.js +18 -15
  47. package/dist/commands/model.js +53 -42
  48. package/dist/commands/resource.d.ts +1 -0
  49. package/dist/commands/{dataview.js → resource.js} +62 -84
  50. package/dist/commands/skill.d.ts +21 -1
  51. package/dist/commands/skill.js +567 -43
  52. package/dist/commands/token.js +11 -0
  53. package/dist/commands/tool.js +46 -29
  54. package/dist/commands/toolbox.js +31 -15
  55. package/dist/commands/trace.d.ts +26 -1
  56. package/dist/commands/trace.js +515 -15
  57. package/dist/commands/vega.js +466 -250
  58. package/dist/help/format.d.ts +65 -0
  59. package/dist/help/format.js +141 -0
  60. package/dist/index.d.ts +5 -5
  61. package/dist/index.js +3 -3
  62. package/dist/resources/bkn.d.ts +5 -0
  63. package/dist/resources/bkn.js +5 -0
  64. package/dist/resources/{dataviews.d.ts → resources.d.ts} +10 -11
  65. package/dist/resources/{dataviews.js → resources.js} +12 -13
  66. package/dist/resources/skills.d.ts +17 -1
  67. package/dist/resources/skills.js +32 -1
  68. package/dist/trace-ai/diagnose/agent-binding.d.ts +67 -0
  69. package/dist/trace-ai/diagnose/agent-binding.js +257 -0
  70. package/dist/trace-ai/diagnose/builtin-rules/tool-retry-intent-mismatch.yaml +68 -0
  71. package/dist/trace-ai/diagnose/index.d.ts +32 -0
  72. package/dist/trace-ai/diagnose/index.js +246 -0
  73. package/dist/trace-ai/diagnose/output-schema-converter.d.ts +24 -0
  74. package/dist/trace-ai/diagnose/output-schema-converter.js +81 -0
  75. package/dist/trace-ai/diagnose/query-extractor.d.ts +14 -0
  76. package/dist/trace-ai/diagnose/query-extractor.js +45 -0
  77. package/dist/trace-ai/diagnose/report-assembler.d.ts +31 -0
  78. package/dist/{trace-core → trace-ai}/diagnose/report-assembler.js +19 -9
  79. package/dist/trace-ai/diagnose/report-markdown.d.ts +18 -0
  80. package/dist/trace-ai/diagnose/report-markdown.js +192 -0
  81. package/dist/{trace-core → trace-ai}/diagnose/rule-loader.js +42 -8
  82. package/dist/{trace-core → trace-ai}/diagnose/schemas.d.ts +77 -2
  83. package/dist/trace-ai/diagnose/schemas.js +154 -0
  84. package/dist/trace-ai/diagnose/signal-probe.d.ts +17 -0
  85. package/dist/trace-ai/diagnose/signal-probe.js +39 -0
  86. package/dist/trace-ai/diagnose/synthesizer-agent.d.ts +40 -0
  87. package/dist/trace-ai/diagnose/synthesizer-agent.js +158 -0
  88. package/dist/{trace-core → trace-ai}/diagnose/trace-shaper.js +1 -0
  89. package/dist/{trace-core → trace-ai}/diagnose/types.d.ts +55 -6
  90. package/dist/trace-ai/eval-set/assertion-evaluator.d.ts +29 -0
  91. package/dist/trace-ai/eval-set/assertion-evaluator.js +100 -0
  92. package/dist/trace-ai/eval-set/builder.d.ts +36 -0
  93. package/dist/trace-ai/eval-set/builder.js +126 -0
  94. package/dist/trace-ai/eval-set/index.d.ts +15 -0
  95. package/dist/trace-ai/eval-set/index.js +10 -0
  96. package/dist/trace-ai/eval-set/output-writer.d.ts +27 -0
  97. package/dist/trace-ai/eval-set/output-writer.js +126 -0
  98. package/dist/trace-ai/eval-set/query-picker.d.ts +37 -0
  99. package/dist/trace-ai/eval-set/query-picker.js +147 -0
  100. package/dist/trace-ai/eval-set/redactor.d.ts +42 -0
  101. package/dist/trace-ai/eval-set/redactor.js +133 -0
  102. package/dist/trace-ai/eval-set/rubric-templates/answer-match-reference.prompt.md +19 -0
  103. package/dist/trace-ai/eval-set/schemas.d.ts +136 -0
  104. package/dist/trace-ai/eval-set/schemas.js +130 -0
  105. package/dist/trace-ai/eval-set/semantic-match-provider.d.ts +33 -0
  106. package/dist/trace-ai/eval-set/semantic-match-provider.js +51 -0
  107. package/dist/trace-ai/eval-set/test-runner.d.ts +34 -0
  108. package/dist/trace-ai/eval-set/test-runner.js +153 -0
  109. package/dist/trace-ai/eval-set/types.d.ts +46 -0
  110. package/dist/trace-ai/eval-set/types.js +8 -0
  111. package/dist/trace-ai/exp/bundle-writer.d.ts +10 -0
  112. package/dist/trace-ai/exp/bundle-writer.js +54 -0
  113. package/dist/trace-ai/exp/claude-binary.d.ts +5 -0
  114. package/dist/trace-ai/exp/claude-binary.js +30 -0
  115. package/dist/trace-ai/exp/coordinator.d.ts +45 -0
  116. package/dist/trace-ai/exp/coordinator.js +203 -0
  117. package/dist/trace-ai/exp/eval-runner.d.ts +14 -0
  118. package/dist/trace-ai/exp/eval-runner.js +47 -0
  119. package/dist/trace-ai/exp/exp-store/abort-signal.d.ts +3 -0
  120. package/dist/trace-ai/exp/exp-store/abort-signal.js +27 -0
  121. package/dist/trace-ai/exp/exp-store/candidate-lineage-yaml.d.ts +4 -0
  122. package/dist/trace-ai/exp/exp-store/candidate-lineage-yaml.js +37 -0
  123. package/dist/trace-ai/exp/exp-store/events-jsonl.d.ts +17 -0
  124. package/dist/trace-ai/exp/exp-store/events-jsonl.js +60 -0
  125. package/dist/trace-ai/exp/exp-store/exp-registry.d.ts +6 -0
  126. package/dist/trace-ai/exp/exp-store/exp-registry.js +41 -0
  127. package/dist/trace-ai/exp/exp-store/index.d.ts +46 -0
  128. package/dist/trace-ai/exp/exp-store/index.js +59 -0
  129. package/dist/trace-ai/exp/exp-store/lock.d.ts +3 -0
  130. package/dist/trace-ai/exp/exp-store/lock.js +73 -0
  131. package/dist/trace-ai/exp/exp-store/mission-md.d.ts +3 -0
  132. package/dist/trace-ai/exp/exp-store/mission-md.js +37 -0
  133. package/dist/trace-ai/exp/exp-store/readme-template.d.ts +5 -0
  134. package/dist/trace-ai/exp/exp-store/readme-template.js +25 -0
  135. package/dist/trace-ai/exp/exp-store/round-yaml.d.ts +3 -0
  136. package/dist/trace-ai/exp/exp-store/round-yaml.js +33 -0
  137. package/dist/trace-ai/exp/index.d.ts +8 -0
  138. package/dist/trace-ai/exp/index.js +238 -0
  139. package/dist/trace-ai/exp/info.d.ts +35 -0
  140. package/dist/trace-ai/exp/info.js +120 -0
  141. package/dist/trace-ai/exp/patch/agent-config.d.ts +1 -0
  142. package/dist/trace-ai/exp/patch/agent-config.js +26 -0
  143. package/dist/trace-ai/exp/patch/index.d.ts +2 -0
  144. package/dist/trace-ai/exp/patch/index.js +13 -0
  145. package/dist/trace-ai/exp/patch/skill.d.ts +1 -0
  146. package/dist/trace-ai/exp/patch/skill.js +24 -0
  147. package/dist/trace-ai/exp/providers/synthesizer-client.d.ts +14 -0
  148. package/dist/trace-ai/exp/providers/synthesizer-client.js +39 -0
  149. package/dist/trace-ai/exp/providers/triage-client.d.ts +19 -0
  150. package/dist/trace-ai/exp/providers/triage-client.js +51 -0
  151. package/dist/trace-ai/exp/schemas.d.ts +147 -0
  152. package/dist/trace-ai/exp/schemas.js +50 -0
  153. package/dist/trace-ai/exp/scoring.d.ts +2 -0
  154. package/dist/trace-ai/exp/scoring.js +46 -0
  155. package/dist/trace-ai/scan/aggregator.d.ts +20 -0
  156. package/dist/trace-ai/scan/aggregator.js +26 -0
  157. package/dist/trace-ai/scan/artifacts/paths.d.ts +12 -0
  158. package/dist/trace-ai/scan/artifacts/paths.js +18 -0
  159. package/dist/trace-ai/scan/artifacts/writer.d.ts +67 -0
  160. package/dist/trace-ai/scan/artifacts/writer.js +96 -0
  161. package/dist/trace-ai/scan/batched-rubric.d.ts +55 -0
  162. package/dist/trace-ai/scan/batched-rubric.js +159 -0
  163. package/dist/trace-ai/scan/cross-trace-synthesizer.d.ts +24 -0
  164. package/dist/trace-ai/scan/cross-trace-synthesizer.js +93 -0
  165. package/dist/trace-ai/scan/index.d.ts +31 -0
  166. package/dist/trace-ai/scan/index.js +390 -0
  167. package/dist/trace-ai/scan/prompts/builtin/cross-trace-synthesizer-v1.prompt.md +44 -0
  168. package/dist/trace-ai/scan/prompts/builtin/rubric-judge-batch-v1.prompt.md +44 -0
  169. package/dist/trace-ai/scan/runner.d.ts +25 -0
  170. package/dist/trace-ai/scan/runner.js +42 -0
  171. package/dist/trace-ai/scan/sampler.d.ts +18 -0
  172. package/dist/trace-ai/scan/sampler.js +81 -0
  173. package/dist/trace-ai/scan/scan-summary-markdown.d.ts +2 -0
  174. package/dist/trace-ai/scan/scan-summary-markdown.js +71 -0
  175. package/dist/trace-ai/scan/scan-summary-schema.d.ts +73 -0
  176. package/dist/trace-ai/scan/scan-summary-schema.js +61 -0
  177. package/dist/trace-ai/scan/single-agent-validator.d.ts +23 -0
  178. package/dist/trace-ai/scan/single-agent-validator.js +42 -0
  179. package/dist/trace-ai/scan/traces-list-parser.d.ts +15 -0
  180. package/dist/trace-ai/scan/traces-list-parser.js +46 -0
  181. package/package.json +2 -2
  182. package/dist/api/dataviews.d.ts +0 -117
  183. package/dist/api/dataviews.js +0 -265
  184. package/dist/commands/dataview.d.ts +0 -8
  185. package/dist/trace-core/diagnose/index.d.ts +0 -9
  186. package/dist/trace-core/diagnose/index.js +0 -104
  187. package/dist/trace-core/diagnose/report-assembler.d.ts +0 -12
  188. package/dist/trace-core/diagnose/schemas.js +0 -94
  189. package/dist/trace-core/diagnose/signal-probe.d.ts +0 -5
  190. package/dist/trace-core/diagnose/signal-probe.js +0 -21
  191. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/excessive-tool-calls-per-turn.d.ts +0 -0
  192. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/excessive-tool-calls-per-turn.js +0 -0
  193. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/excessive-tool-calls-per-turn.yaml +0 -0
  194. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/llm-response-truncated-no-continue.d.ts +0 -0
  195. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/llm-response-truncated-no-continue.js +0 -0
  196. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/llm-response-truncated-no-continue.yaml +0 -0
  197. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/register.d.ts +0 -0
  198. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/register.js +0 -0
  199. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/retrieval-empty-no-fallback.d.ts +0 -0
  200. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/retrieval-empty-no-fallback.js +0 -0
  201. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/retrieval-empty-no-fallback.yaml +0 -0
  202. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-error-swallowed.d.ts +0 -0
  203. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-error-swallowed.js +0 -0
  204. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-error-swallowed.yaml +0 -0
  205. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-loop-no-state-change.d.ts +0 -0
  206. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-loop-no-state-change.js +0 -0
  207. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-loop-no-state-change.yaml +0 -0
  208. /package/dist/{trace-core → trace-ai}/diagnose/predicate-registry.d.ts +0 -0
  209. /package/dist/{trace-core → trace-ai}/diagnose/predicate-registry.js +0 -0
  210. /package/dist/{trace-core → trace-ai}/diagnose/rule-loader.d.ts +0 -0
  211. /package/dist/{trace-core → trace-ai}/diagnose/synthesizer-template.d.ts +0 -0
  212. /package/dist/{trace-core → trace-ai}/diagnose/synthesizer-template.js +0 -0
  213. /package/dist/{trace-core → trace-ai}/diagnose/trace-shaper.d.ts +0 -0
  214. /package/dist/{trace-core → trace-ai}/diagnose/types.js +0 -0
@@ -0,0 +1,36 @@
1
+ /**
2
+ * M5 eval-set builder — orchestrates build:
3
+ * picker → ensureQueryId → redact → write (with conflict resolution) → validate
4
+ *
5
+ * `ensureQueryId` is the deterministic hash-based ID generator (inline here,
6
+ * not a separate file — spec doc §9 "反过度工程" (anti-over-engineering) decision).
7
+ */
8
+ import type { BuildResult } from "./types.js";
9
+ import { type ConflictStrategy } from "./output-writer.js";
10
+ export declare class BuilderError extends Error { // error type thrown by build(); wraps picker / redactor / writer failures and the "0 cases lifted" case
11
+ readonly cause?: Error | undefined; // lower-level error handed to the constructor, if any
12
+ constructor(message: string, cause?: Error | undefined);
13
+ }
14
+ export type BuildSource = { // where build() lifts its eval cases from
15
+ kind: "diagnosis"; // build() hands `path` to liftFromDiagnosis
16
+ path: string;
17
+ } | {
18
+ kind: "queries"; // build() hands `path` to liftFromQueriesFile
19
+ path: string;
20
+ };
21
+ export interface BuildOpts { // options consumed by build()
22
+ source: BuildSource; // selects the picker and the path it reads
23
+ outDir: string; // forwarded to writeEvalSet as the output directory
24
+ evalSetId: string; // forwarded to writeEvalSet; recorded as eval_set_id when a new index.yaml is created
25
+ onConflict: ConflictStrategy; // forwarded to writeEvalSet unchanged
26
+ /** From `--redaction-rules=<path>` */
27
+ redactionRulesCliFlag: string | undefined; // forwarded to loadRules as `cliFlag`
28
+ /** From CWD: usually `path.join(process.cwd(), "redaction-rules")` — caller passes resolved path */
29
+ repoDir: string | undefined; // forwarded to loadRules as `repoDir`
30
+ }
31
+ export declare function ensureQueryId(c: { // returns c.query_id when non-empty, otherwise the first 12 hex chars of sha256 over the canonical JSON of { input, tags }
32
+ query_id: string; // "" means "generate one"
33
+ input: unknown;
34
+ tags?: string[]; // a missing value is hashed as []
35
+ }): string;
36
+ export declare function build(opts: BuildOpts): Promise<BuildResult>; // pick → ensureQueryId → redact → write; rejects with BuilderError when a stage fails with its own error type or when zero cases are lifted
@@ -0,0 +1,126 @@
1
+ /**
2
+ * M5 eval-set builder — orchestrates build:
3
+ * picker → ensureQueryId → redact → write (with conflict resolution) → validate
4
+ *
5
+ * `ensureQueryId` is the deterministic hash-based ID generator (inline here,
6
+ * not a separate file — spec doc §9 "反过度工程" (anti-over-engineering) decision).
7
+ */
8
+ import { createHash } from "node:crypto";
9
+ import { liftFromQueriesFile, liftFromDiagnosis, QueryPickerError } from "./query-picker.js";
10
+ import { loadRules, applyRules, RedactorError } from "./redactor.js";
11
+ import { writeEvalSet, WriterError } from "./output-writer.js";
12
/**
 * Error raised by the eval-set builder.
 *
 * `cause` carries the lower-level error (picker / redactor / writer) that
 * triggered the failure, when there is one; otherwise it is `undefined`.
 */
export class BuilderError extends Error {
    cause;
    /**
     * @param message Human-readable description of the failure.
     * @param cause   Underlying error being wrapped, if any.
     */
    constructor(message, cause) {
        super(message);
        this.cause = cause;
        this.name = "BuilderError";
    }
}
20
+ /**
21
+ * Canonical JSON serialization for hashing — keys sorted, no whitespace.
22
+ * Ensures hash(case) is stable across runs.
23
+ */
24
+ function canonicalJson(value) {
25
+ if (value === null || typeof value !== "object")
26
+ return JSON.stringify(value);
27
+ if (Array.isArray(value))
28
+ return "[" + value.map(canonicalJson).join(",") + "]";
29
+ const obj = value;
30
+ const keys = Object.keys(obj).sort();
31
+ return "{" + keys.map((k) => JSON.stringify(k) + ":" + canonicalJson(obj[k])).join(",") + "}";
32
+ }
33
/**
 * Return the case's `query_id`, generating one when it is empty or missing.
 *
 * A generated id is the first 12 hex characters of the SHA-256 digest of the
 * canonical JSON of `{ input, tags }` (absent tags count as `[]`), so the same
 * case content always maps to the same id.
 *
 * @param c Case-like object with `query_id`, `input` and optional `tags`.
 * @returns The existing id, or the derived 12-character hex id.
 */
export function ensureQueryId(c) {
    const existingId = c.query_id;
    if (existingId && existingId.length > 0) {
        return existingId;
    }
    const hashSeed = canonicalJson({ input: c.input, tags: c.tags ?? [] });
    const digestHex = createHash("sha256").update(hashSeed).digest("hex");
    return digestHex.slice(0, 12);
}
39
/**
 * Produce a redacted copy of an eval case.
 *
 * `applyFn` is run over the user message and, when a reference exists, over
 * the reference answer. Assertions are passed through by reference and are
 * intentionally NOT redacted: they are user-authored test expectations
 * (regex / value strings), not raw PII. The `tags` key is always present on
 * the result, even when the source case has none.
 *
 * @param c       Case to redact.
 * @param applyFn String → string redaction function.
 * @returns A new case object; the input case is not modified.
 */
function redactCase(c, applyFn) {
    const { reference, assertions } = c;
    return {
        query_id: c.query_id,
        input: { user_message: applyFn(c.input.user_message) },
        tags: c.tags,
        ...(reference ? { reference: { answer: applyFn(reference.answer) } } : {}),
        ...(assertions ? { assertions } : {}),
    };
}
54
/**
 * Build (or extend) an eval set on disk.
 *
 * Pipeline: pick cases from the source → fill in missing query_ids → load the
 * redaction rules and redact → guard against an empty result → write with
 * conflict resolution and schema validation.
 *
 * Failures raised by a stage with that stage's own error type
 * (QueryPickerError / RedactorError / WriterError) are re-thrown as
 * BuilderError with the original attached as `cause`; any other error
 * propagates unchanged.
 *
 * @param opts Source, output location, eval-set id, conflict strategy and
 *             redaction-rule locations.
 * @returns Counts of written / skipped cases, conflicting ids, shard paths and
 *          the source the redaction rules were loaded from.
 */
export async function build(opts) {
    const { source } = opts;
    // Stage 1: pick cases
    let pickedCases;
    let skippedFindings = 0;
    try {
        if (source.kind === "queries") {
            pickedCases = await liftFromQueriesFile(source.path);
        }
        else {
            const diagnosis = await liftFromDiagnosis(source.path);
            pickedCases = diagnosis.cases;
            skippedFindings = diagnosis.skipped_findings_count;
        }
    }
    catch (e) {
        if (!(e instanceof QueryPickerError)) {
            throw e;
        }
        throw new BuilderError(`picker failed: ${e.message}`, e);
    }
    // Stage 2: ensure query_id
    const identifiedCases = pickedCases.map((c) => ({ ...c, query_id: ensureQueryId(c) }));
    // Stage 3: redact
    let loadedRules;
    try {
        loadedRules = await loadRules({
            cliFlag: opts.redactionRulesCliFlag,
            repoDir: opts.repoDir,
        });
    }
    catch (e) {
        if (!(e instanceof RedactorError)) {
            throw e;
        }
        throw new BuilderError(`redactor failed: ${e.message}`, e);
    }
    const redactText = (text) => applyRules(text, loadedRules.rules);
    const redactedCases = identifiedCases.map((c) => redactCase(c, redactText));
    // Stage 3.5: fail early with an actionable message when nothing was lifted,
    // instead of letting the writer reject an empty case list. A typical cause
    // is a diagnosis source whose findings all lack a query or assertions.
    if (redactedCases.length === 0) {
        const sourceFlag = source.kind === "diagnosis" ? "--diagnosis=" : "--queries=";
        let skippedNote = "";
        if (skippedFindings > 0) {
            skippedNote = `\n Skipped ${skippedFindings} finding(s) — common causes:\n - findings have query: null (M4 trace runtime doesn't emit gen_ai.input.messages)\n - findings have empty assertions[]`;
        }
        throw new BuilderError(`lifted 0 eval-cases from ${sourceFlag}${source.path}.${skippedNote}\n Alternatives: use --queries=<file> to provide queries manually, or upgrade M4 trace runtime to emit gen_ai.input.messages.`);
    }
    // Stage 4: write + conflict + validate
    let written;
    try {
        written = await writeEvalSet({
            outDir: opts.outDir,
            evalSetId: opts.evalSetId,
            newCases: redactedCases,
            onConflict: opts.onConflict,
        });
    }
    catch (e) {
        if (!(e instanceof WriterError)) {
            throw e;
        }
        throw new BuilderError(`writer failed: ${e.message}`, e);
    }
    const { cases_written, cases_skipped, conflicts, shard_paths } = written;
    return {
        cases_written,
        // findings the picker skipped count as skipped cases too
        cases_skipped: cases_skipped + skippedFindings,
        conflicts,
        shard_paths,
        redaction_rules_source: loadedRules.source,
    };
}
@@ -0,0 +1,15 @@
1
+ /**
2
+ * M5 eval-set module — public exports.
3
+ *
4
+ * Consumers (commands/trace.ts, tests, future M6 reuse) import from this
5
+ * barrel; internal modules cross-import via direct paths.
6
+ */
7
+ export type { EvalCase, EvalCaseInput, EvalReference, EvalAssertion, AssertionType, EvalSetIndex, EvalSetIndexShard, BuildResult, RedactionRule, } from "./types.js";
8
+ export { build, ensureQueryId, BuilderError } from "./builder.js";
9
+ export type { BuildOpts, BuildSource } from "./builder.js";
10
+ export { run as runTest } from "./test-runner.js";
11
+ export type { RunOpts, RunnerDeps } from "./test-runner.js";
12
+ export { evaluateAssertion } from "./assertion-evaluator.js";
13
+ export type { AssertionContext, AssertionResult, SemanticMatchProvider, SemanticMatchVerdict, } from "./assertion-evaluator.js";
14
+ export { createBuiltinSemanticMatchProvider, ANSWER_MATCH_REFERENCE_REF, AnswerMatchOutputSchema, } from "./semantic-match-provider.js";
15
+ export type { CreateSemanticMatchProviderOpts } from "./semantic-match-provider.js";
@@ -0,0 +1,10 @@
1
+ /**
2
+ * M5 eval-set module — public exports.
3
+ *
4
+ * Consumers (commands/trace.ts, tests, future M6 reuse) import from this
5
+ * barrel; internal modules cross-import via direct paths.
6
+ */
7
+ export { build, ensureQueryId, BuilderError } from "./builder.js";
8
+ export { run as runTest } from "./test-runner.js";
9
+ export { evaluateAssertion } from "./assertion-evaluator.js";
10
+ export { createBuiltinSemanticMatchProvider, ANSWER_MATCH_REFERENCE_REF, AnswerMatchOutputSchema, } from "./semantic-match-provider.js";
@@ -0,0 +1,27 @@
1
+ /**
2
+ * M5 eval-set output writer — handles directory layout, index upsert, shard
3
+ * merge, on-conflict resolution (fail / skip / overwrite), and .bak preservation.
4
+ *
5
+ * MVP layout: always one shard named `cases.yaml`. Users can manually split
6
+ * into multi-shard later (re-write `index.yaml` to reference more shards)
7
+ * and call `kweaver trace schema validate` to verify.
8
+ */
9
+ import type { EvalCase } from "./types.js";
10
+ export declare class WriterError extends Error { // error type thrown by the output writer (duplicate / conflicting ids, schema-validation failures)
11
+ readonly conflictIds?: string[] | undefined; // offending query_ids; passed only for duplicate / conflict errors
12
+ constructor(message: string, conflictIds?: string[] | undefined);
13
+ }
14
+ export type ConflictStrategy = "fail" | "skip" | "overwrite"; // for a query_id already in the shard: "fail" throws, "skip" keeps the existing case, "overwrite" replaces it after copying the shard to `.bak`
15
+ export interface WriteEvalSetOpts { // options consumed by writeEvalSet()
16
+ outDir: string; // created recursively if missing; cases.yaml and index.yaml are written here
17
+ evalSetId: string; // used as eval_set_id only when index.yaml does not exist yet; an existing index keeps its own id
18
+ newCases: EvalCase[]; // a repeated query_id inside this batch makes writeEvalSet throw
19
+ onConflict: ConflictStrategy; // how to treat incoming ids already present in the shard
20
+ }
21
+ export interface WriteEvalSetResult { // summary returned by writeEvalSet()
22
+ cases_written: number; // incoming cases written: the fresh ones, plus conflicting ones under "overwrite"
23
+ cases_skipped: number; // incoming cases dropped under "skip"; 0 for the other strategies
24
+ conflicts: string[]; // query_ids of incoming cases that already existed in the shard
25
+ shard_paths: string[]; // shard files written — currently the single `<outDir>/cases.yaml`
26
+ }
27
+ export declare function writeEvalSet(opts: WriteEvalSetOpts): Promise<WriteEvalSetResult>; // merges newCases into <outDir>/cases.yaml and upserts <outDir>/index.yaml; rejects with WriterError on duplicate ids, conflicts under "fail", or schema-validation failures
@@ -0,0 +1,126 @@
1
+ /**
2
+ * M5 eval-set output writer — handles directory layout, index upsert, shard
3
+ * merge, on-conflict resolution (fail / skip / overwrite), and .bak preservation.
4
+ *
5
+ * MVP layout: always one shard named `cases.yaml`. Users can manually split
6
+ * into multi-shard later (re-write `index.yaml` to reference more shards)
7
+ * and call `kweaver trace schema validate` to verify.
8
+ */
9
+ import { readFile, writeFile, copyFile, mkdir, stat } from "node:fs/promises";
10
+ import path from "node:path";
11
+ import yaml from "js-yaml";
12
+ import { EvalSetIndexSchema, EvalSetShardSchema } from "./schemas.js";
13
/**
 * Error raised by the eval-set output writer.
 *
 * `conflictIds` lists the offending query_ids when the failure is a duplicate
 * or conflict; it is `undefined` for other failures (e.g. schema validation).
 */
export class WriterError extends Error {
    conflictIds;
    /**
     * @param message     Human-readable description of the failure.
     * @param conflictIds query_ids involved in the failure, if any.
     */
    constructor(message, conflictIds) {
        super(message);
        this.conflictIds = conflictIds;
        this.name = "WriterError";
    }
}
21
+ const SHARD_NAME = "cases.yaml"; // file name of the single shard; joined onto outDir and listed as a shard path in index.yaml
22
+ const INDEX_NAME = "index.yaml"; // file name of the eval-set index; joined onto outDir
23
/**
 * Report whether `p` refers to an existing regular file.
 *
 * Uses `stat` rather than reading the file, so contents are never loaded just
 * to probe for existence, and a file that exists but cannot be read is still
 * reported as present (callers then surface the read error instead of
 * treating the file as absent and overwriting it).
 *
 * @param p Path to probe.
 * @returns Promise resolving to true only when `p` is a regular file; false
 *          when it is missing, is not a regular file (e.g. a directory), or
 *          cannot be stat'ed.
 */
async function fileExists(p) {
    try {
        return (await stat(p)).isFile();
    }
    catch {
        return false;
    }
}
32
/**
 * Load the cases stored in an existing shard file.
 *
 * The file is read exactly once. A missing file (ENOENT) means no shard has
 * been written yet and yields an empty list; any other read failure is
 * reported as a WriterError rather than being mistaken for "no existing
 * cases", which would let the caller overwrite a shard it could not read.
 *
 * @param shardPath Path of the shard yaml file.
 * @returns The validated cases of the shard, or `[]` when the file is absent.
 * @throws WriterError when the file cannot be read, is not valid yaml, or
 *         fails shard schema validation.
 */
async function readShardCases(shardPath) {
    let raw;
    try {
        raw = await readFile(shardPath, "utf8");
    }
    catch (e) {
        const err = e;
        if (err.code === "ENOENT") {
            return [];
        }
        throw new WriterError(`failed to read existing shard at ${shardPath}: ${err.message}`);
    }
    let parsed;
    try {
        parsed = yaml.load(raw);
    }
    catch (e) {
        throw new WriterError(`existing shard at ${shardPath} is not valid yaml: ${e.message}`);
    }
    const r = EvalSetShardSchema.safeParse(parsed);
    if (!r.success) {
        throw new WriterError(`existing shard at ${shardPath} fails schema validation: ${r.error.issues[0].message}`);
    }
    return r.data.cases;
}
43
/**
 * Merge `newCases` into the eval set stored under `outDir`.
 *
 * Steps, in order:
 *   1. reject a batch that repeats a query_id;
 *   2. create `outDir` and load the existing shard (`cases.yaml`), if any;
 *   3. split the batch into conflicting (id already in the shard) and fresh
 *      cases and apply the conflict strategy — "fail" throws, "skip" keeps
 *      the stored case, "overwrite" copies the shard to `.bak` and replaces
 *      the stored case;
 *   4. schema-validate and write the merged shard;
 *   5. create `index.yaml`, or make sure an existing one lists the shard.
 *
 * @param opts Output directory, eval-set id, incoming cases, conflict strategy.
 * @returns Written / skipped counts, conflicting ids and the shard path.
 * @throws WriterError on duplicate ids, conflicts under "fail", or when the
 *         merged shard or an existing index fails schema validation.
 */
export async function writeEvalSet(opts) {
    const { outDir, evalSetId, newCases, onConflict } = opts;
    const toYaml = (doc) => yaml.dump(doc, { lineWidth: 120, noRefs: true });
    // intra-batch duplicate detection
    const batchIds = new Set();
    const repeatedIds = [];
    for (const incoming of newCases) {
        if (batchIds.has(incoming.query_id)) {
            repeatedIds.push(incoming.query_id);
        }
        else {
            batchIds.add(incoming.query_id);
        }
    }
    if (repeatedIds.length > 0) {
        throw new WriterError(`intra-batch duplicate query_id(s): ${repeatedIds.join(", ")}`, repeatedIds);
    }
    await mkdir(outDir, { recursive: true });
    const shardPath = path.join(outDir, SHARD_NAME);
    const existingCases = await readShardCases(shardPath);
    const existingIds = new Set(existingCases.map((c) => c.query_id));
    // split the batch by whether the id is already stored
    const conflicting = [];
    const fresh = [];
    for (const incoming of newCases) {
        (existingIds.has(incoming.query_id) ? conflicting : fresh).push(incoming);
    }
    if (onConflict === "fail" && conflicting.length > 0) {
        const ids = conflicting.map((c) => c.query_id);
        throw new WriterError(`query_id conflict(s): ${ids.join(", ")}`, ids);
    }
    let mergedCases;
    let casesWritten;
    let casesSkipped;
    switch (onConflict) {
        case "skip":
            mergedCases = [...existingCases, ...fresh];
            casesWritten = fresh.length;
            casesSkipped = conflicting.length;
            break;
        case "overwrite": {
            // keep a backup of the shard before replacing any stored case
            if (conflicting.length > 0 && (await fileExists(shardPath))) {
                await copyFile(shardPath, shardPath + ".bak");
            }
            const replacedIds = new Set(conflicting.map((c) => c.query_id));
            const survivors = existingCases.filter((c) => !replacedIds.has(c.query_id));
            mergedCases = [...survivors, ...fresh, ...conflicting];
            casesWritten = fresh.length + conflicting.length;
            casesSkipped = 0;
            break;
        }
        default:
            // "fail" strategy — no conflicts can remain here (handled above)
            mergedCases = [...existingCases, ...fresh];
            casesWritten = fresh.length;
            casesSkipped = 0;
    }
    const shardDoc = {
        schema_version: "trace-eval-set/v1",
        cases: mergedCases,
    };
    const shardCheck = EvalSetShardSchema.safeParse(shardDoc);
    if (!shardCheck.success) {
        throw new WriterError(`merged shard fails schema validation: ${shardCheck.error.issues[0].message}`);
    }
    await writeFile(shardPath, toYaml(shardDoc), "utf8");
    const indexPath = path.join(outDir, INDEX_NAME);
    let indexDoc;
    if (!(await fileExists(indexPath))) {
        indexDoc = {
            schema_version: "trace-eval-set-index/v1",
            eval_set_id: evalSetId,
            shards: [{ path: SHARD_NAME }],
        };
    }
    else {
        const rawIndex = await readFile(indexPath, "utf8");
        const indexCheck = EvalSetIndexSchema.safeParse(yaml.load(rawIndex));
        if (!indexCheck.success) {
            throw new WriterError(`existing index.yaml fails schema validation: ${indexCheck.error.issues[0].message}`);
        }
        indexDoc = indexCheck.data;
        const alreadyListed = indexDoc.shards.some((s) => s.path === SHARD_NAME);
        if (!alreadyListed) {
            indexDoc.shards.push({ path: SHARD_NAME });
        }
    }
    await writeFile(indexPath, toYaml(indexDoc), "utf8");
    return {
        cases_written: casesWritten,
        cases_skipped: casesSkipped,
        conflicts: conflicting.map((c) => c.query_id),
        shard_paths: [shardPath],
    };
}
@@ -0,0 +1,37 @@
1
+ /**
2
+ * M5 eval-set query picker — two lift functions:
3
+ * - liftFromQueriesFile(path) reads `trace-eval-set-input/v1` simplified yaml
4
+ * - liftFromDiagnosis(dir) reads M4 diagnose report yamls (added in Task 4)
5
+ *
6
+ * Both return EvalCase[] (without query_id auto-fill — that happens in builder.ts).
7
+ */
8
+ import type { EvalCase } from "./types.js";
9
+ export declare class QueryPickerError extends Error { // error type thrown by the query picker (read / parse / schema-validation failures)
10
+ readonly path?: string | undefined; // path of the input that failed, as handed to the constructor
11
+ constructor(message: string, path?: string | undefined);
12
+ }
13
+ export declare function liftFromQueriesFile(filePath: string): Promise<EvalCase[]>; // reads and schema-validates one yaml file; cases without a query_id come back with query_id ""; rejects with QueryPickerError on read / parse / validation failure
14
+ export interface LiftFromDiagnosisResult { // result of liftFromDiagnosis()
15
+ cases: EvalCase[]; // cases lifted from the diagnose reports
16
+ skipped_findings_count: number; // findings that were not lifted; build() adds this to cases_skipped
17
+ skipped_findings_summary: string[]; // NOTE(review): presumably one human-readable entry per skipped finding — confirm against liftFromDiagnosis
18
+ }
19
+ /**
20
+ * Read all *.yaml / *.yml files in `dirPath`, validate each as `trace-diagnose-report/v1`,
21
+ * and extract `findings[*].verify_with.suggested_eval_case` as EvalCases.
22
+ *
23
+ * Skips findings where:
24
+ * - `suggested_eval_case.query` is null (M4 has no user query → can't construct EvalCase.input)
25
+ * - `suggested_eval_case.assertions` is empty (refinement would fail; no reference either)
26
+ *
27
+ * Lifts:
28
+ * - `EvalCase.input.user_message = suggested_eval_case.query`
29
+ * - `EvalCase.query_id = suggested_eval_case.query_id ?? ""` (empty → builder.ensureQueryId fills)
30
+ * - `EvalCase.assertions` = M4 string templates wrapped as placeholder `contains` assertions
31
+ * with `_note` flagging "convert to structured manually"
32
+ * - `EvalCase.reference = undefined` (M4 doesn't emit reference)
33
+ *
34
+ * Files that fail to schema-validate cause a fail-fast error (all *.yaml in dir
35
+ * must be diagnose reports — picker doesn't filter by content).
36
+ */
37
+ export declare function liftFromDiagnosis(dirPath: string): Promise<LiftFromDiagnosisResult>;
@@ -0,0 +1,147 @@
1
+ /**
2
+ * M5 eval-set query picker — two lift functions:
3
+ * - liftFromQueriesFile(path) reads `trace-eval-set-input/v1` simplified yaml
4
+ * - liftFromDiagnosis(dir) reads M4 diagnose report yamls (added in Task 4)
5
+ *
6
+ * Both produce EvalCase objects without query_id auto-fill (that happens in builder.ts); liftFromDiagnosis wraps its cases in a LiftFromDiagnosisResult.
7
+ */
8
+ import { readFile, readdir } from "node:fs/promises";
9
+ import path from "node:path";
10
+ import yaml from "js-yaml";
11
+ import { EvalSetInputSchema } from "./schemas.js";
12
+ import { ReportSchema } from "../diagnose/schemas.js";
13
+ export class QueryPickerError extends Error { // Error type thrown by the picker's lift functions; carries an optional path alongside the message.
14
+ path; // File or directory path passed by the thrower; undefined when none was given.
15
+ constructor(message, path) {
16
+ super(message);
17
+ this.path = path;
18
+ this.name = "QueryPickerError"; // Lets callers identify the error by name as well as by instanceof.
19
+ }
20
+ }
21
+ export async function liftFromQueriesFile(filePath) { // Reads the yaml at filePath, validates it with EvalSetInputSchema, and returns its cases; every failure is thrown as QueryPickerError.
22
+ let raw;
23
+ try {
24
+ raw = await readFile(filePath, "utf8");
25
+ }
26
+ catch (e) {
27
+ const err = e;
28
+ if (err.code === "ENOENT") { // A missing file gets its own, clearer message.
29
+ throw new QueryPickerError(`file not found: ${filePath}`, filePath);
30
+ }
31
+ throw new QueryPickerError(`failed to read ${filePath}: ${err.message}`, filePath);
32
+ }
33
+ let parsed;
34
+ try {
35
+ parsed = yaml.load(raw);
36
+ }
37
+ catch (e) {
38
+ throw new QueryPickerError(`failed to parse yaml ${filePath}: ${e.message}`, filePath);
39
+ }
40
+ const result = EvalSetInputSchema.safeParse(parsed); // safeParse reports failure through result.success instead of throwing.
41
+ if (!result.success) {
42
+ const firstIssue = result.error.issues[0]; // Only the first validation issue is surfaced in the message.
43
+ const where = firstIssue.path.join("."); // Dotted location of the offending field.
44
+ throw new QueryPickerError(`schema validation failed for ${filePath} at '${where}': ${firstIssue.message}`, filePath);
45
+ }
46
+ return result.data.cases.map((c) => ({ // Copy only the EvalCase fields; query_id is normalised below.
47
+ query_id: c.query_id ?? "", // empty → builder.ensureQueryId fills it; undefined would break downstream
48
+ input: c.input,
49
+ reference: c.reference,
50
+ assertions: c.assertions,
51
+ tags: c.tags,
52
+ }));
53
+ }
54
+ /**
55
+ * Read all *.yaml / *.yml files in `dirPath`, validate each as `trace-diagnose-report/v1`,
56
+ * and extract `findings[*].verify_with.suggested_eval_case` as EvalCases.
57
+ *
58
+ * Skips findings where:
59
+ * - `suggested_eval_case.query` is null (M4 has no user query → can't construct EvalCase.input)
60
+ * - `suggested_eval_case.assertions` is empty (refinement would fail; no reference either)
61
+ *
62
+ * Lifts:
63
+ * - `EvalCase.input.user_message = suggested_eval_case.query`
64
+ * - `EvalCase.query_id = suggested_eval_case.query_id ?? ""` (empty → builder.ensureQueryId fills)
65
+ * - `EvalCase.assertions` = M4 string templates wrapped as placeholder `contains` assertions
66
+ * with `_note` flagging "convert to structured manually"
67
+ * - `EvalCase.reference = undefined` (M4 doesn't emit reference)
68
+ *
69
+ * Files that fail to schema-validate cause a fail-fast error (all *.yaml in dir
70
+ * must be diagnose reports — picker doesn't filter by content).
71
+ */
72
+ export async function liftFromDiagnosis(dirPath) {
73
+ let entries;
74
+ try {
75
+ entries = await readdir(dirPath);
76
+ }
77
+ catch (e) {
78
+ const err = e;
79
+ if (err.code === "ENOENT") {
80
+ throw new QueryPickerError(`directory not found: ${dirPath}`, dirPath);
81
+ }
82
+ throw new QueryPickerError(`failed to read directory ${dirPath}: ${err.message}`, dirPath);
83
+ }
84
+ const yamlFiles = entries
85
+ .filter((e) => e.endsWith(".yaml") || e.endsWith(".yml"))
86
+ .map((e) => path.join(dirPath, e));
87
+ // Accumulate by query_id so multiple findings from the same conversation
88
+ // collapse into one case with merged assertions (avoids intra-batch dup error).
89
+ const byQueryId = new Map();
90
+ let skipped = 0;
91
+ const skippedSummary = [];
92
+ for (const file of yamlFiles) {
93
+ let raw;
94
+ try {
95
+ raw = await readFile(file, "utf8");
96
+ }
97
+ catch (e) {
98
+ throw new QueryPickerError(`failed to read ${file}: ${e.message}`, file);
99
+ }
100
+ let parsed;
101
+ try {
102
+ parsed = yaml.load(raw);
103
+ }
104
+ catch (e) {
105
+ throw new QueryPickerError(`failed to parse yaml ${file}: ${e.message}`, file);
106
+ }
107
+ const result = ReportSchema.safeParse(parsed);
108
+ if (!result.success) {
109
+ const firstIssue = result.error.issues[0];
110
+ const where = firstIssue.path.join(".");
111
+ throw new QueryPickerError(`schema validation failed for ${file} at '${where}': ${firstIssue.message}`, file);
112
+ }
113
+ for (const finding of result.data.findings) {
114
+ const sec = finding.verify_with.suggested_eval_case;
115
+ if (sec.query === null) {
116
+ skipped += 1;
117
+ skippedSummary.push(`${path.basename(file)}: rule=${finding.rule_id} (query=null; upgrade M4 trace to populate input.messages)`);
118
+ continue;
119
+ }
120
+ if (sec.assertions.length === 0) {
121
+ skipped += 1;
122
+ skippedSummary.push(`${path.basename(file)}: rule=${finding.rule_id} (empty assertions; refinement would fail)`);
123
+ continue;
124
+ }
125
+ const placeholderAssertions = sec.assertions.map((t) => ({
126
+ type: "contains",
127
+ value: t,
128
+ _note: "auto-lifted from M4 assertion template; convert to structured assertion manually",
129
+ }));
130
+ const queryId = sec.query_id ?? "";
131
+ const existing = byQueryId.get(queryId);
132
+ if (existing) {
133
+ existing.assertions = [...(existing.assertions ?? []), ...placeholderAssertions];
134
+ }
135
+ else {
136
+ byQueryId.set(queryId, {
137
+ query_id: queryId,
138
+ input: { user_message: sec.query },
139
+ reference: undefined,
140
+ assertions: placeholderAssertions,
141
+ tags: undefined,
142
+ });
143
+ }
144
+ }
145
+ }
146
+ return { cases: [...byQueryId.values()], skipped_findings_count: skipped, skipped_findings_summary: skippedSummary };
147
+ }
@@ -0,0 +1,42 @@
1
+ /**
2
+ * M5 eval-set redactor — PII pattern matching + replacement.
3
+ *
4
+ * Three rule sources, in priority order (chain):
5
+ * 1. --redaction-rules=<path> (CLI flag, highest)
6
+ * 2. <repo>/redaction-rules/*.yaml (repo-local)
7
+ * 3. BUILTIN_RULES (5 low-fidelity defaults)
8
+ *
9
+ * Builtin rules cover common Chinese-context PII: phone / email / id_card /
10
+ * bank_card / ip. Organizations write more rules in <repo>/redaction-rules/
11
+ * for their business-specific patterns.
12
+ *
13
+ * Rule yaml format:
14
+ * rules:
15
+ * - name: <id>
16
+ * pattern: <regex source string>
17
+ * replace: <replacement template; supports {hash6} placeholder>
18
+ *
19
+ * Malformed regex causes loadRules to throw RedactorError (no silent fallback).
20
+ */
21
+ import type { RedactionRule } from "./types.js";
22
+ export declare class RedactorError extends Error { // Error type of the redactor; per the module header, loadRules throws it on a malformed rule regex.
23
+ readonly path?: string | undefined; // Optional path attached to the error — presumably the rule file involved; confirm against the implementation.
24
+ constructor(message: string, path?: string | undefined);
25
+ }
26
+ /**
27
+ * 5 builtin low-fidelity PII patterns (phone / email / id_card / bank_card / ip, per the module header). Tuned for Chinese-context defaults;
28
+ * organizations override with their own rules in <repo>/redaction-rules/. These are the lowest-priority source in the rule chain.
29
+ */
30
+ export declare const BUILTIN_RULES: RedactionRule[];
31
+ export interface LoadRulesOpts { // Inputs that select the rule source for loadRules; both keys are required but either value may be undefined.
32
+ /** From `--redaction-rules=<path>`; highest priority */
33
+ cliFlag: string | undefined;
34
+ /** From `<repo>/redaction-rules/` (resolved by caller — usually `path.join(repoRoot, "redaction-rules")`) */
35
+ repoDir: string | undefined;
36
+ }
37
+ export interface LoadRulesResult { // What loadRules resolves to: the rules plus a label for where they came from.
38
+ rules: RedactionRule[]; // Rules returned by loadRules.
39
+ source: "cli-flag" | "repo" | "builtin"; // Label for the rule source; the values mirror the three sources listed in the module header.
40
+ }
41
+ export declare function loadRules(opts: LoadRulesOpts): Promise<LoadRulesResult>; // Per the module header: rule sources in priority order are CLI flag, then repo dir, then BUILTIN_RULES; a malformed regex throws RedactorError (no silent fallback).
42
+ export declare function applyRules(text: string, rules: RedactionRule[]): string; // Returns a string derived from `text` and `rules`. NOTE(review): implementation not visible here — presumably each rule's pattern matches are replaced by its `replace` template; confirm.