@rekshaw/promptmanager 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/README.md +117 -0
  2. package/dist/assertions.d.ts +3 -0
  3. package/dist/assertions.js +125 -0
  4. package/dist/assertions.js.map +1 -0
  5. package/dist/cli/init.d.ts +5 -0
  6. package/dist/cli/init.js +244 -0
  7. package/dist/cli/init.js.map +1 -0
  8. package/dist/cli.d.ts +2 -0
  9. package/dist/cli.js +151 -0
  10. package/dist/cli.js.map +1 -0
  11. package/dist/config.d.ts +7 -0
  12. package/dist/config.js +118 -0
  13. package/dist/config.js.map +1 -0
  14. package/dist/dataset.d.ts +2 -0
  15. package/dist/dataset.js +38 -0
  16. package/dist/dataset.js.map +1 -0
  17. package/dist/diffRuns.d.ts +2 -0
  18. package/dist/diffRuns.js +52 -0
  19. package/dist/diffRuns.js.map +1 -0
  20. package/dist/index.d.ts +5 -0
  21. package/dist/index.js +5 -0
  22. package/dist/index.js.map +1 -0
  23. package/dist/prompts.d.ts +2 -0
  24. package/dist/prompts.js +47 -0
  25. package/dist/prompts.js.map +1 -0
  26. package/dist/providers/anthropic.d.ts +2 -0
  27. package/dist/providers/anthropic.js +135 -0
  28. package/dist/providers/anthropic.js.map +1 -0
  29. package/dist/providers/common.d.ts +8 -0
  30. package/dist/providers/common.js +32 -0
  31. package/dist/providers/common.js.map +1 -0
  32. package/dist/providers/google.d.ts +2 -0
  33. package/dist/providers/google.js +149 -0
  34. package/dist/providers/google.js.map +1 -0
  35. package/dist/providers/openai.d.ts +2 -0
  36. package/dist/providers/openai.js +171 -0
  37. package/dist/providers/openai.js.map +1 -0
  38. package/dist/providers/registry.d.ts +5 -0
  39. package/dist/providers/registry.js +29 -0
  40. package/dist/providers/registry.js.map +1 -0
  41. package/dist/reporting.d.ts +8 -0
  42. package/dist/reporting.js +45 -0
  43. package/dist/reporting.js.map +1 -0
  44. package/dist/runSuite.d.ts +2 -0
  45. package/dist/runSuite.js +164 -0
  46. package/dist/runSuite.js.map +1 -0
  47. package/dist/schema.d.ts +7 -0
  48. package/dist/schema.js +22 -0
  49. package/dist/schema.js.map +1 -0
  50. package/dist/suggestions.d.ts +2 -0
  51. package/dist/suggestions.js +172 -0
  52. package/dist/suggestions.js.map +1 -0
  53. package/dist/tools/loadTools.d.ts +5 -0
  54. package/dist/tools/loadTools.js +30 -0
  55. package/dist/tools/loadTools.js.map +1 -0
  56. package/dist/tools/toolRunner.d.ts +13 -0
  57. package/dist/tools/toolRunner.js +109 -0
  58. package/dist/tools/toolRunner.js.map +1 -0
  59. package/dist/types.d.ts +242 -0
  60. package/dist/types.js +2 -0
  61. package/dist/types.js.map +1 -0
  62. package/dist/utils.d.ts +13 -0
  63. package/dist/utils.js +119 -0
  64. package/dist/utils.js.map +1 -0
  65. package/package.json +35 -0
  66. package/runtime/tool-worker.mjs +130 -0
  67. package/templates/promptmanager.workflow.yml +22 -0
package/README.md ADDED
@@ -0,0 +1,117 @@
1
+ # PromptManager
2
+
3
+ PromptManager is a Node/TypeScript CLI + SDK for regression-safe prompt development with real tool-calling.
4
+
5
+ ## What it does
6
+
7
+ - Version prompts in Git (`prompts/<promptId>/v<semver>.md`)
8
+ - Run eval suites from JSONL fixtures
9
+ - Execute real tool handlers in subprocess isolation
10
+ - Validate outputs with JSON Schema + field-level assertions
11
+ - Diff candidate runs against baseline reports and fail CI on regressions
12
+ - Generate non-blocking prompt improvement suggestions
13
+
14
+ ## Install
15
+
16
+ ```bash
17
+ npm install @rekshaw/promptmanager
18
+ ```
19
+
20
+ ## Initialize
21
+
22
+ ```bash
23
+ npx promptmgr init
24
+ ```
25
+
26
+ This creates:
27
+
28
+ - `promptmanager.config.ts`
29
+ - `prompts/customer-email-parser/*`
30
+ - `evals/customer-email/*`
31
+ - `tools/customer-email-tools.mjs`
32
+ - `.github/workflows/promptmanager.yml`
33
+
34
+ A reusable workflow template is also included at `templates/promptmanager.workflow.yml`.
35
+
36
+ ## CLI
37
+
38
+ ```bash
39
+ promptmgr run --suite customer-email-parser --provider openai
40
+ promptmgr diff --baseline ./baseline.json --candidate ./candidate.json
41
+ promptmgr ci --suite customer-email-parser --provider openai --baseline ./baseline/run-report.json --fail-on-regression
42
+ promptmgr suggest --run ./candidate.json --with-ai
43
+ ```
44
+
45
+ ## Config contract
46
+
47
+ `promptmanager.config.ts` (or `.json`) must include:
48
+
49
+ - `providers`
50
+ - `suites`
51
+ - `toolRunner`
52
+ - `privacy`
53
+ - `reporting`
54
+
55
+ ## Tool module contract
56
+
57
+ ```ts
58
+ import type { ToolModuleShape } from "@rekshaw/promptmanager";
59
+
60
+ export const tools: ToolModuleShape["tools"] = [
61
+ {
62
+ name: "my_tool",
63
+ description: "Tool description",
64
+ strict: true,
65
+ inputSchema: {
66
+ type: "object",
67
+ properties: { foo: { type: "string" } },
68
+ required: ["foo"],
69
+ additionalProperties: false
70
+ }
71
+ }
72
+ ];
73
+
74
+ export const handlers: ToolModuleShape["handlers"] = {
75
+ async my_tool(args, context) {
76
+ return { ok: true };
77
+ }
78
+ };
79
+ ```
80
+
81
+ ## OpenAI function-calling workflow support
82
+
83
+ PromptManager's OpenAI adapter follows the same loop as the official function-calling guide:
84
+
85
+ 1. Send `input + tools` to `responses.create`
86
+ 2. Read `response.output` items
87
+ 3. Execute each `function_call` in your app code
88
+ 4. Append `function_call_output` items to the running `input`
89
+ 5. Call `responses.create` again until no more function calls remain
90
+
91
+ For reasoning models, this preserves the full output items across turns (including reasoning/tool call items), matching the documented requirement.
92
+
93
+ Customer-email parsing with fare normalization is scaffolded by default. To use your existing fare mapper, replace the local function in `tools/customer-email-tools.mjs` with an import from your codebase.
94
+
95
+ ## Report artifacts
96
+
97
+ Each run emits a JSON artifact with:
98
+
99
+ - summary counts
100
+ - per-case status (`pass|fail|error`)
101
+ - schema/assertion failures
102
+ - tool-call traces
103
+ - hashed case IDs
104
+
105
+ ## SDK usage
106
+
107
+ ```ts
108
+ import { runSuite, diffRuns, generateSuggestions } from "@rekshaw/promptmanager";
109
+ ```
110
+
111
+ ## Development
112
+
113
+ ```bash
114
+ npm ci
115
+ npm run build
116
+ npm test
117
+ ```
@@ -0,0 +1,3 @@
1
+ import { AssertionResult, AssertionSpec, JsonValue } from "./types.js";
2
/** Reads the JSON file at `assertionsPath`, parses it, and resolves to its contents as an `AssertionSpec`. */
+ export declare function loadAssertionSpec(assertionsPath: string): Promise<AssertionSpec>;
3
/**
 * Checks `output` against `spec`: required keys, unexpected keys, and per-field matchers.
 * `expectedOutput` supplies the comparison value for matchers that carry no literal `value`.
 */
+ export declare function evaluateAssertions(output: JsonValue, expectedOutput: JsonValue, spec: AssertionSpec): AssertionResult;
@@ -0,0 +1,125 @@
1
+ import fs from "node:fs/promises";
2
+ import { asObject, getByPath } from "./utils.js";
3
/**
 * Load an assertion spec from a JSON file and fill in defaults for its optional fields.
 *
 * @param assertionsPath - Path of the UTF-8 JSON spec file to read.
 * @returns The spec with `requiredKeys` as given, `allowAdditionalKeys` defaulting to
 *   `false`, `variableFields` defaulting to `[]` and `fieldMatchers` defaulting to `{}`.
 * @throws If the file cannot be read, is not valid JSON, or does not hold an object
 *   with an array-valued `requiredKeys`.
 */
export async function loadAssertionSpec(assertionsPath) {
    const raw = await fs.readFile(assertionsPath, "utf8");
    const parsed = JSON.parse(raw);
    // JSON.parse may legally return null or a primitive; report those with the same
    // descriptive error instead of a TypeError from the property access below.
    const requiredKeys = parsed !== null && typeof parsed === "object" ? parsed.requiredKeys : undefined;
    if (!Array.isArray(requiredKeys)) {
        throw new Error("Assertion spec must contain an array for 'requiredKeys'.");
    }
    return {
        requiredKeys,
        allowAdditionalKeys: parsed.allowAdditionalKeys ?? false,
        variableFields: parsed.variableFields ?? [],
        fieldMatchers: parsed.fieldMatchers ?? {},
    };
}
16
/**
 * Decide which value a matcher should be compared against.
 *
 * Precedence: the matcher's literal `value`; otherwise the matcher's `expectedPath`
 * (with a leading "$expected." removed) looked up in `expectedOutput`; otherwise
 * `fieldPath` looked up in `expectedOutput`.
 */
function resolveExpectedValue(matcher, fieldPath, expectedOutput) {
    if (matcher.value !== undefined) {
        return matcher.value;
    }
    const lookupPath = matcher.expectedPath
        ? matcher.expectedPath.replace(/^\$expected\./, "")
        : fieldPath;
    return getByPath(expectedOutput, lookupPath);
}
26
/**
 * Rebuild a JSON value with object keys in sorted order, recursively, so that two
 * structurally equal values serialize to the same string regardless of key order.
 */
function canonicalizeJson(value) {
    if (Array.isArray(value)) {
        return value.map((item) => canonicalizeJson(item));
    }
    if (value !== null && typeof value === "object") {
        // Null-prototype target so a literal "__proto__" key stays an ordinary property.
        const ordered = Object.create(null);
        for (const key of Object.keys(value).sort()) {
            ordered[key] = canonicalizeJson(value[key]);
        }
        return ordered;
    }
    return value;
}
/** Structural equality of two JSON values, insensitive to object key order. */
function jsonEquals(left, right) {
    return JSON.stringify(canonicalizeJson(left)) === JSON.stringify(canonicalizeJson(right));
}
/**
 * Evaluate a single assertion operator.
 *
 * @param op - One of "equals", "oneOf", "contains", "regex", "numericRange",
 *   "exists", "absent"; any other value yields a failed check.
 * @param actual - Value taken from the model output.
 * @param expected - Comparison value; its meaning depends on `op` (a value, an array
 *   of options, a substring/element, a regex source string, or a `{ min, max }` object).
 * @returns `{ passed, message }`. This function never throws: an invalid regex
 *   pattern is reported as a failed check.
 */
function runCheck(op, actual, expected) {
    switch (op) {
        case "equals": {
            const passed = jsonEquals(actual, expected);
            return {
                passed,
                message: passed ? "matches expected value" : "value does not match expected",
            };
        }
        case "oneOf": {
            // A non-array expectation is treated as an empty option pool (always fails).
            const pool = Array.isArray(expected) ? expected : [];
            const passed = pool.some((candidate) => jsonEquals(candidate, actual));
            return {
                passed,
                message: passed ? "value matches allowed option" : "value not in allowed set",
            };
        }
        case "contains": {
            if (typeof actual === "string" && typeof expected === "string") {
                const passed = actual.includes(expected);
                return { passed, message: passed ? "substring found" : "substring missing" };
            }
            if (Array.isArray(actual)) {
                const passed = actual.some((item) => jsonEquals(item, expected));
                return { passed, message: passed ? "array contains value" : "array does not contain value" };
            }
            return { passed: false, message: "contains expects string or array output" };
        }
        case "regex": {
            const pattern = typeof expected === "string" ? expected : "";
            let regex;
            try {
                regex = new RegExp(pattern);
            }
            catch {
                // A malformed pattern in the spec must fail this check, not abort the run.
                return { passed: false, message: `invalid regex '${pattern}'` };
            }
            // null/undefined output is matched as the empty string.
            const passed = regex.test(String(actual ?? ""));
            return { passed, message: passed ? "regex matched" : `regex '${pattern}' did not match` };
        }
        case "numericRange": {
            const range = (expected ?? {});
            // Non-number output becomes NaN, which fails the finiteness test below.
            const value = typeof actual === "number" ? actual : Number.NaN;
            const minOk = range.min === undefined || value >= range.min;
            const maxOk = range.max === undefined || value <= range.max;
            const passed = Number.isFinite(value) && minOk && maxOk;
            return {
                passed,
                message: passed
                    ? "value in numeric range"
                    : `value outside range [${range.min ?? "-inf"}, ${range.max ?? "+inf"}]`,
            };
        }
        case "exists": {
            const passed = actual !== undefined && actual !== null;
            return { passed, message: passed ? "value exists" : "value missing" };
        }
        case "absent": {
            const passed = actual === undefined || actual === null;
            return { passed, message: passed ? "value absent as expected" : "value should be absent" };
        }
        default:
            return { passed: false, message: "unsupported assertion operator" };
    }
}
85
/**
 * Evaluate a model output against an assertion spec.
 *
 * Three things are checked: every `spec.requiredKeys` entry is present on the output
 * object; no key outside required/variable/matcher fields is present (unless
 * `spec.allowAdditionalKeys` is set); and every field matcher passes.
 *
 * @param output - The value produced by the model.
 * @param expectedOutput - Fixture value used to resolve matcher expectations.
 * @param spec - The assertion spec to apply.
 * @returns `{ passed, checks, missingKeys, unexpectedKeys }`.
 */
export function evaluateAssertions(output, expectedOutput, spec) {
    const outputObj = asObject(output);
    const missingKeys = spec.requiredKeys.filter((key) => !(key in outputObj));

    // Keys that may appear on the output without being flagged as unexpected.
    const permitted = new Set(spec.requiredKeys);
    for (const key of spec.variableFields ?? []) {
        permitted.add(key);
    }
    for (const key of Object.keys(spec.fieldMatchers ?? {})) {
        permitted.add(key);
    }

    let unexpectedKeys = [];
    if (!(spec.allowAdditionalKeys ?? false)) {
        unexpectedKeys = Object.keys(outputObj).filter((key) => !permitted.has(key));
    }

    const checks = [];
    const matcherEntries = Object.entries(spec.fieldMatchers ?? {});
    for (const [field, matchers] of matcherEntries) {
        const actual = getByPath(output, field);
        for (const matcher of matchers) {
            const expected = resolveExpectedValue(matcher, field, expectedOutput);
            const outcome = runCheck(matcher.op, actual, expected);
            // `expected` is only recorded when a comparison value was resolved.
            checks.push({
                field,
                op: matcher.op,
                passed: outcome.passed,
                actual,
                message: outcome.message,
                ...(expected !== undefined ? { expected } : {}),
            });
        }
    }

    const anyProblem = missingKeys.length !== 0 ||
        unexpectedKeys.length !== 0 ||
        checks.some((entry) => !entry.passed);
    return {
        passed: !anyProblem,
        checks,
        missingKeys,
        unexpectedKeys,
    };
}
125
+ //# sourceMappingURL=assertions.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"assertions.js","sourceRoot":"","sources":["../src/assertions.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAUlC,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAEjD,MAAM,CAAC,KAAK,UAAU,iBAAiB,CAAC,cAAsB;IAC5D,MAAM,GAAG,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC,CAAC;IACtD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAkB,CAAC;IAEhD,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,YAAY,CAAC,EAAE,CAAC;QACxC,MAAM,IAAI,KAAK,CAAC,0DAA0D,CAAC,CAAC;IAC9E,CAAC;IACD,OAAO;QACL,YAAY,EAAE,MAAM,CAAC,YAAY;QACjC,mBAAmB,EAAE,MAAM,CAAC,mBAAmB,IAAI,KAAK;QACxD,cAAc,EAAE,MAAM,CAAC,cAAc,IAAI,EAAE;QAC3C,aAAa,EAAE,MAAM,CAAC,aAAa,IAAI,EAAE;KAC1C,CAAC;AACJ,CAAC;AAED,SAAS,oBAAoB,CAC3B,OAAqB,EACrB,SAAiB,EACjB,cAAyB;IAEzB,IAAI,OAAO,CAAC,KAAK,KAAK,SAAS,EAAE,CAAC;QAChC,OAAO,OAAO,CAAC,KAAK,CAAC;IACvB,CAAC;IAED,IAAI,OAAO,CAAC,YAAY,EAAE,CAAC;QACzB,MAAM,SAAS,GAAG,OAAO,CAAC,YAAY,CAAC,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC,CAAC;QACpE,OAAO,SAAS,CAAC,cAAc,EAAE,SAAS,CAAC,CAAC;IAC9C,CAAC;IAED,OAAO,SAAS,CAAC,cAAc,EAAE,SAAS,CAAC,CAAC;AAC9C,CAAC;AAED,SAAS,QAAQ,CACf,EAAqB,EACrB,MAA6B,EAC7B,QAAqD;IAErD,QAAQ,EAAE,EAAE,CAAC;QACX,KAAK,QAAQ,CAAC,CAAC,CAAC;YACd,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;YACnE,OAAO;gBACL,MAAM;gBACN,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,wBAAwB,CAAC,CAAC,CAAC,+BAA+B;aAC7E,CAAC;QACJ,CAAC;QACD,KAAK,OAAO,CAAC,CAAC,CAAC;YACb,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;YACrD,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,KAAK,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC;YAC9F,OAAO;gBACL,MAAM;gBACN,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,8BAA8B,CAAC,CAAC,CAAC,0BAA0B;aAC9E,CAAC;QACJ,CAAC;QACD,KAAK,UAAU,CAAC,CAAC,CAAC;YAChB,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,OAAO,QAAQ,KAAK,QAAQ,EAAE,CAAC;gBAC/D,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;gBACzC,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,iBAAiB,CAAC,CAAC,CAAC,mBAAmB,EAAE,CAAC;YAC/E,CAAC;YACD,IAAI,KAAK,CAAC,OA
AO,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC1B,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC;gBACxF,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,8BAA8B,EAAE,CAAC;YAC/F,CAAC;YACD,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,yCAAyC,EAAE,CAAC;QAC/E,CAAC;QACD,KAAK,OAAO,CAAC,CAAC,CAAC;YACb,MAAM,OAAO,GAAG,OAAO,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;YAC7D,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,OAAO,CAAC,CAAC;YAClC,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,CAAC;YAChD,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,UAAU,OAAO,iBAAiB,EAAE,CAAC;QAC5F,CAAC;QACD,KAAK,cAAc,CAAC,CAAC,CAAC;YACpB,MAAM,KAAK,GAAG,CAAC,QAAQ,IAAI,EAAE,CAAwB,CAAC;YACtD,MAAM,KAAK,GAAG,OAAO,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC;YAC/D,MAAM,KAAK,GAAG,KAAK,CAAC,GAAG,KAAK,SAAS,IAAI,KAAK,IAAI,KAAK,CAAC,GAAG,CAAC;YAC5D,MAAM,KAAK,GAAG,KAAK,CAAC,GAAG,KAAK,SAAS,IAAI,KAAK,IAAI,KAAK,CAAC,GAAG,CAAC;YAC5D,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,KAAK,IAAI,KAAK,CAAC;YACxD,OAAO;gBACL,MAAM;gBACN,OAAO,EAAE,MAAM;oBACb,CAAC,CAAC,wBAAwB;oBAC1B,CAAC,CAAC,wBAAwB,KAAK,CAAC,GAAG,IAAI,MAAM,KAAK,KAAK,CAAC,GAAG,IAAI,MAAM,GAAG;aAC3E,CAAC;QACJ,CAAC;QACD,KAAK,QAAQ,CAAC,CAAC,CAAC;YACd,MAAM,MAAM,GAAG,MAAM,KAAK,SAAS,IAAI,MAAM,KAAK,IAAI,CAAC;YACvD,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC;QACxE,CAAC;QACD,KAAK,QAAQ,CAAC,CAAC,CAAC;YACd,MAAM,MAAM,GAAG,MAAM,KAAK,SAAS,IAAI,MAAM,KAAK,IAAI,CAAC;YACvD,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,0BAA0B,CAAC,CAAC,CAAC,wBAAwB,EAAE,CAAC;QAC7F,CAAC;QACD;YACE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,gCAAgC,EAAE,CAAC;IACxE,CAAC;AACH,CAAC;AAED,MAAM,UAAU,kBAAkB,CAChC,MAAiB,EACjB,cAAyB,EACzB,IAAmB;IAEnB,MAAM,SAAS,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC;IACnC,MAAM,MAAM,GAA2B,EAAE,CAAC;IAE1C,MAAM,WAAW,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG
,IAAI,SAAS,CAAC,CAAC,CAAC;IAE3E,MAAM,WAAW,GAAG,IAAI,GAAG,CAAS;QAClC,GAAG,IAAI,CAAC,YAAY;QACpB,GAAG,CAAC,IAAI,CAAC,cAAc,IAAI,EAAE,CAAC;QAC9B,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,IAAI,EAAE,CAAC;KACzC,CAAC,CAAC;IAEH,MAAM,cAAc,GAAG,CAAC,IAAI,CAAC,mBAAmB,IAAI,KAAK,CAAC;QACxD,CAAC,CAAC,EAAE;QACJ,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;IAElE,KAAK,MAAM,CAAC,KAAK,EAAE,QAAQ,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,aAAa,IAAI,EAAE,CAAC,EAAE,CAAC;QACzE,MAAM,WAAW,GAAG,SAAS,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;QAC7C,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,MAAM,aAAa,GAAG,oBAAoB,CAAC,OAAO,EAAE,KAAK,EAAE,cAAc,CAAC,CAAC;YAC3E,MAAM,MAAM,GAAG,QAAQ,CAAC,OAAO,CAAC,EAAE,EAAE,WAAW,EAAE,aAAa,CAAC,CAAC;YAChE,MAAM,KAAK,GAAyB;gBAClC,KAAK;gBACL,EAAE,EAAE,OAAO,CAAC,EAAE;gBACd,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,MAAM,EAAE,WAAW;gBACnB,OAAO,EAAE,MAAM,CAAC,OAAO;aACxB,CAAC;YACF,IAAI,aAAa,KAAK,SAAS,EAAE,CAAC;gBAChC,KAAK,CAAC,QAAQ,GAAG,aAA0B,CAAC;YAC9C,CAAC;YACD,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACrB,CAAC;IACH,CAAC;IAED,MAAM,MAAM,GACV,WAAW,CAAC,MAAM,KAAK,CAAC;QACxB,cAAc,CAAC,MAAM,KAAK,CAAC;QAC3B,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IAExC,OAAO;QACL,MAAM;QACN,MAAM;QACN,WAAW;QACX,cAAc;KACf,CAAC;AACJ,CAAC"}
@@ -0,0 +1,5 @@
1
/** Options accepted by `runInit`. */
+ interface InitOptions {
2
/** When true, existing scaffold files are overwritten instead of being skipped. */
+ force?: boolean;
3
+ }
4
/** Writes the PromptManager scaffold files under `cwd` and prints the created/skipped counts to stdout. */
+ export declare function runInit(cwd: string, options: InitOptions): Promise<void>;
5
+ export {};
@@ -0,0 +1,244 @@
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
/**
 * Write one scaffold file, creating parent directories as needed.
 *
 * Without `force` the file is created with the exclusive `wx` flag, so the existence
 * check and the write happen in one step and cannot race with another writer.
 *
 * @param file - `{ filePath, content }` describing the file to write.
 * @param force - When true, an existing file is overwritten.
 * @returns "created" when the file was written, "skipped" when it already existed
 *   and `force` was false.
 * @throws Any file-system error other than "already exists" in non-force mode.
 */
async function writeFileSafe(file, force) {
    await fs.mkdir(path.dirname(file.filePath), { recursive: true });
    try {
        await fs.writeFile(file.filePath, file.content, {
            encoding: "utf8",
            flag: force ? "w" : "wx",
        });
        return "created";
    }
    catch (error) {
        const alreadyExists = error !== null &&
            typeof error === "object" &&
            "code" in error &&
            error.code === "EEXIST";
        if (!force && alreadyExists) {
            return "skipped";
        }
        throw error;
    }
}
17
/**
 * Build the list of scaffold files written by `init`.
 *
 * Each entry is `{ filePath, content }`, with `filePath` resolved against `cwd`.
 * The entries are, in order: the `promptmanager.config.ts` config, the prompt
 * `meta.json`, the `v1.0.0.md` prompt, the JSONL dataset, the JSON schema, the
 * assertions spec, the tools module, and the GitHub Actions workflow.
 *
 * The template literals below are emitted verbatim, so their text must not be
 * reformatted.
 */
+ function filesForCwd(cwd) {
18
+ return [
19
+ {
20
+ filePath: path.resolve(cwd, "promptmanager.config.ts"),
21
+ content: `import type { PromptManagerConfig } from "@rekshaw/promptmanager";
22
+
23
+ const config: PromptManagerConfig = {
24
+ providers: {
25
+ openai: {
26
+ apiKeyEnv: "OPENAI_API_KEY",
27
+ parallelToolCalls: false,
28
+ toolChoice: "auto"
29
+ },
30
+ anthropic: { apiKeyEnv: "ANTHROPIC_API_KEY" },
31
+ google: { apiKeyEnv: "GEMINI_API_KEY" }
32
+ },
33
+ suites: [
34
+ {
35
+ id: "customer-email-parser",
36
+ promptId: "customer-email-parser",
37
+ datasetPath: "evals/customer-email/dataset.jsonl",
38
+ schemaPath: "evals/customer-email/schema.json",
39
+ assertionsPath: "evals/customer-email/assertions.json",
40
+ toolsModule: "tools/customer-email-tools.mjs",
41
+ modelByProvider: {
42
+ openai: "gpt-5-mini",
43
+ anthropic: "claude-3-5-sonnet-latest",
44
+ google: "gemini-2.0-flash"
45
+ }
46
+ }
47
+ ],
48
+ toolRunner: {
49
+ mode: "subprocess",
50
+ command: "node",
51
+ envAllowlist: ["TZ"],
52
+ timeoutMs: 5000,
53
+ maxToolCallsPerCase: 8
54
+ },
55
+ privacy: {
56
+ allowRawProductionFixtures: true,
57
+ redactInReports: true,
58
+ encryptionAtRest: false
59
+ },
60
+ reporting: {
61
+ includeToolTrace: true,
62
+ outDir: "promptmanager-reports"
63
+ }
64
+ };
65
+
66
+ export default config;
67
+ `,
68
+ },
69
+ {
70
+ filePath: path.resolve(cwd, "prompts/customer-email-parser/meta.json"),
71
+ content: `${JSON.stringify({ currentVersion: "1.0.0", versions: ["1.0.0"] }, null, 2)}\n`,
72
+ },
73
+ {
74
+ filePath: path.resolve(cwd, "prompts/customer-email-parser/v1.0.0.md"),
75
+ content: `You extract structured booking data from customer confirmation emails.
76
+
77
+ Rules:
78
+ 1. Return only valid JSON. No markdown. No prose.
79
+ 2. If the email contains a fare label, call the normalize_fare_type function.
80
+ 3. Set fare_type_normalized from the function output.
81
+ 4. If a field is missing, return null for that field.
82
+
83
+ Required output fields:
84
+ - customer_email
85
+ - reservation_code
86
+ - departure_date
87
+ - fare_type_raw
88
+ - fare_type_normalized
89
+ `,
90
+ },
91
+ {
92
+ filePath: path.resolve(cwd, "evals/customer-email/dataset.jsonl"),
93
+ content: [
94
+ JSON.stringify({
95
+ caseId: "customer-email-001",
96
+ input: {
97
+ subject: "Your flight is confirmed",
98
+ body: "Hello Andrea, thanks for booking with us. Reservation ZX81Y is confirmed. Passenger fare type: Economy Flex. Departure date: 2026-04-18. Contact: andrea@example.com"
99
+ },
100
+ expected: {
101
+ customer_email: "andrea@example.com",
102
+ reservation_code: "ZX81Y",
103
+ departure_date: "2026-04-18",
104
+ fare_type_raw: "Economy Flex",
105
+ fare_type_normalized: "ECONOMY_FLEX"
106
+ },
107
+ tags: ["happy-path", "fare-normalization"]
108
+ }),
109
+ ].join("\n") + "\n",
110
+ },
111
+ {
112
+ filePath: path.resolve(cwd, "evals/customer-email/schema.json"),
113
+ content: `${JSON.stringify({
114
+ type: "object",
115
+ properties: {
116
+ customer_email: { type: ["string", "null"] },
117
+ reservation_code: { type: ["string", "null"] },
118
+ departure_date: { type: ["string", "null"] },
119
+ fare_type_raw: { type: ["string", "null"] },
120
+ fare_type_normalized: { type: ["string", "null"] }
121
+ },
122
+ required: [
123
+ "customer_email",
124
+ "reservation_code",
125
+ "departure_date",
126
+ "fare_type_raw",
127
+ "fare_type_normalized"
128
+ ],
129
+ additionalProperties: false
130
+ }, null, 2)}\n`,
131
+ },
132
+ {
133
+ filePath: path.resolve(cwd, "evals/customer-email/assertions.json"),
134
+ content: `${JSON.stringify({
135
+ requiredKeys: [
136
+ "customer_email",
137
+ "reservation_code",
138
+ "departure_date",
139
+ "fare_type_raw",
140
+ "fare_type_normalized"
141
+ ],
142
+ allowAdditionalKeys: false,
143
+ variableFields: [],
144
+ fieldMatchers: {
145
+ customer_email: [{ op: "regex", value: "^[^@\\s]+@[^@\\s]+\\.[^@\\s]+$" }],
146
+ reservation_code: [{ op: "regex", value: "^[A-Z0-9-]{4,12}$" }],
147
+ departure_date: [{ op: "regex", value: "^\\d{4}-\\d{2}-\\d{2}$" }],
148
+ fare_type_normalized: [{ op: "oneOf", value: ["ECONOMY_BASIC", "ECONOMY_FLEX", "PREMIUM_ECONOMY", "BUSINESS", "FIRST", "UNKNOWN"] }]
149
+ }
150
+ }, null, 2)}\n`,
151
+ },
152
+ {
153
+ filePath: path.resolve(cwd, "tools/customer-email-tools.mjs"),
154
+ content: `// Replace this local mapper with an import from your codebase if you already have one.
155
+ // Example: import { mapFareType } from "../src/domain/fares/mapFareType.js";
156
+
157
+ const FARE_TYPE_MAP = {
158
+ "economy basic": "ECONOMY_BASIC",
159
+ "economy flex": "ECONOMY_FLEX",
160
+ "premium economy": "PREMIUM_ECONOMY",
161
+ "business": "BUSINESS",
162
+ "first": "FIRST"
163
+ };
164
+
165
+ function mapFareType(rawFareType) {
166
+ const normalized = String(rawFareType || "").trim().toLowerCase();
167
+ return FARE_TYPE_MAP[normalized] ?? "UNKNOWN";
168
+ }
169
+
170
+ export const tools = [
171
+ {
172
+ type: "function",
173
+ name: "normalize_fare_type",
174
+ description: "Map a raw fare label from an email into a normalized internal fare type enum",
175
+ strict: true,
176
+ inputSchema: {
177
+ type: "object",
178
+ properties: {
179
+ raw_fare_type: { type: "string", description: "Raw fare label, e.g. Economy Flex" }
180
+ },
181
+ required: ["raw_fare_type"],
182
+ additionalProperties: false
183
+ }
184
+ }
185
+ ];
186
+
187
+ export const handlers = {
188
+ async normalize_fare_type(args) {
189
+ const raw = typeof args === "object" && args && "raw_fare_type" in args
190
+ ? String(args.raw_fare_type)
191
+ : "";
192
+
193
+ return {
194
+ raw_fare_type: raw,
195
+ fare_type_normalized: mapFareType(raw)
196
+ };
197
+ }
198
+ };
199
+ `,
200
+ },
201
+ {
202
+ filePath: path.resolve(cwd, ".github/workflows/promptmanager.yml"),
203
+ content: `name: PromptManager CI
204
+
205
+ on:
206
+ pull_request:
207
+ push:
208
+ branches: [main]
209
+
210
+ jobs:
211
+ prompt-evals:
212
+ runs-on: ubuntu-latest
213
+ steps:
214
+ - uses: actions/checkout@v4
215
+ - uses: actions/setup-node@v4
216
+ with:
217
+ node-version: '20'
218
+ - run: npm ci
219
+ - run: npm run build
220
+ - run: npx promptmgr ci --suite customer-email-parser --provider openai --baseline ./baseline/run-report.json --fail-on-regression
221
+ env:
222
+ OPENAI_API_KEY: \${{ secrets.OPENAI_API_KEY }}
223
+ ANTHROPIC_API_KEY: \${{ secrets.ANTHROPIC_API_KEY }}
224
+ GEMINI_API_KEY: \${{ secrets.GEMINI_API_KEY }}
225
+ `,
226
+ },
227
+ ];
228
+ }
229
/**
 * Write the scaffold files for `cwd` one after another and report the tally.
 *
 * @param cwd - Directory the scaffold paths are resolved against.
 * @param options - `force` (default false) controls whether existing files are overwritten.
 * @returns Resolves once every file has been handled and the summary line was
 *   written to stdout.
 */
export async function runInit(cwd, options) {
    const overwrite = options.force ?? false;
    const tally = { created: 0, skipped: 0 };
    for (const entry of filesForCwd(cwd)) {
        // Any outcome other than "created" is counted as skipped.
        const outcome = await writeFileSafe(entry, overwrite);
        tally[outcome === "created" ? "created" : "skipped"] += 1;
    }
    process.stdout.write(`Initialized PromptManager scaffold. created=${tally.created} skipped=${tally.skipped}\n`);
}
244
+ //# sourceMappingURL=init.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"init.js","sourceRoot":"","sources":["../../src/cli/init.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,IAAI,MAAM,WAAW,CAAC;AAW7B,KAAK,UAAU,aAAa,CAAC,IAAkB,EAAE,KAAc;IAC7D,MAAM,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACjE,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,IAAI,CAAC;YACH,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAC/B,OAAO,SAAS,CAAC;QACnB,CAAC;QAAC,MAAM,CAAC;YACP,eAAe;QACjB,CAAC;IACH,CAAC;IACD,MAAM,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;IACxD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,SAAS,WAAW,CAAC,GAAW;IAC9B,OAAO;QACL;YACE,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,yBAAyB,CAAC;YACtD,OAAO,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA8Cd;SACI;QACD;YACE,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,yCAAyC,CAAC;YACtE,OAAO,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,EAAE,cAAc,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC,OAAO,CAAC,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI;SAC1F;QACD;YACE,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,yCAAyC,CAAC;YACtE,OAAO,EAAE;;;;;;;;;;;;;;CAcd;SACI;QACD;YACE,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,oCAAoC,CAAC;YACjE,OAAO,EAAE;gBACP,IAAI,CAAC,SAAS,CAAC;oBACb,MAAM,EAAE,oBAAoB;oBAC5B,KAAK,EAAE;wBACL,OAAO,EAAE,0BAA0B;wBACnC,IAAI,EAAE,sKAAsK;qBAC7K;oBACD,QAAQ,EAAE;wBACR,cAAc,EAAE,oBAAoB;wBACpC,gBAAgB,EAAE,OAAO;wBACzB,cAAc,EAAE,YAAY;wBAC5B,aAAa,EAAE,cAAc;wBAC7B,oBAAoB,EAAE,cAAc;qBACrC;oBACD,IAAI,EAAE,CAAC,YAAY,EAAE,oBAAoB,CAAC;iBAC3C,CAAC;aACH,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI;SACpB;QACD;YACE,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,kCAAkC,CAAC;YAC/D,OAAO,EAAE,GAAG,IAAI,CAAC,SAAS,CACxB;gBACE,IAAI,EAAE,QAAQ;gBACd,UAAU,EAAE;oBACV,cAAc,EAAE,EAAE,IAAI,EAAE,CAAC,QAAQ,EAAE,MAAM,CAAC,EAAE;oBAC5C,gBAAgB,EAAE,EAAE,IAAI,EAAE,CAAC,QAAQ,EAAE,MAAM,CAAC,EAAE;oBAC9C,cAAc,EAAE,EAAE,IAAI,EAAE,CAAC,QAAQ,EAAE,MAAM,CAAC,EAAE;oBAC5C,aAAa,EAAE,EAAE,IAAI,EAAE,CAAC,QAAQ,EAAE,MAAM,CAAC,EAAE;oBAC3C,oBAAoB,EAAE,EAAE,IAAI,EAAE,CAAC,QAAQ,EAAE,MAAM,CAAC,EAAE;iBACnD;gBACD,QAAQ,EAAE;oB
ACR,gBAAgB;oBAChB,kBAAkB;oBAClB,gBAAgB;oBAChB,eAAe;oBACf,sBAAsB;iBACvB;gBACD,oBAAoB,EAAE,KAAK;aAC5B,EACD,IAAI,EACJ,CAAC,CACF,IAAI;SACN;QACD;YACE,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,sCAAsC,CAAC;YACnE,OAAO,EAAE,GAAG,IAAI,CAAC,SAAS,CACxB;gBACE,YAAY,EAAE;oBACZ,gBAAgB;oBAChB,kBAAkB;oBAClB,gBAAgB;oBAChB,eAAe;oBACf,sBAAsB;iBACvB;gBACD,mBAAmB,EAAE,KAAK;gBAC1B,cAAc,EAAE,EAAE;gBAClB,aAAa,EAAE;oBACb,cAAc,EAAE,CAAC,EAAE,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,gCAAgC,EAAE,CAAC;oBAC1E,gBAAgB,EAAE,CAAC,EAAE,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,mBAAmB,EAAE,CAAC;oBAC/D,cAAc,EAAE,CAAC,EAAE,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,wBAAwB,EAAE,CAAC;oBAClE,oBAAoB,EAAE,CAAC,EAAE,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,eAAe,EAAE,cAAc,EAAE,iBAAiB,EAAE,UAAU,EAAE,OAAO,EAAE,SAAS,CAAC,EAAE,CAAC;iBACrI;aACF,EACD,IAAI,EACJ,CAAC,CACF,IAAI;SACN;QACD;YACE,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,gCAAgC,CAAC;YAC7D,OAAO,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA6Cd;SACI;QACD;YACE,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,qCAAqC,CAAC;YAClE,OAAO,EAAE;;;;;;;;;;;;;;;;;;;;;;CAsBd;SACI;KACF,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,OAAO,CAAC,GAAW,EAAE,OAAoB;IAC7D,MAAM,KAAK,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC;IAC/B,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,OAAO,GAAG,CAAC,CAAC;IAEhB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,OAAO,GAAG,MAAM,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,KAAK,IAAI,KAAK,CAAC,CAAC;QAClE,IAAI,OAAO,KAAK,SAAS,EAAE,CAAC;YAC1B,OAAO,IAAI,CAAC,CAAC;QACf,CAAC;aAAM,CAAC;YACN,OAAO,IAAI,CAAC,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,+CAA+C,OAAO,YAAY,OAAO,IAAI,CAAC,CAAC;AACtG,CAAC"}
package/dist/cli.d.ts ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env node
2
+ export {};
package/dist/cli.js ADDED
@@ -0,0 +1,151 @@
1
+ #!/usr/bin/env node
2
+ import path from "node:path";
3
+ import { Command } from "commander";
4
+ import { runSuite } from "./runSuite.js";
5
+ import { diffRuns } from "./diffRuns.js";
6
+ import { generateSuggestions } from "./suggestions.js";
7
+ import { runInit } from "./cli/init.js";
8
+ import { loadConfig } from "./config.js";
9
+ import { defaultRunReportPath, defaultSuggestionPath, printDiffSummary, printRunSummary, printSuggestionSummary, writeRunReport, writeSuggestionReport, } from "./reporting.js";
10
+ import { readJsonFile } from "./utils.js";
11
/**
 * Validate a `--provider` argument.
 *
 * @param value - Raw provider name from the command line.
 * @returns `value` unchanged when it is "openai", "anthropic" or "google".
 * @throws Error naming the rejected value and the accepted providers.
 */
function parseProvider(value) {
    const supported = ["openai", "anthropic", "google"];
    if (!supported.includes(value)) {
        throw new Error(`Invalid provider '${value}'. Expected openai|anthropic|google.`);
    }
    return value;
}
17
/**
 * Parse a base-10 integer from a CLI option string.
 *
 * @param value - Raw option text.
 * @param fallback - Returned when `value` does not start with an integer.
 * @returns The parsed integer, or `fallback`.
 */
function toInt(value, fallback) {
    const parsed = Number.parseInt(value, 10);
    // parseInt yields either an integer or NaN, so a NaN test is sufficient.
    if (Number.isNaN(parsed)) {
        return fallback;
    }
    return parsed;
}
21
/**
 * Work out the directory run reports should be written to.
 *
 * When the config loads, the result is `config.reporting.outDir` (default
 * "promptmanager-reports") resolved against the directory of the loaded config file.
 * Any failure while loading or reading the config falls back to
 * "promptmanager-reports" under `cwd`.
 */
async function resolveConfiguredOutDir(cwd, configPath) {
    const defaultDirName = "promptmanager-reports";
    try {
        const loaded = await loadConfig(cwd, configPath);
        const configDir = path.dirname(loaded.path);
        const configured = loaded.config.reporting.outDir ?? defaultDirName;
        return path.resolve(configDir, configured);
    }
    catch {
        // Best effort: an unreadable config must not prevent writing a report.
        return path.resolve(cwd, defaultDirName);
    }
}
31
/**
 * Build the `promptmgr` command-line interface and run it against `process.argv`.
 *
 * Subcommands registered here: `init`, `run`, `diff`, `ci` and `suggest`. Each
 * handler resolves user-supplied paths against the current working directory.
 */
async function main() {
    const cli = new Command();
    cli
        .name("promptmgr")
        .description("PromptManager: regression-safe prompt + tool-calling evaluation")
        .version("0.1.0");

    // init: write the scaffold into the current directory.
    const handleInit = async (options) => {
        await runInit(process.cwd(), options);
    };

    // run: execute one suite and persist its report.
    const handleRun = async (options) => {
        const provider = parseProvider(options.provider);
        const report = await runSuite({
            suiteId: options.suite,
            provider,
            model: options.model,
            outPath: options.out,
            configPath: options.config,
            cwd: process.cwd(),
            concurrency: toInt(options.concurrency, 4),
        });
        printRunSummary(report);
        const outDir = await resolveConfiguredOutDir(process.cwd(), options.config);
        let reportPath;
        if (options.out) {
            reportPath = path.resolve(process.cwd(), options.out);
        }
        else {
            reportPath = defaultRunReportPath(process.cwd(), options.suite, provider, outDir);
        }
        await writeRunReport(reportPath, report);
        process.stdout.write(`Run report written: ${reportPath}\n`);
    };

    // diff: compare two stored reports and print the diff as JSON.
    const handleDiff = async (options) => {
        const baselinePath = path.resolve(process.cwd(), options.baseline);
        const baseline = await readJsonFile(baselinePath);
        const candidatePath = path.resolve(process.cwd(), options.candidate);
        const candidate = await readJsonFile(candidatePath);
        const diff = diffRuns(baseline, candidate);
        printDiffSummary(diff);
        process.stdout.write(`${JSON.stringify(diff, null, 2)}\n`);
    };

    // ci: run the suite, store the candidate report, then gate on regressions.
    const handleCi = async (options) => {
        const provider = parseProvider(options.provider);
        const candidate = await runSuite({
            suiteId: options.suite,
            provider,
            model: options.model,
            configPath: options.config,
            cwd: process.cwd(),
            concurrency: toInt(options.concurrency, 4),
        });
        printRunSummary(candidate);
        const outDir = await resolveConfiguredOutDir(process.cwd(), options.config);
        let candidatePath;
        if (options.out) {
            candidatePath = path.resolve(process.cwd(), options.out);
        }
        else {
            candidatePath = defaultRunReportPath(process.cwd(), options.suite, provider, outDir);
        }
        await writeRunReport(candidatePath, candidate);
        process.stdout.write(`Candidate run report written: ${candidatePath}\n`);
        const baseline = await readJsonFile(path.resolve(process.cwd(), options.baseline));
        const diff = diffRuns(baseline, candidate);
        printDiffSummary(diff);
        // The run passes when gating is off or no regression was found.
        if (!(options.failOnRegression ?? true) || !(diff.regressions.length > 0)) {
            process.stdout.write("CI check passed.\n");
            return;
        }
        process.stderr.write(`CI failed: ${diff.regressions.length} regressions detected.\n`);
        process.exitCode = 1;
    };

    // suggest: derive prompt suggestions from a stored run report.
    const handleSuggest = async (options) => {
        const report = await readJsonFile(path.resolve(process.cwd(), options.run));
        const suggestions = await generateSuggestions({
            report,
            maxSuggestions: toInt(options.max, 5),
            withAi: options.withAi ?? false,
            aiModel: options.model,
        });
        printSuggestionSummary(suggestions);
        let suggestionPath;
        if (options.out) {
            suggestionPath = path.resolve(process.cwd(), options.out);
        }
        else {
            suggestionPath = defaultSuggestionPath(process.cwd(), report.suiteId);
        }
        await writeSuggestionReport(suggestionPath, suggestions);
        process.stdout.write(`Suggestion report written: ${suggestionPath}\n`);
    };

    cli
        .command("init")
        .description("Initialize PromptManager scaffold in current repo")
        .option("--force", "Overwrite existing scaffold files")
        .action(handleInit);

    cli
        .command("run")
        .description("Run one suite against a provider/model and emit run report")
        .requiredOption("--suite <suite>", "Suite ID")
        .requiredOption("--provider <provider>", "Provider: openai|anthropic|google")
        .option("--model <model>", "Model override")
        .option("--out <path>", "Output report JSON path")
        .option("--config <path>", "Config path (default: promptmanager.config.ts/json)")
        .option("--concurrency <n>", "Parallel case workers (default: 4)", "4")
        .action(handleRun);

    cli
        .command("diff")
        .description("Diff baseline vs candidate run reports")
        .requiredOption("--baseline <path>", "Baseline report path")
        .requiredOption("--candidate <path>", "Candidate report path")
        .action(handleDiff);

    cli
        .command("ci")
        .description("Run suite and fail on regression against a baseline")
        .requiredOption("--suite <suite>", "Suite ID")
        .requiredOption("--provider <provider>", "Provider: openai|anthropic|google")
        .requiredOption("--baseline <path>", "Baseline report path")
        .option("--model <model>", "Model override")
        .option("--config <path>", "Config path")
        .option("--out <path>", "Candidate report output path")
        .option("--fail-on-regression", "Fail CI when regressions exist", true)
        .option("--concurrency <n>", "Parallel case workers (default: 4)", "4")
        .action(handleCi);

    cli
        .command("suggest")
        .description("Generate prompt improvement suggestions from a run report")
        .requiredOption("--run <path>", "Run report path")
        .option("--out <path>", "Output suggestion report path")
        .option("--with-ai", "Use AI suggestion generation when OPENAI_API_KEY is available", false)
        .option("--model <model>", "Suggestion model override")
        .option("--max <n>", "Max number of suggestions", "5")
        .action(handleSuggest);

    await cli.parseAsync(process.argv);
}
147
// Entry point: run the CLI; on failure print the reason to stderr and mark the
// process as failed without forcing an immediate exit.
main().catch((error) => {
    const reason = error instanceof Error ? error.message : String(error);
    process.stderr.write(`promptmgr failed: ${reason}\n`);
    process.exitCode = 1;
});
151
+ //# sourceMappingURL=cli.js.map