@rekshaw/promptmanager 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +117 -0
- package/dist/assertions.d.ts +3 -0
- package/dist/assertions.js +125 -0
- package/dist/assertions.js.map +1 -0
- package/dist/cli/init.d.ts +5 -0
- package/dist/cli/init.js +244 -0
- package/dist/cli/init.js.map +1 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +151 -0
- package/dist/cli.js.map +1 -0
- package/dist/config.d.ts +7 -0
- package/dist/config.js +118 -0
- package/dist/config.js.map +1 -0
- package/dist/dataset.d.ts +2 -0
- package/dist/dataset.js +38 -0
- package/dist/dataset.js.map +1 -0
- package/dist/diffRuns.d.ts +2 -0
- package/dist/diffRuns.js +52 -0
- package/dist/diffRuns.js.map +1 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.js +5 -0
- package/dist/index.js.map +1 -0
- package/dist/prompts.d.ts +2 -0
- package/dist/prompts.js +47 -0
- package/dist/prompts.js.map +1 -0
- package/dist/providers/anthropic.d.ts +2 -0
- package/dist/providers/anthropic.js +135 -0
- package/dist/providers/anthropic.js.map +1 -0
- package/dist/providers/common.d.ts +8 -0
- package/dist/providers/common.js +32 -0
- package/dist/providers/common.js.map +1 -0
- package/dist/providers/google.d.ts +2 -0
- package/dist/providers/google.js +149 -0
- package/dist/providers/google.js.map +1 -0
- package/dist/providers/openai.d.ts +2 -0
- package/dist/providers/openai.js +171 -0
- package/dist/providers/openai.js.map +1 -0
- package/dist/providers/registry.d.ts +5 -0
- package/dist/providers/registry.js +29 -0
- package/dist/providers/registry.js.map +1 -0
- package/dist/reporting.d.ts +8 -0
- package/dist/reporting.js +45 -0
- package/dist/reporting.js.map +1 -0
- package/dist/runSuite.d.ts +2 -0
- package/dist/runSuite.js +164 -0
- package/dist/runSuite.js.map +1 -0
- package/dist/schema.d.ts +7 -0
- package/dist/schema.js +22 -0
- package/dist/schema.js.map +1 -0
- package/dist/suggestions.d.ts +2 -0
- package/dist/suggestions.js +172 -0
- package/dist/suggestions.js.map +1 -0
- package/dist/tools/loadTools.d.ts +5 -0
- package/dist/tools/loadTools.js +30 -0
- package/dist/tools/loadTools.js.map +1 -0
- package/dist/tools/toolRunner.d.ts +13 -0
- package/dist/tools/toolRunner.js +109 -0
- package/dist/tools/toolRunner.js.map +1 -0
- package/dist/types.d.ts +242 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/dist/utils.d.ts +13 -0
- package/dist/utils.js +119 -0
- package/dist/utils.js.map +1 -0
- package/package.json +35 -0
- package/runtime/tool-worker.mjs +130 -0
- package/templates/promptmanager.workflow.yml +22 -0
package/README.md
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
# PromptManager
|
|
2
|
+
|
|
3
|
+
PromptManager is a Node/TypeScript CLI + SDK for regression-safe prompt development with real tool-calling.
|
|
4
|
+
|
|
5
|
+
## What it does
|
|
6
|
+
|
|
7
|
+
- Version prompts in Git (`prompts/<promptId>/v<semver>.md`)
|
|
8
|
+
- Run eval suites from JSONL fixtures
|
|
9
|
+
- Execute real tool handlers in subprocess isolation
|
|
10
|
+
- Validate outputs with JSON Schema + field-level assertions
|
|
11
|
+
- Diff candidate runs against baseline reports and fail CI on regressions
|
|
12
|
+
- Generate non-blocking prompt improvement suggestions
|
|
13
|
+
|
|
14
|
+
## Install
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
npm install @rekshaw/promptmanager
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Initialize
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
npx promptmgr init
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
This creates:
|
|
27
|
+
|
|
28
|
+
- `promptmanager.config.ts`
|
|
29
|
+
- `prompts/customer-email-parser/*`
|
|
30
|
+
- `evals/customer-email/*`
|
|
31
|
+
- `tools/customer-email-tools.mjs`
|
|
32
|
+
- `.github/workflows/promptmanager.yml`
|
|
33
|
+
|
|
34
|
+
A reusable workflow template is also included at `templates/promptmanager.workflow.yml`.
|
|
35
|
+
|
|
36
|
+
## CLI
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
promptmgr run --suite customer-email-parser --provider openai
|
|
40
|
+
promptmgr diff --baseline ./baseline.json --candidate ./candidate.json
|
|
41
|
+
promptmgr ci --suite customer-email-parser --provider openai --baseline ./baseline/run-report.json --fail-on-regression
|
|
42
|
+
promptmgr suggest --run ./candidate.json --with-ai
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Config contract
|
|
46
|
+
|
|
47
|
+
`promptmanager.config.ts` (or `.json`) must include:
|
|
48
|
+
|
|
49
|
+
- `providers`
|
|
50
|
+
- `suites`
|
|
51
|
+
- `toolRunner`
|
|
52
|
+
- `privacy`
|
|
53
|
+
- `reporting`
|
|
54
|
+
|
|
55
|
+
## Tool module contract
|
|
56
|
+
|
|
57
|
+
```ts
|
|
58
|
+
import type { ToolModuleShape } from "@rekshaw/promptmanager";
|
|
59
|
+
|
|
60
|
+
export const tools: ToolModuleShape["tools"] = [
|
|
61
|
+
{
|
|
62
|
+
name: "my_tool",
|
|
63
|
+
description: "Tool description",
|
|
64
|
+
strict: true,
|
|
65
|
+
inputSchema: {
|
|
66
|
+
type: "object",
|
|
67
|
+
properties: { foo: { type: "string" } },
|
|
68
|
+
required: ["foo"],
|
|
69
|
+
additionalProperties: false
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
];
|
|
73
|
+
|
|
74
|
+
export const handlers: ToolModuleShape["handlers"] = {
|
|
75
|
+
async my_tool(args, context) {
|
|
76
|
+
return { ok: true };
|
|
77
|
+
}
|
|
78
|
+
};
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## OpenAI function-calling workflow support
|
|
82
|
+
|
|
83
|
+
PromptManager's OpenAI adapter follows the same loop as the official function-calling guide:
|
|
84
|
+
|
|
85
|
+
1. Send `input + tools` to `responses.create`
|
|
86
|
+
2. Read `response.output` items
|
|
87
|
+
3. Execute each `function_call` in your app code
|
|
88
|
+
4. Append `function_call_output` items to the running `input`
|
|
89
|
+
5. Call `responses.create` again until no more function calls remain
|
|
90
|
+
|
|
91
|
+
For reasoning models, this preserves the full output items across turns (including reasoning/tool call items), matching the documented requirement.
|
|
92
|
+
|
|
93
|
+
Customer-email parsing with fare normalization is scaffolded by default. To use your existing fare mapper, replace the local function in `tools/customer-email-tools.mjs` with an import from your codebase.
|
|
94
|
+
|
|
95
|
+
## Report artifacts
|
|
96
|
+
|
|
97
|
+
Each run emits a JSON artifact with:
|
|
98
|
+
|
|
99
|
+
- summary counts
|
|
100
|
+
- per-case status (`pass|fail|error`)
|
|
101
|
+
- schema/assertion failures
|
|
102
|
+
- tool-call traces
|
|
103
|
+
- hashed case IDs
|
|
104
|
+
|
|
105
|
+
## SDK usage
|
|
106
|
+
|
|
107
|
+
```ts
|
|
108
|
+
import { runSuite, diffRuns, generateSuggestions } from "@rekshaw/promptmanager";
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
## Development
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
npm ci
|
|
115
|
+
npm run build
|
|
116
|
+
npm test
|
|
117
|
+
```
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
import { AssertionResult, AssertionSpec, JsonValue } from "./types.js";
|
|
2
|
+
/**
 * Reads an assertion spec from a JSON file on disk.
 *
 * @param assertionsPath - Path of the JSON file to read (decoded as UTF-8).
 * @returns The parsed spec. `allowAdditionalKeys` defaults to `false`,
 * `variableFields` to `[]` and `fieldMatchers` to `{}` when absent from the file.
 * @throws Error when the file's `requiredKeys` entry is not an array.
 */
export declare function loadAssertionSpec(assertionsPath: string): Promise<AssertionSpec>;
|
|
3
|
+
/**
 * Checks a model output against an assertion spec.
 *
 * @param output - The output to validate.
 * @param expectedOutput - Expected output for the case; matchers without a literal
 * `value` look their comparison value up in this object.
 * @param spec - Required keys, additional-key policy and per-field matchers.
 * @returns Overall pass flag plus the individual checks, missing keys and unexpected keys.
 */
export declare function evaluateAssertions(output: JsonValue, expectedOutput: JsonValue, spec: AssertionSpec): AssertionResult;
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import { asObject, getByPath } from "./utils.js";
|
|
3
|
+
/**
 * Reads an assertion spec from a JSON file and fills in optional fields.
 *
 * @param assertionsPath Path of the JSON spec file (read as UTF-8).
 * @returns An object with `requiredKeys`, `allowAdditionalKeys` (default `false`),
 *   `variableFields` (default `[]`) and `fieldMatchers` (default `{}`).
 * @throws Error when the document has no array-valued `requiredKeys`; this includes
 *   a document whose top-level value is `null`.
 * @throws SyntaxError when the file is not valid JSON (propagated from `JSON.parse`).
 */
export async function loadAssertionSpec(assertionsPath) {
    const raw = await fs.readFile(assertionsPath, "utf8");
    const parsed = JSON.parse(raw);
    // `JSON.parse("null")` yields null; optional chaining keeps that case on the
    // descriptive validation error below instead of a raw TypeError.
    if (!Array.isArray(parsed?.requiredKeys)) {
        throw new Error("Assertion spec must contain an array for 'requiredKeys'.");
    }
    return {
        requiredKeys: parsed.requiredKeys,
        allowAdditionalKeys: parsed.allowAdditionalKeys ?? false,
        variableFields: parsed.variableFields ?? [],
        fieldMatchers: parsed.fieldMatchers ?? {},
    };
}
|
|
16
|
+
/**
 * Determines the value a matcher should be compared against.
 *
 * Precedence: a literal `matcher.value` wins; otherwise `matcher.expectedPath`
 * (with a leading `$expected.` removed) is looked up in `expectedOutput`;
 * otherwise the matcher's own field path is looked up in `expectedOutput`.
 *
 * @param matcher Matcher entry from the assertion spec.
 * @param fieldPath Path of the field the matcher is attached to.
 * @param expectedOutput Expected output object for the current case.
 * @returns The resolved comparison value (whatever `getByPath` yields for lookups).
 */
function resolveExpectedValue(matcher, fieldPath, expectedOutput) {
    const { value: literal, expectedPath } = matcher;
    if (literal !== undefined) {
        return literal;
    }
    const lookupPath = expectedPath
        ? expectedPath.replace(/^\$expected\./, "")
        : fieldPath;
    return getByPath(expectedOutput, lookupPath);
}
|
|
26
|
+
/**
 * Serializes a JSON-like value with object keys in sorted order, so that two
 * structurally equal values produce the same string regardless of the order in
 * which their keys were inserted. Returns `undefined` for `undefined` input,
 * exactly like `JSON.stringify`.
 */
function canonicalJson(value) {
    return JSON.stringify(value, (_key, nested) => {
        if (nested === null || typeof nested !== "object" || Array.isArray(nested)) {
            return nested;
        }
        // Null-prototype target so that an own "__proto__" key is copied as data.
        const sorted = Object.create(null);
        for (const key of Object.keys(nested).sort()) {
            sorted[key] = nested[key];
        }
        return sorted;
    });
}
/**
 * Evaluates a single assertion operator against an actual value.
 *
 * Supported operators:
 * - `equals`: structural equality with `expected` (object key order is ignored).
 * - `oneOf`: `actual` structurally equals one element of the `expected` array.
 * - `contains`: substring test for strings, structural membership for arrays.
 * - `regex`: `expected` is used as a RegExp source and tested against `String(actual ?? "")`.
 * - `numericRange`: `actual` is a finite number within optional `expected.min` / `expected.max`.
 * - `exists` / `absent`: `actual` is (not) `undefined`/`null`.
 *
 * @param op Operator name.
 * @param actual Value taken from the output under test.
 * @param expected Comparison value for the operator.
 * @returns `{ passed, message }`; unknown operators yield a failed result.
 */
function runCheck(op, actual, expected) {
    switch (op) {
        case "equals": {
            const passed = canonicalJson(actual) === canonicalJson(expected);
            return {
                passed,
                message: passed ? "matches expected value" : "value does not match expected",
            };
        }
        case "oneOf": {
            // A non-array `expected` is treated as an empty option set, so the check fails.
            const pool = Array.isArray(expected) ? expected : [];
            const actualKey = canonicalJson(actual);
            const passed = pool.some((candidate) => canonicalJson(candidate) === actualKey);
            return {
                passed,
                message: passed ? "value matches allowed option" : "value not in allowed set",
            };
        }
        case "contains": {
            if (typeof actual === "string" && typeof expected === "string") {
                const passed = actual.includes(expected);
                return { passed, message: passed ? "substring found" : "substring missing" };
            }
            if (Array.isArray(actual)) {
                const expectedKey = canonicalJson(expected);
                const passed = actual.some((item) => canonicalJson(item) === expectedKey);
                return { passed, message: passed ? "array contains value" : "array does not contain value" };
            }
            return { passed: false, message: "contains expects string or array output" };
        }
        case "regex": {
            // A non-string pattern falls back to the empty pattern, which matches anything.
            const pattern = typeof expected === "string" ? expected : "";
            const regex = new RegExp(pattern);
            const passed = regex.test(String(actual ?? ""));
            return { passed, message: passed ? "regex matched" : `regex '${pattern}' did not match` };
        }
        case "numericRange": {
            const range = (expected ?? {});
            // Non-numeric output becomes NaN, which fails the finiteness test below.
            const value = typeof actual === "number" ? actual : Number.NaN;
            const minOk = range.min === undefined || value >= range.min;
            const maxOk = range.max === undefined || value <= range.max;
            const passed = Number.isFinite(value) && minOk && maxOk;
            return {
                passed,
                message: passed
                    ? "value in numeric range"
                    : `value outside range [${range.min ?? "-inf"}, ${range.max ?? "+inf"}]`,
            };
        }
        case "exists": {
            const passed = actual !== undefined && actual !== null;
            return { passed, message: passed ? "value exists" : "value missing" };
        }
        case "absent": {
            const passed = actual === undefined || actual === null;
            return { passed, message: passed ? "value absent as expected" : "value should be absent" };
        }
        default:
            return { passed: false, message: "unsupported assertion operator" };
    }
}
|
|
85
|
+
/**
 * Validates an output against an assertion spec.
 *
 * Three things are checked:
 * 1. every `spec.requiredKeys` entry is an own property of the output object;
 * 2. unless `spec.allowAdditionalKeys` is set, every top-level output key is a
 *    required key, a variable field, or a key of `spec.fieldMatchers`;
 * 3. every matcher in `spec.fieldMatchers` passes for its field.
 *
 * @param output Output under test.
 * @param expectedOutput Expected output used to resolve matcher comparison values.
 * @param spec Assertion spec (`requiredKeys`, `allowAdditionalKeys`, `variableFields`, `fieldMatchers`).
 * @returns `{ passed, checks, missingKeys, unexpectedKeys }`.
 */
export function evaluateAssertions(output, expectedOutput, spec) {
    // NOTE(review): asObject is assumed to return a plain object view of `output` — confirm in utils.
    const outputObj = asObject(output);
    const fieldMatchers = spec.fieldMatchers ?? {};
    // Own-property test: the `in` operator would also accept inherited names such
    // as "constructor" or "toString", hiding a genuinely missing required key.
    const hasOwnKey = (key) => Object.prototype.hasOwnProperty.call(outputObj, key);
    const missingKeys = spec.requiredKeys.filter((key) => !hasOwnKey(key));
    const allowedKeys = new Set([
        ...spec.requiredKeys,
        ...(spec.variableFields ?? []),
        ...Object.keys(fieldMatchers),
    ]);
    const unexpectedKeys = (spec.allowAdditionalKeys ?? false)
        ? []
        : Object.keys(outputObj).filter((key) => !allowedKeys.has(key));
    const checks = [];
    for (const [field, matchers] of Object.entries(fieldMatchers)) {
        const actualValue = getByPath(output, field);
        for (const matcher of matchers) {
            const expectedValue = resolveExpectedValue(matcher, field, expectedOutput);
            const result = runCheck(matcher.op, actualValue, expectedValue);
            const check = {
                field,
                op: matcher.op,
                passed: result.passed,
                actual: actualValue,
                message: result.message,
            };
            // `expected` is only attached when a comparison value could be resolved.
            if (expectedValue !== undefined) {
                check.expected = expectedValue;
            }
            checks.push(check);
        }
    }
    const passed = missingKeys.length === 0 &&
        unexpectedKeys.length === 0 &&
        checks.every((check) => check.passed);
    return {
        passed,
        checks,
        missingKeys,
        unexpectedKeys,
    };
}
|
|
125
|
+
//# sourceMappingURL=assertions.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"assertions.js","sourceRoot":"","sources":["../src/assertions.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAUlC,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAEjD,MAAM,CAAC,KAAK,UAAU,iBAAiB,CAAC,cAAsB;IAC5D,MAAM,GAAG,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC,CAAC;IACtD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAkB,CAAC;IAEhD,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,YAAY,CAAC,EAAE,CAAC;QACxC,MAAM,IAAI,KAAK,CAAC,0DAA0D,CAAC,CAAC;IAC9E,CAAC;IACD,OAAO;QACL,YAAY,EAAE,MAAM,CAAC,YAAY;QACjC,mBAAmB,EAAE,MAAM,CAAC,mBAAmB,IAAI,KAAK;QACxD,cAAc,EAAE,MAAM,CAAC,cAAc,IAAI,EAAE;QAC3C,aAAa,EAAE,MAAM,CAAC,aAAa,IAAI,EAAE;KAC1C,CAAC;AACJ,CAAC;AAED,SAAS,oBAAoB,CAC3B,OAAqB,EACrB,SAAiB,EACjB,cAAyB;IAEzB,IAAI,OAAO,CAAC,KAAK,KAAK,SAAS,EAAE,CAAC;QAChC,OAAO,OAAO,CAAC,KAAK,CAAC;IACvB,CAAC;IAED,IAAI,OAAO,CAAC,YAAY,EAAE,CAAC;QACzB,MAAM,SAAS,GAAG,OAAO,CAAC,YAAY,CAAC,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC,CAAC;QACpE,OAAO,SAAS,CAAC,cAAc,EAAE,SAAS,CAAC,CAAC;IAC9C,CAAC;IAED,OAAO,SAAS,CAAC,cAAc,EAAE,SAAS,CAAC,CAAC;AAC9C,CAAC;AAED,SAAS,QAAQ,CACf,EAAqB,EACrB,MAA6B,EAC7B,QAAqD;IAErD,QAAQ,EAAE,EAAE,CAAC;QACX,KAAK,QAAQ,CAAC,CAAC,CAAC;YACd,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;YACnE,OAAO;gBACL,MAAM;gBACN,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,wBAAwB,CAAC,CAAC,CAAC,+BAA+B;aAC7E,CAAC;QACJ,CAAC;QACD,KAAK,OAAO,CAAC,CAAC,CAAC;YACb,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;YACrD,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,KAAK,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC;YAC9F,OAAO;gBACL,MAAM;gBACN,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,8BAA8B,CAAC,CAAC,CAAC,0BAA0B;aAC9E,CAAC;QACJ,CAAC;QACD,KAAK,UAAU,CAAC,CAAC,CAAC;YAChB,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,OAAO,QAAQ,KAAK,QAAQ,EAAE,CAAC;gBAC/D,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;gBACzC,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,iBAAiB,CAAC,CAAC,CAAC,mBAAmB,EAAE,CAAC;YAC/E,CAAC;YACD,IAAI,KAAK,CAAC,OAAO
,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC1B,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC;gBACxF,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,8BAA8B,EAAE,CAAC;YAC/F,CAAC;YACD,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,yCAAyC,EAAE,CAAC;QAC/E,CAAC;QACD,KAAK,OAAO,CAAC,CAAC,CAAC;YACb,MAAM,OAAO,GAAG,OAAO,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;YAC7D,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,OAAO,CAAC,CAAC;YAClC,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,CAAC;YAChD,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,UAAU,OAAO,iBAAiB,EAAE,CAAC;QAC5F,CAAC;QACD,KAAK,cAAc,CAAC,CAAC,CAAC;YACpB,MAAM,KAAK,GAAG,CAAC,QAAQ,IAAI,EAAE,CAAwB,CAAC;YACtD,MAAM,KAAK,GAAG,OAAO,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC;YAC/D,MAAM,KAAK,GAAG,KAAK,CAAC,GAAG,KAAK,SAAS,IAAI,KAAK,IAAI,KAAK,CAAC,GAAG,CAAC;YAC5D,MAAM,KAAK,GAAG,KAAK,CAAC,GAAG,KAAK,SAAS,IAAI,KAAK,IAAI,KAAK,CAAC,GAAG,CAAC;YAC5D,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,KAAK,IAAI,KAAK,CAAC;YACxD,OAAO;gBACL,MAAM;gBACN,OAAO,EAAE,MAAM;oBACb,CAAC,CAAC,wBAAwB;oBAC1B,CAAC,CAAC,wBAAwB,KAAK,CAAC,GAAG,IAAI,MAAM,KAAK,KAAK,CAAC,GAAG,IAAI,MAAM,GAAG;aAC3E,CAAC;QACJ,CAAC;QACD,KAAK,QAAQ,CAAC,CAAC,CAAC;YACd,MAAM,MAAM,GAAG,MAAM,KAAK,SAAS,IAAI,MAAM,KAAK,IAAI,CAAC;YACvD,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC;QACxE,CAAC;QACD,KAAK,QAAQ,CAAC,CAAC,CAAC;YACd,MAAM,MAAM,GAAG,MAAM,KAAK,SAAS,IAAI,MAAM,KAAK,IAAI,CAAC;YACvD,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,0BAA0B,CAAC,CAAC,CAAC,wBAAwB,EAAE,CAAC;QAC7F,CAAC;QACD;YACE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,gCAAgC,EAAE,CAAC;IACxE,CAAC;AACH,CAAC;AAED,MAAM,UAAU,kBAAkB,CAChC,MAAiB,EACjB,cAAyB,EACzB,IAAmB;IAEnB,MAAM,SAAS,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC;IACnC,MAAM,MAAM,GAA2B,EAAE,CAAC;IAE1C,MAAM,WAAW,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,I
AAI,SAAS,CAAC,CAAC,CAAC;IAE3E,MAAM,WAAW,GAAG,IAAI,GAAG,CAAS;QAClC,GAAG,IAAI,CAAC,YAAY;QACpB,GAAG,CAAC,IAAI,CAAC,cAAc,IAAI,EAAE,CAAC;QAC9B,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,IAAI,EAAE,CAAC;KACzC,CAAC,CAAC;IAEH,MAAM,cAAc,GAAG,CAAC,IAAI,CAAC,mBAAmB,IAAI,KAAK,CAAC;QACxD,CAAC,CAAC,EAAE;QACJ,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;IAElE,KAAK,MAAM,CAAC,KAAK,EAAE,QAAQ,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,aAAa,IAAI,EAAE,CAAC,EAAE,CAAC;QACzE,MAAM,WAAW,GAAG,SAAS,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;QAC7C,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,MAAM,aAAa,GAAG,oBAAoB,CAAC,OAAO,EAAE,KAAK,EAAE,cAAc,CAAC,CAAC;YAC3E,MAAM,MAAM,GAAG,QAAQ,CAAC,OAAO,CAAC,EAAE,EAAE,WAAW,EAAE,aAAa,CAAC,CAAC;YAChE,MAAM,KAAK,GAAyB;gBAClC,KAAK;gBACL,EAAE,EAAE,OAAO,CAAC,EAAE;gBACd,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,MAAM,EAAE,WAAW;gBACnB,OAAO,EAAE,MAAM,CAAC,OAAO;aACxB,CAAC;YACF,IAAI,aAAa,KAAK,SAAS,EAAE,CAAC;gBAChC,KAAK,CAAC,QAAQ,GAAG,aAA0B,CAAC;YAC9C,CAAC;YACD,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACrB,CAAC;IACH,CAAC;IAED,MAAM,MAAM,GACV,WAAW,CAAC,MAAM,KAAK,CAAC;QACxB,cAAc,CAAC,MAAM,KAAK,CAAC;QAC3B,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IAExC,OAAO;QACL,MAAM;QACN,MAAM;QACN,WAAW;QACX,cAAc;KACf,CAAC;AACJ,CAAC"}
|
package/dist/cli/init.js
ADDED
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
/**
 * Writes one scaffold file, creating parent directories as needed.
 *
 * Without `force` the file is opened with the exclusive-create flag (`wx`), so an
 * existing file is left untouched and reported as skipped. The existence check and
 * the write are a single operation, which removes the window between a separate
 * "does it exist?" probe and the write. Errors other than "already exists" are
 * propagated instead of being treated as "file missing".
 *
 * @param file Object with `filePath` (destination) and `content` (text to write).
 * @param force When true, an existing file is overwritten.
 * @returns `"created"` when the file was written, `"skipped"` when it already existed.
 */
async function writeFileSafe(file, force) {
    await fs.mkdir(path.dirname(file.filePath), { recursive: true });
    try {
        await fs.writeFile(file.filePath, file.content, {
            encoding: "utf8",
            flag: force ? "w" : "wx",
        });
    }
    catch (err) {
        if (!force && err instanceof Error && "code" in err && err.code === "EEXIST") {
            return "skipped";
        }
        throw err;
    }
    return "created";
}
|
|
17
|
+
/**
 * Builds the list of scaffold files that `init` writes.
 *
 * Each entry is `{ filePath, content }`, with `filePath` resolved against `cwd`.
 * The list covers: `promptmanager.config.ts`, the `customer-email-parser` prompt
 * (`meta.json` and `v1.0.0.md`), the `customer-email` eval fixtures
 * (`dataset.jsonl`, `schema.json`, `assertions.json`),
 * `tools/customer-email-tools.mjs` and `.github/workflows/promptmanager.yml`.
 *
 * @param cwd Directory the scaffold paths are resolved against.
 * @returns Array of `{ filePath, content }` entries; nothing is written here.
 */
function filesForCwd(cwd) {
|
|
18
|
+
return [
|
|
19
|
+
{
|
|
20
|
+
filePath: path.resolve(cwd, "promptmanager.config.ts"),
|
|
21
|
+
content: `import type { PromptManagerConfig } from "@rekshaw/promptmanager";
|
|
22
|
+
|
|
23
|
+
const config: PromptManagerConfig = {
|
|
24
|
+
providers: {
|
|
25
|
+
openai: {
|
|
26
|
+
apiKeyEnv: "OPENAI_API_KEY",
|
|
27
|
+
parallelToolCalls: false,
|
|
28
|
+
toolChoice: "auto"
|
|
29
|
+
},
|
|
30
|
+
anthropic: { apiKeyEnv: "ANTHROPIC_API_KEY" },
|
|
31
|
+
google: { apiKeyEnv: "GEMINI_API_KEY" }
|
|
32
|
+
},
|
|
33
|
+
suites: [
|
|
34
|
+
{
|
|
35
|
+
id: "customer-email-parser",
|
|
36
|
+
promptId: "customer-email-parser",
|
|
37
|
+
datasetPath: "evals/customer-email/dataset.jsonl",
|
|
38
|
+
schemaPath: "evals/customer-email/schema.json",
|
|
39
|
+
assertionsPath: "evals/customer-email/assertions.json",
|
|
40
|
+
toolsModule: "tools/customer-email-tools.mjs",
|
|
41
|
+
modelByProvider: {
|
|
42
|
+
openai: "gpt-5-mini",
|
|
43
|
+
anthropic: "claude-3-5-sonnet-latest",
|
|
44
|
+
google: "gemini-2.0-flash"
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
],
|
|
48
|
+
toolRunner: {
|
|
49
|
+
mode: "subprocess",
|
|
50
|
+
command: "node",
|
|
51
|
+
envAllowlist: ["TZ"],
|
|
52
|
+
timeoutMs: 5000,
|
|
53
|
+
maxToolCallsPerCase: 8
|
|
54
|
+
},
|
|
55
|
+
privacy: {
|
|
56
|
+
allowRawProductionFixtures: true,
|
|
57
|
+
redactInReports: true,
|
|
58
|
+
encryptionAtRest: false
|
|
59
|
+
},
|
|
60
|
+
reporting: {
|
|
61
|
+
includeToolTrace: true,
|
|
62
|
+
outDir: "promptmanager-reports"
|
|
63
|
+
}
|
|
64
|
+
};
|
|
65
|
+
|
|
66
|
+
export default config;
|
|
67
|
+
`,
|
|
68
|
+
},
|
|
69
|
+
{
|
|
70
|
+
filePath: path.resolve(cwd, "prompts/customer-email-parser/meta.json"),
|
|
71
|
+
content: `${JSON.stringify({ currentVersion: "1.0.0", versions: ["1.0.0"] }, null, 2)}\n`,
|
|
72
|
+
},
|
|
73
|
+
{
|
|
74
|
+
filePath: path.resolve(cwd, "prompts/customer-email-parser/v1.0.0.md"),
|
|
75
|
+
content: `You extract structured booking data from customer confirmation emails.
|
|
76
|
+
|
|
77
|
+
Rules:
|
|
78
|
+
1. Return only valid JSON. No markdown. No prose.
|
|
79
|
+
2. If the email contains a fare label, call the normalize_fare_type function.
|
|
80
|
+
3. Set fare_type_normalized from the function output.
|
|
81
|
+
4. If a field is missing, return null for that field.
|
|
82
|
+
|
|
83
|
+
Required output fields:
|
|
84
|
+
- customer_email
|
|
85
|
+
- reservation_code
|
|
86
|
+
- departure_date
|
|
87
|
+
- fare_type_raw
|
|
88
|
+
- fare_type_normalized
|
|
89
|
+
`,
|
|
90
|
+
},
|
|
91
|
+
{
|
|
92
|
+
filePath: path.resolve(cwd, "evals/customer-email/dataset.jsonl"),
|
|
93
|
+
content: [
|
|
94
|
+
JSON.stringify({
|
|
95
|
+
caseId: "customer-email-001",
|
|
96
|
+
input: {
|
|
97
|
+
subject: "Your flight is confirmed",
|
|
98
|
+
body: "Hello Andrea, thanks for booking with us. Reservation ZX81Y is confirmed. Passenger fare type: Economy Flex. Departure date: 2026-04-18. Contact: andrea@example.com"
|
|
99
|
+
},
|
|
100
|
+
expected: {
|
|
101
|
+
customer_email: "andrea@example.com",
|
|
102
|
+
reservation_code: "ZX81Y",
|
|
103
|
+
departure_date: "2026-04-18",
|
|
104
|
+
fare_type_raw: "Economy Flex",
|
|
105
|
+
fare_type_normalized: "ECONOMY_FLEX"
|
|
106
|
+
},
|
|
107
|
+
tags: ["happy-path", "fare-normalization"]
|
|
108
|
+
}),
|
|
109
|
+
].join("\n") + "\n",
|
|
110
|
+
},
|
|
111
|
+
{
|
|
112
|
+
filePath: path.resolve(cwd, "evals/customer-email/schema.json"),
|
|
113
|
+
content: `${JSON.stringify({
|
|
114
|
+
type: "object",
|
|
115
|
+
properties: {
|
|
116
|
+
customer_email: { type: ["string", "null"] },
|
|
117
|
+
reservation_code: { type: ["string", "null"] },
|
|
118
|
+
departure_date: { type: ["string", "null"] },
|
|
119
|
+
fare_type_raw: { type: ["string", "null"] },
|
|
120
|
+
fare_type_normalized: { type: ["string", "null"] }
|
|
121
|
+
},
|
|
122
|
+
required: [
|
|
123
|
+
"customer_email",
|
|
124
|
+
"reservation_code",
|
|
125
|
+
"departure_date",
|
|
126
|
+
"fare_type_raw",
|
|
127
|
+
"fare_type_normalized"
|
|
128
|
+
],
|
|
129
|
+
additionalProperties: false
|
|
130
|
+
}, null, 2)}\n`,
|
|
131
|
+
},
|
|
132
|
+
{
|
|
133
|
+
filePath: path.resolve(cwd, "evals/customer-email/assertions.json"),
|
|
134
|
+
content: `${JSON.stringify({
|
|
135
|
+
requiredKeys: [
|
|
136
|
+
"customer_email",
|
|
137
|
+
"reservation_code",
|
|
138
|
+
"departure_date",
|
|
139
|
+
"fare_type_raw",
|
|
140
|
+
"fare_type_normalized"
|
|
141
|
+
],
|
|
142
|
+
allowAdditionalKeys: false,
|
|
143
|
+
variableFields: [],
|
|
144
|
+
fieldMatchers: {
|
|
145
|
+
customer_email: [{ op: "regex", value: "^[^@\\s]+@[^@\\s]+\\.[^@\\s]+$" }],
|
|
146
|
+
reservation_code: [{ op: "regex", value: "^[A-Z0-9-]{4,12}$" }],
|
|
147
|
+
departure_date: [{ op: "regex", value: "^\\d{4}-\\d{2}-\\d{2}$" }],
|
|
148
|
+
fare_type_normalized: [{ op: "oneOf", value: ["ECONOMY_BASIC", "ECONOMY_FLEX", "PREMIUM_ECONOMY", "BUSINESS", "FIRST", "UNKNOWN"] }]
|
|
149
|
+
}
|
|
150
|
+
}, null, 2)}\n`,
|
|
151
|
+
},
|
|
152
|
+
{
|
|
153
|
+
filePath: path.resolve(cwd, "tools/customer-email-tools.mjs"),
|
|
154
|
+
content: `// Replace this local mapper with an import from your codebase if you already have one.
|
|
155
|
+
// Example: import { mapFareType } from "../src/domain/fares/mapFareType.js";
|
|
156
|
+
|
|
157
|
+
const FARE_TYPE_MAP = {
|
|
158
|
+
"economy basic": "ECONOMY_BASIC",
|
|
159
|
+
"economy flex": "ECONOMY_FLEX",
|
|
160
|
+
"premium economy": "PREMIUM_ECONOMY",
|
|
161
|
+
"business": "BUSINESS",
|
|
162
|
+
"first": "FIRST"
|
|
163
|
+
};
|
|
164
|
+
|
|
165
|
+
function mapFareType(rawFareType) {
|
|
166
|
+
const normalized = String(rawFareType || "").trim().toLowerCase();
|
|
167
|
+
return FARE_TYPE_MAP[normalized] ?? "UNKNOWN";
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
export const tools = [
|
|
171
|
+
{
|
|
172
|
+
type: "function",
|
|
173
|
+
name: "normalize_fare_type",
|
|
174
|
+
description: "Map a raw fare label from an email into a normalized internal fare type enum",
|
|
175
|
+
strict: true,
|
|
176
|
+
inputSchema: {
|
|
177
|
+
type: "object",
|
|
178
|
+
properties: {
|
|
179
|
+
raw_fare_type: { type: "string", description: "Raw fare label, e.g. Economy Flex" }
|
|
180
|
+
},
|
|
181
|
+
required: ["raw_fare_type"],
|
|
182
|
+
additionalProperties: false
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
];
|
|
186
|
+
|
|
187
|
+
export const handlers = {
|
|
188
|
+
async normalize_fare_type(args) {
|
|
189
|
+
const raw = typeof args === "object" && args && "raw_fare_type" in args
|
|
190
|
+
? String(args.raw_fare_type)
|
|
191
|
+
: "";
|
|
192
|
+
|
|
193
|
+
return {
|
|
194
|
+
raw_fare_type: raw,
|
|
195
|
+
fare_type_normalized: mapFareType(raw)
|
|
196
|
+
};
|
|
197
|
+
}
|
|
198
|
+
};
|
|
199
|
+
`,
|
|
200
|
+
},
|
|
201
|
+
{
|
|
202
|
+
filePath: path.resolve(cwd, ".github/workflows/promptmanager.yml"),
|
|
203
|
+
content: `name: PromptManager CI
|
|
204
|
+
|
|
205
|
+
on:
|
|
206
|
+
pull_request:
|
|
207
|
+
push:
|
|
208
|
+
branches: [main]
|
|
209
|
+
|
|
210
|
+
jobs:
|
|
211
|
+
prompt-evals:
|
|
212
|
+
runs-on: ubuntu-latest
|
|
213
|
+
steps:
|
|
214
|
+
- uses: actions/checkout@v4
|
|
215
|
+
- uses: actions/setup-node@v4
|
|
216
|
+
with:
|
|
217
|
+
node-version: '20'
|
|
218
|
+
- run: npm ci
|
|
219
|
+
- run: npm run build
|
|
220
|
+
- run: npx promptmgr ci --suite customer-email-parser --provider openai --baseline ./baseline/run-report.json --fail-on-regression
|
|
221
|
+
env:
|
|
222
|
+
OPENAI_API_KEY: \${{ secrets.OPENAI_API_KEY }}
|
|
223
|
+
ANTHROPIC_API_KEY: \${{ secrets.ANTHROPIC_API_KEY }}
|
|
224
|
+
GEMINI_API_KEY: \${{ secrets.GEMINI_API_KEY }}
|
|
225
|
+
`,
|
|
226
|
+
},
|
|
227
|
+
];
|
|
228
|
+
}
|
|
229
|
+
/**
 * Writes the PromptManager scaffold into `cwd` and prints a one-line summary.
 *
 * Files are written one after another; each is counted as created or skipped
 * according to the outcome reported by `writeFileSafe`.
 *
 * @param cwd Directory to scaffold into.
 * @param options Init options; `options.force` (default `false`) overwrites existing files.
 */
export async function runInit(cwd, options) {
    const scaffold = filesForCwd(cwd);
    const tally = { created: 0, skipped: 0 };
    for (const entry of scaffold) {
        const overwrite = options.force ?? false;
        const result = await writeFileSafe(entry, overwrite);
        // Anything other than "created" is counted as skipped.
        const bucket = result === "created" ? "created" : "skipped";
        tally[bucket] += 1;
    }
    process.stdout.write(`Initialized PromptManager scaffold. created=${tally.created} skipped=${tally.skipped}\n`);
}
|
|
244
|
+
//# sourceMappingURL=init.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"init.js","sourceRoot":"","sources":["../../src/cli/init.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,IAAI,MAAM,WAAW,CAAC;AAW7B,KAAK,UAAU,aAAa,CAAC,IAAkB,EAAE,KAAc;IAC7D,MAAM,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACjE,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,IAAI,CAAC;YACH,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAC/B,OAAO,SAAS,CAAC;QACnB,CAAC;QAAC,MAAM,CAAC;YACP,eAAe;QACjB,CAAC;IACH,CAAC;IACD,MAAM,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;IACxD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,SAAS,WAAW,CAAC,GAAW;IAC9B,OAAO;QACL;YACE,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,yBAAyB,CAAC;YACtD,OAAO,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA8Cd;SACI;QACD;YACE,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,yCAAyC,CAAC;YACtE,OAAO,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,EAAE,cAAc,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC,OAAO,CAAC,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI;SAC1F;QACD;YACE,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,yCAAyC,CAAC;YACtE,OAAO,EAAE;;;;;;;;;;;;;;CAcd;SACI;QACD;YACE,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,oCAAoC,CAAC;YACjE,OAAO,EAAE;gBACP,IAAI,CAAC,SAAS,CAAC;oBACb,MAAM,EAAE,oBAAoB;oBAC5B,KAAK,EAAE;wBACL,OAAO,EAAE,0BAA0B;wBACnC,IAAI,EAAE,sKAAsK;qBAC7K;oBACD,QAAQ,EAAE;wBACR,cAAc,EAAE,oBAAoB;wBACpC,gBAAgB,EAAE,OAAO;wBACzB,cAAc,EAAE,YAAY;wBAC5B,aAAa,EAAE,cAAc;wBAC7B,oBAAoB,EAAE,cAAc;qBACrC;oBACD,IAAI,EAAE,CAAC,YAAY,EAAE,oBAAoB,CAAC;iBAC3C,CAAC;aACH,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI;SACpB;QACD;YACE,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,kCAAkC,CAAC;YAC/D,OAAO,EAAE,GAAG,IAAI,CAAC,SAAS,CACxB;gBACE,IAAI,EAAE,QAAQ;gBACd,UAAU,EAAE;oBACV,cAAc,EAAE,EAAE,IAAI,EAAE,CAAC,QAAQ,EAAE,MAAM,CAAC,EAAE;oBAC5C,gBAAgB,EAAE,EAAE,IAAI,EAAE,CAAC,QAAQ,EAAE,MAAM,CAAC,EAAE;oBAC9C,cAAc,EAAE,EAAE,IAAI,EAAE,CAAC,QAAQ,EAAE,MAAM,CAAC,EAAE;oBAC5C,aAAa,EAAE,EAAE,IAAI,EAAE,CAAC,QAAQ,EAAE,MAAM,CAAC,EAAE;oBAC3C,oBAAoB,EAAE,EAAE,IAAI,EAAE,CAAC,QAAQ,EAAE,MAAM,CAAC,EAAE;iBACnD;gBACD,QAAQ,EAAE;oBAC
R,gBAAgB;oBAChB,kBAAkB;oBAClB,gBAAgB;oBAChB,eAAe;oBACf,sBAAsB;iBACvB;gBACD,oBAAoB,EAAE,KAAK;aAC5B,EACD,IAAI,EACJ,CAAC,CACF,IAAI;SACN;QACD;YACE,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,sCAAsC,CAAC;YACnE,OAAO,EAAE,GAAG,IAAI,CAAC,SAAS,CACxB;gBACE,YAAY,EAAE;oBACZ,gBAAgB;oBAChB,kBAAkB;oBAClB,gBAAgB;oBAChB,eAAe;oBACf,sBAAsB;iBACvB;gBACD,mBAAmB,EAAE,KAAK;gBAC1B,cAAc,EAAE,EAAE;gBAClB,aAAa,EAAE;oBACb,cAAc,EAAE,CAAC,EAAE,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,gCAAgC,EAAE,CAAC;oBAC1E,gBAAgB,EAAE,CAAC,EAAE,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,mBAAmB,EAAE,CAAC;oBAC/D,cAAc,EAAE,CAAC,EAAE,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,wBAAwB,EAAE,CAAC;oBAClE,oBAAoB,EAAE,CAAC,EAAE,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,eAAe,EAAE,cAAc,EAAE,iBAAiB,EAAE,UAAU,EAAE,OAAO,EAAE,SAAS,CAAC,EAAE,CAAC;iBACrI;aACF,EACD,IAAI,EACJ,CAAC,CACF,IAAI;SACN;QACD;YACE,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,gCAAgC,CAAC;YAC7D,OAAO,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA6Cd;SACI;QACD;YACE,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,qCAAqC,CAAC;YAClE,OAAO,EAAE;;;;;;;;;;;;;;;;;;;;;;CAsBd;SACI;KACF,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,OAAO,CAAC,GAAW,EAAE,OAAoB;IAC7D,MAAM,KAAK,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC;IAC/B,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,OAAO,GAAG,CAAC,CAAC;IAEhB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,OAAO,GAAG,MAAM,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,KAAK,IAAI,KAAK,CAAC,CAAC;QAClE,IAAI,OAAO,KAAK,SAAS,EAAE,CAAC;YAC1B,OAAO,IAAI,CAAC,CAAC;QACf,CAAC;aAAM,CAAC;YACN,OAAO,IAAI,CAAC,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,+CAA+C,OAAO,YAAY,OAAO,IAAI,CAAC,CAAC;AACtG,CAAC"}
|
package/dist/cli.d.ts
ADDED
package/dist/cli.js
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { Command } from "commander";
|
|
4
|
+
import { runSuite } from "./runSuite.js";
|
|
5
|
+
import { diffRuns } from "./diffRuns.js";
|
|
6
|
+
import { generateSuggestions } from "./suggestions.js";
|
|
7
|
+
import { runInit } from "./cli/init.js";
|
|
8
|
+
import { loadConfig } from "./config.js";
|
|
9
|
+
import { defaultRunReportPath, defaultSuggestionPath, printDiffSummary, printRunSummary, printSuggestionSummary, writeRunReport, writeSuggestionReport, } from "./reporting.js";
|
|
10
|
+
import { readJsonFile } from "./utils.js";
|
|
11
|
+
/**
 * Validates a `--provider` argument.
 *
 * @param value Raw provider name from the command line.
 * @returns `value` unchanged when it is `openai`, `anthropic` or `google`.
 * @throws Error naming the rejected value and the accepted providers.
 */
function parseProvider(value) {
    const supported = ["openai", "anthropic", "google"];
    if (!supported.includes(value)) {
        throw new Error(`Invalid provider '${value}'. Expected openai|anthropic|google.`);
    }
    return value;
}
|
|
17
|
+
/**
 * Parses a command-line value as a base-10 integer.
 *
 * The whole trimmed input must be an optionally signed run of digits. Partially
 * numeric input such as `"4abc"`, `"1e3"` or `"2.5"` yields `fallback` rather than
 * being silently truncated to its numeric prefix.
 *
 * @param value Raw option value (typically a string; `undefined`/`null` yield `fallback`).
 * @param fallback Value returned when `value` is not a valid integer.
 * @returns The parsed integer, or `fallback`.
 */
function toInt(value, fallback) {
    const text = String(value ?? "").trim();
    if (!/^[+-]?\d+$/.test(text)) {
        return fallback;
    }
    const parsed = Number.parseInt(text, 10);
    // Extremely long digit strings overflow to Infinity; treat those as invalid too.
    return Number.isFinite(parsed) ? parsed : fallback;
}
|
|
21
|
+
/**
 * Work out the directory that report files should be written to.
 *
 * Loads the config via `loadConfig(cwd, configPath)` and resolves
 * `config.reporting.outDir` (or "promptmanager-reports" when that is
 * null/undefined) against the directory containing the loaded config file.
 * Any failure inside that attempt is swallowed on purpose and the result
 * falls back to "promptmanager-reports" under `cwd`.
 *
 * @param cwd Working directory handed to `loadConfig` and used for the fallback.
 * @param configPath Optional explicit config path, forwarded to `loadConfig`.
 * @returns Absolute path of the report output directory.
 */
async function resolveConfiguredOutDir(cwd, configPath) {
    const reportsDirName = "promptmanager-reports";
    try {
        const loaded = await loadConfig(cwd, configPath);
        const configuredDir = loaded.config.reporting.outDir ?? reportsDirName;
        return path.resolve(path.dirname(loaded.path), configuredDir);
    }
    catch {
        // Best effort: a missing or unreadable config must not block report writing.
        return path.resolve(cwd, reportsDirName);
    }
}
|
|
31
|
+
/**
 * Build the `promptmgr` command-line interface and dispatch the current
 * invocation (`process.argv`).
 *
 * Subcommands registered here:
 *  - `init`    calls `runInit` for the current working directory.
 *  - `run`     calls `runSuite`, prints a summary and writes the run report.
 *  - `diff`    reads two report files, diffs them and prints the diff as JSON.
 *  - `ci`      like `run`, then diffs against a baseline report and sets
 *              exit code 1 when regressions are found (unless disabled).
 *  - `suggest` reads a run report, calls `generateSuggestions` and writes
 *              the suggestion report.
 *
 * Relative paths given on the command line are resolved against
 * `process.cwd()`.
 *
 * @returns A promise that settles once the selected command's action finishes;
 *   errors thrown by an action propagate to the caller.
 */
async function main() {
    const program = new Command();
    program
        .name("promptmgr")
        .description("PromptManager: regression-safe prompt + tool-calling evaluation")
        .version("0.1.0");

    program
        .command("init")
        .description("Initialize PromptManager scaffold in current repo")
        .option("--force", "Overwrite existing scaffold files")
        .action(async (options) => {
            await runInit(process.cwd(), options);
        });

    program
        .command("run")
        .description("Run one suite against a provider/model and emit run report")
        .requiredOption("--suite <suite>", "Suite ID")
        .requiredOption("--provider <provider>", "Provider: openai|anthropic|google")
        .option("--model <model>", "Model override")
        .option("--out <path>", "Output report JSON path")
        .option("--config <path>", "Config path (default: promptmanager.config.ts/json)")
        .option("--concurrency <n>", "Parallel case workers (default: 4)", "4")
        .action(async (options) => {
            const provider = parseProvider(options.provider);
            // NOTE(review): `outPath` is forwarded to runSuite and the report is also
            // written below; confirm runSuite does not write the same file itself.
            const report = await runSuite({
                suiteId: options.suite,
                provider,
                model: options.model,
                outPath: options.out,
                configPath: options.config,
                cwd: process.cwd(),
                concurrency: toInt(options.concurrency, 4),
            });
            printRunSummary(report);
            const outDir = await resolveConfiguredOutDir(process.cwd(), options.config);
            // An explicit --out wins; otherwise use the default path under the configured outDir.
            const outPath = options.out
                ? path.resolve(process.cwd(), options.out)
                : defaultRunReportPath(process.cwd(), options.suite, provider, outDir);
            await writeRunReport(outPath, report);
            process.stdout.write(`Run report written: ${outPath}\n`);
        });

    program
        .command("diff")
        .description("Diff baseline vs candidate run reports")
        .requiredOption("--baseline <path>", "Baseline report path")
        .requiredOption("--candidate <path>", "Candidate report path")
        .action(async (options) => {
            const baseline = await readJsonFile(path.resolve(process.cwd(), options.baseline));
            const candidate = await readJsonFile(path.resolve(process.cwd(), options.candidate));
            const diff = diffRuns(baseline, candidate);
            printDiffSummary(diff);
            process.stdout.write(`${JSON.stringify(diff, null, 2)}\n`);
        });

    program
        .command("ci")
        .description("Run suite and fail on regression against a baseline")
        .requiredOption("--suite <suite>", "Suite ID")
        .requiredOption("--provider <provider>", "Provider: openai|anthropic|google")
        .requiredOption("--baseline <path>", "Baseline report path")
        .option("--model <model>", "Model override")
        .option("--config <path>", "Config path")
        .option("--out <path>", "Candidate report output path")
        .option("--fail-on-regression", "Fail CI when regressions exist", true)
        // A boolean flag that defaults to true can only be turned off through its
        // negated form; without this option the flag above could never be disabled.
        .option("--no-fail-on-regression", "Report regressions without failing CI")
        .option("--concurrency <n>", "Parallel case workers (default: 4)", "4")
        .action(async (options) => {
            const provider = parseProvider(options.provider);
            const candidate = await runSuite({
                suiteId: options.suite,
                provider,
                model: options.model,
                configPath: options.config,
                cwd: process.cwd(),
                concurrency: toInt(options.concurrency, 4),
            });
            printRunSummary(candidate);
            const outDir = await resolveConfiguredOutDir(process.cwd(), options.config);
            const candidatePath = options.out
                ? path.resolve(process.cwd(), options.out)
                : defaultRunReportPath(process.cwd(), options.suite, provider, outDir);
            // The candidate report is written before the baseline is read, so it is
            // on disk even when the comparison below fails.
            await writeRunReport(candidatePath, candidate);
            process.stdout.write(`Candidate run report written: ${candidatePath}\n`);
            const baseline = await readJsonFile(path.resolve(process.cwd(), options.baseline));
            const diff = diffRuns(baseline, candidate);
            printDiffSummary(diff);
            if ((options.failOnRegression ?? true) && diff.regressions.length > 0) {
                process.stderr.write(`CI failed: ${diff.regressions.length} regressions detected.\n`);
                // Set the exit code instead of calling process.exit so pending output can flush.
                process.exitCode = 1;
                return;
            }
            process.stdout.write("CI check passed.\n");
        });

    program
        .command("suggest")
        .description("Generate prompt improvement suggestions from a run report")
        .requiredOption("--run <path>", "Run report path")
        .option("--out <path>", "Output suggestion report path")
        .option("--with-ai", "Use AI suggestion generation when OPENAI_API_KEY is available", false)
        .option("--model <model>", "Suggestion model override")
        .option("--max <n>", "Max number of suggestions", "5")
        .action(async (options) => {
            const report = await readJsonFile(path.resolve(process.cwd(), options.run));
            const suggestions = await generateSuggestions({
                report,
                maxSuggestions: toInt(options.max, 5),
                withAi: options.withAi ?? false,
                aiModel: options.model,
            });
            printSuggestionSummary(suggestions);
            const outPath = options.out
                ? path.resolve(process.cwd(), options.out)
                : defaultSuggestionPath(process.cwd(), report.suiteId);
            await writeSuggestionReport(outPath, suggestions);
            process.stdout.write(`Suggestion report written: ${outPath}\n`);
        });

    await program.parseAsync(process.argv);
}
|
|
147
|
+
// Entry point: run the CLI and turn any rejection into a one-line message on
// stderr plus a non-zero exit code.
main().catch((failure) => {
    const reason = failure instanceof Error ? failure.message : String(failure);
    process.stderr.write(`promptmgr failed: ${reason}\n`);
    process.exitCode = 1;
});
|
|
151
|
+
//# sourceMappingURL=cli.js.map
|