@archsight/aios 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/CHANGELOG.md +35 -0
- package/OPENCODE.md +23 -0
- package/README.md +64 -31
- package/RELEASE_NOTES.md +25 -0
- package/agents/README.md +6 -3
- package/agents/daedalus/system-prompt.md +2 -0
- package/agents/hestia/constraints.md +7 -0
- package/agents/hestia/responsibilities.md +7 -0
- package/agents/hestia/role.md +12 -0
- package/agents/hestia/system-prompt.md +23 -0
- package/agents/hestia/workflow.md +8 -0
- package/agents/plutus/constraints.md +7 -0
- package/agents/plutus/responsibilities.md +7 -0
- package/agents/plutus/role.md +12 -0
- package/agents/plutus/system-prompt.md +24 -0
- package/agents/plutus/workflow.md +8 -0
- package/agents/themis/constraints.md +7 -0
- package/agents/themis/responsibilities.md +7 -0
- package/agents/themis/role.md +12 -0
- package/agents/themis/system-prompt.md +24 -0
- package/agents/themis/workflow.md +8 -0
- package/bin/archsight-aios.mjs +555 -24
- package/docs/PUBLIC_DISCOVERY.md +16 -2
- package/docs/business-expert-guide.md +5 -3
- package/docs/glossary.md +11 -3
- package/docs/quickstart.md +18 -4
- package/gemini-extension.json +1 -1
- package/package.json +17 -6
- package/prompts/README.md +12 -0
- package/prompts/evaluation-policy.md +70 -0
- package/prompts/evaluations/engineering-business-basic-advisory-validation-2026-06-16.md +87 -0
- package/prompts/evaluations/engineering-business-basic-fixtures.json +375 -0
- package/prompts/evaluations/engineering-business-basic-model-output.example.json +179 -0
- package/prompts/evaluations/engineering-business-basic-prompts-2026-06-16.md +205 -0
- package/prompts/evaluations/engineering-business-basic-scorecard.json +238 -0
- package/prompts/evaluations/engineering-business-public-advisory-fixtures.json +422 -0
- package/prompts/evaluations/public-advisory-md/01-technical-bid.md +63 -0
- package/prompts/evaluations/public-advisory-md/02-contract.md +61 -0
- package/prompts/evaluations/public-advisory-md/03-daily.md +69 -0
- package/prompts/evaluations/public-advisory-md/04-meeting.md +48 -0
- package/prompts/evaluations/public-advisory-md/05-variation.md +63 -0
- package/prompts/evaluations/public-advisory-md/06-scheme.md +60 -0
- package/prompts/failure-cases.md +5 -1
- package/prompts/prompt-registry.md +10 -0
- package/runtime/agent-routing.md +36 -8
- package/runtime/archsight-aios.manifest.json +104 -40
- package/runtime/hermes/agent-registry.md +3 -0
- package/runtime/hermes/workspace-binding.md +3 -0
- package/runtime/skill-routing.md +12 -7
- package/scripts/analyze-prompt-run-results.mjs +187 -0
- package/scripts/build-prompt-run-pack.mjs +248 -0
- package/scripts/validate-prompt-fixtures.mjs +225 -0
- package/scripts/validate-prompt-model-outputs.mjs +201 -0
- package/scripts/validate-prompt-run-results.mjs +259 -0
- package/scripts/validate-prompt-scorecard.mjs +133 -0
- package/scripts/validate-skills.mjs +6 -2
- package/skills/README.md +3 -0
- package/skills/aios-commercial-contract/SKILL.md +18 -0
- package/skills/aios-commercial-contract/prompts/basic-prompt.md +83 -0
- package/skills/aios-commercial-tender/SKILL.md +18 -0
- package/skills/aios-commercial-tender/prompts/basic-prompt.md +94 -0
- package/skills/aios-commercial-variation/SKILL.md +18 -0
- package/skills/aios-commercial-variation/prompts/basic-prompt.md +99 -0
- package/skills/aios-construction-daily/SKILL.md +18 -0
- package/skills/aios-construction-daily/prompts/basic-prompt.md +76 -0
- package/skills/aios-construction-meeting/SKILL.md +18 -0
- package/skills/aios-construction-meeting/prompts/basic-prompt.md +78 -0
- package/skills/aios-construction-scheme/SKILL.md +18 -0
- package/skills/aios-construction-scheme/prompts/basic-prompt.md +90 -0
- package/skills/aios-prompt-compare/SKILL.md +178 -0
- package/skills/aios-prompt-compare/agents/openai.yaml +4 -0
- package/skills/engineering-business-starter-kit.md +109 -0
- package/templates/README.md +16 -2
- package/templates/project-ai/.ai/ARCHSIGHT_AIOS_RULES.md +5 -4
- package/templates/project-ai/.ai/agent-routing.md +3 -1
- package/templates/project-ai/.ai/profile-detection.md +24 -0
- package/templates/project-ai/.ai/project-context.md +4 -1
- package/templates/project-ai/.ai/skills.md +25 -20
- package/templates/project-ai/AGENTS.md +6 -5
- package/templates/project-ai/AI_CODING_RULES.md +1 -1
- package/templates/project-ai/CLAUDE.md +6 -5
- package/templates/project-ai/GEMINI.md +6 -5
- package/templates/project-ai/OPENCODE.md +26 -0
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import fs from "node:fs";
|
|
4
|
+
import path from "node:path";
|
|
5
|
+
|
|
6
|
+
// Repository root: the real (symlink-resolved) working directory. All paths below are anchored to it.
const root = fs.realpathSync(process.cwd());
// Validation failures accumulate here and are reported together before exit.
const errors = [];

const defaultOutputPath = "prompts/evaluations/engineering-business-basic-model-output.example.json";
const args = parseArgs(process.argv.slice(2));
// An explicit --file wins; otherwise the bundled example output is validated.
const outputPath = args.file ?? repoPath(defaultOutputPath);
const fixturePath = repoPath("prompts/evaluations/engineering-business-basic-fixtures.json");
const fixture = readJson(fixturePath);
// In --init mode no existing output file is read.
const outputFile = args.init ? undefined : readJson(outputPath);

// Terms that must not appear in a validated item; any hit is reported as "sensitive terms leaked".
const sensitiveTerms = [
  "立信", "费敏", "闻总", "谭总",
  "茅盾中学", "鸿益", "太鑫", "飞双", "魔毯",
  "客户内部", "培训演示", "基础内测", "内测模式", "内测包"
];
|
|
32
|
+
|
|
33
|
+
/**
 * Build an absolute path below the repository root.
 *
 * @param {...string} parts - Path segments joined onto `root`.
 * @returns {string} The joined absolute path.
 * @throws {Error} "Path traversal detected" when the joined path leaves `root`.
 */
function repoPath(...parts) {
  const target = path.join(root, ...parts);
  const relative = path.relative(root, target);
  // Compare against the ".." path segment rather than a bare prefix, so that
  // in-root names which merely begin with two dots (e.g. "..data") are accepted.
  const escapesRoot =
    relative === ".." || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative);
  if (escapesRoot) {
    throw new Error(`Path traversal detected: ${target}`);
  }
  return target;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Parse command-line arguments for the model-output validator.
 *
 * Recognized options: `--file <path>`, `--init <path>` and `--force`.
 * Problems are appended to the module-level `errors` list rather than thrown.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{file: (string|undefined), init: (string|undefined), force: boolean}}
 *   Parsed options; path options are resolved through `repoPath`.
 */
function parseArgs(argv) {
  const parsed = {
    file: undefined,
    init: undefined,
    force: false
  };
  // Options that take a path value, mapped to the property they populate.
  const pathOptions = new Map([
    ["--file", "file"],
    ["--init", "init"]
  ]);

  for (let index = 0; index < argv.length; index += 1) {
    const arg = argv[index];
    if (pathOptions.has(arg)) {
      const value = argv[index + 1];
      // A following "--option" is the next flag, not a path: report the missing
      // value and leave that flag to be parsed on the next iteration.
      if (!value || value.startsWith("--")) {
        errors.push(`${arg} requires a path`);
        continue;
      }
      index += 1;
      try {
        parsed[pathOptions.get(arg)] = repoPath(value);
      } catch (error) {
        // repoPath throws on traversal; report it like any other argument error.
        errors.push(`${arg}: ${error.message}`);
      }
    } else if (arg === "--force") {
      parsed.force = true;
    } else {
      errors.push(`Unknown argument: ${arg}`);
    }
  }

  return parsed;
}
|
|
76
|
+
|
|
77
|
+
/**
 * Read and parse a JSON file, recording any failure in `errors`.
 *
 * @param {string} filePath - Absolute path of the file to load.
 * @returns {unknown} The parsed value, or `undefined` when the file could not
 *   be read or parsed (an error message has been recorded in that case).
 */
function readJson(filePath) {
  const label = path.relative(root, filePath);

  let text;
  try {
    text = fs.readFileSync(filePath, "utf8");
  } catch (error) {
    // A missing or unreadable file is not a JSON syntax problem; say so.
    errors.push(`${label}: cannot read file (${error.message})`);
    return undefined;
  }

  try {
    return JSON.parse(text);
  } catch (error) {
    errors.push(`${label}: invalid JSON (${error.message})`);
    return undefined;
  }
}
|
|
85
|
+
|
|
86
|
+
/**
 * Record a validation failure when an expectation does not hold.
 *
 * @param {unknown} condition - Truthy when the expectation is met.
 * @param {string} message - Text appended to `errors` otherwise.
 */
function check(condition, message) {
  if (condition) return;
  errors.push(message);
}
|
|
89
|
+
|
|
90
|
+
/**
 * List the sensitive terms that occur in a value.
 *
 * @param {unknown} value - A string, or any value that is searched through
 *   its JSON serialization.
 * @returns {string[]} Entries of `sensitiveTerms` found in the text (empty when none).
 */
function includesSensitiveTerm(value) {
  // JSON.stringify() returns undefined for undefined, functions and symbols;
  // treat those as empty text instead of throwing on `.includes`.
  const raw = typeof value === "string" ? value : (JSON.stringify(value) ?? "");
  return sensitiveTerms.filter((term) => raw.includes(term));
}
|
|
94
|
+
|
|
95
|
+
/**
 * Normalize a model output value into one text blob.
 *
 * @param {unknown} value - A string, or an array of strings.
 * @returns {string} The string itself, the array joined with "\n", or "" for
 *   anything else, including arrays that contain non-string entries.
 */
function outputText(value) {
  if (typeof value === "string") return value;
  // join() would coerce objects and numbers into text such as "[object Object]",
  // letting a malformed output pass the caller's non-empty check.
  if (Array.isArray(value) && value.every((line) => typeof line === "string")) {
    return value.join("\n");
  }
  return "";
}
|
|
100
|
+
|
|
101
|
+
/**
 * Write a blank model-output template to the `--init` path.
 *
 * Does nothing when the fixture failed to load. Records an error instead of
 * writing when the target exists and `--force` was not given.
 */
function createOutputTemplate() {
  if (!fixture) return;

  const targetLabel = path.relative(root, args.init);
  const overwriteBlocked = !args.force && fs.existsSync(args.init);
  if (overwriteBlocked) {
    errors.push(`${targetLabel} already exists; pass --force to overwrite`);
    return;
  }

  const promptVersion = fixture.version ?? "0.1";

  // One empty entry per fixture case, carrying the case's expectations along.
  const outputs = (fixture.cases ?? []).map(
    ({ id, promptPath, scenario, expectedStrongSections, bannedClaims }) => ({
      caseId: id,
      promptVersion,
      model: "",
      ranAt: "",
      notes: "",
      promptPath,
      scenario,
      expectedSections: expectedStrongSections,
      bannedClaims,
      output: []
    })
  );

  const template = {
    schema: 1,
    name: "engineering-business-basic-model-output-run",
    version: promptVersion,
    fixture: "prompts/evaluations/engineering-business-basic-fixtures.json",
    isExample: false,
    dataBoundary:
      "Fill this file with de-identified model outputs only. Do not include customer names, contacts, project names, exact amounts, or raw source documents.",
    outputs
  };
  const serialized = `${JSON.stringify(template, null, 2)}\n`;

  fs.mkdirSync(path.dirname(args.init), { recursive: true });
  fs.writeFileSync(args.init, serialized, "utf8");
  console.log(`Prompt model output template written: ${targetLabel}`);
}
|
|
135
|
+
|
|
136
|
+
// --init mode: write a blank template (unless argument errors were recorded)
// and exit without validating anything.
if (args.init) {
  if (errors.length === 0) createOutputTemplate();
  if (errors.length > 0) {
    console.error(`Prompt model output validation failed with ${errors.length} error(s):`);
    for (const error of errors) console.error(`- ${error}`);
    process.exit(1);
  }
  process.exit(0);
}

// True for parsed JSON objects (not null, arrays or primitives).
const isJsonObject = (value) => value !== null && typeof value === "object" && !Array.isArray(value);

// readJson() returns undefined only after recording an error. Any other
// non-object value (null, 0, false, "", an array) is valid JSON but not a usable
// document, and must fail validation rather than be skipped silently.
if (fixture !== undefined && !isJsonObject(fixture)) {
  errors.push("fixture file: top-level JSON value must be an object");
}
if (outputFile !== undefined && !isJsonObject(outputFile)) {
  errors.push("model output file: top-level JSON value must be an object");
}

if (isJsonObject(fixture) && isJsonObject(outputFile)) {
  check(outputFile.schema === 1, "model output file: schema must be 1");
  check(typeof outputFile.version === "string" && outputFile.version.length > 0, "model output file: version must be a string");
  check(Array.isArray(outputFile.outputs), "model output file: outputs must be an array");
  check(
    outputFile.fixture === "prompts/evaluations/engineering-business-basic-fixtures.json",
    "model output file: fixture path mismatch"
  );

  // Fall back to empty lists so that a wrongly typed field is reported by the
  // checks above instead of crashing on .map() / for...of.
  const fixtureCases = Array.isArray(fixture.cases) ? fixture.cases : [];
  const outputs = Array.isArray(outputFile.outputs) ? outputFile.outputs : [];

  const casesById = new Map(fixtureCases.map((item) => [item?.id, item]));
  const expectedIds = [...casesById.keys()].sort();
  const actualIds = outputs.map((item) => item?.caseId).sort();

  check(JSON.stringify(actualIds) === JSON.stringify(expectedIds), "model output file: case coverage mismatch");

  const seenIds = new Set();
  for (const [index, item] of outputs.entries()) {
    if (!isJsonObject(item)) {
      errors.push(`model output item #${index + 1}: must be an object`);
      continue;
    }

    check(typeof item.caseId === "string" && item.caseId.length > 0, "model output item: missing caseId");
    check(!seenIds.has(item.caseId), `${item.caseId}: duplicate model output`);
    seenIds.add(item.caseId);

    const sourceCase = casesById.get(item.caseId);
    check(Boolean(sourceCase), `${item.caseId}: caseId not found in fixtures`);

    check(typeof item.promptVersion === "string" && item.promptVersion.length > 0, `${item.caseId}: missing promptVersion`);
    check(item.promptVersion === fixture.version, `${item.caseId}: promptVersion must match fixture version ${fixture.version}`);
    check(typeof item.model === "string" && item.model.length > 0, `${item.caseId}: missing model`);
    check(typeof item.ranAt === "string" && item.ranAt.length > 0, `${item.caseId}: missing ranAt`);
    check(!Number.isNaN(Date.parse(item.ranAt)), `${item.caseId}: ranAt must be a parseable timestamp`);
    check(typeof item.notes === "string", `${item.caseId}: notes must be a string`);
    if (outputFile.isExample !== true) {
      check(item.model !== "example-skeleton", `${item.caseId}: non-example output must use a real model identifier`);
    }

    const text = outputText(item.output);
    check(text.length > 0, `${item.caseId}: output must be a non-empty string or string array`);

    const sensitiveHits = includesSensitiveTerm(item);
    check(sensitiveHits.length === 0, `${item.caseId}: sensitive terms leaked (${sensitiveHits.join(", ")})`);

    // Section and banned-claim expectations come from the fixture case, not from the output file.
    for (const section of sourceCase?.expectedStrongSections ?? []) {
      check(text.includes(section), `${item.caseId}: output missing expected section "${section}"`);
    }
    for (const bannedClaim of sourceCase?.bannedClaims ?? []) {
      check(!text.includes(bannedClaim), `${item.caseId}: output contains prohibited claim "${bannedClaim}"`);
    }
  }
}

if (errors.length > 0) {
  console.error(`Prompt model output validation failed with ${errors.length} error(s):`);
  for (const error of errors) console.error(`- ${error}`);
  process.exit(1);
}

console.log("Prompt model output validation passed.");
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import fs from "node:fs";
|
|
4
|
+
import path from "node:path";
|
|
5
|
+
|
|
6
|
+
// Repository root: the real (symlink-resolved) working directory. All paths below are anchored to it.
const root = fs.realpathSync(process.cwd());
// Validation failures accumulate here and are reported together before exit.
const errors = [];
// Findings about weak-variant outputs; they are summarized at the end but do not fail the run.
const diagnostics = [];

const fixturePath = repoPath("prompts/evaluations/engineering-business-basic-fixtures.json");
const fixture = readJson(fixturePath);
const args = parseArgs(process.argv.slice(2));

// Terms that must not appear in validated content; any hit is reported as "sensitive terms leaked".
const sensitiveTerms = [
  "立信", "费敏", "闻总", "谭总",
  "茅盾中学", "鸿益", "太鑫", "飞双", "魔毯",
  "客户内部", "培训演示", "基础内测", "内测模式", "内测包"
];
|
|
30
|
+
|
|
31
|
+
/**
 * Build an absolute path below the repository root.
 *
 * @param {...string} parts - Path segments joined onto `root`.
 * @returns {string} The joined absolute path.
 * @throws {Error} "Path traversal detected" when the joined path leaves `root`.
 */
function repoPath(...parts) {
  const target = path.join(root, ...parts);
  const relative = path.relative(root, target);
  // Compare against the ".." path segment rather than a bare prefix, so that
  // in-root names which merely begin with two dots (e.g. "..data") are accepted.
  const escapesRoot =
    relative === ".." || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative);
  if (escapesRoot) {
    throw new Error(`Path traversal detected: ${target}`);
  }
  return target;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Parse command-line arguments for the run-results validator.
 *
 * Recognized options: `--file <path>`, `--init <path>`, `--force` and
 * `--check-template`. With no arguments at all, template checking is selected.
 * Problems are appended to the module-level `errors` list rather than thrown.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{file: (string|undefined), init: (string|undefined), force: boolean, checkTemplate: boolean}}
 *   Parsed options; path options are resolved through `repoPath`.
 */
function parseArgs(argv) {
  const parsed = {
    file: undefined,
    init: undefined,
    force: false,
    checkTemplate: argv.length === 0
  };
  // Options that take a path value, mapped to the property they populate.
  const pathOptions = new Map([
    ["--file", "file"],
    ["--init", "init"]
  ]);

  for (let index = 0; index < argv.length; index += 1) {
    const arg = argv[index];
    if (pathOptions.has(arg)) {
      const value = argv[index + 1];
      // A following "--option" is the next flag, not a path: report the missing
      // value and leave that flag to be parsed on the next iteration.
      if (!value || value.startsWith("--")) {
        errors.push(`${arg} requires a path`);
        continue;
      }
      index += 1;
      try {
        parsed[pathOptions.get(arg)] = repoPath(value);
      } catch (error) {
        // repoPath throws on traversal; report it like any other argument error.
        errors.push(`${arg}: ${error.message}`);
      }
    } else if (arg === "--force") {
      parsed.force = true;
    } else if (arg === "--check-template") {
      parsed.checkTemplate = true;
    } else {
      errors.push(`Unknown argument: ${arg}`);
    }
  }

  // The three modes are mutually exclusive.
  if ([parsed.file, parsed.init, parsed.checkTemplate].filter(Boolean).length > 1) {
    errors.push("Use only one mode: --file, --init, or --check-template");
  }

  return parsed;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Read and parse a JSON file, recording any failure in `errors`.
 *
 * @param {string} filePath - Absolute path of the file to load.
 * @returns {unknown} The parsed value, or `undefined` when the file could not
 *   be read or parsed (an error message has been recorded in that case).
 */
function readJson(filePath) {
  const label = path.relative(root, filePath);

  let text;
  try {
    text = fs.readFileSync(filePath, "utf8");
  } catch (error) {
    // A missing or unreadable file is not a JSON syntax problem; say so.
    errors.push(`${label}: cannot read file (${error.message})`);
    return undefined;
  }

  try {
    return JSON.parse(text);
  } catch (error) {
    errors.push(`${label}: invalid JSON (${error.message})`);
    return undefined;
  }
}
|
|
90
|
+
|
|
91
|
+
/**
 * Record a validation failure when an expectation does not hold.
 *
 * @param {unknown} condition - Truthy when the expectation is met.
 * @param {string} message - Text appended to `errors` otherwise.
 */
function check(condition, message) {
  if (condition) return;
  errors.push(message);
}
|
|
94
|
+
|
|
95
|
+
/**
 * List the sensitive terms that occur in a value.
 *
 * @param {unknown} value - A string, or any value that is searched through
 *   its JSON serialization.
 * @returns {string[]} Entries of `sensitiveTerms` found in the text (empty when none).
 */
function includesSensitiveTerm(value) {
  // JSON.stringify() returns undefined for undefined, functions and symbols;
  // treat those as empty text instead of throwing on `.includes`.
  const raw = typeof value === "string" ? value : (JSON.stringify(value) ?? "");
  return sensitiveTerms.filter((term) => raw.includes(term));
}
|
|
99
|
+
|
|
100
|
+
/**
 * Normalize a model output value into one text blob.
 *
 * @param {unknown} value - A string, or an array of strings.
 * @returns {string} The string itself, the array joined with "\n", or "" for
 *   anything else, including arrays that contain non-string entries.
 */
function outputText(value) {
  if (typeof value === "string") return value;
  // join() would coerce objects and numbers into text such as "[object Object]",
  // letting a malformed output pass the caller's non-empty check.
  if (Array.isArray(value) && value.every((line) => typeof line === "string")) {
    return value.join("\n");
  }
  return "";
}
|
|
105
|
+
|
|
106
|
+
/**
 * Derive the expected run list from the fixture: two runs per case, a "weak"
 * run followed by a "basic" run.
 *
 * @returns {object[]} Run descriptors, or an empty array when the fixture
 *   failed to load.
 */
function expectedRuns() {
  if (!fixture) return [];

  // Both variants share everything except the variant name and prompt source.
  const describeRun = (item, variant, promptSource) => ({
    runId: `${item.id}::${variant}`,
    caseId: item.id,
    variant,
    promptSource,
    promptVersion: fixture.version,
    expectedStrongSections: item.expectedStrongSections,
    bannedClaims: item.bannedClaims,
    weakFailureModes: item.weakFailureModes
  });

  const cases = fixture.cases ?? [];
  return cases.flatMap((item) => [
    describeRun(item, "weak", "fixture.weakPrompt"),
    describeRun(item, "basic", item.promptPath)
  ]);
}
|
|
132
|
+
|
|
133
|
+
/**
 * Build a blank run-results document with one empty entry per expected run.
 *
 * @returns {object} The template object (not yet written anywhere).
 */
function createTemplate() {
  const outputs = expectedRuns().map((run) => {
    const {
      runId,
      caseId,
      variant,
      promptVersion,
      promptSource,
      expectedStrongSections,
      bannedClaims,
      weakFailureModes
    } = run;
    // model / ranAt / notes / output are left blank for whoever records the run.
    return {
      runId,
      caseId,
      variant,
      promptVersion,
      model: "",
      ranAt: "",
      notes: "",
      promptSource,
      expectedStrongSections,
      bannedClaims,
      weakFailureModes,
      output: []
    };
  });

  return {
    schema: 1,
    name: "engineering-business-basic-run-results",
    version: fixture?.version ?? "0.1",
    fixture: "prompts/evaluations/engineering-business-basic-fixtures.json",
    runPack: "prompts/evaluations/engineering-business-basic-run-pack.generated.json",
    isExample: false,
    dataBoundary:
      "Fill this file with de-identified weak/basic model outputs only. Do not include customer names, contacts, project names, exact amounts, or raw source documents.",
    outputs
  };
}
|
|
159
|
+
|
|
160
|
+
/**
 * Validate the structural part of a run-results document: header fields, run
 * coverage and per-run metadata. Failures are recorded through `check`.
 *
 * @param {object} template - Parsed run-results document or generated template.
 */
function validateTemplateShape(template) {
  // Compute the expected run list once; it is needed for count, coverage and lookup.
  const runs = expectedRuns();
  // Fall back to an empty list so a wrongly typed `outputs` is reported by the
  // array check below instead of crashing on .map() / for...of.
  const outputs = Array.isArray(template.outputs) ? template.outputs : [];

  check(template.schema === 1, "run results: schema must be 1");
  check(template.version === fixture?.version, `run results: version must match fixture version ${fixture?.version}`);
  check(template.fixture === "prompts/evaluations/engineering-business-basic-fixtures.json", "run results: fixture path mismatch");
  check(Array.isArray(template.outputs), "run results: outputs must be an array");
  check(template.outputs?.length === runs.length, "run results: output count must match weak/basic run count");

  const sensitiveHits = includesSensitiveTerm(template);
  check(sensitiveHits.length === 0, `run results: sensitive terms leaked (${sensitiveHits.join(", ")})`);

  const expectedByRunId = new Map(runs.map((item) => [item.runId, item]));
  const actualRunIds = outputs.map((item) => item?.runId).sort();
  const expectedRunIds = [...expectedByRunId.keys()].sort();
  check(JSON.stringify(actualRunIds) === JSON.stringify(expectedRunIds), "run results: runId coverage mismatch");

  const seen = new Set();
  for (const [index, item] of outputs.entries()) {
    if (item === null || typeof item !== "object") {
      check(false, `run results: output #${index + 1} must be an object`);
      continue;
    }

    const expected = expectedByRunId.get(item.runId);
    check(!seen.has(item.runId), `${item.runId}: duplicate output`);
    seen.add(item.runId);
    check(Boolean(expected), `${item.runId}: runId not found in expected run pack`);
    check(item.caseId === expected?.caseId, `${item.runId}: caseId mismatch`);
    check(item.variant === expected?.variant, `${item.runId}: variant mismatch`);
    check(item.promptVersion === expected?.promptVersion, `${item.runId}: promptVersion mismatch`);
    check(item.promptSource === expected?.promptSource, `${item.runId}: promptSource mismatch`);
    check(Array.isArray(item.expectedStrongSections), `${item.runId}: expectedStrongSections must be an array`);
    check(Array.isArray(item.bannedClaims), `${item.runId}: bannedClaims must be an array`);
    check(Array.isArray(item.weakFailureModes), `${item.runId}: weakFailureModes must be an array`);
  }
}
|
|
190
|
+
|
|
191
|
+
/**
 * Validate a filled-in run-results document.
 *
 * Runs the structural checks first, then checks each run's metadata and text.
 * For "basic" runs, missing sections and prohibited claims are errors; for any
 * other variant they are collected in `diagnostics` instead.
 *
 * @param {object} results - Parsed run-results document.
 */
function validateRunResults(results) {
  validateTemplateShape(results);

  const expectedByRunId = new Map(expectedRuns().map((run) => [run.runId, run]));
  const outputs = Array.isArray(results.outputs) ? results.outputs : [];
  // Only string entries can be searched for in the output text.
  const stringList = (value) => (Array.isArray(value) ? value.filter((entry) => typeof entry === "string") : []);

  for (const item of outputs) {
    // Entries that are not objects cannot be inspected; flagging them is the
    // responsibility of the shape validation above.
    if (item === null || typeof item !== "object") continue;

    check(typeof item.model === "string" && item.model.length > 0, `${item.runId}: missing model`);
    check(item.model !== "example-skeleton", `${item.runId}: model must be a real model identifier`);
    check(typeof item.ranAt === "string" && item.ranAt.length > 0, `${item.runId}: missing ranAt`);
    check(!Number.isNaN(Date.parse(item.ranAt)), `${item.runId}: ranAt must be a parseable timestamp`);
    check(typeof item.notes === "string", `${item.runId}: notes must be a string`);

    const text = outputText(item.output);
    check(text.length > 0, `${item.runId}: output must be a non-empty string or string array`);

    const sensitiveHits = includesSensitiveTerm(item);
    check(sensitiveHits.length === 0, `${item.runId}: sensitive terms leaked (${sensitiveHits.join(", ")})`);

    // Prefer the fixture-derived expectations: the copies embedded in the
    // results file are editable and must not be able to weaken the check.
    // They are used only when the runId is unknown to the fixture.
    const expected = expectedByRunId.get(item.runId);
    const strongSections = stringList(expected?.expectedStrongSections ?? item.expectedStrongSections);
    const bannedClaims = stringList(expected?.bannedClaims ?? item.bannedClaims);

    const missingSections = strongSections.filter((section) => !text.includes(section));
    const prohibitedClaims = bannedClaims.filter((claim) => text.includes(claim));

    if (item.variant === "basic") {
      for (const section of missingSections) {
        errors.push(`${item.runId}: basic output missing expected section "${section}"`);
      }
      for (const claim of prohibitedClaims) {
        errors.push(`${item.runId}: basic output contains prohibited claim "${claim}"`);
      }
    } else if (missingSections.length > 0 || prohibitedClaims.length > 0) {
      diagnostics.push({
        runId: item.runId,
        missingSections,
        prohibitedClaims
      });
    }
  }
}
|
|
226
|
+
|
|
227
|
+
// Mode dispatch: --init writes a template, --file validates a results file,
// anything else validates the generated template against the fixture.
if (args.init) {
  const template = createTemplate();
  validateTemplateShape(template);
  if (fs.existsSync(args.init) && !args.force) {
    errors.push(`${path.relative(root, args.init)} already exists; pass --force to overwrite`);
  }
  if (errors.length === 0) {
    fs.mkdirSync(path.dirname(args.init), { recursive: true });
    fs.writeFileSync(args.init, `${JSON.stringify(template, null, 2)}\n`, "utf8");
    console.log(`Prompt run results template written: ${path.relative(root, args.init)}`);
    process.exit(0);
  }
} else if (args.file) {
  const results = readJson(args.file);
  // readJson() returns undefined only after recording an error. Any other
  // non-object value (null, 0, false, "", an array) is valid JSON but not a
  // results document, and must fail instead of being reported as "passed".
  if (results !== undefined) {
    if (results === null || typeof results !== "object" || Array.isArray(results)) {
      errors.push(`${path.relative(root, args.file)}: top-level JSON value must be an object`);
    } else {
      validateRunResults(results);
    }
  }
} else {
  validateTemplateShape(createTemplate());
  if (errors.length === 0) {
    console.log("Prompt run results template validation passed.");
    process.exit(0);
  }
}

if (errors.length > 0) {
  console.error(`Prompt run results validation failed with ${errors.length} error(s):`);
  for (const error of errors) console.error(`- ${error}`);
  process.exit(1);
}

console.log("Prompt run results validation passed.");
// Weak-variant findings are informational only; they do not change the exit code.
if (diagnostics.length > 0) {
  console.log(`Weak output diagnostics: ${diagnostics.length} run(s) need comparison review.`);
}
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import fs from "node:fs";
|
|
4
|
+
import path from "node:path";
|
|
5
|
+
|
|
6
|
+
// Repository root: the real (symlink-resolved) working directory. All paths below are anchored to it.
const root = fs.realpathSync(process.cwd());
// Validation failures accumulate here and are reported together before exit.
const errors = [];

const fixturePath = repoPath("prompts/evaluations/engineering-business-basic-fixtures.json");
const scorecardPath = repoPath("prompts/evaluations/engineering-business-basic-scorecard.json");
const fixture = readJson(fixturePath);
const scorecard = readJson(scorecardPath);

// Terms that must not appear in the scorecard; any hit is reported as "sensitive terms leaked".
const sensitiveTerms = [
  "立信", "费敏", "闻总", "谭总",
  "茅盾中学", "鸿益", "太鑫", "飞双", "魔毯",
  "客户内部", "培训演示", "基础内测", "内测模式", "内测包"
];
|
|
30
|
+
|
|
31
|
+
/**
 * Build an absolute path below the repository root.
 *
 * @param {...string} parts - Path segments joined onto `root`.
 * @returns {string} The joined absolute path.
 * @throws {Error} "Path traversal detected" when the joined path leaves `root`.
 */
function repoPath(...parts) {
  const target = path.join(root, ...parts);
  const relative = path.relative(root, target);
  // Compare against the ".." path segment rather than a bare prefix, so that
  // in-root names which merely begin with two dots (e.g. "..data") are accepted.
  const escapesRoot =
    relative === ".." || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative);
  if (escapesRoot) {
    throw new Error(`Path traversal detected: ${target}`);
  }
  return target;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Read and parse a JSON file, recording any failure in `errors`.
 *
 * @param {string} filePath - Absolute path of the file to load.
 * @returns {unknown} The parsed value, or `undefined` when the file could not
 *   be read or parsed (an error message has been recorded in that case).
 */
function readJson(filePath) {
  const label = path.relative(root, filePath);

  let text;
  try {
    text = fs.readFileSync(filePath, "utf8");
  } catch (error) {
    // A missing or unreadable file is not a JSON syntax problem; say so.
    errors.push(`${label}: cannot read file (${error.message})`);
    return undefined;
  }

  try {
    return JSON.parse(text);
  } catch (error) {
    errors.push(`${label}: invalid JSON (${error.message})`);
    return undefined;
  }
}
|
|
48
|
+
|
|
49
|
+
/**
 * Record a validation failure when an expectation does not hold.
 *
 * @param {unknown} condition - Truthy when the expectation is met.
 * @param {string} message - Text appended to `errors` otherwise.
 */
function check(condition, message) {
  if (condition) return;
  errors.push(message);
}
|
|
52
|
+
|
|
53
|
+
/**
 * List the sensitive terms that occur in a value.
 *
 * @param {unknown} value - A string, or any value that is searched through
 *   its JSON serialization.
 * @returns {string[]} Entries of `sensitiveTerms` found in the text (empty when none).
 */
function includesSensitiveTerm(value) {
  // JSON.stringify() returns undefined for undefined, functions and symbols;
  // treat those as empty text instead of throwing on `.includes`.
  const raw = typeof value === "string" ? value : (JSON.stringify(value) ?? "");
  return sensitiveTerms.filter((term) => raw.includes(term));
}
|
|
57
|
+
|
|
58
|
+
/**
 * Compute the weight-averaged score over a set of criteria.
 *
 * @param {Object<string, number>|null|undefined} scores - Score per criterion id.
 * @param {Array<{id: string, weight: number}>} criteria - Criteria with weights.
 * @returns {number} Sum of score × weight divided by the total weight. The
 *   result is NaN when it cannot be computed (no criteria, missing scores), so
 *   callers comparing it against a threshold see a failed comparison rather
 *   than an exception.
 */
function weightedScore(scores, criteria) {
  if (!Array.isArray(criteria) || criteria.length === 0) return Number.NaN;

  // A missing score set behaves like an empty one: every lookup yields
  // undefined, which propagates to NaN.
  const scoreById = scores ?? {};
  let weightedSum = 0;
  let totalWeight = 0;
  for (const { id, weight } of criteria) {
    weightedSum += scoreById[id] * weight;
    totalWeight += weight;
  }
  return weightedSum / totalWeight;
}
|
|
66
|
+
|
|
67
|
+
// True for parsed JSON objects (not null, arrays or primitives).
const isJsonObject = (value) => value !== null && typeof value === "object" && !Array.isArray(value);

// readJson() returns undefined only after recording an error. Any other
// non-object value (null, 0, false, "", an array) is valid JSON but not a usable
// document, and must fail validation rather than be skipped silently.
if (fixture !== undefined && !isJsonObject(fixture)) {
  errors.push("fixture file: top-level JSON value must be an object");
}
if (scorecard !== undefined && !isJsonObject(scorecard)) {
  errors.push("scorecard: top-level JSON value must be an object");
}

if (isJsonObject(fixture) && isJsonObject(scorecard)) {
  // Fall back to empty lists so that wrongly typed fields are reported by the
  // checks below instead of crashing on .length / .map() / for...of.
  const fixtureCases = Array.isArray(fixture.cases) ? fixture.cases : [];
  const scorecardCases = Array.isArray(scorecard.cases) ? scorecard.cases : [];
  const rawCriteria = Array.isArray(scorecard.criteria) ? scorecard.criteria : [];

  check(scorecard.schema === 1, "scorecard: schema must be 1");
  check(scorecard.version === fixture.version, `scorecard: version must match fixture version ${fixture.version}`);
  check(scorecard.fixture === "prompts/evaluations/engineering-business-basic-fixtures.json", "scorecard: fixture path mismatch");
  check(Array.isArray(scorecard.criteria) && scorecard.criteria.length > 0, "scorecard: criteria must be a non-empty array");
  check(Array.isArray(scorecard.cases) && scorecard.cases.length === fixtureCases.length, "scorecard: case count mismatch");

  const sensitiveHits = includesSensitiveTerm(scorecard);
  check(sensitiveHits.length === 0, `scorecard: sensitive terms leaked (${sensitiveHits.join(", ")})`);

  const criteriaIds = new Set();
  const criteria = [];
  let totalWeight = 0;
  for (const [index, criterion] of rawCriteria.entries()) {
    if (!isJsonObject(criterion)) {
      errors.push(`scorecard: criterion #${index + 1} must be an object`);
      continue;
    }
    criteria.push(criterion);
    // RegExp#test coerces its argument to a string, so a missing id would be
    // tested as "undefined" and match; require an actual string first.
    check(
      typeof criterion.id === "string" && /^[a-z0-9_]+$/.test(criterion.id),
      `${criterion.id ?? "unknown"}: invalid criterion id`
    );
    check(!criteriaIds.has(criterion.id), `${criterion.id}: duplicate criterion`);
    criteriaIds.add(criterion.id);
    check(Number.isInteger(criterion.weight) && criterion.weight > 0, `${criterion.id}: weight must be a positive integer`);
    // Only numeric weights are summed; a string weight would otherwise concatenate.
    totalWeight += typeof criterion.weight === "number" ? criterion.weight : 0;
    check(typeof criterion.description === "string" && criterion.description.length > 0, `${criterion.id}: missing description`);
  }
  check(totalWeight === 100, `scorecard: criteria weights must total 100, got ${totalWeight}`);

  const fixtureById = new Map(fixtureCases.map((item) => [item?.id, item]));
  const expectedIds = [...fixtureById.keys()].sort();
  const actualIds = scorecardCases.map((item) => item?.caseId).sort();
  check(JSON.stringify(actualIds) === JSON.stringify(expectedIds), "scorecard: case coverage mismatch");

  const expectedScoreIds = JSON.stringify([...criteriaIds].sort());
  for (const [index, item] of scorecardCases.entries()) {
    if (!isJsonObject(item)) {
      errors.push(`scorecard: case #${index + 1} must be an object`);
      continue;
    }

    const sourceCase = fixtureById.get(item.caseId);
    check(Boolean(sourceCase), `${item.caseId}: caseId not found in fixtures`);
    check(item.winner === "basic", `${item.caseId}: winner must be basic`);
    check(typeof item.decisionBasis === "string" && item.decisionBasis.length > 0, `${item.caseId}: missing decisionBasis`);
    check(Array.isArray(item.basicPromptGains) && item.basicPromptGains.length > 0, `${item.caseId}: basicPromptGains must be non-empty`);
    check(
      JSON.stringify(item.observedWeakFailures ?? []) === JSON.stringify(sourceCase?.weakFailureModes ?? []),
      `${item.caseId}: observedWeakFailures must match fixture weakFailureModes`
    );

    // A missing or wrongly typed score set is treated as empty, so it is
    // reported by the coverage and range checks instead of crashing.
    const scoreSets = {
      weakScores: isJsonObject(item.weakScores) ? item.weakScores : {},
      basicScores: isJsonObject(item.basicScores) ? item.basicScores : {}
    };

    for (const [scoreSetName, scoreSet] of Object.entries(scoreSets)) {
      const scoreIds = Object.keys(scoreSet).sort();
      check(JSON.stringify(scoreIds) === expectedScoreIds, `${item.caseId}: ${scoreSetName} coverage mismatch`);
      for (const criterionId of criteriaIds) {
        const value = scoreSet[criterionId];
        check(Number.isInteger(value) && value >= 1 && value <= 5, `${item.caseId}: ${scoreSetName}.${criterionId} must be 1-5`);
      }
    }

    const weakTotal = weightedScore(scoreSets.weakScores, criteria);
    const basicTotal = weightedScore(scoreSets.basicScores, criteria);
    check(
      basicTotal - weakTotal >= scorecard.minimumWeightedDelta,
      `${item.caseId}: weighted improvement ${Number(basicTotal - weakTotal).toFixed(2)} is below minimum ${scorecard.minimumWeightedDelta}`
    );
  }

  check(scorecard.overallDecision?.winner === "basic", "scorecard: overall winner must be basic");
  check(typeof scorecard.overallDecision?.notAClaim === "string", "scorecard: overallDecision.notAClaim is required");
}

if (errors.length > 0) {
  console.error(`Prompt scorecard validation failed with ${errors.length} error(s):`);
  for (const error of errors) console.error(`- ${error}`);
  process.exit(1);
}

console.log("Prompt scorecard validation passed.");
|
|
@@ -88,15 +88,19 @@ if (manifest) {
|
|
|
88
88
|
check(frontmatter.name === skill.id, `${skillFile}: frontmatter name must be ${skill.id}`);
|
|
89
89
|
check(Boolean(frontmatter.description), `${skillFile}: missing frontmatter description`);
|
|
90
90
|
}
|
|
91
|
+
|
|
92
|
+
for (const requiredAsset of manifest.requiredAssets ?? []) {
|
|
93
|
+
check(exists(requiredAsset), `runtime/archsight-aios.manifest.json: required asset missing ${requiredAsset}`);
|
|
94
|
+
}
|
|
91
95
|
}
|
|
92
96
|
|
|
93
97
|
if (packageJson) {
|
|
94
|
-
const requiredFiles = ["skills/", "scripts/", ".claude-plugin/", "gemini-extension.json"];
|
|
98
|
+
const requiredFiles = ["skills/", "scripts/", ".claude-plugin/", "gemini-extension.json", "OPENCODE.md"];
|
|
95
99
|
for (const requiredFile of requiredFiles) {
|
|
96
100
|
check(packageJson.files?.includes(requiredFile), `package.json: files must include ${requiredFile}`);
|
|
97
101
|
}
|
|
98
102
|
|
|
99
|
-
const requiredKeywords = ["agent-skills", "skills-sh", "gemini-cli", "claude-code", "workbuddy", "construction-ai"];
|
|
103
|
+
const requiredKeywords = ["agent-skills", "skills-sh", "gemini-cli", "claude-code", "workbuddy", "opencode", "construction-ai"];
|
|
100
104
|
for (const keyword of requiredKeywords) {
|
|
101
105
|
check(packageJson.keywords?.includes(keyword), `package.json: keywords must include ${keyword}`);
|
|
102
106
|
}
|
package/skills/README.md
CHANGED
|
@@ -60,9 +60,12 @@ Skill 可以继续用 `SKILL.md` 表达操作方法,但涉及确定性工具
|
|
|
60
60
|
| `aios-knowledge` | BIM、IFC、建筑规范、审图规则和知识结构化。 |
|
|
61
61
|
| `aios-structural` | 结构力学、荷载、边界条件、FEM 和确定性求解链路评审。 |
|
|
62
62
|
| `aios-runtime` | Prompt、Context、Memory、MCP/Tool、RAG/GraphRAG 和多 Agent Runtime 设计。 |
|
|
63
|
+
| `aios-prompt-compare` | Prompt / Skill 效果对比:对同一输入分别评估弱提示词、便携强提示词和真实 Skill 触发结果,判断是否应沉淀为 Skill。 |
|
|
63
64
|
|
|
64
65
|
工程业务管理技能包 (Engineering Project Management):
|
|
65
66
|
|
|
67
|
+
工程业务管理场景可直接参考 [工程业务管理基础技能包](engineering-business-starter-kit.md)。该基础包提供 L0-L1 级通用提示词 / Skill 模板能力:把工程资料整理成矩阵、清单、台账和人工复核问题;不承诺系统建设、自动审批、专业结论或替代签审。
|
|
68
|
+
|
|
66
69
|
| Skill | 用途 |
|
|
67
70
|
| --- | --- |
|
|
68
71
|
| `aios-commercial-tender` | 工程招投标响应证据链,用于提取评分点、资格条件、废标风险、资料缺口和人工复核事项。 |
|
|
@@ -28,6 +28,24 @@ description: 工程合同履约证据链工作流。用于从工程分包、采
|
|
|
28
28
|
- 项目类型、合同类型、地区、标段、承包范围和关键里程碑。
|
|
29
29
|
- 用户指定的重点问题,例如付款、工期、质量、验收、违约、争议解决或资料缺失。
|
|
30
30
|
|
|
31
|
+
## 基础版模式
|
|
32
|
+
|
|
33
|
+
用于工程业务资料处理、项目复核辅助、部门工作流整理或模板沉淀时,按本节收口。可复制的基础提示词见 `prompts/basic-prompt.md`。
|
|
34
|
+
|
|
35
|
+
最小可用输入:
|
|
36
|
+
|
|
37
|
+
- 脱敏后的工程合同、分包合同、采购合同、租赁合同、补充协议或合同摘要。
|
|
38
|
+
- 文件名、版本、日期、章节 / 页码 / 条款标题。
|
|
39
|
+
- 用户要复核的问题,例如履约节点、付款条件、空白字段、责任边界或资料缺口。
|
|
40
|
+
- 人工复核岗位,例如法务、商务、项目经理、造价、财务或合同经办。
|
|
41
|
+
|
|
42
|
+
基础输出优先级:
|
|
43
|
+
|
|
44
|
+
1. 先输出合同基本信息和空白字段优先核对表。
|
|
45
|
+
2. 把合同转成关键履约节点、付款和结算条件、责任和违约风险提示。
|
|
46
|
+
3. 所有金额、期限、责任方、验收、付款、违约条款必须带原文依据位置或原文关键词。
|
|
47
|
+
4. 输出只作为履约管理辅助,不构成法律意见、违约定性、索赔或结算结论。
|
|
48
|
+
|
|
31
49
|
## Capability 与证据
|
|
32
50
|
|
|
33
51
|
- 所有抽取结论必须带 `Evidence`,至少包括来源文件、页码/章节/行号或原文短摘。
|