@archsight/aios 1.2.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. package/.claude-plugin/plugin.json +1 -1
  2. package/CHANGELOG.md +59 -0
  3. package/OPENCODE.md +23 -0
  4. package/README.md +64 -31
  5. package/RELEASE_NOTES.md +37 -0
  6. package/adapters/workbuddy/README.md +11 -1
  7. package/agents/README.md +6 -3
  8. package/agents/atlas/responsibilities.md +1 -1
  9. package/agents/atlas/system-prompt.md +1 -1
  10. package/agents/daedalus/system-prompt.md +2 -0
  11. package/agents/hestia/constraints.md +7 -0
  12. package/agents/hestia/responsibilities.md +7 -0
  13. package/agents/hestia/role.md +12 -0
  14. package/agents/hestia/system-prompt.md +23 -0
  15. package/agents/hestia/workflow.md +8 -0
  16. package/agents/plutus/constraints.md +7 -0
  17. package/agents/plutus/responsibilities.md +7 -0
  18. package/agents/plutus/role.md +12 -0
  19. package/agents/plutus/system-prompt.md +24 -0
  20. package/agents/plutus/workflow.md +8 -0
  21. package/agents/themis/constraints.md +7 -0
  22. package/agents/themis/responsibilities.md +7 -0
  23. package/agents/themis/role.md +12 -0
  24. package/agents/themis/system-prompt.md +24 -0
  25. package/agents/themis/workflow.md +8 -0
  26. package/bin/archsight-aios.mjs +558 -25
  27. package/docs/PUBLIC_DISCOVERY.md +16 -2
  28. package/docs/business-expert-guide.md +5 -3
  29. package/docs/glossary.md +11 -3
  30. package/docs/quickstart.md +18 -4
  31. package/gemini-extension.json +1 -1
  32. package/governance/README.md +41 -11
  33. package/governance/agent-boundary.md +1 -2
  34. package/governance/ai-review-policy.md +1 -2
  35. package/governance/arbitration-protocol.md +33 -33
  36. package/governance/context-policy.md +2 -3
  37. package/governance/delivery-policy.md +1 -2
  38. package/governance/memory-policy.md +1 -2
  39. package/governance/security-policy.md +1 -2
  40. package/memory/decision-records.md +8 -9
  41. package/package.json +17 -6
  42. package/prompts/README.md +12 -0
  43. package/prompts/evaluation-policy.md +70 -0
  44. package/prompts/evaluations/engineering-business-basic-advisory-validation-2026-06-16.md +87 -0
  45. package/prompts/evaluations/engineering-business-basic-fixtures.json +375 -0
  46. package/prompts/evaluations/engineering-business-basic-model-output.example.json +179 -0
  47. package/prompts/evaluations/engineering-business-basic-prompts-2026-06-16.md +205 -0
  48. package/prompts/evaluations/engineering-business-basic-scorecard.json +238 -0
  49. package/prompts/evaluations/engineering-business-public-advisory-fixtures.json +422 -0
  50. package/prompts/evaluations/public-advisory-md/01-technical-bid.md +63 -0
  51. package/prompts/evaluations/public-advisory-md/02-contract.md +61 -0
  52. package/prompts/evaluations/public-advisory-md/03-daily.md +69 -0
  53. package/prompts/evaluations/public-advisory-md/04-meeting.md +48 -0
  54. package/prompts/evaluations/public-advisory-md/05-variation.md +63 -0
  55. package/prompts/evaluations/public-advisory-md/06-scheme.md +60 -0
  56. package/prompts/failure-cases.md +5 -1
  57. package/prompts/prompt-registry.md +10 -0
  58. package/runtime/agent-routing.md +39 -9
  59. package/runtime/archsight-aios.manifest.json +154 -51
  60. package/runtime/hermes/agent-registry.md +3 -0
  61. package/runtime/hermes/workspace-binding.md +3 -0
  62. package/runtime/skill-routing.md +23 -12
  63. package/scripts/analyze-prompt-run-results.mjs +187 -0
  64. package/scripts/build-prompt-run-pack.mjs +248 -0
  65. package/scripts/validate-prompt-fixtures.mjs +225 -0
  66. package/scripts/validate-prompt-model-outputs.mjs +201 -0
  67. package/scripts/validate-prompt-run-results.mjs +259 -0
  68. package/scripts/validate-prompt-scorecard.mjs +133 -0
  69. package/scripts/validate-skills.mjs +8 -3
  70. package/skills/README.md +12 -6
  71. package/skills/aios/SKILL.md +79 -0
  72. package/skills/aios/agents/openai.yaml +4 -0
  73. package/skills/aios-arch/SKILL.md +14 -14
  74. package/skills/aios-ceo/SKILL.md +13 -13
  75. package/skills/aios-commercial-contract/SKILL.md +32 -14
  76. package/skills/aios-commercial-contract/prompts/basic-prompt.md +83 -0
  77. package/skills/aios-commercial-tender/SKILL.md +31 -13
  78. package/skills/aios-commercial-tender/prompts/basic-prompt.md +94 -0
  79. package/skills/aios-commercial-variation/SKILL.md +33 -15
  80. package/skills/aios-commercial-variation/prompts/basic-prompt.md +99 -0
  81. package/skills/aios-compare/SKILL.md +92 -0
  82. package/skills/aios-compare/agents/openai.yaml +4 -0
  83. package/skills/aios-construction-daily/SKILL.md +32 -14
  84. package/skills/aios-construction-daily/prompts/basic-prompt.md +76 -0
  85. package/skills/aios-construction-meeting/SKILL.md +32 -14
  86. package/skills/aios-construction-meeting/prompts/basic-prompt.md +78 -0
  87. package/skills/aios-construction-scheme/SKILL.md +28 -10
  88. package/skills/aios-construction-scheme/prompts/basic-prompt.md +90 -0
  89. package/skills/aios-plan/SKILL.md +7 -7
  90. package/skills/aios-prompt-compare/SKILL.md +180 -0
  91. package/skills/aios-prompt-compare/agents/openai.yaml +4 -0
  92. package/skills/aios-review/SKILL.md +1 -1
  93. package/skills/aios-structural/SKILL.md +7 -7
  94. package/skills/archsight-aios/SKILL.md +40 -0
  95. package/skills/archsight-aios/agents/openai.yaml +4 -0
  96. package/skills/engineering-business-starter-kit.md +112 -0
  97. package/templates/README.md +16 -2
  98. package/templates/project-ai/.ai/ARCHSIGHT_AIOS_RULES.md +5 -4
  99. package/templates/project-ai/.ai/agent-routing.md +3 -1
  100. package/templates/project-ai/.ai/profile-detection.md +24 -0
  101. package/templates/project-ai/.ai/project-context.md +4 -1
  102. package/templates/project-ai/.ai/skills.md +36 -24
  103. package/templates/project-ai/AGENTS.md +6 -5
  104. package/templates/project-ai/AI_CODING_RULES.md +1 -1
  105. package/templates/project-ai/CLAUDE.md +6 -5
  106. package/templates/project-ai/GEMINI.md +6 -5
  107. package/templates/project-ai/OPENCODE.md +26 -0
  108. package/workflows/README.md +1 -1
  109. package/workflows/architecture-review.md +10 -10
  110. package/workflows/site-daily-loop.md +25 -25
@@ -0,0 +1,87 @@
1
+ # 工程业务基础提示词 advisory 复核说明
2
+
3
+ > 日期:2026-06-16
4
+ > 范围:6 个工程业务基础提示词与 advisory 工作区中的旧提示词包、普通 / 优化输出对比记录。
5
+ > 边界:本文件只记录脱敏后的案例形态、输出差异和沉淀判断;不复制原始业务资料、联系人、项目名称、金额细节或完整模型输出。
6
+
7
+ ## 复核结论
8
+
9
+ 当前 AIOS 基础提示词比 advisory 旧提示词更适合沉淀为通用 Skill,原因不是“答案更长”,而是把旧提示词中的经验规则收口成了稳定规程:
10
+
11
+ | 维度 | advisory 旧提示词包 | AIOS 基础提示词 |
12
+ |---|---|---|
13
+ | 使用场景 | 为 PPT 准备和现场分享服务,文件之间相对独立 | 作为 `aios-*` Skill 的可复用基础模式 |
14
+ | 输入判断 | 每个提示词有边界提示,但分散在单文件内 | 每个 Skill 固定先判断资料类型、缺口和可验证程度 |
15
+ | 输出形态 | 已能生成矩阵、清单、台账和回查表 | 进一步统一资料来源清单、主表、需确认项、复核岗位和不能下结论事项 |
16
+ | 风险边界 | 依赖提示词文本和人工使用习惯 | 固化禁止结论、人工复核岗位、L0-L1 能力边界和验证脚本 |
17
+ | 资产化程度 | 更像一次项目素材包 | 已进入 registry、manifest、安装分发、fixtures、scorecard 和 CLI 校验 |
18
+
19
+ 因此,“更好”的选择不是直接搬 advisory 旧提示词,而是使用 AIOS 基础提示词作为通用技能包版本;advisory 旧提示词继续作为来源验证和案例启发。
20
+
21
+ ## 只读复核来源
22
+
23
+ 本次只读查看了 advisory 中的以下类型资产:
24
+
25
+ - `source/prompts/README.md`:旧提示词包的使用方式、输出验证和文件清单。
26
+ - `source/prompts/01-...` 到 `source/prompts/06-...`:6 个工程业务场景提示词。
27
+ - `source/prompts/07-...`:终稿阶段的案例分工、提示词优化方向和边界记录。
28
+ - `source/prompt-runs/2026-06-14-普通与优化提示词输出对比.md`:普通提示词与优化提示词的逐场景对比。
29
+
30
+ 未读取或复制到 AIOS 的内容:
31
+
32
+ - 原始 docx / pdf 全文。
33
+ - 真实项目名称、联系人、公司内部称呼。
34
+ - 完整模型输出、金额细节、合同完整条款或正式资料编号。
35
+
36
+ ## 场景信号映射
37
+
38
+ | AIOS caseId | advisory 抽象信号 | AIOS 固化结果 |
39
+ |---|---|---|
40
+ | `commercial-tender-response-matrix` | 技术标工具试用后的人工检查问题 + 评分点结构;不是完整招标原文读标 | 固化为输入类型判断、缺少可验证招标依据、问题回应矩阵、评分点响应矩阵 |
41
+ | `commercial-contract-obligation-nodes` | 工程合同片段有履约节点,也有空白字段和专业复核边界 | 固化为空白字段核对表、关键履约节点、付款结算条件和不能下结论事项 |
42
+ | `construction-daily-issue-tracking` | 日报有施工内容,也有资源、材料、照片等空白字段 | 固化为管理摘要、问题跟踪表、模板质量诊断;空白不等于现场事实 |
43
+ | `construction-meeting-action-closure` | 会议记录有发言人、状态和待办,但责任人 / 期限常不完整 | 固化为待办闭环、责任线索、需确认责任人和需确认期限 |
44
+ | `commercial-variation-evidence-chain` | 公开样表字段可讲资料链方法,但不证明具体项目事实 | 固化为资料链完整度、样表字段结构、过程线索和正式依据缺口 |
45
+ | `construction-scheme-assistive-review` | 施工方案 AI 生成 / 复核反馈涉及参数、图纸、地方标准和计算书边界 | 固化为辅助复核口径、失准复盘、专家修改说明回查和人工复核问题清单 |
46
+
47
+ ## 普通提示词失败模式
48
+
49
+ advisory 对比记录显示,普通提示词在 6 个场景中有共性问题:
50
+
51
+ - 容易默认资料完整,跳过输入状态判断。
52
+ - 容易输出段落摘要,而不是可分工的矩阵、台账或回查表。
53
+ - 容易把未提供、未填、未见的内容写成事实判断。
54
+ - 容易把业务风险提示写成法律、造价、质量安全或审批结论。
55
+ - 容易把一次性回答当成工具能力、系统能力或正式交付能力。
56
+
57
+ AIOS 基础提示词针对这些问题加了统一约束:
58
+
59
+ - `资料来源清单` 和资料状态判断。
60
+ - 主输出表格或清单。
61
+ - `需补充确认` / `需核验`。
62
+ - 人工复核岗位。
63
+ - `不能下结论的事项`。
64
+ - `Claim / Evidence / Tool Result / Decision`。
65
+
66
+ ## 对当前 AIOS 资产的影响
67
+
68
+ 本次复核后,AIOS 保留以下沉淀方式:
69
+
70
+ - 6 个基础提示词继续放在各自 Skill 的 `prompts/basic-prompt.md`。
71
+ - `engineering-business-basic-fixtures.json` 增加 `sourceSignals` 和 `advisoryComparison`,记录脱敏来源信号。
72
+ - `engineering-business-basic-scorecard.json` 继续作为“哪套更好”的结构化判断依据。
73
+ - `validate-prompt-fixtures.mjs` 校验每条来源信号必须以抽象前缀开头(如 `advisory:`、`source-shape:`、`boundary-shape:`),避免真实资料名回流。
74
+
75
+ ## 仍未完成的验证
76
+
77
+ 当前验证可以证明提示词设计、案例覆盖、边界规则和资产分发链路已经成形,但不能声称已经完成真实外部模型批量评测。
78
+
79
+ 真实批跑需要:
80
+
81
+ 1. 生成 weak/basic run pack。
82
+ 2. 用同一模型分别跑 12 条输入(6 个场景 × weak / basic 两套提示词)。
83
+ 3. 把输出填入 run results JSON。
84
+ 4. 执行 `validate-prompt-run-results.mjs --file ...`。
85
+ 5. 执行 `analyze-prompt-run-results.mjs --file ... --out ...`。
86
+
87
+ 没有真实模型输出时,scorecard 只能作为设计评审和静态回归门禁,不能当作模型效果保证。
@@ -0,0 +1,375 @@
1
+ {
2
+ "schema": 1,
3
+ "name": "engineering-business-basic-fixtures",
4
+ "version": "0.1",
5
+ "dataBoundary": "De-identified fixtures derived from internal engineering-business prompt comparisons. Do not include customer names, contacts, project names, exact amounts, or raw source documents.",
6
+ "sourceBoundary": "Source signals are abstracted from advisory prompt validation records and source inventory. They preserve task shape only, not raw documents or identifiable project facts.",
7
+ "cases": [
8
+ {
9
+ "id": "commercial-tender-response-matrix",
10
+ "skillId": "aios-commercial-tender",
11
+ "promptPath": "skills/aios-commercial-tender/prompts/basic-prompt.md",
12
+ "scenario": "AI 标书工具试用后的人工检查问题和评分点结构整理",
13
+ "sourceSignals": [
14
+ "advisory:source/prompts/01 tender response matrix prompt",
15
+ "advisory:source/prompt-runs weak-vs-optimized comparison case 1",
16
+ "source-shape:AI technical-bid tool review issues plus scoring-point structure",
17
+ "boundary-shape:not a complete tender-original reading task"
18
+ ],
19
+ "advisoryComparison": [
20
+ "普通提示词容易转向工具建议或泛泛投标建议。",
21
+ "优化提示词把同一类输入固定为输入类型判断、问题回应矩阵、评分点响应矩阵和人工复核岗位。"
22
+ ],
23
+ "weakPrompt": "请分析这些技术标问题,给出建议。",
24
+ "inputSummary": [
25
+ "输入不是完整招标文件,而是技术标生成后的人工检查问题清单。",
26
+ "资料包含评分点结构、技术标章节、图表和工具能力问题。",
27
+ "缺少招标原文、评标办法页码、废标条款和完整投标文件成稿。"
28
+ ],
29
+ "sampleInput": [
30
+ "资料类型:AI 技术标工具试用后的人工检查问题清单。",
31
+ "问题片段:AI 生成技术标时页数不可控,分章节生成后合并格式容易错乱;总平面布置图、航拍图识别和施工效果图仍需人工复核。",
32
+ "评分点结构片段:工程概况、施工总体部署、施工进度计划、主要分部分项工程施工方案、资源配置、质量安全文明施工。",
33
+ "资料缺口:未提供招标文件原文、评分办法页码、废标条款、AI 生成技术标成稿和最终人工修改稿。"
34
+ ],
35
+ "requiredPromptTerms": [
36
+ "资料来源清单",
37
+ "输入类型",
38
+ "缺少可验证招标依据",
39
+ "AI 技术标复核问题回应矩阵",
40
+ "评分点响应矩阵",
41
+ "不能下结论"
42
+ ],
43
+ "expectedOutputShape": [
44
+ "先判断输入类型和资料状态。",
45
+ "缺少招标原文时输出问题回应矩阵和响应矩阵模板。",
46
+ "评分点、废标项、资格条件和分值缺证据时标为需补充确认。",
47
+ "列出投标负责人、经营人员、技术负责人、法务或造价复核事项。"
48
+ ],
49
+ "expectedStrongSections": [
50
+ "资料来源清单",
51
+ "输入类型和资料状态判断",
52
+ "AI 技术标复核问题回应矩阵",
53
+ "评分点响应矩阵",
54
+ "不能下结论的事项"
55
+ ],
56
+ "weakFailureModes": [
57
+ "直接推荐 AI 标书工具或采购路径。",
58
+ "默认已经完成真实读标。",
59
+ "编造废标项、资格条件、分值或页码。"
60
+ ],
61
+ "bannedClaims": [
62
+ "中标概率",
63
+ "评标结论",
64
+ "保证中标",
65
+ "工具采购建议",
66
+ "串标规避建议"
67
+ ]
68
+ },
69
+ {
70
+ "id": "commercial-contract-obligation-nodes",
71
+ "skillId": "aios-commercial-contract",
72
+ "promptPath": "skills/aios-commercial-contract/prompts/basic-prompt.md",
73
+ "scenario": "工程合同履约节点、付款结算和空白字段核对",
74
+ "sourceSignals": [
75
+ "advisory:source/prompts/02 contract obligation nodes prompt",
76
+ "advisory:source/prompt-runs weak-vs-optimized comparison case 2",
77
+ "source-shape:engineering subcontract or procurement contract fragment",
78
+ "boundary-shape:contract fields may be blank and cannot support legal conclusions"
79
+ ],
80
+ "advisoryComparison": [
81
+ "普通提示词能总结风险,但容易停留在摘要。",
82
+ "优化提示词把合同片段改造成空白字段核对、履约节点、付款结算条件和人工复核分流。"
83
+ ],
84
+ "weakPrompt": "请总结这份合同的重点和风险。",
85
+ "inputSummary": [
86
+ "输入为工程分包或采购合同片段。",
87
+ "合同包含付款、验收、资料提交、工资或结算等节点。",
88
+ "部分主体、项目、金额、期限或代表字段为空白。"
89
+ ],
90
+ "sampleInput": [
91
+ "资料类型:工程分包合同片段。",
92
+ "合同片段:承包方每月固定日期前提交月完成工程量清单、考勤记录和工资表,经发包方审核后作为月度结算依据。",
93
+ "合同片段:分项工程完成并验收合格后,双方在约定期限内办理分包总价款结算。",
94
+ "合同缺口:工程名称、工程地点、合同价款、双方现场代表和部分签章信息为空白。"
95
+ ],
96
+ "requiredPromptTerms": [
97
+ "资料来源清单",
98
+ "合同基本信息",
99
+ "空白字段优先核对表",
100
+ "关键履约节点",
101
+ "付款和结算条款",
102
+ "不能下结论"
103
+ ],
104
+ "expectedOutputShape": [
105
+ "先列合同基本信息和空白字段。",
106
+ "把节点拆成触发条件、期限、责任方、原文依据、需准备资料和复核提示。",
107
+ "风险提示使用建议复核口径。",
108
+ "保留法务、商务、造价、财务或项目负责人复核边界。"
109
+ ],
110
+ "expectedStrongSections": [
111
+ "资料来源清单",
112
+ "合同基本信息",
113
+ "空白字段优先核对表",
114
+ "关键履约节点",
115
+ "不能下结论的事项"
116
+ ],
117
+ "weakFailureModes": [
118
+ "只输出合同风险摘要。",
119
+ "忽略空白字段。",
120
+ "把风险提示写成法律意见或违约责任结论。"
121
+ ],
122
+ "bannedClaims": [
123
+ "法律意见",
124
+ "违约定性",
125
+ "索赔金额",
126
+ "结算金额结论",
127
+ "责任归属结论"
128
+ ]
129
+ },
130
+ {
131
+ "id": "construction-daily-issue-tracking",
132
+ "skillId": "aios-construction-daily",
133
+ "promptPath": "skills/aios-construction-daily/prompts/basic-prompt.md",
134
+ "scenario": "施工日报管理摘要、问题台账和模板质量诊断",
135
+ "sourceSignals": [
136
+ "advisory:source/prompts/03 daily issue tracking prompt",
137
+ "advisory:source/prompt-runs weak-vs-optimized comparison case 3",
138
+ "source-shape:single-day construction daily with populated work content and blank resource fields",
139
+ "boundary-shape:blank fields are data-quality gaps, not site-fact conclusions"
140
+ ],
141
+ "advisoryComparison": [
142
+ "普通提示词能概括日报,但容易混淆现场事实和模板空白。",
143
+ "优化提示词固定输出管理摘要、问题跟踪表、模板质量诊断和需补充确认事项。"
144
+ ],
145
+ "weakPrompt": "请总结这份项目日报并指出问题。",
146
+ "inputSummary": [
147
+ "输入为单日施工日报或周报素材。",
148
+ "日报包含天气、人员、施工内容、明日计划和部分空白表格。",
149
+ "材料、机械、照片、滞后措施或完成率可能为空。"
150
+ ],
151
+ "sampleInput": [
152
+ "资料类型:单日施工日报片段。",
153
+ "日报片段:天气晴;当日施工内容包括内墙施工、安装作业、外立面作业和现场清理;明日计划继续相关工序。",
154
+ "资源片段:人员工种有记录,机械数量为空;主要材料进场表为空。",
155
+ "模板片段:外部检查写暂无,图纸变更写暂无,现场照片区域保留序号但未见图片。"
156
+ ],
157
+ "requiredPromptTerms": [
158
+ "资料来源清单",
159
+ "当日管理摘要",
160
+ "问题跟踪表",
161
+ "日报模板质量诊断",
162
+ "需补充确认事项",
163
+ "不能下结论"
164
+ ],
165
+ "expectedOutputShape": [
166
+ "区分已明确事项、异常或风险提示、需补充确认。",
167
+ "把空白字段归入模板质量诊断。",
168
+ "不把未提及事项写成现场不合格。",
169
+ "输出适合项目台账和周报复用的字段。"
170
+ ],
171
+ "expectedStrongSections": [
172
+ "资料来源清单",
173
+ "当日管理摘要",
174
+ "问题跟踪表",
175
+ "日报模板质量诊断",
176
+ "不能下结论的事项"
177
+ ],
178
+ "weakFailureModes": [
179
+ "把材料表为空写成现场没有材料进场。",
180
+ "把照片缺失写成现场无留痕结论。",
181
+ "直接判断项目进度或质量不合格。"
182
+ ],
183
+ "bannedClaims": [
184
+ "项目进度不合格",
185
+ "质量验收结论",
186
+ "安全合规结论",
187
+ "签证成立",
188
+ "索赔成立"
189
+ ]
190
+ },
191
+ {
192
+ "id": "construction-meeting-action-closure",
193
+ "skillId": "aios-construction-meeting",
194
+ "promptPath": "skills/aios-construction-meeting/prompts/basic-prompt.md",
195
+ "scenario": "工程会议纪要待办闭环和责任线索整理",
196
+ "sourceSignals": [
197
+ "advisory:source/prompts/04 meeting action closure prompt",
198
+ "advisory:source/prompt-runs weak-vs-optimized comparison case 4",
199
+ "source-shape:meeting minutes with speakers, follow-up statuses, and missing deadlines",
200
+ "boundary-shape:speaker is a responsibility clue, not necessarily final owner"
201
+ ],
202
+ "advisoryComparison": [
203
+ "普通提示词容易只生成会议摘要。",
204
+ "优化提示词把会议内容拆成待办闭环表、责任线索、需确认责任人和需确认期限。"
205
+ ],
206
+ "weakPrompt": "请整理这份会议纪要的重点和待办。",
207
+ "inputSummary": [
208
+ "输入为工程会议纪要、协调会记录或录音转写摘要。",
209
+ "资料包含多个发言人、过程状态、未完成事项和下次跟进内容。",
210
+ "部分事项没有明确最终责任人或截止时间。"
211
+ ],
212
+ "sampleInput": [
213
+ "资料类型:工程会议纪要片段。",
214
+ "纪要片段:参会人员讨论资质资料、证书继续教育、项目问题整改、资料提交和下次会议准备。",
215
+ "过程状态:部分事项写明仍在跟进、待提交、有待确认。",
216
+ "资料缺口:部分事项只有发言人,没有明确最终责任部门;多数事项没有截止时间。"
217
+ ],
218
+ "requiredPromptTerms": [
219
+ "资料来源清单",
220
+ "会议核心结论",
221
+ "待办闭环表",
222
+ "责任线索与需确认表",
223
+ "下次会议建议关注",
224
+ "不能下结论"
225
+ ],
226
+ "expectedOutputShape": [
227
+ "只提取会议资料明确出现的决定、问题、待办和分工。",
228
+ "责任人不明确时写需确认责任人。",
229
+ "期限不明确时写需确认期限。",
230
+ "发言人只作为责任线索,不自动等同最终责任人。"
231
+ ],
232
+ "expectedStrongSections": [
233
+ "资料来源清单",
234
+ "会议核心结论",
235
+ "待办闭环表",
236
+ "责任线索与需确认表",
237
+ "不能下结论的事项"
238
+ ],
239
+ "weakFailureModes": [
240
+ "把发言人写成最终责任人。",
241
+ "自行补截止日期。",
242
+ "把讨论事项改写成正式会议决议。"
243
+ ],
244
+ "bannedClaims": [
245
+ "最终责任归属",
246
+ "正式会议决议",
247
+ "合同通知效力",
248
+ "审批通过",
249
+ "签证成立"
250
+ ]
251
+ },
252
+ {
253
+ "id": "commercial-variation-evidence-chain",
254
+ "skillId": "aios-commercial-variation",
255
+ "promptPath": "skills/aios-commercial-variation/prompts/basic-prompt.md",
256
+ "scenario": "变更签证资料链、公开样表字段和过程线索整理",
257
+ "sourceSignals": [
258
+ "advisory:source/prompts/05 variation evidence chain prompt",
259
+ "advisory:source/prompt-runs weak-vs-optimized comparison case 5",
260
+ "source-shape:public variation form fields plus process clues from contract, daily, and meeting materials",
261
+ "boundary-shape:public sample fields do not establish project facts or settlement conclusions"
262
+ ],
263
+ "advisoryComparison": [
264
+ "普通提示词容易把样表、过程线索和正式依据混在一起。",
265
+ "优化提示词先判断资料链完整度,再区分样表字段、过程线索、合同依据、正式依据缺口和复核岗位。"
266
+ ],
267
+ "weakPrompt": "请分析这份变更签证资料是否完整。",
268
+ "inputSummary": [
269
+ "输入可能包含公开签证样表、联系单、合同条款、会议纪要、日报或照片说明。",
270
+ "资料可能只有字段结构和过程线索,没有完整签认链。",
271
+ "缺少具体签证单、审核书、工程量计算或业主确认资料时不能判断金额。"
272
+ ],
273
+ "sampleInput": [
274
+ "资料类型:工程变更签证资料链片段。",
275
+ "公开样表字段:工程名称、编号、变更项目名称、变更原因及内容、单位、数量、监理核定量、建设单位核定量、签章日期。",
276
+ "过程线索:会议纪要提到现场条件调整;日报记录相关施工活动;合同片段提到变更和结算需按流程确认。",
277
+ "资料缺口:未提供正式签证单、联系单编号、图纸变更单、工程量计算书、审核书和完整签认状态。"
278
+ ],
279
+ "requiredPromptTerms": [
280
+ "资料来源清单",
281
+ "资料来源和演示边界",
282
+ "资料链完整度判断",
283
+ "资料链清单",
284
+ "合同和流程依据",
285
+ "不能下结论"
286
+ ],
287
+ "expectedOutputShape": [
288
+ "先判断资料链完整度。",
289
+ "区分事实记录、合同依据、过程线索和正式依据待确认。",
290
+ "公开样表只能说明字段结构,不能代表项目事实。",
291
+ "输出经营、项目、造价、合同、法务和资料员复核分流。"
292
+ ],
293
+ "expectedStrongSections": [
294
+ "资料来源清单",
295
+ "资料链完整度判断",
296
+ "样表字段结构",
297
+ "资料链清单",
298
+ "不能下结论的事项"
299
+ ],
300
+ "weakFailureModes": [
301
+ "把公开样表当成项目事实。",
302
+ "把会议纪要或日报直接写成结算依据。",
303
+ "判断签证成立、索赔成立或金额倾向。"
304
+ ],
305
+ "bannedClaims": [
306
+ "签证成立",
307
+ "索赔成立",
308
+ "责任归属",
309
+ "最终金额",
310
+ "结算通过"
311
+ ]
312
+ },
313
+ {
314
+ "id": "construction-scheme-assistive-review",
315
+ "skillId": "aios-construction-scheme",
316
+ "promptPath": "skills/aios-construction-scheme/prompts/basic-prompt.md",
317
+ "scenario": "专项施工方案辅助复核、专家意见回查和 AI 生成失准复盘",
318
+ "sourceSignals": [
319
+ "advisory:source/prompts/06 scheme assistive review prompt",
320
+ "advisory:source/prompt-runs weak-vs-optimized comparison case 6",
321
+ "source-shape:special construction scheme feedback with calculation, figure, local-standard, and special-part issues",
322
+ "boundary-shape:assistant can produce review checklists but cannot confirm calculation correctness or approval outcome"
323
+ ],
324
+ "advisoryComparison": [
325
+ "普通提示词能总结 AI 方案问题,但不形成技术负责人可用的回查清单。",
326
+ "优化提示词把场景收口为失准复盘、专家修改说明回查、危险源控制和人工复核清单。"
327
+ ],
328
+ "weakPrompt": "请分析施工方案应用难点并给建议。",
329
+ "inputSummary": [
330
+ "输入为专项施工方案片段、专家修改说明、计算书目录或 AI 方案生成反馈。",
331
+ "资料可能包含地方标准、参数表、附图、计算书和特殊部位。",
332
+ "附图、扫描件和计算书公式需要专业人员或确定性工具复核。"
333
+ ],
334
+ "sampleInput": [
335
+ "资料类型:专项施工方案和 AI 方案生成反馈片段。",
336
+ "反馈片段:AI 生成方案中的参数表与计算书不一致,配图与项目无关,地方标准引用不完整。",
337
+ "方案片段:方案涉及关键工序、危险源、检查验收、应急措施、专家修改说明、附图和计算书。",
338
+ "资料缺口:附图、扫描页和计算书公式未经过专业人员或确定性工具复核。"
339
+ ],
340
+ "requiredPromptTerms": [
341
+ "资料来源清单",
342
+ "AI 生成失准原因复盘",
343
+ "专家修改说明回查清单",
344
+ "危险源和控制措施",
345
+ "需人工复核问题清单",
346
+ "不能下结论"
347
+ ],
348
+ "expectedOutputShape": [
349
+ "使用 AI 辅助施工方案复核口径。",
350
+ "输入为 AI 试用反馈时先输出失准原因复盘。",
351
+ "专家修改说明要回查正文、附图、计算书和验收要求。",
352
+ "不确认计算正确,不输出方案合格或专家论证通过。"
353
+ ],
354
+ "expectedStrongSections": [
355
+ "资料来源清单",
356
+ "AI 生成失准原因复盘",
357
+ "专家修改说明回查清单",
358
+ "危险源和控制措施",
359
+ "不能下结论的事项"
360
+ ],
361
+ "weakFailureModes": [
362
+ "使用 AI 审查施工方案口径。",
363
+ "输出方案合格或不合格。",
364
+ "确认计算书公式正确或专家论证通过。"
365
+ ],
366
+ "bannedClaims": [
367
+ "方案合格",
368
+ "计算正确",
369
+ "专家论证通过",
370
+ "审批通过",
371
+ "现场安全满足"
372
+ ]
373
+ }
374
+ ]
375
+ }
@@ -0,0 +1,179 @@
1
+ {
2
+ "schema": 1,
3
+ "name": "engineering-business-basic-model-output-example",
4
+ "version": "0.1",
5
+ "fixture": "prompts/evaluations/engineering-business-basic-fixtures.json",
6
+ "isExample": true,
7
+ "dataBoundary": "Example skeleton for validating output shape only. It is not a real model evaluation run and must not include customer names, contacts, project names, exact amounts, or raw source documents.",
8
+ "outputs": [
9
+ {
10
+ "caseId": "commercial-tender-response-matrix",
11
+ "promptVersion": "0.1",
12
+ "model": "example-skeleton",
13
+ "ranAt": "2026-06-16T00:00:00+08:00",
14
+ "notes": "示例骨架,仅用于校验输出结构。",
15
+ "output": [
16
+ "## 资料来源清单",
17
+ "- 输入资料:AI 技术标工具试用后的人工检查问题清单、评分点结构片段、资料缺口说明。",
18
+ "- 证据状态:缺少招标文件原文、评分办法页码、废标条款和技术标成稿。",
19
+ "",
20
+ "## 输入类型和资料状态判断",
21
+ "- 输入类型:人工检查问题清单和评分点结构,不是完整招标文件。",
22
+ "- 可处理范围:整理复核问题、评分点响应矩阵和补充资料清单。",
23
+ "",
24
+ "## AI 技术标复核问题回应矩阵",
25
+ "| 问题 | 影响 | 建议动作 | 责任复核 |",
26
+ "|---|---|---|---|",
27
+ "| 分章节合并后格式容易错乱 | 影响成稿一致性 | 建立章节合并检查清单 | 技术负责人 / 投标负责人 |",
28
+ "",
29
+ "## 评分点响应矩阵",
30
+ "| 评分点 | 当前资料状态 | 响应动作 | 需补充确认 |",
31
+ "|---|---|---|---|",
32
+ "| 施工总体部署 | 有评分点名称,缺评分细则 | 建立响应提纲 | 补充评分办法页码 |",
33
+ "",
34
+ "## 不能下结论的事项",
35
+ "- 不能判断真实评审结果、资格条件满足情况、废标项或采购路径。"
36
+ ]
37
+ },
38
+ {
39
+ "caseId": "commercial-contract-obligation-nodes",
40
+ "promptVersion": "0.1",
41
+ "model": "example-skeleton",
42
+ "ranAt": "2026-06-16T00:00:00+08:00",
43
+ "notes": "示例骨架,仅用于校验输出结构。",
44
+ "output": [
45
+ "## 资料来源清单",
46
+ "- 输入资料:工程分包合同片段、付款结算条款、空白字段说明。",
47
+ "- 证据状态:缺少完整合同、附件、签章页和最终谈判记录。",
48
+ "",
49
+ "## 合同基本信息",
50
+ "- 合同类型:工程分包或采购合同片段。",
51
+ "- 已见内容:月度工程量清单、考勤记录、工资表、验收和结算节点。",
52
+ "",
53
+ "## 空白字段优先核对表",
54
+ "| 字段 | 当前状态 | 影响 | 复核人 |",
55
+ "|---|---|---|---|",
56
+ "| 工程名称 | 空白 | 影响合同识别和归档 | 商务 / 法务 |",
57
+ "",
58
+ "## 关键履约节点",
59
+ "| 节点 | 触发条件 | 责任方 | 需准备资料 | 复核提示 |",
60
+ "|---|---|---|---|---|",
61
+ "| 月度结算资料提交 | 每月固定日期前 | 承包方 | 工程量清单、考勤、工资表 | 核对期限和审核口径 |",
62
+ "",
63
+ "## 不能下结论的事项",
64
+ "- 不能替代法务、造价、财务或项目负责人的专业复核。"
65
+ ]
66
+ },
67
+ {
68
+ "caseId": "construction-daily-issue-tracking",
69
+ "promptVersion": "0.1",
70
+ "model": "example-skeleton",
71
+ "ranAt": "2026-06-16T00:00:00+08:00",
72
+ "notes": "示例骨架,仅用于校验输出结构。",
73
+ "output": [
74
+ "## 资料来源清单",
75
+ "- 输入资料:单日施工日报片段、资源记录、模板空白字段说明。",
76
+ "- 证据状态:未见照片、完整资源台账、计划基线和现场检查记录。",
77
+ "",
78
+ "## 当日管理摘要",
79
+ "- 已明确事项:天气、当日施工内容、明日计划和部分人员工种。",
80
+ "- 需复核事项:材料、机械、照片和滞后措施是否因模板未填导致缺失。",
81
+ "",
82
+ "## 问题跟踪表",
83
+ "| 问题 | 类型 | 当前证据 | 建议动作 |",
84
+ "|---|---|---|---|",
85
+ "| 机械数量为空 | 模板质量 / 资料缺口 | 日报资源字段为空 | 补充机械台账或说明无需机械 |",
86
+ "",
87
+ "## 日报模板质量诊断",
88
+ "- 材料、机械、照片区域存在空白或未见附件,应作为资料质量问题先处理。",
89
+ "",
90
+ "## 不能下结论的事项",
91
+ "- 不能仅凭空白字段判断现场实际未发生、未进场或未留痕。"
92
+ ]
93
+ },
94
+ {
95
+ "caseId": "construction-meeting-action-closure",
96
+ "promptVersion": "0.1",
97
+ "model": "example-skeleton",
98
+ "ranAt": "2026-06-16T00:00:00+08:00",
99
+ "notes": "示例骨架,仅用于校验输出结构。",
100
+ "output": [
101
+ "## 资料来源清单",
102
+ "- 输入资料:工程会议纪要片段、过程状态和资料缺口说明。",
103
+ "- 证据状态:部分事项缺少最终责任部门和截止时间。",
104
+ "",
105
+ "## 会议核心结论",
106
+ "- 已明确:会议讨论了资质资料、证书继续教育、问题整改、资料提交和下次会议准备。",
107
+ "- 未明确:若纪要未写明责任部门或期限,应标为需确认。",
108
+ "",
109
+ "## 待办闭环表",
110
+ "| 待办 | 当前状态 | 责任线索 | 截止时间 | 下一步 |",
111
+ "|---|---|---|---|---|",
112
+ "| 资质资料补充 | 待提交 | 发言人或记录来源 | 需确认期限 | 明确责任部门和资料清单 |",
113
+ "",
114
+ "## 责任线索与需确认表",
115
+ "| 事项 | 已见线索 | 需确认 |",
116
+ "|---|---|---|",
117
+ "| 证书继续教育 | 会议中被讨论 | 是否形成正式任务、责任人和完成时间 |",
118
+ "",
119
+ "## 不能下结论的事项",
120
+ "- 不能把发言人自动等同最终责任人,不能补写未出现的截止日期。"
121
+ ]
122
+ },
123
+ {
124
+ "caseId": "commercial-variation-evidence-chain",
125
+ "promptVersion": "0.1",
126
+ "model": "example-skeleton",
127
+ "ranAt": "2026-06-16T00:00:00+08:00",
128
+ "notes": "示例骨架,仅用于校验输出结构。",
129
+ "output": [
130
+ "## 资料来源清单",
131
+ "- 输入资料:公开样表字段、会议纪要过程线索、日报过程线索、合同流程片段。",
132
+ "- 证据状态:缺少正式签证单、联系单编号、图纸变更单、计算书和审核书。",
133
+ "",
134
+ "## 资料链完整度判断",
135
+ "- 当前判断:部分线索,不构成完整资料链。",
136
+ "- 处理范围:整理字段结构、资料链缺口和复核分流。",
137
+ "",
138
+ "## 样表字段结构",
139
+ "- 样表字段可用于建立资料模板,但不能代表任何具体项目事实。",
140
+ "",
141
+ "## 资料链清单",
142
+ "| 资料类型 | 当前状态 | 用途 | 需补充确认 |",
143
+ "|---|---|---|---|",
144
+ "| 联系单编号 | 未提供 | 过程发起依据 | 补充正式编号和签认状态 |",
145
+ "",
146
+ "## 不能下结论的事项",
147
+ "- 不能根据过程线索判断结算、金额、责任或审批结果。"
148
+ ]
149
+ },
150
+ {
151
+ "caseId": "construction-scheme-assistive-review",
152
+ "promptVersion": "0.1",
153
+ "model": "example-skeleton",
154
+ "ranAt": "2026-06-16T00:00:00+08:00",
155
+ "notes": "示例骨架,仅用于校验输出结构。",
156
+ "output": [
157
+ "## 资料来源清单",
158
+ "- 输入资料:专项施工方案片段、专家修改说明、AI 方案生成反馈和资料缺口说明。",
159
+ "- 证据状态:附图、扫描页和计算书公式未经过专业人员或确定性工具复核。",
160
+ "",
161
+ "## AI 生成失准原因复盘",
162
+ "- 参数表与计算书不一致:需要回查参数来源和计算书引用。",
163
+ "- 配图与项目无关:需要替换为项目对应图纸或现场图。",
164
+ "",
165
+ "## 专家修改说明回查清单",
166
+ "| 回查对象 | 检查点 | 复核岗位 |",
167
+ "|---|---|---|",
168
+ "| 正文 | 是否响应专家意见 | 技术负责人 |",
169
+ "| 计算书 | 公式、参数和单位是否一致 | 专业工程师 / 确定性工具 |",
170
+ "",
171
+ "## 危险源和控制措施",
172
+ "- 仅整理方案中已出现的危险源、控制措施和验收要求,并标注需人工复核。",
173
+ "",
174
+ "## 不能下结论的事项",
175
+ "- 不能确认计算、审批、专家评审或现场执行结果。"
176
+ ]
177
+ }
178
+ ]
179
+ }