@itaila/archetype 0.3.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +475 -0
  3. package/dist/audit/audit-persona.d.ts +163 -0
  4. package/dist/audit/audit-persona.d.ts.map +1 -0
  5. package/dist/audit/audit-persona.js +415 -0
  6. package/dist/audit/audit-persona.js.map +1 -0
  7. package/dist/audit/brain-reflection.d.ts +33 -0
  8. package/dist/audit/brain-reflection.d.ts.map +1 -0
  9. package/dist/audit/brain-reflection.js +148 -0
  10. package/dist/audit/brain-reflection.js.map +1 -0
  11. package/dist/audit/conversation-audit.d.ts +12 -0
  12. package/dist/audit/conversation-audit.d.ts.map +1 -0
  13. package/dist/audit/conversation-audit.js +76 -0
  14. package/dist/audit/conversation-audit.js.map +1 -0
  15. package/dist/audit/prompt-audit.d.ts +10 -0
  16. package/dist/audit/prompt-audit.d.ts.map +1 -0
  17. package/dist/audit/prompt-audit.js +153 -0
  18. package/dist/audit/prompt-audit.js.map +1 -0
  19. package/dist/audit/prompt-dump.d.ts +137 -0
  20. package/dist/audit/prompt-dump.d.ts.map +1 -0
  21. package/dist/audit/prompt-dump.js +269 -0
  22. package/dist/audit/prompt-dump.js.map +1 -0
  23. package/dist/audit/trace-integrity.d.ts +33 -0
  24. package/dist/audit/trace-integrity.d.ts.map +1 -0
  25. package/dist/audit/trace-integrity.js +109 -0
  26. package/dist/audit/trace-integrity.js.map +1 -0
  27. package/dist/audit/types.d.ts +92 -0
  28. package/dist/audit/types.d.ts.map +1 -0
  29. package/dist/audit/types.js +2 -0
  30. package/dist/audit/types.js.map +1 -0
  31. package/dist/audit/version.d.ts +14 -0
  32. package/dist/audit/version.d.ts.map +1 -0
  33. package/dist/audit/version.js +65 -0
  34. package/dist/audit/version.js.map +1 -0
  35. package/dist/brain.d.ts +7 -0
  36. package/dist/brain.d.ts.map +1 -0
  37. package/dist/brain.js +83 -0
  38. package/dist/brain.js.map +1 -0
  39. package/dist/builder/actions.d.ts +60 -0
  40. package/dist/builder/actions.d.ts.map +1 -0
  41. package/dist/builder/actions.js +257 -0
  42. package/dist/builder/actions.js.map +1 -0
  43. package/dist/builder/browser.d.ts +140 -0
  44. package/dist/builder/browser.d.ts.map +1 -0
  45. package/dist/builder/browser.js +232 -0
  46. package/dist/builder/browser.js.map +1 -0
  47. package/dist/builder/executor.d.ts +228 -0
  48. package/dist/builder/executor.d.ts.map +1 -0
  49. package/dist/builder/executor.js +1548 -0
  50. package/dist/builder/executor.js.map +1 -0
  51. package/dist/builder/index.d.ts +24 -0
  52. package/dist/builder/index.d.ts.map +1 -0
  53. package/dist/builder/index.js +24 -0
  54. package/dist/builder/index.js.map +1 -0
  55. package/dist/builder/node-test-discovery.d.ts +13 -0
  56. package/dist/builder/node-test-discovery.d.ts.map +1 -0
  57. package/dist/builder/node-test-discovery.js +45 -0
  58. package/dist/builder/node-test-discovery.js.map +1 -0
  59. package/dist/builder/sandbox.d.ts +172 -0
  60. package/dist/builder/sandbox.d.ts.map +1 -0
  61. package/dist/builder/sandbox.js +294 -0
  62. package/dist/builder/sandbox.js.map +1 -0
  63. package/dist/builder/workspace-files.d.ts +63 -0
  64. package/dist/builder/workspace-files.d.ts.map +1 -0
  65. package/dist/builder/workspace-files.js +190 -0
  66. package/dist/builder/workspace-files.js.map +1 -0
  67. package/dist/core/actions.d.ts +55 -0
  68. package/dist/core/actions.d.ts.map +1 -0
  69. package/dist/core/actions.js +311 -0
  70. package/dist/core/actions.js.map +1 -0
  71. package/dist/core/attachment-notes.d.ts +7 -0
  72. package/dist/core/attachment-notes.d.ts.map +1 -0
  73. package/dist/core/attachment-notes.js +38 -0
  74. package/dist/core/attachment-notes.js.map +1 -0
  75. package/dist/core/context.d.ts +10 -0
  76. package/dist/core/context.d.ts.map +1 -0
  77. package/dist/core/context.js +108 -0
  78. package/dist/core/context.js.map +1 -0
  79. package/dist/core/crud-prompt.d.ts +16 -0
  80. package/dist/core/crud-prompt.d.ts.map +1 -0
  81. package/dist/core/crud-prompt.js +268 -0
  82. package/dist/core/crud-prompt.js.map +1 -0
  83. package/dist/core/crud-schema.d.ts +12 -0
  84. package/dist/core/crud-schema.d.ts.map +1 -0
  85. package/dist/core/crud-schema.js +42 -0
  86. package/dist/core/crud-schema.js.map +1 -0
  87. package/dist/core/effective-config.d.ts +13 -0
  88. package/dist/core/effective-config.d.ts.map +1 -0
  89. package/dist/core/effective-config.js +33 -0
  90. package/dist/core/effective-config.js.map +1 -0
  91. package/dist/core/entities.d.ts +82 -0
  92. package/dist/core/entities.d.ts.map +1 -0
  93. package/dist/core/entities.js +116 -0
  94. package/dist/core/entities.js.map +1 -0
  95. package/dist/core/entity-helpers.d.ts +47 -0
  96. package/dist/core/entity-helpers.d.ts.map +1 -0
  97. package/dist/core/entity-helpers.js +122 -0
  98. package/dist/core/entity-helpers.js.map +1 -0
  99. package/dist/core/entity-registry.d.ts +47 -0
  100. package/dist/core/entity-registry.d.ts.map +1 -0
  101. package/dist/core/entity-registry.js +54 -0
  102. package/dist/core/entity-registry.js.map +1 -0
  103. package/dist/core/eq.d.ts +13 -0
  104. package/dist/core/eq.d.ts.map +1 -0
  105. package/dist/core/eq.js +41 -0
  106. package/dist/core/eq.js.map +1 -0
  107. package/dist/core/focus-context.d.ts +19 -0
  108. package/dist/core/focus-context.d.ts.map +1 -0
  109. package/dist/core/focus-context.js +46 -0
  110. package/dist/core/focus-context.js.map +1 -0
  111. package/dist/core/focus-mode-actions.d.ts +23 -0
  112. package/dist/core/focus-mode-actions.d.ts.map +1 -0
  113. package/dist/core/focus-mode-actions.js +74 -0
  114. package/dist/core/focus-mode-actions.js.map +1 -0
  115. package/dist/core/greeting.d.ts +10 -0
  116. package/dist/core/greeting.d.ts.map +1 -0
  117. package/dist/core/greeting.js +41 -0
  118. package/dist/core/greeting.js.map +1 -0
  119. package/dist/core/identity.d.ts +13 -0
  120. package/dist/core/identity.d.ts.map +1 -0
  121. package/dist/core/identity.js +54 -0
  122. package/dist/core/identity.js.map +1 -0
  123. package/dist/core/knowledge.d.ts +10 -0
  124. package/dist/core/knowledge.d.ts.map +1 -0
  125. package/dist/core/knowledge.js +40 -0
  126. package/dist/core/knowledge.js.map +1 -0
  127. package/dist/core/memory-actions.d.ts +38 -0
  128. package/dist/core/memory-actions.d.ts.map +1 -0
  129. package/dist/core/memory-actions.js +181 -0
  130. package/dist/core/memory-actions.js.map +1 -0
  131. package/dist/core/memory.d.ts +35 -0
  132. package/dist/core/memory.d.ts.map +1 -0
  133. package/dist/core/memory.js +168 -0
  134. package/dist/core/memory.js.map +1 -0
  135. package/dist/core/peer-actions.d.ts +15 -0
  136. package/dist/core/peer-actions.d.ts.map +1 -0
  137. package/dist/core/peer-actions.js +33 -0
  138. package/dist/core/peer-actions.js.map +1 -0
  139. package/dist/core/prompt-builder.d.ts +46 -0
  140. package/dist/core/prompt-builder.d.ts.map +1 -0
  141. package/dist/core/prompt-builder.js +543 -0
  142. package/dist/core/prompt-builder.js.map +1 -0
  143. package/dist/core/prompt-mode.d.ts +3 -0
  144. package/dist/core/prompt-mode.d.ts.map +1 -0
  145. package/dist/core/prompt-mode.js +6 -0
  146. package/dist/core/prompt-mode.js.map +1 -0
  147. package/dist/core/prompted-turn.d.ts +6 -0
  148. package/dist/core/prompted-turn.d.ts.map +1 -0
  149. package/dist/core/prompted-turn.js +48 -0
  150. package/dist/core/prompted-turn.js.map +1 -0
  151. package/dist/core/request-builder.d.ts +14 -0
  152. package/dist/core/request-builder.d.ts.map +1 -0
  153. package/dist/core/request-builder.js +64 -0
  154. package/dist/core/request-builder.js.map +1 -0
  155. package/dist/core/session-routing.d.ts +23 -0
  156. package/dist/core/session-routing.d.ts.map +1 -0
  157. package/dist/core/session-routing.js +59 -0
  158. package/dist/core/session-routing.js.map +1 -0
  159. package/dist/core/voice.d.ts +6 -0
  160. package/dist/core/voice.d.ts.map +1 -0
  161. package/dist/core/voice.js +30 -0
  162. package/dist/core/voice.js.map +1 -0
  163. package/dist/engine/chat.d.ts +45 -0
  164. package/dist/engine/chat.d.ts.map +1 -0
  165. package/dist/engine/chat.js +308 -0
  166. package/dist/engine/chat.js.map +1 -0
  167. package/dist/engine/continuity.d.ts +107 -0
  168. package/dist/engine/continuity.d.ts.map +1 -0
  169. package/dist/engine/continuity.js +320 -0
  170. package/dist/engine/continuity.js.map +1 -0
  171. package/dist/engine/crud.d.ts +62 -0
  172. package/dist/engine/crud.d.ts.map +1 -0
  173. package/dist/engine/crud.js +260 -0
  174. package/dist/engine/crud.js.map +1 -0
  175. package/dist/engine/side-effects.d.ts +93 -0
  176. package/dist/engine/side-effects.d.ts.map +1 -0
  177. package/dist/engine/side-effects.js +271 -0
  178. package/dist/engine/side-effects.js.map +1 -0
  179. package/dist/engine/staging.d.ts +29 -0
  180. package/dist/engine/staging.d.ts.map +1 -0
  181. package/dist/engine/staging.js +159 -0
  182. package/dist/engine/staging.js.map +1 -0
  183. package/dist/engine/working-set.d.ts +18 -0
  184. package/dist/engine/working-set.d.ts.map +1 -0
  185. package/dist/engine/working-set.js +246 -0
  186. package/dist/engine/working-set.js.map +1 -0
  187. package/dist/evals/action-contracts.d.ts +40 -0
  188. package/dist/evals/action-contracts.d.ts.map +1 -0
  189. package/dist/evals/action-contracts.js +208 -0
  190. package/dist/evals/action-contracts.js.map +1 -0
  191. package/dist/evals/brain-bloat.d.ts +39 -0
  192. package/dist/evals/brain-bloat.d.ts.map +1 -0
  193. package/dist/evals/brain-bloat.js +167 -0
  194. package/dist/evals/brain-bloat.js.map +1 -0
  195. package/dist/evals/brain-prescriptions.d.ts +30 -0
  196. package/dist/evals/brain-prescriptions.d.ts.map +1 -0
  197. package/dist/evals/brain-prescriptions.js +148 -0
  198. package/dist/evals/brain-prescriptions.js.map +1 -0
  199. package/dist/evals/cross-layer-duplicates.d.ts +49 -0
  200. package/dist/evals/cross-layer-duplicates.d.ts.map +1 -0
  201. package/dist/evals/cross-layer-duplicates.js +289 -0
  202. package/dist/evals/cross-layer-duplicates.js.map +1 -0
  203. package/dist/evals/entity-visibility.d.ts +28 -0
  204. package/dist/evals/entity-visibility.d.ts.map +1 -0
  205. package/dist/evals/entity-visibility.js +216 -0
  206. package/dist/evals/entity-visibility.js.map +1 -0
  207. package/dist/evals/index.d.ts +19 -0
  208. package/dist/evals/index.d.ts.map +1 -0
  209. package/dist/evals/index.js +11 -0
  210. package/dist/evals/index.js.map +1 -0
  211. package/dist/evals/judge.d.ts +22 -0
  212. package/dist/evals/judge.d.ts.map +1 -0
  213. package/dist/evals/judge.js +337 -0
  214. package/dist/evals/judge.js.map +1 -0
  215. package/dist/evals/operational-contract.d.ts +40 -0
  216. package/dist/evals/operational-contract.d.ts.map +1 -0
  217. package/dist/evals/operational-contract.js +115 -0
  218. package/dist/evals/operational-contract.js.map +1 -0
  219. package/dist/evals/prompt-content.d.ts +14 -0
  220. package/dist/evals/prompt-content.d.ts.map +1 -0
  221. package/dist/evals/prompt-content.js +104 -0
  222. package/dist/evals/prompt-content.js.map +1 -0
  223. package/dist/evals/runtime.d.ts +4 -0
  224. package/dist/evals/runtime.d.ts.map +1 -0
  225. package/dist/evals/runtime.js +197 -0
  226. package/dist/evals/runtime.js.map +1 -0
  227. package/dist/evals/sample-projects.d.ts +143 -0
  228. package/dist/evals/sample-projects.d.ts.map +1 -0
  229. package/dist/evals/sample-projects.js +644 -0
  230. package/dist/evals/sample-projects.js.map +1 -0
  231. package/dist/evals/types.d.ts +88 -0
  232. package/dist/evals/types.d.ts.map +1 -0
  233. package/dist/evals/types.js +2 -0
  234. package/dist/evals/types.js.map +1 -0
  235. package/dist/foundation/index.d.ts +158 -0
  236. package/dist/foundation/index.d.ts.map +1 -0
  237. package/dist/foundation/index.js +256 -0
  238. package/dist/foundation/index.js.map +1 -0
  239. package/dist/index.d.ts +223 -0
  240. package/dist/index.d.ts.map +1 -0
  241. package/dist/index.js +998 -0
  242. package/dist/index.js.map +1 -0
  243. package/dist/managed/autonomous-loop.d.ts +199 -0
  244. package/dist/managed/autonomous-loop.d.ts.map +1 -0
  245. package/dist/managed/autonomous-loop.js +451 -0
  246. package/dist/managed/autonomous-loop.js.map +1 -0
  247. package/dist/managed/conversation.d.ts +20 -0
  248. package/dist/managed/conversation.d.ts.map +1 -0
  249. package/dist/managed/conversation.js +40 -0
  250. package/dist/managed/conversation.js.map +1 -0
  251. package/dist/managed/knowledge.d.ts +7 -0
  252. package/dist/managed/knowledge.d.ts.map +1 -0
  253. package/dist/managed/knowledge.js +174 -0
  254. package/dist/managed/knowledge.js.map +1 -0
  255. package/dist/managed/memory-manager.d.ts +7 -0
  256. package/dist/managed/memory-manager.d.ts.map +1 -0
  257. package/dist/managed/memory-manager.js +18 -0
  258. package/dist/managed/memory-manager.js.map +1 -0
  259. package/dist/managed/memory-review.d.ts +45 -0
  260. package/dist/managed/memory-review.d.ts.map +1 -0
  261. package/dist/managed/memory-review.js +130 -0
  262. package/dist/managed/memory-review.js.map +1 -0
  263. package/dist/managed/storage.d.ts +2 -0
  264. package/dist/managed/storage.d.ts.map +1 -0
  265. package/dist/managed/storage.js +2 -0
  266. package/dist/managed/storage.js.map +1 -0
  267. package/dist/managed/work-history.d.ts +23 -0
  268. package/dist/managed/work-history.d.ts.map +1 -0
  269. package/dist/managed/work-history.js +31 -0
  270. package/dist/managed/work-history.js.map +1 -0
  271. package/dist/observability/index.d.ts +15 -0
  272. package/dist/observability/index.d.ts.map +1 -0
  273. package/dist/observability/index.js +15 -0
  274. package/dist/observability/index.js.map +1 -0
  275. package/dist/observability/render-run-markdown.d.ts +90 -0
  276. package/dist/observability/render-run-markdown.d.ts.map +1 -0
  277. package/dist/observability/render-run-markdown.js +231 -0
  278. package/dist/observability/render-run-markdown.js.map +1 -0
  279. package/dist/observability/turn-reporter.d.ts +20 -0
  280. package/dist/observability/turn-reporter.d.ts.map +1 -0
  281. package/dist/observability/turn-reporter.js +106 -0
  282. package/dist/observability/turn-reporter.js.map +1 -0
  283. package/dist/persona.d.ts +49 -0
  284. package/dist/persona.d.ts.map +1 -0
  285. package/dist/persona.js +287 -0
  286. package/dist/persona.js.map +1 -0
  287. package/dist/playbook/defaults.d.ts +25 -0
  288. package/dist/playbook/defaults.d.ts.map +1 -0
  289. package/dist/playbook/defaults.js +108 -0
  290. package/dist/playbook/defaults.js.map +1 -0
  291. package/dist/playbook/invariants.d.ts +244 -0
  292. package/dist/playbook/invariants.d.ts.map +1 -0
  293. package/dist/playbook/invariants.js +259 -0
  294. package/dist/playbook/invariants.js.map +1 -0
  295. package/dist/playbook/templates.d.ts +7 -0
  296. package/dist/playbook/templates.d.ts.map +1 -0
  297. package/dist/playbook/templates.js +437 -0
  298. package/dist/playbook/templates.js.map +1 -0
  299. package/dist/providers/gemini.d.ts +73 -0
  300. package/dist/providers/gemini.d.ts.map +1 -0
  301. package/dist/providers/gemini.js +536 -0
  302. package/dist/providers/gemini.js.map +1 -0
  303. package/dist/providers/types.d.ts +2 -0
  304. package/dist/providers/types.d.ts.map +1 -0
  305. package/dist/providers/types.js +2 -0
  306. package/dist/providers/types.js.map +1 -0
  307. package/dist/providers/zod-to-gemini.d.ts +8 -0
  308. package/dist/providers/zod-to-gemini.d.ts.map +1 -0
  309. package/dist/providers/zod-to-gemini.js +148 -0
  310. package/dist/providers/zod-to-gemini.js.map +1 -0
  311. package/dist/samples/pm-spec-agent.d.ts +22 -0
  312. package/dist/samples/pm-spec-agent.d.ts.map +1 -0
  313. package/dist/samples/pm-spec-agent.js +53 -0
  314. package/dist/samples/pm-spec-agent.js.map +1 -0
  315. package/dist/types.d.ts +920 -0
  316. package/dist/types.d.ts.map +1 -0
  317. package/dist/types.js +2 -0
  318. package/dist/types.js.map +1 -0
  319. package/package.json +68 -0
@@ -0,0 +1,11 @@
1
+ export { runEvalConversation } from './runtime.js';
2
+ export { auditBrainBloat } from './brain-bloat.js';
3
+ export { auditOperationalPromptContract } from './operational-contract.js';
4
+ export { auditPromptContent } from './prompt-content.js';
5
+ export { auditCrossLayerDuplicates } from './cross-layer-duplicates.js';
6
+ export { auditActionContracts } from './action-contracts.js';
7
+ export { auditBrainPrescriptions } from './brain-prescriptions.js';
8
+ export { auditEntityVisibility } from './entity-visibility.js';
9
+ export { SAMPLE_PROJECTS, coachProject, nutritionProject, fitnessProject, languageTutorProject, chiefOfStaffProject, } from './sample-projects.js';
10
+ export { judgeEvalTurn, judgeEvalConversation, judgePairwiseConversations, } from './judge.js';
11
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/evals/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,cAAc,CAAA;AAClD,OAAO,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAA;AAClD,OAAO,EAAE,8BAA8B,EAAE,MAAM,2BAA2B,CAAA;AAC1E,OAAO,EAAE,kBAAkB,EAAE,MAAM,qBAAqB,CAAA;AACxD,OAAO,EAAE,yBAAyB,EAAE,MAAM,6BAA6B,CAAA;AACvE,OAAO,EAAE,oBAAoB,EAAE,MAAM,uBAAuB,CAAA;AAC5D,OAAO,EAAE,uBAAuB,EAAE,MAAM,0BAA0B,CAAA;AAClE,OAAO,EAAE,qBAAqB,EAAE,MAAM,wBAAwB,CAAA;AAwC9D,OAAO,EACL,eAAe,EACf,YAAY,EACZ,gBAAgB,EAChB,cAAc,EACd,oBAAoB,EACpB,mBAAmB,GACpB,MAAM,sBAAsB,CAAA;AAC7B,OAAO,EACL,aAAa,EACb,qBAAqB,EACrB,0BAA0B,GAC3B,MAAM,YAAY,CAAA"}
@@ -0,0 +1,22 @@
1
+ import type { EvalJudgeScenario, EvalJudgeVerdict, EvalPairwiseVerdict, EvalTurnResult } from './types.js';
2
+ export declare function judgeEvalTurn(apiKey: string, projectName: string, failureSurface: string, scenario: EvalJudgeScenario, transcript: string, turn: EvalTurnResult, options?: {
3
+ model?: string;
4
+ fallbackModels?: string[];
5
+ }): Promise<EvalJudgeVerdict>;
6
+ export declare function judgeEvalConversation(apiKey: string, projectName: string, failureSurface: string, scenario: EvalJudgeScenario, transcript: string, stateBefore: string, stateAfter: string, options?: {
7
+ model?: string;
8
+ fallbackModels?: string[];
9
+ }): Promise<EvalJudgeVerdict>;
10
+ export declare function judgePairwiseConversations(apiKey: string, scenario: EvalJudgeScenario, conversationA: {
11
+ label: string;
12
+ transcript: string;
13
+ stateAfter: string;
14
+ }, conversationB: {
15
+ label: string;
16
+ transcript: string;
17
+ stateAfter: string;
18
+ }, options?: {
19
+ model?: string;
20
+ fallbackModels?: string[];
21
+ }): Promise<EvalPairwiseVerdict>;
22
+ //# sourceMappingURL=judge.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"judge.d.ts","sourceRoot":"","sources":["../../src/evals/judge.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,iBAAiB,EACjB,gBAAgB,EAChB,mBAAmB,EACnB,cAAc,EACf,MAAM,YAAY,CAAA;AA8HnB,wBAAsB,aAAa,CACjC,MAAM,EAAE,MAAM,EACd,WAAW,EAAE,MAAM,EACnB,cAAc,EAAE,MAAM,EACtB,QAAQ,EAAE,iBAAiB,EAC3B,UAAU,EAAE,MAAM,EAClB,IAAI,EAAE,cAAc,EACpB,OAAO,CAAC,EAAE;IAAE,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,cAAc,CAAC,EAAE,MAAM,EAAE,CAAA;CAAE,GACtD,OAAO,CAAC,gBAAgB,CAAC,CAuF3B;AAED,wBAAsB,qBAAqB,CACzC,MAAM,EAAE,MAAM,EACd,WAAW,EAAE,MAAM,EACnB,cAAc,EAAE,MAAM,EACtB,QAAQ,EAAE,iBAAiB,EAC3B,UAAU,EAAE,MAAM,EAClB,WAAW,EAAE,MAAM,EACnB,UAAU,EAAE,MAAM,EAClB,OAAO,CAAC,EAAE;IAAE,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,cAAc,CAAC,EAAE,MAAM,EAAE,CAAA;CAAE,GACtD,OAAO,CAAC,gBAAgB,CAAC,CA8D3B;AAED,wBAAsB,0BAA0B,CAC9C,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,iBAAiB,EAC3B,aAAa,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,MAAM,CAAA;CAAE,EACxE,aAAa,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,MAAM,CAAA;CAAE,EACxE,OAAO,CAAC,EAAE;IAAE,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,cAAc,CAAC,EAAE,MAAM,EAAE,CAAA;CAAE,GACtD,OAAO,CAAC,mBAAmB,CAAC,CAsE9B"}
@@ -0,0 +1,337 @@
1
+ import { GoogleGenerativeAI, SchemaType } from '@google/generative-ai';
2
const JUDGE_MODEL = 'gemini-3.5-flash';
const JUDGE_FALLBACK_CHAIN = ['gemini-3.1-pro-preview', 'gemini-3.1-flash-lite'];
const JUDGE_MAX_RETRIES = 2;
/**
 * Resolve the model chain for judge calls: caller override → env → default.
 *
 * Primary model: `primaryOverride`, else `ARCHETYPE_JUDGE_MODEL`, else
 * `JUDGE_MODEL`. Blank or whitespace-only values count as "not set", so an
 * exported-but-empty environment variable can no longer put an empty model
 * name at the head of the chain.
 *
 * Fallbacks: `fallbackOverride` when provided (an explicit empty array means
 * "no fallbacks"), else the comma-separated `ARCHETYPE_JUDGE_FALLBACK_MODELS`,
 * else `JUDGE_FALLBACK_CHAIN`. Entries are trimmed and blank ones dropped.
 *
 * @param {string | undefined} primaryOverride   Caller-chosen primary model.
 * @param {string[] | undefined} fallbackOverride Caller-chosen fallback models.
 * @returns {string[]} [primary, ...fallbacks] with duplicates removed,
 *   first occurrence wins.
 */
function resolveJudgeChain(primaryOverride, fallbackOverride) {
    // Normalise any candidate to a trimmed string; non-strings become ''.
    const clean = (value) => (typeof value === 'string' ? value.trim() : '');
    // `||` rather than `??`: an empty string must fall through to the next source.
    const primary = clean(primaryOverride) || clean(process.env.ARCHETYPE_JUDGE_MODEL) || JUDGE_MODEL;
    const envFallbacks = (process.env.ARCHETYPE_JUDGE_FALLBACK_MODELS ?? '')
        .split(',')
        .map((entry) => clean(entry))
        .filter(Boolean);
    let fallbacks;
    if (fallbackOverride != null) {
        fallbacks = fallbackOverride.map((entry) => clean(entry)).filter(Boolean);
    }
    else {
        fallbacks = envFallbacks.length > 0 ? envFallbacks : JUDGE_FALLBACK_CHAIN;
    }
    // A Set iterates in insertion order, so this keeps the first occurrence of each model.
    return [...new Set([primary, ...fallbacks])];
}
25
/**
 * Decide whether a judge-call failure looks transient and is worth retrying.
 *
 * The decision is made purely from the error text: HTTP 429, any 5xx code,
 * or one of the phrases timeout / timed out / overloaded / unavailable /
 * resource exhausted (case-insensitive, whole words).
 *
 * @param {unknown} error  An Error (its `message` is inspected) or any other
 *   thrown value (its string form is inspected).
 * @returns {boolean} true when the failure should be retried.
 */
function isRetryable(error) {
    const msg = error instanceof Error ? error.message : String(error);
    // `_` is a word character for `\b`, so enum-style status names such as
    // RESOURCE_EXHAUSTED or SERVICE_UNAVAILABLE would never match the phrases
    // below. Turning underscores into spaces lets them match without
    // un-matching anything the pattern accepted before.
    const normalized = msg.replace(/_/g, ' ');
    return /\b(429|5\d\d|timeout|timed out|overloaded|unavailable|resource exhausted)\b/i.test(normalized);
}
29
/**
 * Send one judge request, walking the model chain until a model answers.
 *
 * For each model in `modelChain` the request is attempted up to
 * `JUDGE_MAX_RETRIES + 1` times. A failure that `isRetryable` rejects is
 * rethrown immediately and ends the whole call; a retryable failure waits
 * (500ms doubling per attempt, capped at 4000ms) before the next attempt,
 * and once a model's attempts are used up the next model is tried.
 *
 * @param apiKey      Key handed to the GoogleGenerativeAI client.
 * @param modelChain  Model names in the order they should be tried.
 * @param makeModel   Factory `(client, modelName) => model` producing the
 *                    configured model to call.
 * @param input       Prompt passed to `generateContent`.
 * @returns The text of the first successful response.
 * @throws The last recorded error, or a generic "all models exhausted"
 *         error when no attempt was made at all.
 */
async function callJudgeWithFallback(apiKey, modelChain, makeModel, input) {
    const client = new GoogleGenerativeAI(apiKey);
    const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
    let lastError;
    for (const [position, modelName] of modelChain.entries()) {
        if (position > 0) {
            console.warn(`[archetype:judge] ${modelChain[position - 1]} exhausted — falling back to ${modelName}`);
        }
        const judge = makeModel(client, modelName);
        let attempt = 0;
        while (attempt <= JUDGE_MAX_RETRIES) {
            try {
                const reply = await judge.generateContent(input);
                return reply.response.text();
            }
            catch (failure) {
                lastError = failure instanceof Error ? failure : new Error(String(failure));
                if (!isRetryable(lastError)) {
                    throw lastError;
                }
                // No wait after the final attempt: control moves on to the next model.
                const retriesRemain = attempt < JUDGE_MAX_RETRIES;
                if (retriesRemain) {
                    const delayMs = Math.min(500 * 2 ** attempt, 4000);
                    console.warn(`[archetype:judge] ${modelName} attempt ${attempt + 1} failed: ${lastError.message} — retrying in ${delayMs}ms`);
                    await pause(delayMs);
                }
            }
            attempt += 1;
        }
    }
    throw lastError ?? new Error('[archetype:judge] all models exhausted');
}
57
+ const JUDGE_CRITERIA = [
58
+ {
59
+ name: 'Human voice',
60
+ desc2: 'Reads like a capable human in this role, not like an LLM performing a persona.',
61
+ desc0: 'Feels robotic, generic, or obviously prompt-shaped.',
62
+ },
63
+ {
64
+ name: 'Relationship fit',
65
+ desc2: 'The trust posture matches the domain: warm where needed, firm where needed, never off-key.',
66
+ desc0: 'The relationship archetype feels mismatched to the domain or moment.',
67
+ },
68
+ {
69
+ name: 'Judgment',
70
+ desc2: 'Takes or withholds action appropriately; good sense of when to talk, when to act, and when to confirm.',
71
+ desc0: 'Acts too eagerly, stays too passive, or clearly mis-times action.',
72
+ },
73
+ {
74
+ name: 'Invisible operations',
75
+ desc2: 'Side-effects happen naturally without awkward tool narration or self-consciousness.',
76
+ desc0: 'Operations feel clanky, overly explicit, or disrupt the conversation.',
77
+ },
78
+ {
79
+ name: 'Memory hygiene',
80
+ desc2: 'Remembers the right thing, updates instead of duplicating, and avoids storing junk.',
81
+ desc0: 'Misses durable signal or creates noisy/duplicated memory.',
82
+ },
83
+ {
84
+ name: 'Goal advancement',
85
+ desc2: 'Meaningfully moves the user toward the persona north star in this turn.',
86
+ desc0: 'Sounds okay but does not create real forward motion.',
87
+ },
88
+ {
89
+ name: 'Specificity',
90
+ desc2: 'Grounded in the provided context and says something particular rather than interchangeable.',
91
+ desc0: 'Could have been said to almost anyone in almost any situation.',
92
+ },
93
+ {
94
+ name: 'Come-back test',
95
+ desc2: 'Makes the user want another turn because it is useful, alive, and well-judged.',
96
+ desc0: 'The user would be less likely to come back after this response.',
97
+ },
98
+ ];
99
+ const JUDGE_PROMPT = `You are evaluating whether an Archetype persona actually works in practice.
100
+
101
+ Score each criterion 0, 1, or 2.
102
+
103
+ ${JUDGE_CRITERIA.map((criterion, index) => `${index + 1}. ${criterion.name}
104
+ - 2: ${criterion.desc2}
105
+ - 1: Partially works but leaves meaningful room for improvement.
106
+ - 0: ${criterion.desc0}`).join('\n\n')}
107
+
108
+ You are not only judging the response quality. You are also stress-testing the concept:
109
+ - Is the persona itself well-shaped for this domain?
110
+ - Does the SDK seem to make the behavior easier or harder?
111
+ - Do the side-effects feel invisible and coherent?
112
+
113
+ Use the transcript, executed/proposed actions, and state delta.
114
+ Return ONLY valid JSON.`;
115
+ export async function judgeEvalTurn(apiKey, projectName, failureSurface, scenario, transcript, turn, options) {
116
+ const modelChain = resolveJudgeChain(options?.model, options?.fallbackModels);
117
+ const schema = {
118
+ type: SchemaType.OBJECT,
119
+ properties: {
120
+ scores: {
121
+ type: SchemaType.ARRAY,
122
+ items: {
123
+ type: SchemaType.OBJECT,
124
+ properties: {
125
+ criterion: { type: SchemaType.STRING },
126
+ score: { type: SchemaType.NUMBER },
127
+ reasoning: { type: SchemaType.STRING },
128
+ },
129
+ required: ['criterion', 'score', 'reasoning'],
130
+ },
131
+ },
132
+ promptFixes: {
133
+ type: SchemaType.ARRAY,
134
+ items: { type: SchemaType.STRING },
135
+ },
136
+ sdkGaps: {
137
+ type: SchemaType.ARRAY,
138
+ items: { type: SchemaType.STRING },
139
+ },
140
+ conceptGaps: {
141
+ type: SchemaType.ARRAY,
142
+ items: { type: SchemaType.STRING },
143
+ },
144
+ },
145
+ required: ['scores', 'promptFixes', 'sdkGaps', 'conceptGaps'],
146
+ };
147
+ const makeModel = (genAI, modelName) => genAI.getGenerativeModel({
148
+ model: modelName,
149
+ systemInstruction: JUDGE_PROMPT,
150
+ generationConfig: {
151
+ temperature: 0.2,
152
+ responseMimeType: 'application/json',
153
+ responseSchema: schema,
154
+ },
155
+ });
156
+ const actionLines = [];
157
+ for (const record of turn.actionRecords) {
158
+ const mode = record.proposed ? 'proposed' : record.success ? 'executed' : 'failed';
159
+ actionLines.push(`- ${mode}: ${record.action.name}${record.error ? ` (${record.error})` : ''}`);
160
+ }
161
+ for (const crud of turn.trace.crudActions) {
162
+ const mode = crud.status === 'invalid' ? 'invalid' : 'executed';
163
+ const params = Object.keys(crud.params ?? {}).length > 0 ? ` ${JSON.stringify(crud.params)}` : '';
164
+ actionLines.push(`- ${mode}: crud ${crud.operation} ${crud.entity}${crud.id ? ` ${crud.id}` : ''}${params}${crud.error ? ` (${crud.error})` : ''}`);
165
+ }
166
+ const actionSummary = actionLines.length === 0 ? 'No actions' : actionLines.join('\n');
167
+ const input = `PROJECT: ${projectName}
168
+ FAILURE SURFACE: ${failureSurface}
169
+ SCENARIO: ${scenario.name}
170
+ DESCRIPTION: ${scenario.description}
171
+ TESTS: ${scenario.tests.join(', ')}
172
+ ${scenario.expectedBehavior?.length ? `EXPECTED BEHAVIOR:\n${scenario.expectedBehavior.map(item => `- ${item}`).join('\n')}` : ''}
173
+
174
+ TRANSCRIPT SO FAR:
175
+ ${transcript}
176
+
177
+ CURRENT TURN:
178
+ USER: ${turn.userMessage}
179
+ ASSISTANT: ${turn.assistantMessage}
180
+
181
+ ACTIONS:
182
+ ${actionSummary}
183
+
184
+ STATE BEFORE:
185
+ ${turn.stateBefore}
186
+
187
+ STATE AFTER:
188
+ ${turn.stateAfter}
189
+
190
+ Score the current turn.`;
191
+ const text = await callJudgeWithFallback(apiKey, modelChain, makeModel, input);
192
+ return parseJudgeVerdict(text);
193
+ }
194
+ export async function judgeEvalConversation(apiKey, projectName, failureSurface, scenario, transcript, stateBefore, stateAfter, options) {
195
+ const modelChain = resolveJudgeChain(options?.model, options?.fallbackModels);
196
+ const schema = {
197
+ type: SchemaType.OBJECT,
198
+ properties: {
199
+ scores: {
200
+ type: SchemaType.ARRAY,
201
+ items: {
202
+ type: SchemaType.OBJECT,
203
+ properties: {
204
+ criterion: { type: SchemaType.STRING },
205
+ score: { type: SchemaType.NUMBER },
206
+ reasoning: { type: SchemaType.STRING },
207
+ },
208
+ required: ['criterion', 'score', 'reasoning'],
209
+ },
210
+ },
211
+ promptFixes: { type: SchemaType.ARRAY, items: { type: SchemaType.STRING } },
212
+ sdkGaps: { type: SchemaType.ARRAY, items: { type: SchemaType.STRING } },
213
+ conceptGaps: { type: SchemaType.ARRAY, items: { type: SchemaType.STRING } },
214
+ },
215
+ required: ['scores', 'promptFixes', 'sdkGaps', 'conceptGaps'],
216
+ };
217
+ const makeModel = (genAI, modelName) => genAI.getGenerativeModel({
218
+ model: modelName,
219
+ systemInstruction: `${JUDGE_PROMPT}
220
+
221
+ In addition to judging the latest turn, judge the entire conversation arc:
222
+ - Does the persona stay coherent across turns?
223
+ - Does it compound value rather than resetting every turn?
224
+ - Does memory use help rather than create drift?
225
+ - Do the actions across turns feel clean and well-timed?`,
226
+ generationConfig: {
227
+ temperature: 0.2,
228
+ responseMimeType: 'application/json',
229
+ responseSchema: schema,
230
+ },
231
+ });
232
+ const input = `PROJECT: ${projectName}
233
+ FAILURE SURFACE: ${failureSurface}
234
+ SCENARIO: ${scenario.name}
235
+ DESCRIPTION: ${scenario.description}
236
+ TESTS: ${scenario.tests.join(', ')}
237
+ ${scenario.expectedBehavior?.length ? `EXPECTED BEHAVIOR:\n${scenario.expectedBehavior.map(item => `- ${item}`).join('\n')}` : ''}
238
+
239
+ FULL TRANSCRIPT:
240
+ ${transcript}
241
+
242
+ STATE BEFORE:
243
+ ${stateBefore}
244
+
245
+ STATE AFTER:
246
+ ${stateAfter}
247
+
248
+ Score the overall conversation, not just a single turn.`;
249
+ const text = await callJudgeWithFallback(apiKey, modelChain, makeModel, input);
250
+ return parseJudgeVerdict(text);
251
+ }
252
+ export async function judgePairwiseConversations(apiKey, scenario, conversationA, conversationB, options) {
253
+ const modelChain = resolveJudgeChain(options?.model, options?.fallbackModels);
254
+ const schema = {
255
+ type: SchemaType.OBJECT,
256
+ properties: {
257
+ winner: {
258
+ type: SchemaType.STRING,
259
+ enum: ['a', 'b', 'tie'],
260
+ },
261
+ reasoning: { type: SchemaType.STRING },
262
+ promptFixes: { type: SchemaType.ARRAY, items: { type: SchemaType.STRING } },
263
+ sdkGaps: { type: SchemaType.ARRAY, items: { type: SchemaType.STRING } },
264
+ conceptGaps: { type: SchemaType.ARRAY, items: { type: SchemaType.STRING } },
265
+ },
266
+ required: ['winner', 'reasoning', 'promptFixes', 'sdkGaps', 'conceptGaps'],
267
+ };
268
+ const makeModel = (genAI, modelName) => genAI.getGenerativeModel({
269
+ model: modelName,
270
+ systemInstruction: `You are comparing two persona conversations for the same scenario.
271
+
272
+ Decide which conversation is better overall on:
273
+ - human voice
274
+ - relationship fit
275
+ - invisible operations
276
+ - goal advancement
277
+ - state hygiene across turns
278
+
279
+ Choose:
280
+ - "a" if A is better
281
+ - "b" if B is better
282
+ - "tie" only if they are genuinely comparable
283
+
284
+ Return ONLY valid JSON.`,
285
+ generationConfig: {
286
+ temperature: 0.2,
287
+ responseMimeType: 'application/json',
288
+ responseSchema: schema,
289
+ },
290
+ });
291
+ const input = `SCENARIO: ${scenario.name}
292
+ DESCRIPTION: ${scenario.description}
293
+ TESTS: ${scenario.tests.join(', ')}
294
+
295
+ CONVERSATION A (${conversationA.label}):
296
+ ${conversationA.transcript}
297
+
298
+ STATE AFTER A:
299
+ ${conversationA.stateAfter}
300
+
301
+ CONVERSATION B (${conversationB.label}):
302
+ ${conversationB.transcript}
303
+
304
+ STATE AFTER B:
305
+ ${conversationB.stateAfter}
306
+
307
+ Which conversation is better overall?`;
308
+ const text = await callJudgeWithFallback(apiKey, modelChain, makeModel, input);
309
+ const parsed = JSON.parse(text);
310
+ return {
311
+ winner: parsed.winner,
312
+ reasoning: parsed.reasoning,
313
+ promptFixes: parsed.promptFixes ?? [],
314
+ sdkGaps: parsed.sdkGaps ?? [],
315
+ conceptGaps: parsed.conceptGaps ?? [],
316
+ };
317
+ }
318
/**
 * Turn the judge model's raw JSON reply into a scored verdict.
 *
 * Each score is rounded to an integer and clamped to the rubric range 0–2;
 * a score that is not a finite number counts as 0. The verdict passes when
 * the average is at least 1.5 and no criterion scored 0. An empty `scores`
 * array yields an average of 0 (not NaN) and a failing verdict.
 *
 * @param {string} rawText  JSON text returned by the judge model.
 * @returns {{ scores: Array<{criterion: string, score: number, reasoning: string}>,
 *            average: number, hasZero: boolean, pass: boolean,
 *            promptFixes: string[], sdkGaps: string[], conceptGaps: string[] }}
 * @throws {SyntaxError} when `rawText` is not valid JSON.
 * @throws {TypeError} when the parsed value has no `scores` array.
 */
function parseJudgeVerdict(rawText) {
    const parsed = JSON.parse(rawText);
    if (parsed === null || typeof parsed !== 'object' || !Array.isArray(parsed.scores)) {
        throw new TypeError('[archetype:judge] verdict JSON is missing a "scores" array');
    }
    const scores = parsed.scores.map((entry) => {
        const rounded = Math.round(Number(entry.score));
        return {
            criterion: entry.criterion,
            // The rubric only defines 0, 1 and 2; keep out-of-range replies from skewing the average.
            score: Number.isFinite(rounded) ? Math.min(2, Math.max(0, rounded)) : 0,
            reasoning: entry.reasoning,
        };
    });
    const total = scores.reduce((sum, entry) => sum + entry.score, 0);
    // Guard the division: 0 / 0 would otherwise leak NaN into the verdict.
    const average = scores.length > 0 ? total / scores.length : 0;
    const hasZero = scores.some((entry) => entry.score === 0);
    return {
        scores,
        average: Math.round(average * 100) / 100,
        hasZero,
        pass: average >= 1.5 && !hasZero,
        promptFixes: parsed.promptFixes ?? [],
        sdkGaps: parsed.sdkGaps ?? [],
        conceptGaps: parsed.conceptGaps ?? [],
    };
}
337
+ //# sourceMappingURL=judge.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"judge.js","sourceRoot":"","sources":["../../src/evals/judge.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,UAAU,EAAe,MAAM,uBAAuB,CAAA;AAQnF,MAAM,WAAW,GAAG,kBAAkB,CAAA;AACtC,MAAM,oBAAoB,GAAG,CAAC,wBAAwB,EAAE,uBAAuB,CAAC,CAAA;AAChF,MAAM,iBAAiB,GAAG,CAAC,CAAA;AAE3B;;;GAGG;AACH,SAAS,iBAAiB,CAAC,eAAwB,EAAE,gBAA2B;IAC9E,MAAM,OAAO,GAAG,eAAe,IAAI,OAAO,CAAC,GAAG,CAAC,qBAAqB,IAAI,WAAW,CAAA;IACnF,MAAM,YAAY,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,+BAA+B,IAAI,EAAE,CAAC;SACrE,KAAK,CAAC,GAAG,CAAC;SACV,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;SACpB,MAAM,CAAC,OAAO,CAAC,CAAA;IAClB,MAAM,SAAS,GAAG,gBAAgB,IAAI,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAA;IACrG,MAAM,KAAK,GAAG,CAAC,OAAO,EAAE,GAAG,SAAS,CAAC,CAAA;IACrC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAA;IAC9B,OAAO,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;QACxB,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;YAAE,OAAO,KAAK,CAAA;QAC7B,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;QACX,OAAO,IAAI,CAAA;IACb,CAAC,CAAC,CAAA;AACJ,CAAC;AAED,SAAS,WAAW,CAAC,KAAc;IACjC,MAAM,GAAG,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;IAClE,OAAO,8EAA8E,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;AACjG,CAAC;AAED,KAAK,UAAU,qBAAqB,CAClC,MAAc,EACd,UAAoB,EACpB,SAAkJ,EAClJ,KAAa;IAEb,MAAM,KAAK,GAAG,IAAI,kBAAkB,CAAC,MAAM,CAAC,CAAA;IAC5C,IAAI,SAA4B,CAAA;IAEhC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3C,MAAM,OAAO,GAAG,UAAU,CAAC,CAAC,CAAC,CAAA;QAC7B,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;YACV,OAAO,CAAC,IAAI,CAAC,qBAAqB,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,gCAAgC,OAAO,EAAE,CAAC,CAAA;QAC/F,CAAC;QACD,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,EAAE,OAAO,CAAC,CAAA;QAEvC,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,iBAAiB,EAAE,OAAO,EAAE,EAAE,CAAC;YAC9D,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,MAAM,KAAK,CAAC,eAAe,CAAC,KAAK,CAAC,CAAA;gBACjD,OAAO,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAkB,CAAA;YAC/C,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,SAAS,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CA
AC,GAAG,CAAC,CAAC,CAAA;gBAC/D,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC;oBAAE,MAAM,SAAS,CAAA;gBAC5C,IAAI,OAAO,GAAG,iBAAiB,EAAE,CAAC;oBAChC,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,EAAE,IAAI,CAAC,CAAA;oBAC1D,OAAO,CAAC,IAAI,CAAC,qBAAqB,OAAO,YAAY,OAAO,GAAG,CAAC,YAAY,SAAS,CAAC,OAAO,kBAAkB,OAAO,IAAI,CAAC,CAAA;oBAC3H,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,CAAA;gBAClD,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,SAAS,IAAI,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAA;AACxE,CAAC;AAED,MAAM,cAAc,GAAG;IACrB;QACE,IAAI,EAAE,aAAa;QACnB,KAAK,EAAE,gFAAgF;QACvF,KAAK,EAAE,qDAAqD;KAC7D;IACD;QACE,IAAI,EAAE,kBAAkB;QACxB,KAAK,EAAE,4FAA4F;QACnG,KAAK,EAAE,sEAAsE;KAC9E;IACD;QACE,IAAI,EAAE,UAAU;QAChB,KAAK,EAAE,wGAAwG;QAC/G,KAAK,EAAE,mEAAmE;KAC3E;IACD;QACE,IAAI,EAAE,sBAAsB;QAC5B,KAAK,EAAE,qFAAqF;QAC5F,KAAK,EAAE,uEAAuE;KAC/E;IACD;QACE,IAAI,EAAE,gBAAgB;QACtB,KAAK,EAAE,qFAAqF;QAC5F,KAAK,EAAE,2DAA2D;KACnE;IACD;QACE,IAAI,EAAE,kBAAkB;QACxB,KAAK,EAAE,yEAAyE;QAChF,KAAK,EAAE,sDAAsD;KAC9D;IACD;QACE,IAAI,EAAE,aAAa;QACnB,KAAK,EAAE,6FAA6F;QACpG,KAAK,EAAE,gEAAgE;KACxE;IACD;QACE,IAAI,EAAE,gBAAgB;QACtB,KAAK,EAAE,gFAAgF;QACvF,KAAK,EAAE,iEAAiE;KACzE;CACF,CAAA;AAED,MAAM,YAAY,GAAG;;;;EAInB,cAAc,CAAC,GAAG,CAAC,CAAC,SAAS,EAAE,KAAK,EAAE,EAAE,CAAC,GAAG,KAAK,GAAG,CAAC,KAAK,SAAS,CAAC,IAAI;OACnE,SAAS,CAAC,KAAK;;OAEf,SAAS,CAAC,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC;;;;;;;;wBAQd,CAAA;AAExB,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,MAAc,EACd,WAAmB,EACnB,cAAsB,EACtB,QAA2B,EAC3B,UAAkB,EAClB,IAAoB,EACpB,OAAuD;IAEvD,MAAM,UAAU,GAAG,iBAAiB,CAAC,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,cAAc,CAAC,CAAA;IAE7E,MAAM,MAAM,GAAG;QACb,IAAI,EAAE,UAAU,CAAC,MAAM;QACvB,UAAU,EAAE;YACV,MAAM,EAAE;gBACN,IAAI,EAAE,UAAU,CAAC,KAAK;gBACtB,KAAK,EAAE;oBACL,IAAI,EAAE,UAAU,CAAC,MAAM;oBACvB,UAAU,EAAE;wBACV,SAAS,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,MAAM,EAAE;wBACtC,KAAK,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,MAAM,EAAE;wBAClC,SAAS,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,MAAM,EAAE;qBACvC;oBACD,QAAQ,EAAE,CAAC,WAAW,EAAE,OAAO,EAAE,WAAW,CAAC;iBAC9C;
aACF;YACD,WAAW,EAAE;gBACX,IAAI,EAAE,UAAU,CAAC,KAAK;gBACtB,KAAK,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,MAAM,EAAE;aACnC;YACD,OAAO,EAAE;gBACP,IAAI,EAAE,UAAU,CAAC,KAAK;gBACtB,KAAK,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,MAAM,EAAE;aACnC;YACD,WAAW,EAAE;gBACX,IAAI,EAAE,UAAU,CAAC,KAAK;gBACtB,KAAK,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,MAAM,EAAE;aACnC;SACF;QACD,QAAQ,EAAE,CAAC,QAAQ,EAAE,aAAa,EAAE,SAAS,EAAE,aAAa,CAAC;KAC9D,CAAA;IAED,MAAM,SAAS,GAAG,CAAC,KAAyB,EAAE,SAAiB,EAAE,EAAE,CACjE,KAAK,CAAC,kBAAkB,CAAC;QACvB,KAAK,EAAE,SAAS;QAChB,iBAAiB,EAAE,YAAY;QAC/B,gBAAgB,EAAE;YAChB,WAAW,EAAE,GAAG;YAChB,gBAAgB,EAAE,kBAAkB;YACpC,cAAc,EAAE,MAA2B;SAC5C;KACF,CAAC,CAAA;IAEJ,MAAM,WAAW,GAAa,EAAE,CAAA;IAEhC,KAAK,MAAM,MAAM,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;QACxC,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,QAAQ,CAAA;QAClF,WAAW,CAAC,IAAI,CAAC,KAAK,IAAI,KAAK,MAAM,CAAC,MAAM,CAAC,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,MAAM,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAA;IACjG,CAAC;IAED,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC;QAC1C,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,UAAU,CAAA;QAC/D,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAA;QACjG,WAAW,CAAC,IAAI,CAAC,KAAK,IAAI,UAAU,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,GAAG,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAA;IACrJ,CAAC;IAED,MAAM,aAAa,GAAG,WAAW,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IAEtF,MAAM,KAAK,GAAG,YAAY,WAAW;mBACpB,cAAc;YACrB,QAAQ,CAAC,IAAI;eACV,QAAQ,CAAC,WAAW;SAC1B,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;EAChC,QAAQ,CAAC,gBAAgB,EAAE,MAAM,CAAC,CAAC,CAAC,uBAAuB,QAAQ,CAAC,gBAAgB,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,I
AAI,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE;;;EAG/H,UAAU;;;QAGJ,IAAI,CAAC,WAAW;aACX,IAAI,CAAC,gBAAgB;;;EAGhC,aAAa;;;EAGb,IAAI,CAAC,WAAW;;;EAGhB,IAAI,CAAC,UAAU;;wBAEO,CAAA;IAEtB,MAAM,IAAI,GAAG,MAAM,qBAAqB,CAAS,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,KAAK,CAAC,CAAA;IACtF,OAAO,iBAAiB,CAAC,IAAI,CAAC,CAAA;AAChC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,qBAAqB,CACzC,MAAc,EACd,WAAmB,EACnB,cAAsB,EACtB,QAA2B,EAC3B,UAAkB,EAClB,WAAmB,EACnB,UAAkB,EAClB,OAAuD;IAEvD,MAAM,UAAU,GAAG,iBAAiB,CAAC,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,cAAc,CAAC,CAAA;IAE7E,MAAM,MAAM,GAAG;QACb,IAAI,EAAE,UAAU,CAAC,MAAM;QACvB,UAAU,EAAE;YACV,MAAM,EAAE;gBACN,IAAI,EAAE,UAAU,CAAC,KAAK;gBACtB,KAAK,EAAE;oBACL,IAAI,EAAE,UAAU,CAAC,MAAM;oBACvB,UAAU,EAAE;wBACV,SAAS,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,MAAM,EAAE;wBACtC,KAAK,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,MAAM,EAAE;wBAClC,SAAS,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,MAAM,EAAE;qBACvC;oBACD,QAAQ,EAAE,CAAC,WAAW,EAAE,OAAO,EAAE,WAAW,CAAC;iBAC9C;aACF;YACD,WAAW,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,MAAM,EAAE,EAAE;YAC3E,OAAO,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,MAAM,EAAE,EAAE;YACvE,WAAW,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,MAAM,EAAE,EAAE;SAC5E;QACD,QAAQ,EAAE,CAAC,QAAQ,EAAE,aAAa,EAAE,SAAS,EAAE,aAAa,CAAC;KAC9D,CAAA;IAED,MAAM,SAAS,GAAG,CAAC,KAAyB,EAAE,SAAiB,EAAE,EAAE,CACjE,KAAK,CAAC,kBAAkB,CAAC;QACvB,KAAK,EAAE,SAAS;QAChB,iBAAiB,EAAE,GAAG,YAAY;;;;;;yDAMiB;QACnD,gBAAgB,EAAE;YAChB,WAAW,EAAE,GAAG;YAChB,gBAAgB,EAAE,kBAAkB;YACpC,cAAc,EAAE,MAA2B;SAC5C;KACF,CAAC,CAAA;IAEJ,MAAM,KAAK,GAAG,YAAY,WAAW;mBACpB,cAAc;YACrB,QAAQ,CAAC,IAAI;eACV,QAAQ,CAAC,WAAW;SAC1B,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;EAChC,QAAQ,CAAC,gBAAgB,EAAE,MAAM,CAAC,CAAC,CAAC,uBAAuB,QAAQ,CAAC,gBAAgB,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE;;;EAG/H,UAAU;;;EAGV,WAAW;;;EAGX,UAAU;;wDAE4C,CAAA;IAEtD,MAAM,IAAI,GAAG,MAAM,qBAAqB,CAAS,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,KAAK,CAAC,CAAA;IACtF,OAAO,iBAAiB,CAAC,IAAI,CAAC,C
AAA;AAChC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,0BAA0B,CAC9C,MAAc,EACd,QAA2B,EAC3B,aAAwE,EACxE,aAAwE,EACxE,OAAuD;IAEvD,MAAM,UAAU,GAAG,iBAAiB,CAAC,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,cAAc,CAAC,CAAA;IAE7E,MAAM,MAAM,GAAG;QACb,IAAI,EAAE,UAAU,CAAC,MAAM;QACvB,UAAU,EAAE;YACV,MAAM,EAAE;gBACN,IAAI,EAAE,UAAU,CAAC,MAAM;gBACvB,IAAI,EAAE,CAAC,GAAG,EAAE,GAAG,EAAE,KAAK,CAAC;aACxB;YACD,SAAS,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,MAAM,EAAE;YACtC,WAAW,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,MAAM,EAAE,EAAE;YAC3E,OAAO,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,MAAM,EAAE,EAAE;YACvE,WAAW,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,UAAU,CAAC,MAAM,EAAE,EAAE;SAC5E;QACD,QAAQ,EAAE,CAAC,QAAQ,EAAE,WAAW,EAAE,aAAa,EAAE,SAAS,EAAE,aAAa,CAAC;KAC3E,CAAA;IAED,MAAM,SAAS,GAAG,CAAC,KAAyB,EAAE,SAAiB,EAAE,EAAE,CACjE,KAAK,CAAC,kBAAkB,CAAC;QACvB,KAAK,EAAE,SAAS;QAChB,iBAAiB,EAAE;;;;;;;;;;;;;;wBAcD;QAClB,gBAAgB,EAAE;YAChB,WAAW,EAAE,GAAG;YAChB,gBAAgB,EAAE,kBAAkB;YACpC,cAAc,EAAE,MAA2B;SAC5C;KACF,CAAC,CAAA;IAEJ,MAAM,KAAK,GAAG,aAAa,QAAQ,CAAC,IAAI;eAC3B,QAAQ,CAAC,WAAW;SAC1B,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;;kBAEhB,aAAa,CAAC,KAAK;EACnC,aAAa,CAAC,UAAU;;;EAGxB,aAAa,CAAC,UAAU;;kBAER,aAAa,CAAC,KAAK;EACnC,aAAa,CAAC,UAAU;;;EAGxB,aAAa,CAAC,UAAU;;sCAEY,CAAA;IAEpC,MAAM,IAAI,GAAG,MAAM,qBAAqB,CAAS,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,KAAK,CAAC,CAAA;IACtF,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAwB,CAAA;IACtD,OAAO;QACL,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,EAAE;QACrC,OAAO,EAAE,MAAM,CAAC,OAAO,IAAI,EAAE;QAC7B,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,EAAE;KACtC,CAAA;AACH,CAAC;AAED,SAAS,iBAAiB,CAAC,OAAe;IACxC,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAKhC,CAAA;IAED,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QACzC,SAAS,EAAE,KAAK,CAAC,SAAS;QAC1B,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC;QAC9B,SAAS,EAAE,KAAK,CAAC,SAAS;KAC3B,CAAC,CAAC,CAAA;IACH,MAAM,OAAO,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,
KAAK,EAAE,EAAE,CAAC,GAAG,GAAG,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAA;IACnF,MAAM,OAAO,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,KAAK,KAAK,CAAC,CAAC,CAAA;IAEvD,OAAO;QACL,MAAM;QACN,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,GAAG,CAAC,GAAG,GAAG;QACxC,OAAO;QACP,IAAI,EAAE,OAAO,IAAI,GAAG,IAAI,CAAC,OAAO;QAChC,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,EAAE;QACrC,OAAO,EAAE,MAAM,CAAC,OAAO,IAAI,EAAE;QAC7B,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,EAAE;KACtC,CAAA;AACH,CAAC"}
@@ -0,0 +1,40 @@
1
+ import type { LLMProviderRequest, PromptMode, TurnTrace } from '../types.js';
2
/** One prompt surface that the operational-contract audit looks for. */
export interface OperationalPromptRequirement {
    /** Name quoted in the "surface missing" issue when the requirement is not met. */
    label: string;
    /**
     * Literal values (for example real entity ids, role ids or enum members)
     * expected in the prompt when the surface is exposed directly.
     */
    tokens: Array<string>;
    /**
     * Values that must every one be present in the prompt. Meant for
     * continuity contracts, where a partial mention is not sufficient because
     * the following turn depends on specific carried-forward details.
     */
    requiredTokens?: Array<string>;
    /**
     * Retrieval/documentation values accepted in place of `tokens` when the
     * full surface is deliberately kept out of working memory.
     */
    retrievalTokens?: Array<string>;
}
21
/** Everything `auditOperationalPromptContract` needs to audit one turn. */
export interface OperationalPromptContractInput {
    /** The provider request under audit, narrowed to its prompt and message. */
    request: Pick<LLMProviderRequest, 'systemPrompt' | 'message'>;
    /** Trace of the turn; when supplied, its parse, repair, action and error data are audited too. */
    trace?: TurnTrace;
    /** Prompt mode the system prompt is expected to be written for. */
    expectedMode: PromptMode;
    /** Requirements reported as "ID surface" issues. */
    ids?: Array<OperationalPromptRequirement>;
    /** Requirements reported as "enum surface" issues. */
    enums?: Array<OperationalPromptRequirement>;
    /** Requirements reported as "recipient surface" issues. */
    recipients?: Array<OperationalPromptRequirement>;
    /** Requirements reported as "continuity surface" issues. */
    continuity?: Array<OperationalPromptRequirement>;
    /** When true, a trace that needed a repair attempt does not produce a warning. */
    allowRepair?: boolean;
}
31
/** A single finding produced by the operational-contract audit. */
export interface OperationalPromptContractIssue {
    /** `'error'` findings make the audit fail; `'warn'` findings are reported without failing it. */
    severity: 'warn' | 'error';
    /** Human-readable description of the finding. */
    message: string;
}
35
/** Outcome of one operational-contract audit. */
export interface OperationalPromptContractResult {
    /** True when none of the collected issues has severity `'error'`. */
    pass: boolean;
    /** Every issue found, errors and warnings alike. */
    issues: Array<OperationalPromptContractIssue>;
}
39
/**
 * Audit a system prompt, and the turn trace when one is provided, against the
 * contract for the expected prompt mode.
 *
 * @param input Request, expected mode, optional surface requirements and trace.
 * @returns The issues found and whether the audit passed (no `'error'` issues).
 */
export declare function auditOperationalPromptContract(
    input: OperationalPromptContractInput,
): OperationalPromptContractResult;
40
+ //# sourceMappingURL=operational-contract.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"operational-contract.d.ts","sourceRoot":"","sources":["../../src/evals/operational-contract.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,kBAAkB,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,aAAa,CAAA;AAE5E,MAAM,WAAW,4BAA4B;IAC3C,KAAK,EAAE,MAAM,CAAA;IACb;;;OAGG;IACH,MAAM,EAAE,MAAM,EAAE,CAAA;IAChB;;;;OAIG;IACH,cAAc,CAAC,EAAE,MAAM,EAAE,CAAA;IACzB;;;OAGG;IACH,eAAe,CAAC,EAAE,MAAM,EAAE,CAAA;CAC3B;AAED,MAAM,WAAW,8BAA8B;IAC7C,OAAO,EAAE,IAAI,CAAC,kBAAkB,EAAE,cAAc,GAAG,SAAS,CAAC,CAAA;IAC7D,KAAK,CAAC,EAAE,SAAS,CAAA;IACjB,YAAY,EAAE,UAAU,CAAA;IACxB,GAAG,CAAC,EAAE,4BAA4B,EAAE,CAAA;IACpC,KAAK,CAAC,EAAE,4BAA4B,EAAE,CAAA;IACtC,UAAU,CAAC,EAAE,4BAA4B,EAAE,CAAA;IAC3C,UAAU,CAAC,EAAE,4BAA4B,EAAE,CAAA;IAC3C,WAAW,CAAC,EAAE,OAAO,CAAA;CACtB;AAED,MAAM,WAAW,8BAA8B;IAC7C,QAAQ,EAAE,OAAO,GAAG,MAAM,CAAA;IAC1B,OAAO,EAAE,MAAM,CAAA;CAChB;AAED,MAAM,WAAW,+BAA+B;IAC9C,IAAI,EAAE,OAAO,CAAA;IACb,MAAM,EAAE,8BAA8B,EAAE,CAAA;CACzC;AA0CD,wBAAgB,8BAA8B,CAC5C,KAAK,EAAE,8BAA8B,GACpC,+BAA+B,CAmFjC"}
@@ -0,0 +1,115 @@
1
// Phrases treated as conversational scaffolding: finding any of them in an
// operational or focus prompt is reported as an error.
const CONVERSATION_MARKERS = [
    'real person in front of you',
    'Momentum:',
    'This is a desktop side panel.',
    'This is a mobile chat interface.',
];

// Exact output-contract sentence a focus prompt must contain.
const FOCUS_OUTPUT_MARKER = 'Return one raw JSON object: { "message": "...", "actions": [...] }. No markdown.';
8
/**
 * Report whether at least one of the given substrings occurs in a string.
 *
 * @param haystack Text to search.
 * @param needles Candidate substrings.
 * @returns True as soon as one candidate is found; false for an empty list.
 */
function containsAny(haystack, needles) {
    // Single-argument wrapper so the index that Array#some passes along is
    // never mistaken for String#includes' start position.
    const isPresent = (needle) => haystack.includes(needle);
    return needles.some(isPresent);
}
11
/**
 * Count the non-overlapping occurrences of a substring, scanning left to right.
 *
 * @param haystack Text to search.
 * @param needle Substring to count; an empty needle counts as zero matches.
 * @returns Number of non-overlapping matches.
 */
function countOccurrences(haystack, needle) {
    if (!needle) {
        return 0;
    }
    let matches = 0;
    let position = haystack.indexOf(needle);
    while (position !== -1) {
        matches += 1;
        // Resume after the match just counted so matches never overlap.
        position = haystack.indexOf(needle, position + needle.length);
    }
    return matches;
}
16
+ function checkSurface(issues, prompt, kind, requirements) {
17
+ for (const requirement of requirements ?? []) {
18
+ const requiredTokensPresent = (requirement.requiredTokens ?? []).every(token => prompt.includes(token));
19
+ const hasDirectSurface = containsAny(prompt, requirement.tokens);
20
+ const hasRetrievalSurface = requirement.retrievalTokens?.length
21
+ ? containsAny(prompt, requirement.retrievalTokens)
22
+ : false;
23
+ if (!requiredTokensPresent || (!hasDirectSurface && !hasRetrievalSurface)) {
24
+ issues.push({
25
+ severity: 'error',
26
+ message: `${kind} surface missing for ${requirement.label}`,
27
+ });
28
+ }
29
+ }
30
+ }
31
/**
 * Audit a system prompt, and optionally the turn trace it produced, against
 * the contract for the expected prompt mode.
 *
 * Mode checks:
 * - `'operational'`: requires the operational-reality guidance, an action or
 *   entity CRUD response-contract block, the raw-JSON and no-code-fences
 *   output rules, and no conversational scaffold markers; a repeated action
 *   contract block is a warning.
 * - `'focus'`: requires the focus JSON output marker and rejects
 *   operational-reality guidance and conversational scaffold markers;
 *   follow-up guidance, outcome-notes guidance and repeated sequential-action
 *   guidance are warnings.
 * - any other mode: warns when follow-up guidance is absent.
 *
 * The ID, enum, recipient and continuity requirements are checked next, then
 * the trace when one is supplied.
 *
 * @param input Prompt request, expected mode, optional requirements and trace.
 * @returns `issues` in the order they were found, and `pass`, which is true
 *   when no issue has severity `'error'`.
 */
export function auditOperationalPromptContract(input) {
    const issues = [];
    const prompt = input.request.systemPrompt;
    const fail = (message) => {
        issues.push({ severity: 'error', message });
    };
    const caution = (message) => {
        issues.push({ severity: 'warn', message });
    };
    const mentions = (fragment) => prompt.includes(fragment);

    switch (input.expectedMode) {
        case 'operational': {
            const hasActionContract = mentions('--- ACTION RESPONSE CONTRACT ---');
            const hasEntityCrudContract = mentions('--- ENTITY CRUD RESPONSE CONTRACT ---');
            if (!mentions('Operational reality:')) {
                fail('Operational prompt is missing operational reality guidance');
            }
            if (!hasActionContract && !hasEntityCrudContract) {
                fail('Operational prompt is missing the canonical action or entity response contract block');
            }
            else if (hasActionContract && countOccurrences(prompt, '--- ACTION RESPONSE CONTRACT ---') > 1) {
                caution('Operational prompt contains duplicate action API contract blocks');
            }
            if (!mentions('Return exactly one raw JSON object and nothing else.')) {
                fail('Operational prompt is missing the raw JSON output contract');
            }
            if (!mentions('Do not wrap the response in markdown code fences.')) {
                fail('Operational prompt is missing the no-code-fences output rule');
            }
            for (const marker of CONVERSATION_MARKERS.filter(mentions)) {
                fail(`Operational prompt still contains conversational scaffold: ${marker}`);
            }
            break;
        }
        case 'focus': {
            if (!mentions(FOCUS_OUTPUT_MARKER)) {
                fail('Focus prompt is missing the focus JSON output contract');
            }
            if (mentions('Operational reality:')) {
                fail('Focus prompt still contains operational reality guidance');
            }
            for (const marker of CONVERSATION_MARKERS.filter(mentions)) {
                fail(`Focus prompt still contains conversational scaffold: ${marker}`);
            }
            if (mentions('"followUps":')) {
                caution('Focus prompt still contains follow-up guidance');
            }
            if (mentions('Outcome notes:')) {
                caution('Focus prompt still contains outcome-notes guidance');
            }
            // Both the quoted and unquoted spellings count towards the total.
            const unquotedCount = countOccurrences(prompt, 'actions is a list');
            const quotedCount = countOccurrences(prompt, '"actions" is a list');
            if (unquotedCount + quotedCount > 1) {
                caution('Focus prompt contains duplicate sequential action guidance');
            }
            break;
        }
        default: {
            if (!mentions('"followUps":')) {
                caution('Conversation prompt is missing follow-up guidance');
            }
            break;
        }
    }

    // Surface requirements apply to every mode and are reported in this order.
    const surfaces = [
        ['ID', input.ids],
        ['enum', input.enums],
        ['recipient', input.recipients],
        ['continuity', input.continuity],
    ];
    for (const [kind, requirements] of surfaces) {
        checkSurface(issues, prompt, kind, requirements);
    }

    const trace = input.trace;
    if (trace) {
        if (!trace.parseOk) {
            fail('Turn trace shows JSON parse failure');
        }
        if (trace.repairAttempted && !input.allowRepair) {
            caution('Turn trace needed repair to satisfy the action contract');
        }
        for (const action of trace.actions) {
            switch (action.status) {
                case 'unknown_action':
                    fail(`Turn trace contains unknown action: ${action.name}`);
                    break;
                case 'invalid':
                    caution(`Turn trace contains invalid action params: ${action.name}`);
                    break;
                default:
                    break;
            }
        }
        for (const error of trace.errors) {
            caution(`Turn trace error: ${error}`);
        }
    }

    const hasError = issues.some(issue => issue.severity === 'error');
    return {
        pass: !hasError,
        issues,
    };
}
115
+ //# sourceMappingURL=operational-contract.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"operational-contract.js","sourceRoot":"","sources":["../../src/evals/operational-contract.ts"],"names":[],"mappings":"AA2CA,MAAM,oBAAoB,GAAG;IAC3B,6BAA6B;IAC7B,WAAW;IACX,+BAA+B;IAC/B,kCAAkC;CACnC,CAAA;AAED,MAAM,mBAAmB,GAAG,kFAAkF,CAAA;AAE9G,SAAS,WAAW,CAAC,QAAgB,EAAE,OAAiB;IACtD,OAAO,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAA;AAC1D,CAAC;AAED,SAAS,gBAAgB,CAAC,QAAgB,EAAE,MAAc;IACxD,IAAI,CAAC,MAAM;QAAE,OAAO,CAAC,CAAA;IACrB,OAAO,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,MAAM,GAAG,CAAC,CAAA;AAC1C,CAAC;AAED,SAAS,YAAY,CACnB,MAAwC,EACxC,MAAc,EACd,IAAgD,EAChD,YAA6C;IAE7C,KAAK,MAAM,WAAW,IAAI,YAAY,IAAI,EAAE,EAAE,CAAC;QAC7C,MAAM,qBAAqB,GAAG,CAAC,WAAW,CAAC,cAAc,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAA;QACvG,MAAM,gBAAgB,GAAG,WAAW,CAAC,MAAM,EAAE,WAAW,CAAC,MAAM,CAAC,CAAA;QAChE,MAAM,mBAAmB,GAAG,WAAW,CAAC,eAAe,EAAE,MAAM;YAC7D,CAAC,CAAC,WAAW,CAAC,MAAM,EAAE,WAAW,CAAC,eAAe,CAAC;YAClD,CAAC,CAAC,KAAK,CAAA;QAET,IAAI,CAAC,qBAAqB,IAAI,CAAC,CAAC,gBAAgB,IAAI,CAAC,mBAAmB,CAAC,EAAE,CAAC;YAC1E,MAAM,CAAC,IAAI,CAAC;gBACV,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,GAAG,IAAI,wBAAwB,WAAW,CAAC,KAAK,EAAE;aAC5D,CAAC,CAAA;QACJ,CAAC;IACH,CAAC;AACH,CAAC;AAED,MAAM,UAAU,8BAA8B,CAC5C,KAAqC;IAErC,MAAM,MAAM,GAAqC,EAAE,CAAA;IACnD,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,YAAY,CAAA;IAEzC,IAAI,KAAK,CAAC,YAAY,KAAK,aAAa,EAAE,CAAC;QACzC,MAAM,iBAAiB,GAAG,MAAM,CAAC,QAAQ,CAAC,kCAAkC,CAAC,CAAA;QAC7E,MAAM,qBAAqB,GAAG,MAAM,CAAC,QAAQ,CAAC,uCAAuC,CAAC,CAAA;QACtF,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,sBAAsB,CAAC,EAAE,CAAC;YAC7C,MAAM,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,4DAA4D,EAAE,CAAC,CAAA;QAC3G,CAAC;QACD,IAAI,CAAC,iBAAiB,IAAI,CAAC,qBAAqB,EAAE,CAAC;YACjD,MAAM,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,sFAAsF,EAAE,CAAC,CAAA;QACrI,CAAC;aAAM,IAAI,iBAAiB,IAAI,gBAAgB,CAAC,MAAM,EAAE,kCAAkC,CAAC,GAAG,CAAC,EAAE,CAAC;YACjG,MAAM,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,kEAAkE,EAAE,CAAC,CAAA;QAChH,CAAC;QACD,IAAI,CAAC,MAAM,CAAC,QAAQ,CA
AC,sDAAsD,CAAC,EAAE,CAAC;YAC7E,MAAM,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,4DAA4D,EAAE,CAAC,CAAA;QAC3G,CAAC;QACD,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,mDAAmD,CAAC,EAAE,CAAC;YAC1E,MAAM,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,8DAA8D,EAAE,CAAC,CAAA;QAC7G,CAAC;QACD,KAAK,MAAM,MAAM,IAAI,oBAAoB,EAAE,CAAC;YAC1C,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC5B,MAAM,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,8DAA8D,MAAM,EAAE,EAAE,CAAC,CAAA;YACrH,CAAC;QACH,CAAC;IACH,CAAC;SAAM,IAAI,KAAK,CAAC,YAAY,KAAK,OAAO,EAAE,CAAC;QAC1C,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,mBAAmB,CAAC,EAAE,CAAC;YAC1C,MAAM,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,wDAAwD,EAAE,CAAC,CAAA;QACvG,CAAC;QACD,IAAI,MAAM,CAAC,QAAQ,CAAC,sBAAsB,CAAC,EAAE,CAAC;YAC5C,MAAM,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,0DAA0D,EAAE,CAAC,CAAA;QACzG,CAAC;QACD,KAAK,MAAM,MAAM,IAAI,oBAAoB,EAAE,CAAC;YAC1C,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC5B,MAAM,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,wDAAwD,MAAM,EAAE,EAAE,CAAC,CAAA;YAC/G,CAAC;QACH,CAAC;QACD,IAAI,MAAM,CAAC,QAAQ,CAAC,cAAc,CAAC,EAAE,CAAC;YACpC,MAAM,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,gDAAgD,EAAE,CAAC,CAAA;QAC9F,CAAC;QACD,IAAI,MAAM,CAAC,QAAQ,CAAC,gBAAgB,CAAC,EAAE,CAAC;YACtC,MAAM,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,oDAAoD,EAAE,CAAC,CAAA;QAClG,CAAC;QACD,MAAM,uBAAuB,GAAG,gBAAgB,CAAC,MAAM,EAAE,mBAAmB,CAAC;cACzE,gBAAgB,CAAC,MAAM,EAAE,qBAAqB,CAAC,CAAA;QACnD,IAAI,uBAAuB,GAAG,CAAC,EAAE,CAAC;YAChC,MAAM,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,4DAA4D,EAAE,CAAC,CAAA;QAC1G,CAAC;IACH,CAAC;SAAM,CAAC;QACN,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,CAAC,EAAE,CAAC;YACrC,MAAM,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,mDAAmD,EAAE,CAAC,CAAA;QACjG,CAAC;IACH,CAAC;IAED,YAAY,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,KAAK,CAAC,GAAG,CAAC,CAAA;IAC7C,YAAY,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,CAAC,KAAK,CAAC,CAAA;IACjD,YAAY,CAAC,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,KAAK,CAAC,UAAU,CAAC,CAAA;IAC3D,YAAY,CAAC,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,K
AAK,CAAC,UAAU,CAAC,CAAA;IAE5D,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC;QAChB,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;YACzB,MAAM,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,qCAAqC,EAAE,CAAC,CAAA;QACpF,CAAC;QACD,IAAI,CAAC,KAAK,CAAC,WAAW,IAAI,KAAK,CAAC,KAAK,CAAC,eAAe,EAAE,CAAC;YACtD,MAAM,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,yDAAyD,EAAE,CAAC,CAAA;QACvG,CAAC;QACD,KAAK,MAAM,MAAM,IAAI,KAAK,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;YACzC,IAAI,MAAM,CAAC,MAAM,KAAK,gBAAgB,EAAE,CAAC;gBACvC,MAAM,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,uCAAuC,MAAM,CAAC,IAAI,EAAE,EAAE,CAAC,CAAA;YACnG,CAAC;iBAAM,IAAI,MAAM,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;gBACvC,MAAM,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,8CAA8C,MAAM,CAAC,IAAI,EAAE,EAAE,CAAC,CAAA;YACzG,CAAC;QACH,CAAC;QACD,KAAK,MAAM,KAAK,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC;YACvC,MAAM,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,qBAAqB,KAAK,EAAE,EAAE,CAAC,CAAA;QAC1E,CAAC;IACH,CAAC;IAED,OAAO;QACL,IAAI,EAAE,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,QAAQ,KAAK,OAAO,CAAC;QACvD,MAAM;KACP,CAAA;AACH,CAAC"}
@@ -0,0 +1,14 @@
1
/** Input accepted by `auditPromptContent`. */
export interface PromptContentAuditInput {
    /** Prompt text to audit. */
    prompt: string;
    /**
     * Entity names declared for the prompt.
     * NOTE(review): how the audit uses them is not visible in this declaration file; confirm against the implementation.
     */
    declaredEntities?: Array<string>;
}
5
/** A single finding reported by the prompt-content audit. */
export interface PromptContentAuditIssue {
    /** How serious the finding is: a warning or an error. */
    severity: 'warn' | 'error';
    /** Human-readable description of the finding. */
    message: string;
}
9
/** Outcome of one prompt-content audit. */
export interface PromptContentAuditResult {
    /**
     * Overall verdict.
     * NOTE(review): presumably false when any issue is an error; confirm against the implementation.
     */
    pass: boolean;
    /** Findings collected by the audit. */
    issues: Array<PromptContentAuditIssue>;
}
13
/**
 * Audit the content of a prompt.
 *
 * @param input Prompt text and, optionally, the entities declared for it.
 * @returns The audit verdict together with the issues found.
 */
export declare function auditPromptContent(
    input: PromptContentAuditInput,
): PromptContentAuditResult;
14
+ //# sourceMappingURL=prompt-content.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"prompt-content.d.ts","sourceRoot":"","sources":["../../src/evals/prompt-content.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,uBAAuB;IACtC,MAAM,EAAE,MAAM,CAAA;IACd,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAA;CAC5B;AAED,MAAM,WAAW,uBAAuB;IACtC,QAAQ,EAAE,OAAO,GAAG,MAAM,CAAA;IAC1B,OAAO,EAAE,MAAM,CAAA;CAChB;AAED,MAAM,WAAW,wBAAwB;IACvC,IAAI,EAAE,OAAO,CAAA;IACb,MAAM,EAAE,uBAAuB,EAAE,CAAA;CAClC;AAkCD,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,uBAAuB,GAAG,wBAAwB,CAiF3F"}