@cogitator-ai/core 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157)
  1. package/README.md +920 -15
  2. package/dist/cogitator.d.ts +31 -1
  3. package/dist/cogitator.d.ts.map +1 -1
  4. package/dist/cogitator.js +127 -6
  5. package/dist/cogitator.js.map +1 -1
  6. package/dist/constitutional/constitution.d.ts +9 -0
  7. package/dist/constitutional/constitution.d.ts.map +1 -0
  8. package/dist/constitutional/constitution.js +215 -0
  9. package/dist/constitutional/constitution.js.map +1 -0
  10. package/dist/constitutional/constitutional-ai.d.ts +36 -0
  11. package/dist/constitutional/constitutional-ai.d.ts.map +1 -0
  12. package/dist/constitutional/constitutional-ai.js +163 -0
  13. package/dist/constitutional/constitutional-ai.js.map +1 -0
  14. package/dist/constitutional/critique-reviser.d.ts +20 -0
  15. package/dist/constitutional/critique-reviser.d.ts.map +1 -0
  16. package/dist/constitutional/critique-reviser.js +98 -0
  17. package/dist/constitutional/critique-reviser.js.map +1 -0
  18. package/dist/constitutional/index.d.ts +13 -0
  19. package/dist/constitutional/index.d.ts.map +1 -0
  20. package/dist/constitutional/index.js +8 -0
  21. package/dist/constitutional/index.js.map +1 -0
  22. package/dist/constitutional/input-filter.d.ts +19 -0
  23. package/dist/constitutional/input-filter.d.ts.map +1 -0
  24. package/dist/constitutional/input-filter.js +88 -0
  25. package/dist/constitutional/input-filter.js.map +1 -0
  26. package/dist/constitutional/output-filter.d.ts +19 -0
  27. package/dist/constitutional/output-filter.d.ts.map +1 -0
  28. package/dist/constitutional/output-filter.js +86 -0
  29. package/dist/constitutional/output-filter.js.map +1 -0
  30. package/dist/constitutional/prompts.d.ts +11 -0
  31. package/dist/constitutional/prompts.d.ts.map +1 -0
  32. package/dist/constitutional/prompts.js +202 -0
  33. package/dist/constitutional/prompts.js.map +1 -0
  34. package/dist/constitutional/tool-guard.d.ts +18 -0
  35. package/dist/constitutional/tool-guard.d.ts.map +1 -0
  36. package/dist/constitutional/tool-guard.js +125 -0
  37. package/dist/constitutional/tool-guard.js.map +1 -0
  38. package/dist/cost-routing/budget-enforcer.d.ts +26 -0
  39. package/dist/cost-routing/budget-enforcer.d.ts.map +1 -0
  40. package/dist/cost-routing/budget-enforcer.js +86 -0
  41. package/dist/cost-routing/budget-enforcer.js.map +1 -0
  42. package/dist/cost-routing/cost-router.d.ts +34 -0
  43. package/dist/cost-routing/cost-router.d.ts.map +1 -0
  44. package/dist/cost-routing/cost-router.js +80 -0
  45. package/dist/cost-routing/cost-router.js.map +1 -0
  46. package/dist/cost-routing/cost-tracker.d.ts +20 -0
  47. package/dist/cost-routing/cost-tracker.d.ts.map +1 -0
  48. package/dist/cost-routing/cost-tracker.js +85 -0
  49. package/dist/cost-routing/cost-tracker.js.map +1 -0
  50. package/dist/cost-routing/index.d.ts +6 -0
  51. package/dist/cost-routing/index.d.ts.map +1 -0
  52. package/dist/cost-routing/index.js +6 -0
  53. package/dist/cost-routing/index.js.map +1 -0
  54. package/dist/cost-routing/model-selector.d.ts +15 -0
  55. package/dist/cost-routing/model-selector.d.ts.map +1 -0
  56. package/dist/cost-routing/model-selector.js +216 -0
  57. package/dist/cost-routing/model-selector.js.map +1 -0
  58. package/dist/cost-routing/task-analyzer.d.ts +13 -0
  59. package/dist/cost-routing/task-analyzer.d.ts.map +1 -0
  60. package/dist/cost-routing/task-analyzer.js +185 -0
  61. package/dist/cost-routing/task-analyzer.js.map +1 -0
  62. package/dist/index.d.ts +13 -2
  63. package/dist/index.d.ts.map +1 -1
  64. package/dist/index.js +7 -2
  65. package/dist/index.js.map +1 -1
  66. package/dist/learning/ab-testing.d.ts +45 -0
  67. package/dist/learning/ab-testing.d.ts.map +1 -0
  68. package/dist/learning/ab-testing.js +267 -0
  69. package/dist/learning/ab-testing.js.map +1 -0
  70. package/dist/learning/agent-optimizer.d.ts.map +1 -1
  71. package/dist/learning/agent-optimizer.js +26 -21
  72. package/dist/learning/agent-optimizer.js.map +1 -1
  73. package/dist/learning/auto-optimizer.d.ts +38 -0
  74. package/dist/learning/auto-optimizer.d.ts.map +1 -0
  75. package/dist/learning/auto-optimizer.js +229 -0
  76. package/dist/learning/auto-optimizer.js.map +1 -0
  77. package/dist/learning/demo-selector.d.ts.map +1 -1
  78. package/dist/learning/demo-selector.js +7 -7
  79. package/dist/learning/demo-selector.js.map +1 -1
  80. package/dist/learning/index.d.ts +13 -1
  81. package/dist/learning/index.d.ts.map +1 -1
  82. package/dist/learning/index.js +7 -1
  83. package/dist/learning/index.js.map +1 -1
  84. package/dist/learning/instruction-optimizer.d.ts.map +1 -1
  85. package/dist/learning/instruction-optimizer.js +7 -11
  86. package/dist/learning/instruction-optimizer.js.map +1 -1
  87. package/dist/learning/metrics.d.ts.map +1 -1
  88. package/dist/learning/metrics.js +26 -16
  89. package/dist/learning/metrics.js.map +1 -1
  90. package/dist/learning/postgres-trace-store.d.ts +53 -0
  91. package/dist/learning/postgres-trace-store.d.ts.map +1 -0
  92. package/dist/learning/postgres-trace-store.js +692 -0
  93. package/dist/learning/postgres-trace-store.js.map +1 -0
  94. package/dist/learning/prompt-logger.d.ts +29 -0
  95. package/dist/learning/prompt-logger.d.ts.map +1 -0
  96. package/dist/learning/prompt-logger.js +157 -0
  97. package/dist/learning/prompt-logger.js.map +1 -0
  98. package/dist/learning/prompt-monitor.d.ts +29 -0
  99. package/dist/learning/prompt-monitor.d.ts.map +1 -0
  100. package/dist/learning/prompt-monitor.js +243 -0
  101. package/dist/learning/prompt-monitor.js.map +1 -0
  102. package/dist/learning/prompts.d.ts.map +1 -1
  103. package/dist/learning/prompts.js +24 -13
  104. package/dist/learning/prompts.js.map +1 -1
  105. package/dist/learning/rollback-manager.d.ts +36 -0
  106. package/dist/learning/rollback-manager.d.ts.map +1 -0
  107. package/dist/learning/rollback-manager.js +177 -0
  108. package/dist/learning/rollback-manager.js.map +1 -0
  109. package/dist/learning/trace-store.d.ts.map +1 -1
  110. package/dist/learning/trace-store.js +8 -10
  111. package/dist/learning/trace-store.js.map +1 -1
  112. package/dist/reasoning/branch-evaluator.d.ts.map +1 -1
  113. package/dist/reasoning/branch-evaluator.js +14 -8
  114. package/dist/reasoning/branch-evaluator.js.map +1 -1
  115. package/dist/reasoning/branch-generator.d.ts.map +1 -1
  116. package/dist/reasoning/branch-generator.js +5 -3
  117. package/dist/reasoning/branch-generator.js.map +1 -1
  118. package/dist/reasoning/prompts.d.ts.map +1 -1
  119. package/dist/reasoning/prompts.js +7 -5
  120. package/dist/reasoning/prompts.js.map +1 -1
  121. package/dist/reasoning/thought-tree.d.ts.map +1 -1
  122. package/dist/reasoning/thought-tree.js +9 -11
  123. package/dist/reasoning/thought-tree.js.map +1 -1
  124. package/dist/reflection/insight-store.d.ts.map +1 -1
  125. package/dist/reflection/insight-store.js +8 -6
  126. package/dist/reflection/insight-store.js.map +1 -1
  127. package/dist/reflection/prompts.d.ts.map +1 -1
  128. package/dist/reflection/prompts.js +11 -6
  129. package/dist/reflection/prompts.js.map +1 -1
  130. package/dist/reflection/reflection-engine.d.ts.map +1 -1
  131. package/dist/reflection/reflection-engine.js +8 -10
  132. package/dist/reflection/reflection-engine.js.map +1 -1
  133. package/dist/time-travel/checkpoint-store.d.ts +34 -0
  134. package/dist/time-travel/checkpoint-store.d.ts.map +1 -0
  135. package/dist/time-travel/checkpoint-store.js +240 -0
  136. package/dist/time-travel/checkpoint-store.js.map +1 -0
  137. package/dist/time-travel/comparator.d.ts +26 -0
  138. package/dist/time-travel/comparator.d.ts.map +1 -0
  139. package/dist/time-travel/comparator.js +253 -0
  140. package/dist/time-travel/comparator.js.map +1 -0
  141. package/dist/time-travel/forker.d.ts +22 -0
  142. package/dist/time-travel/forker.d.ts.map +1 -0
  143. package/dist/time-travel/forker.js +118 -0
  144. package/dist/time-travel/forker.js.map +1 -0
  145. package/dist/time-travel/index.d.ts +6 -0
  146. package/dist/time-travel/index.d.ts.map +1 -0
  147. package/dist/time-travel/index.js +6 -0
  148. package/dist/time-travel/index.js.map +1 -0
  149. package/dist/time-travel/replayer.d.ts +20 -0
  150. package/dist/time-travel/replayer.d.ts.map +1 -0
  151. package/dist/time-travel/replayer.js +147 -0
  152. package/dist/time-travel/replayer.js.map +1 -0
  153. package/dist/time-travel/time-travel.d.ts +41 -0
  154. package/dist/time-travel/time-travel.d.ts.map +1 -0
  155. package/dist/time-travel/time-travel.js +127 -0
  156. package/dist/time-travel/time-travel.js.map +1 -0
  157. package/package.json +13 -5
@@ -0,0 +1,36 @@
1
+ import type { GuardrailConfig, Constitution, ConstitutionalPrinciple, FilterResult, ToolGuardResult, RevisionResult, LLMBackend, Message, Tool, ToolContext, FilterLayer } from '@cogitator-ai/types';
2
+ export interface ConstitutionalAIOptions { // Options accepted by the ConstitutionalAI constructor.
3
+ llm: LLMBackend; // Handed to the input filter, output filter and critique reviser; the tool guard is constructed without it.
4
+ constitution?: Constitution; // The constructor falls back to DEFAULT_CONSTITUTION when this is null or undefined.
5
+ config?: Partial<GuardrailConfig>; // Overrides applied on top of the constructor's built-in default config.
6
+ }
7
+ export declare class ConstitutionalAI { // Facade that owns one config and one constitution and shares them with an input filter, output filter, tool guard and critique reviser.
8
+ private inputFilter;
9
+ private outputFilter;
10
+ private toolGuard;
11
+ private critiqueReviser;
12
+ private _config;
13
+ private _constitution;
14
+ private violationLog; // In-memory list of { timestamp, layer, result } entries appended by logViolation.
15
+ private logger;
16
+ constructor(options: ConstitutionalAIOptions);
17
+ get config(): GuardrailConfig; // Returns the internal config object itself, not a copy.
18
+ get constitution(): Constitution; // Returns the constitution currently in use.
19
+ filterInput(input: string, context?: string): Promise<FilterResult>; // Resolves to { allowed: true, harmScores: [] } without running the filter when config.filterInput is falsy.
20
+ filterOutput(output: string, context: Message[]): Promise<FilterResult>; // Same bypass for config.filterOutput; a blocked output can come back as allowed with suggestedRevision when critique-revision changed the text.
21
+ guardTool(tool: Tool, args: Record<string, unknown>, context: ToolContext): Promise<ToolGuardResult>; // When config.filterToolCalls is falsy, approves with riskLevel 'low' and the tool's own sideEffects (or []).
22
+ critiqueAndRevise(response: string, context: Message[]): Promise<RevisionResult>; // Delegates directly to the critique reviser.
23
+ setConstitution(constitution: Constitution): void; // Stores the constitution and forwards it to all four components via updateConstitution.
24
+ addPrinciple(principle: ConstitutionalPrinciple): void; // Logs a warning and changes nothing when a principle with the same id already exists.
25
+ removePrinciple(id: string): void; // Removes the principle whose id matches.
26
+ getConstitution(): Constitution; // Returns the same object as the `constitution` getter.
27
+ getConfig(): GuardrailConfig; // Returns a shallow copy of the config (nested objects are shared).
28
+ getViolationLog(): Array<{ // Returns a new array; the entry objects themselves are shared with the internal log.
29
+ timestamp: Date;
30
+ layer: FilterLayer;
31
+ result: FilterResult;
32
+ }>;
33
+ clearViolationLog(): void; // Replaces the internal log with an empty array.
34
+ private logViolation;
35
+ }
36
+ //# sourceMappingURL=constitutional-ai.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"constitutional-ai.d.ts","sourceRoot":"","sources":["../../src/constitutional/constitutional-ai.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,eAAe,EACf,YAAY,EACZ,uBAAuB,EACvB,YAAY,EACZ,eAAe,EACf,cAAc,EACd,UAAU,EACV,OAAO,EACP,IAAI,EACJ,WAAW,EACX,WAAW,EACZ,MAAM,qBAAqB,CAAC;AAQ7B,MAAM,WAAW,uBAAuB;IACtC,GAAG,EAAE,UAAU,CAAC;IAChB,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B,MAAM,CAAC,EAAE,OAAO,CAAC,eAAe,CAAC,CAAC;CACnC;AAED,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,WAAW,CAAc;IACjC,OAAO,CAAC,YAAY,CAAe;IACnC,OAAO,CAAC,SAAS,CAAY;IAC7B,OAAO,CAAC,eAAe,CAAkB;IACzC,OAAO,CAAC,OAAO,CAAkB;IACjC,OAAO,CAAC,aAAa,CAAe;IACpC,OAAO,CAAC,YAAY,CAA4E;IAChG,OAAO,CAAC,MAAM,CAAwD;gBAE1D,OAAO,EAAE,uBAAuB;IAmD5C,IAAI,MAAM,IAAI,eAAe,CAE5B;IAED,IAAI,YAAY,IAAI,YAAY,CAE/B;IAEK,WAAW,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAUnE,YAAY,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,GAAG,OAAO,CAAC,YAAY,CAAC;IAsBvE,SAAS,CACb,IAAI,EAAE,IAAI,EACV,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAC7B,OAAO,EAAE,WAAW,GACnB,OAAO,CAAC,eAAe,CAAC;IAarB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,GAAG,OAAO,CAAC,cAAc,CAAC;IAItF,eAAe,CAAC,YAAY,EAAE,YAAY,GAAG,IAAI;IASjD,YAAY,CAAC,SAAS,EAAE,uBAAuB,GAAG,IAAI;IActD,eAAe,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAQjC,eAAe,IAAI,YAAY;IAI/B,SAAS,IAAI,eAAe;IAI5B,eAAe,IAAI,KAAK,CAAC;QAAE,SAAS,EAAE,IAAI,CAAC;QAAC,KAAK,EAAE,WAAW,CAAC;QAAC,MAAM,EAAE,YAAY,CAAA;KAAE,CAAC;IAIvF,iBAAiB,IAAI,IAAI;IAIzB,OAAO,CAAC,YAAY;CAiBrB"}
@@ -0,0 +1,163 @@
1
+ import { DEFAULT_CONSTITUTION } from './constitution';
2
+ import { InputFilter } from './input-filter';
3
+ import { OutputFilter } from './output-filter';
4
+ import { ToolGuard } from './tool-guard';
5
+ import { CritiqueReviser } from './critique-reviser';
6
+ import { getLogger } from '../logger';
7
/**
 * Facade over the constitutional guardrail pipeline.
 *
 * Owns a single guardrail config and constitution and shares them with four
 * collaborators: an input filter, an output filter, a tool guard and a
 * critique reviser. Filter verdicts that carry harm scores or are blocked are
 * appended to an in-memory violation log and forwarded to the optional
 * `config.onViolation` callback.
 */
export class ConstitutionalAI {
  inputFilter;
  outputFilter;
  toolGuard;
  critiqueReviser;
  _config;
  _constitution;
  violationLog = [];
  logger = getLogger().child({ component: 'ConstitutionalAI' });

  /**
   * @param options `llm` (required), optional `constitution` (defaults to
   *   DEFAULT_CONSTITUTION) and optional `config` overrides.
   */
  constructor(options) {
    const defaultConfig = {
      enabled: true,
      filterInput: true,
      filterOutput: true,
      filterToolCalls: true,
      filterToolResults: false,
      enableCritiqueRevision: true,
      maxRevisionIterations: 3,
      revisionConfidenceThreshold: 0.85,
      thresholds: {
        violence: 'medium',
        hate: 'low',
        sexual: 'medium',
        'self-harm': 'low',
        illegal: 'low',
        privacy: 'medium',
        misinformation: 'high',
        manipulation: 'medium',
      },
      strictMode: false,
      logViolations: true,
    };
    const overrides = options.config ?? {};
    this._config = {
      ...defaultConfig,
      ...overrides,
      // Merge thresholds per category: a plain top-level spread would replace
      // the whole default table as soon as the caller overrides one category.
      thresholds: { ...defaultConfig.thresholds, ...overrides.thresholds },
    };
    this._constitution = options.constitution ?? DEFAULT_CONSTITUTION;

    this.inputFilter = new InputFilter({
      llm: options.llm,
      config: this._config,
      constitution: this._constitution,
    });
    this.outputFilter = new OutputFilter({
      llm: options.llm,
      config: this._config,
      constitution: this._constitution,
    });
    // The tool guard is the only collaborator constructed without an LLM.
    this.toolGuard = new ToolGuard({
      config: this._config,
      constitution: this._constitution,
    });
    this.critiqueReviser = new CritiqueReviser({
      llm: options.llm,
      config: this._config,
      constitution: this._constitution,
    });
  }

  /** The live config object (not a copy). */
  get config() {
    return this._config;
  }

  /** The constitution currently in use. */
  get constitution() {
    return this._constitution;
  }

  /**
   * Runs the input filter and records the verdict.
   *
   * @param {string} input text to evaluate
   * @param {string} [context] forwarded unchanged to the input filter
   * @returns the filter result, or `{ allowed: true, harmScores: [] }` when
   *   `config.filterInput` is falsy
   */
  async filterInput(input, context) {
    if (!this._config.filterInput) {
      return { allowed: true, harmScores: [] };
    }
    const result = await this.inputFilter.filter(input, context);
    this.logViolation('input', result);
    return result;
  }

  /**
   * Runs the output filter and, for blocked output, attempts a critique-revise
   * pass when `config.enableCritiqueRevision` is set.
   *
   * @param {string} output model output to evaluate
   * @param context conversation messages forwarded to the filter and reviser
   * @returns the filter result; when revision produced different text, an
   *   allowed result carrying the original harm scores and `suggestedRevision`.
   *   `{ allowed: true, harmScores: [] }` when `config.filterOutput` is falsy.
   */
  async filterOutput(output, context) {
    if (!this._config.filterOutput) {
      return { allowed: true, harmScores: [] };
    }
    const result = await this.outputFilter.filter(output, context);

    // Record the raw verdict before attempting a repair, so a violation that
    // is subsequently revised still reaches the log and `onViolation`.
    this.logViolation('output', result);

    if (!result.allowed && this._config.enableCritiqueRevision) {
      const revision = await this.critiqueAndRevise(output, context);
      // NOTE(review): "text changed" is used as the success signal; the
      // revised text is not passed through the output filter again here.
      if (revision.revised !== revision.original) {
        return {
          allowed: true,
          harmScores: result.harmScores,
          suggestedRevision: revision.revised,
        };
      }
    }
    return result;
  }

  /**
   * Evaluates a tool call through the tool guard.
   *
   * @returns the guard's verdict; when `config.filterToolCalls` is falsy, an
   *   unconditional low-risk approval listing the tool's declared side
   *   effects. Tool verdicts are not written to the violation log.
   */
  async guardTool(tool, args, context) {
    if (!this._config.filterToolCalls) {
      return {
        approved: true,
        requiresConfirmation: false,
        sideEffects: tool.sideEffects ?? [],
        riskLevel: 'low',
      };
    }
    return this.toolGuard.evaluate(tool, args, context);
  }

  /** Delegates to the critique reviser. */
  async critiqueAndRevise(response, context) {
    return this.critiqueReviser.critiqueAndRevise(response, context);
  }

  /** Replaces the constitution and pushes it to all four collaborators. */
  setConstitution(constitution) {
    this._constitution = constitution;
    this.inputFilter.updateConstitution(constitution);
    this.outputFilter.updateConstitution(constitution);
    this.toolGuard.updateConstitution(constitution);
    this.critiqueReviser.updateConstitution(constitution);
    this.logger.info('Constitution updated', { constitutionId: constitution.id });
  }

  /**
   * Appends a principle. A principle whose id is already present is skipped
   * with a warning.
   */
  addPrinciple(principle) {
    const exists = this._constitution.principles.some((p) => p.id === principle.id);
    if (exists) {
      this.logger.warn('Principle already exists, skipping', { principleId: principle.id });
      return;
    }
    this.setConstitution({
      ...this._constitution,
      principles: [...this._constitution.principles, principle],
    });
  }

  /**
   * Removes the principle with the given id. An unknown id is skipped with a
   * warning instead of rebuilding and re-broadcasting an identical
   * constitution (mirrors the duplicate handling in `addPrinciple`).
   */
  removePrinciple(id) {
    const remaining = this._constitution.principles.filter((p) => p.id !== id);
    if (remaining.length === this._constitution.principles.length) {
      this.logger.warn('Principle not found, skipping', { principleId: id });
      return;
    }
    this.setConstitution({ ...this._constitution, principles: remaining });
  }

  /** Same object as the `constitution` getter. */
  getConstitution() {
    return this._constitution;
  }

  /** Shallow copy of the config; nested objects are shared. */
  getConfig() {
    return { ...this._config };
  }

  /** New array holding the logged entries (entries themselves are shared). */
  getViolationLog() {
    return [...this.violationLog];
  }

  /** Drops every logged entry. */
  clearViolationLog() {
    this.violationLog = [];
  }

  /**
   * Records a verdict that is blocked or carries at least one harm score.
   * Logging is gated by `config.logViolations`; the `onViolation` callback is
   * invoked regardless of that flag.
   */
  logViolation(layer, result) {
    if (!result.allowed || result.harmScores.length > 0) {
      if (this._config.logViolations) {
        this.violationLog.push({ timestamp: new Date(), layer, result });
        this.logger.warn('Guardrail violation detected', {
          layer,
          allowed: result.allowed,
          harmCount: result.harmScores.length,
          categories: result.harmScores.map((s) => s.category),
        });
      }
      if (this._config.onViolation) {
        this._config.onViolation(result, layer);
      }
    }
  }
}
163
+ //# sourceMappingURL=constitutional-ai.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"constitutional-ai.js","sourceRoot":"","sources":["../../src/constitutional/constitutional-ai.ts"],"names":[],"mappings":"AAaA,OAAO,EAAE,oBAAoB,EAAE,MAAM,gBAAgB,CAAC;AACtD,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAC7C,OAAO,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AAC/C,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACrD,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAQtC,MAAM,OAAO,gBAAgB;IACnB,WAAW,CAAc;IACzB,YAAY,CAAe;IAC3B,SAAS,CAAY;IACrB,eAAe,CAAkB;IACjC,OAAO,CAAkB;IACzB,aAAa,CAAe;IAC5B,YAAY,GAAyE,EAAE,CAAC;IACxF,MAAM,GAAG,SAAS,EAAE,CAAC,KAAK,CAAC,EAAE,SAAS,EAAE,kBAAkB,EAAE,CAAC,CAAC;IAEtE,YAAY,OAAgC;QAC1C,MAAM,aAAa,GAAoB;YACrC,OAAO,EAAE,IAAI;YACb,WAAW,EAAE,IAAI;YACjB,YAAY,EAAE,IAAI;YAClB,eAAe,EAAE,IAAI;YACrB,iBAAiB,EAAE,KAAK;YACxB,sBAAsB,EAAE,IAAI;YAC5B,qBAAqB,EAAE,CAAC;YACxB,2BAA2B,EAAE,IAAI;YACjC,UAAU,EAAE;gBACV,QAAQ,EAAE,QAAQ;gBAClB,IAAI,EAAE,KAAK;gBACX,MAAM,EAAE,QAAQ;gBAChB,WAAW,EAAE,KAAK;gBAClB,OAAO,EAAE,KAAK;gBACd,OAAO,EAAE,QAAQ;gBACjB,cAAc,EAAE,MAAM;gBACtB,YAAY,EAAE,QAAQ;aACvB;YACD,UAAU,EAAE,KAAK;YACjB,aAAa,EAAE,IAAI;SACpB,CAAC;QAEF,IAAI,CAAC,OAAO,GAAG,EAAE,GAAG,aAAa,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;QACvD,IAAI,CAAC,aAAa,GAAG,OAAO,CAAC,YAAY,IAAI,oBAAoB,CAAC;QAElE,IAAI,CAAC,WAAW,GAAG,IAAI,WAAW,CAAC;YACjC,GAAG,EAAE,OAAO,CAAC,GAAG;YAChB,MAAM,EAAE,IAAI,CAAC,OAAO;YACpB,YAAY,EAAE,IAAI,CAAC,aAAa;SACjC,CAAC,CAAC;QAEH,IAAI,CAAC,YAAY,GAAG,IAAI,YAAY,CAAC;YACnC,GAAG,EAAE,OAAO,CAAC,GAAG;YAChB,MAAM,EAAE,IAAI,CAAC,OAAO;YACpB,YAAY,EAAE,IAAI,CAAC,aAAa;SACjC,CAAC,CAAC;QAEH,IAAI,CAAC,SAAS,GAAG,IAAI,SAAS,CAAC;YAC7B,MAAM,EAAE,IAAI,CAAC,OAAO;YACpB,YAAY,EAAE,IAAI,CAAC,aAAa;SACjC,CAAC,CAAC;QAEH,IAAI,CAAC,eAAe,GAAG,IAAI,eAAe,CAAC;YACzC,GAAG,EAAE,OAAO,CAAC,GAAG;YAChB,MAAM,EAAE,IAAI,CAAC,OAAO;YACpB,YAAY,EAAE,IAAI,CAAC,aAAa;SACjC,CAAC,CAAC;IACL,CAAC;IAED,IAAI,MAAM;QACR,OAAO,IAAI,CAAC,OAAO,CAAC;IACtB,CAAC;IAED,IAAI,YAAY;QACd,OAAO,IAAI,CAAC,aAAa,CAAC;IAC5B,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,KAAa,EAAE,OAAgB;QAC/C,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;Y
AC9B,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC;QAC3C,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QAC7D,IAAI,CAAC,YAAY,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QACnC,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,KAAK,CAAC,YAAY,CAAC,MAAc,EAAE,OAAkB;QACnD,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,CAAC;YAC/B,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC;QAC3C,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAE/D,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,CAAC,sBAAsB,EAAE,CAAC;YAC3D,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,iBAAiB,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;YAC/D,IAAI,QAAQ,CAAC,OAAO,KAAK,QAAQ,CAAC,QAAQ,EAAE,CAAC;gBAC3C,OAAO;oBACL,OAAO,EAAE,IAAI;oBACb,UAAU,EAAE,MAAM,CAAC,UAAU;oBAC7B,iBAAiB,EAAE,QAAQ,CAAC,OAAO;iBACpC,CAAC;YACJ,CAAC;QACH,CAAC;QAED,IAAI,CAAC,YAAY,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;QACpC,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,KAAK,CAAC,SAAS,CACb,IAAU,EACV,IAA6B,EAC7B,OAAoB;QAEpB,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,eAAe,EAAE,CAAC;YAClC,OAAO;gBACL,QAAQ,EAAE,IAAI;gBACd,oBAAoB,EAAE,KAAK;gBAC3B,WAAW,EAAE,IAAI,CAAC,WAAW,IAAI,EAAE;gBACnC,SAAS,EAAE,KAAK;aACjB,CAAC;QACJ,CAAC;QAED,OAAO,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;IACtD,CAAC;IAED,KAAK,CAAC,iBAAiB,CAAC,QAAgB,EAAE,OAAkB;QAC1D,OAAO,IAAI,CAAC,eAAe,CAAC,iBAAiB,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IACnE,CAAC;IAED,eAAe,CAAC,YAA0B;QACxC,IAAI,CAAC,aAAa,GAAG,YAAY,CAAC;QAClC,IAAI,CAAC,WAAW,CAAC,kBAAkB,CAAC,YAAY,CAAC,CAAC;QAClD,IAAI,CAAC,YAAY,CAAC,kBAAkB,CAAC,YAAY,CAAC,CAAC;QACnD,IAAI,CAAC,SAAS,CAAC,kBAAkB,CAAC,YAAY,CAAC,CAAC;QAChD,IAAI,CAAC,eAAe,CAAC,kBAAkB,CAAC,YAAY,CAAC,CAAC;QACtD,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,sBAAsB,EAAE,EAAE,cAAc,EAAE,YAAY,CAAC,EAAE,EAAE,CAAC,CAAC;IAChF,CAAC;IAED,YAAY,CAAC,SAAkC;QAC7C,MAAM,MAAM,GAAG,IAAI,CAAC,aAAa,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,SAAS,CAAC,EAAE,CAAC,CAAC;QAChF,IAAI,MAAM,EAAE,CAAC;YACX,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,oCAAoC,EAAE,EAAE,WAAW,EAAE,SAAS,CAAC,EAAE,EAAE,CAAC,CAAC;YACtF,OAAO;Q
ACT,CAAC;QAED,MAAM,OAAO,GAAiB;YAC5B,GAAG,IAAI,CAAC,aAAa;YACrB,UAAU,EAAE,CAAC,GAAG,IAAI,CAAC,aAAa,CAAC,UAAU,EAAE,SAAS,CAAC;SAC1D,CAAC;QACF,IAAI,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC;IAChC,CAAC;IAED,eAAe,CAAC,EAAU;QACxB,MAAM,OAAO,GAAiB;YAC5B,GAAG,IAAI,CAAC,aAAa;YACrB,UAAU,EAAE,IAAI,CAAC,aAAa,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC;SACrE,CAAC;QACF,IAAI,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC;IAChC,CAAC;IAED,eAAe;QACb,OAAO,IAAI,CAAC,aAAa,CAAC;IAC5B,CAAC;IAED,SAAS;QACP,OAAO,EAAE,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC;IAC7B,CAAC;IAED,eAAe;QACb,OAAO,CAAC,GAAG,IAAI,CAAC,YAAY,CAAC,CAAC;IAChC,CAAC;IAED,iBAAiB;QACf,IAAI,CAAC,YAAY,GAAG,EAAE,CAAC;IACzB,CAAC;IAEO,YAAY,CAAC,KAAkB,EAAE,MAAoB;QAC3D,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpD,IAAI,IAAI,CAAC,OAAO,CAAC,aAAa,EAAE,CAAC;gBAC/B,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,EAAE,SAAS,EAAE,IAAI,IAAI,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;gBACjE,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,8BAA8B,EAAE;oBAC/C,KAAK;oBACL,OAAO,EAAE,MAAM,CAAC,OAAO;oBACvB,SAAS,EAAE,MAAM,CAAC,UAAU,CAAC,MAAM;oBACnC,UAAU,EAAE,MAAM,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;iBACrD,CAAC,CAAC;YACL,CAAC;YAED,IAAI,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;gBAC7B,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;YAC1C,CAAC;QACH,CAAC;IACH,CAAC;CACF"}
@@ -0,0 +1,20 @@
1
+ import type { GuardrailConfig, CritiqueResult, RevisionResult, Constitution, ConstitutionalPrinciple, LLMBackend, Message } from '@cogitator-ai/types';
2
+ export interface CritiqueReviserOptions { // Options accepted by the CritiqueReviser constructor; all three are required.
3
+ llm: LLMBackend; // Backend whose chat() is called for both the critique and the revision prompts.
4
+ config: GuardrailConfig; // Read for maxRevisionIterations, revisionConfidenceThreshold and the optional model name.
5
+ constitution: Constitution; // Source of the principles; only those selected by filterPrinciplesByLayer(..., 'output') are used.
6
+ }
7
+ export declare class CritiqueReviser { // Iteratively critiques a response against output-layer principles and asks the LLM to rewrite it.
8
+ private llm;
9
+ private config;
10
+ private constitution;
11
+ private principles; // Output-layer subset of the constitution, recomputed by updateConstitution.
12
+ constructor(options: CritiqueReviserOptions);
13
+ critiqueAndRevise(response: string, _context: Message[]): Promise<RevisionResult>; // Runs at most config.maxRevisionIterations critique/revise rounds; _context is not read by the implementation.
14
+ critique(response: string, principles?: ConstitutionalPrinciple[]): Promise<CritiqueResult>; // Uses the stored output-layer principles when `principles` is omitted.
15
+ revise(response: string, critique: CritiqueResult, violatedPrinciples: ConstitutionalPrinciple[]): Promise<string>; // Resolves to the content of the LLM's reply to the revision prompt.
16
+ private selectPrinciples;
17
+ private categoryKeywords; // Per-harm-category keyword lists matched as lowercase substrings by selectPrinciples.
18
+ updateConstitution(constitution: Constitution): void; // Stores the constitution and re-derives the output-layer principles.
19
+ }
20
+ //# sourceMappingURL=critique-reviser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"critique-reviser.d.ts","sourceRoot":"","sources":["../../src/constitutional/critique-reviser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,eAAe,EACf,cAAc,EACd,cAAc,EACd,YAAY,EACZ,uBAAuB,EACvB,UAAU,EACV,OAAO,EACR,MAAM,qBAAqB,CAAC;AAI7B,MAAM,WAAW,sBAAsB;IACrC,GAAG,EAAE,UAAU,CAAC;IAChB,MAAM,EAAE,eAAe,CAAC;IACxB,YAAY,EAAE,YAAY,CAAC;CAC5B;AAED,qBAAa,eAAe;IAC1B,OAAO,CAAC,GAAG,CAAa;IACxB,OAAO,CAAC,MAAM,CAAkB;IAChC,OAAO,CAAC,YAAY,CAAe;IACnC,OAAO,CAAC,UAAU,CAA4B;gBAElC,OAAO,EAAE,sBAAsB;IAOrC,iBAAiB,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,OAAO,CAAC,cAAc,CAAC;IAuCjF,QAAQ,CACZ,QAAQ,EAAE,MAAM,EAChB,UAAU,CAAC,EAAE,uBAAuB,EAAE,GACrC,OAAO,CAAC,cAAc,CAAC;IAcpB,MAAM,CACV,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,cAAc,EACxB,kBAAkB,EAAE,uBAAuB,EAAE,GAC5C,OAAO,CAAC,MAAM,CAAC;IAalB,OAAO,CAAC,gBAAgB;IAuBxB,OAAO,CAAC,gBAAgB,CAStB;IAEF,kBAAkB,CAAC,YAAY,EAAE,YAAY,GAAG,IAAI;CAIrD"}
@@ -0,0 +1,98 @@
1
+ import { buildCritiquePrompt, buildRevisionPrompt, parseCritiqueResponse } from './prompts';
2
+ import { filterPrinciplesByLayer } from './constitution';
3
/**
 * Critique-and-revise loop for model output.
 *
 * Each round asks the LLM to critique the current text against a subset of
 * the constitution's output-layer principles and, when the critique reports a
 * sufficiently confident violation of known principles, asks the LLM to
 * rewrite the text.
 */
export class CritiqueReviser {
  llm;
  config;
  constitution;
  principles;

  /** @param options `llm`, `config` and `constitution` (all required). */
  constructor(options) {
    this.llm = options.llm;
    this.config = options.config;
    this.constitution = options.constitution;
    this.principles = filterPrinciplesByLayer(this.constitution, 'output');
  }

  /**
   * Runs up to `config.maxRevisionIterations` critique/revise rounds.
   *
   * A round ends the loop when the critique is not harmful, when the highest
   * harm-score confidence is below `config.revisionConfidenceThreshold`, or
   * when none of the reported principle ids match a known output-layer
   * principle. Text produced by the final round is returned without a
   * further critique.
   *
   * @param {string} response text to review
   * @param _context accepted for interface compatibility; not read
   * @returns `{ original, revised, iterations, critiqueHistory }`
   */
  async critiqueAndRevise(response, _context) {
    let current = response;
    const history = [];
    for (let i = 0; i < this.config.maxRevisionIterations; i++) {
      const selectedPrinciples = this.selectPrinciples(current, i);
      const critique = await this.critique(current, selectedPrinciples);
      history.push(critique);
      if (!critique.isHarmful) {
        break;
      }
      // Guarded because Math.max() of an empty list is -Infinity, which would
      // always fall below the threshold.
      if (critique.harmScores.length > 0) {
        const maxConfidence = Math.max(...critique.harmScores.map((s) => s.confidence));
        if (maxConfidence < this.config.revisionConfidenceThreshold) {
          break;
        }
      }
      const violatedPrinciples = this.principles.filter((p) =>
        critique.principlesViolated.includes(p.id),
      );
      if (violatedPrinciples.length === 0) {
        break;
      }
      current = await this.revise(current, critique, violatedPrinciples);
    }
    return {
      original: response,
      revised: current,
      iterations: history.length,
      critiqueHistory: history,
    };
  }

  /**
   * Asks the LLM to critique `response`.
   *
   * @param {string} response text to critique
   * @param [principles] principles to judge against; defaults to the stored
   *   output-layer principles
   * @returns the parsed critique
   */
  async critique(response, principles) {
    const toUse = principles ?? this.principles;
    const prompt = buildCritiquePrompt(response, toUse);
    const result = await this.llm.chat({
      model: this.config.model ?? 'gpt-4o-mini',
      messages: [{ role: 'user', content: prompt }],
      temperature: 0,
      maxTokens: 800,
    });
    return parseCritiqueResponse(result.content);
  }

  /**
   * Asks the LLM to rewrite `response` in light of a critique.
   *
   * @returns the raw content of the LLM reply
   */
  async revise(response, critique, violatedPrinciples) {
    const prompt = buildRevisionPrompt(response, critique, violatedPrinciples);
    const result = await this.llm.chat({
      model: this.config.model ?? 'gpt-4o-mini',
      messages: [{ role: 'user', content: prompt }],
      temperature: 0.3,
      maxTokens: 2000,
    });
    return result.content;
  }

  /**
   * Chooses which principles a given round critiques against.
   *
   * Round 0 uses the high-severity principles, falling back to every
   * principle when none is marked high so the first critique never runs
   * against an empty list. Later rounds use principles whose harm categories
   * have a keyword occurring in the text; if none match, up to five
   * principles are sampled uniformly at random.
   */
  selectPrinciples(response, iteration) {
    if (iteration === 0) {
      const highSeverity = this.principles.filter((p) => p.severity === 'high');
      return highSeverity.length > 0 ? highSeverity : [...this.principles];
    }
    const lowered = response.toLowerCase();
    const relevant = this.principles.filter((p) => {
      for (const category of p.harmCategories ?? []) {
        if (this.categoryKeywords[category]?.some((kw) => lowered.includes(kw))) {
          return true;
        }
      }
      return false;
    });
    if (relevant.length > 0) {
      return relevant;
    }
    // Fisher–Yates on a copy: sorting with a random comparator is biased and
    // hands sort() an inconsistent ordering function.
    const shuffled = [...this.principles];
    for (let i = shuffled.length - 1; i > 0; i--) {
      const j = Math.floor(Math.random() * (i + 1));
      [shuffled[i], shuffled[j]] = [shuffled[j], shuffled[i]];
    }
    return shuffled.slice(0, 5);
  }

  // Lowercase substrings looked up in the response text, keyed by harm category.
  categoryKeywords = {
    violence: ['kill', 'murder', 'weapon', 'attack', 'bomb', 'harm', 'hurt', 'fight'],
    hate: ['hate', 'racist', 'sexist', 'slur', 'discriminat'],
    sexual: ['sex', 'porn', 'nude', 'erotic', 'explicit'],
    'self-harm': ['suicide', 'self-harm', 'cut myself', 'kill myself'],
    illegal: ['hack', 'steal', 'fraud', 'drug', 'illegal'],
    privacy: ['password', 'ssn', 'social security', 'credit card', 'address'],
    misinformation: ['fake', 'conspiracy', 'hoax'],
    manipulation: ['manipulate', 'deceive', 'trick', 'scam'],
  };

  /** Stores the new constitution and re-derives the output-layer principles. */
  updateConstitution(constitution) {
    this.constitution = constitution;
    this.principles = filterPrinciplesByLayer(constitution, 'output');
  }
}
98
+ //# sourceMappingURL=critique-reviser.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"critique-reviser.js","sourceRoot":"","sources":["../../src/constitutional/critique-reviser.ts"],"names":[],"mappings":"AASA,OAAO,EAAE,mBAAmB,EAAE,mBAAmB,EAAE,qBAAqB,EAAE,MAAM,WAAW,CAAC;AAC5F,OAAO,EAAE,uBAAuB,EAAE,MAAM,gBAAgB,CAAC;AAQzD,MAAM,OAAO,eAAe;IAClB,GAAG,CAAa;IAChB,MAAM,CAAkB;IACxB,YAAY,CAAe;IAC3B,UAAU,CAA4B;IAE9C,YAAY,OAA+B;QACzC,IAAI,CAAC,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC;QACvB,IAAI,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;QAC7B,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;QACzC,IAAI,CAAC,UAAU,GAAG,uBAAuB,CAAC,IAAI,CAAC,YAAY,EAAE,QAAQ,CAAC,CAAC;IACzE,CAAC;IAED,KAAK,CAAC,iBAAiB,CAAC,QAAgB,EAAE,QAAmB;QAC3D,IAAI,OAAO,GAAG,QAAQ,CAAC;QACvB,MAAM,OAAO,GAAqB,EAAE,CAAC;QAErC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,EAAE,CAAC,EAAE,EAAE,CAAC;YAC3D,MAAM,kBAAkB,GAAG,IAAI,CAAC,gBAAgB,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;YAC7D,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,kBAAkB,CAAC,CAAC;YAClE,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAEvB,IAAI,CAAC,QAAQ,CAAC,SAAS,EAAE,CAAC;gBACxB,MAAM;YACR,CAAC;YAED,IAAI,QAAQ,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACnC,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,QAAQ,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC;gBAChF,IAAI,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,2BAA2B,EAAE,CAAC;oBAC5D,MAAM;gBACR,CAAC;YACH,CAAC;YAED,MAAM,kBAAkB,GAAG,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CACtD,QAAQ,CAAC,kBAAkB,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC,CAC3C,CAAC;YAEF,IAAI,kBAAkB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACpC,MAAM;YACR,CAAC;YAED,OAAO,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,QAAQ,EAAE,kBAAkB,CAAC,CAAC;QACrE,CAAC;QAED,OAAO;YACL,QAAQ,EAAE,QAAQ;YAClB,OAAO,EAAE,OAAO;YAChB,UAAU,EAAE,OAAO,CAAC,MAAM;YAC1B,eAAe,EAAE,OAAO;SACzB,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,QAAQ,CACZ,QAAgB,EAChB,UAAsC;QAEtC,MAAM,KAAK,GAAG,UAAU,IAAI,IAAI,CAAC,UAAU,CAAC;QAC5C,MAAM,MAAM,GAAG,mBAAmB,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QAEpD,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC;YACjC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK,IAAI,aAAa;YACzC,QAA
Q,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;YAC7C,WAAW,EAAE,CAAC;YACd,SAAS,EAAE,GAAG;SACf,CAAC,CAAC;QAEH,OAAO,qBAAqB,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAC/C,CAAC;IAED,KAAK,CAAC,MAAM,CACV,QAAgB,EAChB,QAAwB,EACxB,kBAA6C;QAE7C,MAAM,MAAM,GAAG,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,EAAE,kBAAkB,CAAC,CAAC;QAE3E,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC;YACjC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK,IAAI,aAAa;YACzC,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;YAC7C,WAAW,EAAE,GAAG;YAChB,SAAS,EAAE,IAAI;SAChB,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC,OAAO,CAAC;IACxB,CAAC;IAEO,gBAAgB,CAAC,QAAgB,EAAE,SAAiB;QAC1D,IAAI,SAAS,KAAK,CAAC,EAAE,CAAC;YACpB,OAAO,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,MAAM,CAAC,CAAC;QAC9D,CAAC;QAED,MAAM,OAAO,GAAG,QAAQ,CAAC,WAAW,EAAE,CAAC;QACvC,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;YAC5C,KAAK,MAAM,QAAQ,IAAI,CAAC,CAAC,cAAc,IAAI,EAAE,EAAE,CAAC;gBAC9C,IAAI,IAAI,CAAC,gBAAgB,CAAC,QAAQ,CAAC,EAAE,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;oBACxE,OAAO,IAAI,CAAC;gBACd,CAAC;YACH,CAAC;YACD,OAAO,KAAK,CAAC;QACf,CAAC,CAAC,CAAC;QAEH,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACxB,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,MAAM,QAAQ,GAAG,CAAC,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,GAAG,CAAC,CAAC;QACtE,OAAO,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC;IACzD,CAAC;IAEO,gBAAgB,GAA6B;QACnD,QAAQ,EAAE,CAAC,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC;QACjF,IAAI,EAAE,CAAC,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,aAAa,CAAC;QACzD,MAAM,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,UAAU,CAAC;QACrD,WAAW,EAAE,CAAC,SAAS,EAAE,WAAW,EAAE,YAAY,EAAE,aAAa,CAAC;QAClE,OAAO,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,CAAC;QACtD,OAAO,EAAE,CAAC,UAAU,EAAE,KAAK,EAAE,iBAAiB,EAAE,aAAa,EAAE,SAAS,CAAC;QACzE,cAAc,EAAE,CAAC,MAAM,EAAE,YAAY,EAAE,MAAM,CAAC;QAC9C,YAAY,EAAE,CAAC,YAA
Y,EAAE,SAAS,EAAE,OAAO,EAAE,MAAM,CAAC;KACzD,CAAC;IAEF,kBAAkB,CAAC,YAA0B;QAC3C,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;QACjC,IAAI,CAAC,UAAU,GAAG,uBAAuB,CAAC,YAAY,EAAE,QAAQ,CAAC,CAAC;IACpE,CAAC;CACF"}
@@ -0,0 +1,13 @@
1
+ export { ConstitutionalAI } from './constitutional-ai';
2
+ export type { ConstitutionalAIOptions } from './constitutional-ai';
3
+ export { InputFilter } from './input-filter';
4
+ export type { InputFilterOptions } from './input-filter';
5
+ export { OutputFilter } from './output-filter';
6
+ export type { OutputFilterOptions } from './output-filter';
7
+ export { ToolGuard } from './tool-guard';
8
+ export type { ToolGuardOptions } from './tool-guard';
9
+ export { CritiqueReviser } from './critique-reviser';
10
+ export type { CritiqueReviserOptions } from './critique-reviser';
11
+ export { DEFAULT_CONSTITUTION, DEFAULT_PRINCIPLES, createConstitution, extendConstitution, filterPrinciplesByLayer, getPrinciplesByCategory, getPrinciplesBySeverity, } from './constitution';
12
+ export { buildInputEvaluationPrompt, buildOutputEvaluationPrompt, buildCritiquePrompt, buildRevisionPrompt, parseEvaluationResponse, parseCritiqueResponse, } from './prompts';
13
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/constitutional/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AACvD,YAAY,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAEnE,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAC7C,YAAY,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;AAEzD,OAAO,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AAC/C,YAAY,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC;AAE3D,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,YAAY,EAAE,gBAAgB,EAAE,MAAM,cAAc,CAAC;AAErD,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACrD,YAAY,EAAE,sBAAsB,EAAE,MAAM,oBAAoB,CAAC;AAEjE,OAAO,EACL,oBAAoB,EACpB,kBAAkB,EAClB,kBAAkB,EAClB,kBAAkB,EAClB,uBAAuB,EACvB,uBAAuB,EACvB,uBAAuB,GACxB,MAAM,gBAAgB,CAAC;AAExB,OAAO,EACL,0BAA0B,EAC1B,2BAA2B,EAC3B,mBAAmB,EACnB,mBAAmB,EACnB,uBAAuB,EACvB,qBAAqB,GACtB,MAAM,WAAW,CAAC"}
@@ -0,0 +1,8 @@
1
+ export { ConstitutionalAI } from './constitutional-ai';
2
+ export { InputFilter } from './input-filter';
3
+ export { OutputFilter } from './output-filter';
4
+ export { ToolGuard } from './tool-guard';
5
+ export { CritiqueReviser } from './critique-reviser';
6
+ export { DEFAULT_CONSTITUTION, DEFAULT_PRINCIPLES, createConstitution, extendConstitution, filterPrinciplesByLayer, getPrinciplesByCategory, getPrinciplesBySeverity, } from './constitution';
7
+ export { buildInputEvaluationPrompt, buildOutputEvaluationPrompt, buildCritiquePrompt, buildRevisionPrompt, parseEvaluationResponse, parseCritiqueResponse, } from './prompts';
8
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/constitutional/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAGvD,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAG7C,OAAO,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AAG/C,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAGzC,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAGrD,OAAO,EACL,oBAAoB,EACpB,kBAAkB,EAClB,kBAAkB,EAClB,kBAAkB,EAClB,uBAAuB,EACvB,uBAAuB,EACvB,uBAAuB,GACxB,MAAM,gBAAgB,CAAC;AAExB,OAAO,EACL,0BAA0B,EAC1B,2BAA2B,EAC3B,mBAAmB,EACnB,mBAAmB,EACnB,uBAAuB,EACvB,qBAAqB,GACtB,MAAM,WAAW,CAAC"}
@@ -0,0 +1,19 @@
1
+ import type { GuardrailConfig, FilterResult, Constitution, LLMBackend } from '@cogitator-ai/types';
2
+ export interface InputFilterOptions {
3
+ llm: LLMBackend;
4
+ config: GuardrailConfig;
5
+ constitution: Constitution;
6
+ }
7
+ export declare class InputFilter {
8
+ private llm;
9
+ private config;
10
+ private constitution;
11
+ private principles;
12
+ constructor(options: InputFilterOptions);
13
+ filter(input: string, context?: string): Promise<FilterResult>;
14
+ private quickScan;
15
+ private evaluateWithLLM;
16
+ private applyThresholds;
17
+ updateConstitution(constitution: Constitution): void;
18
+ }
19
+ //# sourceMappingURL=input-filter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"input-filter.d.ts","sourceRoot":"","sources":["../../src/constitutional/input-filter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,eAAe,EACf,YAAY,EAGZ,YAAY,EACZ,UAAU,EACX,MAAM,qBAAqB,CAAC;AAI7B,MAAM,WAAW,kBAAkB;IACjC,GAAG,EAAE,UAAU,CAAC;IAChB,MAAM,EAAE,eAAe,CAAC;IACxB,YAAY,EAAE,YAAY,CAAC;CAC5B;AAED,qBAAa,WAAW;IACtB,OAAO,CAAC,GAAG,CAAa;IACxB,OAAO,CAAC,MAAM,CAAkB;IAChC,OAAO,CAAC,YAAY,CAAe;IACnC,OAAO,CAAC,UAAU,CAA4B;gBAElC,OAAO,EAAE,kBAAkB;IAOjC,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAcpE,OAAO,CAAC,SAAS;YA8BH,eAAe;IAc7B,OAAO,CAAC,eAAe;IAyBvB,kBAAkB,CAAC,YAAY,EAAE,YAAY,GAAG,IAAI;CAIrD"}
@@ -0,0 +1,88 @@
1
+ import { buildInputEvaluationPrompt, parseEvaluationResponse } from './prompts';
2
+ import { filterPrinciplesByLayer } from './constitution';
3
+ export class InputFilter {
4
+ llm;
5
+ config;
6
+ constitution;
7
+ principles;
8
+ constructor(options) {
9
+ this.llm = options.llm;
10
+ this.config = options.config;
11
+ this.constitution = options.constitution;
12
+ this.principles = filterPrinciplesByLayer(this.constitution, 'input');
13
+ }
14
+ async filter(input, context) {
15
+ const quickResult = this.quickScan(input);
16
+ if (!quickResult.allowed) {
17
+ return quickResult;
18
+ }
19
+ if (this.principles.length === 0) {
20
+ return { allowed: true, harmScores: [] };
21
+ }
22
+ const harmScores = await this.evaluateWithLLM(input, context);
23
+ return this.applyThresholds(harmScores);
24
+ }
25
+ quickScan(input) {
26
+ const lowered = input.toLowerCase();
27
+ const patterns = [
28
+ { pattern: /how\s+to\s+(make|build|create)\s+(a\s+)?bomb/i, category: 'violence' },
29
+ { pattern: /how\s+to\s+kill\s+(myself|someone)/i, category: 'violence' },
30
+ { pattern: /how\s+to\s+hack\s+(into|a)/i, category: 'illegal' },
31
+ { pattern: /child\s+porn(ography)?/i, category: 'sexual' },
32
+ { pattern: /ways\s+to\s+commit\s+suicide/i, category: 'self-harm' },
33
+ ];
34
+ for (const { pattern, category } of patterns) {
35
+ if (pattern.test(lowered)) {
36
+ return {
37
+ allowed: false,
38
+ harmScores: [
39
+ {
40
+ category,
41
+ severity: 'high',
42
+ confidence: 0.95,
43
+ reasoning: 'Matched explicit harmful pattern',
44
+ },
45
+ ],
46
+ blockedReason: `Input contains potentially harmful content (${category})`,
47
+ };
48
+ }
49
+ }
50
+ return { allowed: true, harmScores: [] };
51
+ }
52
+ async evaluateWithLLM(input, _context) {
53
+ const prompt = buildInputEvaluationPrompt(input, this.principles);
54
+ const response = await this.llm.chat({
55
+ model: this.config.model ?? 'gpt-4o-mini',
56
+ messages: [{ role: 'user', content: prompt }],
57
+ temperature: 0,
58
+ maxTokens: 500,
59
+ });
60
+ const result = parseEvaluationResponse(response.content);
61
+ return result.harmScores;
62
+ }
63
+ applyThresholds(harmScores) {
64
+ const thresholds = this.config.thresholds;
65
+ const severityOrder = { low: 1, medium: 2, high: 3 };
66
+ const violations = harmScores.filter((score) => {
67
+ const threshold = thresholds[score.category] ?? 'high';
68
+ return severityOrder[score.severity] >= severityOrder[threshold];
69
+ });
70
+ if (violations.length === 0) {
71
+ return { allowed: true, harmScores };
72
+ }
73
+ if (this.config.strictMode) {
74
+ const categories = [...new Set(violations.map((v) => v.category))];
75
+ return {
76
+ allowed: false,
77
+ harmScores,
78
+ blockedReason: `Input violates safety policies: ${categories.join(', ')}`,
79
+ };
80
+ }
81
+ return { allowed: true, harmScores };
82
+ }
83
+ updateConstitution(constitution) {
84
+ this.constitution = constitution;
85
+ this.principles = filterPrinciplesByLayer(constitution, 'input');
86
+ }
87
+ }
88
+ //# sourceMappingURL=input-filter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"input-filter.js","sourceRoot":"","sources":["../../src/constitutional/input-filter.ts"],"names":[],"mappings":"AAQA,OAAO,EAAE,0BAA0B,EAAE,uBAAuB,EAAE,MAAM,WAAW,CAAC;AAChF,OAAO,EAAE,uBAAuB,EAAE,MAAM,gBAAgB,CAAC;AAQzD,MAAM,OAAO,WAAW;IACd,GAAG,CAAa;IAChB,MAAM,CAAkB;IACxB,YAAY,CAAe;IAC3B,UAAU,CAA4B;IAE9C,YAAY,OAA2B;QACrC,IAAI,CAAC,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC;QACvB,IAAI,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;QAC7B,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;QACzC,IAAI,CAAC,UAAU,GAAG,uBAAuB,CAAC,IAAI,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC;IACxE,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,KAAa,EAAE,OAAgB;QAC1C,MAAM,WAAW,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;QAC1C,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,CAAC;YACzB,OAAO,WAAW,CAAC;QACrB,CAAC;QAED,IAAI,IAAI,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACjC,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC;QAC3C,CAAC;QAED,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QAC9D,OAAO,IAAI,CAAC,eAAe,CAAC,UAAU,CAAC,CAAC;IAC1C,CAAC;IAEO,SAAS,CAAC,KAAa;QAC7B,MAAM,OAAO,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC;QACpC,MAAM,QAAQ,GAAG;YACf,EAAE,OAAO,EAAE,+CAA+C,EAAE,QAAQ,EAAE,UAAmB,EAAE;YAC3F,EAAE,OAAO,EAAE,qCAAqC,EAAE,QAAQ,EAAE,UAAmB,EAAE;YACjF,EAAE,OAAO,EAAE,6BAA6B,EAAE,QAAQ,EAAE,SAAkB,EAAE;YACxE,EAAE,OAAO,EAAE,yBAAyB,EAAE,QAAQ,EAAE,QAAiB,EAAE;YACnE,EAAE,OAAO,EAAE,+BAA+B,EAAE,QAAQ,EAAE,WAAoB,EAAE;SAC7E,CAAC;QAEF,KAAK,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,IAAI,QAAQ,EAAE,CAAC;YAC7C,IAAI,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;gBAC1B,OAAO;oBACL,OAAO,EAAE,KAAK;oBACd,UAAU,EAAE;wBACV;4BACE,QAAQ;4BACR,QAAQ,EAAE,MAAM;4BAChB,UAAU,EAAE,IAAI;4BAChB,SAAS,EAAE,kCAAkC;yBAC9C;qBACF;oBACD,aAAa,EAAE,+CAA+C,QAAQ,GAAG;iBAC1E,CAAC;YACJ,CAAC;QACH,CAAC;QAED,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC;IAC3C,CAAC;IAEO,KAAK,CAAC,eAAe,CAAC,KAAa,EAAE,QAAiB;QAC5D,MAAM,MAAM,GAAG,0BAA0B,CAAC,KAAK,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;QAElE,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC;YACnC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK,IAAI,aAAa;YACzC,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,E
AAE,OAAO,EAAE,MAAM,EAAE,CAAC;YAC7C,WAAW,EAAE,CAAC;YACd,SAAS,EAAE,GAAG;SACf,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,uBAAuB,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;QACzD,OAAO,MAAM,CAAC,UAAU,CAAC;IAC3B,CAAC;IAEO,eAAe,CAAC,UAAuB;QAC7C,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC;QAC1C,MAAM,aAAa,GAA2B,EAAE,GAAG,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,CAAC;QAE7E,MAAM,UAAU,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE;YAC7C,MAAM,SAAS,GAAG,UAAU,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,MAAM,CAAC;YACvD,OAAO,aAAa,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,aAAa,CAAC,SAAS,CAAC,CAAC;QACnE,CAAC,CAAC,CAAC;QAEH,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC5B,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC;QACvC,CAAC;QAED,IAAI,IAAI,CAAC,MAAM,CAAC,UAAU,EAAE,CAAC;YAC3B,MAAM,UAAU,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;YACnE,OAAO;gBACL,OAAO,EAAE,KAAK;gBACd,UAAU;gBACV,aAAa,EAAE,mCAAmC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;aAC1E,CAAC;QACJ,CAAC;QAED,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC;IACvC,CAAC;IAED,kBAAkB,CAAC,YAA0B;QAC3C,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;QACjC,IAAI,CAAC,UAAU,GAAG,uBAAuB,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC;IACnE,CAAC;CACF"}
@@ -0,0 +1,19 @@
1
+ import type { GuardrailConfig, FilterResult, Constitution, LLMBackend, Message } from '@cogitator-ai/types';
2
+ export interface OutputFilterOptions {
3
+ llm: LLMBackend;
4
+ config: GuardrailConfig;
5
+ constitution: Constitution;
6
+ }
7
+ export declare class OutputFilter {
8
+ private llm;
9
+ private config;
10
+ private constitution;
11
+ private principles;
12
+ constructor(options: OutputFilterOptions);
13
+ filter(output: string, context: Message[]): Promise<FilterResult>;
14
+ private quickScan;
15
+ private evaluateWithLLM;
16
+ private applyThresholds;
17
+ updateConstitution(constitution: Constitution): void;
18
+ }
19
+ //# sourceMappingURL=output-filter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"output-filter.d.ts","sourceRoot":"","sources":["../../src/constitutional/output-filter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,eAAe,EACf,YAAY,EAGZ,YAAY,EACZ,UAAU,EACV,OAAO,EACR,MAAM,qBAAqB,CAAC;AAI7B,MAAM,WAAW,mBAAmB;IAClC,GAAG,EAAE,UAAU,CAAC;IAChB,MAAM,EAAE,eAAe,CAAC;IACxB,YAAY,EAAE,YAAY,CAAC;CAC5B;AAED,qBAAa,YAAY;IACvB,OAAO,CAAC,GAAG,CAAa;IACxB,OAAO,CAAC,MAAM,CAAkB;IAChC,OAAO,CAAC,YAAY,CAAe;IACnC,OAAO,CAAC,UAAU,CAA4B;gBAElC,OAAO,EAAE,mBAAmB;IAOlC,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,GAAG,OAAO,CAAC,YAAY,CAAC;IAcvE,OAAO,CAAC,SAAS;YA6BH,eAAe;IAc7B,OAAO,CAAC,eAAe;IAyBvB,kBAAkB,CAAC,YAAY,EAAE,YAAY,GAAG,IAAI;CAIrD"}
@@ -0,0 +1,86 @@
1
+ import { buildOutputEvaluationPrompt, parseEvaluationResponse } from './prompts';
2
+ import { filterPrinciplesByLayer } from './constitution';
3
+ export class OutputFilter {
4
+ llm;
5
+ config;
6
+ constitution;
7
+ principles;
8
+ constructor(options) {
9
+ this.llm = options.llm;
10
+ this.config = options.config;
11
+ this.constitution = options.constitution;
12
+ this.principles = filterPrinciplesByLayer(this.constitution, 'output');
13
+ }
14
+ async filter(output, context) {
15
+ const quickResult = this.quickScan(output);
16
+ if (!quickResult.allowed) {
17
+ return quickResult;
18
+ }
19
+ if (this.principles.length === 0) {
20
+ return { allowed: true, harmScores: [] };
21
+ }
22
+ const harmScores = await this.evaluateWithLLM(output, context);
23
+ return this.applyThresholds(harmScores);
24
+ }
25
+ quickScan(output) {
26
+ const lowered = output.toLowerCase();
27
+ const patterns = [
28
+ { pattern: /rm\s+-rf\s+\/(?!\w)/i, category: 'illegal' },
29
+ { pattern: /format\s+c:/i, category: 'illegal' },
30
+ { pattern: /sudo\s+rm\s+-rf/i, category: 'illegal' },
31
+ ];
32
+ for (const { pattern, category } of patterns) {
33
+ if (pattern.test(lowered)) {
34
+ return {
35
+ allowed: false,
36
+ harmScores: [
37
+ {
38
+ category,
39
+ severity: 'high',
40
+ confidence: 0.95,
41
+ reasoning: 'Contains dangerous system command',
42
+ },
43
+ ],
44
+ blockedReason: 'Output contains potentially dangerous commands',
45
+ };
46
+ }
47
+ }
48
+ return { allowed: true, harmScores: [] };
49
+ }
50
+ async evaluateWithLLM(output, context) {
51
+ const prompt = buildOutputEvaluationPrompt(output, context, this.principles);
52
+ const response = await this.llm.chat({
53
+ model: this.config.model ?? 'gpt-4o-mini',
54
+ messages: [{ role: 'user', content: prompt }],
55
+ temperature: 0,
56
+ maxTokens: 500,
57
+ });
58
+ const result = parseEvaluationResponse(response.content);
59
+ return result.harmScores;
60
+ }
61
+ applyThresholds(harmScores) {
62
+ const thresholds = this.config.thresholds;
63
+ const severityOrder = { low: 1, medium: 2, high: 3 };
64
+ const violations = harmScores.filter((score) => {
65
+ const threshold = thresholds[score.category] ?? 'high';
66
+ return severityOrder[score.severity] >= severityOrder[threshold];
67
+ });
68
+ if (violations.length === 0) {
69
+ return { allowed: true, harmScores };
70
+ }
71
+ if (this.config.strictMode) {
72
+ const categories = [...new Set(violations.map((v) => v.category))];
73
+ return {
74
+ allowed: false,
75
+ harmScores,
76
+ blockedReason: `Output violates safety policies: ${categories.join(', ')}`,
77
+ };
78
+ }
79
+ return { allowed: true, harmScores };
80
+ }
81
+ updateConstitution(constitution) {
82
+ this.constitution = constitution;
83
+ this.principles = filterPrinciplesByLayer(constitution, 'output');
84
+ }
85
+ }
86
+ //# sourceMappingURL=output-filter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"output-filter.js","sourceRoot":"","sources":["../../src/constitutional/output-filter.ts"],"names":[],"mappings":"AASA,OAAO,EAAE,2BAA2B,EAAE,uBAAuB,EAAE,MAAM,WAAW,CAAC;AACjF,OAAO,EAAE,uBAAuB,EAAE,MAAM,gBAAgB,CAAC;AAQzD,MAAM,OAAO,YAAY;IACf,GAAG,CAAa;IAChB,MAAM,CAAkB;IACxB,YAAY,CAAe;IAC3B,UAAU,CAA4B;IAE9C,YAAY,OAA4B;QACtC,IAAI,CAAC,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC;QACvB,IAAI,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;QAC7B,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;QACzC,IAAI,CAAC,UAAU,GAAG,uBAAuB,CAAC,IAAI,CAAC,YAAY,EAAE,QAAQ,CAAC,CAAC;IACzE,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,MAAc,EAAE,OAAkB;QAC7C,MAAM,WAAW,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QAC3C,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,CAAC;YACzB,OAAO,WAAW,CAAC;QACrB,CAAC;QAED,IAAI,IAAI,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACjC,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC;QAC3C,CAAC;QAED,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,eAAe,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC/D,OAAO,IAAI,CAAC,eAAe,CAAC,UAAU,CAAC,CAAC;IAC1C,CAAC;IAEO,SAAS,CAAC,MAAc;QAC9B,MAAM,OAAO,GAAG,MAAM,CAAC,WAAW,EAAE,CAAC;QAErC,MAAM,QAAQ,GAAG;YACf,EAAE,OAAO,EAAE,sBAAsB,EAAE,QAAQ,EAAE,SAAkB,EAAE;YACjE,EAAE,OAAO,EAAE,cAAc,EAAE,QAAQ,EAAE,SAAkB,EAAE;YACzD,EAAE,OAAO,EAAE,kBAAkB,EAAE,QAAQ,EAAE,SAAkB,EAAE;SAC9D,CAAC;QAEF,KAAK,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,IAAI,QAAQ,EAAE,CAAC;YAC7C,IAAI,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;gBAC1B,OAAO;oBACL,OAAO,EAAE,KAAK;oBACd,UAAU,EAAE;wBACV;4BACE,QAAQ;4BACR,QAAQ,EAAE,MAAM;4BAChB,UAAU,EAAE,IAAI;4BAChB,SAAS,EAAE,mCAAmC;yBAC/C;qBACF;oBACD,aAAa,EAAE,gDAAgD;iBAChE,CAAC;YACJ,CAAC;QACH,CAAC;QAED,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC;IAC3C,CAAC;IAEO,KAAK,CAAC,eAAe,CAAC,MAAc,EAAE,OAAkB;QAC9D,MAAM,MAAM,GAAG,2BAA2B,CAAC,MAAM,EAAE,OAAO,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;QAE7E,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC;YACnC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK,IAAI,aAAa;YACzC,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;YAC7C,WAAW,EAAE,CAAC;YACd,SAAS,EAAE,GAAG;SACf,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,uBA
AuB,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;QACzD,OAAO,MAAM,CAAC,UAAU,CAAC;IAC3B,CAAC;IAEO,eAAe,CAAC,UAAuB;QAC7C,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC;QAC1C,MAAM,aAAa,GAA2B,EAAE,GAAG,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,CAAC;QAE7E,MAAM,UAAU,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE;YAC7C,MAAM,SAAS,GAAG,UAAU,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,MAAM,CAAC;YACvD,OAAO,aAAa,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,aAAa,CAAC,SAAS,CAAC,CAAC;QACnE,CAAC,CAAC,CAAC;QAEH,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC5B,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC;QACvC,CAAC;QAED,IAAI,IAAI,CAAC,MAAM,CAAC,UAAU,EAAE,CAAC;YAC3B,MAAM,UAAU,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;YACnE,OAAO;gBACL,OAAO,EAAE,KAAK;gBACd,UAAU;gBACV,aAAa,EAAE,oCAAoC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;aAC3E,CAAC;QACJ,CAAC;QAED,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC;IACvC,CAAC;IAED,kBAAkB,CAAC,YAA0B;QAC3C,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;QACjC,IAAI,CAAC,UAAU,GAAG,uBAAuB,CAAC,YAAY,EAAE,QAAQ,CAAC,CAAC;IACpE,CAAC;CACF"}
@@ -0,0 +1,11 @@
1
+ import type { ConstitutionalPrinciple, HarmScore, CritiqueResult, Message } from '@cogitator-ai/types';
2
+ export declare function buildInputEvaluationPrompt(input: string, principles: ConstitutionalPrinciple[]): string;
3
+ export declare function buildOutputEvaluationPrompt(output: string, context: Message[], principles: ConstitutionalPrinciple[]): string;
4
+ export declare function buildCritiquePrompt(response: string, principles: ConstitutionalPrinciple[]): string;
5
+ export declare function buildRevisionPrompt(response: string, critique: CritiqueResult, principles: ConstitutionalPrinciple[]): string;
6
+ export declare function parseEvaluationResponse(content: string): {
7
+ isHarmful: boolean;
8
+ harmScores: HarmScore[];
9
+ };
10
+ export declare function parseCritiqueResponse(content: string): CritiqueResult;
11
+ //# sourceMappingURL=prompts.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"prompts.d.ts","sourceRoot":"","sources":["../../src/constitutional/prompts.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,uBAAuB,EACvB,SAAS,EACT,cAAc,EAGd,OAAO,EACR,MAAM,qBAAqB,CAAC;AAE7B,wBAAgB,0BAA0B,CACxC,KAAK,EAAE,MAAM,EACb,UAAU,EAAE,uBAAuB,EAAE,GACpC,MAAM,CAiCR;AAED,wBAAgB,2BAA2B,CACzC,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,OAAO,EAAE,EAClB,UAAU,EAAE,uBAAuB,EAAE,GACpC,MAAM,CAyCR;AAED,wBAAgB,mBAAmB,CACjC,QAAQ,EAAE,MAAM,EAChB,UAAU,EAAE,uBAAuB,EAAE,GACpC,MAAM,CAkCR;AAED,wBAAgB,mBAAmB,CACjC,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,cAAc,EACxB,UAAU,EAAE,uBAAuB,EAAE,GACpC,MAAM,CA0BR;AAED,wBAAgB,uBAAuB,CAAC,OAAO,EAAE,MAAM,GAAG;IACxD,SAAS,EAAE,OAAO,CAAC;IACnB,UAAU,EAAE,SAAS,EAAE,CAAC;CACzB,CAeA;AAED,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,MAAM,GAAG,cAAc,CAwBrE"}