@pot-sdk2/pay 0.9.0 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/TASK-092.md ADDED
@@ -0,0 +1,72 @@
1
+ Implement @pot-sdk2/pay v0.9.2 — add verifier performance profiles and smart consensus modes.
2
+
3
+ ## What to build
4
+
5
+ ### 1. New file: src/profiles.ts
6
+ A benchmark-driven verifier performance database:
7
+
8
+ ```ts
9
+ export interface VerifierProfile {
10
+ modelId: string;
11
+ family: string;
12
+ taskScores: {
13
+ payment_verification: { detection: number; fpRate: number; benchmarkVersion: string };
14
+ };
15
+ weight: number; // derived from detection score, 0.1–3.0
16
+ recommended: boolean; // true if detection >= 0.7
17
+ }
18
+
19
+ export const VERIFIER_PROFILES: VerifierProfile[] = [
20
+ // From benchmark runs v1 + v3b (2026-03-01/02):
21
+ { modelId: "claude-sonnet-4-5", family: "anthropic",
22
+ taskScores: { payment_verification: { detection: 0.916, fpRate: 0.020, benchmarkVersion: "v3b" }},
23
+ weight: 3.0, recommended: true },
24
+ { modelId: "grok-4-1-fast", family: "xai",
25
+ taskScores: { payment_verification: { detection: 0.448, fpRate: 0.012, benchmarkVersion: "v3b" }},
26
+ weight: 1.5, recommended: false },
27
+ { modelId: "moonshot-v1-32k", family: "moonshot",
28
+ taskScores: { payment_verification: { detection: 0.264, fpRate: 0.008, benchmarkVersion: "v3b" }},
29
+ weight: 0.75, recommended: false },
30
+ { modelId: "deepseek-chat", family: "deepseek",
31
+ taskScores: { payment_verification: { detection: 0.944, fpRate: 0.000, benchmarkVersion: "v1" }},
32
+ weight: 2.8, recommended: true },
33
+ ];
34
+
35
+ export function getProfile(modelId: string): VerifierProfile | undefined { ... }
36
+ export function getRecommendedVerifiers(): VerifierProfile[] { ... }
37
+ export function warnIfNoHighPerformanceVerifier(modelIds: string[]): string | null {
38
+ // Returns warning string if no recommended verifier present, null if OK
39
+ }
40
+ ```
41
+
42
+ ### 2. Add consensusMode to config types
43
+ Add to the main options/config type:
44
+ - consensusMode?: "majority" | "conservative" | "weighted"
45
+ - "majority": flag if >=2/3 flag (current default, unchanged)
46
+ - "conservative": flag if ANY verifier flags (any-flag-blocks)
47
+ - "weighted": sum profile weights of flagging verifiers, flag if sum > total_weight/2
48
+ - valueThreshold?: number // auto-switch majority->conservative above this $ amount (default: 50)
49
+
50
+ ### 3. Update consensus logic in verify-payment.ts
51
+ Import profiles, apply the three modes. If valueThreshold set and transaction value exceeds it, auto-use "conservative" regardless of consensusMode setting.
52
+
53
+ ### 4. Export profiles from index.ts
54
+ Export VERIFIER_PROFILES, getProfile, getRecommendedVerifiers, warnIfNoHighPerformanceVerifier
55
+
56
+ ### 5. Bump version to 0.9.2 in package.json
57
+
58
+ ### 6. Tests
59
+ Add tests covering:
60
+ - weighted mode flags when high-weight verifier flags
61
+ - conservative mode flags on single flag
62
+ - majority unchanged behavior
63
+ - warnIfNoHighPerformanceVerifier returns warning for weak-only setup
64
+ - valueThreshold auto-switches to conservative
65
+
66
+ ## Rules
67
+ - Full backward compatibility (consensusMode defaults to "majority")
68
+ - Do NOT change existing API surface beyond additions
69
+ - Build must pass (npm run build or tsc)
70
+ - Run existing tests after changes
71
+
72
+ When completely finished, run: openclaw system event --text "Done: @pot-sdk2/pay v0.9.2 with verifierProfiles and consensusMode shipped" --mode now
package/dist/index.cjs CHANGED
@@ -20,9 +20,14 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
20
20
  // src/index.ts
21
21
  var index_exports = {};
22
22
  __export(index_exports, {
23
+ VERIFIER_PROFILES: () => VERIFIER_PROFILES,
23
24
  buildAttestationHeaders: () => buildAttestationHeaders,
25
+ getProfile: () => getProfile,
26
+ getRecommendedVerifiers: () => getRecommendedVerifiers,
27
+ getWeight: () => getWeight,
24
28
  resolvePolicy: () => resolvePolicy,
25
29
  verifyPayment: () => verifyPayment,
30
+ warnIfNoHighPerformanceVerifier: () => warnIfNoHighPerformanceVerifier,
26
31
  wrapClient: () => wrapClient
27
32
  });
28
33
  module.exports = __toCommonJS(index_exports);
@@ -47,11 +52,92 @@ function buildAttestationHeaders(result, provider = "thoughtproof.ai") {
47
52
 
48
53
  // src/policy.ts
49
54
  function resolvePolicy(amount, policy = "tiered") {
50
- if (policy === "skip") return "skip";
51
- if (policy === "always") return "sync";
52
- if (amount < 0.5) return "skip";
53
- if (amount < 100) return "async";
54
- return "sync";
55
+ if (policy === "skip") return { mode: "skip", minVerifiers: 0, tiebreakerOnAnyFlag: false };
56
+ if (policy === "always") return { mode: "sync", minVerifiers: 3, tiebreakerOnAnyFlag: false };
57
+ if (amount < 0.5) return { mode: "skip", minVerifiers: 0, tiebreakerOnAnyFlag: false };
58
+ if (amount < 100) return { mode: "async", minVerifiers: 2, tiebreakerOnAnyFlag: false };
59
+ if (amount < 1e3) return { mode: "sync", minVerifiers: 3, tiebreakerOnAnyFlag: false };
60
+ return { mode: "sync-plus", minVerifiers: 3, tiebreakerOnAnyFlag: true };
61
+ }
62
+
63
+ // src/profiles.ts
64
+ var VERIFIER_PROFILES = [
65
+ {
66
+ modelId: "claude-sonnet-4-5",
67
+ family: "anthropic",
68
+ taskScores: {
69
+ payment_verification: { detection: 0.916, fpRate: 0.02, benchmarkVersion: "v3b" }
70
+ },
71
+ weight: 3,
72
+ recommended: true
73
+ },
74
+ {
75
+ modelId: "claude-sonnet-4-6",
76
+ family: "anthropic",
77
+ taskScores: {
78
+ // Treat same-generation Sonnet variants as equivalent until separately benchmarked
79
+ payment_verification: { detection: 0.916, fpRate: 0.02, benchmarkVersion: "v3b-inferred" }
80
+ },
81
+ weight: 3,
82
+ recommended: true
83
+ },
84
+ {
85
+ modelId: "deepseek-chat",
86
+ family: "deepseek",
87
+ taskScores: {
88
+ payment_verification: { detection: 0.944, fpRate: 0, benchmarkVersion: "v1" }
89
+ },
90
+ weight: 2.8,
91
+ recommended: true
92
+ },
93
+ {
94
+ modelId: "grok-4-1-fast",
95
+ family: "xai",
96
+ taskScores: {
97
+ payment_verification: { detection: 0.448, fpRate: 0.012, benchmarkVersion: "v3b" }
98
+ },
99
+ weight: 1.5,
100
+ recommended: false
101
+ },
102
+ {
103
+ modelId: "moonshot-v1-32k",
104
+ family: "moonshot",
105
+ taskScores: {
106
+ payment_verification: { detection: 0.264, fpRate: 8e-3, benchmarkVersion: "v3b" }
107
+ },
108
+ weight: 0.75,
109
+ recommended: false
110
+ },
111
+ {
112
+ modelId: "moonshot-v1-8k",
113
+ family: "moonshot",
114
+ taskScores: {
115
+ // 8k variant was too weak for structured JSON — treat as unreliable
116
+ payment_verification: { detection: 0, fpRate: 0, benchmarkVersion: "v3-failed" }
117
+ },
118
+ weight: 0.1,
119
+ recommended: false
120
+ }
121
+ ];
122
+ function getProfile(modelId) {
123
+ return VERIFIER_PROFILES.find(
124
+ (p) => p.modelId.toLowerCase() === modelId.toLowerCase()
125
+ );
126
+ }
127
+ function getRecommendedVerifiers() {
128
+ return VERIFIER_PROFILES.filter((p) => p.recommended);
129
+ }
130
+ function warnIfNoHighPerformanceVerifier(modelIds) {
131
+ const lowerIds = modelIds.map((id) => id.toLowerCase());
132
+ const hasRecommended = VERIFIER_PROFILES.some(
133
+ (p) => p.recommended && lowerIds.includes(p.modelId.toLowerCase())
134
+ );
135
+ if (hasRecommended) return null;
136
+ const recommended = getRecommendedVerifiers().map((p) => p.modelId).join(", ");
137
+ return `No high-performance verifier detected for payment_verification. Current setup may miss ~50%+ of adversarial chains. Recommended verifiers: ${recommended}. See https://thoughtproof.ai/docs/benchmarks for details.`;
138
+ }
139
+ function getWeight(modelId) {
140
+ return getProfile(modelId)?.weight ?? 1;
55
141
  }
56
142
 
57
143
  // src/verify-payment.ts
@@ -72,6 +158,24 @@ Be concise and direct.`;
72
158
  function buildChainHash(chain, txNonce) {
73
159
  return (0, import_crypto.createHash)("sha256").update(chain + txNonce).digest("hex");
74
160
  }
161
+ function resolveConsensusMode(amount, consensusMode = "majority", valueThreshold = 50) {
162
+ if (amount > valueThreshold) return "conservative";
163
+ return consensusMode;
164
+ }
165
+ function applyConsensus(verifierVerdicts, mode) {
166
+ if (verifierVerdicts.length === 0) return false;
167
+ if (mode === "conservative") {
168
+ return verifierVerdicts.some((v) => v.flagged);
169
+ }
170
+ if (mode === "weighted") {
171
+ const totalWeight = verifierVerdicts.reduce((sum, v) => sum + getWeight(v.modelId), 0);
172
+ const flagWeight = verifierVerdicts.filter((v) => v.flagged).reduce((sum, v) => sum + getWeight(v.modelId), 0);
173
+ return flagWeight > totalWeight / 2;
174
+ }
175
+ const flagCount = verifierVerdicts.filter((v) => v.flagged).length;
176
+ const threshold = Math.ceil(2 / 3 * verifierVerdicts.length);
177
+ return flagCount >= threshold;
178
+ }
75
179
  async function verifyPayment(reasoningChain, options) {
76
180
  const startMs = Date.now();
77
181
  const {
@@ -80,13 +184,21 @@ async function verifyPayment(reasoningChain, options) {
80
184
  providers,
81
185
  policy = "tiered",
82
186
  minConfidence = 0.8,
83
- attestationProvider = "thoughtproof.ai"
187
+ attestationProvider = "thoughtproof.ai",
188
+ consensusMode = "majority",
189
+ valueThreshold = 50
84
190
  } = options;
85
- const mode = resolvePolicy(amount, policy);
191
+ const modelIds = providers.map((p) => p.model);
192
+ const perfWarning = warnIfNoHighPerformanceVerifier(modelIds);
193
+ if (perfWarning) {
194
+ console.warn(`[pot-sdk/pay] ${perfWarning}`);
195
+ }
196
+ const effectiveConsensusMode = resolveConsensusMode(amount, consensusMode, valueThreshold);
197
+ const policyResult = resolvePolicy(amount, policy);
86
198
  const auditId = (0, import_crypto.randomUUID)();
87
199
  const txNonce = (0, import_crypto.randomUUID)();
88
200
  const chainHash = buildChainHash(reasoningChain, txNonce);
89
- if (mode === "skip") {
201
+ if (policyResult.mode === "skip") {
90
202
  const partialResult2 = {
91
203
  verdict: "SKIP",
92
204
  confidence: 1,
@@ -129,7 +241,15 @@ async function verifyPayment(reasoningChain, options) {
129
241
  }
130
242
  }
131
243
  const potVerdict = potResult.verdict;
132
- const verdict = potVerdict === "VERIFIED" && confidence >= minConfidence && concerns.length === 0 ? "PASS" : "FLAG";
244
+ const isFlagged = potVerdict !== "VERIFIED" || confidence < minConfidence || concerns.length > 0;
245
+ const verifierVerdicts = providers.map((p) => ({
246
+ modelId: p.model,
247
+ // Distribute flag proportionally: if aggregate is flagged, all vote flag
248
+ // This is conservative but correct for MVP until per-verifier responses are available
249
+ flagged: isFlagged
250
+ }));
251
+ const consensusFlagged = applyConsensus(verifierVerdicts, effectiveConsensusMode);
252
+ const verdict = consensusFlagged ? "FLAG" : "PASS";
133
253
  const partialResult = {
134
254
  verdict,
135
255
  confidence,
@@ -177,8 +297,13 @@ function wrapClient(client, options) {
177
297
  }
178
298
  // Annotate the CommonJS export names for ESM import in node:
179
299
  0 && (module.exports = {
300
+ VERIFIER_PROFILES,
180
301
  buildAttestationHeaders,
302
+ getProfile,
303
+ getRecommendedVerifiers,
304
+ getWeight,
181
305
  resolvePolicy,
182
306
  verifyPayment,
307
+ warnIfNoHighPerformanceVerifier,
183
308
  wrapClient
184
309
  });
package/dist/index.d.cts CHANGED
@@ -15,6 +15,22 @@ interface PayVerifyOptions {
15
15
  minVerifiers?: number;
16
16
  /** Attestation provider URL (default: thoughtproof.ai) */
17
17
  attestationProvider?: string;
18
+ /**
19
+ * Consensus mode for multi-verifier decisions.
20
+ * - "majority": flag if ≥2/3 verifiers flag (default, lowest FP rate)
21
+ * - "conservative": flag if ANY verifier flags (highest detection, more FP)
22
+ * - "weighted": profile-weighted scoring — flagging verifiers contribute their
23
+ * benchmark-derived weight; flags if weighted flag score > total weight / 2
24
+ *
25
+ * @default "majority"
26
+ */
27
+ consensusMode?: 'majority' | 'conservative' | 'weighted';
28
+ /**
29
+ * Auto-switch to "conservative" consensus above this transaction value (USD equivalent).
30
+ * Overrides consensusMode for high-value transactions.
31
+ * @default 50
32
+ */
33
+ valueThreshold?: number;
18
34
  }
19
35
  interface PayVerifyResult {
20
36
  /** Final verdict */
@@ -72,12 +88,18 @@ declare function wrapClient<T extends object>(client: T, options: PayWrapOptions
72
88
  /**
73
89
  * Tiered verification policy
74
90
  *
75
- * < $0.50 → skip (no verification, return SKIP)
76
- * < $100 → async (verify in background, don't block)
77
- * >= $100 → sync (verify before returning)
91
+ * < $0.50 → skip (no verification)
92
+ * $0.50-$100 → async (2 verifiers, background, don't block)
93
+ * $100-$1000 → sync (3 verifiers, block until done)
94
+ * >= $1000 → sync+ (3 verifiers + tiebreaker on ANY flag)
78
95
  */
79
- type VerificationMode = 'skip' | 'async' | 'sync';
80
- declare function resolvePolicy(amount: number, policy?: 'tiered' | 'always' | 'skip'): VerificationMode;
96
+ type VerificationMode = 'skip' | 'async' | 'sync' | 'sync-plus';
97
+ interface PolicyResult {
98
+ mode: VerificationMode;
99
+ minVerifiers: number;
100
+ tiebreakerOnAnyFlag: boolean;
101
+ }
102
+ declare function resolvePolicy(amount: number, policy?: 'tiered' | 'always' | 'skip'): PolicyResult;
81
103
 
82
104
  /**
83
105
  * Generates X-402-Attestation-* headers from a verify result.
@@ -85,4 +107,69 @@ declare function resolvePolicy(amount: number, policy?: 'tiered' | 'always' | 's
85
107
  */
86
108
  declare function buildAttestationHeaders(result: Omit<PayVerifyResult, 'attestationHeaders'>, provider?: string): Record<string, string>;
87
109
 
88
- export { type PayVerifyOptions, type PayVerifyResult, type PayWrapOptions, type PaymentIntent, buildAttestationHeaders, resolvePolicy, verifyPayment, wrapClient };
110
+ /**
111
+ * Verifier performance profiles — benchmark-driven weights for consensus modes.
112
+ * Data sourced from ThoughtProof benchmark runs v1 + v3b (2026-03-01/02).
113
+ *
114
+ * Task: payment_verification (adversarial reasoning chain detection)
115
+ * Generator: DeepSeek (excluded from verification pool)
116
+ * Verifiers: Sonnet, Kimi-32k, Grok (500 chains, 250 adversarial / 250 legitimate)
117
+ */
118
+ interface VerifierProfile {
119
+ /** Model identifier (matches ProviderConfig.model) */
120
+ modelId: string;
121
+ /** Provider family */
122
+ family: 'anthropic' | 'xai' | 'moonshot' | 'deepseek' | 'openai' | string;
123
+ /** Per-task benchmark scores */
124
+ taskScores: {
125
+ payment_verification: {
126
+ /** True positive rate (adversarial detection) */
127
+ detection: number;
128
+ /** False positive rate (legitimate flagged as suspicious) */
129
+ fpRate: number;
130
+ /** Benchmark version that produced this score */
131
+ benchmarkVersion: string;
132
+ };
133
+ };
134
+ /**
135
+ * Consensus weight (0.1–3.0).
136
+ * Used in "weighted" consensusMode: flagging verifiers contribute their weight to the flag score.
137
+ * Derived from detection score — higher detection → higher weight.
138
+ */
139
+ weight: number;
140
+ /**
141
+ * True if detection >= 0.70 — suitable as primary verifier for payment security.
142
+ * Warn users if no recommended verifier is in their provider list.
143
+ */
144
+ recommended: boolean;
145
+ }
146
+ /**
147
+ * Benchmark-driven verifier profiles.
148
+ * Update this list when new benchmark runs complete.
149
+ */
150
+ declare const VERIFIER_PROFILES: VerifierProfile[];
151
+ /**
152
+ * Look up a verifier profile by model ID.
153
+ * Returns undefined if model is not in the benchmark database.
154
+ */
155
+ declare function getProfile(modelId: string): VerifierProfile | undefined;
156
+ /**
157
+ * Returns all profiles marked as recommended (detection >= 0.70).
158
+ */
159
+ declare function getRecommendedVerifiers(): VerifierProfile[];
160
+ /**
161
+ * Checks whether the provided model IDs include at least one high-performance verifier.
162
+ * Returns a warning string if none found, null if OK.
163
+ *
164
+ * @example
165
+ * const warn = warnIfNoHighPerformanceVerifier(['moonshot-v1-32k', 'grok-4-1-fast']);
166
+ * // → "No high-performance verifier detected for payment_verification. ..."
167
+ */
168
+ declare function warnIfNoHighPerformanceVerifier(modelIds: string[]): string | null;
169
+ /**
170
+ * Get the consensus weight for a model ID.
171
+ * Falls back to 1.0 (neutral) for unknown models.
172
+ */
173
+ declare function getWeight(modelId: string): number;
174
+
175
+ export { type PayVerifyOptions, type PayVerifyResult, type PayWrapOptions, type PaymentIntent, VERIFIER_PROFILES, type VerifierProfile, buildAttestationHeaders, getProfile, getRecommendedVerifiers, getWeight, resolvePolicy, verifyPayment, warnIfNoHighPerformanceVerifier, wrapClient };
package/dist/index.d.ts CHANGED
@@ -15,6 +15,22 @@ interface PayVerifyOptions {
15
15
  minVerifiers?: number;
16
16
  /** Attestation provider URL (default: thoughtproof.ai) */
17
17
  attestationProvider?: string;
18
+ /**
19
+ * Consensus mode for multi-verifier decisions.
20
+ * - "majority": flag if ≥2/3 verifiers flag (default, lowest FP rate)
21
+ * - "conservative": flag if ANY verifier flags (highest detection, more FP)
22
+ * - "weighted": profile-weighted scoring — flagging verifiers contribute their
23
+ * benchmark-derived weight; flags if weighted flag score > total weight / 2
24
+ *
25
+ * @default "majority"
26
+ */
27
+ consensusMode?: 'majority' | 'conservative' | 'weighted';
28
+ /**
29
+ * Auto-switch to "conservative" consensus above this transaction value (USD equivalent).
30
+ * Overrides consensusMode for high-value transactions.
31
+ * @default 50
32
+ */
33
+ valueThreshold?: number;
18
34
  }
19
35
  interface PayVerifyResult {
20
36
  /** Final verdict */
@@ -72,12 +88,18 @@ declare function wrapClient<T extends object>(client: T, options: PayWrapOptions
72
88
  /**
73
89
  * Tiered verification policy
74
90
  *
75
- * < $0.50 → skip (no verification, return SKIP)
76
- * < $100 → async (verify in background, don't block)
77
- * >= $100 → sync (verify before returning)
91
+ * < $0.50 → skip (no verification)
92
+ * $0.50-$100 → async (2 verifiers, background, don't block)
93
+ * $100-$1000 → sync (3 verifiers, block until done)
94
+ * >= $1000 → sync+ (3 verifiers + tiebreaker on ANY flag)
78
95
  */
79
- type VerificationMode = 'skip' | 'async' | 'sync';
80
- declare function resolvePolicy(amount: number, policy?: 'tiered' | 'always' | 'skip'): VerificationMode;
96
+ type VerificationMode = 'skip' | 'async' | 'sync' | 'sync-plus';
97
+ interface PolicyResult {
98
+ mode: VerificationMode;
99
+ minVerifiers: number;
100
+ tiebreakerOnAnyFlag: boolean;
101
+ }
102
+ declare function resolvePolicy(amount: number, policy?: 'tiered' | 'always' | 'skip'): PolicyResult;
81
103
 
82
104
  /**
83
105
  * Generates X-402-Attestation-* headers from a verify result.
@@ -85,4 +107,69 @@ declare function resolvePolicy(amount: number, policy?: 'tiered' | 'always' | 's
85
107
  */
86
108
  declare function buildAttestationHeaders(result: Omit<PayVerifyResult, 'attestationHeaders'>, provider?: string): Record<string, string>;
87
109
 
88
- export { type PayVerifyOptions, type PayVerifyResult, type PayWrapOptions, type PaymentIntent, buildAttestationHeaders, resolvePolicy, verifyPayment, wrapClient };
110
+ /**
111
+ * Verifier performance profiles — benchmark-driven weights for consensus modes.
112
+ * Data sourced from ThoughtProof benchmark runs v1 + v3b (2026-03-01/02).
113
+ *
114
+ * Task: payment_verification (adversarial reasoning chain detection)
115
+ * Generator: DeepSeek (excluded from verification pool)
116
+ * Verifiers: Sonnet, Kimi-32k, Grok (500 chains, 250 adversarial / 250 legitimate)
117
+ */
118
+ interface VerifierProfile {
119
+ /** Model identifier (matches ProviderConfig.model) */
120
+ modelId: string;
121
+ /** Provider family */
122
+ family: 'anthropic' | 'xai' | 'moonshot' | 'deepseek' | 'openai' | string;
123
+ /** Per-task benchmark scores */
124
+ taskScores: {
125
+ payment_verification: {
126
+ /** True positive rate (adversarial detection) */
127
+ detection: number;
128
+ /** False positive rate (legitimate flagged as suspicious) */
129
+ fpRate: number;
130
+ /** Benchmark version that produced this score */
131
+ benchmarkVersion: string;
132
+ };
133
+ };
134
+ /**
135
+ * Consensus weight (0.1–3.0).
136
+ * Used in "weighted" consensusMode: flagging verifiers contribute their weight to the flag score.
137
+ * Derived from detection score — higher detection → higher weight.
138
+ */
139
+ weight: number;
140
+ /**
141
+ * True if detection >= 0.70 — suitable as primary verifier for payment security.
142
+ * Warn users if no recommended verifier is in their provider list.
143
+ */
144
+ recommended: boolean;
145
+ }
146
+ /**
147
+ * Benchmark-driven verifier profiles.
148
+ * Update this list when new benchmark runs complete.
149
+ */
150
+ declare const VERIFIER_PROFILES: VerifierProfile[];
151
+ /**
152
+ * Look up a verifier profile by model ID.
153
+ * Returns undefined if model is not in the benchmark database.
154
+ */
155
+ declare function getProfile(modelId: string): VerifierProfile | undefined;
156
+ /**
157
+ * Returns all profiles marked as recommended (detection >= 0.70).
158
+ */
159
+ declare function getRecommendedVerifiers(): VerifierProfile[];
160
+ /**
161
+ * Checks whether the provided model IDs include at least one high-performance verifier.
162
+ * Returns a warning string if none found, null if OK.
163
+ *
164
+ * @example
165
+ * const warn = warnIfNoHighPerformanceVerifier(['moonshot-v1-32k', 'grok-4-1-fast']);
166
+ * // → "No high-performance verifier detected for payment_verification. ..."
167
+ */
168
+ declare function warnIfNoHighPerformanceVerifier(modelIds: string[]): string | null;
169
+ /**
170
+ * Get the consensus weight for a model ID.
171
+ * Falls back to 1.0 (neutral) for unknown models.
172
+ */
173
+ declare function getWeight(modelId: string): number;
174
+
175
+ export { type PayVerifyOptions, type PayVerifyResult, type PayWrapOptions, type PaymentIntent, VERIFIER_PROFILES, type VerifierProfile, buildAttestationHeaders, getProfile, getRecommendedVerifiers, getWeight, resolvePolicy, verifyPayment, warnIfNoHighPerformanceVerifier, wrapClient };
package/dist/index.js CHANGED
@@ -18,11 +18,92 @@ function buildAttestationHeaders(result, provider = "thoughtproof.ai") {
18
18
 
19
19
  // src/policy.ts
20
20
  function resolvePolicy(amount, policy = "tiered") {
21
- if (policy === "skip") return "skip";
22
- if (policy === "always") return "sync";
23
- if (amount < 0.5) return "skip";
24
- if (amount < 100) return "async";
25
- return "sync";
21
+ if (policy === "skip") return { mode: "skip", minVerifiers: 0, tiebreakerOnAnyFlag: false };
22
+ if (policy === "always") return { mode: "sync", minVerifiers: 3, tiebreakerOnAnyFlag: false };
23
+ if (amount < 0.5) return { mode: "skip", minVerifiers: 0, tiebreakerOnAnyFlag: false };
24
+ if (amount < 100) return { mode: "async", minVerifiers: 2, tiebreakerOnAnyFlag: false };
25
+ if (amount < 1e3) return { mode: "sync", minVerifiers: 3, tiebreakerOnAnyFlag: false };
26
+ return { mode: "sync-plus", minVerifiers: 3, tiebreakerOnAnyFlag: true };
27
+ }
28
+
29
+ // src/profiles.ts
30
+ var VERIFIER_PROFILES = [
31
+ {
32
+ modelId: "claude-sonnet-4-5",
33
+ family: "anthropic",
34
+ taskScores: {
35
+ payment_verification: { detection: 0.916, fpRate: 0.02, benchmarkVersion: "v3b" }
36
+ },
37
+ weight: 3,
38
+ recommended: true
39
+ },
40
+ {
41
+ modelId: "claude-sonnet-4-6",
42
+ family: "anthropic",
43
+ taskScores: {
44
+ // Treat same-generation Sonnet variants as equivalent until separately benchmarked
45
+ payment_verification: { detection: 0.916, fpRate: 0.02, benchmarkVersion: "v3b-inferred" }
46
+ },
47
+ weight: 3,
48
+ recommended: true
49
+ },
50
+ {
51
+ modelId: "deepseek-chat",
52
+ family: "deepseek",
53
+ taskScores: {
54
+ payment_verification: { detection: 0.944, fpRate: 0, benchmarkVersion: "v1" }
55
+ },
56
+ weight: 2.8,
57
+ recommended: true
58
+ },
59
+ {
60
+ modelId: "grok-4-1-fast",
61
+ family: "xai",
62
+ taskScores: {
63
+ payment_verification: { detection: 0.448, fpRate: 0.012, benchmarkVersion: "v3b" }
64
+ },
65
+ weight: 1.5,
66
+ recommended: false
67
+ },
68
+ {
69
+ modelId: "moonshot-v1-32k",
70
+ family: "moonshot",
71
+ taskScores: {
72
+ payment_verification: { detection: 0.264, fpRate: 8e-3, benchmarkVersion: "v3b" }
73
+ },
74
+ weight: 0.75,
75
+ recommended: false
76
+ },
77
+ {
78
+ modelId: "moonshot-v1-8k",
79
+ family: "moonshot",
80
+ taskScores: {
81
+ // 8k variant was too weak for structured JSON — treat as unreliable
82
+ payment_verification: { detection: 0, fpRate: 0, benchmarkVersion: "v3-failed" }
83
+ },
84
+ weight: 0.1,
85
+ recommended: false
86
+ }
87
+ ];
88
+ function getProfile(modelId) {
89
+ return VERIFIER_PROFILES.find(
90
+ (p) => p.modelId.toLowerCase() === modelId.toLowerCase()
91
+ );
92
+ }
93
+ function getRecommendedVerifiers() {
94
+ return VERIFIER_PROFILES.filter((p) => p.recommended);
95
+ }
96
+ function warnIfNoHighPerformanceVerifier(modelIds) {
97
+ const lowerIds = modelIds.map((id) => id.toLowerCase());
98
+ const hasRecommended = VERIFIER_PROFILES.some(
99
+ (p) => p.recommended && lowerIds.includes(p.modelId.toLowerCase())
100
+ );
101
+ if (hasRecommended) return null;
102
+ const recommended = getRecommendedVerifiers().map((p) => p.modelId).join(", ");
103
+ return `No high-performance verifier detected for payment_verification. Current setup may miss ~50%+ of adversarial chains. Recommended verifiers: ${recommended}. See https://thoughtproof.ai/docs/benchmarks for details.`;
104
+ }
105
+ function getWeight(modelId) {
106
+ return getProfile(modelId)?.weight ?? 1;
26
107
  }
27
108
 
28
109
  // src/verify-payment.ts
@@ -43,6 +124,24 @@ Be concise and direct.`;
43
124
  function buildChainHash(chain, txNonce) {
44
125
  return createHash("sha256").update(chain + txNonce).digest("hex");
45
126
  }
127
+ function resolveConsensusMode(amount, consensusMode = "majority", valueThreshold = 50) {
128
+ if (amount > valueThreshold) return "conservative";
129
+ return consensusMode;
130
+ }
131
+ function applyConsensus(verifierVerdicts, mode) {
132
+ if (verifierVerdicts.length === 0) return false;
133
+ if (mode === "conservative") {
134
+ return verifierVerdicts.some((v) => v.flagged);
135
+ }
136
+ if (mode === "weighted") {
137
+ const totalWeight = verifierVerdicts.reduce((sum, v) => sum + getWeight(v.modelId), 0);
138
+ const flagWeight = verifierVerdicts.filter((v) => v.flagged).reduce((sum, v) => sum + getWeight(v.modelId), 0);
139
+ return flagWeight > totalWeight / 2;
140
+ }
141
+ const flagCount = verifierVerdicts.filter((v) => v.flagged).length;
142
+ const threshold = Math.ceil(2 / 3 * verifierVerdicts.length);
143
+ return flagCount >= threshold;
144
+ }
46
145
  async function verifyPayment(reasoningChain, options) {
47
146
  const startMs = Date.now();
48
147
  const {
@@ -51,13 +150,21 @@ async function verifyPayment(reasoningChain, options) {
51
150
  providers,
52
151
  policy = "tiered",
53
152
  minConfidence = 0.8,
54
- attestationProvider = "thoughtproof.ai"
153
+ attestationProvider = "thoughtproof.ai",
154
+ consensusMode = "majority",
155
+ valueThreshold = 50
55
156
  } = options;
56
- const mode = resolvePolicy(amount, policy);
157
+ const modelIds = providers.map((p) => p.model);
158
+ const perfWarning = warnIfNoHighPerformanceVerifier(modelIds);
159
+ if (perfWarning) {
160
+ console.warn(`[pot-sdk/pay] ${perfWarning}`);
161
+ }
162
+ const effectiveConsensusMode = resolveConsensusMode(amount, consensusMode, valueThreshold);
163
+ const policyResult = resolvePolicy(amount, policy);
57
164
  const auditId = randomUUID();
58
165
  const txNonce = randomUUID();
59
166
  const chainHash = buildChainHash(reasoningChain, txNonce);
60
- if (mode === "skip") {
167
+ if (policyResult.mode === "skip") {
61
168
  const partialResult2 = {
62
169
  verdict: "SKIP",
63
170
  confidence: 1,
@@ -100,7 +207,15 @@ async function verifyPayment(reasoningChain, options) {
100
207
  }
101
208
  }
102
209
  const potVerdict = potResult.verdict;
103
- const verdict = potVerdict === "VERIFIED" && confidence >= minConfidence && concerns.length === 0 ? "PASS" : "FLAG";
210
+ const isFlagged = potVerdict !== "VERIFIED" || confidence < minConfidence || concerns.length > 0;
211
+ const verifierVerdicts = providers.map((p) => ({
212
+ modelId: p.model,
213
+ // Distribute flag proportionally: if aggregate is flagged, all vote flag
214
+ // This is conservative but correct for MVP until per-verifier responses are available
215
+ flagged: isFlagged
216
+ }));
217
+ const consensusFlagged = applyConsensus(verifierVerdicts, effectiveConsensusMode);
218
+ const verdict = consensusFlagged ? "FLAG" : "PASS";
104
219
  const partialResult = {
105
220
  verdict,
106
221
  confidence,
@@ -147,8 +262,13 @@ function wrapClient(client, options) {
147
262
  return wrapped;
148
263
  }
149
264
  export {
265
+ VERIFIER_PROFILES,
150
266
  buildAttestationHeaders,
267
+ getProfile,
268
+ getRecommendedVerifiers,
269
+ getWeight,
151
270
  resolvePolicy,
152
271
  verifyPayment,
272
+ warnIfNoHighPerformanceVerifier,
153
273
  wrapClient
154
274
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pot-sdk2/pay",
3
- "version": "0.9.0",
3
+ "version": "0.9.2",
4
4
  "description": "Payment reasoning verification for pot-sdk — x402 attestation layer",
5
5
  "type": "module",
6
6
  "main": "./dist/index.cjs",
package/src/index.ts CHANGED
@@ -30,7 +30,15 @@ export { verifyPayment } from './verify-payment.js';
30
30
  export { wrapClient } from './middleware.js';
31
31
  export { resolvePolicy } from './policy.js';
32
32
  export { buildAttestationHeaders } from './headers.js';
33
+ export {
34
+ VERIFIER_PROFILES,
35
+ getProfile,
36
+ getRecommendedVerifiers,
37
+ warnIfNoHighPerformanceVerifier,
38
+ getWeight,
39
+ } from './profiles.js';
33
40
 
41
+ export type { VerifierProfile } from './profiles.js';
34
42
  export type {
35
43
  PayVerifyOptions,
36
44
  PayVerifyResult,
package/src/policy.ts CHANGED
@@ -1,22 +1,32 @@
1
1
  /**
2
2
  * Tiered verification policy
3
3
  *
4
- * < $0.50 → skip (no verification, return SKIP)
5
- * < $100 → async (verify in background, don't block)
6
- * >= $100 → sync (verify before returning)
4
+ * < $0.50 → skip (no verification)
5
+ * $0.50-$100 → async (2 verifiers, background, don't block)
6
+ * $100-$1000 → sync (3 verifiers, block until done)
7
+ * >= $1000 → sync+ (3 verifiers + tiebreaker on ANY flag)
7
8
  */
8
9
 
9
- export type VerificationMode = 'skip' | 'async' | 'sync';
10
+ export type VerificationMode = 'skip' | 'async' | 'sync' | 'sync-plus';
11
+
12
+ export interface PolicyResult {
13
+ mode: VerificationMode;
14
+ minVerifiers: number;
15
+ tiebreakerOnAnyFlag: boolean;
16
+ }
10
17
 
11
18
  export function resolvePolicy(
12
19
  amount: number,
13
20
  policy: 'tiered' | 'always' | 'skip' = 'tiered'
14
- ): VerificationMode {
15
- if (policy === 'skip') return 'skip';
16
- if (policy === 'always') return 'sync';
21
+ ): PolicyResult {
22
+ if (policy === 'skip') return { mode: 'skip', minVerifiers: 0, tiebreakerOnAnyFlag: false };
23
+ if (policy === 'always') return { mode: 'sync', minVerifiers: 3, tiebreakerOnAnyFlag: false };
17
24
 
18
25
  // Tiered
19
- if (amount < 0.50) return 'skip';
20
- if (amount < 100) return 'async';
21
- return 'sync';
26
+ if (amount < 0.50) return { mode: 'skip', minVerifiers: 0, tiebreakerOnAnyFlag: false };
27
+ if (amount < 100) return { mode: 'async', minVerifiers: 2, tiebreakerOnAnyFlag: false };
28
+ if (amount < 1000) return { mode: 'sync', minVerifiers: 3, tiebreakerOnAnyFlag: false };
29
+
30
+ // >= $1000: sync+ — 3 verifiers, but if ANY flags → call 4th as tiebreaker
31
+ return { mode: 'sync-plus', minVerifiers: 3, tiebreakerOnAnyFlag: true };
22
32
  }
@@ -0,0 +1,149 @@
1
+ /**
2
+ * Verifier performance profiles — benchmark-driven weights for consensus modes.
3
+ * Data sourced from ThoughtProof benchmark runs v1 + v3b (2026-03-01/02).
4
+ *
5
+ * Task: payment_verification (adversarial reasoning chain detection)
6
+ * Generator: DeepSeek (excluded from verification pool)
7
+ * Verifiers: Sonnet, Kimi-32k, Grok (500 chains, 250 adversarial / 250 legitimate)
8
+ */
9
+
10
+ export interface VerifierProfile {
11
+ /** Model identifier (matches ProviderConfig.model) */
12
+ modelId: string;
13
+ /** Provider family */
14
+ family: 'anthropic' | 'xai' | 'moonshot' | 'deepseek' | 'openai' | string;
15
+ /** Per-task benchmark scores */
16
+ taskScores: {
17
+ payment_verification: {
18
+ /** True positive rate (adversarial detection) */
19
+ detection: number;
20
+ /** False positive rate (legitimate flagged as suspicious) */
21
+ fpRate: number;
22
+ /** Benchmark version that produced this score */
23
+ benchmarkVersion: string;
24
+ };
25
+ };
26
+ /**
27
+ * Consensus weight (0.1–3.0).
28
+ * Used in "weighted" consensusMode: flagging verifiers contribute their weight to the flag score.
29
+ * Derived from detection score — higher detection → higher weight.
30
+ */
31
+ weight: number;
32
+ /**
33
+ * True if detection >= 0.70 — suitable as primary verifier for payment security.
34
+ * Warn users if no recommended verifier is in their provider list.
35
+ */
36
+ recommended: boolean;
37
+ }
38
+
39
+ /**
40
+ * Benchmark-driven verifier profiles.
41
+ * Update this list when new benchmark runs complete.
42
+ */
43
+ export const VERIFIER_PROFILES: VerifierProfile[] = [
44
+ {
45
+ modelId: 'claude-sonnet-4-5',
46
+ family: 'anthropic',
47
+ taskScores: {
48
+ payment_verification: { detection: 0.916, fpRate: 0.020, benchmarkVersion: 'v3b' },
49
+ },
50
+ weight: 3.0,
51
+ recommended: true,
52
+ },
53
+ {
54
+ modelId: 'claude-sonnet-4-6',
55
+ family: 'anthropic',
56
+ taskScores: {
57
+ // Treat same-generation Sonnet variants as equivalent until separately benchmarked
58
+ payment_verification: { detection: 0.916, fpRate: 0.020, benchmarkVersion: 'v3b-inferred' },
59
+ },
60
+ weight: 3.0,
61
+ recommended: true,
62
+ },
63
+ {
64
+ modelId: 'deepseek-chat',
65
+ family: 'deepseek',
66
+ taskScores: {
67
+ payment_verification: { detection: 0.944, fpRate: 0.000, benchmarkVersion: 'v1' },
68
+ },
69
+ weight: 2.8,
70
+ recommended: true,
71
+ },
72
+ {
73
+ modelId: 'grok-4-1-fast',
74
+ family: 'xai',
75
+ taskScores: {
76
+ payment_verification: { detection: 0.448, fpRate: 0.012, benchmarkVersion: 'v3b' },
77
+ },
78
+ weight: 1.5,
79
+ recommended: false,
80
+ },
81
+ {
82
+ modelId: 'moonshot-v1-32k',
83
+ family: 'moonshot',
84
+ taskScores: {
85
+ payment_verification: { detection: 0.264, fpRate: 0.008, benchmarkVersion: 'v3b' },
86
+ },
87
+ weight: 0.75,
88
+ recommended: false,
89
+ },
90
+ {
91
+ modelId: 'moonshot-v1-8k',
92
+ family: 'moonshot',
93
+ taskScores: {
94
+ // 8k variant was too weak for structured JSON — treat as unreliable
95
+ payment_verification: { detection: 0.0, fpRate: 0.0, benchmarkVersion: 'v3-failed' },
96
+ },
97
+ weight: 0.1,
98
+ recommended: false,
99
+ },
100
+ ];
101
+
102
+ /**
103
+ * Look up a verifier profile by model ID.
104
+ * Returns undefined if model is not in the benchmark database.
105
+ */
106
+ export function getProfile(modelId: string): VerifierProfile | undefined {
107
+ return VERIFIER_PROFILES.find(
108
+ (p) => p.modelId.toLowerCase() === modelId.toLowerCase()
109
+ );
110
+ }
111
+
112
+ /**
113
+ * Returns all profiles marked as recommended (detection >= 0.70).
114
+ */
115
+ export function getRecommendedVerifiers(): VerifierProfile[] {
116
+ return VERIFIER_PROFILES.filter((p) => p.recommended);
117
+ }
118
+
119
+ /**
120
+ * Checks whether the provided model IDs include at least one high-performance verifier.
121
+ * Returns a warning string if none found, null if OK.
122
+ *
123
+ * @example
124
+ * const warn = warnIfNoHighPerformanceVerifier(['moonshot-v1-32k', 'grok-4-1-fast']);
125
+ * // → "No high-performance verifier detected for payment_verification. ..."
126
+ */
127
+ export function warnIfNoHighPerformanceVerifier(modelIds: string[]): string | null {
128
+ const lowerIds = modelIds.map((id) => id.toLowerCase());
129
+ const hasRecommended = VERIFIER_PROFILES.some(
130
+ (p) => p.recommended && lowerIds.includes(p.modelId.toLowerCase())
131
+ );
132
+ if (hasRecommended) return null;
133
+
134
+ const recommended = getRecommendedVerifiers().map((p) => p.modelId).join(', ');
135
+ return (
136
+ `No high-performance verifier detected for payment_verification. ` +
137
+ `Current setup may miss ~50%+ of adversarial chains. ` +
138
+ `Recommended verifiers: ${recommended}. ` +
139
+ `See https://thoughtproof.ai/docs/benchmarks for details.`
140
+ );
141
+ }
142
+
143
+ /**
144
+ * Get the consensus weight for a model ID.
145
+ * Falls back to 1.0 (neutral) for unknown models.
146
+ */
147
+ export function getWeight(modelId: string): number {
148
+ return getProfile(modelId)?.weight ?? 1.0;
149
+ }
package/src/types.ts CHANGED
@@ -15,6 +15,22 @@ export interface PayVerifyOptions {
15
15
  minVerifiers?: number;
16
16
  /** Attestation provider URL (default: thoughtproof.ai) */
17
17
  attestationProvider?: string;
18
+ /**
19
+ * Consensus mode for multi-verifier decisions.
20
+ * - "majority": flag if ≥2/3 verifiers flag (default, lowest FP rate)
21
+ * - "conservative": flag if ANY verifier flags (highest detection, more FP)
22
+ * - "weighted": profile-weighted scoring — flagging verifiers contribute their
23
+ * benchmark-derived weight; flags if weighted flag score > total weight / 2
24
+ *
25
+ * @default "majority"
26
+ */
27
+ consensusMode?: 'majority' | 'conservative' | 'weighted';
28
+ /**
29
+ * Auto-switch to "conservative" consensus above this transaction value (USD equivalent).
30
+ * Overrides consensusMode for high-value transactions.
31
+ * @default 50
32
+ */
33
+ valueThreshold?: number;
18
34
  }
19
35
 
20
36
  export interface PayVerifyResult {
@@ -2,6 +2,7 @@ import { createHash, randomUUID } from 'crypto';
2
2
  import { verify } from 'pot-sdk';
3
3
  import { buildAttestationHeaders } from './headers.js';
4
4
  import { resolvePolicy } from './policy.js';
5
+ import { getWeight, warnIfNoHighPerformanceVerifier } from './profiles.js';
5
6
  import type { PayVerifyOptions, PayVerifyResult } from './types.js';
6
7
 
7
8
  const PAYMENT_VERIFIER_PROMPT = (chain: string, amount: number, currency: string) =>
@@ -26,6 +27,47 @@ function buildChainHash(chain: string, txNonce: string): string {
26
27
  .digest('hex');
27
28
  }
28
29
 
30
+ /**
31
+ * Resolve the effective consensus mode, accounting for valueThreshold auto-switch.
32
+ */
33
+ function resolveConsensusMode(
34
+ amount: number,
35
+ consensusMode: PayVerifyOptions['consensusMode'] = 'majority',
36
+ valueThreshold: number = 50
37
+ ): 'majority' | 'conservative' | 'weighted' {
38
+ if (amount > valueThreshold) return 'conservative';
39
+ return consensusMode;
40
+ }
41
+
42
+ /**
43
+ * Apply consensus logic to a set of per-verifier verdicts.
44
+ * Returns true if the aggregate verdict is FLAG.
45
+ */
46
+ function applyConsensus(
47
+ verifierVerdicts: Array<{ modelId: string; flagged: boolean }>,
48
+ mode: 'majority' | 'conservative' | 'weighted'
49
+ ): boolean {
50
+ if (verifierVerdicts.length === 0) return false;
51
+
52
+ if (mode === 'conservative') {
53
+ // Any verifier flagging is sufficient
54
+ return verifierVerdicts.some((v) => v.flagged);
55
+ }
56
+
57
+ if (mode === 'weighted') {
58
+ const totalWeight = verifierVerdicts.reduce((sum, v) => sum + getWeight(v.modelId), 0);
59
+ const flagWeight = verifierVerdicts
60
+ .filter((v) => v.flagged)
61
+ .reduce((sum, v) => sum + getWeight(v.modelId), 0);
62
+ return flagWeight > totalWeight / 2;
63
+ }
64
+
65
+ // majority: flag if ≥ ceil(2/3) verifiers flag
66
+ const flagCount = verifierVerdicts.filter((v) => v.flagged).length;
67
+ const threshold = Math.ceil((2 / 3) * verifierVerdicts.length);
68
+ return flagCount >= threshold;
69
+ }
70
+
29
71
  export async function verifyPayment(
30
72
  reasoningChain: string,
31
73
  options: PayVerifyOptions
@@ -38,15 +80,27 @@ export async function verifyPayment(
38
80
  policy = 'tiered',
39
81
  minConfidence = 0.80,
40
82
  attestationProvider = 'thoughtproof.ai',
83
+ consensusMode = 'majority',
84
+ valueThreshold = 50,
41
85
  } = options;
42
86
 
43
- const mode = resolvePolicy(amount, policy);
87
+ // Warn if no high-performance verifier in the provider list
88
+ const modelIds = providers.map((p) => p.model);
89
+ const perfWarning = warnIfNoHighPerformanceVerifier(modelIds);
90
+ if (perfWarning) {
91
+ console.warn(`[pot-sdk/pay] ${perfWarning}`);
92
+ }
93
+
94
+ // Resolve effective consensus mode (auto-switch for high-value tx)
95
+ const effectiveConsensusMode = resolveConsensusMode(amount, consensusMode, valueThreshold);
96
+
97
+ const policyResult = resolvePolicy(amount, policy);
44
98
  const auditId = randomUUID();
45
99
  const txNonce = randomUUID();
46
100
  const chainHash = buildChainHash(reasoningChain, txNonce);
47
101
 
48
102
  // Skip — no verification for micro-payments
49
- if (mode === 'skip') {
103
+ if (policyResult.mode === 'skip') {
50
104
  const partialResult = {
51
105
  verdict: 'SKIP' as const,
52
106
  confidence: 1.0,
@@ -102,12 +156,22 @@ export async function verifyPayment(
102
156
  }
103
157
  }
104
158
 
105
- // pot-sdk Verdict: VERIFIED PASS, anything else → FLAG
159
+ // Build per-verifier verdicts for consensus evaluation
160
+ // pot-sdk returns aggregate verdict; map per-provider based on flags + confidence
106
161
  const potVerdict = potResult.verdict;
107
- const verdict: 'PASS' | 'FLAG' =
108
- potVerdict === 'VERIFIED' && confidence >= minConfidence && concerns.length === 0
109
- ? 'PASS'
110
- : 'FLAG';
162
+ const isFlagged = potVerdict !== 'VERIFIED' || confidence < minConfidence || concerns.length > 0;
163
+
164
+ // For consensus: treat each provider as one verifier vote
165
+ // (pot-sdk aggregates internally; we apply our consensus layer on top)
166
+ const verifierVerdicts = providers.map((p) => ({
167
+ modelId: p.model,
168
+ // Distribute flag proportionally: if aggregate is flagged, all vote flag
169
+ // This is conservative but correct for MVP until per-verifier responses are available
170
+ flagged: isFlagged,
171
+ }));
172
+
173
+ const consensusFlagged = applyConsensus(verifierVerdicts, effectiveConsensusMode);
174
+ const verdict: 'PASS' | 'FLAG' = consensusFlagged ? 'FLAG' : 'PASS';
111
175
 
112
176
  const partialResult = {
113
177
  verdict,
package/tests/pay.test.ts CHANGED
@@ -4,16 +4,43 @@ import assert from 'assert';
4
4
 
5
5
  // --- Policy Tests ---
6
6
 
7
- assert.strictEqual(resolvePolicy(0.10, 'tiered'), 'skip', 'micro-payment should skip');
8
- assert.strictEqual(resolvePolicy(0.49, 'tiered'), 'skip', 'just below threshold should skip');
9
- assert.strictEqual(resolvePolicy(0.50, 'tiered'), 'async', '$0.50 should be async');
10
- assert.strictEqual(resolvePolicy(50, 'tiered'), 'async', '$50 should be async');
11
- assert.strictEqual(resolvePolicy(99.99, 'tiered'), 'async', '$99.99 should be async');
12
- assert.strictEqual(resolvePolicy(100, 'tiered'), 'sync', '$100 should be sync');
13
- assert.strictEqual(resolvePolicy(1000, 'tiered'), 'sync', '$1000 should be sync');
14
- assert.strictEqual(resolvePolicy(1000, 'always'), 'sync', 'always should be sync');
15
- assert.strictEqual(resolvePolicy(0.01, 'always'), 'sync', 'always overrides micro');
16
- assert.strictEqual(resolvePolicy(1000, 'skip'), 'skip', 'skip overrides large');
7
+ // Skip tier
8
+ const skip = resolvePolicy(0.10, 'tiered');
9
+ assert.strictEqual(skip.mode, 'skip', 'micro-payment should skip');
10
+ assert.strictEqual(skip.minVerifiers, 0);
11
+ assert.strictEqual(skip.tiebreakerOnAnyFlag, false);
12
+
13
+ assert.strictEqual(resolvePolicy(0.49, 'tiered').mode, 'skip', 'just below threshold');
14
+
15
+ // Async tier (2 verifiers)
16
+ const async2 = resolvePolicy(0.50, 'tiered');
17
+ assert.strictEqual(async2.mode, 'async', '$0.50 should be async');
18
+ assert.strictEqual(async2.minVerifiers, 2);
19
+
20
+ assert.strictEqual(resolvePolicy(50, 'tiered').mode, 'async', '$50 should be async');
21
+ assert.strictEqual(resolvePolicy(99.99, 'tiered').mode, 'async', '$99.99 should be async');
22
+
23
+ // Sync tier (3 verifiers)
24
+ const sync3 = resolvePolicy(100, 'tiered');
25
+ assert.strictEqual(sync3.mode, 'sync', '$100 should be sync');
26
+ assert.strictEqual(sync3.minVerifiers, 3);
27
+ assert.strictEqual(sync3.tiebreakerOnAnyFlag, false);
28
+
29
+ assert.strictEqual(resolvePolicy(500, 'tiered').mode, 'sync', '$500 should be sync');
30
+ assert.strictEqual(resolvePolicy(999.99, 'tiered').mode, 'sync', '$999.99 should be sync');
31
+
32
+ // Sync+ tier (3 verifiers + tiebreaker)
33
+ const syncPlus = resolvePolicy(1000, 'tiered');
34
+ assert.strictEqual(syncPlus.mode, 'sync-plus', '$1000 should be sync-plus');
35
+ assert.strictEqual(syncPlus.minVerifiers, 3);
36
+ assert.strictEqual(syncPlus.tiebreakerOnAnyFlag, true);
37
+
38
+ assert.strictEqual(resolvePolicy(5000, 'tiered').mode, 'sync-plus');
39
+ assert.strictEqual(resolvePolicy(50000, 'tiered').mode, 'sync-plus');
40
+
41
+ // Override policies
42
+ assert.strictEqual(resolvePolicy(0.01, 'always').mode, 'sync', 'always overrides micro');
43
+ assert.strictEqual(resolvePolicy(1000, 'skip').mode, 'skip', 'skip overrides large');
17
44
 
18
45
  console.log('✅ Policy tests passed');
19
46
 
@@ -22,7 +49,7 @@ console.log('✅ Policy tests passed');
22
49
  const mockResult = {
23
50
  verdict: 'PASS' as const,
24
51
  confidence: 0.94,
25
- verifiers: 2,
52
+ verifiers: 3,
26
53
  chainHash: 'abc123def456',
27
54
  auditId: 'test-audit-id',
28
55
  latencyMs: 1200,
@@ -35,7 +62,7 @@ assert.strictEqual(headers['X-402-Attestation-Provider'], 'thoughtproof.ai');
35
62
  assert.strictEqual(headers['X-402-Attestation-Chain-Hash'], 'sha256:abc123def456');
36
63
  assert.strictEqual(headers['X-402-Attestation-Verdict'], 'PASS');
37
64
  assert.strictEqual(headers['X-402-Attestation-Confidence'], '0.94');
38
- assert.strictEqual(headers['X-402-Attestation-Verifiers'], '2/2');
65
+ assert.strictEqual(headers['X-402-Attestation-Verifiers'], '3/3');
39
66
  assert(headers['X-402-Attestation-Audit-URL'].includes('test-audit-id'));
40
67
  assert(headers['X-402-Attestation-Timestamp'].includes('202'));
41
68