incremnt 0.7.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +57 -1
- package/package.json +2 -1
- package/src/ask-answer-verifier.js +857 -0
- package/src/ask-coach.js +2634 -0
- package/src/ask-replay.js +358 -0
- package/src/auth.js +169 -15
- package/src/coach-facts.js +14 -1
- package/src/contract.js +160 -3
- package/src/format.js +68 -2
- package/src/lib.js +205 -17
- package/src/mcp.js +88 -24
- package/src/openrouter.js +261 -33
- package/src/plan-changeset.js +132 -0
- package/src/plan-comparison.js +245 -0
- package/src/program-draft.js +230 -0
- package/src/prompt-changelog.js +184 -0
- package/src/promptfoo-evals.js +10 -4
- package/src/promptfoo-langfuse-scores.js +55 -0
- package/src/queries.js +1442 -786
- package/src/remote.js +465 -12
- package/src/score-context.js +14 -7
- package/src/score-prelude.js +113 -0
- package/src/service-url.js +9 -0
- package/src/summary-evals.js +1192 -44
- package/src/sync-service.js +1383 -367
- package/src/transport.js +119 -3
|
@@ -0,0 +1,857 @@
|
|
|
1
|
+
import {
|
|
2
|
+
dateOnlyString,
|
|
3
|
+
executeCoachReadTool,
|
|
4
|
+
normalizeExerciseName
|
|
5
|
+
} from './queries.js';
|
|
6
|
+
|
|
7
|
+
// Version tag exported by this answer-verifier module.
// NOTE(review): presumably attached to verification output so results can be traced
// back to the rule set that produced them — confirm against callers.
export const ASK_ANSWER_VERIFIER_VERSION = 'ask-answer-verifier-v0.1';
|
|
8
|
+
|
|
9
|
+
/**
 * Coerce any value to a string with surrounding whitespace removed.
 * `null` and `undefined` become the empty string.
 *
 * @param {*} value
 * @returns {string}
 */
function normalizeText(value) {
  const raw = value ?? '';
  return String(raw).trim();
}
|
|
12
|
+
|
|
13
|
+
/**
 * Escape regular-expression metacharacters so the value can be embedded
 * literally inside a `RegExp` source string.
 *
 * @param {*} value - Stringified before escaping.
 * @returns {string}
 */
function escapeRegExp(value) {
  const source = String(value);
  // Prefix every metacharacter with a backslash.
  return source.replace(/[.*+?^${}()|[\]\\]/g, (metaChar) => `\\${metaChar}`);
}
|
|
16
|
+
|
|
17
|
+
/**
 * Drop falsy entries and duplicates, keeping first-seen order.
 *
 * @param {Array|null|undefined} values
 * @returns {Array} New array of the distinct truthy entries.
 */
function uniqueStrings(values) {
  const source = values ?? [];
  const truthy = source.filter((entry) => Boolean(entry));
  return Array.from(new Set(truthy));
}
|
|
20
|
+
|
|
21
|
+
/**
 * Normalize an "exclude" option into an array of strings.
 * Accepts an array, a Set, or a single value; any falsy input yields [].
 *
 * @param {Array|Set|*} exclude
 * @returns {string[]}
 */
function normalizeExcludeList(exclude) {
  if (!exclude) return [];

  let entries;
  if (Array.isArray(exclude)) {
    entries = exclude;
  } else if (exclude instanceof Set) {
    entries = Array.from(exclude);
  } else {
    entries = [exclude];
  }
  return entries.map((entry) => String(entry));
}
|
|
27
|
+
|
|
28
|
+
/**
 * Serialize a value to JSON-like text with object keys emitted in sorted
 * order, so two objects with the same entries produce the same string
 * regardless of insertion order. Arrays keep their element order.
 *
 * @param {*} value
 * @returns {string|undefined} Same result as `JSON.stringify` for primitives.
 */
function stableJsonStringify(value) {
  if (Array.isArray(value)) {
    const items = value.map((item) => stableJsonStringify(item));
    return `[${items.join(',')}]`;
  }
  if (!value || typeof value !== 'object') {
    return JSON.stringify(value);
  }
  const members = [];
  for (const key of Object.keys(value).sort()) {
    members.push(`${JSON.stringify(key)}:${stableJsonStringify(value[key])}`);
  }
  return `{${members.join(',')}}`;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Collect every tool name referenced by the routing metadata: tools used,
 * executed and required tools of the evidence plan, and the names of agentic
 * tool invocations. Falsy names are ignored.
 *
 * @param {object} [routingMetadata]
 * @returns {Set<string>}
 */
function routedToolNames(routingMetadata = {}) {
  const plan = routingMetadata.evidencePlan;
  const invokedNames = (routingMetadata.agenticToolInvocations ?? [])
    .map((invocation) => invocation?.name);
  const sources = [
    routingMetadata.toolsUsed ?? [],
    plan?.executedTools ?? [],
    plan?.requiredTools ?? [],
    invokedNames
  ];

  const names = new Set();
  for (const source of sources) {
    for (const name of source) {
      if (name) names.add(name);
    }
  }
  return names;
}
|
|
44
|
+
|
|
45
|
+
/**
 * Return a shallow copy of an object without the entries whose value is
 * `null` or `undefined`. Other falsy values (0, '', false) are kept.
 *
 * @param {object} value
 * @returns {object}
 */
function compactObject(value) {
  const kept = [];
  for (const [key, item] of Object.entries(value)) {
    if (item === undefined || item === null) continue;
    kept.push([key, item]);
  }
  return Object.fromEntries(kept);
}
|
|
48
|
+
|
|
49
|
+
/**
 * Build a map of normalized exercise name -> first display name seen, drawn
 * from performed session exercises, each session's prescription snapshot,
 * and program days (in that order of precedence).
 *
 * @param {object|null|undefined} snapshot
 * @returns {Map<string, string>}
 */
function allExerciseNames(snapshot) {
  const names = new Map();

  // Register a display name under its normalized key unless one is already recorded.
  const remember = (displayName) => {
    if (!displayName) return;
    const key = normalizeExerciseName(displayName);
    if (key && !names.has(key)) names.set(key, displayName);
  };

  for (const session of snapshot?.sessions ?? []) {
    for (const performed of session.exercises ?? []) {
      remember(performed?.name);
    }
    for (const planned of session.prescriptionSnapshot?.exercises ?? []) {
      remember(planned?.exerciseName ?? planned?.name);
    }
  }

  for (const program of snapshot?.programs ?? []) {
    for (const day of program.days ?? []) {
      for (const slot of day.exercises ?? []) {
        remember(slot?.name ?? slot?.exerciseName);
      }
    }
  }

  return names;
}
|
|
76
|
+
|
|
77
|
+
/**
 * Find every occurrence of a known exercise name in the answer text.
 *
 * Names come from `allExerciseNames(snapshot)` and are matched
 * case-insensitively as whole tokens. A match that lies entirely inside a
 * match of a longer-named exercise (e.g. "Press" inside "Bench Press") is
 * dropped.
 *
 * @param {*} text - Answer text; null/undefined is treated as ''.
 * @param {object} snapshot - Source of known exercise names.
 * @returns {Array<{index: number, end: number, name: string, normalizedName: string}>}
 *   Mentions sorted by start index; `end` is exclusive.
 */
export function findAskAnswerExerciseMentions(text, snapshot) {
  const haystack = String(text ?? '');
  const mentions = [];
  for (const [normalizedName, displayName] of allExerciseNames(snapshot)) {
    // Use lookarounds rather than \b: \b requires a word character on one side,
    // so a name that starts or ends with punctuation (e.g. "Press (Incline)")
    // would never match when followed by a space. For names that start and end
    // with word characters these lookarounds are equivalent to \b.
    const pattern = new RegExp(`(?<!\\w)${escapeRegExp(displayName)}(?!\\w)`, 'gi');
    for (const match of haystack.matchAll(pattern)) {
      const index = match.index ?? -1;
      mentions.push({
        index,
        end: index + match[0].length,
        name: displayName,
        normalizedName
      });
    }
  }
  // Drop a mention when a different mention with a longer normalized name fully covers it.
  const isShadowed = (mention, position) => mentions.some((candidate, candidatePosition) => (
    candidatePosition !== position
    && candidate.index <= mention.index
    && candidate.end >= mention.end
    && candidate.normalizedName.length > mention.normalizedName.length
  ));
  return mentions
    .filter((mention, position) => !isShadowed(mention, position))
    .sort((lhs, rhs) => lhs.index - rhs.index);
}
|
|
99
|
+
|
|
100
|
+
/**
 * Locate the sentence that surrounds the span [start, end) in `text`.
 *
 * Boundaries are '.', '!', '?' and newline. A '.' directly between two digits
 * is treated as a decimal point (e.g. "82.5 kg") and NOT as a boundary, so a
 * sentence containing decimal loads is not cut in the middle.
 *
 * @param {string} text
 * @param {number} start - Index to scan backwards from (inclusive).
 * @param {number} [end=start] - Index to scan forwards from (inclusive).
 * @returns {{start: number, end: number}} `start` is the index just after the
 *   preceding boundary (0 if none); `end` is the index of the next boundary
 *   (`text.length` if none).
 */
function sentenceBounds(text, start, end = start) {
  const isDigit = (char) => char >= '0' && char <= '9';
  const isBoundaryAt = (position) => {
    const char = text[position];
    if (char === '!' || char === '?' || char === '\n') return true;
    if (char !== '.') return false;
    // A period flanked by digits is part of a number, not the end of a sentence.
    return !(isDigit(text[position - 1]) && isDigit(text[position + 1]));
  };

  let before = -1;
  const scanFrom = Math.max(0, Math.min(start, text.length - 1));
  for (let position = scanFrom; position >= 0; position -= 1) {
    if (isBoundaryAt(position)) {
      before = position;
      break;
    }
  }

  let after = text.length;
  for (let position = Math.max(0, end); position < text.length; position += 1) {
    if (isBoundaryAt(position)) {
      after = position;
      break;
    }
  }

  return { start: before + 1, end: after };
}
|
|
115
|
+
|
|
116
|
+
/**
 * Pick the exercise mention that a claim most plausibly refers to: the
 * mention inside the claim's sentence that is closest to the claim span.
 * Mentions overlapping the claim have distance 0; ties keep the earlier entry
 * of `mentions`.
 *
 * @param {Array<{index: number, end: number}>} mentions
 * @param {string} text - Text the claim and mention indexes refer to.
 * @param {{index?: number, end?: number, text?: string}} claim
 * @returns {object|null} The chosen mention, or null when the claim has no
 *   valid index or its sentence contains no mention.
 */
function mentionForClaim(mentions, text, claim) {
  const claimStart = claim.index ?? -1;
  if (claimStart < 0) return null;
  const claimEnd = claim.end ?? claimStart + String(claim.text ?? '').length;
  const { start: sentenceStart, end: sentenceEnd } = sentenceBounds(text, claimStart, claimEnd);

  let nearest = null;
  let nearestDistance = Infinity;
  for (const mention of mentions) {
    const insideSentence = mention.index >= sentenceStart && mention.end <= sentenceEnd;
    if (!insideSentence) continue;

    let distance = 0;
    if (mention.end <= claimStart) {
      distance = claimStart - mention.end;
    } else if (mention.index >= claimEnd) {
      distance = mention.index - claimEnd;
    }

    if (nearest === null || distance < nearestDistance) {
      nearest = mention;
      nearestDistance = distance;
    }
  }
  return nearest;
}
|
|
136
|
+
|
|
137
|
+
/**
 * Test whether `tokenPattern` matches within the 40 characters that precede
 * `index` in `text`.
 *
 * @param {string} text
 * @param {number} index - Start of the phrase being checked.
 * @param {RegExp} tokenPattern - Pattern applied to the preceding slice.
 * @returns {boolean}
 */
function hasNegationBefore(text, index, tokenPattern) {
  const from = Math.max(0, index - 40);
  const preceding = text.slice(from, index);
  return tokenPattern.test(preceding);
}
|
|
141
|
+
|
|
142
|
+
/**
 * Choose the "top" set from a list of sets, ranked by
 * `weight * (1 + reps / 30)`. Sets whose weight or reps do not coerce to a
 * finite number are ignored; ties keep the earliest set.
 *
 * @param {Array<{weight: *, reps: *}>} [sets]
 * @returns {{weight: number, reps: number}|null} A new object with numeric
 *   weight and reps, or null when no usable set exists.
 */
function topSetFromSets(sets = []) {
  const score = (set) => set.weight * (1 + set.reps / 30);

  const usable = sets
    .map((set) => ({
      weight: Number(set?.weight),
      reps: Number(set?.reps)
    }))
    .filter((set) => Number.isFinite(set.weight) && Number.isFinite(set.reps));

  let best = null;
  for (const candidate of usable) {
    if (best === null || score(candidate) > score(best)) best = candidate;
  }
  return best;
}
|
|
151
|
+
|
|
152
|
+
/**
 * Compare the current top set against the previous one.
 * Direction is decided by weight first; when weights are equal and both rep
 * counts are finite, reps break the tie.
 *
 * @param {{weight: *, reps: *}|null|undefined} currentTopSet
 * @param {{weight: *, reps: *}|null|undefined} previousTopSet
 * @returns {{loadDirection: 'up'|'down'|'same', previousTopSet: *}|null}
 *   null when either weight is not a finite number.
 */
function compareEvidenceTopSets(currentTopSet, previousTopSet) {
  const weightNow = Number(currentTopSet?.weight);
  const weightBefore = Number(previousTopSet?.weight);
  if (!(Number.isFinite(weightNow) && Number.isFinite(weightBefore))) return null;

  const repsNow = Number(currentTopSet?.reps);
  const repsBefore = Number(previousTopSet?.reps);

  const direction = (now, before) => {
    if (now > before) return 'up';
    if (now < before) return 'down';
    return 'same';
  };

  let loadDirection = direction(weightNow, weightBefore);
  const repsComparable = Number.isFinite(repsNow) && Number.isFinite(repsBefore);
  if (loadDirection === 'same' && repsComparable) {
    loadDirection = direction(repsNow, repsBefore);
  }
  return { loadDirection, previousTopSet };
}
|
|
167
|
+
|
|
168
|
+
/**
 * Resolve the comparison-with-previous-session record for an evidence row.
 * A precomputed `comparedToPreviousSession` wins; otherwise the comparison is
 * derived from the row's top set and the sets of `previousComparableSession`.
 *
 * @param {object|null|undefined} row
 * @returns {object|null} The comparison, or null when none can be derived.
 */
function comparedToPreviousEvidence(row) {
  const precomputed = row?.comparedToPreviousSession;
  if (precomputed) return precomputed;

  const previousSession = row?.previousComparableSession;
  if (!previousSession) return null;

  const current = row?.topSet ?? topSetFromSets(row?.sets);
  const previous = topSetFromSets(previousSession.sets);
  return compareEvidenceTopSets(current, previous);
}
|
|
175
|
+
|
|
176
|
+
/**
 * Get the top set for an evidence row: the row's own `topSet` when present,
 * otherwise a `{ weight, reps }` pair built from the row's `weight` and
 * `reps` fields when both coerce to finite numbers.
 *
 * @param {object|null|undefined} row
 * @returns {object|null}
 */
function rowTopSet(row) {
  if (row?.topSet) return row.topSet;

  const pair = { weight: Number(row?.weight), reps: Number(row?.reps) };
  const usable = Number.isFinite(pair.weight) && Number.isFinite(pair.reps);
  return usable ? pair : null;
}
|
|
183
|
+
|
|
184
|
+
/**
 * Append a normalized evidence row to `rows`.
 * Fields missing on the row fall back to `inherited`, then to a default.
 * Nothing is appended when no exercise name can be resolved and normalized.
 *
 * @param {Array<object>} rows - Accumulator; mutated in place.
 * @param {string} toolName - Tool that produced the row.
 * @param {object|null|undefined} row - Raw row from a tool result.
 * @param {object} [inherited] - Values from the enclosing row.
 * @returns {void}
 */
function addEvidenceRow(rows, toolName, row, inherited = {}) {
  const exerciseName = row?.exerciseName ?? row?.name ?? inherited.exerciseName ?? null;
  const normalizedName = normalizeExerciseName(exerciseName);
  if (!normalizedName) return;

  // Row value first, then the inherited value, then the supplied default.
  const resolve = (field, fallback) => row?.[field] ?? inherited[field] ?? fallback;

  const evidence = {
    toolName,
    exerciseName,
    normalizedName,
    date: resolve('date', null),
    daysAgo: resolve('daysAgo', null),
    recencyCutoffDays: resolve('recencyCutoffDays', null),
    isStale: resolve('isStale', false),
    topSet: rowTopSet(row),
    comparedToPreviousSession: comparedToPreviousEvidence(row),
    sets: Array.isArray(row?.sets) ? row.sets : []
  };
  rows.push(evidence);
}
|
|
201
|
+
|
|
202
|
+
/**
 * Flatten tool results into normalized evidence rows.
 *
 * Three row shapes are handled:
 *  - a row with an `exercises` array: one evidence row per exercise, inheriting
 *    the parent's date, daysAgo, recencyCutoffDays and isStale;
 *  - a row with `first` / `best` / `latest` points: one evidence row per point,
 *    inheriting the parent's exerciseName, recencyCutoffDays and isStale;
 *  - any other row: used directly.
 *
 * @param {Array<object>} [toolResults]
 * @returns {Array<object>}
 */
function evidenceRows(toolResults = []) {
  const collected = [];
  for (const toolResult of toolResults) {
    for (const row of toolResult?.rows ?? []) {
      if (Array.isArray(row?.exercises)) {
        for (const exercise of row.exercises) {
          const { date, daysAgo, recencyCutoffDays, isStale } = row;
          addEvidenceRow(collected, toolResult.toolName, exercise, { date, daysAgo, recencyCutoffDays, isStale });
        }
        continue;
      }

      const hasProgressPoints = row?.first || row?.best || row?.latest;
      if (hasProgressPoints) {
        for (const point of [row.first, row.best, row.latest]) {
          const { exerciseName, recencyCutoffDays, isStale } = row;
          addEvidenceRow(collected, toolResult.toolName, point, { exerciseName, recencyCutoffDays, isStale });
        }
        continue;
      }

      addEvidenceRow(collected, toolResult.toolName, row);
    }
  }
  return collected;
}
|
|
230
|
+
|
|
231
|
+
/**
 * Re-run the tools recorded in the routing metadata against the snapshot.
 *
 * Invocations come from `toolsUsed` (paired with `toolParams[name]`) followed
 * by `agenticToolInvocations`. Each distinct name + params combination is
 * executed once. A tool that throws is reported as a blocking
 * `tool_replay_failed` failure instead of aborting the replay.
 *
 * @param {object} snapshot
 * @param {object} [routingMetadata]
 * @param {object} [options]
 * @param {*} [options.today] - Used for `params.today` when an invocation has none.
 * @param {Array|Set|*} [options.exclude] - Normalized and passed to every tool.
 * @param {Function} [options.executeTool] - `(snapshot, name, params) => result`.
 * @returns {{toolResults: Array, replayFailures: Array<object>}}
 */
function replayAskToolResults(snapshot, routingMetadata = {}, { today = new Date(), exclude = [], executeTool = executeCoachReadTool } = {}) {
  const toolResults = [];
  const replayFailures = [];
  const replayedKeys = new Set();
  const paramsByTool = routingMetadata.toolParams ?? {};
  const excludeList = normalizeExcludeList(exclude);

  const routedInvocations = uniqueStrings(routingMetadata.toolsUsed ?? [])
    .map((toolName) => ({ name: toolName, params: paramsByTool[toolName] ?? {} }));
  const queue = [...routedInvocations, ...(routingMetadata.agenticToolInvocations ?? [])];

  for (const invocation of queue) {
    const name = invocation?.name;
    if (!name) continue;

    const params = invocation?.params ?? {};
    // Key on name plus order-independent params so identical calls run once.
    const dedupeKey = `${name}:${stableJsonStringify(params)}`;
    if (replayedKeys.has(dedupeKey)) continue;
    replayedKeys.add(dedupeKey);

    try {
      const replayParams = {
        ...params,
        today: params.today ?? dateOnlyString(today),
        exclude: excludeList
      };
      toolResults.push(executeTool(snapshot, name, replayParams));
    } catch (error) {
      replayFailures.push({
        key: 'tool_replay_failed',
        severity: 'blocking',
        toolName: name,
        reason: `Could not replay routed tool ${name}: ${error?.message ?? String(error)}`
      });
    }
  }

  return { toolResults, replayFailures };
}
|
|
266
|
+
|
|
267
|
+
/**
 * Extract every kilogram load mentioned in the text ("82.5 kg", "100kg",
 * "60 kilograms"). Indexes refer to the trimmed text.
 *
 * @param {*} text
 * @returns {Array<{text: string, value: number, index: number, end: number}>}
 *   `end` is exclusive.
 */
function extractWeightClaims(text) {
  const pattern = /\b(\d+(?:\.\d+)?)\s*(?:kg|kilograms?)\b/gi;
  return Array.from(normalizeText(text).matchAll(pattern), (match) => {
    const [matched, amount] = match;
    const index = match.index ?? -1;
    return {
      text: matched,
      value: Number(amount),
      index,
      end: index + matched.length
    };
  });
}
|
|
280
|
+
|
|
281
|
+
/**
 * Extract weight-and-reps claims such as "100 kg x 5", "80kg × 8" or
 * "60 kg for 10". Indexes refer to the trimmed text.
 *
 * @param {*} text
 * @returns {Array<{text: string, weight: number, reps: number, index: number, end: number}>}
 *   `end` is exclusive.
 */
function extractWeightedSetClaims(text) {
  const pattern = /\b(\d+(?:\.\d+)?)\s*(?:kg|kilograms?)\s*(?:x|×|for)\s*(\d+)\b/gi;
  return Array.from(normalizeText(text).matchAll(pattern), (match) => {
    const [matched, load, repetitions] = match;
    const index = match.index ?? -1;
    return {
      text: matched,
      weight: Number(load),
      reps: Number(repetitions),
      index,
      end: index + matched.length
    };
  });
}
|
|
295
|
+
|
|
296
|
+
/**
 * List every finite weight found in the evidence rows: each set's weight,
 * then the row's top-set weight, then the previous session's top-set weight.
 *
 * @param {Array<object>} rows
 * @returns {number[]}
 */
function weightsForRows(rows) {
  const collected = [];
  const keepIfFinite = (candidate) => {
    const weight = Number(candidate);
    if (Number.isFinite(weight)) collected.push(weight);
  };

  for (const row of rows) {
    for (const set of row.sets ?? []) {
      keepIfFinite(set.weight);
    }
    keepIfFinite(row.topSet?.weight);
    keepIfFinite(row.comparedToPreviousSession?.previousTopSet?.weight);
  }
  return collected;
}
|
|
310
|
+
|
|
311
|
+
/**
 * List every weight/reps pair found in the evidence rows: each set, then the
 * row's top set, then the previous session's top set. A pair is kept only
 * when both weight and reps coerce to finite numbers.
 *
 * @param {Array<object>} rows
 * @returns {Array<{weight: number, reps: number}>}
 */
function setPairsForRows(rows) {
  const pairs = [];
  const keepIfFinite = (rawWeight, rawReps) => {
    const weight = Number(rawWeight);
    const reps = Number(rawReps);
    if (Number.isFinite(weight) && Number.isFinite(reps)) pairs.push({ weight, reps });
  };

  for (const row of rows) {
    for (const set of row.sets ?? []) {
      keepIfFinite(set.weight, set.reps);
    }
    const topSets = [row.topSet, row.comparedToPreviousSession?.previousTopSet];
    for (const topSet of topSets) {
      keepIfFinite(topSet?.weight, topSet?.reps);
    }
  }
  return pairs;
}
|
|
327
|
+
|
|
328
|
+
/**
 * Decide whether a weight claim refers to a one-rep max: looks for
 * "1RM" / "e1RM" / "one rep max" (optionally "estimated") within 40
 * characters on either side of the claim.
 *
 * @param {string} text
 * @param {{index: number, text: string}} claim
 * @returns {boolean}
 */
function isEstimatedOneRepMaxWeightClaim(text, claim) {
  const radius = 40;
  const from = Math.max(0, claim.index - radius);
  const to = Math.min(text.length, claim.index + claim.text.length + radius);
  const context = text.slice(from, to);
  return /\b(?:estimated\s+)?(?:1rm|e1rm|one[-\s]?rep\s+max)\b/i.test(context);
}
|
|
333
|
+
|
|
334
|
+
/**
 * Decide whether a weight claim is a volume figure: the word "volume"
 * appears within 30 characters on either side of the claim.
 *
 * @param {string} text
 * @param {{index: number, text: string}} claim
 * @returns {boolean}
 */
function isVolumeWeightClaim(text, claim) {
  const radius = 30;
  const from = Math.max(0, claim.index - radius);
  const to = Math.min(text.length, claim.index + claim.text.length + radius);
  const context = text.slice(from, to);
  return /\bvolume\b/i.test(context);
}
|
|
339
|
+
|
|
340
|
+
/**
 * Decide whether a weight claim is about body weight: looks for
 * "body weight", "bodyweight", "scale weight" or a form of "weigh" within 50
 * characters on either side of the claim.
 *
 * @param {string} text
 * @param {{index: number, text: string}} claim
 * @returns {boolean}
 */
function isBodyWeightClaim(text, claim) {
  const radius = 50;
  const from = Math.max(0, claim.index - radius);
  const to = Math.min(text.length, claim.index + claim.text.length + radius);
  const context = text.slice(from, to);
  return /\b(?:body\s*weight|bodyweight|scale\s+weight|weigh(?:ed|s|ing)?)\b/i.test(context);
}
|
|
345
|
+
|
|
346
|
+
/**
 * Decide whether the text around a claim explicitly says the figure cannot be
 * verified or is unsupported (e.g. "I can't verify ...", "not enough
 * evidence", "the data does not show ..."). The context is 80 characters on
 * either side of the claim.
 *
 * @param {string} text
 * @param {{index: number, text?: string}} claim
 * @returns {boolean}
 */
function isUnsupportedReferenceClaim(text, claim) {
  const radius = 80;
  const claimLength = String(claim.text ?? '').length;
  const from = Math.max(0, claim.index - radius);
  const to = Math.min(text.length, claim.index + claimLength + radius);
  const context = text.slice(from, to);

  const disclaimers = [
    /\b(?:cannot|can't|couldn'?t|do not|don't|not able to|unable to|won'?t|not going to)\b.{0,60}\b(?:verify|support|confirm|say|claim|trust|use)\b/i,
    /\b(?:unsupported|unverified|not supported|not verified|not enough evidence|without evidence|missing evidence)\b/i,
    /\b(?:evidence|data|route|tools?)\b.{0,60}\b(?:does(?: not|n't)|do(?: not|n't)|did(?: not|n't)|not)\b.{0,30}\b(?:include|show|support|verify|confirm)\b/i
  ];
  return disclaimers.some((pattern) => pattern.test(context));
}
|
|
354
|
+
|
|
355
|
+
/**
 * Check whether a claimed weight appears in the evidence rows, within a
 * tolerance of 0.01.
 *
 * @param {{value: number}} claim
 * @param {Array<object>} rows
 * @returns {boolean}
 */
function weightSupported(claim, rows) {
  for (const weight of weightsForRows(rows)) {
    if (Math.abs(weight - claim.value) < 0.01) return true;
  }
  return false;
}
|
|
358
|
+
|
|
359
|
+
/**
 * Check whether a claimed weight/reps pair appears in the evidence rows.
 * Weight matches within 0.01; reps must be exactly equal.
 *
 * @param {{weight: number, reps: number}} claim
 * @param {Array<object>} rows
 * @returns {boolean}
 */
function setPairSupported(claim, rows) {
  for (const pair of setPairsForRows(rows)) {
    const sameWeight = Math.abs(pair.weight - claim.weight) < 0.01;
    if (sameWeight && pair.reps === claim.reps) return true;
  }
  return false;
}
|
|
364
|
+
|
|
365
|
+
/**
 * Tell whether a claim starts inside the span of any weighted-set claim,
 * i.e. it is the weight part of a "weight x reps" expression.
 *
 * @param {{index: number}} claim
 * @param {Array<{index: number, end: number}>} setClaims
 * @returns {boolean}
 */
function claimWithinSetClaim(claim, setClaims) {
  for (const setClaim of setClaims) {
    const startsInside = claim.index >= setClaim.index && claim.index < setClaim.end;
    if (startsInside) return true;
  }
  return false;
}
|
|
368
|
+
|
|
369
|
+
/**
 * Detect any mention of an (estimated) one-rep max in the output:
 * "e1RM", "1RM", "one-rep max" or "one rep max", case-insensitive.
 *
 * @param {string} output
 * @returns {boolean}
 */
function outputHasE1rmClaim(output) {
  const oneRepMaxMention = /\b(?:estimated\s+)?(?:e1rm|1rm|one[- ]rep max)\b/i;
  return oneRepMaxMention.test(output);
}
|
|
372
|
+
|
|
373
|
+
/**
 * Detect wording that disclaims a one-rep-max figure rather than asserting
 * it: missing records/evidence/data, an explicit "can't verify ... 1RM", or
 * "1RM ... not shown/supported/verified".
 *
 * @param {string} output
 * @returns {boolean}
 */
function e1rmUncertaintyLanguage(output) {
  const hedges = [
    /\b(?:no|not enough|without|missing|lack(?:ing)?|insufficient)\b.{0,80}\b(?:e1rm|1rm|one[- ]rep max|records?|evidence|data)\b/i,
    /\b(?:cannot|can't|couldn'?t|do not|don't|not able to|unable to)\b.{0,80}\b(?:verify|support|confirm|see|claim|say)\b.{0,80}\b(?:e1rm|1rm|one[- ]rep max)\b/i,
    /\b(?:e1rm|1rm|one[- ]rep max)\b.{0,80}\b(?:not|isn'?t|wasn'?t)\b.{0,80}\b(?:in|included|shown|supported|verified)\b/i
  ];
  return hedges.some((pattern) => pattern.test(output));
}
|
|
378
|
+
|
|
379
|
+
/**
 * Find phrases claiming that rep targets were hit ("hit all your target
 * reps", "hit all the targets", "hit your target"). Matches preceded by a
 * negation within the lookback window of `hasNegationBefore` are skipped.
 *
 * @param {string} text
 * @returns {Array<{text: string}>} Claims grouped by pattern, in pattern order.
 */
function targetHitClaims(text) {
  const claimPatterns = [
    /\b(?:you\s+)?hit(?:ting)?\s+all\s+(?:your\s+)?target(?:ed)?\s+reps?\b/gi,
    /\b(?:you\s+)?hit\s+all\s+(?:the\s+)?targets?\b/gi,
    /\b(?:you\s+)?hit\s+(?:the|your)\s+target\b(?!\s+(?:of|for|on))/gi
  ];
  const negation = /\b(?:not|didn'?t|don'?t|failed to|missed|never)\b[\s\w'-]*$/i;

  const claims = [];
  for (const pattern of claimPatterns) {
    for (const match of text.matchAll(pattern)) {
      if (hasNegationBefore(text, match.index ?? 0, negation)) continue;
      claims.push({ text: match[0] });
    }
  }
  return claims;
}
|
|
388
|
+
|
|
389
|
+
/**
 * Find phrases that describe the work as clean and consistent ("clean,
 * consistent", "clean and consistent", "consistent set of work", "across the
 * board"). Matches preceded by a negation within the lookback window of
 * `hasNegationBefore` are skipped.
 *
 * @param {string} text
 * @returns {Array<{text: string}>} Claims grouped by pattern, in pattern order.
 */
function cleanConsistencyClaims(text) {
  const claimPatterns = [
    /\bclean,\s+consistent\b/gi,
    /\bclean\s+and\s+consistent\b/gi,
    /\bconsistent\s+set\s+of\s+work\b/gi,
    /\bacross\s+the\s+board\b/gi
  ];
  const negation = /\b(?:not|wasn'?t|weren'?t|isn'?t|hardly|not a)\b[\s\w'-]*$/i;

  const claims = [];
  for (const pattern of claimPatterns) {
    for (const match of text.matchAll(pattern)) {
      if (hasNegationBefore(text, match.index ?? 0, negation)) continue;
      claims.push({ text: match[0] });
    }
  }
  return claims;
}
|
|
399
|
+
|
|
400
|
+
/**
 * List completed sets from recent sessions whose reps fell below the
 * prescribed target reps for that exercise.
 *
 * A session is "recent" when its `completedAt` (or `date`) parses to a time on
 * or after `lookbackDays` before UTC midnight of `today`. Targets come from
 * the session's `prescriptionSnapshot.exercises`; only positive, finite
 * `targetReps` are used.
 *
 * Prescription entries are keyed by `exerciseName ?? name`, matching how
 * `allExerciseNames` reads the same entries, and null session / exercise /
 * set entries are skipped instead of throwing.
 *
 * @param {object|null|undefined} snapshot
 * @param {object} [options]
 * @param {*} [options.today] - Reference day, passed to `dateOnlyString`.
 * @param {number} [options.lookbackDays=7]
 * @param {string[]} [options.exerciseNames] - Normalized names to restrict to;
 *   empty means all exercises.
 * @returns {Array<{sessionId: *, exerciseName: string, reps: number, target: number}>}
 */
function recentSessionMisses(snapshot, { today = new Date(), lookbackDays = 7, exerciseNames = [] } = {}) {
  const lookbackMs = lookbackDays * 24 * 60 * 60 * 1000;
  const todayMs = Date.parse(`${dateOnlyString(today)}T00:00:00.000Z`);
  // Fall back to the current time when `today` does not produce a parseable date.
  const cutoff = (Number.isFinite(todayMs) ? todayMs : Date.now()) - lookbackMs;
  const scoped = exerciseNames.length > 0 ? new Set(exerciseNames) : null;
  const misses = [];

  for (const session of snapshot?.sessions ?? []) {
    const completedTime = Date.parse(session?.completedAt || session?.date);
    if (!Number.isFinite(completedTime) || completedTime < cutoff) continue;

    const targetByExercise = new Map();
    for (const planned of session.prescriptionSnapshot?.exercises ?? []) {
      // Prescription entries may carry the name under either key.
      const plannedName = planned?.exerciseName ?? planned?.name;
      const target = Number(planned?.targetReps);
      if (!plannedName || !Number.isFinite(target) || target <= 0) continue;
      targetByExercise.set(normalizeExerciseName(plannedName), target);
    }

    for (const exercise of session.exercises ?? []) {
      if (!exercise?.name) continue;
      const normalizedName = normalizeExerciseName(exercise.name);
      if (scoped && !scoped.has(normalizedName)) continue;
      const target = targetByExercise.get(normalizedName);
      if (!Number.isFinite(target)) continue;

      for (const set of exercise.sets ?? []) {
        const reps = Number(set?.reps);
        if (set?.isComplete && Number.isFinite(reps) && reps < target) {
          misses.push({ sessionId: session.id, exerciseName: exercise.name, reps, target });
        }
      }
    }
  }
  return misses;
}
|
|
431
|
+
|
|
432
|
+
/**
 * Detect fatigue or recovery attribution wording in the output (e.g.
 * "fatigue", "under-recovery", "recovery debt", "limited by recovery").
 *
 * @param {string} output
 * @returns {boolean}
 */
function fatigueLanguage(output) {
  const fatigueTerms = /\b(fatigue|fatigued|underrecovered|under[-\s]?recovery|poor recovery|low recovery|incomplete recovery|recovery debt|fatigue ceiling|limited by recovery|limited by fatigue|accumulated fatigue)\b/i;
  return fatigueTerms.test(output);
}
|
|
435
|
+
|
|
436
|
+
/**
 * Detect wording that hedges a fatigue/recovery attribution: either a
 * statement that recovery-type data is missing ("no recovery data", "not
 * enough sleep metrics") or an explicit refusal to attribute ("can't tie
 * this to fatigue", "hard to say why").
 *
 * @param {string} output
 * @returns {boolean}
 */
function fatigueUncertaintyLanguage(output) {
  const hedges = [
    /\b(?:no|not enough|without|missing|lack(?:ing)?|insufficient)\s+(?:\w+\s+){0,4}?(?:recovery|readiness|vitals?|sleep|hrv|heart rate|data|info|signals?|metrics?)\b/i,
    /\b(?:cannot|can't|do not|don't|not enough|isn't enough|no basis to|hard to)\s+(?:\w+\s+){0,12}?(?:infer|tie|connect|attribute|blame|claim|say|show|prove|know|call)\s+(?:\w+\s+){0,12}?(?:fatigue|recovery|readiness|why)\b/i
  ];
  return hedges.some((pattern) => pattern.test(output));
}
|
|
440
|
+
|
|
441
|
+
/**
 * Decide whether the snapshot contains recent data that could back a
 * fatigue/recovery statement.
 *
 * Two kinds of support are considered, each only when enabled:
 *  - recovery metrics (unless 'recovery' is excluded): any vitals summary, or
 *    any restingHR / hrv / sleep reading, dated within the lookback window;
 *  - session drop-off: any recent session exercise whose last positive rep
 *    count is at least 30% below its first.
 *
 * @param {object|null|undefined} snapshot
 * @param {object} [options]
 * @param {*} [options.today] - Reference day, passed to `dateOnlyString`.
 * @param {number} [options.lookbackDays=7]
 * @param {Array|Set|*} [options.exclude]
 * @param {boolean} [options.allowRecoveryMetrics=false]
 * @param {boolean} [options.allowSessionDropoff=false]
 * @returns {boolean}
 */
function fatigueSupport(snapshot, {
  today = new Date(),
  lookbackDays = 7,
  exclude = [],
  allowRecoveryMetrics = false,
  allowSessionDropoff = false
} = {}) {
  const lookbackMs = lookbackDays * 24 * 60 * 60 * 1000;
  const anchorMs = Date.parse(`${dateOnlyString(today)}T00:00:00.000Z`);
  // Fall back to the current time when `today` does not produce a parseable date.
  const cutoff = (Number.isFinite(anchorMs) ? anchorMs : Date.now()) - lookbackMs;
  const excluded = new Set(normalizeExcludeList(exclude));

  const isRecent = (dateValue) => {
    const ms = Date.parse(dateValue);
    return Number.isFinite(ms) && ms >= cutoff;
  };
  const hasRecentEntry = (entries) => entries.some((entry) => isRecent(entry.date));

  if (allowRecoveryMetrics && !excluded.has('recovery')) {
    if (hasRecentEntry(snapshot?.vitalsSummaries ?? [])) return true;
    const metrics = snapshot?.healthMetrics ?? {};
    for (const key of ['restingHR', 'hrv', 'sleep']) {
      const readings = Array.isArray(metrics[key]) ? metrics[key] : [];
      if (hasRecentEntry(readings)) return true;
    }
  }

  if (allowSessionDropoff) {
    for (const session of snapshot?.sessions ?? []) {
      if (!isRecent(session.completedAt || session.date)) continue;
      for (const exercise of session.exercises ?? []) {
        const reps = (exercise.sets ?? [])
          .map((set) => Number(set.reps))
          .filter((value) => Number.isFinite(value) && value > 0);
        if (reps.length < 2) continue;
        const firstReps = reps[0];
        const lastReps = reps.at(-1);
        if (firstReps > 0 && (firstReps - lastReps) / firstReps >= 0.3) return true;
      }
    }
  }

  return false;
}
|
|
475
|
+
|
|
476
|
+
/**
 * Decide whether a text window asserts a decline in load.
 * Returns false when no decline word is present, when the decline word is
 * negated ("no drop", "rather than lower"), or when it follows "rep"/"reps"
 * within 20 characters.
 *
 * @param {*} window - Text fragment to inspect.
 * @returns {boolean}
 */
function declineLanguage(window) {
  const text = normalizeText(window);

  const declineWord = /\b(drop(?:ped|ping|s)?(?: off)?|drop-off|declin(?:e|ed|ing)|regress(?:ed|ion|ing)?|fell|fall(?:ing)?|decreas(?:e|ed|ing)|lower|worse|slid|slipped)\b/i;
  const negatedDecline = /\b(?:no|not|isn'?t|wasn'?t|without|rather than)\b.{0,45}\b(drop(?:ped|ping|s)?(?: off)?|drop-off|declin(?:e|ed|ing)?|decreas(?:e|ed|ing)?|regress(?:ed|ion|ing)?|fall(?:ing)?|fell|lower|worse|slid|slipped)\b/i;
  const repDecline = /\b(?:rep|reps)\b.{0,20}\b(drop(?:ped|ping|s)?(?: off)?|drop-off|slip(?:ped|ping)?|fell|fall(?:ing)?|lower|declin(?:e|ed|ing)?|decreas(?:e|ed|ing)?|worse)\b/i;

  return declineWord.test(text) && !negatedDecline.test(text) && !repDecline.test(text);
}
|
|
484
|
+
|
|
485
|
+
/**
 * Decide whether a text window asserts an improvement in load.
 * Returns false when no improvement word is present, when the improvement
 * word is negated ("not stronger", "without progress"), or when it follows
 * "rep"/"reps" within 20 characters.
 *
 * @param {*} window - Text fragment to inspect.
 * @returns {boolean}
 */
function improvementLanguage(window) {
  const text = normalizeText(window);

  const improvementWord = /\b(improv(?:e|ed|ing|ement)|progress(?:ed|ing)?|stronger|increas(?:e|ed|ing)|moving up|went up|up from|load jump|jumped)\b/i;
  const negatedImprovement = /\b(?:no|not|isn'?t|wasn'?t|without|rather than)\b.{0,35}\b(improv(?:e|ed|ing|ement)?|progress(?:ed|ing)?|stronger|increas(?:e|ed|ing)?|moving up|went up|up from|load jump|jump(?:ed|ing)?)\b/i;
  const repImprovement = /\b(?:rep|reps)\b.{0,20}\b(improv(?:e|ed|ing|ement)?|increas(?:e|ed|ing)?|better|moving up|went up|up from|load jump|jump(?:ed|ing)?)\b/i;

  return improvementWord.test(text) && !negatedImprovement.test(text) && !repImprovement.test(text);
}
|
|
493
|
+
|
|
494
|
+
/**
 * Select the text fragments in which a direction claim about one exercise
 * should be looked for.
 *
 * For every sentence mentioning the exercise, the clauses (split on ';', ':',
 * ', ', "while", "but") that mention it are kept, or the whole sentence when
 * no single clause does. Following sentences are appended while they start
 * with a continuation word ("that", "this", "it", ...) and do not mention
 * another listed exercise.
 *
 * @param {string} outputText
 * @param {*} exerciseName - Exercise to scope to; when it normalizes to an
 *   empty value, all sentences are returned.
 * @param {Array} [exerciseNames] - All exercise names in play.
 * @returns {string[]} De-duplicated fragments, or `[outputText]` when none apply.
 */
function directionWindows(outputText, exerciseName, exerciseNames = []) {
  const target = normalizeExerciseName(exerciseName);
  const rivals = [...new Set(exerciseNames.map(normalizeExerciseName))]
    .filter((name) => name && name !== target);
  const sentences = outputText
    .split(/(?<=[.!?])\s+/)
    .map((sentence) => sentence.trim())
    .filter(Boolean);
  if (!target) return sentences;

  const mentionsTarget = (fragment) => normalizeExerciseName(fragment).includes(target);
  const continuation = /^(?:that|this|it|there|still|the\s+(?:latest|top)|top\s+set|same\s+load)\b/i;
  // A Set keeps insertion order and removes repeated fragments.
  const collected = new Set();

  sentences.forEach((sentence, position) => {
    if (!mentionsTarget(sentence)) return;

    const ownClauses = sentence
      .split(/\s*(?:[;:]|,\s+|\bwhile\b|\bbut\b)\s*/i)
      .map((clause) => clause.trim())
      .filter(Boolean)
      .filter((clause) => mentionsTarget(clause));
    for (const fragment of ownClauses.length > 0 ? ownClauses : [sentence]) {
      collected.add(fragment);
    }

    for (const follower of sentences.slice(position + 1)) {
      const normalizedFollower = normalizeExerciseName(follower);
      if (rivals.some((name) => normalizedFollower.includes(name))) break;
      if (!continuation.test(follower)) break;
      collected.add(follower);
    }
  });

  return collected.size > 0 ? [...collected] : [outputText];
}
|
|
518
|
+
|
|
519
|
+
/**
 * Pick the most recent evidence row that carries a comparison direction
 * (`comparedToPreviousSession.loadDirection`).
 *
 * Rows are ordered by `daysAgo` ascending when both rows have a usable
 * `daysAgo`, otherwise by `date` descending (string comparison).
 *
 * A missing `daysAgo` (null, undefined or '') is treated as unknown rather
 * than coerced with `Number()`, because `Number(null)` and `Number('')` are 0
 * and would make rows without a `daysAgo` look like they happened today,
 * never reaching the date comparison.
 *
 * @param {Array<object>} [rows]
 * @returns {object|null}
 */
function newestComparableRow(rows = []) {
  const toDays = (value) => (
    value === null || value === undefined || value === '' ? Number.NaN : Number(value)
  );
  return rows
    .filter((row) => row.comparedToPreviousSession?.loadDirection)
    .sort((lhs, rhs) => {
      const lhsDays = toDays(lhs.daysAgo);
      const rhsDays = toDays(rhs.daysAgo);
      if (Number.isFinite(lhsDays) && Number.isFinite(rhsDays)) return lhsDays - rhsDays;
      return String(rhs.date ?? '').localeCompare(String(lhs.date ?? ''));
    })[0] ?? null;
}
|
|
529
|
+
|
|
530
|
+
/**
 * Check a drafted answer against the snapshot for three kinds of
 * unsupported statement, each reported as a blocking failure:
 *  - "targets were hit" with no session tool routed
 *    (`target_hit_without_session_evidence`) or contradicted by recent
 *    below-target sets (`target_hit_contradiction`);
 *  - "clean and consistent" wording contradicted by recent below-target sets
 *    (`clean_consistency_contradiction`);
 *  - fatigue/recovery attribution with neither hedging language nor
 *    supporting data (`unsupported_fatigue_recovery`).
 *
 * @param {*} answer - Draft answer text.
 * @param {object} snapshot
 * @param {object} routingMetadata - Used to determine which tools were routed.
 * @param {object} [options]
 * @param {*} [options.today]
 * @param {Array|Set|*} [options.exclude]
 * @returns {Array<{key: string, severity: string, reason: string}>}
 */
function checkSnapshotClaims(answer, snapshot, routingMetadata, { today = new Date(), exclude = [] } = {}) {
  const sessionTools = [
    'get_recent_sessions',
    'get_exercise_history',
    'get_next_session',
    'get_exercise_progress_summary',
    'get_program_progress',
    'compare_session_to_observations'
  ];
  const recoveryTools = [
    'get_readiness_snapshot',
    'get_program_progress'
  ];

  const mentions = findAskAnswerExerciseMentions(answer, snapshot);
  const scopedExerciseNames = uniqueStrings(mentions.map((mention) => mention.normalizedName));
  const normalized = normalizeText(answer);
  const routed = routedToolNames(routingMetadata);
  const routedAny = (toolNames) => toolNames.some((toolName) => routed.has(toolName));
  const hasSessionEvidence = routedAny(sessionTools);
  const hasRecoveryEvidence = routedAny(recoveryTools);

  // Below-target sets only count when session performance was part of the routed evidence.
  let misses = [];
  if (hasSessionEvidence) {
    misses = recentSessionMisses(snapshot, { today, exerciseNames: scopedExerciseNames });
  }

  const failures = [];
  const block = (key, reason) => {
    failures.push({ key, severity: 'blocking', reason });
  };

  if (targetHitClaims(normalized).length > 0) {
    if (!hasSessionEvidence) {
      block(
        'target_hit_without_session_evidence',
        'Draft claims target reps were hit, but routed evidence did not include session performance.'
      );
    } else if (misses.length > 0) {
      const [sample] = misses;
      block(
        'target_hit_contradiction',
        `Draft claims targets were hit, but ${sample.exerciseName} has ${sample.reps} reps below target ${sample.target}.`
      );
    }
  }

  if (cleanConsistencyClaims(normalized).length > 0 && misses.length > 0) {
    const [sample] = misses;
    block(
      'clean_consistency_contradiction',
      `Draft frames missed target reps as clean consistency, but ${sample.exerciseName} has ${sample.reps} reps below target ${sample.target}.`
    );
  }

  const attributesFatigue = fatigueLanguage(normalized) && !fatigueUncertaintyLanguage(normalized);
  if (attributesFatigue) {
    const supported = fatigueSupport(snapshot, {
      today,
      exclude,
      allowRecoveryMetrics: hasRecoveryEvidence,
      allowSessionDropoff: hasSessionEvidence
    });
    if (!supported) {
      block(
        'unsupported_fatigue_recovery',
        'Draft uses fatigue/recovery attribution without recent vitals, sleep, or rep-dropoff support.'
      );
    }
  }

  return failures;
}
|
|
593
|
+
|
|
594
|
+
/**
 * Cross-checks a draft answer against the evidence rebuilt by replaying the
 * routed coach tools.
 *
 * Checks performed, in order:
 *  1. failures reported by the tool replay itself;
 *  2. an e1RM/1RM claim made without `get_records` among the routed tools;
 *  3. weighted-set claims for an exercise with no routed rows, or whose
 *     weight/reps pair is not supported by those rows;
 *  4. bare load claims (outside set claims, not e1RM/volume/body weight) with
 *     the same two failure modes;
 *  5. per mentioned exercise: an optional advisory when it has no routed rows,
 *     and a blocking failure when the wording contradicts the load direction
 *     of the newest comparable row.
 *
 * @param {string} answer - Draft answer text; it is normalized (trimmed) once
 *   and that single string is handed to every text helper.
 * @param {object} snapshot - Forwarded to the tool replay and mention lookup.
 * @param {object} routingMetadata - Forwarded to the tool replay and used to
 *   derive the set of routed tool names.
 * @param {object} [options]
 * @param {Date} [options.today] - Forwarded to the tool replay.
 * @param {Array} [options.exclude] - Forwarded to the tool replay.
 * @param {boolean} [options.strictMentionProvenance] - When true (and at least
 *   one tool result exists), exercises mentioned without routed rows produce
 *   an advisory failure.
 * @param {Function} [options.executeTool] - Tool executor used by the replay.
 * @returns {Array<object>} Failure records (`key`, `severity`, `reason`, and
 *   `exerciseName` where applicable), replay failures first.
 */
function checkToolProvenance(answer, snapshot, routingMetadata, {
  today = new Date(),
  exclude = [],
  strictMentionProvenance = false,
  executeTool = executeCoachReadTool
} = {}) {
  const failures = [];
  const { toolResults, replayFailures } = replayAskToolResults(snapshot, routingMetadata, { today, exclude, executeTool });
  failures.push(...replayFailures);

  // Extract claims and mentions from the same trimmed string that
  // mentionForClaim/directionWindows later inspect, so anything position-based
  // cannot drift when `answer` carries leading whitespace.
  // NOTE(review): assumes claims/mentions are position-based — confirm against
  // the helper implementations. For already-trimmed input this is a no-op.
  const normalized = normalizeText(answer);
  const rows = evidenceRows(toolResults);
  const mentions = findAskAnswerExerciseMentions(normalized, snapshot);
  const toolsUsed = routedToolNames(routingMetadata);
  const rowsForMention = (mention) => rows.filter((row) => row.normalizedName === mention.normalizedName);

  if (outputHasE1rmClaim(normalized) && !e1rmUncertaintyLanguage(normalized) && !toolsUsed.has('get_records')) {
    failures.push({
      key: 'e1rm_without_records',
      severity: 'blocking',
      reason: 'Draft mentions e1RM/1RM, but routed evidence did not use get_records.'
    });
  }

  // Weight x reps claims: the exercise must be routed and the pair must exist.
  const setClaims = extractWeightedSetClaims(normalized);
  for (const claim of setClaims) {
    if (isEstimatedOneRepMaxWeightClaim(normalized, claim)) continue;
    if (isUnsupportedReferenceClaim(normalized, claim)) continue;
    const mention = mentionForClaim(mentions, normalized, claim);
    // A claim that cannot be tied to an exercise mention is not judged here.
    if (!mention) continue;
    const exerciseRows = rowsForMention(mention);
    if (exerciseRows.length === 0) {
      failures.push({
        key: 'unrouted_weighted_set_claim',
        severity: 'blocking',
        exerciseName: mention.name,
        reason: `Draft asserts a weighted set for ${mention.name}, but that exercise was not in routed tool evidence.`
      });
    } else if (!setPairSupported(claim, exerciseRows)) {
      failures.push({
        key: 'unsupported_weighted_set_claim',
        severity: 'blocking',
        exerciseName: mention.name,
        reason: `Draft asserts ${claim.text} for ${mention.name}, but routed evidence does not include that weight/reps pair.`
      });
    }
  }

  // Bare load claims; anything already covered by a set claim is skipped so it
  // is not reported twice.
  for (const claim of extractWeightClaims(normalized)) {
    if (claimWithinSetClaim(claim, setClaims)) continue;
    if (isEstimatedOneRepMaxWeightClaim(normalized, claim) || isVolumeWeightClaim(normalized, claim) || isBodyWeightClaim(normalized, claim)) continue;
    if (isUnsupportedReferenceClaim(normalized, claim)) continue;
    const mention = mentionForClaim(mentions, normalized, claim);
    if (!mention) continue;
    const exerciseRows = rowsForMention(mention);
    if (exerciseRows.length === 0) {
      failures.push({
        key: 'unrouted_weight_claim',
        severity: 'blocking',
        exerciseName: mention.name,
        reason: `Draft asserts a load for ${mention.name}, but that exercise was not in routed tool evidence.`
      });
    } else if (!weightSupported(claim, exerciseRows)) {
      failures.push({
        key: 'unsupported_weight_claim',
        severity: 'blocking',
        exerciseName: mention.name,
        reason: `Draft asserts ${claim.text} for ${mention.name}, but routed evidence does not include that load.`
      });
    }
  }

  // Direction check: the wording around each mentioned exercise must not
  // contradict the load direction of its newest comparable row.
  const exerciseNames = rows.map((row) => row.exerciseName);
  for (const mention of mentions) {
    const exerciseRows = rowsForMention(mention);
    if (exerciseRows.length === 0) {
      // Only advisory, and only when some tool evidence exists at all.
      if (strictMentionProvenance && toolResults.length > 0) {
        failures.push({
          key: 'unrouted_exercise_mention',
          severity: 'advisory',
          exerciseName: mention.name,
          reason: `Draft mentions ${mention.name}, but that exercise was not in routed tool evidence.`
        });
      }
      continue;
    }
    const comparable = newestComparableRow(exerciseRows);
    if (!comparable) continue;
    const direction = comparable.comparedToPreviousSession.loadDirection;
    // Strip the exercise name from each window so words inside the name itself
    // cannot be read as decline/improvement language.
    const exercisePattern = new RegExp(escapeRegExp(mention.name), 'gi');
    const windows = directionWindows(normalized, mention.name, exerciseNames)
      .map((window) => window.replace(exercisePattern, ''));
    if (direction === 'up' && windows.some(declineLanguage)) {
      failures.push({
        key: 'direction_inversion',
        severity: 'blocking',
        exerciseName: mention.name,
        reason: `Draft frames ${mention.name} as declining, but routed evidence says top load increased.`
      });
    }
    if (direction === 'down' && windows.some(improvementLanguage)) {
      failures.push({
        key: 'direction_inversion',
        severity: 'blocking',
        exerciseName: mention.name,
        reason: `Draft frames ${mention.name} as improving, but routed evidence says top load decreased.`
      });
    }
  }

  return failures;
}
|
|
705
|
+
|
|
706
|
+
// Phrases that refer to the coach artifact (observation / note / card / system)
// as a third-party object. All patterns are case-insensitive and non-global,
// so each yields at most its first match and keeps no state between calls.
const OBSERVATION_FOLLOWUP_ARTIFACT_PATTERNS = [
  /\bi can confirm (?:the |this )?(?:coach )?observation\b/i,
  /\b(?:the|this) (?:coach )?observation\b/i,
  /\bcoach observation\b/i,
  /\b(?:the|this) (?:coach )?note\b/i,
  /\bcoach note\b/i,
  /\b(?:the|this) (?:coach )?card\b/i,
  /\bcoach card\b/i,
  /\b(?:the|this) system\b/i
];

/**
 * Flags observation follow-up answers that talk about the coach artifact
 * instead of speaking in the coach's own voice.
 *
 * @param {string} answer - Answer text to scan.
 * @param {string|undefined} route - Effective route; only
 *   'coach_observation_followup' is checked.
 * @returns {Array<object>} Empty when the route does not apply or nothing
 *   matched; otherwise a single blocking failure listing the matched phrases.
 */
function checkObservationFollowupVoice(answer, route) {
  if (route !== 'coach_observation_followup') return [];

  // Collect the first match of every pattern, in pattern order.
  const matchedPhrases = [];
  for (const pattern of OBSERVATION_FOLLOWUP_ARTIFACT_PATTERNS) {
    const phrase = answer.match(pattern)?.[0];
    if (phrase) matchedPhrases.push(phrase);
  }

  const hits = uniqueStrings(matchedPhrases);
  if (hits.length === 0) return [];

  const listedHits = hits.join(', ');
  return [{
    key: 'observation_followup_artifact_voice',
    severity: 'blocking',
    reason: `Observation follow-up answer talks about the coach artifact instead of speaking as the coach: ${listedHits}. Own it with first-person coaching language.`
  }];
}
|
|
729
|
+
|
|
730
|
+
/**
 * Entry point of the Ask answer verifier.
 *
 * Short-circuits for empty / `NO_INSIGHT` answers and for the `program_design`
 * route, enforces first-person voice on observation follow-ups, and otherwise
 * runs the snapshot and tool-provenance claim checks.
 *
 * @param {object} [params]
 * @param {string} params.answer - Draft answer text.
 * @param {object} params.snapshot - Passed through to the claim checks.
 * @param {object} [params.routingMetadata] - Source of the route and the
 *   requested action / follow-up intent.
 * @param {Date} [params.today] - Passed through to the claim checks.
 * @param {Array} [params.exclude] - Passed through to the claim checks.
 * @param {boolean} [params.strictMentionProvenance] - Passed to the provenance check.
 * @param {Function} [params.executeTool] - Tool executor for the provenance check.
 * @returns {object} The structure produced by `verificationResult`.
 */
export function verifyAskAnswer({
  answer,
  snapshot,
  routingMetadata = {},
  today = new Date(),
  exclude = [],
  strictMentionProvenance = false,
  executeTool = executeCoachReadTool
} = {}) {
  const draft = normalizeText(answer);
  if (draft === '' || draft === 'NO_INSIGHT') {
    return verificationResult([]);
  }

  // The top-level route wins over the one nested in the evidence plan.
  const evidencePlan = routingMetadata?.evidencePlan;
  const route = routingMetadata?.effectiveRoute
    ?? routingMetadata?.route
    ?? evidencePlan?.effectiveRoute
    ?? evidencePlan?.route;

  if (route === 'program_design') {
    return verificationResult([], { skipped: true, skipReason: 'program_design_allows_new_exercises' });
  }

  const voiceFailures = checkObservationFollowupVoice(draft, route);

  const requestedAction = routingMetadata?.intent?.requestedAction ?? routingMetadata?.requestedAction;
  const followUpIntents = [
    routingMetadata?.observationFollowUpIntent,
    routingMetadata?.requestedCoachObservationIntent
  ];
  const isPlanAdjustment = requestedAction === 'draft_changeset' || followUpIntents.includes('plan_adjustment');
  const isSuccessorPlan = requestedAction === 'draft_plan' || followUpIntents.includes('successor_plan');

  // A successor program or plan changeset prescribes future loads, which the
  // snapshot/provenance claim checks would flag. Those checks are waived here;
  // the voice check still applies.
  const isForwardLooking = isSuccessorPlan || isPlanAdjustment;
  if (route === 'coach_observation_followup' && isForwardLooking) {
    if (voiceFailures.length > 0) return verificationResult(voiceFailures);
    const skipReason = isPlanAdjustment
      ? 'plan_adjustment_allows_future_prescription'
      : 'successor_plan_allows_future_prescription';
    return verificationResult([], { skipped: true, skipReason });
  }

  const snapshotFailures = checkSnapshotClaims(draft, snapshot, routingMetadata, { today, exclude });
  const provenanceFailures = checkToolProvenance(draft, snapshot, routingMetadata, {
    today,
    exclude,
    strictMentionProvenance,
    executeTool
  });
  return verificationResult([...voiceFailures, ...snapshotFailures, ...provenanceFailures]);
}
|
|
776
|
+
|
|
777
|
+
/**
 * Packages a list of failures into the verifier's result structure.
 *
 * Any failure whose severity is not 'advisory' counts as blocking. `passed`
 * depends only on the blocking failures; `status` is 'skipped' when the
 * `skipped` option is set, otherwise 'passed' or 'failed'.
 *
 * @param {Array<object>} failures - Failure records with `key`, `severity`, `reason`.
 * @param {object} [options]
 * @param {boolean} [options.skipped] - Whether the claim checks were waived.
 * @param {string|null} [options.skipReason] - Why they were waived.
 * @returns {object} Result with version, status flags, the partitioned
 *   failures, two summary checks, counts, and the distinct failure keys.
 */
function verificationResult(failures, { skipped = false, skipReason = null } = {}) {
  // Partition in one pass, keeping the original relative order in each bucket.
  const blockingFailures = [];
  const advisoryFailures = [];
  for (const failure of failures) {
    const bucket = failure.severity === 'advisory' ? advisoryFailures : blockingFailures;
    bucket.push(failure);
  }

  const hasNoBlocking = blockingFailures.length === 0;
  const hasNoAdvisory = advisoryFailures.length === 0;
  const joinReasons = (list) => list.map((failure) => failure.reason).join(' ');

  let status = 'failed';
  if (skipped) status = 'skipped';
  else if (hasNoBlocking) status = 'passed';

  const blockingCheck = {
    key: 'ask_answer_blocking_claims',
    passed: hasNoBlocking,
    severity: 'blocking',
    reason: hasNoBlocking
      ? 'No blocking unsupported Ask answer claims detected.'
      : joinReasons(blockingFailures)
  };
  const advisoryCheck = {
    key: 'ask_answer_advisory_claims',
    passed: hasNoAdvisory,
    severity: 'advisory',
    reason: hasNoAdvisory
      ? 'No advisory Ask answer provenance gaps detected.'
      : joinReasons(advisoryFailures)
  };

  return {
    version: ASK_ANSWER_VERIFIER_VERSION,
    skipped,
    skipReason,
    passed: hasNoBlocking,
    status,
    blockingFailures,
    advisoryFailures,
    checks: [blockingCheck, advisoryCheck],
    blockingFailureCount: blockingFailures.length,
    advisoryFailureCount: advisoryFailures.length,
    failureKeys: uniqueStrings(failures.map((failure) => failure.key))
  };
}
|
|
812
|
+
|
|
813
|
+
/**
 * Reduces a verification result to a compact metadata record.
 *
 * @param {object|null|undefined} verification - Result from the verifier.
 * @param {object} [options]
 * @param {number} [options.retryCount] - Number of retries performed.
 * @param {boolean} [options.repaired] - Whether the answer was repaired.
 * @param {boolean} [options.fallback] - Whether the fallback answer was used;
 *   forces status 'fallback' and `passed: false`.
 * @returns {object|null} `null` when no verification is given; otherwise the
 *   record after it has been passed through `compactObject`.
 */
export function askVerificationMetadata(verification, {
  retryCount = 0,
  repaired = false,
  fallback = false
} = {}) {
  if (!verification) return null;

  // Flags are only recorded when set; unset ones are left undefined.
  const usedFallback = Boolean(fallback);
  const metadata = {
    version: verification.version ?? ASK_ANSWER_VERIFIER_VERSION,
    status: usedFallback ? 'fallback' : verification.status,
    passed: !usedFallback && verification.passed === true,
    skipped: verification.skipped === true || undefined,
    skipReason: verification.skipReason,
    retryCount,
    repaired: repaired ? true : undefined,
    fallback: usedFallback || undefined,
    blockingFailureCount: verification.blockingFailureCount ?? 0,
    advisoryFailureCount: verification.advisoryFailureCount ?? 0,
    failureKeys: verification.failureKeys ?? []
  };
  return compactObject(metadata);
}
|
|
833
|
+
|
|
834
|
+
/**
 * Decides whether a draft answer needs a repair pass.
 *
 * @param {object|null|undefined} verification - Verification result or metadata.
 * @returns {boolean} True only when `passed` is exactly `false` and the
 *   blocking failure count is greater than zero.
 */
export function shouldRepairAskAnswer(verification) {
  if (verification?.passed !== false) return false;
  const blockingCount = verification.blockingFailureCount ?? 0;
  return blockingCount > 0;
}
|
|
837
|
+
|
|
838
|
+
/**
 * Builds the prompt context for a repair attempt after verification failed.
 *
 * The original context is followed by rewrite instructions and up to six
 * verifier findings. Findings are de-duplicated first: the verifier can emit
 * the same reason once per claim, and repeats would otherwise use up the six
 * slots and hide other findings. Failures without a reason are skipped rather
 * than rendered as "- undefined".
 *
 * @param {string} context - Evidence context used for the failed draft.
 * @param {string} _draftAnswer - Failed draft; intentionally unused so the
 *   rejected text is never echoed back into the prompt.
 * @param {object|null|undefined} verification - Result whose
 *   `blockingFailures` supply the findings.
 * @returns {string} Newline-joined repair context.
 */
export function buildAskAnswerRepairContext(context, _draftAnswer, verification) {
  const maxFindings = 6;
  const distinctReasons = new Set(
    (verification?.blockingFailures ?? [])
      .map((failure) => failure?.reason)
      .filter(Boolean)
  );
  // A Set iterates in insertion order, so the first occurrence of each reason
  // keeps its original position.
  const reasons = [...distinctReasons]
    .slice(0, maxFindings)
    .map((reason) => `- ${reason}`)
    .join('\n');
  return [
    context,
    '',
    'Ask answer verification failed. Rewrite the answer so every exercise, load, rep, direction, and target-hit claim is directly supported by the evidence above.',
    'Do not preserve unsupported claims. If the evidence is not enough, say that plainly instead of guessing.',
    'Verifier findings:',
    reasons || '- Unsupported training claim detected.',
    '',
    'Do not reuse the failed draft. Produce a fresh answer from the evidence.'
  ].join('\n');
}
|
|
854
|
+
|
|
855
|
+
/**
 * Returns the fixed user-facing message for an answer that could not be verified.
 *
 * @returns {string} The fallback answer text.
 */
export function safeAskVerificationFallback() {
  const sentences = [
    'I can’t answer that safely from the evidence I just checked.',
    'The draft answer included training claims I could not verify, so I’m not going to guess.',
    'Ask me about a specific session or lift and I’ll re-check the data.'
  ];
  return sentences.join(' ');
}
|