incremnt 0.8.1 → 0.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -1
- package/src/ask-answer-verifier.js +249 -14
- package/src/ask-coach.js +495 -33
- package/src/openrouter.js +57 -30
- package/src/promptfoo-evals.js +20 -3
- package/src/queries.js +500 -21
- package/src/score-prelude.js +16 -13
- package/src/summary-evals.js +106 -474
- package/src/sync-service.js +73 -13
package/src/sync-service.js
CHANGED
|
@@ -4,6 +4,7 @@ import { formatIncrementScorePrelude } from './score-prelude.js';
|
|
|
4
4
|
import {
|
|
5
5
|
askVerificationMetadata,
|
|
6
6
|
buildAskAnswerRepairContext,
|
|
7
|
+
degradeAskAnswer,
|
|
7
8
|
safeAskVerificationFallback,
|
|
8
9
|
shouldRepairAskAnswer,
|
|
9
10
|
verifyAskAnswer
|
|
@@ -207,10 +208,28 @@ function mergeAgenticToolProvenance(routingMetadata, toolInvocations = []) {
|
|
|
207
208
|
}
|
|
208
209
|
}
|
|
209
210
|
|
|
211
|
+
/**
 * Produce a storage-safe copy of the `facts` payload attached to a
 * body-weight evidence item.
 *
 * - Anything that is not a plain (non-array) object yields `null`.
 * - When `facts.rows` is an array, only rows with a truthy `date` and a real
 *   numeric `weightKg` are kept, limited to the last 90 of them. Each kept row
 *   is reduced to `{ date, weightKg }`, with the date cut to its first 10
 *   characters and the weight rounded to one decimal place.
 * - All other top-level keys are copied through as-is (shallow copy); they are
 *   bounded only by the overall serialized-size check below.
 *
 * @param {unknown} value Raw `facts` value taken from an evidence item.
 * @returns {object|null} A detached, JSON-round-tripped copy, or `null` when
 *   the input is unusable or its JSON form is longer than
 *   `ASK_STRUCTURED_MAX_JSON_LENGTH`.
 */
function sanitizeBodyWeightEvidenceFactsForStorage(value) {
  if (!value || typeof value !== 'object' || Array.isArray(value)) return null;

  // `Number(null)`, `Number('')`, `Number(false)` and `Number([])` all coerce
  // to 0, so a bare `Number.isFinite(Number(x))` check would store a missing
  // weight as a 0 kg measurement. Accept only finite numbers and non-blank
  // numeric strings; everything else counts as "no weight".
  const parseWeightKg = (raw) => {
    if (typeof raw === 'number') return Number.isFinite(raw) ? raw : null;
    if (typeof raw !== 'string' || raw.trim() === '') return null;
    const parsed = Number(raw);
    return Number.isFinite(parsed) ? parsed : null;
  };

  const facts = { ...value };
  if (Array.isArray(facts.rows)) {
    facts.rows = facts.rows
      .filter((row) => row?.date && parseWeightKg(row.weightKg) !== null)
      // Keep the trailing 90 valid rows (presumably the most recent ones --
      // assumes the caller supplies rows in chronological order; confirm).
      .slice(-90)
      .map((row) => ({
        date: askStorageString(String(row.date).slice(0, 10), { maxLength: 10 }),
        weightKg: Math.round(parseWeightKg(row.weightKg) * 10) / 10
      }))
      // Drop rows whose date did not survive askStorageString.
      .filter((row) => row.date);
  }

  // Enforce the size budget on the serialized form, then round-trip through
  // JSON so the returned object shares no references with the input.
  const serialized = JSON.stringify(facts);
  if (serialized.length > ASK_STRUCTURED_MAX_JSON_LENGTH) return null;
  return JSON.parse(serialized);
}
|
|
228
|
+
|
|
210
229
|
function sanitizeAskEvidenceForStorage(item) {
|
|
211
230
|
if (!item || typeof item !== 'object' || Array.isArray(item)) return null;
|
|
212
231
|
const sanitized = {};
|
|
213
|
-
for (const key of ['label', 'section', 'toolName', 'sourceTimestamp']) {
|
|
232
|
+
for (const key of ['label', 'section', 'toolName', 'sourceTimestamp', 'kind', 'presentation']) {
|
|
214
233
|
const value = askStorageString(item[key], { maxLength: 240 });
|
|
215
234
|
if (value) sanitized[key] = value;
|
|
216
235
|
}
|
|
@@ -218,6 +237,13 @@ function sanitizeAskEvidenceForStorage(item) {
|
|
|
218
237
|
const values = askStorageStringArray(item[key], { maxItems: ASK_STRUCTURED_MAX_ITEMS, maxLength: 160 });
|
|
219
238
|
if (values.length > 0) sanitized[key] = values;
|
|
220
239
|
}
|
|
240
|
+
const isBodyWeightEvidence = sanitized.toolName === 'get_body_weight_snapshot'
|
|
241
|
+
|| sanitized.kind === 'body_weight_trend'
|
|
242
|
+
|| sanitized.presentation === 'body_weight_trend';
|
|
243
|
+
if (isBodyWeightEvidence) {
|
|
244
|
+
const facts = sanitizeBodyWeightEvidenceFactsForStorage(item.facts);
|
|
245
|
+
if (facts) sanitized.facts = facts;
|
|
246
|
+
}
|
|
221
247
|
return Object.keys(sanitized).length > 0 ? sanitized : null;
|
|
222
248
|
}
|
|
223
249
|
|
|
@@ -242,7 +268,7 @@ function sanitizeAskProgramDraftForStorage(value) {
|
|
|
242
268
|
return JSON.parse(serialized);
|
|
243
269
|
}
|
|
244
270
|
|
|
245
|
-
function sanitizeAskStructuredResponseForStorage(structured) {
|
|
271
|
+
export function sanitizeAskStructuredResponseForStorage(structured) {
|
|
246
272
|
if (!structured || typeof structured !== 'object' || Array.isArray(structured)) return null;
|
|
247
273
|
const confidence = askStorageString(structured.confidence, { maxLength: 40 });
|
|
248
274
|
const answer = askStorageString(structured.answer);
|
|
@@ -602,6 +628,7 @@ export function buildAskInteractionLogPayload({
|
|
|
602
628
|
fallback: askResult?.fallback === true ? true : undefined,
|
|
603
629
|
route: routingMetadata?.route ?? evidencePlan?.route,
|
|
604
630
|
effectiveRoute: routingMetadata?.effectiveRoute ?? evidencePlan?.effectiveRoute,
|
|
631
|
+
responseProfile: routingMetadata?.responseProfile ?? routingMetadata?.intent?.responseProfile,
|
|
605
632
|
requestedAction: routingMetadata?.intent?.requestedAction,
|
|
606
633
|
intentConfidence: typeof routingMetadata?.intent?.confidence === 'number' ? routingMetadata.intent.confidence : undefined,
|
|
607
634
|
structuredConfidence: typeof structured?.confidence === 'string' ? structured.confidence : undefined,
|
|
@@ -620,6 +647,8 @@ export function buildAskInteractionLogPayload({
|
|
|
620
647
|
hasProgramDraft: structured?.programDraft != null ? true : undefined,
|
|
621
648
|
askVerificationStatus: answerVerification.status,
|
|
622
649
|
askVerificationRetryCount: typeof answerVerification.retryCount === 'number' ? answerVerification.retryCount : undefined,
|
|
650
|
+
askVerificationDegraded: answerVerification.degraded === true ? true : undefined,
|
|
651
|
+
askVerificationRedactedCount: typeof answerVerification.redactedCount === 'number' ? answerVerification.redactedCount : undefined,
|
|
623
652
|
askVerificationBlockingFailureCount: typeof answerVerification.blockingFailureCount === 'number' ? answerVerification.blockingFailureCount : undefined,
|
|
624
653
|
askVerificationAdvisoryFailureCount: typeof answerVerification.advisoryFailureCount === 'number' ? answerVerification.advisoryFailureCount : undefined,
|
|
625
654
|
askVerificationFailureKeys: logStringArray(answerVerification.failureKeys),
|
|
@@ -5165,6 +5194,11 @@ export function createSyncServiceRequestHandler({
|
|
|
5165
5194
|
const coachObservationFollowUp = selectAskCoachObservationFollowUp(requestedCoachObservation, coachObservations);
|
|
5166
5195
|
const missingRequestedCoachObservation = Boolean(requestedCoachObservation && !coachObservationFollowUp);
|
|
5167
5196
|
|
|
5197
|
+
const persistedKind = persistedConversation?.kind ?? (conversationId?.startsWith('weekly-checkin:') ? 'weekly-checkin' : 'ask');
|
|
5198
|
+
// The weekly check-in shares this ask path but runs under the terse
|
|
5199
|
+
// WEEKLY_CHECKIN_PROMPT; force the defensive profile so the expansive
|
|
5200
|
+
// evidence merge and score headline do not contradict that prompt.
|
|
5201
|
+
const askResponseProfileOverride = persistedKind === 'weekly-checkin' ? 'defensive' : null;
|
|
5168
5202
|
const routedContext = coachObservationFollowUp
|
|
5169
5203
|
? askObservationFollowUpContext(snapshot, question, coachObservationFollowUp, {
|
|
5170
5204
|
exclude,
|
|
@@ -5177,9 +5211,11 @@ export function createSyncServiceRequestHandler({
|
|
|
5177
5211
|
intent: requestedCoachObservation.intent,
|
|
5178
5212
|
today: new Date()
|
|
5179
5213
|
})
|
|
5180
|
-
: askRoutedContext(snapshot, question, { exclude, coachFacts, coachObservations, history: canonicalHistory });
|
|
5181
|
-
const
|
|
5182
|
-
|
|
5214
|
+
: askRoutedContext(snapshot, question, { exclude, coachFacts, coachObservations, history: canonicalHistory, responseProfileOverride: askResponseProfileOverride });
|
|
5215
|
+
const incrementScorePrelude = formatIncrementScorePrelude(scoreSnapshots, {
|
|
5216
|
+
question,
|
|
5217
|
+
responseProfile: routedContext.metadata?.responseProfile ?? routedContext.metadata?.intent?.responseProfile
|
|
5218
|
+
});
|
|
5183
5219
|
|
|
5184
5220
|
const preludes = [incrementScorePrelude].filter(Boolean);
|
|
5185
5221
|
const ctx = preludes.length > 0
|
|
@@ -5296,6 +5332,8 @@ export function createSyncServiceRequestHandler({
|
|
|
5296
5332
|
let verificationRetryCount = 0;
|
|
5297
5333
|
let verificationRepaired = false;
|
|
5298
5334
|
let verificationFallback = false;
|
|
5335
|
+
let verificationDegraded = false;
|
|
5336
|
+
let verificationRedactedCount = 0;
|
|
5299
5337
|
|
|
5300
5338
|
if (persistedKind === 'ask' && shouldRepairAskAnswer(verification)) {
|
|
5301
5339
|
verificationRetryCount = 1;
|
|
@@ -5322,20 +5360,42 @@ export function createSyncServiceRequestHandler({
|
|
|
5322
5360
|
}
|
|
5323
5361
|
|
|
5324
5362
|
if (persistedKind === 'ask' && shouldRepairAskAnswer(verification)) {
|
|
5325
|
-
|
|
5326
|
-
|
|
5327
|
-
|
|
5328
|
-
|
|
5329
|
-
|
|
5330
|
-
|
|
5331
|
-
|
|
5363
|
+
// Graceful degrade before refusing: strip the specific unsupported
|
|
5364
|
+
// sentences/bullets and ship the rest if it re-verifies clean. A
|
|
5365
|
+
// coaching answer minus one clause beats a blanket refusal.
|
|
5366
|
+
const degraded = degradeAskAnswer(attempt.assistantAnswer, verification);
|
|
5367
|
+
const degradedVerification = degraded.usable
|
|
5368
|
+
? verifyAskAnswer({
|
|
5369
|
+
answer: degraded.text,
|
|
5370
|
+
snapshot,
|
|
5371
|
+
routingMetadata,
|
|
5372
|
+
today: new Date(),
|
|
5373
|
+
exclude: [...exclude]
|
|
5374
|
+
})
|
|
5375
|
+
: null;
|
|
5376
|
+
if (degradedVerification && degradedVerification.blockingFailureCount === 0) {
|
|
5377
|
+
attempt = { ...attempt, assistantAnswer: degraded.text };
|
|
5378
|
+
verification = degradedVerification;
|
|
5379
|
+
verificationDegraded = true;
|
|
5380
|
+
verificationRedactedCount = degraded.redactedCount;
|
|
5381
|
+
} else {
|
|
5382
|
+
verificationFallback = true;
|
|
5383
|
+
attempt = {
|
|
5384
|
+
...attempt,
|
|
5385
|
+
assistantAnswer: safeAskVerificationFallback(),
|
|
5386
|
+
programDraft: undefined,
|
|
5387
|
+
planChangeset: undefined
|
|
5388
|
+
};
|
|
5389
|
+
}
|
|
5332
5390
|
}
|
|
5333
5391
|
|
|
5334
5392
|
const answerVerification = persistedKind === 'ask'
|
|
5335
5393
|
? askVerificationMetadata(verification, {
|
|
5336
5394
|
retryCount: verificationRetryCount,
|
|
5337
5395
|
repaired: verificationRepaired,
|
|
5338
|
-
fallback: verificationFallback
|
|
5396
|
+
fallback: verificationFallback,
|
|
5397
|
+
degraded: verificationDegraded,
|
|
5398
|
+
redactedCount: verificationRedactedCount
|
|
5339
5399
|
})
|
|
5340
5400
|
: undefined;
|
|
5341
5401
|
if (answerVerification) {
|