incremnt 0.8.1 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -1
- package/src/ask-answer-verifier.js +249 -14
- package/src/ask-coach.js +309 -21
- package/src/openrouter.js +55 -30
- package/src/promptfoo-evals.js +20 -3
- package/src/queries.js +113 -18
- package/src/score-prelude.js +16 -13
- package/src/summary-evals.js +106 -474
- package/src/sync-service.js +46 -11
package/src/promptfoo-evals.js
CHANGED
|
@@ -25,6 +25,7 @@ function envList(name) {
|
|
|
25
25
|
|
|
26
26
|
export function buildPromptfooTestCase(testCase, { caseSet = testCase.caseSet ?? 'synthetic', fixtureFile = testCase.fixtureFile ?? null } = {}) {
|
|
27
27
|
const question = testCase.context?.question ?? testCase.question ?? testCase.name;
|
|
28
|
+
const today = testCase.context?.today ?? testCase.today ?? null;
|
|
28
29
|
|
|
29
30
|
return {
|
|
30
31
|
description: `${testCase.surface}: ${testCase.name ?? testCase.id}`,
|
|
@@ -35,6 +36,7 @@ export function buildPromptfooTestCase(testCase, { caseSet = testCase.caseSet ??
|
|
|
35
36
|
snapshotFile: testCase.snapshotFile ?? null,
|
|
36
37
|
surface: testCase.surface,
|
|
37
38
|
question,
|
|
39
|
+
...(today ? { today } : {}),
|
|
38
40
|
output: testCase.output,
|
|
39
41
|
shouldPass: testCase.shouldPass !== false
|
|
40
42
|
},
|
|
@@ -83,8 +85,21 @@ async function resolvePromptfooEval(vars = {}) {
|
|
|
83
85
|
throw new Error(`Promptfoo eval case not found: ${caseSet}/${vars.caseId ?? '(missing caseId)'}`);
|
|
84
86
|
}
|
|
85
87
|
|
|
86
|
-
const
|
|
87
|
-
|
|
88
|
+
const contextOverrides = {
|
|
89
|
+
...(vars.question ? { question: vars.question } : {}),
|
|
90
|
+
...(vars.today ? { today: vars.today } : {})
|
|
91
|
+
};
|
|
92
|
+
const resolvedTestCase = Object.keys(contextOverrides).length > 0
|
|
93
|
+
? {
|
|
94
|
+
...testCase,
|
|
95
|
+
context: {
|
|
96
|
+
...(testCase.context ?? {}),
|
|
97
|
+
...contextOverrides
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
: testCase;
|
|
101
|
+
const snapshot = await loadSummaryEvalSnapshot(resolvedTestCase);
|
|
102
|
+
return { testCase: resolvedTestCase, snapshot };
|
|
88
103
|
}
|
|
89
104
|
|
|
90
105
|
function summarizeFailedChecks(result) {
|
|
@@ -98,7 +113,9 @@ function promptfooMetadataKey(vars = {}) {
|
|
|
98
113
|
return [
|
|
99
114
|
vars.caseSet ?? process.env.SUMMARY_EVAL_CASE_SET ?? 'synthetic',
|
|
100
115
|
vars.fixtureFile ?? '',
|
|
101
|
-
vars.caseId ?? ''
|
|
116
|
+
vars.caseId ?? '',
|
|
117
|
+
vars.question ?? '',
|
|
118
|
+
vars.today ?? ''
|
|
102
119
|
].join(':');
|
|
103
120
|
}
|
|
104
121
|
|
package/src/queries.js
CHANGED
|
@@ -3111,9 +3111,30 @@ export function getGoalStatus(snapshot, { limit = 5 } = {}) {
|
|
|
3111
3111
|
});
|
|
3112
3112
|
}
|
|
3113
3113
|
|
|
3114
|
+
/**
 * Rounds a number to one decimal place.
 *
 * The value is scaled by ten, passed through `Math.round`, and scaled back, so
 * a scaled value that lands exactly on .5 rounds toward +Infinity (for example
 * -1.25 becomes -1.2). `NaN` propagates unchanged.
 *
 * @param {number} value - Number to round.
 * @returns {number} `value` rounded to the nearest tenth.
 */
function round1(value) {
  return Math.round(value * 10) / 10;
}
|
|
3117
|
+
|
|
3118
|
+
/**
 * Finds the strongest earlier set: the one with the highest `e1rm` whose
 * normalized date is strictly before the record's date.
 *
 * @param {Iterable<{date: *, e1rm: number}>} sets - Candidate sets; each
 *   `date` is passed through `normalizeDateOnly` before comparison.
 * @param {?string} recordDateIso - Date of the record being compared against.
 *   Presumably a `YYYY-MM-DD` string so that `>=` orders chronologically —
 *   TODO confirm against `normalizeDateOnly`.
 * @returns {?object} The matching element of `sets` (same reference), or
 *   `null` when no set qualifies.
 */
function priorBestSetBefore(sets, recordDateIso) {
  // Without a record date no set can be ordered "before" it; every iteration
  // would be skipped, so bail out once instead of re-checking per set.
  if (recordDateIso == null) return null;

  let prior = null;
  for (const set of sets) {
    const setDate = normalizeDateOnly(set.date);
    // Undated sets and sets on or after the record's own day are not "prior".
    if (setDate == null || setDate >= recordDateIso) continue;
    // Strictly greater keeps the first-encountered set when e1rm values tie.
    if (!prior || set.e1rm > prior.e1rm) prior = set;
  }
  return prior;
}
|
|
3127
|
+
|
|
3128
|
+
/**
 * Labels how a record was achieved relative to the previous best set.
 *
 * @param {{weight: number}} record - The record-setting set.
 * @param {?{weight: number}} priorBest - The previous best set, or a falsy
 *   value when there is none.
 * @returns {'first'|'load_pr'|'rep_pr'} `'first'` when there is no prior best,
 *   `'load_pr'` when the record's weight is strictly heavier than the prior
 *   best's weight, otherwise `'rep_pr'`.
 */
function classifyRecordKind(record, priorBest) {
  if (!priorBest) return 'first';
  // A PR achieved by adding reps at the same (or even lower) load reads as a
  // stall to anything reasoning on bar weight, so distinguish it explicitly.
  return record.weight > priorBest.weight ? 'load_pr' : 'rep_pr';
}
|
|
3134
|
+
|
|
3114
3135
|
export function getRecords(snapshot, { exercises = [], limit = 15, recentSince = null, today = new Date() } = {}) {
|
|
3115
3136
|
const filter = exercises.length > 0 ? new Set(exercises.map((exercise) => exercise.canonical ?? canonicalExerciseName(exercise))) : null;
|
|
3116
|
-
const
|
|
3137
|
+
const setsByExercise = new Map();
|
|
3117
3138
|
for (const session of snapshot.sessions ?? []) {
|
|
3118
3139
|
for (const exercise of session.exercises ?? []) {
|
|
3119
3140
|
const key = canonicalExerciseName(exercise.name);
|
|
@@ -3121,27 +3142,85 @@ export function getRecords(snapshot, { exercises = [], limit = 15, recentSince =
|
|
|
3121
3142
|
for (const set of exercise.sets ?? []) {
|
|
3122
3143
|
if (!set.isComplete) continue;
|
|
3123
3144
|
const e1rm = Number(set.weight) * (1 + Number(set.reps) / 30);
|
|
3124
|
-
|
|
3125
|
-
|
|
3126
|
-
|
|
3127
|
-
|
|
3128
|
-
|
|
3129
|
-
date: completionDateForSession(session),
|
|
3130
|
-
sessionId: session.id ?? null
|
|
3131
|
-
});
|
|
3145
|
+
if (!(e1rm > 0)) continue;
|
|
3146
|
+
let entry = setsByExercise.get(key);
|
|
3147
|
+
if (!entry) {
|
|
3148
|
+
entry = { sets: [] };
|
|
3149
|
+
setsByExercise.set(key, entry);
|
|
3132
3150
|
}
|
|
3151
|
+
entry.sets.push({
|
|
3152
|
+
name: exercise.name,
|
|
3153
|
+
e1rm,
|
|
3154
|
+
weight: Number(set.weight),
|
|
3155
|
+
reps: Number(set.reps),
|
|
3156
|
+
date: completionDateForSession(session),
|
|
3157
|
+
sessionId: session.id ?? null
|
|
3158
|
+
});
|
|
3133
3159
|
}
|
|
3134
3160
|
}
|
|
3135
3161
|
}
|
|
3136
|
-
|
|
3137
|
-
|
|
3162
|
+
|
|
3163
|
+
const records = [];
|
|
3164
|
+
for (const entry of setsByExercise.values()) {
|
|
3165
|
+
let best = null;
|
|
3166
|
+
for (const set of entry.sets) {
|
|
3167
|
+
// Strictly greater keeps the earliest set that reached the best e1RM.
|
|
3168
|
+
if (!best || set.e1rm > best.e1rm) best = set;
|
|
3169
|
+
}
|
|
3170
|
+
if (!best) continue;
|
|
3171
|
+
records.push({
|
|
3172
|
+
name: best.name,
|
|
3173
|
+
e1rm: best.e1rm,
|
|
3174
|
+
weight: best.weight,
|
|
3175
|
+
reps: best.reps,
|
|
3176
|
+
date: best.date,
|
|
3177
|
+
sessionId: best.sessionId,
|
|
3178
|
+
sets: entry.sets
|
|
3179
|
+
});
|
|
3180
|
+
}
|
|
3181
|
+
|
|
3182
|
+
const allRows = records
|
|
3183
|
+
.map((record) => ({
|
|
3184
|
+
name: record.name,
|
|
3185
|
+
e1rm: record.e1rm,
|
|
3186
|
+
weight: record.weight,
|
|
3187
|
+
reps: record.reps,
|
|
3188
|
+
date: record.date,
|
|
3189
|
+
sessionId: record.sessionId
|
|
3190
|
+
}))
|
|
3138
3191
|
.sort((a, b) => b.e1rm - a.e1rm);
|
|
3192
|
+
|
|
3139
3193
|
const todayIso = dateOnlyString(today);
|
|
3140
|
-
const
|
|
3141
|
-
|
|
3142
|
-
|
|
3143
|
-
|
|
3144
|
-
|
|
3194
|
+
const recentSinceIso = recentSince ? normalizeDateOnly(recentSince) : null;
|
|
3195
|
+
const recentRecords = recentSinceIso
|
|
3196
|
+
? records
|
|
3197
|
+
.filter((record) => {
|
|
3198
|
+
const recordDate = normalizeDateOnly(record.date);
|
|
3199
|
+
return recordDate != null && recordDate >= recentSinceIso && recordDate <= todayIso;
|
|
3200
|
+
})
|
|
3201
|
+
.sort((a, b) => b.e1rm - a.e1rm)
|
|
3202
|
+
.map((record) => {
|
|
3203
|
+
const recordDateIso = normalizeDateOnly(record.date);
|
|
3204
|
+
const priorBest = priorBestSetBefore(record.sets, recordDateIso);
|
|
3205
|
+
const delta = priorBest ? round1(record.e1rm - priorBest.e1rm) : null;
|
|
3206
|
+
const deltaPct = priorBest && priorBest.e1rm > 0
|
|
3207
|
+
? round1(((record.e1rm - priorBest.e1rm) / priorBest.e1rm) * 100)
|
|
3208
|
+
: null;
|
|
3209
|
+
return {
|
|
3210
|
+
name: record.name,
|
|
3211
|
+
e1rm: record.e1rm,
|
|
3212
|
+
weight: record.weight,
|
|
3213
|
+
reps: record.reps,
|
|
3214
|
+
date: record.date,
|
|
3215
|
+
sessionId: record.sessionId,
|
|
3216
|
+
priorBest: priorBest
|
|
3217
|
+
? { e1rm: priorBest.e1rm, weight: priorBest.weight, reps: priorBest.reps, date: priorBest.date }
|
|
3218
|
+
: null,
|
|
3219
|
+
delta,
|
|
3220
|
+
deltaPct,
|
|
3221
|
+
kind: classifyRecordKind(record, priorBest)
|
|
3222
|
+
};
|
|
3223
|
+
})
|
|
3145
3224
|
: [];
|
|
3146
3225
|
const rows = allRows.slice(0, limit);
|
|
3147
3226
|
|
|
@@ -3156,7 +3235,8 @@ export function getRecords(snapshot, { exercises = [], limit = 15, recentSince =
|
|
|
3156
3235
|
recordCount: rows.length,
|
|
3157
3236
|
totalRecordCount: allRows.length,
|
|
3158
3237
|
recentRecordCount: recentRecords.length,
|
|
3159
|
-
recentRecordNames: recentRecords.map((record) => record.name)
|
|
3238
|
+
recentRecordNames: recentRecords.map((record) => record.name),
|
|
3239
|
+
recentRecords
|
|
3160
3240
|
},
|
|
3161
3241
|
sourceIds: rows.map((row) => row.sessionId),
|
|
3162
3242
|
sourceTimestamp: latestSourceTimestampFromDates(rows.map((row) => row.date)),
|
|
@@ -3543,7 +3623,15 @@ export function incrementScoreSummary(snapshot, { historyDays = 14 } = {}) {
|
|
|
3543
3623
|
|
|
3544
3624
|
const trimmedHistory = history.slice(0, boundedHistoryDays);
|
|
3545
3625
|
const prior = trimmedHistory[1];
|
|
3546
|
-
|
|
3626
|
+
// Scores are only comparable within the same formula version. The Increment
|
|
3627
|
+
// Score formula changed mid-2026 (it started counting recovery data it did not
|
|
3628
|
+
// have before), so subtracting an older-formula score from a newer one is the
|
|
3629
|
+
// "+36 / 77% up" cross-ruler artifact. Null the delta across a formula change
|
|
3630
|
+
// so downstream voice cannot frame a non-comparable jump as real progress.
|
|
3631
|
+
const latestFormulaVersion = latest.formulaVersion ?? null;
|
|
3632
|
+
const dayOverDayComparable = (typeof prior?.score === 'number')
|
|
3633
|
+
&& (prior.formulaVersion ?? null) === latestFormulaVersion;
|
|
3634
|
+
const dayOverDayDelta = dayOverDayComparable
|
|
3547
3635
|
? latest.score - prior.score
|
|
3548
3636
|
: null;
|
|
3549
3637
|
|
|
@@ -3570,6 +3658,11 @@ export function incrementScoreSummary(snapshot, { historyDays = 14 } = {}) {
|
|
|
3570
3658
|
dataTier: entry.dataTier ?? null,
|
|
3571
3659
|
formulaVersion: entry.formulaVersion ?? null
|
|
3572
3660
|
}));
|
|
3661
|
+
// A multi-day trend is only meaningful if every point shares the latest
|
|
3662
|
+
// formula version; otherwise the "rising/falling" steer mixes rulers.
|
|
3663
|
+
const trendComparable = recentTrend.every(
|
|
3664
|
+
(entry) => (entry.formulaVersion ?? null) === latestFormulaVersion
|
|
3665
|
+
);
|
|
3573
3666
|
|
|
3574
3667
|
return {
|
|
3575
3668
|
available: true,
|
|
@@ -3581,6 +3674,8 @@ export function incrementScoreSummary(snapshot, { historyDays = 14 } = {}) {
|
|
|
3581
3674
|
topPositiveDrivers: scoreDriverLabels(latest.topPositiveDrivers),
|
|
3582
3675
|
topNegativeDrivers: scoreDriverLabels(latest.topNegativeDrivers),
|
|
3583
3676
|
dayOverDayDelta,
|
|
3677
|
+
dayOverDayComparable,
|
|
3678
|
+
trendComparable,
|
|
3584
3679
|
recentTrend,
|
|
3585
3680
|
dataQualityNotes,
|
|
3586
3681
|
missingDataFlags,
|
package/src/score-prelude.js
CHANGED
|
@@ -29,25 +29,25 @@ export function scoreComponentPhrase(name) {
|
|
|
29
29
|
return SCORE_COMPONENT_PHRASES[String(name).toLowerCase()] ?? 'another training area';
|
|
30
30
|
}
|
|
31
31
|
|
|
32
|
-
// True when the user's question is actually about the Increment Score.
|
|
33
|
-
//
|
|
34
|
-
//
|
|
35
|
-
// plans where it was never asked for.
|
|
32
|
+
// True when the user's question is actually about the Increment Score. Defensive
|
|
33
|
+
// Ask profiles still use this to avoid score dashboarding in narrow decisions;
|
|
34
|
+
// expansive Ask profiles intentionally get the headline for richer coaching.
|
|
36
35
|
/**
 * Reports whether a question mentions the score.
 *
 * Matches the standalone word "score" case-insensitively, optionally preceded
 * by "increment"; words that merely contain it ("scoreboard", "underscore") do
 * not match. `null` and `undefined` are treated as an empty string.
 *
 * @param {*} question - User question; coerced with `String`.
 * @returns {boolean} `true` when the text contains the word "score".
 */
export function isScoreQuestion(question) {
  return /\b(?:increment\s+)?score\b/i.test(String(question ?? ''));
}
|
|
39
38
|
|
|
40
|
-
export function formatIncrementScorePrelude(snapshots, { question = '' } = {}) {
|
|
39
|
+
export function formatIncrementScorePrelude(snapshots, { question = '', responseProfile = 'defensive' } = {}) {
|
|
41
40
|
if (!Array.isArray(snapshots) || snapshots.length === 0) return null;
|
|
42
41
|
const latest = snapshots[0];
|
|
43
42
|
if (latest == null || typeof latest.score !== 'number') return null;
|
|
44
43
|
|
|
44
|
+
const allowsHeadline = responseProfile === 'expansive' || isScoreQuestion(question);
|
|
45
45
|
const lines = [
|
|
46
|
-
|
|
46
|
+
allowsHeadline
|
|
47
|
+
? '[Increment Score — context only. The rounded score headline and drivers may be used in rich Ask Coach answers. Never recite component values, sub-scores, decimals, or daily score numbers.]'
|
|
48
|
+
: '[Increment Score — context only. Speak in training reality (recovery, fatigue, consistency, density). Never recite component values, sub-scores, decimals, or daily score numbers. Do not volunteer the overall score number unless the user asked about the score.]'
|
|
47
49
|
];
|
|
48
|
-
|
|
49
|
-
// weakest/strongest area and direction below are always safe to provide.
|
|
50
|
-
if (isScoreQuestion(question)) {
|
|
50
|
+
if (allowsHeadline) {
|
|
51
51
|
lines.push(`- Current: ${Math.round(latest.score)}/100`);
|
|
52
52
|
}
|
|
53
53
|
|
|
@@ -90,18 +90,21 @@ export function formatIncrementScorePrelude(snapshots, { question = '' } = {}) {
|
|
|
90
90
|
const negatives = driverLabels(latest.topNegativeDrivers);
|
|
91
91
|
if (negatives) lines.push(`- Holding the score back: ${negatives}`);
|
|
92
92
|
|
|
93
|
-
// Direction words only — no delta number, no daily-score list.
|
|
93
|
+
// Direction words only — no delta number, no daily-score list. Scores are only
|
|
94
|
+
// comparable within one formula version; a formula change makes the direction a
|
|
95
|
+
// cross-ruler lie ("+36 / 77% up"), so suppress the steer across it.
|
|
96
|
+
const latestFormulaVersion = latest.formulaVersion ?? null;
|
|
94
97
|
if (snapshots.length > 1) {
|
|
95
98
|
const prior = snapshots[1];
|
|
96
|
-
if (typeof prior?.score === 'number') {
|
|
99
|
+
if (typeof prior?.score === 'number' && (prior.formulaVersion ?? null) === latestFormulaVersion) {
|
|
97
100
|
const delta = latest.score - prior.score;
|
|
98
101
|
const dir = delta > 0 ? 'up' : delta < 0 ? 'down' : 'flat';
|
|
99
102
|
lines.push(`- Day-over-day: ${dir}`);
|
|
100
103
|
}
|
|
101
104
|
const recent = snapshots
|
|
102
105
|
.slice(0, 7)
|
|
103
|
-
.
|
|
104
|
-
.
|
|
106
|
+
.filter((s) => typeof s?.score === 'number' && (s.formulaVersion ?? null) === latestFormulaVersion)
|
|
107
|
+
.map((s) => s.score);
|
|
105
108
|
if (recent.length >= 3) {
|
|
106
109
|
const span = recent[0] - recent[recent.length - 1];
|
|
107
110
|
const trend = span > 2 ? 'rising' : span < -2 ? 'falling' : 'steady';
|