incremnt 0.8.1 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "incremnt",
3
- "version": "0.8.1",
3
+ "version": "0.8.2",
4
4
  "description": "Command-line tool for querying your incremnt strength training data",
5
5
  "license": "MIT",
6
6
  "type": "module",
@@ -46,5 +46,10 @@
46
46
  },
47
47
  "devDependencies": {
48
48
  "promptfoo": "^0.121.8"
49
+ },
50
+ "overrides": {
51
+ "mongodb": {
52
+ "gcp-metadata": "^8.1.2"
53
+ }
49
54
  }
50
55
  }
@@ -165,12 +165,37 @@ function compareEvidenceTopSets(currentTopSet, previousTopSet) {
165
165
  return { loadDirection, previousTopSet };
166
166
  }
167
167
 
168
/**
 * Sum the logged rep counts across a list of sets.
 *
 * Only values that actually carry a number are counted: finite numbers and
 * non-blank numeric strings. `null`, `''`, booleans and other non-numeric
 * values are skipped, because `Number()` coerces them to 0 and would turn
 * "no reps logged" into a real 0-rep data point.
 *
 * @param {unknown} sets - Candidate array of set objects with a `reps` field.
 * @returns {number|null} Total reps, or null when `sets` is not an array or
 *   no set carries a usable rep count.
 */
function sumReps(sets) {
  if (!Array.isArray(sets)) return null;
  let total = 0;
  let counted = false;
  for (const set of sets) {
    const raw = set?.reps;
    const isNumber = typeof raw === 'number';
    const isNonBlankString = typeof raw === 'string' && raw.trim() !== '';
    // Reject null / '' / booleans before coercion: Number() maps them all to 0.
    if (!isNumber && !isNonBlankString) continue;
    const reps = Number(raw);
    if (!Number.isFinite(reps)) continue;
    total += reps;
    counted = true;
  }
  return counted ? total : null;
}
178
+
168
179
  function comparedToPreviousEvidence(row) {
169
180
  if (row?.comparedToPreviousSession) return row.comparedToPreviousSession;
170
181
  if (!row?.previousComparableSession) return null;
171
182
  const currentTopSet = row?.topSet ?? topSetFromSets(row?.sets);
172
183
  const previousTopSet = topSetFromSets(row.previousComparableSession.sets);
173
- return compareEvidenceTopSets(currentTopSet, previousTopSet);
184
+ const compared = compareEvidenceTopSets(currentTopSet, previousTopSet);
185
+ // Top-set direction alone calls a session "down" when the top set lost reps,
186
+ // even if the lifter did MORE total work at the same load (e.g. added sets:
187
+ // 70x8,6,5 -> 70x7,7,7,5,5). That is not a decline, so describing it as
188
+ // "progressed" must not be blocked as a direction inversion. Mark it 'mixed'
189
+ // (a real load drop keeps 'down'). Mirrors the regression-flag total-work gate.
190
+ if (compared?.loadDirection === 'down'
191
+ && Number(currentTopSet?.weight) === Number(previousTopSet?.weight)) {
192
+ const current = sumReps(row?.sets);
193
+ const previous = sumReps(row.previousComparableSession.sets);
194
+ if (current != null && previous != null && current >= previous) {
195
+ return { ...compared, loadDirection: 'mixed' };
196
+ }
197
+ }
198
+ return compared;
174
199
  }
175
200
 
176
201
  function rowTopSet(row) {
@@ -195,7 +220,11 @@ function addEvidenceRow(rows, toolName, row, inherited = {}) {
195
220
  isStale: row?.isStale ?? inherited.isStale ?? false,
196
221
  topSet: rowTopSet(row),
197
222
  comparedToPreviousSession: comparedToPreviousEvidence(row),
198
- sets: Array.isArray(row?.sets) ? row.sets : []
223
+ sets: Array.isArray(row?.sets) ? row.sets : [],
224
+ // get_records rows carry an estimated 1RM but no logged sets. Capture it so a
225
+ // weight claim citing a record value (which expansive answers are encouraged
226
+ // to volunteer) is recognized as supported evidence.
227
+ e1rm: Number.isFinite(Number(row?.e1rm)) ? Number(row.e1rm) : null
199
228
  });
200
229
  }
201
230
 
@@ -264,13 +293,17 @@ function replayAskToolResults(snapshot, routingMetadata = {}, { today = new Date
264
293
  return { toolResults: results, replayFailures: failures };
265
294
  }
266
295
 
296
/**
 * Parse a weight token that may use comma thousands separators ("1,234.5").
 *
 * @param {unknown} raw - Matched numeric text.
 * @returns {number} Parsed value; NaN when the remaining text is not numeric.
 */
function parseWeightNumber(raw) {
  const withoutSeparators = String(raw).split(',').join('');
  return Number(withoutSeparators);
}
299
+
267
300
  function extractWeightClaims(text) {
268
301
  const claims = [];
269
- const pattern = /\b(\d+(?:\.\d+)?)\s*(?:kg|kilograms?)\b/gi;
302
+ const pattern = /\b(\d{1,3}(?:,\d{3})+(?:\.\d+)?|\d+(?:\.\d+)?)\s*(?:kg|kilograms?)\b/gi;
270
303
  for (const match of normalizeText(text).matchAll(pattern)) {
271
304
  claims.push({
272
305
  text: match[0],
273
- value: Number(match[1]),
306
+ value: parseWeightNumber(match[1]),
274
307
  index: match.index ?? -1,
275
308
  end: (match.index ?? -1) + match[0].length
276
309
  });
@@ -280,12 +313,12 @@ function extractWeightClaims(text) {
280
313
 
281
314
  function extractWeightedSetClaims(text) {
282
315
  const claims = [];
283
- const pattern = /\b(\d+(?:\.\d+)?)\s*(?:kg|kilograms?)\s*(?:x|×|for)\s*(\d+)\b/gi;
316
+ const pattern = /\b(\d{1,3}(?:,\d{3})+(?:\.\d+)?|\d+(?:\.\d+)?)\s*(?:kg|kilograms?)\s*(?:(?:x)\s*(\d+)|for\s+(\d+)\s*reps?)\b/gi;
284
317
  for (const match of normalizeText(text).matchAll(pattern)) {
285
318
  claims.push({
286
319
  text: match[0],
287
- weight: Number(match[1]),
288
- reps: Number(match[2]),
320
+ weight: parseWeightNumber(match[1]),
321
+ reps: Number(match[2] ?? match[3]),
289
322
  index: match.index ?? -1,
290
323
  end: (match.index ?? -1) + match[0].length
291
324
  });
@@ -332,9 +365,30 @@ function isEstimatedOneRepMaxWeightClaim(text, claim) {
332
365
  }
333
366
 
334
367
  function isVolumeWeightClaim(text, claim) {
335
- const start = Math.max(0, claim.index - 30);
336
- const end = Math.min(text.length, claim.index + claim.text.length + 30);
337
- return /\bvolume\b/i.test(text.slice(start, end));
368
+ const boundaries = [
369
+ '\n',
370
+ '. ',
371
+ ';',
372
+ ', while',
373
+ ', whereas',
374
+ ', but',
375
+ ' while ',
376
+ ' whereas ',
377
+ ' but '
378
+ ];
379
+ let start = 0;
380
+ for (const boundary of boundaries) {
381
+ const index = text.lastIndexOf(boundary, claim.index);
382
+ if (index >= 0) start = Math.max(start, index + boundary.length);
383
+ }
384
+ let end = text.length;
385
+ for (const boundary of boundaries) {
386
+ const index = text.indexOf(boundary, claim.index + claim.text.length);
387
+ if (index >= 0) end = Math.min(end, index);
388
+ }
389
+ const clause = text.slice(start, end);
390
+ if (/\b(?:volume|tonnage|total\s+(?:load|work|volume|tonnage))\b/i.test(clause)) return true;
391
+ return /\d,\d{3}/.test(String(claim.text ?? '')) && claim.value >= 10000;
338
392
  }
339
393
 
340
394
  function isBodyWeightClaim(text, claim) {
@@ -353,7 +407,10 @@ function isUnsupportedReferenceClaim(text, claim) {
353
407
  }
354
408
 
355
409
  function weightSupported(claim, rows) {
356
- return weightsForRows(rows).some((weight) => Math.abs(weight - claim.value) < 0.01);
410
+ if (weightsForRows(rows).some((weight) => Math.abs(weight - claim.value) < 0.01)) return true;
411
+ // A record's estimated 1RM is valid evidence for a weight number. Allow a small
412
+ // rounding tolerance since the model rounds (e.g. "224 kg" for a 224.25 e1RM).
413
+ return rows.some((row) => Number.isFinite(row.e1rm) && Math.abs(row.e1rm - claim.value) <= 1);
357
414
  }
358
415
 
359
416
  function setPairSupported(claim, rows) {
@@ -633,6 +690,7 @@ function checkToolProvenance(answer, snapshot, routingMetadata, {
633
690
  key: 'unsupported_weighted_set_claim',
634
691
  severity: 'blocking',
635
692
  exerciseName: mention.name,
693
+ claimText: claim.text,
636
694
  reason: `Draft asserts ${claim.text} for ${mention.name}, but routed evidence does not include that weight/reps pair.`
637
695
  });
638
696
  }
@@ -657,6 +715,7 @@ function checkToolProvenance(answer, snapshot, routingMetadata, {
657
715
  key: 'unsupported_weight_claim',
658
716
  severity: 'blocking',
659
717
  exerciseName: mention.name,
718
+ claimText: claim.text,
660
719
  reason: `Draft asserts ${claim.text} for ${mention.name}, but routed evidence does not include that load.`
661
720
  });
662
721
  }
@@ -727,6 +786,95 @@ function checkObservationFollowupVoice(answer, route) {
727
786
  }];
728
787
  }
729
788
 
789
/**
 * Advisory check: an expansive answer that routed `get_increment_score` should
 * mention at least one of the first three fetched positive score drivers.
 *
 * Tolerates a tool that returns nothing, or a `topPositiveDrivers` value that
 * is not an array, by treating both as "no drivers to require".
 *
 * @param {string} answer - Answer text to inspect.
 * @param {object} snapshot - Passed through to the tool executor.
 * @param {object} routingMetadata - Supplies the response profile and routed tools.
 * @param {{ executeTool?: Function }} [options] - Tool executor override.
 * @returns {Array<object>} Zero or one advisory failure.
 */
function checkExpansiveCompleteness(answer, snapshot, routingMetadata, { executeTool = executeCoachReadTool } = {}) {
  const responseProfile = routingMetadata?.responseProfile ?? routingMetadata?.intent?.responseProfile;
  if (responseProfile !== 'expansive') return [];
  if (!routedToolNames(routingMetadata).has('get_increment_score')) return [];

  const scoreTool = executeTool(snapshot, 'get_increment_score', { historyDays: 21 });
  const rawDrivers = scoreTool?.facts?.topPositiveDrivers;
  const topDrivers = (Array.isArray(rawDrivers) ? rawDrivers : []).filter(Boolean).slice(0, 3);
  if (topDrivers.length === 0) return [];

  const normalizedAnswer = normalizeText(answer).toLowerCase();
  const mentionsAnyDriver = topDrivers.some((driver) => normalizedAnswer.includes(String(driver).toLowerCase()));
  if (mentionsAnyDriver) return [];

  return [{
    key: 'expansive_omitted_positive_score_driver',
    severity: 'advisory',
    reason: `Expansive Ask answer omitted fetched positive Increment Score drivers: ${topDrivers.join('; ')}.`
  }];
}
811
+
812
// Increment Score over-claim guards. An answer may name the rounded score
// headline and its drivers, but two framings are always rejected:
//   1. Reciting a component sub-score ("progression 90", "recovery is 63").
//   2. Expressing a score move as a percentage ("score up 77%") — the score is
//      a 0-100 index, so a % jump is not a meaningful comparison.
const SCORE_COMPONENT_NAMES = ['coverage', 'stimulus', 'execution', 'progression', 'recovery'];

// Units/qualifiers that, when they follow a number, show it is not a sub-score.
const NON_SCORE_UNIT =
  '(?:kg|kilo|lbs?|pounds?|reps?|sets?|%|percent|pct|x\\b|for\\s+\\d|sessions?|days?|nights?|weeks?|months?|' +
  'years?|yrs?|h\\b|hrs?|hours?|mins?|minutes?|secs?|seconds?|bpm|ms|rpe|rir|am|pm|out\\s+of|of\\b|/\\s*\\d)';

// Component name, a short digit-free gap, then a bare 1-3 digit number.
const SCORE_COMPONENT_DUMP_PATTERN = new RegExp(
  `\\b(${SCORE_COMPONENT_NAMES.join('|')})\\b[^.\\d\\n]{0,25}?(\\d{1,3}(?:\\.\\d+)?)\\b(?!\\.\\d)(?!\\s*${NON_SCORE_UNIT})`,
  'i'
);

// The move verb must sit close to "score" so an unrelated percentage in the
// same sentence ("your score is 83 and volume is up 12%") does not trigger this
// blocking check. Two alternatives cover both word orders.
const SCORE_MOVE_WORD = '(?:up|down|higher|lower|increase[d]?|decrease[d]?|jump(?:ed)?|rose|climb(?:ed)?|gain(?:ed)?|grew|improv(?:ed|ement))';
const SCORE_PERCENT_MOVE_PATTERN = new RegExp(
  `\\b(?:increment\\s+)?score\\b[^.\\n]{0,15}?\\b${SCORE_MOVE_WORD}\\b[^.\\n]{0,15}?\\d{1,3}(?:\\.\\d+)?\\s*(?:%|percent)`
  + `|\\b${SCORE_MOVE_WORD}\\b[^.\\n]{0,8}?\\d{1,3}(?:\\.\\d+)?\\s*(?:%|percent)[^.\\n]{0,20}?\\b(?:increment\\s+)?score\\b`,
  'i'
);

/**
 * Flag answers that recite a score component value or frame a score move as a
 * percentage. Each rule yields at most one blocking failure, anchored on the
 * first matching span via `claimText`.
 *
 * @param {string} answer - Answer text to inspect.
 * @returns {Array<object>} Blocking failures, sub-score rule first.
 */
function checkScoreOverclaim(answer) {
  const text = normalizeText(answer);
  if (!text) return [];

  const rules = [
    {
      pattern: SCORE_COMPONENT_DUMP_PATTERN,
      key: 'score_component_subscore_recited',
      reason: 'Answer recited an Increment Score component sub-score; surface the rounded headline and drivers only, never component values.'
    },
    {
      pattern: SCORE_PERCENT_MOVE_PATTERN,
      key: 'score_percentage_move_claimed',
      reason: 'Answer framed an Increment Score move as a percentage; the score is a 0-100 index and % jumps are not comparable across formula versions.'
    }
  ];

  const failures = [];
  for (const { pattern, key, reason } of rules) {
    const found = text.match(pattern);
    if (!found) continue;
    failures.push({ key, severity: 'blocking', claimText: found[0], reason });
  }
  return failures;
}
863
+
864
/**
 * Advisory check: when any session-observation comparison is marked
 * `not_comparable`, flag answers whose wording presents something as the main
 * finding or as proven by "this session".
 *
 * @param {string} answer - Answer text to inspect.
 * @param {object} routingMetadata - Read for `sessionObservationComparisons`,
 *   directly or under `contextBundle`.
 * @returns {Array<object>} Zero or one advisory failure.
 */
function checkSessionObservationProvenance(answer, routingMetadata) {
  const comparisons = routingMetadata?.sessionObservationComparisons
    ?? routingMetadata?.contextBundle?.sessionObservationComparisons
    ?? [];
  const hasNotComparable = Array.isArray(comparisons)
    && comparisons.some((row) => row?.direction === 'not_comparable');
  if (!hasNotComparable) return [];

  const findingLanguage = /\b(main|primary|biggest|clear)\s+(?:limiter|issue|problem|finding|signal)|\bthis session\b[^.]{0,80}\b(?:shows|proves|confirms)\b/i;
  if (!findingLanguage.test(normalizeText(answer))) return [];

  return [{
    key: 'not_comparable_observation_used_as_session_finding',
    severity: 'advisory',
    reason: 'Answer may be framing a not_comparable durable observation as a current-session finding.'
  }];
}
877
+
730
878
  export function verifyAskAnswer({
731
879
  answer,
732
880
  snapshot,
@@ -769,7 +917,10 @@ export function verifyAskAnswer({
769
917
  const failures = [
770
918
  ...voiceFailures,
771
919
  ...checkSnapshotClaims(normalized, snapshot, routingMetadata, { today, exclude }),
772
- ...checkToolProvenance(normalized, snapshot, routingMetadata, { today, exclude, strictMentionProvenance, executeTool })
920
+ ...checkToolProvenance(normalized, snapshot, routingMetadata, { today, exclude, strictMentionProvenance, executeTool }),
921
+ ...checkSessionObservationProvenance(normalized, routingMetadata),
922
+ ...checkScoreOverclaim(normalized),
923
+ ...checkExpansiveCompleteness(normalized, snapshot, routingMetadata, { executeTool })
773
924
  ];
774
925
  return verificationResult(failures);
775
926
  }
@@ -813,18 +964,22 @@ function verificationResult(failures, { skipped = false, skipReason = null } = {
813
964
  export function askVerificationMetadata(verification, {
814
965
  retryCount = 0,
815
966
  repaired = false,
816
- fallback = false
967
+ fallback = false,
968
+ degraded = false,
969
+ redactedCount = 0
817
970
  } = {}) {
818
971
  if (!verification) return null;
819
972
  return compactObject({
820
973
  version: verification.version ?? ASK_ANSWER_VERIFIER_VERSION,
821
- status: fallback ? 'fallback' : verification.status,
974
+ status: fallback ? 'fallback' : degraded ? 'degraded' : verification.status,
822
975
  passed: verification.passed === true && !fallback,
823
976
  skipped: verification.skipped === true ? true : undefined,
824
977
  skipReason: verification.skipReason,
825
978
  retryCount,
826
979
  repaired: repaired ? true : undefined,
827
980
  fallback: fallback ? true : undefined,
981
+ degraded: degraded ? true : undefined,
982
+ redactedCount: degraded ? redactedCount : undefined,
828
983
  blockingFailureCount: verification.blockingFailureCount ?? 0,
829
984
  advisoryFailureCount: verification.advisoryFailureCount ?? 0,
830
985
  failureKeys: verification.failureKeys ?? []
@@ -855,3 +1010,83 @@ export function buildAskAnswerRepairContext(context, _draftAnswer, verification)
855
1010
  export function safeAskVerificationFallback() {
856
1011
  return 'I can’t answer that safely from the evidence I just checked. The draft answer included training claims I could not verify, so I’m not going to guess. Ask me about a specific session or lift and I’ll re-check the data.';
857
1012
  }
1013
+
1014
+ // Graceful degrade: rather than refusing a whole answer for one unsupported
1015
+ // claim, strip the offending content and keep the rest. A coaching surface
1016
+ // should almost never go silent — a good answer minus one clause beats "I can't
1017
+ // answer that safely". When blocking failures carry a strip anchor (the exact
1018
+ // claim text, or failing that the exercise name), we drop the whole line/bullet
1019
+ // that contains an anchor — NOT just the sentence — so a recommendation sharing
1020
+ // a line with an unsupported claim ("You hit 200kg. Keep that load.") is removed
1021
+ // with it rather than orphaned. We also sweep an immediately-following
1022
+ // recommendation line, since advice right after a stripped performance claim is
1023
+ // usually derived from it. Failures without anchors cannot be localized and fall
1024
+ // back. The caller re-verifies the result and ships it only if it is now clean
1025
+ // and still substantive, else falls back.
1026
/**
 * Split text into one unit per newline-delimited line (a bullet or a prose
 * line). Lines are the atomic unit for stripping, so a recommendation sharing
 * a line with a redacted claim is removed with it.
 *
 * @param {unknown} text - Text to split; coerced with String().
 * @returns {Array<{ raw: string, text: string, strip: boolean, blank: boolean }>}
 *   `raw` is the untouched line, `text` its trimmed form; `strip` is true for
 *   lines with content and `blank` is its inverse.
 */
function splitIntoUnits(text) {
  const units = [];
  for (const raw of String(text).split('\n')) {
    const trimmed = raw.trim();
    const hasContent = trimmed.length > 0;
    units.push({ raw, text: trimmed, strip: hasContent, blank: !hasContent });
  }
  return units;
}
1035
+
1036
// Words that mark a line as a recommendation/prescription. Used to sweep advice
// that directly follows a stripped claim and likely depends on it.
const RECOMMENDATION_CUE = /\b(keep|hold|stay|maintain|add|increase|bump|push|try|aim|go up|move up|next time|i'?d|that means|from there|so try|then)\b/i;

/**
 * Strip the lines of an answer that carry blocking, unsupported claims.
 *
 * Anchors come from the blocking failures: the claim text when present,
 * otherwise the exercise name. Matching is case-insensitive. Every line
 * containing an anchor is dropped, along with recommendation lines that
 * directly follow it; a blank line or a line of independent content ends that
 * run.
 *
 * @param {unknown} answer - Draft answer text.
 * @param {{ blockingFailures?: Array<object> }} verification - Verifier result.
 * @returns {{ text: string, usable: boolean, redactedCount: number, localizable: boolean }}
 *   `localizable` is false when no blocking failure supplied an anchor.
 *   `usable` requires at least one redaction and enough remaining content.
 */
export function degradeAskAnswer(answer, verification) {
  const blockingFailures = verification?.blockingFailures ?? [];
  if (blockingFailures.length === 0) {
    // Nothing to redact: return the answer untouched.
    return {
      text: answer == null ? '' : String(answer),
      usable: Boolean(normalizeText(answer)),
      redactedCount: 0,
      localizable: true
    };
  }

  const normalized = normalizeText(answer);
  if (!normalized) {
    return { text: normalized, usable: false, redactedCount: 0, localizable: true };
  }

  const toLowerAnchors = (values) => values.filter(Boolean).map((value) => value.toLowerCase());
  const phraseAnchors = toLowerAnchors(blockingFailures.map((failure) => failure.claimText));
  // The exercise name is a fallback anchor only for failures lacking claim text.
  const nameAnchors = toLowerAnchors(
    blockingFailures.filter((failure) => !failure.claimText).map((failure) => failure.exerciseName)
  );
  if (phraseAnchors.length === 0 && nameAnchors.length === 0) {
    // No failure can be tied to a span of text, so nothing can be trimmed safely.
    return { text: normalized, usable: false, redactedCount: 0, localizable: false };
  }

  const containsAnchor = (haystack) =>
    phraseAnchors.some((anchor) => haystack.includes(anchor))
    || nameAnchors.some((anchor) => haystack.includes(anchor));

  const keptLines = [];
  let redactedCount = 0;
  let sweepingAdvice = false;
  for (const unit of splitIntoUnits(normalized)) {
    if (unit.blank) {
      // A paragraph break ends any dependent-advice run.
      sweepingAdvice = false;
      keptLines.push(unit.raw);
    } else if (containsAnchor(unit.text.toLowerCase())) {
      // Drop the whole line and start sweeping the advice that follows.
      redactedCount += 1;
      sweepingAdvice = true;
    } else if (sweepingAdvice && RECOMMENDATION_CUE.test(unit.text)) {
      // Advice right after a redacted claim is treated as derived from it.
      redactedCount += 1;
    } else {
      // Independent content ends the dependent-advice run.
      sweepingAdvice = false;
      keptLines.push(unit.raw);
    }
  }

  const out = keptLines.join('\n').replace(/\n{3,}/g, '\n\n').trim();
  // Measure what is left once bullet/number markers are removed.
  const remaining = out.replace(/^\s*(?:[-*•]|\d+[.)])\s+/gm, '').trim();
  const usable = redactedCount > 0 && remaining.length >= 24 && /[a-z]{3,}/i.test(remaining);
  return { text: out, usable, redactedCount, localizable: true };
}