npm - incremnt - Versions diffs - 0.8.1 → 0.8.2 - Mend

incremnt 0.8.1 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/package.json +6 -1
package/src/ask-answer-verifier.js +249 -14
package/src/ask-coach.js +309 -21
package/src/openrouter.js +55 -30
package/src/promptfoo-evals.js +20 -3
package/src/queries.js +113 -18
package/src/score-prelude.js +16 -13
package/src/summary-evals.js +106 -474
package/src/sync-service.js +46 -11

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "incremnt",
-  "version": "0.8.1",
+  "version": "0.8.2",
   "description": "Command-line tool for querying your incremnt strength training data",
   "license": "MIT",
   "type": "module",
@@ -46,5 +46,10 @@
   },
   "devDependencies": {
     "promptfoo": "^0.121.8"
+  },
+  "overrides": {
+    "mongodb": {
+      "gcp-metadata": "^8.1.2"
+    }
   }
 }

package/src/ask-answer-verifier.js CHANGED Viewed

@@ -165,12 +165,37 @@ function compareEvidenceTopSets(currentTopSet, previousTopSet) {
   return { loadDirection, previousTopSet };
 }
+function sumReps(sets) {
+  if (!Array.isArray(sets)) return null;
+  let total = 0;
+  let any = false;
+  for (const set of sets) {
+    const reps = Number(set?.reps);
+    if (Number.isFinite(reps)) { total += reps; any = true; }
+  }
+  return any ? total : null;
+}
 function comparedToPreviousEvidence(row) {
   if (row?.comparedToPreviousSession) return row.comparedToPreviousSession;
   if (!row?.previousComparableSession) return null;
   const currentTopSet = row?.topSet ?? topSetFromSets(row?.sets);
   const previousTopSet = topSetFromSets(row.previousComparableSession.sets);
-  return compareEvidenceTopSets(currentTopSet, previousTopSet);
+  const compared = compareEvidenceTopSets(currentTopSet, previousTopSet);
+  // Top-set direction alone calls a session "down" when the top set lost reps,
+  // even if the lifter did MORE total work at the same load (e.g. added sets:
+  // 70x8,6,5 -> 70x7,7,7,5,5). That is not a decline, so describing it as
+  // "progressed" must not be blocked as a direction inversion. Mark it 'mixed'
+  // (a real load drop keeps 'down'). Mirrors the regression-flag total-work gate.
+  if (compared?.loadDirection === 'down'
+    && Number(currentTopSet?.weight) === Number(previousTopSet?.weight)) {
+    const current = sumReps(row?.sets);
+    const previous = sumReps(row.previousComparableSession.sets);
+    if (current != null && previous != null && current >= previous) {
+      return { ...compared, loadDirection: 'mixed' };
+    }
+  }
+  return compared;
 }
 function rowTopSet(row) {
@@ -195,7 +220,11 @@ function addEvidenceRow(rows, toolName, row, inherited = {}) {
     isStale: row?.isStale ?? inherited.isStale ?? false,
     topSet: rowTopSet(row),
     comparedToPreviousSession: comparedToPreviousEvidence(row),
-    sets: Array.isArray(row?.sets) ? row.sets : []
+    sets: Array.isArray(row?.sets) ? row.sets : [],
+    // get_records rows carry an estimated 1RM but no logged sets. Capture it so a
+    // weight claim citing a record value (which expansive answers are encouraged
+    // to volunteer) is recognized as supported evidence.
+    e1rm: Number.isFinite(Number(row?.e1rm)) ? Number(row.e1rm) : null
   });
 }
@@ -264,13 +293,17 @@ function replayAskToolResults(snapshot, routingMetadata = {}, { today = new Date
   return { toolResults: results, replayFailures: failures };
 }
+function parseWeightNumber(raw) {
+  return Number(String(raw).replace(/,/g, ''));
+}
 function extractWeightClaims(text) {
   const claims = [];
-  const pattern = /\b(\d+(?:\.\d+)?)\s*(?:kg|kilograms?)\b/gi;
+  const pattern = /\b(\d{1,3}(?:,\d{3})+(?:\.\d+)?|\d+(?:\.\d+)?)\s*(?:kg|kilograms?)\b/gi;
   for (const match of normalizeText(text).matchAll(pattern)) {
     claims.push({
       text: match[0],
-      value: Number(match[1]),
+      value: parseWeightNumber(match[1]),
       index: match.index ?? -1,
       end: (match.index ?? -1) + match[0].length
     });
@@ -280,12 +313,12 @@ function extractWeightClaims(text) {
 function extractWeightedSetClaims(text) {
   const claims = [];
-  const pattern = /\b(\d+(?:\.\d+)?)\s*(?:kg|kilograms?)\s*(?:x|×|for)\s*(\d+)\b/gi;
+  const pattern = /\b(\d{1,3}(?:,\d{3})+(?:\.\d+)?|\d+(?:\.\d+)?)\s*(?:kg|kilograms?)\s*(?:(?:x|×)\s*(\d+)|for\s+(\d+)\s*reps?)\b/gi;
   for (const match of normalizeText(text).matchAll(pattern)) {
     claims.push({
       text: match[0],
-      weight: Number(match[1]),
-      reps: Number(match[2]),
+      weight: parseWeightNumber(match[1]),
+      reps: Number(match[2] ?? match[3]),
       index: match.index ?? -1,
       end: (match.index ?? -1) + match[0].length
     });
@@ -332,9 +365,30 @@ function isEstimatedOneRepMaxWeightClaim(text, claim) {
 }
 function isVolumeWeightClaim(text, claim) {
-  const start = Math.max(0, claim.index - 30);
-  const end = Math.min(text.length, claim.index + claim.text.length + 30);
-  return /\bvolume\b/i.test(text.slice(start, end));
+  const boundaries = [
+    '\n',
+    '. ',
+    ';',
+    ', while',
+    ', whereas',
+    ', but',
+    ' while ',
+    ' whereas ',
+    ' but '
+  ];
+  let start = 0;
+  for (const boundary of boundaries) {
+    const index = text.lastIndexOf(boundary, claim.index);
+    if (index >= 0) start = Math.max(start, index + boundary.length);
+  }
+  let end = text.length;
+  for (const boundary of boundaries) {
+    const index = text.indexOf(boundary, claim.index + claim.text.length);
+    if (index >= 0) end = Math.min(end, index);
+  }
+  const clause = text.slice(start, end);
+  if (/\b(?:volume|tonnage|total\s+(?:load|work|volume|tonnage))\b/i.test(clause)) return true;
+  return /\d,\d{3}/.test(String(claim.text ?? '')) && claim.value >= 10000;
 }
 function isBodyWeightClaim(text, claim) {
@@ -353,7 +407,10 @@ function isUnsupportedReferenceClaim(text, claim) {
 }
 function weightSupported(claim, rows) {
-  return weightsForRows(rows).some((weight) => Math.abs(weight - claim.value) < 0.01);
+  if (weightsForRows(rows).some((weight) => Math.abs(weight - claim.value) < 0.01)) return true;
+  // A record's estimated 1RM is valid evidence for a weight number. Allow a small
+  // rounding tolerance since the model rounds (e.g. "224 kg" for a 224.25 e1RM).
+  return rows.some((row) => Number.isFinite(row.e1rm) && Math.abs(row.e1rm - claim.value) <= 1);
 }
 function setPairSupported(claim, rows) {
@@ -633,6 +690,7 @@ function checkToolProvenance(answer, snapshot, routingMetadata, {
         key: 'unsupported_weighted_set_claim',
         severity: 'blocking',
         exerciseName: mention.name,
+        claimText: claim.text,
         reason: `Draft asserts ${claim.text} for ${mention.name}, but routed evidence does not include that weight/reps pair.`
       });
     }
@@ -657,6 +715,7 @@ function checkToolProvenance(answer, snapshot, routingMetadata, {
         key: 'unsupported_weight_claim',
         severity: 'blocking',
         exerciseName: mention.name,
+        claimText: claim.text,
         reason: `Draft asserts ${claim.text} for ${mention.name}, but routed evidence does not include that load.`
       });
     }
@@ -727,6 +786,95 @@ function checkObservationFollowupVoice(answer, route) {
   }];
 }
+function checkExpansiveCompleteness(answer, snapshot, routingMetadata, { executeTool = executeCoachReadTool } = {}) {
+  const responseProfile = routingMetadata?.responseProfile ?? routingMetadata?.intent?.responseProfile;
+  if (responseProfile !== 'expansive') return [];
+  const failures = [];
+  const toolNames = routedToolNames(routingMetadata);
+  if (toolNames.has('get_increment_score')) {
+    const scoreTool = executeTool(snapshot, 'get_increment_score', { historyDays: 21 });
+    const positiveDrivers = (scoreTool.facts?.topPositiveDrivers ?? []).filter(Boolean);
+    const normalizedAnswer = normalizeText(answer).toLowerCase();
+    const missingDrivers = positiveDrivers
+      .slice(0, 3)
+      .filter((driver) => !normalizedAnswer.includes(String(driver).toLowerCase()));
+    if (positiveDrivers.length > 0 && missingDrivers.length === positiveDrivers.slice(0, 3).length) {
+      failures.push({
+        key: 'expansive_omitted_positive_score_driver',
+        severity: 'advisory',
+        reason: `Expansive Ask answer omitted fetched positive Increment Score drivers: ${positiveDrivers.slice(0, 3).join('; ')}.`
+      });
+    }
+  }
+  return failures;
+}
+// Increment Score over-claim guards. Expansive Ask may name the rounded score
+// headline and drivers, but two framings are always wrong and the expansive
+// prompt only *asks* gpt-5.4-mini not to do them — these make it enforceable:
+//   1. Reciting a component sub-score ("progression 90", "recovery is 63") —
+//      Tier-1 internals the user should never see.
+//   2. Expressing a score move as a percentage ("score up 77%", "28% higher") —
+//      the score is a 0-100 index; a % jump is the cross-formula-ruler artifact
+//      that produced the bogus "+36 / 77% up" read.
+const SCORE_COMPONENT_NAMES = ['coverage', 'stimulus', 'execution', 'progression', 'recovery'];
+const NON_SCORE_UNIT =
+  '(?:kg|kilo|lbs?|pounds?|reps?|sets?|%|percent|pct|x\\b|for\\s+\\d|sessions?|days?|nights?|weeks?|months?|' +
+  'years?|yrs?|h\\b|hrs?|hours?|mins?|minutes?|secs?|seconds?|bpm|ms|rpe|rir|am|pm|out\\s+of|of\\b|/\\s*\\d)';
+const SCORE_COMPONENT_DUMP_PATTERN = new RegExp(
+  `\\b(${SCORE_COMPONENT_NAMES.join('|')})\\b[^.\\d\\n]{0,25}?(\\d{1,3}(?:\\.\\d+)?)\\b(?!\\.\\d)(?!\\s*${NON_SCORE_UNIT})`,
+  'i'
+);
+// A score MOVE expressed as a percentage ("score up 77%", "score jumped 28%").
+// The move verb must sit next to "score" so a stray percentage in the same
+// sentence (e.g. "your score is 83 and volume is up 12%") does not false-fire
+// this blocking check. Two clamps so order does not matter.
+const SCORE_MOVE_WORD = '(?:up|down|higher|lower|increase[d]?|decrease[d]?|jump(?:ed)?|rose|climb(?:ed)?|gain(?:ed)?|grew|improv(?:ed|ement))';
+const SCORE_PERCENT_MOVE_PATTERN = new RegExp(
+  `\\b(?:increment\\s+)?score\\b[^.\\n]{0,15}?\\b${SCORE_MOVE_WORD}\\b[^.\\n]{0,15}?\\d{1,3}(?:\\.\\d+)?\\s*(?:%|percent)`
+  + `|\\b${SCORE_MOVE_WORD}\\b[^.\\n]{0,8}?\\d{1,3}(?:\\.\\d+)?\\s*(?:%|percent)[^.\\n]{0,20}?\\b(?:increment\\s+)?score\\b`,
+  'i'
+);
+function checkScoreOverclaim(answer) {
+  const text = normalizeText(answer);
+  if (!text) return [];
+  const failures = [];
+  const subscoreMatch = text.match(SCORE_COMPONENT_DUMP_PATTERN);
+  if (subscoreMatch) {
+    failures.push({
+      key: 'score_component_subscore_recited',
+      severity: 'blocking',
+      claimText: subscoreMatch[0],
+      reason: 'Answer recited an Increment Score component sub-score; surface the rounded headline and drivers only, never component values.'
+    });
+  }
+  const percentMatch = text.match(SCORE_PERCENT_MOVE_PATTERN);
+  if (percentMatch) {
+    failures.push({
+      key: 'score_percentage_move_claimed',
+      severity: 'blocking',
+      claimText: percentMatch[0],
+      reason: 'Answer framed an Increment Score move as a percentage; the score is a 0-100 index and % jumps are not comparable across formula versions.'
+    });
+  }
+  return failures;
+}
+function checkSessionObservationProvenance(answer, routingMetadata) {
+  const comparisons = routingMetadata?.sessionObservationComparisons ?? routingMetadata?.contextBundle?.sessionObservationComparisons ?? [];
+  if (!Array.isArray(comparisons) || comparisons.every((row) => row?.direction !== 'not_comparable')) return [];
+  const text = normalizeText(answer);
+  if (!/\b(main|primary|biggest|clear)\s+(?:limiter|issue|problem|finding|signal)|\bthis session\b[^.]{0,80}\b(?:shows|proves|confirms)\b/i.test(text)) {
+    return [];
+  }
+  return [{
+    key: 'not_comparable_observation_used_as_session_finding',
+    severity: 'advisory',
+    reason: 'Answer may be framing a not_comparable durable observation as a current-session finding.'
+  }];
+}
 export function verifyAskAnswer({
   answer,
   snapshot,
@@ -769,7 +917,10 @@ export function verifyAskAnswer({
   const failures = [
     ...voiceFailures,
     ...checkSnapshotClaims(normalized, snapshot, routingMetadata, { today, exclude }),
-    ...checkToolProvenance(normalized, snapshot, routingMetadata, { today, exclude, strictMentionProvenance, executeTool })
+    ...checkToolProvenance(normalized, snapshot, routingMetadata, { today, exclude, strictMentionProvenance, executeTool }),
+    ...checkSessionObservationProvenance(normalized, routingMetadata),
+    ...checkScoreOverclaim(normalized),
+    ...checkExpansiveCompleteness(normalized, snapshot, routingMetadata, { executeTool })
   ];
   return verificationResult(failures);
 }
@@ -813,18 +964,22 @@ function verificationResult(failures, { skipped = false, skipReason = null } = {
 export function askVerificationMetadata(verification, {
   retryCount = 0,
   repaired = false,
-  fallback = false
+  fallback = false,
+  degraded = false,
+  redactedCount = 0
 } = {}) {
   if (!verification) return null;
   return compactObject({
     version: verification.version ?? ASK_ANSWER_VERIFIER_VERSION,
-    status: fallback ? 'fallback' : verification.status,
+    status: fallback ? 'fallback' : degraded ? 'degraded' : verification.status,
     passed: verification.passed === true && !fallback,
     skipped: verification.skipped === true ? true : undefined,
     skipReason: verification.skipReason,
     retryCount,
     repaired: repaired ? true : undefined,
     fallback: fallback ? true : undefined,
+    degraded: degraded ? true : undefined,
+    redactedCount: degraded ? redactedCount : undefined,
     blockingFailureCount: verification.blockingFailureCount ?? 0,
     advisoryFailureCount: verification.advisoryFailureCount ?? 0,
     failureKeys: verification.failureKeys ?? []
@@ -855,3 +1010,83 @@ export function buildAskAnswerRepairContext(context, _draftAnswer, verification)
 export function safeAskVerificationFallback() {
   return 'I can’t answer that safely from the evidence I just checked. The draft answer included training claims I could not verify, so I’m not going to guess. Ask me about a specific session or lift and I’ll re-check the data.';
 }
+// Graceful degrade: rather than refusing a whole answer for one unsupported
+// claim, strip the offending content and keep the rest. A coaching surface
+// should almost never go silent — a good answer minus one clause beats "I can't
+// answer that safely". When blocking failures carry a strip anchor (the exact
+// claim text, or failing that the exercise name), we drop the whole line/bullet
+// that contains an anchor — NOT just the sentence — so a recommendation sharing
+// a line with an unsupported claim ("You hit 200kg. Keep that load.") is removed
+// with it rather than orphaned. We also sweep an immediately-following
+// recommendation line, since advice right after a stripped performance claim is
+// usually derived from it. Failures without anchors cannot be localized and fall
+// back. The caller re-verifies the result and ships it only if it is now clean
+// and still substantive, else falls back.
+function splitIntoUnits(text) {
+  // Each newline-delimited line is one atomic unit (a bullet or a prose line).
+  // Stripping whole lines — never partial sentences — avoids leaving a
+  // recommendation behind that was built on a redacted claim.
+  return String(text).split('\n').map((line) => {
+    const trimmed = line.trim();
+    return { raw: line, text: trimmed, strip: trimmed.length > 0, blank: trimmed.length === 0 };
+  });
+}
+// Cues that mark a line as a recommendation/prescription. Used to sweep advice
+// that immediately follows a stripped performance claim and likely depends on it.
+const RECOMMENDATION_CUE = /\b(keep|hold|stay|maintain|add|increase|bump|push|try|aim|go up|move up|next time|i'?d|that means|from there|so try|then)\b/i;
+export function degradeAskAnswer(answer, verification) {
+  const blocking = verification?.blockingFailures ?? [];
+  if (blocking.length === 0) {
+    const text = answer == null ? '' : String(answer);
+    return { text, usable: Boolean(normalizeText(answer)), redactedCount: 0, localizable: true };
+  }
+  const text = normalizeText(answer);
+  if (!text) {
+    return { text, usable: Boolean(text), redactedCount: 0, localizable: true };
+  }
+  const phraseAnchors = blocking.map((failure) => failure.claimText).filter(Boolean).map((value) => value.toLowerCase());
+  const nameAnchors = blocking.filter((failure) => !failure.claimText).map((failure) => failure.exerciseName).filter(Boolean).map((value) => value.toLowerCase());
+  if (phraseAnchors.length === 0 && nameAnchors.length === 0) {
+    // No blocking failure could be localized to a span — cannot safely trim.
+    return { text, usable: false, redactedCount: 0, localizable: false };
+  }
+  const units = splitIntoUnits(text);
+  let redactedCount = 0;
+  let sweepDependentAdvice = false;
+  const kept = [];
+  for (const unit of units) {
+    if (unit.blank) {
+      // A blank line ends a paragraph, so it also ends a dependent-advice run.
+      sweepDependentAdvice = false;
+      kept.push(unit.raw);
+      continue;
+    }
+    const haystack = unit.text.toLowerCase();
+    const hit = phraseAnchors.some((anchor) => haystack.includes(anchor))
+      || nameAnchors.some((anchor) => haystack.includes(anchor));
+    if (hit) {
+      // Strip the whole line containing the unsupported claim, and start sweeping
+      // the advice that follows it.
+      redactedCount += 1;
+      sweepDependentAdvice = true;
+      continue;
+    }
+    if (sweepDependentAdvice && RECOMMENDATION_CUE.test(unit.text)) {
+      // Advice immediately after a redacted claim, with no new claim of its own —
+      // treat it as derived from the fabrication and drop it too.
+      redactedCount += 1;
+      continue;
+    }
+    // A line with real, independent content ends the dependent-advice run.
+    sweepDependentAdvice = false;
+    kept.push(unit.raw);
+  }
+  const out = kept.join('\n').replace(/\n{3,}/g, '\n\n').trim();
+  // Substantive = at least one real sentence of prose/bullet content remains.
+  const remaining = out.replace(/^\s*(?:[-*•]|\d+[.)])\s+/gm, '').trim();
+  const usable = redactedCount > 0 && remaining.length >= 24 && /[a-z]{3,}/i.test(remaining);
+  return { text: out, usable, redactedCount, localizable: true };
+}