npm - incremnt - Versions diffs - 0.3.0 → 0.5.0 - Mend

incremnt 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

package/README.md +9 -2
package/package.json +25 -4
package/src/anonymize.js +12 -0
package/src/coach-bakeoff.js +300 -0
package/src/coach-facts.js +100 -0
package/src/coach-prompt-variants.js +106 -0
package/src/contract.js +56 -1
package/src/exercise-aliases.js +163 -0
package/src/format.js +64 -1
package/src/increment-score-replay-data.js +486 -0
package/src/increment-score-replay.js +822 -0
package/src/lib.js +14 -2
package/src/local.js +3 -3
package/src/openrouter.js +1033 -179
package/src/program-phase-resolver.js +206 -0
package/src/prompt-security.js +13 -0
package/src/promptfoo-domain-assert.cjs +4 -0
package/src/promptfoo-evals.js +166 -0
package/src/promptfoo-langfuse-scores.js +354 -0
package/src/promptfoo-provider.cjs +14 -0
package/src/promptfoo-tests.cjs +4 -0
package/src/queries.js +2307 -164
package/src/remote.js +144 -1
package/src/state.js +9 -2
package/src/stored-summary-eval-report.js +171 -0
package/src/summary-evals.js +1445 -0
package/src/sync-service.js +1557 -158
package/src/workout-prompt-variants.js +52 -0

package/src/program-phase-resolver.js ADDED Viewed

@@ -0,0 +1,206 @@
+// JS port of StrengthTrackerIOS/StrengthTrackerIOS/ProgramPhaseResolver.swift.
+//
+// Phase is resolved purely from (plan, date). Every cycle/checkpoint/weekly
+// AI summary endpoint should ask this module for current/previous/next-week
+// phase facts and attach them to the prompt context so the model never has
+// to infer "is this a deload week" from session prose.
+//
+// The resolver duplicates the small Monday-normalisation math that the iOS
+// `DashboardSchedulingLogic` uses internally rather than reusing wrapped
+// week helpers — those take a `cycleWeek` argument that re-introduces the
+// counter-vs-calendar leak the Swift refactor closed.
+// Length of one UTC day in milliseconds (24 h x 60 min x 60 s x 1000 ms).
+const MS_PER_DAY = 24 * 60 * 60 * 1000;
+// Week count used whenever a plan does not supply a usable duration.
+const DEFAULT_TOTAL_WEEKS = 16;
+// Frozen set of phase labels a program week can resolve to.
+const ProgramProgressionType = Object.freeze({
+  build: 'build',
+  deload: 'deload',
+  peak: 'peak',
+  benchmark: 'benchmark'
+});
+/**
+ * Returns a new Date at 00:00:00.000 UTC of the given date's UTC calendar day.
+ * The input Date is left untouched.
+ */
+function startOfDay(date) {
+  // setUTCHours returns the adjusted timestamp, which seeds the result directly.
+  const midnightMs = new Date(date.getTime()).setUTCHours(0, 0, 0, 0);
+  return new Date(midnightMs);
+}
+// ISO weekday (UTC): 1 (Mon) … 7 (Sun)
+function isoWeekday(date) {
+  // getUTCDay() is 0 (Sun) … 6 (Sat); rotate so that Sunday lands on 7.
+  return ((date.getUTCDay() + 6) % 7) + 1;
+}
+/**
+ * Normalises a plan start date to a Monday at 00:00 UTC: a Monday start is
+ * kept as-is, any other weekday rolls forward to the following Monday.
+ */
+function normalizedPlanStartMonday(planStartDate) {
+  const monday = startOfDay(new Date(planStartDate));
+  const weekday = isoWeekday(monday);
+  // Mon -> 0 days, Tue -> 6, Wed -> 5, … Sun -> 1.
+  const forwardDays = weekday === 1 ? 0 : 8 - weekday;
+  monday.setUTCDate(monday.getUTCDate() + forwardDays);
+  return monday;
+}
+/** Returns 00:00 UTC on the Monday of the ISO week that contains `date`. */
+function mondayOf(date) {
+  const monday = startOfDay(new Date(date));
+  const daysSinceMonday = isoWeekday(monday) - 1;
+  monday.setUTCDate(monday.getUTCDate() - daysSinceMonday);
+  return monday;
+}
+/**
+ * Counts whole 7-day spans from `earlier` to `later`, rounding toward negative
+ * infinity (a `later` that precedes `earlier` yields a negative count).
+ */
+function weeksBetween(earlier, later) {
+  const msPerWeek = 7 * MS_PER_DAY;
+  const elapsedMs = later.getTime() - earlier.getTime();
+  return Math.floor(elapsedMs / msPerWeek);
+}
+/**
+ * Generic phase rule used when no planned phase is available for a week:
+ * every positive multiple of 5 is a deload, the final week is a benchmark,
+ * the last weeks before it are a peak (plans of 12+ weeks only), and
+ * everything else is a build week.
+ */
+function progressionTypeFromGenericRule(weekNumber, totalWeeks = DEFAULT_TOTAL_WEEKS) {
+  const { build, deload, peak, benchmark } = ProgramProgressionType;
+  // The deload test runs on the raw week number, before any clamping.
+  const isFifthWeek = weekNumber > 0 && weekNumber % 5 === 0;
+  if (isFifthWeek) {
+    return deload;
+  }
+  const week = Math.max(1, Math.min(weekNumber, totalWeeks));
+  if (week === totalWeeks) {
+    return benchmark;
+  }
+  const inFinalStretch = totalWeeks >= 12 && week >= totalWeeks - 3;
+  return inFinalStretch ? peak : build;
+}
+/**
+ * Returns the plan's length in whole weeks as a positive integer.
+ *
+ * Reads `plan.durationWeeks`, falling back to `plan.plannedWeeks.length` and
+ * then to DEFAULT_TOTAL_WEEKS. Any value that does not floor to a finite
+ * integer >= 1 also resolves to DEFAULT_TOTAL_WEEKS.
+ */
+function planDurationWeeks(plan) {
+  // plannedWeeks.length fallback covers snapshots from older iOS builds that omit durationWeeks
+  const raw = Number(plan?.durationWeeks ?? plan?.plannedWeeks?.length ?? DEFAULT_TOTAL_WEEKS);
+  // Floor before validating: a fractional value such as 0.5 is > 0 but floors
+  // to 0, which would otherwise leak out as a zero-week plan.
+  const wholeWeeks = Math.floor(raw);
+  return Number.isFinite(wholeWeeks) && wholeWeeks >= 1 ? wholeWeeks : DEFAULT_TOTAL_WEEKS;
+}
+/**
+ * Looks up the phase for `weekNumber` in the plan's `plannedWeeks`. The week
+ * is clamped to [1, plan duration] before the lookup; when the plan has no
+ * planned weeks, or the matching entry carries no phase, the generic rule
+ * decides instead.
+ */
+function plannedPhase(plan, weekNumber) {
+  const duration = planDurationWeeks(plan);
+  const weeks = plan?.plannedWeeks;
+  if (!weeks?.length) {
+    // No schedule at all: the generic rule sees the unclamped week number.
+    return progressionTypeFromGenericRule(weekNumber, duration);
+  }
+  const week = Math.max(1, Math.min(weekNumber, duration));
+  const match = weeks.find((entry) => Number(entry.weekNumber) === week);
+  return match?.phase ?? progressionTypeFromGenericRule(week, duration);
+}
+/**
+ * Resolves the phase for a program that follows a plan, purely from the plan
+ * and the calendar date. The week index counts Monday-anchored weeks from the
+ * plan's normalised start; `displayWeek` is that index capped at the plan
+ * duration. `program` is accepted but not read here.
+ */
+function resolveGuided(program, plan, date) {
+  const durationWeeks = planDurationWeeks(plan);
+  const weeksSinceStart = weeksBetween(normalizedPlanStartMonday(plan.startDate), mondayOf(date));
+  // Dates before the plan start still report week 1.
+  const unboundedWeek = Math.max(1, weeksSinceStart + 1);
+  const withinPlan = unboundedWeek <= durationWeeks;
+  // Past the end of the plan the display week stays pinned to the last week.
+  const displayWeek = Math.min(unboundedWeek, durationWeeks);
+  const phase = plannedPhase(plan, displayWeek);
+  return {
+    phaseInstanceId: `plan:${plan.id}:week:${unboundedWeek}`,
+    unboundedWeek,
+    displayWeek,
+    phase,
+    // A deload only counts while the calendar is still inside the plan.
+    isDeload: withinPlan && phase === ProgramProgressionType.deload,
+    source: 'planned'
+  };
+}
+/**
+ * Resolves the phase for a program without a plan. The week index is derived
+ * from `program.completedCyclesCount` (+1), wrapped into a
+ * DEFAULT_TOTAL_WEEKS-long cycle for display, and classified with the
+ * generic rule.
+ */
+function resolveFallback(program) {
+  const cyclesDone = Number(program?.completedCyclesCount ?? 0);
+  const unboundedWeek = Math.max(1, cyclesDone + 1);
+  const displayWeek = ((unboundedWeek - 1) % DEFAULT_TOTAL_WEEKS) + 1;
+  const phase = progressionTypeFromGenericRule(displayWeek, DEFAULT_TOTAL_WEEKS);
+  // Deloads are only flagged during the first pass through the cycle.
+  const inFirstPass = unboundedWeek <= DEFAULT_TOTAL_WEEKS;
+  return {
+    phaseInstanceId: `unguided:${program?.id ?? 'unknown'}:week:${unboundedWeek}`,
+    unboundedWeek,
+    displayWeek,
+    phase,
+    isDeload: inFirstPass && phase === ProgramProgressionType.deload,
+    source: 'fallback'
+  };
+}
+/**
+ * Resolve the program's phase as of `date` (defaults to now).
+ *
+ * Returns null when there is no program. With a plan the result comes from
+ * the plan and calendar date; without one it falls back to the program's own
+ * counters. The result has the same shape as the Swift
+ * `ProgramPhaseResolution` struct.
+ */
+export function resolveProgramPhase(program, plan, date = new Date()) {
+  if (!program) {
+    return null;
+  }
+  return plan ? resolveGuided(program, plan, date) : resolveFallback(program);
+}
+/**
+ * Build a phase-window context for AI prompts. Mirrors the Swift
+ * `ProgramPhaseWindowContext`. Use the `programPhase` payload it returns as
+ * a structured field on AI request bodies / prompt contexts so the model
+ * never has to infer phase from session prose.
+ *
+ * Returns null when `program` is missing or the current phase cannot be
+ * resolved. Otherwise returns the phase for `today`, for `today` -/+ 7 days,
+ * the distinct phases touched by `summarizedRange` (`{ start, end }`,
+ * optional), the distinct phases of the equally long window that ends where
+ * `summarizedRange` starts, and an `isPostDeloadReturn` flag.
+ */
+export function programPhaseWindowContext(program, plan, summarizedRange, today = new Date()) {
+  if (!program) return null;
+  const current = resolveProgramPhase(program, plan, today);
+  if (!current) return null;
+  // Copies `date` and shifts the copy by whole UTC calendar days.
+  const offsetByDays = (date, days) => {
+    const d = new Date(date);
+    d.setUTCDate(d.getUTCDate() + days);
+    return d;
+  };
+  const previous = resolveProgramPhase(program, plan, offsetByDays(today, -7));
+  const next = resolveProgramPhase(program, plan, offsetByDays(today, 7));
+  // Walks Monday-anchored weeks from the Monday of `range.start` through
+  // `range.end` (inclusive) and collects each distinct resolution once,
+  // keyed by `phaseInstanceId`. Empty or inverted ranges yield [].
+  const stride = (range) => {
+    if (!range) return [];
+    const start = new Date(range.start);
+    const end = new Date(range.end);
+    if (end <= start) return [];
+    const out = [];
+    const seen = new Set();
+    let cursor = mondayOf(start);
+    // Hard cap on iterations so a huge range cannot loop unboundedly.
+    let safety = 0;
+    while (cursor <= end && safety < 1024) {
+      const r = resolveProgramPhase(program, plan, cursor);
+      if (r && !seen.has(r.phaseInstanceId)) {
+        seen.add(r.phaseInstanceId);
+        out.push(r);
+      }
+      cursor = offsetByDays(cursor, 7);
+      safety += 1;
+    }
+    return out;
+  };
+  const phasesInRange = stride(summarizedRange);
+  let priorRangePhases = [];
+  if (summarizedRange) {
+    // Use exact millisecond duration (mirrors Swift's DateInterval.duration)
+    // so non-day-aligned ranges don't shift the prior window by a fractional
+    // day via setUTCDate quirks.
+    const summarizedStart = new Date(summarizedRange.start);
+    const summarizedEnd = new Date(summarizedRange.end);
+    const durationMs = summarizedEnd.getTime() - summarizedStart.getTime();
+    if (durationMs > 0) {
+      // NOTE(review): the prior window ends exactly at the summarized start and
+      // `stride` treats its end as inclusive, so a summarized range starting on
+      // a Monday 00:00 UTC puts that same week in both `phasesInRange` and
+      // `previousRangePhases` — confirm this matches the Swift behaviour.
+      const priorEnd = summarizedStart;
+      const priorStart = new Date(priorEnd.getTime() - durationMs);
+      priorRangePhases = stride({ start: priorStart, end: priorEnd });
+    }
+  }
+  // True when last week resolved to a deload phase and the current week is not flagged as a deload.
+  const isPostDeloadReturn = !current.isDeload && previous?.phase === ProgramProgressionType.deload;
+  return {
+    current,
+    previousWeek: previous,
+    nextWeek: next,
+    summarizedRange: summarizedRange
+      ? { start: new Date(summarizedRange.start).toISOString(), end: new Date(summarizedRange.end).toISOString() }
+      : null,
+    phasesInRange,
+    previousRangePhases: priorRangePhases,
+    isPostDeloadReturn: Boolean(isPostDeloadReturn)
+  };
+}
+// Exposes otherwise module-private helpers under a single export
+// (presumably for unit tests, going by the name — confirm against the test suite).
+export const __test__ = {
+  normalizedPlanStartMonday,
+  mondayOf,
+  weeksBetween,
+  progressionTypeFromGenericRule,
+  plannedPhase
+};

package/src/prompt-security.js CHANGED Viewed

@@ -45,6 +45,19 @@ export function sanitizeHistory(messages) {
   return cleaned;
 }
+/**
+ * Removes paired XML-style tag blocks (e.g. <training_data>...</training_data>)
+ * from AI output, tag and content together. Models sometimes echo fence tags
+ * from the system prompt or data context back into their answer.
+ *
+ * Falsy input is returned unchanged. After removal, runs of three or more
+ * newlines collapse to a single blank line and the result is trimmed.
+ * Tags without a matching closing tag are left in place.
+ */
+export function stripXMLTagBlocks(text) {
+  if (!text) {
+    return text;
+  }
+  // Opening tag, lazily matched body (newlines included), then a closing tag
+  // whose name back-references the opening one.
+  const pairedTagBlock = /<([a-z][a-z0-9_:-]*)\b[^>]*>[\s\S]*?<\/\1>/gi;
+  const excessBlankLines = /\n{3,}/g;
+  return text
+    .replace(pairedTagBlock, '')
+    .replace(excessBlankLines, '\n\n')
+    .trim();
+}
 const LEAK_DETECTION_MIN_LENGTH = 50;
 /**

package/src/promptfoo-domain-assert.cjs ADDED Viewed

@@ -0,0 +1,4 @@
+// CommonJS entry point: loads the ES module on demand and hands the call
+// straight through to its assertPromptfooDomain export.
+module.exports = async function promptfooDomainAssert(output, context) {
+  const evals = await import('./promptfoo-evals.js');
+  return evals.assertPromptfooDomain(output, context);
+};

package/src/promptfoo-evals.js ADDED Viewed

@@ -0,0 +1,166 @@
+import {
+  evaluateSummaryOutputFromSnapshot,
+  loadSummaryEvalCases,
+  loadSummaryEvalSnapshot,
+  summaryEvalFixturesRoot,
+  buildSummaryEvalContext,
+  generateSummaryEvalOutputWithMetadata
+} from './summary-evals.js';
+import { publishPromptfooLangfuseScore } from './promptfoo-langfuse-scores.js';
+// Assertion target placed in the `assert` entry of every generated test case.
+const DEFAULT_ASSERTION_FILE = 'file://./src/promptfoo-domain-assert.cjs';
+// Generation metadata stored by callPromptfooProvider after a live run, keyed
+// by promptfooMetadataKey(vars) and read back by assertPromptfooDomain.
+const promptfooProviderMetadata = new Map();
+/**
+ * Reads an environment variable as a boolean switch: true only when its
+ * value, compared case-insensitively, is '1', 'true' or 'yes'.
+ */
+function envFlag(name) {
+  const value = String(process.env[name] ?? '').toLowerCase();
+  return value === '1' || value === 'true' || value === 'yes';
+}
+/**
+ * Reads an environment variable as a comma-separated list, trimming each
+ * entry and dropping empty ones. An unset variable yields an empty array.
+ */
+function envList(name) {
+  const entries = [];
+  for (const piece of String(process.env[name] ?? '').split(',')) {
+    const trimmed = piece.trim();
+    if (trimmed) {
+      entries.push(trimmed);
+    }
+  }
+  return entries;
+}
+/**
+ * Converts one summary-eval case into a promptfoo test definition with
+ * `description`, `vars`, a single javascript `assert` entry pointing at
+ * DEFAULT_ASSERTION_FILE, and `metadata`.
+ *
+ * `caseSet` and `fixtureFile` may be overridden through the options object;
+ * by default they come from the case itself ('synthetic' / null when absent).
+ * A case counts as `shouldPass` unless it sets `shouldPass: false`.
+ */
+export function buildPromptfooTestCase(testCase, { caseSet = testCase.caseSet ?? 'synthetic', fixtureFile = testCase.fixtureFile ?? null } = {}) {
+  const { surface } = testCase;
+  const snapshotFile = testCase.snapshotFile ?? null;
+  const shouldPass = testCase.shouldPass !== false;
+  // The question prefers the case context, then the case itself, then its name.
+  const question = testCase.context?.question ?? testCase.question ?? testCase.name;
+  const vars = {
+    caseSet,
+    caseId: testCase.id,
+    fixtureFile,
+    snapshotFile,
+    surface,
+    question,
+    output: testCase.output,
+    shouldPass
+  };
+  const metadata = {
+    surface,
+    source: testCase.source ?? 'fixture',
+    fixtureFile,
+    snapshotFile,
+    shouldPass
+  };
+  return {
+    description: `${surface}: ${testCase.name ?? testCase.id}`,
+    vars,
+    assert: [{ type: 'javascript', value: DEFAULT_ASSERTION_FILE }],
+    metadata
+  };
+}
+/**
+ * Loads the eval cases of `caseSet` and turns the selected ones into
+ * promptfoo test definitions.
+ *
+ * Selection: cases with `shouldPass: false` are kept only when
+ * `includeNegativeControls` is set; a non-empty `surfaces` / `caseIds` list
+ * restricts the result to matching cases. All options default to the
+ * corresponding environment variables.
+ */
+export async function buildPromptfooTests({
+  caseSet = process.env.SUMMARY_EVAL_CASE_SET ?? 'synthetic',
+  includeNegativeControls = envFlag('PROMPTFOO_INCLUDE_NEGATIVE_CONTROLS'),
+  surfaces = envList('PROMPTFOO_SURFACES'),
+  caseIds = envList('PROMPTFOO_CASE_IDS')
+} = {}) {
+  const allowedSurfaces = new Set(surfaces);
+  const allowedIds = new Set(caseIds);
+  const isSelected = (testCase) => {
+    if (!includeNegativeControls && testCase.shouldPass === false) return false;
+    if (allowedSurfaces.size > 0 && !allowedSurfaces.has(testCase.surface)) return false;
+    if (allowedIds.size > 0 && !allowedIds.has(testCase.id)) return false;
+    return true;
+  };
+  const cases = await loadSummaryEvalCases(caseSet);
+  return cases
+    .filter(isSelected)
+    .map((testCase) => buildPromptfooTestCase(testCase, { caseSet, fixtureFile: testCase.fixtureFile }));
+}
+/**
+ * Finds the eval case that promptfoo `vars` refer to and loads its snapshot.
+ *
+ * The case is matched on `vars.caseId` and, when `vars.fixtureFile` is set,
+ * on the fixture file as well. Throws when no case matches.
+ */
+async function resolvePromptfooEval(vars = {}) {
+  const caseSet = vars.caseSet ?? process.env.SUMMARY_EVAL_CASE_SET ?? 'synthetic';
+  const cases = await loadSummaryEvalCases(caseSet);
+  const fromRequestedFixture = (candidate) => !vars.fixtureFile || candidate.fixtureFile === vars.fixtureFile;
+  const testCase = cases.find((candidate) => fromRequestedFixture(candidate) && candidate.id === vars.caseId);
+  if (!testCase) {
+    throw new Error(`Promptfoo eval case not found: ${caseSet}/${vars.caseId ?? '(missing caseId)'}`);
+  }
+  return { testCase, snapshot: await loadSummaryEvalSnapshot(testCase) };
+}
+/**
+ * Renders every failed check of `result` as "key: reason" and joins them
+ * with ' | '. Yields an empty string when nothing failed.
+ */
+function summarizeFailedChecks(result) {
+  const failures = result.checks.flatMap((check) => (check.passed ? [] : [`${check.key}: ${check.reason}`]));
+  return failures.join(' | ');
+}
+/**
+ * Builds the "caseSet:fixtureFile:caseId" lookup key used for cached provider
+ * metadata. Missing parts fall back to the SUMMARY_EVAL_CASE_SET environment
+ * variable (then 'synthetic') for the case set and to '' for the rest.
+ */
+function promptfooMetadataKey(vars = {}) {
+  const caseSet = vars.caseSet ?? process.env.SUMMARY_EVAL_CASE_SET ?? 'synthetic';
+  const fixtureFile = vars.fixtureFile ?? '';
+  const caseId = vars.caseId ?? '';
+  return `${caseSet}:${fixtureFile}:${caseId}`;
+}
+/**
+ * promptfoo assertion: evaluates `output` against the domain checks of the
+ * eval case named in `context.vars`, publishes the outcome as a Langfuse
+ * score, and returns a promptfoo grading result (pass, score, reason,
+ * per-check component results, metadata).
+ *
+ * When the context carries no `providerResponse`, metadata cached for the
+ * same case by the provider is attached before the score is published.
+ */
+export async function assertPromptfooDomain(output, context = {}) {
+  const vars = context.vars ?? {};
+  const { testCase, snapshot } = await resolvePromptfooEval(vars);
+  const result = evaluateSummaryOutputFromSnapshot(testCase, snapshot, output);
+  const cachedMetadata = promptfooProviderMetadata.get(promptfooMetadataKey(vars));
+  let scoreContext = context;
+  if (cachedMetadata && !context.providerResponse) {
+    scoreContext = { ...context, providerResponse: { metadata: cachedMetadata } };
+  }
+  const langfuseScore = await publishPromptfooLangfuseScore({ result, testCase, context: scoreContext });
+  const componentResults = result.checks.map((check) => ({
+    pass: check.passed,
+    score: check.passed ? 1 : 0,
+    reason: `${check.key}: ${check.reason}`
+  }));
+  const reason = result.passed ? 'Domain checks passed.' : summarizeFailedChecks(result);
+  return {
+    pass: result.passed,
+    score: result.passed ? 1 : 0,
+    reason,
+    componentResults,
+    metadata: {
+      caseId: result.id,
+      surface: result.surface,
+      fixturesRoot: summaryEvalFixturesRoot,
+      langfuseScore
+    }
+  };
+}
+/**
+ * promptfoo provider for summary evals.
+ *
+ * Stored mode (default): returns the eval case's recorded `output`.
+ * Live mode (SUMMARY_EVALS_LIVE or PROMPTFOO_LIVE set): generates a fresh
+ * output, caches the generation metadata for the later assertion step, and
+ * returns both. Live mode without OPENROUTER_API_KEY yields an `error`
+ * result instead of throwing.
+ */
+export async function callPromptfooProvider(prompt, context = {}) {
+  const vars = context.vars ?? {};
+  const { testCase, snapshot } = await resolvePromptfooEval(vars);
+  const baseMetadata = { caseId: testCase.id, surface: testCase.surface };
+  const isLive = ['SUMMARY_EVALS_LIVE', 'PROMPTFOO_LIVE'].some((flag) => envFlag(flag));
+  if (!isLive) {
+    return {
+      output: testCase.output,
+      metadata: { ...baseMetadata, mode: 'stored' }
+    };
+  }
+  if (!process.env.OPENROUTER_API_KEY) {
+    return { error: 'Missing OPENROUTER_API_KEY for live promptfoo eval.' };
+  }
+  const evalContext = buildSummaryEvalContext(snapshot, testCase);
+  const { output, metadata: generationMetadata } = await generateSummaryEvalOutputWithMetadata(testCase, evalContext, snapshot);
+  // Remember the generation metadata so the assertion step can attach it.
+  promptfooProviderMetadata.set(promptfooMetadataKey(vars), generationMetadata);
+  return {
+    output,
+    prompt,
+    metadata: { ...baseMetadata, mode: 'live', ...generationMetadata }
+  };
+}