incremnt 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,206 @@
1
+ // JS port of StrengthTrackerIOS/StrengthTrackerIOS/ProgramPhaseResolver.swift.
2
+ //
3
+ // Phase is resolved purely from (plan, date). Every cycle/checkpoint/weekly
4
+ // AI summary endpoint should ask this module for current/previous/next-week
5
+ // phase facts and attach them to the prompt context so the model never has
6
+ // to infer "is this a deload week" from session prose.
7
+ //
8
+ // The resolver duplicates the small Monday-normalisation math that the iOS
9
+ // `DashboardSchedulingLogic` uses internally rather than reusing wrapped
10
+ // week helpers — those take a `cycleWeek` argument that re-introduces the
11
+ // counter-vs-calendar leak the Swift refactor closed.
12
+
13
// Milliseconds in one day: 24 h × 60 min × 60 s × 1000 ms.
const MS_PER_DAY = 24 * 60 * 60 * 1000;
// Program length (in weeks) used when no usable duration is available.
const DEFAULT_TOTAL_WEEKS = 16;

// Closed set of week phases. Each key maps to the identical string value and
// the object is frozen so it cannot be mutated at runtime.
const ProgramProgressionType = Object.freeze(
  Object.fromEntries(['build', 'deload', 'peak', 'benchmark'].map((phase) => [phase, phase]))
);
22
+
23
/**
 * Returns a fresh Date set to 00:00:00.000 UTC on the same UTC calendar day
 * as `date`. The argument itself is left untouched.
 *
 * @param {Date} date
 * @returns {Date}
 */
function startOfDay(date) {
  const midnightUtc = new Date(date.getTime());
  midnightUtc.setUTCHours(0, 0, 0, 0);
  return midnightUtc;
}
28
+
29
/**
 * ISO weekday number of `date`, read in UTC: 1 (Monday) … 7 (Sunday).
 *
 * @param {Date} date
 * @returns {number}
 */
function isoWeekday(date) {
  // getUTCDay() is 0 (Sun) … 6 (Sat); rotate so that Sunday lands on 7.
  return ((date.getUTCDay() + 6) % 7) + 1;
}
34
+
35
/**
 * Snaps a plan start date to a Monday at 00:00 UTC, rolling FORWARD: a Monday
 * start stays where it is, any other weekday moves to the following Monday.
 *
 * @param {Date|string|number} planStartDate anything accepted by `new Date()`
 * @returns {Date}
 */
function normalizedPlanStartMonday(planStartDate) {
  const monday = startOfDay(new Date(planStartDate));
  // Mon → 0, Tue → 6, Wed → 5, …, Sun → 1 day(s) forward.
  const forwardDays = (8 - isoWeekday(monday)) % 7;
  monday.setUTCDate(monday.getUTCDate() + forwardDays);
  return monday;
}
42
+
43
/**
 * Returns 00:00 UTC on the Monday of the ISO week that contains `date`
 * (i.e. rolls BACKWARD to the most recent Monday, or stays put on a Monday).
 *
 * @param {Date|string|number} date anything accepted by `new Date()`
 * @returns {Date}
 */
function mondayOf(date) {
  const monday = startOfDay(new Date(date));
  const daysSinceMonday = isoWeekday(monday) - 1;
  monday.setUTCDate(monday.getUTCDate() - daysSinceMonday);
  return monday;
}
49
+
50
/**
 * Number of whole 7-day spans from `earlier` to `later`, rounded toward
 * negative infinity (so the result is negative when `later` precedes `earlier`).
 *
 * @param {Date} earlier
 * @param {Date} later
 * @returns {number}
 */
function weeksBetween(earlier, later) {
  const msPerWeek = 7 * MS_PER_DAY;
  const elapsedMs = later.getTime() - earlier.getTime();
  return Math.floor(elapsedMs / msPerWeek);
}
54
+
55
/**
 * Generic phase rule used when a plan supplies no explicit phase for a week.
 *
 * Precedence, first match wins:
 *   1. every positive multiple of 5 (tested on the raw, unclamped week) → deload
 *   2. the final week (after clamping into 1…totalWeeks)               → benchmark
 *   3. the last four weeks of a programme of 12+ weeks                  → peak
 *   4. everything else                                                  → build
 *
 * @param {number} weekNumber 1-based week number
 * @param {number} [totalWeeks=DEFAULT_TOTAL_WEEKS]
 * @returns {string} one of the `ProgramProgressionType` values
 */
function progressionTypeFromGenericRule(weekNumber, totalWeeks = DEFAULT_TOTAL_WEEKS) {
  const { build, deload, peak, benchmark } = ProgramProgressionType;

  const isFifthWeek = weekNumber > 0 && weekNumber % 5 === 0;
  if (isFifthWeek) return deload;

  const week = Math.max(1, Math.min(weekNumber, totalWeeks));
  if (week === totalWeeks) return benchmark;

  const inPeakWindow = totalWeeks >= 12 && week >= totalWeeks - 3;
  return inPeakWindow ? peak : build;
}
62
+
63
/**
 * Resolves the plan length in whole weeks.
 *
 * Candidates are tried in order and the first usable one wins:
 *   1. `plan.durationWeeks`
 *   2. `plan.plannedWeeks.length` (covers snapshots from older iOS builds that
 *      omit `durationWeeks`)
 *   3. `DEFAULT_TOTAL_WEEKS`
 *
 * A candidate is usable only if it floors to a finite integer >= 1. This keeps
 * the function from ever returning 0 (e.g. for `durationWeeks: 0.5`), which
 * would make callers report a display week of 0. A present-but-invalid
 * `durationWeeks` also no longer hides a valid `plannedWeeks` length.
 *
 * @param {object|null|undefined} plan
 * @returns {number} integer >= 1
 */
function planDurationWeeks(plan) {
  const candidates = [plan?.durationWeeks, plan?.plannedWeeks?.length];
  for (const candidate of candidates) {
    if (candidate == null) continue;
    const weeks = Math.floor(Number(candidate));
    if (Number.isFinite(weeks) && weeks >= 1) return weeks;
  }
  return DEFAULT_TOTAL_WEEKS;
}
68
+
69
/**
 * Phase for `weekNumber` according to the plan.
 *
 * With no `plannedWeeks` entries the generic rule is applied to the raw week
 * number. Otherwise the week is clamped into 1…duration, the matching
 * `plannedWeeks` entry's `phase` is used, and the generic rule (on the clamped
 * week) fills in when no entry or no phase is found.
 *
 * @param {object|null|undefined} plan
 * @param {number} weekNumber 1-based week number
 * @returns {string}
 */
function plannedPhase(plan, weekNumber) {
  const duration = planDurationWeeks(plan);
  const weeks = plan?.plannedWeeks;
  if (!weeks?.length) return progressionTypeFromGenericRule(weekNumber, duration);

  const week = Math.max(1, Math.min(weekNumber, duration));
  const entry = weeks.find((candidate) => Number(candidate.weekNumber) === week);
  return entry?.phase ?? progressionTypeFromGenericRule(week, duration);
}
78
+
79
/**
 * Calendar-based resolution for a program that has a plan.
 *
 * The week is derived from the number of whole weeks between the plan's
 * (forward-normalised) start Monday and the Monday of `date`:
 *   - `unboundedWeek` keeps counting past the end of the plan (minimum 1);
 *   - `displayWeek` is the same value capped at the plan duration;
 *   - `isDeload` is only true while the plan is still running.
 *
 * `program` is accepted for signature symmetry and is not read here.
 *
 * @param {object} program
 * @param {object} plan must expose `startDate` and `id`
 * @param {Date|string|number} date
 * @returns {{phaseInstanceId: string, unboundedWeek: number, displayWeek: number, phase: string, isDeload: boolean, source: string}}
 */
function resolveGuided(program, plan, date) {
  const durationWeeks = planDurationWeeks(plan);
  const weeksSinceStart = weeksBetween(normalizedPlanStartMonday(plan.startDate), mondayOf(date));
  const unboundedWeek = Math.max(1, weeksSinceStart + 1);
  // Inside the plan the display week equals the unbounded week; afterwards it
  // stays pinned to the final week.
  const displayWeek = Math.min(unboundedWeek, durationWeeks);
  const phase = plannedPhase(plan, displayWeek);
  const planStillRunning = unboundedWeek <= durationWeeks;

  return {
    phaseInstanceId: `plan:${plan.id}:week:${unboundedWeek}`,
    unboundedWeek,
    displayWeek,
    phase,
    isDeload: phase === ProgramProgressionType.deload && planStillRunning,
    source: 'planned'
  };
}
99
+
100
/**
 * Counter-based resolution for a program without a plan.
 *
 * The current week is `program.completedCyclesCount + 1`, mapped onto a
 * repeating `DEFAULT_TOTAL_WEEKS`-week programme and classified with the
 * generic phase rule. No date is involved.
 *
 * The counter is sanitised before use: a missing, non-numeric or non-finite
 * value counts as 0 and fractions are floored. Without this,
 * `Math.max(1, NaN)` yields NaN and the result would carry `week:NaN` ids and
 * NaN week numbers into downstream prompt contexts.
 *
 * @param {object|null|undefined} program
 * @returns {{phaseInstanceId: string, unboundedWeek: number, displayWeek: number, phase: string, isDeload: boolean, source: string}}
 */
function resolveFallback(program) {
  const durationWeeks = DEFAULT_TOTAL_WEEKS;
  const rawCount = Number(program?.completedCyclesCount ?? 0);
  const completedCycles = Number.isFinite(rawCount) ? Math.floor(rawCount) : 0;
  const unboundedWeek = Math.max(1, completedCycles + 1);
  const displayWeek = ((unboundedWeek - 1) % durationWeeks) + 1;
  const phase = progressionTypeFromGenericRule(displayWeek, durationWeeks);
  // Only the first pass through the programme is flagged as a deload.
  const isDeload = phase === ProgramProgressionType.deload && unboundedWeek <= durationWeeks;
  return {
    phaseInstanceId: `unguided:${program?.id ?? 'unknown'}:week:${unboundedWeek}`,
    unboundedWeek,
    displayWeek,
    phase,
    isDeload,
    source: 'fallback'
  };
}
116
+
117
/**
 * Resolve the program's phase as of `date`. Returns a `ProgramPhaseResolution`
 * with the same shape as the Swift struct.
 *
 * - no program          → `null`
 * - program with a plan → calendar-based resolution (`source: 'planned'`)
 * - program, no plan    → counter-based resolution (`source: 'fallback'`);
 *                         `date` is not consulted in this case
 *
 * @param {object|null|undefined} program
 * @param {object|null|undefined} plan
 * @param {Date|string|number} [date] defaults to the current time
 * @returns {object|null}
 */
export function resolveProgramPhase(program, plan, date = new Date()) {
  if (!program) return null;
  return plan ? resolveGuided(program, plan, date) : resolveFallback(program);
}
126
+
127
/**
 * Build a phase-window context for AI prompts. Mirrors the Swift
 * `ProgramPhaseWindowContext`. Use the `programPhase` payload it returns as
 * a structured field on AI request bodies / prompt contexts so the model
 * never has to infer phase from session prose.
 *
 * The result contains:
 *   - `current` / `previousWeek` / `nextWeek`: resolutions for `today` and
 *     `today` ∓ 7 days;
 *   - `summarizedRange`: the input range as ISO strings, or `null`;
 *   - `phasesInRange`: one resolution per distinct phase instance touched by
 *     the summarized range;
 *   - `previousRangePhases`: the same for the equally long window that ends
 *     where the summarized range starts;
 *   - `isPostDeloadReturn`: true when this week is not a deload but last week
 *     was one.
 *
 * @param {object|null|undefined} program
 * @param {object|null|undefined} plan
 * @param {{start: Date|string|number, end: Date|string|number}|null|undefined} summarizedRange
 * @param {Date|string|number} [today] defaults to the current time
 * @returns {object|null} `null` when there is no program
 */
export function programPhaseWindowContext(program, plan, summarizedRange, today = new Date()) {
  if (!program) return null;
  const current = resolveProgramPhase(program, plan, today);
  if (!current) return null;

  // Returns a copy of `date` shifted by `days` UTC calendar days.
  const offsetByDays = (date, days) => {
    const d = new Date(date);
    d.setUTCDate(d.getUTCDate() + days);
    return d;
  };

  const previous = resolveProgramPhase(program, plan, offsetByDays(today, -7));
  const next = resolveProgramPhase(program, plan, offsetByDays(today, 7));

  // Walks the range one week at a time, starting on the Monday of the week
  // containing `range.start` and continuing while the cursor is <= `range.end`.
  // Resolutions are de-duplicated by `phaseInstanceId`; the iteration cap
  // bounds the loop for very long ranges.
  const stride = (range) => {
    if (!range) return [];
    const start = new Date(range.start);
    const end = new Date(range.end);
    if (end <= start) return [];
    const out = [];
    const seen = new Set();
    let cursor = mondayOf(start);
    let safety = 0;
    while (cursor <= end && safety < 1024) {
      const r = resolveProgramPhase(program, plan, cursor);
      if (r && !seen.has(r.phaseInstanceId)) {
        seen.add(r.phaseInstanceId);
        out.push(r);
      }
      cursor = offsetByDays(cursor, 7);
      safety += 1;
    }
    return out;
  };

  const phasesInRange = stride(summarizedRange);

  let priorRangePhases = [];
  if (summarizedRange) {
    // Use exact millisecond duration (mirrors Swift's DateInterval.duration)
    // so non-day-aligned ranges don't shift the prior window by a fractional
    // day via setUTCDate quirks.
    const summarizedStart = new Date(summarizedRange.start);
    const summarizedEnd = new Date(summarizedRange.end);
    const durationMs = summarizedEnd.getTime() - summarizedStart.getTime();
    if (durationMs > 0) {
      const priorEnd = summarizedStart;
      const priorStart = new Date(priorEnd.getTime() - durationMs);
      priorRangePhases = stride({ start: priorStart, end: priorEnd });
    }
  }

  // Compare `isDeload` on BOTH sides. `phase` can still read 'deload' once the
  // week counter has run past the programme length, where `isDeload` is forced
  // to false; mixing `current.isDeload` with `previous.phase` would then keep
  // this flag stuck on true even though no deload week actually preceded.
  const isPostDeloadReturn = !current.isDeload && Boolean(previous?.isDeload);

  return {
    current,
    previousWeek: previous,
    nextWeek: next,
    summarizedRange: summarizedRange
      ? { start: new Date(summarizedRange.start).toISOString(), end: new Date(summarizedRange.end).toISOString() }
      : null,
    phasesInRange,
    previousRangePhases: priorRangePhases,
    isPostDeloadReturn
  };
}
199
+
200
// Internal helpers exposed for unit tests only.
export const __test__ = {
  normalizedPlanStartMonday: normalizedPlanStartMonday,
  mondayOf: mondayOf,
  weeksBetween: weeksBetween,
  progressionTypeFromGenericRule: progressionTypeFromGenericRule,
  plannedPhase: plannedPhase
};
@@ -45,6 +45,19 @@ export function sanitizeHistory(messages) {
45
45
  return cleaned;
46
46
  }
47
47
 
48
/**
 * Strips XML-style tag blocks (e.g. <training_data>...</training_data>) from AI output.
 * Models sometimes echo back fence tags from the system prompt or data context.
 * Removes each tag pair together with its content, collapses runs of three or
 * more newlines to a single blank line, and trims the result.
 *
 * Closing tags may carry trailing whitespace (`</tag >`), which XML permits.
 * Non-string input (including `null`/`undefined`) and the empty string are
 * returned unchanged instead of throwing.
 *
 * @param {*} text
 * @returns {*} the cleaned string, or the input itself when it is not a non-empty string
 */
export function stripXMLTagBlocks(text) {
  if (typeof text !== 'string' || text === '') return text;
  // <name ...> … </name>: non-greedy so adjacent blocks are removed separately;
  // [\s\S] lets the content span lines; \1 ties the closer to the opener's name.
  const tagBlock = /<([a-z][a-z0-9_:-]*)\b[^>]*>[\s\S]*?<\/\1\s*>/gi;
  const withoutBlocks = text.replace(tagBlock, '');
  // Clean up leftover blank lines
  return withoutBlocks.replace(/\n{3,}/g, '\n\n').trim();
}
60
+
48
61
  const LEAK_DETECTION_MIN_LENGTH = 50;
49
62
 
50
63
  /**
@@ -0,0 +1,4 @@
1
+ module.exports = async function promptfooDomainAssert(output, context) {
2
+ const { assertPromptfooDomain } = await import('./promptfoo-evals.js');
3
+ return assertPromptfooDomain(output, context);
4
+ };
@@ -0,0 +1,166 @@
1
+ import {
2
+ evaluateSummaryOutputFromSnapshot,
3
+ loadSummaryEvalCases,
4
+ loadSummaryEvalSnapshot,
5
+ summaryEvalFixturesRoot,
6
+ buildSummaryEvalContext,
7
+ generateSummaryEvalOutputWithMetadata
8
+ } from './summary-evals.js';
9
+ import { publishPromptfooLangfuseScore } from './promptfoo-langfuse-scores.js';
10
+
11
// Assertion script reference attached to every generated promptfoo test case.
const DEFAULT_ASSERTION_FILE = 'file://./src/promptfoo-domain-assert.cjs';
// Generation metadata recorded by the provider in live mode, keyed by
// promptfooMetadataKey(vars), and read back by the domain assertion.
const promptfooProviderMetadata = new Map();
13
+
14
/**
 * Reads a boolean switch from the environment. The flag is on only when the
 * variable equals `1`, `true` or `yes` (case-insensitive, no trimming).
 *
 * @param {string} name environment variable name
 * @returns {boolean}
 */
function envFlag(name) {
  const normalized = String(process.env[name] ?? '').toLowerCase();
  return normalized === '1' || normalized === 'true' || normalized === 'yes';
}
17
+
18
/**
 * Reads a comma-separated list from the environment. Items are trimmed and
 * empty items are dropped; an unset variable yields an empty array.
 *
 * @param {string} name environment variable name
 * @returns {string[]}
 */
function envList(name) {
  const items = [];
  for (const piece of String(process.env[name] ?? '').split(',')) {
    const trimmed = piece.trim();
    if (trimmed) items.push(trimmed);
  }
  return items;
}
24
+
25
/**
 * Converts one summary-eval case into a promptfoo test definition.
 *
 * `vars` carries what is needed to look the case up again later (case set, id,
 * fixture file) together with the stored output; `assert` points at the shared
 * domain-assertion script; `metadata` repeats the descriptive fields.
 *
 * @param {object} testCase
 * @param {{caseSet?: string, fixtureFile?: string|null}} [options] overrides;
 *   default to the values on `testCase`, then `'synthetic'` / `null`
 * @returns {{description: string, vars: object, assert: object[], metadata: object}}
 */
export function buildPromptfooTestCase(testCase, { caseSet = testCase.caseSet ?? 'synthetic', fixtureFile = testCase.fixtureFile ?? null } = {}) {
  const { id, name, surface } = testCase;
  const snapshotFile = testCase.snapshotFile ?? null;
  // Cases are positive unless explicitly marked `shouldPass: false`.
  const shouldPass = testCase.shouldPass !== false;
  const question = testCase.context?.question ?? testCase.question ?? name;

  const vars = {
    caseSet,
    caseId: id,
    fixtureFile,
    snapshotFile,
    surface,
    question,
    output: testCase.output,
    shouldPass
  };

  const metadata = {
    surface,
    source: testCase.source ?? 'fixture',
    fixtureFile,
    snapshotFile,
    shouldPass
  };

  return {
    description: `${surface}: ${name ?? id}`,
    vars,
    assert: [{ type: 'javascript', value: DEFAULT_ASSERTION_FILE }],
    metadata
  };
}
55
+
56
/**
 * Loads a case set and turns it into promptfoo test definitions.
 *
 * Cases marked `shouldPass: false` are left out unless
 * `includeNegativeControls` is set. `surfaces` and `caseIds` act as allow-lists;
 * an empty list means "no restriction". Every option defaults from the
 * environment variable named in its default expression.
 *
 * @param {{caseSet?: string, includeNegativeControls?: boolean, surfaces?: string[], caseIds?: string[]}} [options]
 * @returns {Promise<object[]>}
 */
export async function buildPromptfooTests({
  caseSet = process.env.SUMMARY_EVAL_CASE_SET ?? 'synthetic',
  includeNegativeControls = envFlag('PROMPTFOO_INCLUDE_NEGATIVE_CONTROLS'),
  surfaces = envList('PROMPTFOO_SURFACES'),
  caseIds = envList('PROMPTFOO_CASE_IDS')
} = {}) {
  const allowedSurfaces = new Set(surfaces);
  const allowedIds = new Set(caseIds);
  const allCases = await loadSummaryEvalCases(caseSet);

  const isSelected = (testCase) =>
    (includeNegativeControls || testCase.shouldPass !== false) &&
    (allowedSurfaces.size === 0 || allowedSurfaces.has(testCase.surface)) &&
    (allowedIds.size === 0 || allowedIds.has(testCase.id));

  const tests = [];
  for (const testCase of allCases) {
    if (!isSelected(testCase)) continue;
    tests.push(buildPromptfooTestCase(testCase, { caseSet, fixtureFile: testCase.fixtureFile }));
  }
  return tests;
}
72
+
73
/**
 * Looks up the eval case referenced by a promptfoo `vars` object and loads its
 * snapshot. Cases are matched by `caseId`, and additionally by `fixtureFile`
 * when one is given.
 *
 * @param {{caseSet?: string, caseId?: string, fixtureFile?: string|null}} [vars]
 * @returns {Promise<{testCase: object, snapshot: *}>}
 * @throws {Error} when no case matches
 */
async function resolvePromptfooEval(vars = {}) {
  const caseSet = vars.caseSet ?? process.env.SUMMARY_EVAL_CASE_SET ?? 'synthetic';
  const candidates = await loadSummaryEvalCases(caseSet);

  const matches = (candidate) => {
    const fixtureMatches = !vars.fixtureFile || candidate.fixtureFile === vars.fixtureFile;
    return fixtureMatches && candidate.id === vars.caseId;
  };
  const testCase = candidates.find(matches);

  if (!testCase) {
    throw new Error(`Promptfoo eval case not found: ${caseSet}/${vars.caseId ?? '(missing caseId)'}`);
  }

  return { testCase, snapshot: await loadSummaryEvalSnapshot(testCase) };
}
88
+
89
/**
 * Renders the failed checks of an eval result as one line:
 * `key: reason | key: reason | …`. Returns an empty string when nothing failed.
 *
 * @param {{checks: Array<{passed: boolean, key: string, reason: string}>}} result
 * @returns {string}
 */
function summarizeFailedChecks(result) {
  const failures = [];
  for (const check of result.checks) {
    if (check.passed) continue;
    failures.push(`${check.key}: ${check.reason}`);
  }
  return failures.join(' | ');
}
95
+
96
/**
 * Builds the `caseSet:fixtureFile:caseId` key used to store and look up
 * provider metadata. Missing `fixtureFile` / `caseId` become empty segments;
 * a missing `caseSet` falls back to `SUMMARY_EVAL_CASE_SET`, then `'synthetic'`.
 *
 * @param {{caseSet?: string, fixtureFile?: string|null, caseId?: string}} [vars]
 * @returns {string}
 */
function promptfooMetadataKey(vars = {}) {
  const caseSet = vars.caseSet ?? process.env.SUMMARY_EVAL_CASE_SET ?? 'synthetic';
  const fixtureFile = vars.fixtureFile ?? '';
  const caseId = vars.caseId ?? '';
  return `${caseSet}:${fixtureFile}:${caseId}`;
}
103
+
104
/**
 * promptfoo assertion: runs the domain checks for the case named in
 * `context.vars` against `output`, publishes a Langfuse score, and returns the
 * verdict in promptfoo's grading-result shape.
 *
 * If the provider stored generation metadata for this case and the context
 * carries no `providerResponse`, that metadata is attached to the context
 * handed to the score publisher.
 *
 * @param {*} output model output under test
 * @param {object} [context] promptfoo assertion context
 * @returns {Promise<{pass: boolean, score: number, reason: string, componentResults: object[], metadata: object}>}
 */
export async function assertPromptfooDomain(output, context = {}) {
  const vars = context.vars ?? {};
  const { testCase, snapshot } = await resolvePromptfooEval(vars);
  const result = evaluateSummaryOutputFromSnapshot(testCase, snapshot, output);

  const storedMetadata = promptfooProviderMetadata.get(promptfooMetadataKey(vars));
  let scoreContext = context;
  if (storedMetadata && !context.providerResponse) {
    scoreContext = { ...context, providerResponse: { metadata: storedMetadata } };
  }
  const langfuseScore = await publishPromptfooLangfuseScore({ result, testCase, context: scoreContext });

  // One pass/fail component per individual domain check.
  const componentResults = result.checks.map(({ passed, key, reason }) => ({
    pass: passed,
    score: passed ? 1 : 0,
    reason: `${key}: ${reason}`
  }));

  let reason = 'Domain checks passed.';
  if (!result.passed) {
    reason = summarizeFailedChecks(result);
  }

  return {
    pass: result.passed,
    score: result.passed ? 1 : 0,
    reason,
    componentResults,
    metadata: {
      caseId: result.id,
      surface: result.surface,
      fixturesRoot: summaryEvalFixturesRoot,
      langfuseScore
    }
  };
}
130
+
131
/**
 * promptfoo provider for the summary evals.
 *
 * - Stored mode (default): returns the output recorded on the test case.
 * - Live mode (`SUMMARY_EVALS_LIVE` or `PROMPTFOO_LIVE` set): generates a fresh
 *   output, remembers the generation metadata under the case's metadata key,
 *   and returns both. Without `OPENROUTER_API_KEY` an `{ error }` object is
 *   returned instead.
 *
 * @param {string} prompt echoed back on the live response
 * @param {object} [context] promptfoo provider context; `context.vars` selects the case
 * @returns {Promise<object>}
 */
export async function callPromptfooProvider(prompt, context = {}) {
  const vars = context.vars ?? {};
  const { testCase, snapshot } = await resolvePromptfooEval(vars);
  const identity = { caseId: testCase.id, surface: testCase.surface };

  const isLive = envFlag('SUMMARY_EVALS_LIVE') || envFlag('PROMPTFOO_LIVE');
  if (!isLive) {
    return {
      output: testCase.output,
      metadata: { ...identity, mode: 'stored' }
    };
  }

  if (!process.env.OPENROUTER_API_KEY) {
    return { error: 'Missing OPENROUTER_API_KEY for live promptfoo eval.' };
  }

  const evalContext = buildSummaryEvalContext(snapshot, testCase);
  const generation = await generateSummaryEvalOutputWithMetadata(testCase, evalContext, snapshot);
  promptfooProviderMetadata.set(promptfooMetadataKey(vars), generation.metadata);

  return {
    output: generation.output,
    prompt,
    // Generation metadata is spread last, so its keys win on collision.
    metadata: { ...identity, mode: 'live', ...generation.metadata }
  };
}