incremnt 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,354 @@
1
+ import { createHash, randomUUID } from 'node:crypto';
2
+ import { AI_PROMPT_VERSIONS } from './openrouter.js';
3
+
4
+ const DEFAULT_LANGFUSE_HOST = 'https://cloud.langfuse.com'; // Fallback base URL; langfuseHost() uses it when neither LANGFUSE_BASE_URL nor LANGFUSE_HOST holds a non-blank string.
5
+ const SCORE_NAME = 'promptfoo_domain_pass'; // Name sent with every score payload; also the first component hashed into the stable score id.
6
+ const promptfooRunTracesCreated = new Set(); // Module-level record of run-trace ids that were published successfully, so later calls skip re-sending them.
7
+
8
/**
 * Builds a shallow copy of `obj` that omits every entry whose value is
 * `undefined` or `null`. Other falsy values (0, '', false) are kept.
 *
 * @param {object} obj - Object whose own enumerable entries are inspected.
 * @returns {object} New object containing only the entries with a value.
 */
function compactObject(obj) {
  const keptPairs = [];
  for (const pair of Object.entries(obj)) {
    const value = pair[1];
    if (value === undefined || value === null) continue;
    keptPairs.push(pair);
  }
  return Object.fromEntries(keptPairs);
}
13
+
14
/**
 * Picks the first argument that is a string with non-whitespace content.
 *
 * @param {...*} values - Candidates, checked in order; non-strings are ignored.
 * @returns {string|null} The trimmed first usable string, or `null` when none qualifies.
 */
function firstString(...values) {
  const hit = values.find(
    (candidate) => typeof candidate === 'string' && candidate.trim() !== ''
  );
  return hit === undefined ? null : hit.trim();
}
20
+
21
/**
 * Interprets an environment variable as an on/off switch.
 *
 * @param {string} name - Variable to look up.
 * @param {object} [env=process.env] - Environment map to read from.
 * @returns {boolean} `true` only when the value, stringified and lower-cased,
 *   is exactly '1', 'true' or 'yes'; a missing value counts as off.
 */
function envFlag(name, env = process.env) {
  const raw = env[name] ?? '';
  switch (String(raw).toLowerCase()) {
    case '1':
    case 'true':
    case 'yes':
      return true;
    default:
      return false;
  }
}
24
+
25
/**
 * Looks up the commit SHA from the environment.
 *
 * The variables are consulted in this order: RENDER_GIT_COMMIT, GIT_SHA,
 * COMMIT_SHA, VERCEL_GIT_COMMIT_SHA. The first one that is neither
 * `undefined` nor `null` wins (an empty string is returned as-is).
 *
 * @param {object} [env=process.env] - Environment map to read from.
 * @returns {*} The first defined value, or `null` when none is set.
 */
function currentGitSha(env = process.env) {
  const variableNames = ['RENDER_GIT_COMMIT', 'GIT_SHA', 'COMMIT_SHA', 'VERCEL_GIT_COMMIT_SHA'];
  for (const variableName of variableNames) {
    const sha = env[variableName];
    if (sha !== undefined && sha !== null) return sha;
  }
  return null;
}
32
+
33
/**
 * Decides whether promptfoo results should be published to Langfuse.
 *
 * Publishing is opt-out: it is on whenever both Langfuse credentials are
 * present, unless PROMPTFOO_LANGFUSE_SCORES carries an explicit "off" value.
 * The opt-out is matched case-insensitively after trimming and accepts
 * '0', 'false', 'no' and 'off', mirroring the '1'/'true'/'yes' spellings that
 * the on-switches in this module understand. Previously only the exact
 * string '0' disabled publishing, so `PROMPTFOO_LANGFUSE_SCORES=false` still
 * sent scores.
 *
 * @param {object} [env=process.env] - Environment map to read from.
 * @returns {boolean} `true` when scores should be sent.
 */
export function shouldPublishPromptfooLangfuseScores(env = process.env) {
  const optOut = String(env.PROMPTFOO_LANGFUSE_SCORES ?? '').trim().toLowerCase();
  if (['0', 'false', 'no', 'off'].includes(optOut)) return false;
  return Boolean(env.LANGFUSE_PUBLIC_KEY && env.LANGFUSE_SECRET_KEY);
}
37
+
38
/**
 * Resolves the Langfuse base URL.
 *
 * LANGFUSE_BASE_URL takes precedence over LANGFUSE_HOST; when neither holds
 * a non-blank string the module default is used. Trailing slashes are
 * stripped so callers can append paths directly.
 *
 * @param {object} [env=process.env] - Environment map to read from.
 * @returns {string} Base URL without trailing slashes.
 */
export function langfuseHost(env = process.env) {
  let host = firstString(env.LANGFUSE_BASE_URL, env.LANGFUSE_HOST);
  if (host === null || host === undefined) {
    host = DEFAULT_LANGFUSE_HOST;
  }
  return host.replace(/\/+$/, '');
}
41
+
42
// Maps each known eval surface to the AI_PROMPT_VERSIONS property that holds
// its prompt version. A Map is used so arbitrary strings such as
// 'constructor' never match an inherited property.
const SURFACE_PROMPT_VERSION_KEYS = new Map([
  ['ask', 'ask'],
  ['workout', 'workout'],
  ['cycle', 'cycle'],
  ['vitals', 'vitals'],
  ['checkpoint', 'checkpoint'],
  ['weekly-checkin', 'weeklyCheckin']
]);

/**
 * Returns the prompt version registered for an eval surface.
 *
 * @param {string} surface - Surface identifier, e.g. 'ask' or 'weekly-checkin'.
 * @returns {*} The matching AI_PROMPT_VERSIONS entry, or the string
 *   'unknown' when the surface is not one of the known identifiers.
 */
export function promptVersionForSurface(surface) {
  const versionKey = SURFACE_PROMPT_VERSION_KEYS.get(surface);
  if (versionKey === undefined) return 'unknown';
  return AI_PROMPT_VERSIONS[versionKey];
}
60
+
61
/**
 * Reports whether the eval run is a live or a stored-output run.
 *
 * @param {object} [env=process.env] - Environment map to read from.
 * @returns {'live'|'stored'} 'live' when either PROMPTFOO_LIVE or
 *   SUMMARY_EVALS_LIVE is switched on, otherwise 'stored'.
 */
export function promptfooEvalMode(env = process.env) {
  const isLive = envFlag('PROMPTFOO_LIVE', env) || envFlag('SUMMARY_EVALS_LIVE', env);
  if (isLive) {
    return 'live';
  }
  return 'stored';
}
64
+
65
/**
 * Derives a deterministic score id from the identifying fields of one
 * evaluated case, so the same case in the same run always maps to one id.
 *
 * @param {object} fields
 * @param {*} fields.runId - Identifier of the eval run.
 * @param {*} fields.caseSet - Case set the case belongs to.
 * @param {*} fields.caseId - Identifier of the evaluated case.
 * @param {*} fields.provider - Provider label.
 * @param {*} fields.mode - Eval mode.
 * @returns {string} 'pf-' followed by the first 24 hex characters of the
 *   SHA-256 digest of the colon-joined fields (prefixed with the score name).
 */
function stableScoreId({ runId, caseSet, caseId, provider, mode }) {
  const parts = [SCORE_NAME, runId, caseSet, caseId, provider, mode];
  const hasher = createHash('sha256');
  hasher.update(parts.join(':'));
  const hex = hasher.digest('hex');
  return `pf-${hex.slice(0, 24)}`;
}
69
+
70
/**
 * Derives a deterministic trace id for a promptfoo run.
 *
 * @param {object} fields
 * @param {*} fields.sessionId - Session the run trace belongs to.
 * @param {*} fields.runId - Identifier of the eval run.
 * @param {*} fields.mode - Eval mode.
 * @returns {string} First 32 hex characters of the SHA-256 digest of
 *   'promptfoo_run_trace' joined with the fields by ':'.
 */
function stablePromptfooRunTraceId({ sessionId, runId, mode }) {
  const parts = ['promptfoo_run_trace', sessionId, runId, mode];
  const hasher = createHash('sha256');
  hasher.update(parts.join(':'));
  return hasher.digest('hex').slice(0, 32);
}
74
+
75
/**
 * Chooses the identifier of the current eval run.
 *
 * Sources are consulted in order and the first one that is neither
 * `undefined` nor `null` is returned: `context.evaluationId`,
 * PROMPTFOO_RUN_ID, GITHUB_RUN_ID, CI_PIPELINE_ID. When none is set a local
 * id is built from the timestamp truncated to whole seconds.
 *
 * @param {object} [context={}] - Promptfoo context; only `evaluationId` is read.
 * @param {object} [env=process.env] - Environment map to read from.
 * @param {Date} [now=new Date()] - Clock used for the local fallback id.
 * @returns {*} The run identifier.
 */
function scoreRunId(context = {}, env = process.env, now = new Date()) {
  const fromContext = context.evaluationId;
  if (fromContext !== undefined && fromContext !== null) return fromContext;

  for (const variableName of ['PROMPTFOO_RUN_ID', 'GITHUB_RUN_ID', 'CI_PIPELINE_ID']) {
    const fromEnv = env[variableName];
    if (fromEnv !== undefined && fromEnv !== null) return fromEnv;
  }

  // Only touch the clock when nothing else identified the run.
  const secondsPrecision = now.toISOString().slice(0, 19);
  return `local-${secondsPrecision}`;
}
82
+
83
/**
 * Picks the label used to identify the provider of an eval result.
 *
 * Preference order: `context.provider.label`, `context.provider.id`,
 * `context.vars.provider`, then the fixed fallback 'incremnt-coach-current'.
 * A source is skipped only when it is `undefined` or `null`.
 *
 * @param {object} [context={}] - Promptfoo context.
 * @returns {*} The provider label.
 */
function providerLabel(context = {}) {
  const { provider, vars } = context;

  const label = provider?.label;
  if (label !== undefined && label !== null) return label;

  const id = provider?.id;
  if (id !== undefined && id !== null) return id;

  const fromVars = vars?.provider;
  if (fromVars !== undefined && fromVars !== null) return fromVars;

  return 'incremnt-coach-current';
}
89
+
90
/**
 * Picks the model name to record for an eval result.
 *
 * Preference order: `context.providerResponse.metadata.model`,
 * `context.vars.model`, `testCase.metadata.model`, SUMMARY_EVAL_MODEL,
 * OPENROUTER_MODEL, then the fixed fallback 'current-chain'. A source is
 * skipped only when it is `undefined` or `null`.
 *
 * @param {object} testCase - Test case; only read when the context gives no model.
 * @param {object} [context={}] - Promptfoo context.
 * @param {object} [env=process.env] - Environment map to read from.
 * @returns {*} The model label.
 */
function modelLabel(testCase, context = {}, env = process.env) {
  const fromResponse = context.providerResponse?.metadata?.model;
  if (fromResponse !== undefined && fromResponse !== null) return fromResponse;

  const fromVars = context.vars?.model;
  if (fromVars !== undefined && fromVars !== null) return fromVars;

  // The test case is consulted lazily, after the context sources.
  const fromCase = testCase.metadata?.model;
  if (fromCase !== undefined && fromCase !== null) return fromCase;

  for (const variableName of ['SUMMARY_EVAL_MODEL', 'OPENROUTER_MODEL']) {
    const fromEnv = env[variableName];
    if (fromEnv !== undefined && fromEnv !== null) return fromEnv;
  }
  return 'current-chain';
}
98
+
99
/**
 * Lists the checks of a result that did not pass.
 *
 * @param {object} result - Eval result; `result.checks` may be absent.
 * @returns {Array} The checks whose `passed` property is falsy, in their
 *   original order; an empty array when the result has no checks.
 */
function failedChecks(result) {
  const { checks } = result;
  const allChecks = checks ?? [];
  return allChecks.filter(({ passed }) => !passed);
}
102
+
103
/**
 * Works out which Langfuse entity a score should be attached to.
 *
 * A trace id is looked up first in the provider response metadata and then
 * in the test case metadata (keys `langfuseTraceId`, then `traceId`). The
 * observation id is looked up the same way (`langfuseObservationId`, then
 * `observationId`).
 *
 * @param {object} args
 * @param {object} [args.testCase={}] - Test case whose `metadata` may carry ids.
 * @param {object} [args.context={}] - Promptfoo context whose
 *   `providerResponse.metadata` may carry ids.
 * @param {*} args.sessionId - Session to fall back to.
 * @returns {object} `{ traceId }` plus `observationId` when one was found,
 *   or `{ sessionId }` when no trace id is available.
 */
function langfuseScoreTarget({ testCase = {}, context = {}, sessionId }) {
  const fromProvider = context.providerResponse?.metadata ?? {};
  const fromCase = testCase.metadata ?? {};

  const traceId = firstString(
    fromProvider.langfuseTraceId,
    fromProvider.traceId,
    fromCase.langfuseTraceId,
    fromCase.traceId
  );

  // Without a trace there is nothing more specific than the session.
  if (!traceId) {
    return { sessionId };
  }

  const observationId = firstString(
    fromProvider.langfuseObservationId,
    fromProvider.observationId,
    fromCase.langfuseObservationId,
    fromCase.observationId
  );

  return compactObject({ traceId, observationId });
}
128
+
129
/**
 * Builds the Langfuse score payload for one evaluated promptfoo case.
 *
 * The score is a BOOLEAN named after the module's score name, with value 1
 * when `result.passed` is truthy and 0 otherwise. It targets a trace (and
 * observation) when ids are found in the provider response or test case
 * metadata, and otherwise the synthetic session `promptfoo:<runId>`.
 *
 * Compared with the earlier version, `testCase` is optional: a missing or
 * null test case is treated as an empty one instead of raising a TypeError.
 * A failing result that lists no failing checks now gets a generic comment
 * instead of an empty string.
 *
 * @param {object} args
 * @param {object} args.result - Eval result; reads `id`, `name`, `surface`,
 *   `passed` and `checks`.
 * @param {object} [args.testCase={}] - Test case; reads `caseSet`,
 *   `fixtureFile`, `snapshotFile`, `shouldPass`, `source` and `metadata`.
 * @param {object} [args.context={}] - Promptfoo context; reads `vars`,
 *   `provider`, `providerResponse` and `evaluationId`.
 * @param {Date} [args.now=new Date()] - Clock used for `generatedAt` and the
 *   local run-id fallback.
 * @param {object} [args.env=process.env] - Environment map to read from.
 * @returns {object} Score payload with `id`, `name`, `value`, `dataType`,
 *   the target ids, `comment`, `metadata` and `environment`.
 */
export function buildPromptfooLangfuseScorePayload({
  result,
  testCase = {},
  context = {},
  now = new Date(),
  env = process.env
}) {
  // Default parameters do not cover an explicit null.
  const caseData = testCase ?? {};
  const vars = context.vars ?? {};
  const caseSet = vars.caseSet ?? caseData.caseSet ?? env.SUMMARY_EVAL_CASE_SET ?? 'synthetic';
  const runId = scoreRunId(context, env, now);
  const mode = promptfooEvalMode(env);
  const provider = providerLabel(context);
  const failed = failedChecks(result);
  const promptVersion = vars.promptVersion
    ?? caseData.metadata?.promptVersion
    ?? promptVersionForSurface(result.surface);
  const sessionId = `promptfoo:${runId}`;
  const target = langfuseScoreTarget({ testCase: caseData, context, sessionId });
  const targetKind = target.observationId ? 'observation' : target.traceId ? 'trace' : 'session';

  // Capped at 1000 characters; falls back to a generic note when the result
  // failed without naming any failing check.
  const failureComment = failed
    .map((check) => `${check.key}: ${check.reason}`)
    .join(' | ')
    .slice(0, 1000) || 'Promptfoo domain checks failed.';

  return {
    id: stableScoreId({ runId, caseSet, caseId: result.id, provider, mode }),
    name: SCORE_NAME,
    value: result.passed ? 1 : 0,
    dataType: 'BOOLEAN',
    ...target,
    comment: result.passed ? 'Promptfoo domain checks passed.' : failureComment,
    metadata: compactObject({
      kind: 'promptfoo_eval',
      caseId: result.id,
      caseName: result.name,
      caseSet,
      fixtureFile: vars.fixtureFile ?? caseData.fixtureFile,
      snapshotFile: vars.snapshotFile ?? caseData.snapshotFile,
      surface: result.surface,
      promptVersion,
      model: modelLabel(caseData, context, env),
      provider,
      mode,
      runId,
      // An explicit vars.shouldPass wins; otherwise the case is expected to
      // pass unless it says `shouldPass: false`.
      shouldPass: vars.shouldPass ?? (caseData.shouldPass !== false),
      source: caseData.source ?? 'fixture',
      gitSha: currentGitSha(env),
      passed: result.passed,
      scoreTarget: targetKind,
      promptfooSessionId: sessionId,
      langfuseTraceId: target.traceId,
      langfuseObservationId: target.observationId,
      assertionCount: result.checks?.length ?? 0,
      failedAssertionKeys: failed.map((check) => check.key),
      failedAssertionReasons: failed.map((check) => check.reason).slice(0, 10),
      generatedAt: now.toISOString()
    }),
    environment: env.LANGFUSE_ENVIRONMENT ?? env.NODE_ENV ?? 'development'
  };
}
187
+
188
/**
 * Builds the ingestion batch that creates the run-level trace a
 * session-targeted score is grouped under.
 *
 * @param {object} scorePayload - Score payload; reads `sessionId`, `name`,
 *   `environment` and `metadata`.
 * @param {object} [options]
 * @param {Date} [options.now=new Date()] - Clock used for all timestamps.
 * @param {string} [options.eventId=randomUUID()] - Id of the ingestion event.
 * @returns {object|null} A `{ batch, metadata }` object holding a single
 *   'trace-create' event, or `null` when the score payload has no session id.
 */
export function buildPromptfooLangfuseRunTracePayload(scorePayload, {
  now = new Date(),
  eventId = randomUUID()
} = {}) {
  const sessionId = scorePayload?.sessionId;
  if (!sessionId) return null;

  const details = scorePayload.metadata ?? {};
  const { caseSet, mode, scoreTarget } = details;
  const scoreName = scorePayload.name;
  // Fall back to the session id with its 'promptfoo:' prefix removed.
  const runId = details.runId ?? String(sessionId).replace(/^promptfoo:/, '');
  const traceId = stablePromptfooRunTraceId({ sessionId, runId, mode });
  const isoNow = now.toISOString();

  const tags = ['source:promptfoo'];
  if (mode) tags.push(`mode:${mode}`);
  if (caseSet) tags.push(`case-set:${caseSet}`);

  const traceBody = {
    id: traceId,
    timestamp: isoNow,
    name: 'promptfoo eval run',
    sessionId,
    userId: 'promptfoo',
    input: {
      redacted: true,
      source: 'promptfoo',
      runId,
      caseSet,
      mode
    },
    output: {
      redacted: true,
      scoreName,
      scoreTarget
    },
    metadata: compactObject({
      kind: 'promptfoo_eval_run',
      runId,
      caseSet,
      mode,
      provider: details.provider,
      model: details.model,
      promptfooSessionId: sessionId,
      scoreName,
      scoreTarget,
      gitSha: details.gitSha,
      generatedAt: isoNow
    }),
    tags,
    environment: scorePayload.environment
  };

  const createEvent = {
    type: 'trace-create',
    id: eventId,
    timestamp: isoNow,
    body: traceBody
  };

  return {
    batch: [createEvent],
    metadata: {
      source: 'incremnt-promptfoo-langfuse-scores',
      runId
    }
  };
}
254
+
255
/**
 * Builds the HTTP Basic Authorization header value from the Langfuse key pair.
 *
 * @param {object} env - Environment map providing LANGFUSE_PUBLIC_KEY and
 *   LANGFUSE_SECRET_KEY.
 * @returns {string} 'Basic ' followed by the base64 of '<public>:<secret>'.
 */
function authHeader(env) {
  const credentials = `${env.LANGFUSE_PUBLIC_KEY}:${env.LANGFUSE_SECRET_KEY}`;
  const encoded = Buffer.from(credentials).toString('base64');
  return `Basic ${encoded}`;
}
258
+
259
/**
 * Returns a second handle on the response body when the response supports
 * cloning, so the raw text is still readable after a failed JSON parse.
 * Falls back to the response itself for objects without a usable `clone()`.
 */
function spareResponseFor(response) {
  try {
    return typeof response?.clone === 'function' ? response.clone() : response;
  } catch {
    return response;
  }
}

/**
 * Reads a response body as text without ever throwing; yields '' when the
 * body is unavailable (no `text()` method, or the body was already consumed).
 */
async function readResponseTextSafely(response) {
  if (typeof response?.text !== 'function') return '';
  try {
    return await response.text();
  } catch {
    return '';
  }
}

/**
 * Publishes the run-level trace for a session-targeted score, at most once
 * per trace id for the lifetime of this module.
 *
 * The previous version called `response.text()` after a failed
 * `response.json()`. A fetch Response body can be read only once, so that
 * second read rejected and its TypeError replaced the intended
 * "run trace publish failed" error. The body is now cloned before parsing and
 * the fallback read can no longer throw.
 *
 * @param {object} payload - Score payload; nothing is sent when it has no `sessionId`.
 * @param {object} deps
 * @param {object} deps.env - Environment map providing host and credentials.
 * @param {Function} deps.fetchImpl - fetch-compatible function.
 * @param {Date} deps.now - Clock forwarded to the trace payload builder.
 * @returns {Promise<object|null>} `null` without a session id, otherwise
 *   `{ status: 'already-created' | 'published', traceId }`.
 * @throws {Error} When the request is not ok, the body is not valid JSON, or
 *   the body reports a non-empty `errors` array.
 */
async function ensurePromptfooRunTrace(payload, { env, fetchImpl, now }) {
  if (!payload.sessionId) return null;
  const runTracePayload = buildPromptfooLangfuseRunTracePayload(payload, { now });
  const runTraceId = runTracePayload.batch[0].body.id;

  if (promptfooRunTracesCreated.has(runTraceId)) {
    return { status: 'already-created', traceId: runTraceId };
  }

  const response = await fetchImpl(`${langfuseHost(env)}/api/public/ingestion`, {
    method: 'POST',
    headers: {
      Authorization: authHeader(env),
      'Content-Type': 'application/json'
    },
    body: JSON.stringify(runTracePayload)
  });

  let body = null;
  let parseError = null;
  let rawText = '';

  if (typeof response?.json === 'function') {
    // Must be taken before json() consumes the body.
    const spare = spareResponseFor(response);
    try {
      body = await response.json();
    } catch (error) {
      parseError = error;
      rawText = await readResponseTextSafely(spare);
    }
  }

  if (!response?.ok || parseError || (Array.isArray(body?.errors) && body.errors.length > 0)) {
    const detail = body ? JSON.stringify(body) : rawText;
    throw new Error(`Langfuse promptfoo run trace publish failed: ${response?.status ?? 'unknown'} ${detail}`.trim());
  }

  // Remember only successful publishes so a failed attempt is retried later.
  promptfooRunTracesCreated.add(runTraceId);
  return { status: 'published', traceId: runTraceId };
}
298
+
299
/**
 * Publishes one promptfoo eval result to Langfuse as a score, best-effort.
 *
 * This function reports every outcome through its return value and never
 * rejects. Two gaps in that contract are closed here:
 * - the score payload is now built inside the `try`, so malformed input
 *   yields `{ status: 'failed' }` instead of an exception;
 * - an unparsable or null body on an otherwise successful score request no
 *   longer turns a published score into a reported failure.
 *
 * A failed run-trace publish is logged and recorded in `runTraceStatus` but
 * does not prevent the score from being sent.
 *
 * @param {object} input - `{ result, testCase, context }` describing the evaluated case.
 * @param {object} [options]
 * @param {object} [options.env=process.env] - Environment map to read from.
 * @param {Function} [options.fetchImpl=globalThis.fetch] - fetch-compatible function.
 * @param {object} [options.logger=console] - Logger; only `warn` is used.
 * @param {Date} [options.now=new Date()] - Clock for generated timestamps.
 * @returns {Promise<object>} One of
 *   `{ status: 'disabled', reason }`,
 *   `{ status: 'failed', error }`, or
 *   `{ status: 'published', scoreId?, sessionId?, runTraceId?, runTraceStatus? }`.
 */
export async function publishPromptfooLangfuseScore(input, {
  env = process.env,
  fetchImpl = globalThis.fetch,
  logger = console,
  now = new Date()
} = {}) {
  if (!shouldPublishPromptfooLangfuseScores(env)) {
    return {
      status: 'disabled',
      reason: env.LANGFUSE_PUBLIC_KEY && env.LANGFUSE_SECRET_KEY ? 'disabled-by-env' : 'missing-credentials'
    };
  }

  if (typeof fetchImpl !== 'function') {
    return { status: 'failed', error: 'fetch is unavailable' };
  }

  try {
    const payload = buildPromptfooLangfuseScorePayload({ ...input, env, now });

    let runTrace = null;
    try {
      runTrace = await ensurePromptfooRunTrace(payload, { env, fetchImpl, now });
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      logger?.warn?.('Promptfoo Langfuse run trace publish failed:', message);
      runTrace = { status: 'failed', error: message };
    }

    const response = await fetchImpl(`${langfuseHost(env)}/api/public/scores`, {
      method: 'POST',
      headers: {
        Authorization: authHeader(env),
        'Content-Type': 'application/json'
      },
      body: JSON.stringify(payload)
    });

    if (!response?.ok) {
      const body = typeof response?.text === 'function' ? await response.text() : '';
      throw new Error(`Langfuse score publish failed: ${response?.status ?? 'unknown'} ${body}`.trim());
    }

    // The request succeeded; the response body is only needed for the id.
    let data = {};
    if (typeof response.json === 'function') {
      try {
        data = (await response.json()) ?? {};
      } catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        logger?.warn?.('Promptfoo Langfuse score response was not valid JSON:', message);
      }
    }

    return compactObject({
      status: 'published',
      scoreId: data.id,
      sessionId: payload.sessionId,
      runTraceId: runTrace?.traceId,
      runTraceStatus: runTrace?.status
    });
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    logger?.warn?.('Promptfoo Langfuse score publish skipped:', message);
    return { status: 'failed', error: message };
  }
}
@@ -0,0 +1,14 @@
1
+ module.exports = class IncremntCoachProvider {
2
+ constructor(options = {}) {
3
+ this.providerId = options.id || 'incremnt-coach';
4
+ }
5
+
6
+ id() {
7
+ return this.providerId;
8
+ }
9
+
10
+ async callApi(prompt, context) {
11
+ const { callPromptfooProvider } = await import('./promptfoo-evals.js');
12
+ return callPromptfooProvider(prompt, context);
13
+ }
14
+ };
@@ -0,0 +1,4 @@
1
+ module.exports = async function promptfooTests() {
2
+ const { buildPromptfooTests } = await import('./promptfoo-evals.js');
3
+ return buildPromptfooTests();
4
+ };