@sanity/ailf 4.5.0 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/canonical/grader-references/agent-harness-tools.yaml +42 -0
- package/canonical/grader-references/knowledge-probe-recall.yaml +36 -0
- package/canonical/grader-references/mcp-server-spec.yaml +51 -0
- package/canonical/grader-references/portable-text.yaml +48 -0
- package/config/rubrics.ts +38 -2
- package/dist/_vendor/ailf-core/artifact-registry.d.ts +197 -2
- package/dist/_vendor/ailf-core/artifact-registry.js +419 -5
- package/dist/_vendor/ailf-core/examples/index.d.ts +125 -26
- package/dist/_vendor/ailf-core/examples/index.js +146 -47
- package/dist/_vendor/ailf-core/ports/context.d.ts +26 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/ports/index.js +1 -0
- package/dist/_vendor/ailf-core/ports/llm-client.d.ts +112 -0
- package/dist/_vendor/ailf-core/ports/llm-client.js +68 -0
- package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +15 -0
- package/dist/_vendor/ailf-core/schemas/branded-string.d.ts +40 -0
- package/dist/_vendor/ailf-core/schemas/branded-string.js +45 -0
- package/dist/_vendor/ailf-core/schemas/confidence-schema.d.ts +36 -0
- package/dist/_vendor/ailf-core/schemas/confidence-schema.js +32 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +8 -4
- package/dist/_vendor/ailf-core/schemas/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/schemas/index.js +9 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +34 -8
- package/dist/_vendor/ailf-core/schemas/pipeline.js +23 -1
- package/dist/_vendor/ailf-core/services/diagnosis/registry.d.ts +40 -0
- package/dist/_vendor/ailf-core/services/diagnosis/registry.js +25 -0
- package/dist/_vendor/ailf-core/services/diagnosis-runner.d.ts +19 -0
- package/dist/_vendor/ailf-core/services/diagnosis-runner.js +19 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/services/index.js +5 -0
- package/dist/_vendor/ailf-core/services/report-to-markdown.js +3 -2
- package/dist/_vendor/ailf-core/types/attribution.d.ts +82 -0
- package/dist/_vendor/ailf-core/types/attribution.js +18 -0
- package/dist/_vendor/ailf-core/types/branded-ids.d.ts +26 -1
- package/dist/_vendor/ailf-core/types/branded-ids.js +80 -4
- package/dist/_vendor/ailf-core/types/confidence.d.ts +68 -0
- package/dist/_vendor/ailf-core/types/confidence.js +56 -0
- package/dist/_vendor/ailf-core/types/diagnosis.d.ts +169 -0
- package/dist/_vendor/ailf-core/types/diagnosis.js +17 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +16 -1
- package/dist/_vendor/ailf-core/types/grader-judgment.d.ts +125 -0
- package/dist/_vendor/ailf-core/types/grader-judgment.js +30 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +82 -29
- package/dist/_vendor/ailf-core/types/index.js +16 -1
- package/dist/_vendor/ailf-core/types/legacy-grader-judgment.d.ts +55 -0
- package/dist/_vendor/ailf-core/types/legacy-grader-judgment.js +30 -0
- package/dist/_vendor/ailf-core/types/pipeline-request.d.ts +1 -0
- package/dist/_vendor/ailf-core/types/repo-config.d.ts +8 -0
- package/dist/_vendor/ailf-shared/document-ref.d.ts +1 -1
- package/dist/adapters/api-client/build-request.d.ts +1 -0
- package/dist/adapters/api-client/build-request.js +3 -0
- package/dist/adapters/attribution/attribution-meta-writer.d.ts +35 -0
- package/dist/adapters/attribution/attribution-meta-writer.js +34 -0
- package/dist/adapters/attribution/index.d.ts +9 -0
- package/dist/adapters/attribution/index.js +8 -0
- package/dist/adapters/attribution/per-entry-attribution-writer.d.ts +56 -0
- package/dist/adapters/attribution/per-entry-attribution-writer.js +49 -0
- package/dist/adapters/config-sources/file-config-adapter.js +1 -0
- package/dist/adapters/grader-outputs/index.d.ts +10 -0
- package/dist/adapters/grader-outputs/index.js +8 -0
- package/dist/adapters/grader-outputs/legacy/index.d.ts +11 -0
- package/dist/adapters/grader-outputs/legacy/index.js +10 -0
- package/dist/adapters/grader-outputs/legacy/promptfoo-grader-output-legacy.d.ts +49 -0
- package/dist/adapters/grader-outputs/legacy/promptfoo-grader-output-legacy.js +48 -0
- package/dist/adapters/grader-outputs/promptfoo-grader-output.d.ts +102 -0
- package/dist/adapters/grader-outputs/promptfoo-grader-output.js +93 -0
- package/dist/adapters/index.d.ts +3 -0
- package/dist/adapters/index.js +4 -0
- package/dist/adapters/llm/anthropic-llm-client.d.ts +48 -0
- package/dist/adapters/llm/anthropic-llm-client.js +205 -0
- package/dist/adapters/llm/fake-llm-client.d.ts +49 -0
- package/dist/adapters/llm/fake-llm-client.js +63 -0
- package/dist/adapters/llm/index.d.ts +9 -0
- package/dist/adapters/llm/index.js +4 -0
- package/dist/adapters/llm/openai-llm-client.d.ts +44 -0
- package/dist/adapters/llm/openai-llm-client.js +168 -0
- package/dist/adapters/llm/pricing.d.ts +12 -0
- package/dist/adapters/llm/pricing.js +8 -0
- package/dist/adapters/llm/retry.d.ts +56 -0
- package/dist/adapters/llm/retry.js +66 -0
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +5 -1
- package/dist/adapters/task-sources/content-lake-task-source.js +28 -2
- package/dist/adapters/task-sources/repo-schemas.d.ts +90 -22
- package/dist/adapters/task-sources/repo-schemas.js +19 -2
- package/dist/artifact-capture/api-gateway-artifact-writer.js +2 -1
- package/dist/artifact-capture/batching-api-gateway-artifact-writer.js +2 -1
- package/dist/artifact-capture/gcs-artifact-writer.js +3 -1
- package/dist/artifact-capture/local-fs-artifact-writer.js +3 -1
- package/dist/commands/calculate-scores.js +1 -1
- package/dist/commands/explain-handler.js +1 -1
- package/dist/commands/lookup-doc.d.ts +1 -1
- package/dist/commands/lookup-doc.js +3 -3
- package/dist/commands/pipeline-action.d.ts +6 -0
- package/dist/commands/pipeline-action.js +2 -0
- package/dist/commands/remote-pipeline.js +1 -0
- package/dist/composition-root.d.ts +59 -1
- package/dist/composition-root.js +95 -0
- package/dist/config/rubrics.ts +38 -2
- package/dist/grader/agent-harness.d.ts +14 -0
- package/dist/grader/agent-harness.js +17 -0
- package/dist/grader/common.d.ts +17 -0
- package/dist/grader/common.js +21 -0
- package/dist/grader/index.d.ts +38 -0
- package/dist/grader/index.js +75 -0
- package/dist/grader/knowledge-probe.d.ts +14 -0
- package/dist/grader/knowledge-probe.js +18 -0
- package/dist/grader/literacy.d.ts +13 -0
- package/dist/grader/literacy.js +17 -0
- package/dist/grader/mcp.d.ts +14 -0
- package/dist/grader/mcp.js +18 -0
- package/dist/orchestration/build-app-context.js +1 -0
- package/dist/orchestration/build-step-sequence.js +5 -0
- package/dist/orchestration/steps/calculate-scores-step.js +23 -1
- package/dist/orchestration/steps/compute-attribution-step.d.ts +44 -0
- package/dist/orchestration/steps/compute-attribution-step.js +279 -0
- package/dist/orchestration/steps/gap-analysis-step.js +35 -7
- package/dist/orchestration/steps/index.d.ts +1 -0
- package/dist/orchestration/steps/index.js +1 -0
- package/dist/pipeline/attribution.d.ts +15 -0
- package/dist/pipeline/attribution.js +18 -9
- package/dist/pipeline/borderline-consensus-runner.d.ts +63 -0
- package/dist/pipeline/borderline-consensus-runner.js +124 -0
- package/dist/pipeline/borderline-detector.d.ts +24 -0
- package/dist/pipeline/borderline-detector.js +26 -0
- package/dist/pipeline/calculate-scores.d.ts +114 -3
- package/dist/pipeline/calculate-scores.js +426 -24
- package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/literacy-bridge.js +35 -17
- package/dist/pipeline/compiler/rubric-resolution.d.ts +15 -0
- package/dist/pipeline/compiler/rubric-resolution.js +9 -1
- package/dist/pipeline/compute-attribution.d.ts +80 -0
- package/dist/pipeline/compute-attribution.js +196 -0
- package/dist/pipeline/failure-modes.d.ts +52 -17
- package/dist/pipeline/failure-modes.js +178 -117
- package/dist/pipeline/map-request-to-config.js +1 -0
- package/package.json +6 -4
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fake LLMClient for tests.
|
|
3
|
+
*
|
|
4
|
+
* Returns canned responses in insertion order and records each call so
|
|
5
|
+
* consumers' tests can assert on prompts, models, and telemetry context.
|
|
6
|
+
*
|
|
7
|
+
* Exported alongside the real adapters so consuming-feature tests can
|
|
8
|
+
* stub the LLM cleanly without a network round-trip.
|
|
9
|
+
*/
|
|
10
|
+
import type { LLMClient, LLMCompleteArgs, LLMCompleteStructuredArgs, LLMCompletion, LLMStructuredCompletion } from "../../_vendor/ailf-core/index.d.ts";
|
|
11
|
+
/**
 * One entry in `FakeLLMClient.calls`: a snapshot of the arguments a consumer
 * passed to the fake, so tests can assert on what was requested.
 */
export interface FakeCallRecord {
    /** Which client method was invoked. */
    kind: "complete" | "completeStructured";
    /** Model id exactly as supplied by the caller. */
    model: string;
    /** Prompt text exactly as supplied by the caller. */
    prompt: string;
    /** Telemetry context forwarded by the caller; absent when none was supplied. */
    context?: {
        feature: string;
        runId?: string;
        cardId?: string;
    };
}
|
|
21
|
+
/**
 * Canned result for one `FakeLLMClient.complete` call.
 *
 * Only `text` is required; the fake reports `0` for any omitted token count
 * or cost.
 */
export interface FakeCompletionResponse {
    /** Completion text handed back to the consumer. */
    text: string;
    /** Reported as `usage.promptTokens`. */
    promptTokens?: number;
    /** Reported as `usage.completionTokens`. */
    completionTokens?: number;
    /** Reported as `cost`. */
    cost?: number;
}
|
|
27
|
+
/**
 * Canned result for one `FakeLLMClient.completeStructured` call.
 *
 * The fake reports `0` for any omitted token count or cost.
 */
export interface FakeStructuredResponse {
    /**
     * Raw value fed to the consumer-supplied schema's `parse` inside
     * `completeStructured`, so the consumer's parse path runs even against
     * the fake. Provide a value matching the schema's shape for the happy
     * path, or a deliberately malformed one to exercise the failure path.
     */
    value: unknown;
    /** Reported as `usage.promptTokens`. */
    promptTokens?: number;
    /** Reported as `usage.completionTokens`. */
    completionTokens?: number;
    /** Reported as `cost`. */
    cost?: number;
}
|
|
39
|
+
/**
 * In-memory `LLMClient` for tests: serves queued responses first-in,
 * first-out and records every call it receives.
 */
export declare class FakeLLMClient implements LLMClient {
    /** Calls received so far, oldest first. */
    readonly calls: Array<FakeCallRecord>;
    private readonly completeQueue;
    private readonly structuredQueue;
    /**
     * @param args Optional response queues; each method consumes its own
     *   queue independently of the other.
     */
    constructor(args?: {
        completeResponses?: Array<FakeCompletionResponse>;
        structuredResponses?: Array<FakeStructuredResponse>;
    });
    /** Serves the next queued completion; rejects once the queue is empty. */
    complete(args: LLMCompleteArgs): Promise<LLMCompletion>;
    /** Serves the next queued structured value after running it through `args.schema`. */
    completeStructured<T>(args: LLMCompleteStructuredArgs<T>): Promise<LLMStructuredCompletion<T>>;
}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fake LLMClient for tests.
|
|
3
|
+
*
|
|
4
|
+
* Returns canned responses in insertion order and records each call so
|
|
5
|
+
* consumers' tests can assert on prompts, models, and telemetry context.
|
|
6
|
+
*
|
|
7
|
+
* Exported alongside the real adapters so consuming-feature tests can
|
|
8
|
+
* stub the LLM cleanly without a network round-trip.
|
|
9
|
+
*/
|
|
10
|
+
/**
 * In-memory LLMClient for tests.
 *
 * Serves pre-queued responses first-in, first-out and records every call in
 * `calls`, so tests can assert on the model, prompt and telemetry context a
 * consumer sent without any network traffic.
 */
export class FakeLLMClient {
    /** Calls received so far, oldest first. */
    calls = [];
    completeQueue;
    structuredQueue;

    /**
     * @param {object} [args]
     * @param {Array} [args.completeResponses] Responses served by `complete`.
     * @param {Array} [args.structuredResponses] Responses served by `completeStructured`.
     */
    constructor(args = {}) {
        // Copy both queues so shifting from them never mutates the caller's arrays.
        const { completeResponses, structuredResponses } = args;
        this.completeQueue = [...(completeResponses ?? [])];
        this.structuredQueue = [...(structuredResponses ?? [])];
    }

    /**
     * Records the call, then resolves with the next queued completion.
     * Omitted token counts and cost are reported as 0.
     *
     * @throws {Error} When no queued completion is left (the call is still recorded).
     */
    async complete(args) {
        const { model, prompt, context } = args;
        const record = { kind: "complete", model, prompt };
        if (context) {
            record.context = context;
        }
        this.calls.push(record);

        const queued = this.completeQueue.shift();
        if (!queued) {
            throw new Error("FakeLLMClient: no more queued complete responses (call exceeded queue)");
        }

        const usage = {
            promptTokens: queued.promptTokens ?? 0,
            completionTokens: queued.completionTokens ?? 0,
        };
        return { text: queued.text, usage, cost: queued.cost ?? 0, model };
    }

    /**
     * Records the call, then resolves with the next queued structured value
     * after passing it through `args.schema.parse`, so the consumer's schema
     * runs even against the fake.
     *
     * @throws {Error} When no queued response is left (the call is still
     *   recorded); schema parse errors propagate unchanged.
     */
    async completeStructured(args) {
        const { model, prompt, context } = args;
        const record = { kind: "completeStructured", model, prompt };
        if (context) {
            record.context = context;
        }
        this.calls.push(record);

        const queued = this.structuredQueue.shift();
        if (!queued) {
            throw new Error("FakeLLMClient: no more queued structured responses (call exceeded queue)");
        }

        const value = args.schema.parse(queued.value);
        const usage = {
            promptTokens: queued.promptTokens ?? 0,
            completionTokens: queued.completionTokens ?? 0,
        };
        return { value, usage, cost: queued.cost ?? 0, model };
    }
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
export { AnthropicLLMClient } from "./anthropic-llm-client.js";
|
|
2
|
+
export type { AnthropicLLMClientOptions } from "./anthropic-llm-client.js";
|
|
3
|
+
export { FakeLLMClient } from "./fake-llm-client.js";
|
|
4
|
+
export type { FakeCallRecord, FakeCompletionResponse, FakeStructuredResponse, } from "./fake-llm-client.js";
|
|
5
|
+
export { OpenAILLMClient } from "./openai-llm-client.js";
|
|
6
|
+
export type { OpenAILLMClientOptions } from "./openai-llm-client.js";
|
|
7
|
+
export type { ModelPricing } from "./pricing.js";
|
|
8
|
+
export { DEFAULT_RETRY_POLICY, LLMHttpError, isRetryableStatus, parseRetryAfterSeconds, runWithRetry, } from "./retry.js";
|
|
9
|
+
export type { RetryPolicy } from "./retry.js";
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
export { AnthropicLLMClient } from "./anthropic-llm-client.js";
|
|
2
|
+
export { FakeLLMClient } from "./fake-llm-client.js";
|
|
3
|
+
export { OpenAILLMClient } from "./openai-llm-client.js";
|
|
4
|
+
export { DEFAULT_RETRY_POLICY, LLMHttpError, isRetryableStatus, parseRetryAfterSeconds, runWithRetry, } from "./retry.js";
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI adapter for the LLMClient port.
|
|
3
|
+
*
|
|
4
|
+
* Uses fetch() against the Chat Completions API — same transport pattern as
|
|
5
|
+
* the existing grader (`packages/eval/src/pipeline/grader-api.ts`). No SDK
|
|
6
|
+
* dependency. Centralizes retry, rate-limit handling, cost accounting, and
|
|
7
|
+
* per-call telemetry tagging via `context.feature`.
|
|
8
|
+
*
|
|
9
|
+
* Per `.claude/rules/eval-pipeline.md` and `.claude/rules/typescript.md`:
|
|
10
|
+
* the adapter never reads `process.env`. The composition root maps env vars
|
|
11
|
+
* to typed constructor args.
|
|
12
|
+
*/
|
|
13
|
+
import { type LLMClient, type LLMCompleteArgs, type LLMCompleteStructuredArgs, type LLMCompletion, type LLMStructuredCompletion, type Logger } from "../../_vendor/ailf-core/index.d.ts";
|
|
14
|
+
import type { ModelPricing } from "./pricing.js";
|
|
15
|
+
import { type RetryPolicy } from "./retry.js";
|
|
16
|
+
/** Constructor options for `OpenAILLMClient`. */
export interface OpenAILLMClientOptions {
    /** OpenAI API key, sent as a `Bearer` token on every request. */
    apiKey: string;
    /** Overrides the chat-completions endpoint URL. */
    baseUrl?: string;
    /**
     * Extra or replacement pricing entries, keyed by canonical model id
     * (without the `openai:` prefix). Merged over the adapter's built-in table.
     */
    pricing?: Record<string, ModelPricing>;
    /** Partial overrides merged over `DEFAULT_RETRY_POLICY`. */
    retryPolicy?: Partial<RetryPolicy>;
    /** Receives per-call telemetry and unknown-pricing warnings when provided. */
    logger?: Logger;
    /** Sleep implementation used between retries; injectable for tests. */
    sleep?: (ms: number) => Promise<void>;
    /** Random source for retry jitter; injectable for tests. */
    rng?: () => number;
}
|
|
30
|
+
/**
 * `LLMClient` implementation backed by the OpenAI Chat Completions HTTP API.
 * All configuration arrives through the constructor.
 */
export declare class OpenAILLMClient implements LLMClient {
    private readonly apiKey;
    private readonly baseUrl;
    private readonly pricing;
    private readonly retryPolicy;
    private readonly logger?;
    private readonly sleep?;
    private readonly rng?;
    constructor(options: OpenAILLMClientOptions);
    /** Requests a plain-text completion; rejects when the model returns empty content. */
    complete(args: LLMCompleteArgs): Promise<LLMCompletion>;
    /** Requests a JSON completion and validates it with `args.schema`. */
    completeStructured<T>(args: LLMCompleteStructuredArgs<T>): Promise<LLMStructuredCompletion<T>>;
    private callApi;
    private computeCost;
    private logTelemetry;
}
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI adapter for the LLMClient port.
|
|
3
|
+
*
|
|
4
|
+
* Uses fetch() against the Chat Completions API — same transport pattern as
|
|
5
|
+
* the existing grader (`packages/eval/src/pipeline/grader-api.ts`). No SDK
|
|
6
|
+
* dependency. Centralizes retry, rate-limit handling, cost accounting, and
|
|
7
|
+
* per-call telemetry tagging via `context.feature`.
|
|
8
|
+
*
|
|
9
|
+
* Per `.claude/rules/eval-pipeline.md` and `.claude/rules/typescript.md`:
|
|
10
|
+
* the adapter never reads `process.env`. The composition root maps env vars
|
|
11
|
+
* to typed constructor args.
|
|
12
|
+
*/
|
|
13
|
+
import { OpenAIChatResponseSchema, splitModelId, } from "../../_vendor/ailf-core/index.js";
|
|
14
|
+
import { DEFAULT_RETRY_POLICY, parseRetryAfterSeconds, runWithRetry, } from "./retry.js";
|
|
15
|
+
/** Chat Completions endpoint used when no `baseUrl` option is supplied. */
const DEFAULT_BASE_URL = "https://api.openai.com/v1/chat/completions";

/**
 * Built-in pricing table (USD per 1K tokens), keyed by model name.
 *
 * These are conservative defaults for the models listed in
 * `packages/eval/config/models.ts`; revise them when models are added or the
 * vendor changes its rates. A model missing from the table is costed at 0
 * and a warning is logged.
 *
 * Pricing reference: https://openai.com/api/pricing
 */
const DEFAULT_PRICING = {
    "gpt-5.2": {
        inputPer1k: 0.005,
        outputPer1k: 0.015,
    },
    "gpt-5": {
        inputPer1k: 0.005,
        outputPer1k: 0.015,
    },
    "gpt-5.4": {
        inputPer1k: 0.005,
        outputPer1k: 0.015,
    },
};
|
|
28
|
+
/**
 * LLMClient adapter that talks to the OpenAI Chat Completions API via
 * `fetch()`. Retries go through `runWithRetry`; cost is derived from a
 * per-model pricing table; each successful call is logged at debug level
 * when a logger is configured.
 */
export class OpenAILLMClient {
    apiKey;
    baseUrl;
    pricing;
    retryPolicy;
    logger;
    sleep;
    rng;

    /**
     * @param {object} options See `OpenAILLMClientOptions`.
     */
    constructor(options) {
        const { apiKey, baseUrl, pricing, retryPolicy, logger, sleep, rng } = options;
        this.apiKey = apiKey;
        this.baseUrl = baseUrl ?? DEFAULT_BASE_URL;
        // Caller-supplied entries win over the built-in defaults.
        this.pricing = { ...DEFAULT_PRICING, ...pricing };
        this.retryPolicy = { ...DEFAULT_RETRY_POLICY, ...retryPolicy };
        // Optional collaborators are only assigned when provided.
        if (logger) {
            this.logger = logger;
        }
        if (sleep) {
            this.sleep = sleep;
        }
        if (rng) {
            this.rng = rng;
        }
    }

    /**
     * Sends `args.prompt` as a single user message and resolves with the
     * first choice's text plus usage, cost and the caller's model id.
     *
     * @throws {Error} When the first choice has no (or empty) content.
     */
    async complete(args) {
        const { modelName } = splitModelId(args.model);
        const requestBody = buildBody(modelName, args.prompt, {
            temperature: args.temperature,
            maxTokens: args.maxTokens,
            stop: args.stop,
        });
        const data = await this.callApi(requestBody);

        const text = data.choices?.[0]?.message?.content;
        if (text === undefined || text === null || text === "") {
            throw new Error(`OpenAI returned empty completion for model ${args.model}`);
        }

        const usage = extractUsage(data.usage);
        const cost = this.computeCost(modelName, usage);
        this.logTelemetry(args.context, args.model, usage, cost);
        return { text, usage, cost, model: args.model };
    }

    /**
     * Requests a `json_object` response, JSON-parses the first choice and
     * validates the result with `args.schema.parse`.
     *
     * @throws {Error} When the content is empty or not valid JSON; schema
     *   parse errors propagate unchanged.
     */
    async completeStructured(args) {
        const { modelName } = splitModelId(args.model);
        const requestBody = buildBody(modelName, args.prompt, {
            temperature: args.temperature,
            maxTokens: args.maxTokens,
            responseFormat: { type: "json_object" },
        });
        const data = await this.callApi(requestBody);

        const raw = data.choices?.[0]?.message?.content;
        if (raw === undefined || raw === null || raw === "") {
            throw new Error(`OpenAI returned empty structured completion for model ${args.model}`);
        }

        let parsed;
        try {
            parsed = JSON.parse(raw);
        }
        catch (err) {
            const detail = err instanceof Error ? err.message : String(err);
            throw new Error(`OpenAI structured completion returned invalid JSON for model ${args.model}: ${detail}`, { cause: err });
        }

        // Validate before computing usage so a schema failure is never logged as a successful call.
        const value = args.schema.parse(parsed);
        const usage = extractUsage(data.usage);
        const cost = this.computeCost(modelName, usage);
        this.logTelemetry(args.context, args.model, usage, cost);
        return { value, usage, cost, model: args.model };
    }

    /**
     * POSTs `body` to the configured endpoint under the retry policy and
     * resolves with the schema-validated response payload.
     */
    async callApi(body) {
        const attempt = async () => {
            const response = await fetch(this.baseUrl, {
                method: "POST",
                headers: {
                    Authorization: `Bearer ${this.apiKey}`,
                    "Content-Type": "application/json",
                },
                body: JSON.stringify(body),
            });

            if (response.ok) {
                const data = OpenAIChatResponseSchema.parse(await response.json());
                // A 2xx response can still carry an error payload.
                if (data.error?.message) {
                    throw new Error(`OpenAI API error: ${data.error.message}`);
                }
                return { ok: true, value: data };
            }

            // Non-2xx: report status and body so the retry helper decides what to do.
            const errorBody = await response.text();
            const retryAfterSeconds = parseRetryAfterSeconds(response.headers.get("retry-after"));
            const failure = { ok: false, status: response.status, body: errorBody };
            if (retryAfterSeconds !== undefined) {
                failure.retryAfterSeconds = retryAfterSeconds;
            }
            return failure;
        };

        const retryArgs = { policy: this.retryPolicy, attempt };
        if (this.sleep) {
            retryArgs.sleep = this.sleep;
        }
        if (this.rng) {
            retryArgs.rng = this.rng;
        }
        return runWithRetry(retryArgs);
    }

    /**
     * Returns the USD cost of `usage` for `modelName`, or 0 (with a warning)
     * when the model has no pricing entry.
     */
    computeCost(modelName, usage) {
        const price = this.pricing[modelName];
        if (price) {
            const inputCost = (usage.promptTokens / 1000) * price.inputPer1k;
            const outputCost = (usage.completionTokens / 1000) * price.outputPer1k;
            return inputCost + outputCost;
        }
        this.logger?.warn(`OpenAI cost unknown for model "${modelName}" — recording cost=0. Add it to OpenAILLMClientOptions.pricing.`);
        return 0;
    }

    /** Emits one debug line describing the call; does nothing without a logger. */
    logTelemetry(context, model, usage, cost) {
        if (!this.logger) {
            return;
        }
        const contextTags = [
            context ? ` feature=${context.feature}` : "",
            context?.runId ? ` runId=${context.runId}` : "",
            context?.cardId ? ` cardId=${context.cardId}` : "",
        ].join("");
        const line = [
            `LLM call (openai)${contextTags}`,
            `model=${model}`,
            `prompt_tokens=${usage.promptTokens}`,
            `completion_tokens=${usage.completionTokens}`,
            `cost_usd=${cost.toFixed(6)}`,
        ].join(" ");
        this.logger.debug(line);
    }
}
|
|
148
|
+
/**
 * Builds the Chat Completions request payload for a single user message.
 *
 * Optional settings are included only when set: `temperature` and
 * `maxTokens` when not `undefined`, `stop` when non-empty, and
 * `responseFormat` when truthy.
 *
 * @param {string} modelName Model name sent as `model`.
 * @param {string} prompt Content of the single user message.
 * @param {object} opts Optional `temperature`, `maxTokens`, `stop`, `responseFormat`.
 * @returns {object} Request body using the API's snake_case field names.
 */
function buildBody(modelName, prompt, opts) {
    const { temperature, maxTokens, stop, responseFormat } = opts;
    return {
        model: modelName,
        messages: [{ role: "user", content: prompt }],
        ...(temperature !== undefined ? { temperature } : {}),
        ...(maxTokens !== undefined ? { max_tokens: maxTokens } : {}),
        ...(stop && stop.length > 0 ? { stop } : {}),
        ...(responseFormat ? { response_format: responseFormat } : {}),
    };
}
|
|
163
|
+
/**
 * Converts the API's snake_case usage block to the camelCase shape returned
 * by the client. A missing block, or a missing/null count, becomes 0.
 *
 * @param {object|null|undefined} usage `usage` field of the API response.
 * @returns {{promptTokens: number, completionTokens: number}}
 */
function extractUsage(usage) {
    const promptTokens = usage?.prompt_tokens ?? 0;
    const completionTokens = usage?.completion_tokens ?? 0;
    return { promptTokens, completionTokens };
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-model pricing types shared by LLM adapters.
|
|
3
|
+
*
|
|
4
|
+
* Hard-coded vendor pricing drifts; treat the in-adapter defaults as a
|
|
5
|
+
* sensible starting point and override via constructor options when the
|
|
6
|
+
* vendor changes their rate card.
|
|
7
|
+
*/
|
|
8
|
+
/** Price of one model, expressed in USD per 1K tokens. */
export interface ModelPricing {
    /** USD charged per 1K prompt (input) tokens. */
    inputPer1k: number;
    /** USD charged per 1K completion (output) tokens. */
    outputPer1k: number;
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared retry helper for LLMClient adapters.
|
|
3
|
+
*
|
|
4
|
+
* Bounded exponential backoff with optional `Retry-After` honoring and
|
|
5
|
+
* symmetric jitter. Treats 429 / 5xx as retryable and any other HTTP error
|
|
6
|
+
* as terminal.
|
|
7
|
+
*
|
|
8
|
+
* Errors carry the full response body on the instance for callers that need
|
|
9
|
+
* to inspect it; the message is intentionally short and body-free so it's
|
|
10
|
+
* safe to include in user-facing logs and stack traces.
|
|
11
|
+
*/
|
|
12
|
+
/** Tunables for `runWithRetry`'s bounded exponential backoff. */
export interface RetryPolicy {
    /** Maximum number of attempts, counting the first call. Default 3. */
    maxAttempts: number;
    /** Delay before the first retry, in ms. Default 500. */
    baseDelayMs: number;
    /** Factor the delay grows by on each further retry. Default 2. */
    backoffFactor: number;
    /** Upper bound on any single delay (before jitter), in ms. Default 10_000. */
    maxDelayMs: number;
    /**
     * Symmetric jitter expressed as a fraction of the delay, in `[0, 1)`.
     * The slept delay is `delay * (1 + (rng() - 0.5) * 2 * jitter)`.
     * Default 0.3; use 0 to turn jitter off.
     */
    jitter: number;
}
|
|
28
|
+
export declare const DEFAULT_RETRY_POLICY: RetryPolicy;
|
|
29
|
+
/**
 * Error raised by `runWithRetry` when an HTTP failure is terminal or the
 * attempts are exhausted.
 */
export declare class LLMHttpError extends Error {
    /** HTTP status of the last failed attempt. */
    readonly status: number;
    /** Number of attempts made before giving up. */
    readonly attempts: number;
    /** Full upstream response body; stored on the instance and kept out of `message`. */
    readonly body: string;
    constructor(status: number, body: string, attempts: number);
}
|
|
36
|
+
export declare function isRetryableStatus(status: number): boolean;
|
|
37
|
+
/** Arguments accepted by `runWithRetry`. */
export interface RunWithRetryArgs<T> {
    /** Backoff and attempt limits. */
    policy: RetryPolicy;
    /**
     * Performs one attempt. Resolve with `{ ok: true, value }` on success, or
     * with `{ ok: false, status, body }` to report an HTTP failure; include
     * `retryAfterSeconds` when the server supplied a `Retry-After` hint.
     */
    attempt: () => Promise<{
        ok: true;
        value: T;
    } | {
        ok: false;
        status: number;
        body: string;
        retryAfterSeconds?: number;
    }>;
    /** Waits for `ms` milliseconds. Injectable for tests. */
    sleep?: (ms: number) => Promise<void>;
    /** Random source in `[0, 1)` used for jitter. Injectable for tests; defaults to `Math.random`. */
    rng?: () => number;
}
|
|
54
|
+
export declare function runWithRetry<T>(args: RunWithRetryArgs<T>): Promise<T>;
|
|
55
|
+
/** Parses a `Retry-After` header (seconds-only form). */
|
|
56
|
+
export declare function parseRetryAfterSeconds(header: null | string): number | undefined;
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared retry helper for LLMClient adapters.
|
|
3
|
+
*
|
|
4
|
+
* Bounded exponential backoff with optional `Retry-After` honoring and
|
|
5
|
+
* symmetric jitter. Treats 429 / 5xx as retryable and any other HTTP error
|
|
6
|
+
* as terminal.
|
|
7
|
+
*
|
|
8
|
+
* Errors carry the full response body on the instance for callers that need
|
|
9
|
+
* to inspect it; the message is intentionally short and body-free so it's
|
|
10
|
+
* safe to include in user-facing logs and stack traces.
|
|
11
|
+
*/
|
|
12
|
+
/** Retry settings used when the caller overrides nothing. */
export const DEFAULT_RETRY_POLICY = {
    // Total attempts, counting the first call.
    maxAttempts: 3,
    // Delay before the first retry, in ms.
    baseDelayMs: 500,
    // Growth factor applied to the delay on each further retry.
    backoffFactor: 2,
    // Upper bound on a single delay, in ms.
    maxDelayMs: 10000,
    // Symmetric jitter as a fraction of the delay.
    jitter: 0.3,
};
|
|
19
|
+
/**
 * Error describing a failed LLM HTTP request.
 *
 * The message names only the status and attempt count; the upstream body is
 * stored on the instance so it stays out of logs and stack traces.
 */
export class LLMHttpError extends Error {
    /** HTTP status of the last failed attempt. */
    status;
    /** Number of attempts made before giving up. */
    attempts;
    /** Full upstream response body (kept on the instance, NOT in `message`). */
    body;

    /**
     * @param {number} status HTTP status of the last failed attempt.
     * @param {string} body Upstream response body.
     * @param {number} attempts Attempts made, counting the first call.
     */
    constructor(status, body, attempts) {
        const summary = `LLM request failed with status ${status} after ${attempts} attempt(s)`;
        super(summary);
        this.name = "LLMHttpError";
        this.status = status;
        this.body = body;
        this.attempts = attempts;
    }
}
|
|
32
|
+
/**
 * Reports whether an HTTP status is worth retrying: 429 or any 5xx.
 *
 * @param {number} status HTTP status code.
 * @returns {boolean}
 */
export function isRetryableStatus(status) {
    if (status === 429) {
        return true;
    }
    return status >= 500 && status < 600;
}
|
|
35
|
+
const defaultSleep = (ms) => new Promise((r) => setTimeout(r, ms));
|
|
36
|
+
/**
 * Runs `attempt` until it succeeds, fails terminally, or the attempt budget
 * is spent.
 *
 * Between retryable failures (see `isRetryableStatus`) it sleeps for either
 * the server's `Retry-After` hint or an exponential backoff, capped at
 * `policy.maxDelayMs` and then jittered by `policy.jitter`.
 *
 * @param {object} args
 * @param {object} args.policy Retry policy (`maxAttempts`, `baseDelayMs`,
 *   `backoffFactor`, `maxDelayMs`, `jitter`).
 * @param {Function} args.attempt Async callable resolving to
 *   `{ ok: true, value }` or `{ ok: false, status, body, retryAfterSeconds? }`.
 * @param {Function} [args.sleep] Async sleep in ms; defaults to a `setTimeout` wrapper.
 * @param {Function} [args.rng] Random source in `[0, 1)`; defaults to `Math.random`.
 * @returns {Promise<*>} The `value` of the first successful attempt.
 * @throws {LLMHttpError} On a non-retryable status or when attempts run out.
 *   Errors thrown by `attempt` itself propagate unchanged and are not retried.
 */
export async function runWithRetry(args) {
    const { policy, attempt, sleep = defaultSleep, rng = Math.random } = args;
    for (let i = 1; i <= policy.maxAttempts; i++) {
        const res = await attempt();
        if (res.ok) {
            return res.value;
        }
        const canRetry = i < policy.maxAttempts && isRetryableStatus(res.status);
        if (!canRetry) {
            throw new LLMHttpError(res.status, res.body, i);
        }
        const exp = policy.baseDelayMs * Math.pow(policy.backoffFactor, i - 1);
        // Honor any valid hint, including an explicit 0 ("retry now"). A
        // truthiness check would discard 0 and back off anyway; missing, NaN
        // or negative hints fall back to the exponential delay.
        const hint = res.retryAfterSeconds;
        const hasHint = Number.isFinite(hint) && hint >= 0;
        const base = hasHint ? hint * 1000 : exp;
        const capped = Math.min(base, policy.maxDelayMs);
        const jittered = policy.jitter > 0
            ? capped * (1 + (rng() - 0.5) * 2 * policy.jitter)
            : capped;
        await sleep(Math.max(0, Math.round(jittered)));
    }
    // Only reachable when `policy.maxAttempts` is below 1 (the loop body never
    // runs); otherwise the final attempt always returns or throws above.
    throw new LLMHttpError(0, "no error body", policy.maxAttempts);
}
|
|
58
|
+
/**
 * Parses the delay-seconds form of a `Retry-After` header.
 *
 * @param {string|null} header Raw header value, or `null` when absent.
 * @returns {number|undefined} The non-negative number of seconds, or
 *   `undefined` when the header is missing, empty, or not such a number
 *   (the HTTP-date form is not interpreted).
 */
export function parseRetryAfterSeconds(header) {
    if (!header) {
        return undefined;
    }
    const seconds = Number(header);
    return Number.isFinite(seconds) && seconds >= 0 ? seconds : undefined;
}
|
|
@@ -55,9 +55,13 @@ interface ContentLakeCanonicalDoc {
|
|
|
55
55
|
sectionSlug?: string;
|
|
56
56
|
slug?: string;
|
|
57
57
|
}
|
|
58
|
+
/**
 * One rubric criterion as projected by the tasks query. Both fields are
 * optional at the type level; `mapAssertions` drops entries whose `id` or
 * `text` is not a non-empty string.
 */
interface ContentLakeCriterion {
    /** Criterion identifier. */
    id?: string;
    /** Criterion wording. */
    text?: string;
}
|
|
58
62
|
/** Assertion shape from the Content Lake (mirrors the Studio schema). */
|
|
59
63
|
interface ContentLakeAssertion {
|
|
60
|
-
criteria?:
|
|
64
|
+
criteria?: ContentLakeCriterion[];
|
|
61
65
|
template?: string;
|
|
62
66
|
threshold?: number;
|
|
63
67
|
type?: string;
|
|
@@ -73,7 +73,13 @@ const TASKS_QUERY = /* groq */ `
|
|
|
73
73
|
perspective,
|
|
74
74
|
reason
|
|
75
75
|
},
|
|
76
|
-
"assertions": coalesce(assertions, assert)
|
|
76
|
+
"assertions": coalesce(assertions, assert)[] {
|
|
77
|
+
type, template, weight, value, threshold,
|
|
78
|
+
"criteria": criteria[] {
|
|
79
|
+
"id": coalesce(id.current, _key),
|
|
80
|
+
"text": coalesce(text, @)
|
|
81
|
+
}
|
|
82
|
+
},
|
|
77
83
|
rawAssert,
|
|
78
84
|
baseline,
|
|
79
85
|
tags,
|
|
@@ -256,8 +262,28 @@ function mapAssertions(raw) {
|
|
|
256
262
|
.filter((a) => !!a.type)
|
|
257
263
|
.map((a) => {
|
|
258
264
|
if (a.type === "llm-rubric" && a.template && a.criteria) {
|
|
265
|
+
// Tighten the runtime contract: the GROQ projection's
|
|
266
|
+
// `coalesce(text, @)` falls through to the entire criterion
|
|
267
|
+
// element when `text` is missing, so a partial legacy criterion
|
|
268
|
+
// like `{_key: "abc"}` arrives here as `{ id: "abc", text: {...} }`
|
|
269
|
+
// — `text` set to the whole `@` object. Explicit type checks
|
|
270
|
+
// drop those with a diagnostic, instead of letting the non-string
|
|
271
|
+
// `text` propagate until the outer ContentLakeAuthorableTaskSchema
|
|
272
|
+
// parse fails deep inside the assertions array (noisy diagnostic).
|
|
259
273
|
return {
|
|
260
|
-
criteria: a.criteria
|
|
274
|
+
criteria: a.criteria
|
|
275
|
+
.filter((c) => {
|
|
276
|
+
if (!c)
|
|
277
|
+
return false;
|
|
278
|
+
const idOk = typeof c.id === "string" && c.id.length > 0;
|
|
279
|
+
const textOk = typeof c.text === "string" && c.text.length > 0;
|
|
280
|
+
if (!idOk || !textOk) {
|
|
281
|
+
console.warn(`[ContentLakeTaskSource] dropping malformed criterion: ${JSON.stringify(c).slice(0, 100)}`);
|
|
282
|
+
return false;
|
|
283
|
+
}
|
|
284
|
+
return true;
|
|
285
|
+
})
|
|
286
|
+
.map((c) => ({ id: c.id, text: c.text })),
|
|
261
287
|
template: a.template,
|
|
262
288
|
type: "llm-rubric",
|
|
263
289
|
...(a.weight !== undefined ? { weight: a.weight } : {}),
|