npm - @sanity/ailf - Versions diffs - 2.3.1 → 2.3.2 - Mend

@sanity/ailf 2.3.1 → 2.3.2

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (7)

package/config/models.ts +7 -3
package/dist/agent-observer/provider.d.ts +15 -2
package/dist/agent-observer/provider.js +91 -6
package/dist/commands/pipeline-action.js +11 -0
package/dist/config/models.ts +7 -3
package/dist/webhook/eval-request-handler.js +24 -19
package/package.json +1 -1

package/config/models.ts CHANGED Viewed

@@ -37,7 +37,10 @@ export default defineModels({
     {
       id: "openai:chat:gpt-5.2",
       label: "GPT 5.2",
-      config: { temperature: 0.2, max_tokens: 4096 },
+      config: {
+        max_completion_tokens: 8192,
+        verbosity: "medium",
+      },
       modes: ["literacy", "knowledge-probe"],
       // All literacy variants included by default
     },
@@ -45,8 +48,9 @@ export default defineModels({
       id: "openai:responses:gpt-5.4",
       label: "GPT 5.4",
       config: {
-        reasoning_effort: "medium",
-        max_output_tokens: 4096,
+        reasoning: { effort: "medium", summary: "auto" },
+        verbosity: "medium",
+        max_output_tokens: 32_000, // reasoning tokens share this budget — 4096 was too low
         maxRetries: 1,
       },
       timeoutMs: 600_000, // 10 min — reasoning model needs more headroom

package/dist/agent-observer/provider.d.ts CHANGED Viewed

@@ -69,9 +69,22 @@ export default class InstrumentedProvider {
     getRecorder(): RequestRecorder;
     id(): string;
     /**
-     * Calls OpenAI Chat Completions API directly. Uses the recorder's
-     * fetch wrapper so the LLM call itself is captured in the observation log.
+     * Detect whether the model should use the Responses API based on config.
+     */
+    private isResponsesModel;
+    /**
+     * Calls OpenAI directly. Routes to the Responses API for reasoning
+     * models and Chat Completions API for everything else. Uses the
+     * recorder's fetch wrapper so the API call is captured.
      */
     private callOpenAI;
+    /**
+     * Calls OpenAI Chat Completions API (non-reasoning models).
+     */
+    private callOpenAIChatCompletions;
+    /**
+     * Calls OpenAI Responses API (reasoning models like GPT-5.x, o-series).
+     */
+    private callOpenAIResponses;
 }
 export {};

package/dist/agent-observer/provider.js CHANGED Viewed

@@ -94,13 +94,26 @@ export default class InstrumentedProvider {
         return `instrumented:${this.providerId}`;
     }
     /**
-     * Calls OpenAI Chat Completions API directly. Uses the recorder's
-     * fetch wrapper so the LLM call itself is captured in the observation log.
+     * Detect whether the model should use the Responses API based on config.
+     */
+    isResponsesModel() {
+        const model = this.config.modelName || this.config.model || "";
+        return (this.config.reasoning != null ||
+            this.config.reasoning_effort != null ||
+            model.startsWith("gpt-5") ||
+            model.startsWith("o1") ||
+            model.startsWith("o3") ||
+            model.startsWith("o4"));
+    }
+    /**
+     * Calls OpenAI directly. Routes to the Responses API for reasoning
+     * models and Chat Completions API for everything else. Uses the
+     * recorder's fetch wrapper so the API call is captured.
      */
     async callOpenAI(prompt) {
-        const model = this.config.model || "gpt-4o";
-        const temperature = this.config.temperature ?? 0;
-        const maxTokens = this.config.max_tokens || 4096;
+        const model = this.config.modelName ||
+            this.config.model ||
+            "gpt-4o";
         const apiKey = this.config.apiKey || process.env.OPENAI_API_KEY;
         if (!apiKey) {
             return {
@@ -108,7 +121,17 @@ export default class InstrumentedProvider {
                 output: undefined,
             };
         }
-        // Use the recorder's fetch wrapper so the API call is recorded
+        if (this.isResponsesModel()) {
+            return this.callOpenAIResponses(prompt, model, apiKey);
+        }
+        return this.callOpenAIChatCompletions(prompt, model, apiKey);
+    }
+    /**
+     * Calls OpenAI Chat Completions API (non-reasoning models).
+     */
+    async callOpenAIChatCompletions(prompt, model, apiKey) {
+        const temperature = this.config.temperature ?? 0;
+        const maxTokens = this.config.max_tokens || 4096;
         const fetchFn = this.recorder.isRunning()
             ? this.recorder.fetch.bind(this.recorder)
             : globalThis.fetch;
@@ -148,4 +171,66 @@ export default class InstrumentedProvider {
             },
         };
     }
+    /**
+     * Calls OpenAI Responses API (reasoning models like GPT-5.x, o-series).
+     */
+    async callOpenAIResponses(prompt, model, apiKey) {
+        const maxOutputTokens = this.config.max_output_tokens || 32_000;
+        const reasoning = this.config.reasoning;
+        const reasoningEffort = reasoning?.effort || this.config.reasoning_effort || "medium";
+        const reasoningSummary = reasoning?.summary;
+        const verbosity = this.config.verbosity;
+        const fetchFn = this.recorder.isRunning()
+            ? this.recorder.fetch.bind(this.recorder)
+            : globalThis.fetch;
+        const startTime = Date.now();
+        const response = await fetchFn("https://api.openai.com/v1/responses", {
+            body: JSON.stringify({
+                input: prompt,
+                max_output_tokens: maxOutputTokens,
+                model,
+                reasoning: {
+                    effort: reasoningEffort,
+                    ...(reasoningSummary ? { summary: reasoningSummary } : {}),
+                },
+                ...(verbosity ? { text: { format: { type: "text" }, verbosity } } : {}),
+            }),
+            headers: {
+                Authorization: `Bearer ${apiKey}`,
+                "Content-Type": "application/json",
+            },
+            method: "POST",
+        });
+        const data = (await response.json());
+        if (data.error) {
+            return {
+                error: data.error.message ?? "Unknown OpenAI error",
+                output: undefined,
+            };
+        }
+        // Extract text from Responses API output format
+        let output = "";
+        for (const item of data.output ?? []) {
+            if (item.type === "message" && item.content) {
+                for (const block of item.content) {
+                    if (block.type === "output_text" && block.text) {
+                        output += block.text;
+                    }
+                }
+            }
+        }
+        return {
+            cost: calculateCost(model, data.usage?.input_tokens ?? 0, data.usage?.output_tokens ?? 0),
+            metadata: {
+                latencyMs: Date.now() - startTime,
+                model,
+            },
+            output,
+            tokenUsage: {
+                completion: data.usage?.output_tokens,
+                prompt: data.usage?.input_tokens,
+                total: data.usage?.total_tokens,
+            },
+        };
+    }
 }

package/dist/commands/pipeline-action.js CHANGED Viewed

@@ -315,6 +315,17 @@ export async function executePipeline(cliOpts) {
         }
         // Output dir: explicit CLI flag → $CWD/.ailf/results/latest/
         config.outputDir = resolveOutputDir(cliOpts.outputDir);
+        // Capture options — CLI flags and env vars aren't in the config file,
+        // so merge them here (same logic as resolveOptions).
+        config.captureEnabled = cliOpts.capture || process.env.AILF_CAPTURE === "1";
+        if (cliOpts.captureDir ?? process.env.AILF_CAPTURE_DIR) {
+            config.captureDir = cliOpts.captureDir ?? process.env.AILF_CAPTURE_DIR;
+        }
+        config.captureCompress =
+            cliOpts.captureCompress !== false &&
+                process.env.AILF_CAPTURE_COMPRESS !== "0";
+        config.captureExtras =
+            cliOpts.captureExtras !== false && process.env.AILF_CAPTURE_EXTRAS !== "0";
         // Create AppContext directly from the merged config so adapters
         // (especially taskSource) are wired from the file config's
         // taskSourceType — not from CLI defaults.

package/dist/config/models.ts CHANGED Viewed

@@ -37,7 +37,10 @@ export default defineModels({
     {
       id: "openai:chat:gpt-5.2",
       label: "GPT 5.2",
-      config: { temperature: 0.2, max_tokens: 4096 },
+      config: {
+        max_completion_tokens: 8192,
+        verbosity: "medium",
+      },
       modes: ["literacy", "knowledge-probe"],
       // All literacy variants included by default
     },
@@ -45,8 +48,9 @@ export default defineModels({
       id: "openai:responses:gpt-5.4",
       label: "GPT 5.4",
       config: {
-        reasoning_effort: "medium",
-        max_output_tokens: 4096,
+        reasoning: { effort: "medium", summary: "auto" },
+        verbosity: "medium",
+        max_output_tokens: 32_000, // reasoning tokens share this budget — 4096 was too low
         maxRetries: 1,
       },
       timeoutMs: 600_000, // 10 min — reasoning model needs more headroom

package/dist/webhook/eval-request-handler.js CHANGED Viewed

@@ -165,28 +165,33 @@ async function dispatchGitHubEval(repo, payload, config) {
     const hasPerspective = !!payload.perspective;
     const hasTasks = Array.isArray(payload.tasks) && payload.tasks.length > 0;
     const hasAreas = Array.isArray(payload.areas) && payload.areas.length > 0;
+    // Nest the PipelineRequest under `request` to stay within GitHub's
+    // 10-property limit on client_payload. Workflow-level metadata
+    // (caller_repo) stays at the top level for the workflow to read.
     const body = {
         client_payload: {
             caller_repo: "sanity-io/www-sanity-io",
-            dataset: payload.dataset,
-            mode: payload.mode,
-            projectId: payload.projectId,
-            publish: true,
-            source: "production",
-            // Studio-initiated evals always use Content Lake as the task source.
-            // Without this, the pipeline only loads filesystem .task.ts files and
-            // Studio-owned tasks are invisible.
-            taskMode: "content-lake",
-            // Release-scoped fields
-            ...(hasPerspective ? { perspective: payload.perspective } : {}),
-            // Task-scoped fields
-            ...(hasTasks ? { tasks: payload.tasks } : {}),
-            ...(hasAreas ? { areas: payload.areas } : {}),
-            ...(payload.debug ? { debug: true } : {}),
-            ...(payload.tag ? { publishTag: payload.tag } : {}),
-            ...(payload.sourceReportId
-                ? { sourceReportId: payload.sourceReportId }
-                : {}),
+            request: {
+                dataset: payload.dataset,
+                mode: payload.mode,
+                projectId: payload.projectId,
+                publish: true,
+                source: "production",
+                // Studio-initiated evals always use Content Lake as the task source.
+                // Without this, the pipeline only loads filesystem .task.ts files and
+                // Studio-owned tasks are invisible.
+                taskMode: "content-lake",
+                // Release-scoped fields
+                ...(hasPerspective ? { perspective: payload.perspective } : {}),
+                // Task-scoped fields
+                ...(hasTasks ? { tasks: payload.tasks } : {}),
+                ...(hasAreas ? { areas: payload.areas } : {}),
+                ...(payload.debug ? { debug: true } : {}),
+                ...(payload.tag ? { publishTag: payload.tag } : {}),
+                ...(payload.sourceReportId
+                    ? { sourceReportId: payload.sourceReportId }
+                    : {}),
+            },
         },
         event_type: "external-eval",
     };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@sanity/ailf",
-  "version": "2.3.1",
+  "version": "2.3.2",
   "private": false,
   "publishConfig": {
     "access": "public"