npm - @sanity/ailf - Versions diffs - 3.8.0 → 3.9.0 - Mend

@sanity/ailf 3.8.0 → 3.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59) hide show

package/dist/pipeline/compiler/__tests__/telemetry.test.js DELETED Viewed

@@ -1,503 +0,0 @@
-/**
- * telemetry.test.ts — Tests for the observability & telemetry subsystem.
- *
- * Covers tool call classification, trace collection, cost tracking,
- * redaction pipeline, trace storage, and per-turn trace merging.
- *
- * Run: npx tsx --test src/pipeline/compiler/__tests__/telemetry.test.ts
- */
-import assert from "node:assert/strict";
-import { existsSync, rmSync } from "node:fs";
-import { afterEach, describe, it } from "node:test";
-import { tmpdir } from "os";
-import { resolve } from "path";
-import { classifyToolCall, classifyToolCalls, } from "../telemetry/tool-classifier.js";
-import { collectTrace, mergeTraces } from "../telemetry/trace-collector.js";
-import { checkBudget, computeCost, estimateRunCost, lookupPricing, } from "../telemetry/cost-tracker.js";
-import { redactTrace } from "../telemetry/redactor.js";
-import { extractTraceSummary, LocalTraceStore, } from "../telemetry/trace-store.js";
-// ---------------------------------------------------------------------------
-// Tool call classification
-// ---------------------------------------------------------------------------
-describe("classifyToolCall", () => {
-    it("classifies known tools by exact name", () => {
-        assert.equal(classifyToolCall("WebSearch"), "search");
-        assert.equal(classifyToolCall("Read"), "read");
-        assert.equal(classifyToolCall("Write"), "write");
-        assert.equal(classifyToolCall("Bash"), "execute");
-        assert.equal(classifyToolCall("Browser.navigate"), "navigate");
-        assert.equal(classifyToolCall("AskUser"), "communicate");
-    });
-    it("uses heuristic for unknown tools", () => {
-        assert.equal(classifyToolCall("custom_search_tool"), "search");
-        assert.equal(classifyToolCall("ReadFromDB"), "read");
-        assert.equal(classifyToolCall("writeConfig"), "write");
-        assert.equal(classifyToolCall("executeScript"), "execute");
-    });
-    it("uses custom mappings over defaults", () => {
-        assert.equal(classifyToolCall("MyTool", { MyTool: "communicate" }), "communicate");
-    });
-    it("falls back to execute for truly unknown tools", () => {
-        assert.equal(classifyToolCall("zzz_unknown_zzz"), "execute");
-    });
-});
-describe("classifyToolCalls", () => {
-    it("classifies a batch and reports unrecognized names", () => {
-        const { categories, unrecognized } = classifyToolCalls([
-            "WebSearch",
-            "Read",
-            "zzz_mystery_tool",
-        ]);
-        assert.equal(categories.length, 3);
-        assert.equal(categories[0], "search");
-        assert.equal(categories[1], "read");
-        assert.ok(unrecognized.includes("zzz_mystery_tool"));
-    });
-});
-// ---------------------------------------------------------------------------
-// Trace collection
-// ---------------------------------------------------------------------------
-describe("collectTrace", () => {
-    const baseOptions = {
-        runId: "run-1",
-        taskId: "task-1",
-        testCaseIndex: 0,
-        modelId: "openai:chat:gpt-4o",
-    };
-    it("creates a trace from an empty response", () => {
-        const trace = collectTrace({}, baseOptions);
-        assert.equal(trace.runId, "run-1");
-        assert.equal(trace.taskId, "task-1");
-        assert.equal(trace.modelId, "openai:chat:gpt-4o");
-        assert.equal(trace.toolCalls.length, 0);
-        assert.equal(trace.tokensUsed.totalTokens, 0);
-    });
-    it("extracts tool calls from metadata", () => {
-        const trace = collectTrace({
-            metadata: {
-                toolCalls: [
-                    { name: "WebSearch", input: { query: "GROQ" }, durationMs: 100 },
-                    { name: "Read", input: { path: "/docs/groq.md" }, durationMs: 50 },
-                ],
-            },
-        }, baseOptions);
-        assert.equal(trace.toolCalls.length, 2);
-        assert.equal(trace.toolCalls[0].name, "WebSearch");
-        assert.equal(trace.toolCalls[0].category, "search");
-        assert.equal(trace.toolCalls[1].name, "Read");
-        assert.equal(trace.toolCalls[1].category, "read");
-    });
-    it("extracts token usage", () => {
-        const trace = collectTrace({ tokenUsage: { prompt: 1000, completion: 500, total: 1500 } }, baseOptions);
-        assert.equal(trace.tokensUsed.promptTokens, 1000);
-        assert.equal(trace.tokensUsed.completionTokens, 500);
-        assert.equal(trace.tokensUsed.totalTokens, 1500);
-    });
-    it("extracts URLs from tool calls", () => {
-        const trace = collectTrace({
-            metadata: {
-                toolCalls: [
-                    { name: "WebFetch", input: { url: "https://sanity.io/docs" } },
-                ],
-            },
-        }, baseOptions);
-        assert.ok(trace.urlsVisited.includes("https://sanity.io/docs"));
-    });
-    it("extracts search terms", () => {
-        const trace = collectTrace({
-            metadata: {
-                toolCalls: [
-                    { name: "WebSearch", input: { query: "GROQ projection" } },
-                ],
-            },
-        }, baseOptions);
-        assert.ok(trace.searchTerms.includes("GROQ projection"));
-    });
-    it("extracts files read and written", () => {
-        const trace = collectTrace({
-            metadata: {
-                toolCalls: [
-                    { name: "Read", input: { path: "/src/schema.ts" } },
-                    { name: "Write", input: { path: "/src/config.ts" } },
-                ],
-            },
-        }, baseOptions);
-        assert.ok(trace.filesRead.includes("/src/schema.ts"));
-        assert.ok(trace.filesWritten.includes("/src/config.ts"));
-    });
-    it("creates event log from tool calls", () => {
-        const trace = collectTrace({
-            metadata: {
-                toolCalls: [
-                    { name: "WebSearch", input: { query: "test" }, durationMs: 100 },
-                ],
-            },
-            latencyMs: 500,
-        }, baseOptions);
-        // Should have: llm_request, tool_call_start, tool_call_end, llm_response
-        assert.equal(trace.events.length, 4);
-        assert.equal(trace.events[0].type, "llm_request");
-        assert.equal(trace.events[1].type, "tool_call_start");
-        assert.equal(trace.events[2].type, "tool_call_end");
-        assert.equal(trace.events[3].type, "llm_response");
-    });
-    it("builds a root span", () => {
-        const trace = collectTrace({ latencyMs: 1000 }, baseOptions);
-        assert.equal(trace.spans.length, 1);
-        assert.equal(trace.spans[0].operation, "test-case");
-        assert.equal(trace.spans[0].parentSpanId, null);
-    });
-});
-// ---------------------------------------------------------------------------
-// mergeTraces (per-turn tracing — task 6f)
-// ---------------------------------------------------------------------------
-describe("mergeTraces", () => {
-    const parentOptions = {
-        runId: "run-1",
-        taskId: "task-1",
-        testCaseIndex: 0,
-        modelId: "openai:chat:gpt-4o",
-    };
-    function makeTurn(index) {
-        return collectTrace({
-            metadata: {
-                toolCalls: [
-                    {
-                        name: "WebSearch",
-                        input: { query: `turn ${index}` },
-                        durationMs: 50,
-                    },
-                ],
-            },
-            tokenUsage: { prompt: 100, completion: 50, total: 150 },
-            latencyMs: 200,
-        }, { ...parentOptions, testCaseIndex: index });
-    }
-    it("merges multiple turns into one trace", () => {
-        const turns = [makeTurn(0), makeTurn(1), makeTurn(2)];
-        const merged = mergeTraces(turns, parentOptions);
-        assert.equal(merged.toolCalls.length, 3);
-        assert.equal(merged.tokensUsed.promptTokens, 300);
-        assert.equal(merged.tokensUsed.completionTokens, 150);
-        assert.equal(merged.durationMs, 600);
-    });
-    it("creates per-turn spans under root", () => {
-        const turns = [makeTurn(0), makeTurn(1)];
-        const merged = mergeTraces(turns, parentOptions);
-        // root + 2 turns
-        assert.equal(merged.spans.length, 3);
-        assert.equal(merged.spans[0].operation, "test-case");
-        assert.equal(merged.spans[0].parentSpanId, null);
-        assert.equal(merged.spans[1].operation, "turn-0");
-        assert.equal(merged.spans[1].parentSpanId, merged.spans[0].spanId);
-        assert.equal(merged.spans[2].operation, "turn-1");
-    });
-    it("deduplicates URLs and search terms", () => {
-        const t1 = collectTrace({
-            metadata: {
-                toolCalls: [
-                    { name: "WebSearch", input: { query: "GROQ" } },
-                    { name: "WebFetch", input: { url: "https://sanity.io" } },
-                ],
-            },
-        }, { ...parentOptions, testCaseIndex: 0 });
-        const t2 = collectTrace({
-            metadata: {
-                toolCalls: [
-                    { name: "WebSearch", input: { query: "GROQ" } },
-                    { name: "WebFetch", input: { url: "https://sanity.io" } },
-                ],
-            },
-        }, { ...parentOptions, testCaseIndex: 1 });
-        const merged = mergeTraces([t1, t2], parentOptions);
-        assert.equal(merged.searchTerms.length, 1); // deduplicated
-        assert.equal(merged.urlsVisited.length, 1); // deduplicated
-    });
-    it("handles empty turns", () => {
-        const merged = mergeTraces([], parentOptions);
-        assert.equal(merged.toolCalls.length, 0);
-        assert.equal(merged.spans.length, 1); // root only
-    });
-});
-// ---------------------------------------------------------------------------
-// Cost tracking
-// ---------------------------------------------------------------------------
-describe("computeCost", () => {
-    it("computes cost from token usage and pricing", () => {
-        const cost = computeCost({ promptTokens: 1000, completionTokens: 500, totalTokens: 1500 }, { input: 3.0, output: 15.0 });
-        // 1000 * 3.0/1M + 500 * 15.0/1M = 0.003 + 0.0075 = 0.0105
-        assert.ok(Math.abs(cost - 0.0105) < 0.0001);
-    });
-    it("accounts for cached input tokens", () => {
-        const cost = computeCost({
-            promptTokens: 1000,
-            completionTokens: 500,
-            totalTokens: 1500,
-            toolTokens: 300,
-        }, { input: 3.0, output: 15.0, cachedInput: 0.3 });
-        // 700 * 3.0/1M + 300 * 0.3/1M + 500 * 15.0/1M = 0.0021 + 0.00009 + 0.0075
-        assert.ok(cost > 0);
-        assert.ok(cost < 0.02);
-    });
-});
-describe("lookupPricing", () => {
-    it("finds exact match", () => {
-        const pricing = lookupPricing("openai:chat:gpt-4o");
-        assert.ok(pricing);
-        assert.ok(pricing.input > 0);
-    });
-    it("falls back to prefix match", () => {
-        const pricing = lookupPricing("openai:chat:gpt-4o-2024-11-20");
-        assert.ok(pricing);
-    });
-    it("returns undefined for unknown model", () => {
-        const pricing = lookupPricing("unknown:model:xyz");
-        assert.equal(pricing, undefined);
-    });
-    it("uses custom pricing over defaults", () => {
-        const pricing = lookupPricing("custom:model", {
-            "custom:model": { input: 1.0, output: 2.0 },
-        });
-        assert.ok(pricing);
-        assert.equal(pricing.input, 1.0);
-    });
-});
-describe("estimateRunCost", () => {
-    it("estimates cost for a run", () => {
-        const estimate = estimateRunCost(5, ["openai:chat:gpt-4o"]);
-        assert.ok(estimate.totalUSD > 0);
-        assert.equal(estimate.perModel.length, 1);
-    });
-    it("flags budget warning", () => {
-        const estimate = estimateRunCost(100, ["openai:chat:gpt-4o", "anthropic:messages:claude-sonnet-4-6"], { perRun: { warn: 0.01, stop: 1.0 } });
-        assert.equal(estimate.exceedsWarning, true);
-    });
-});
-describe("checkBudget", () => {
-    it("allows spend below thresholds", () => {
-        const result = checkBudget(1.0, { perRun: { warn: 5.0, stop: 20.0 } }, "perRun");
-        assert.equal(result.proceed, true);
-        assert.equal(result.warning, undefined);
-    });
-    it("warns at warn threshold", () => {
-        const result = checkBudget(5.5, { perRun: { warn: 5.0, stop: 20.0 } }, "perRun");
-        assert.equal(result.proceed, true);
-        assert.ok(result.warning?.includes("warning"));
-    });
-    it("stops at stop threshold", () => {
-        const result = checkBudget(25.0, { perRun: { warn: 5.0, stop: 20.0 } }, "perRun");
-        assert.equal(result.proceed, false);
-        assert.ok(result.warning?.includes("exceeded"));
-    });
-});
-// ---------------------------------------------------------------------------
-// Redaction
-// ---------------------------------------------------------------------------
-describe("redactTrace", () => {
-    function makeTrace(toolCalls) {
-        return {
-            traceId: "trace-1",
-            runId: "run-1",
-            taskId: "task-1",
-            testCaseIndex: 0,
-            modelId: "openai:chat:gpt-4o",
-            spans: [],
-            toolCalls,
-            urlsVisited: [],
-            searchTerms: [],
-            filesRead: [],
-            filesWritten: [],
-            tokensUsed: { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
-            costEstimate: 0,
-            durationMs: 0,
-            events: [],
-            startedAt: new Date().toISOString(),
-            completedAt: new Date().toISOString(),
-        };
-    }
-    it("redacts Bearer tokens in tool call inputs", () => {
-        const trace = makeTrace([
-            {
-                name: "WebFetch",
-                input: {
-                    url: "https://api.sanity.io",
-                    auth: "Bearer sk_live_abc123def456ghi789",
-                },
-                output: "OK",
-                durationMs: 100,
-                category: "read",
-            },
-        ]);
-        const { trace: redacted, redactionCount } = redactTrace(trace);
-        const inputStr = JSON.stringify(redacted.toolCalls[0].input);
-        assert.ok(!inputStr.includes("sk_live_abc123def456ghi789"));
-        assert.ok(inputStr.includes("[REDACTED]"));
-        assert.ok(redactionCount > 0);
-    });
-    it("redacts Sanity tokens", () => {
-        const trace = makeTrace([
-            {
-                name: "Write",
-                input: { token: "skAbcDefGhiJklMnoPqrStUvWxYz0123456789" },
-                output: null,
-                durationMs: 10,
-                category: "write",
-            },
-        ]);
-        const { trace: redacted } = redactTrace(trace);
-        const inputStr = JSON.stringify(redacted.toolCalls[0].input);
-        assert.ok(inputStr.includes("[REDACTED_SANITY_TOKEN]"));
-    });
-    it("redacts OpenAI keys", () => {
-        const trace = makeTrace([
-            {
-                name: "Bash",
-                input: {
-                    command: "export OPENAI_API_KEY=sk-proj-abcdefghij1234567890abcdefghij",
-                },
-                output: null,
-                durationMs: 10,
-                category: "execute",
-            },
-        ]);
-        const { trace: redacted } = redactTrace(trace);
-        const inputStr = JSON.stringify(redacted.toolCalls[0].input);
-        assert.ok(!inputStr.includes("sk-proj-abcdefghij1234567890abcdefghij"), "OpenAI key should be redacted");
-    });
-    it("does not mutate the original trace", () => {
-        const original = makeTrace([
-            {
-                name: "WebFetch",
-                input: { auth: "Bearer secrettoken1234567890" },
-                output: null,
-                durationMs: 10,
-                category: "read",
-            },
-        ]);
-        const originalStr = JSON.stringify(original);
-        redactTrace(original);
-        assert.equal(JSON.stringify(original), originalStr);
-    });
-    it("reports which rules fired", () => {
-        const trace = makeTrace([
-            {
-                name: "Bash",
-                input: {
-                    cmd: "curl -H 'Authorization: Bearer abc123def456789' https://api.example.com",
-                },
-                output: null,
-                durationMs: 10,
-                category: "execute",
-            },
-        ]);
-        const { rulesApplied } = redactTrace(trace);
-        assert.ok(rulesApplied.includes("bearer_tokens"));
-    });
-});
-// ---------------------------------------------------------------------------
-// Trace storage
-// ---------------------------------------------------------------------------
-describe("LocalTraceStore", () => {
-    const storeDir = resolve(tmpdir(), `ailf-trace-test-${process.pid}`);
-    afterEach(() => {
-        if (existsSync(storeDir)) {
-            rmSync(storeDir, { recursive: true, force: true });
-        }
-    });
-    it("stores and retrieves a trace", async () => {
-        const store = new LocalTraceStore(storeDir);
-        const trace = {
-            traceId: "trace-store-test",
-            runId: "run-1",
-            taskId: "task-1",
-            testCaseIndex: 0,
-            modelId: "openai:chat:gpt-4o",
-            spans: [],
-            toolCalls: [],
-            urlsVisited: [],
-            searchTerms: [],
-            filesRead: [],
-            filesWritten: [],
-            tokensUsed: { promptTokens: 100, completionTokens: 50, totalTokens: 150 },
-            costEstimate: 0.001,
-            durationMs: 500,
-            events: [],
-            startedAt: new Date().toISOString(),
-            completedAt: new Date().toISOString(),
-        };
-        const result = await store.store(trace);
-        assert.ok(result.uri.startsWith("file://"));
-        assert.ok(result.sizeBytes > 0);
-        const retrieved = await store.retrieve(result.uri);
-        assert.ok(retrieved);
-        assert.equal(retrieved.traceId, "trace-store-test");
-    });
-    it("returns null for non-existent trace", async () => {
-        const store = new LocalTraceStore(storeDir);
-        const result = await store.retrieve("file:///nonexistent/path.json");
-        assert.equal(result, null);
-    });
-});
-// ---------------------------------------------------------------------------
-// Trace summary extraction
-// ---------------------------------------------------------------------------
-describe("extractTraceSummary", () => {
-    it("extracts sanitized summary from full trace", () => {
-        const trace = {
-            traceId: "trace-summary-test",
-            runId: "run-1",
-            taskId: "task-1",
-            testCaseIndex: 0,
-            modelId: "openai:chat:gpt-4o",
-            spans: [],
-            toolCalls: [
-                {
-                    name: "WebSearch",
-                    input: {},
-                    output: null,
-                    durationMs: 100,
-                    category: "search",
-                },
-                {
-                    name: "Read",
-                    input: {},
-                    output: null,
-                    durationMs: 50,
-                    category: "read",
-                },
-                {
-                    name: "Read",
-                    input: {},
-                    output: null,
-                    durationMs: 30,
-                    category: "read",
-                },
-            ],
-            urlsVisited: ["https://sanity.io/docs"],
-            searchTerms: ["GROQ"],
-            filesRead: ["/src/schema.ts"],
-            filesWritten: [],
-            tokensUsed: {
-                promptTokens: 1000,
-                completionTokens: 500,
-                totalTokens: 1500,
-            },
-            costEstimate: 0.01,
-            durationMs: 2000,
-            events: [],
-            startedAt: new Date().toISOString(),
-            completedAt: new Date().toISOString(),
-        };
-        const summary = extractTraceSummary(trace, "file:///traces/trace-1.json");
-        assert.equal(summary.traceId, "trace-summary-test");
-        assert.equal(summary.traceDataUri, "file:///traces/trace-1.json");
-        assert.equal(summary.toolCallCount, 3);
-        assert.equal(summary.toolCallCategories.search, 1);
-        assert.equal(summary.toolCallCategories.read, 2);
-        assert.equal(summary.totalTokens, 1500);
-        assert.equal(summary.costEstimate, 0.01);
-        assert.equal(summary.urlsVisitedCount, 1);
-        assert.equal(summary.filesReadCount, 1);
-        assert.equal(summary.filesWrittenCount, 0);
-    });
-});

package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts DELETED Viewed

@@ -1,10 +0,0 @@
-/**
- * tool-loop-openai.test.ts — Tests for the OpenAI MCP tool loop.
- *
- * Tests both API variants (Chat Completions and Responses) with mocked
- * fetch to verify tool calling, error handling, token tracking, and
- * round exhaustion.
- *
- * Run: npx tsx --test src/pipeline/compiler/__tests__/tool-loop-openai.test.ts
- */
-export {};