npm - @evalgate/sdk - Version diff - 2.2.3 → 2.3.0 - Mend

@evalgate/sdk 2.2.3 → 2.3.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.

Files changed (50) hide show

package/CHANGELOG.md +31 -0
package/README.md +39 -2
package/dist/assertions.d.ts +186 -6
package/dist/assertions.js +515 -61
package/dist/batch.js +4 -4
package/dist/cache.d.ts +4 -0
package/dist/cache.js +4 -0
package/dist/cli/baseline.d.ts +14 -0
package/dist/cli/baseline.js +43 -3
package/dist/cli/check.d.ts +5 -2
package/dist/cli/check.js +20 -12
package/dist/cli/compare.d.ts +80 -0
package/dist/cli/compare.js +266 -0
package/dist/cli/index.js +244 -101
package/dist/cli/regression-gate.js +23 -0
package/dist/cli/run.js +22 -0
package/dist/cli/start.d.ts +26 -0
package/dist/cli/start.js +130 -0
package/dist/cli/templates.d.ts +24 -0
package/dist/cli/templates.js +314 -0
package/dist/cli/traces.d.ts +109 -0
package/dist/cli/traces.js +152 -0
package/dist/cli/validate.d.ts +37 -0
package/dist/cli/validate.js +252 -0
package/dist/cli/watch.d.ts +19 -0
package/dist/cli/watch.js +175 -0
package/dist/client.js +6 -13
package/dist/constants.d.ts +2 -0
package/dist/constants.js +5 -0
package/dist/index.d.ts +8 -6
package/dist/index.js +26 -6
package/dist/integrations/openai.js +83 -60
package/dist/logger.d.ts +3 -1
package/dist/logger.js +2 -1
package/dist/otel.d.ts +130 -0
package/dist/otel.js +309 -0
package/dist/runtime/eval.d.ts +14 -4
package/dist/runtime/eval.js +127 -2
package/dist/runtime/registry.d.ts +4 -2
package/dist/runtime/registry.js +11 -3
package/dist/runtime/run-report.d.ts +1 -1
package/dist/runtime/run-report.js +7 -4
package/dist/runtime/types.d.ts +38 -0
package/dist/testing.d.ts +8 -0
package/dist/testing.js +45 -10
package/dist/version.d.ts +2 -2
package/dist/version.js +2 -2
package/dist/workflows.d.ts +2 -0
package/dist/workflows.js +184 -102
package/package.json +124 -117

package/dist/workflows.js CHANGED Viewed

@@ -27,6 +27,39 @@
  * await tracer.endWorkflow({ resolution: 'Issue resolved' });
  * ```
  */
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) {
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || (function () {
+    var ownKeys = function(o) {
+        ownKeys = Object.getOwnPropertyNames || function (o) {
+            var ar = [];
+            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
+            return ar;
+        };
+        return ownKeys(o);
+    };
+    return function (mod) {
+        if (mod && mod.__esModule) return mod;
+        var result = {};
+        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
+        __setModuleDefault(result, mod);
+        return result;
+    };
+})();
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.WorkflowTracer = void 0;
 exports.traceLangChainAgent = traceLangChainAgent;
@@ -34,6 +67,8 @@ exports.traceCrewAI = traceCrewAI;
 exports.traceAutoGen = traceAutoGen;
 exports.createWorkflowTracer = createWorkflowTracer;
 exports.traceWorkflowStep = traceWorkflowStep;
+const fs = __importStar(require("node:fs"));
+const nodePath = __importStar(require("node:path"));
 const context_1 = require("./context");
 // ============================================================================
 // MAIN CLASS - WorkflowTracer
@@ -75,6 +110,7 @@ class WorkflowTracer {
             tracePrefix: options.tracePrefix || "workflow",
             captureFullPayloads: options.captureFullPayloads ?? true,
             debug: options.debug ?? false,
+            offline: options.offline ?? false,
         };
     }
     // ==========================================================================
@@ -101,34 +137,39 @@ class WorkflowTracer {
         }
         const traceId = `${this.options.tracePrefix}-${Date.now()}-${this.generateId()}`;
         const startedAt = new Date().toISOString();
-        // Create the trace
-        const trace = await this.client.traces.create({
-            name: `Workflow: ${name}`,
-            traceId,
-            organizationId: this.options.organizationId,
-            status: "pending",
-            metadata: (0, context_1.mergeWithContext)({
-                workflowName: name,
-                definition,
-                ...metadata,
-            }),
-        });
-        this.currentWorkflow = {
-            id: 0, // Will be set after API call returns
-            traceId: trace.id,
+        // Create the trace (skip in offline mode)
+        let traceResultId = 0;
+        if (!this.options.offline) {
+            const trace = await this.client.traces.create({
+                name: `Workflow: ${name}`,
+                traceId,
+                organizationId: this.options.organizationId,
+                status: "pending",
+                metadata: (0, context_1.mergeWithContext)({
+                    workflowName: name,
+                    definition,
+                    ...metadata,
+                }),
+            });
+            traceResultId = trace.id;
+        }
+        const workflow = {
+            id: 0,
+            traceId: traceResultId,
             name,
             startedAt,
             definition,
             metadata,
         };
+        this.currentWorkflow = workflow;
         // Reset state
         this.handoffs = [];
         this.decisions = [];
         this.costs = [];
         this.activeSpans.clear();
         this.spanCounter = 0;
-        this.log("Started workflow", { name, traceId: trace.id });
-        return this.currentWorkflow;
+        this.log("Started workflow", { name, traceId: traceResultId });
+        return workflow;
     }
     /**
      * End the current workflow
@@ -140,24 +181,57 @@ class WorkflowTracer {
         const durationMs = Date.now() - new Date(this.currentWorkflow.startedAt).getTime();
         // Calculate total cost
         const totalCost = this.costs.reduce((sum, cost) => sum + parseFloat(cost.totalCost), 0);
-        // Update the original trace with completion data
-        await this.client.traces.update(this.currentWorkflow.traceId, {
-            status: status === "completed" ? "success" : "error",
-            durationMs,
-            metadata: (0, context_1.mergeWithContext)({
-                workflowName: this.currentWorkflow.name,
-                output,
-                status,
-                totalCost: totalCost.toFixed(6),
-                handoffCount: this.handoffs.length,
-                decisionCount: this.decisions.length,
-                agentCount: new Set(this.handoffs.map((h) => h.toAgent)).size + 1,
-                retryCount: this.costs.filter((c) => c.isRetry).length,
-                handoffs: this.handoffs,
-                decisions: this.decisions,
-                costs: this.costs,
-            }),
-        });
+        // Update the original trace with completion data (skip in offline mode)
+        if (!this.options.offline) {
+            await this.client.traces.update(this.currentWorkflow.traceId, {
+                status: status === "completed" ? "success" : "error",
+                durationMs,
+                metadata: (0, context_1.mergeWithContext)({
+                    workflowName: this.currentWorkflow.name,
+                    output,
+                    status,
+                    totalCost: totalCost.toFixed(6),
+                    handoffCount: this.handoffs.length,
+                    decisionCount: this.decisions.length,
+                    agentCount: new Set(this.handoffs.map((h) => h.toAgent)).size + 1,
+                    retryCount: this.costs.filter((c) => c.isRetry).length,
+                    handoffs: this.handoffs,
+                    decisions: this.decisions,
+                    costs: this.costs,
+                }),
+            });
+        }
+        // In offline mode, persist workflow data to local filesystem
+        if (this.options.offline) {
+            try {
+                const dataDir = nodePath.resolve(".evalgate-data", "workflows");
+                fs.mkdirSync(dataDir, { recursive: true });
+                const fileName = `${this.currentWorkflow.name.replace(/[^a-zA-Z0-9_-]/g, "_")}-${Date.now()}.json`;
+                const workflowData = {
+                    name: this.currentWorkflow.name,
+                    startedAt: this.currentWorkflow.startedAt,
+                    endedAt: new Date().toISOString(),
+                    status,
+                    durationMs,
+                    totalCost: totalCost.toFixed(6),
+                    handoffs: this.handoffs,
+                    decisions: this.decisions,
+                    costs: this.costs,
+                    output,
+                    metadata: this.currentWorkflow.metadata,
+                    definition: this.currentWorkflow.definition,
+                };
+                fs.writeFileSync(nodePath.join(dataDir, fileName), JSON.stringify(workflowData, null, 2));
+                this.log("Saved workflow to local filesystem", {
+                    path: nodePath.join(dataDir, fileName),
+                });
+            }
+            catch (err) {
+                this.log("Failed to save workflow to local filesystem", {
+                    error: err instanceof Error ? err.message : String(err),
+                });
+            }
+        }
         this.log("Ended workflow", {
             name: this.currentWorkflow.name,
             status,
@@ -193,18 +267,20 @@ class WorkflowTracer {
             metadata: input,
         };
         this.activeSpans.set(spanId, spanContext);
-        // Create span via API
-        await this.client.traces.createSpan(this.currentWorkflow.traceId, {
-            name: `Agent: ${agentName}`,
-            spanId,
-            type: "agent",
-            parentSpanId,
-            startTime,
-            metadata: (0, context_1.mergeWithContext)({
-                agentName,
-                ...(this.options.captureFullPayloads ? { input } : {}),
-            }),
-        });
+        // Create span via API (skip in offline mode)
+        if (!this.options.offline) {
+            await this.client.traces.createSpan(this.currentWorkflow.traceId, {
+                name: `Agent: ${agentName}`,
+                spanId,
+                type: "agent",
+                parentSpanId,
+                startTime,
+                metadata: (0, context_1.mergeWithContext)({
+                    agentName,
+                    ...(this.options.captureFullPayloads ? { input } : {}),
+                }),
+            });
+        }
         this.log("Started agent span", { agentName, spanId });
         return spanContext;
     }
@@ -217,21 +293,23 @@ class WorkflowTracer {
         }
         const endTime = new Date().toISOString();
         const durationMs = new Date(endTime).getTime() - new Date(span.startTime).getTime();
-        // Update span via API (create completion record)
-        await this.client.traces.createSpan(this.currentWorkflow.traceId, {
-            name: `Agent: ${span.agentName}`,
-            spanId: `${span.spanId}-end`,
-            type: "agent",
-            parentSpanId: span.spanId,
-            startTime: span.startTime,
-            endTime,
-            durationMs,
-            metadata: (0, context_1.mergeWithContext)({
-                agentName: span.agentName,
-                ...(this.options.captureFullPayloads ? { output } : {}),
-                ...(error ? { error } : {}),
-            }),
-        });
+        // Update span via API (skip in offline mode)
+        if (!this.options.offline) {
+            await this.client.traces.createSpan(this.currentWorkflow.traceId, {
+                name: `Agent: ${span.agentName}`,
+                spanId: `${span.spanId}-end`,
+                type: "agent",
+                parentSpanId: span.spanId,
+                startTime: span.startTime,
+                endTime,
+                durationMs,
+                metadata: (0, context_1.mergeWithContext)({
+                    agentName: span.agentName,
+                    ...(this.options.captureFullPayloads ? { output } : {}),
+                    ...(error ? { error } : {}),
+                }),
+            });
+        }
         this.activeSpans.delete(span.spanId);
         this.log("Ended agent span", {
             agentName: span.agentName,
@@ -267,22 +345,24 @@ class WorkflowTracer {
             timestamp: new Date().toISOString(),
         };
         this.handoffs.push(handoff);
-        // Also create a span for the handoff
-        const spanId = `handoff-${this.handoffs.length}-${this.generateId()}`;
-        await this.client.traces.createSpan(this.currentWorkflow.traceId, {
-            name: `Handoff: ${fromAgent || "start"} → ${toAgent}`,
-            spanId,
-            type: "handoff",
-            startTime: handoff.timestamp,
-            endTime: handoff.timestamp,
-            durationMs: 0,
-            metadata: (0, context_1.mergeWithContext)({
-                handoffType,
-                fromAgent,
-                toAgent,
-                context,
-            }),
-        });
+        // Also create a span for the handoff (skip in offline mode)
+        if (!this.options.offline) {
+            const spanId = `handoff-${this.handoffs.length}-${this.generateId()}`;
+            await this.client.traces.createSpan(this.currentWorkflow.traceId, {
+                name: `Handoff: ${fromAgent || "start"} → ${toAgent}`,
+                spanId,
+                type: "handoff",
+                startTime: handoff.timestamp,
+                endTime: handoff.timestamp,
+                durationMs: 0,
+                metadata: (0, context_1.mergeWithContext)({
+                    handoffType,
+                    fromAgent,
+                    toAgent,
+                    context,
+                }),
+            });
+        }
         this.log("Recorded handoff", { fromAgent, toAgent, handoffType });
     }
     // ==========================================================================
@@ -312,28 +392,30 @@ class WorkflowTracer {
             throw new Error("No active workflow. Call startWorkflow() first.");
         }
         this.decisions.push(params);
-        // Create a span for the decision
-        const spanId = `decision-${this.decisions.length}-${this.generateId()}`;
-        const timestamp = new Date().toISOString();
-        await this.client.traces.createSpan(this.currentWorkflow.traceId, {
-            name: `Decision: ${params.agent} chose ${params.chosen}`,
-            spanId,
-            type: "decision",
-            startTime: timestamp,
-            endTime: timestamp,
-            durationMs: 0,
-            metadata: (0, context_1.mergeWithContext)({
-                isDecisionPoint: true,
-                agentName: params.agent,
-                decisionType: params.type,
-                chosen: params.chosen,
-                alternatives: params.alternatives,
-                reasoning: params.reasoning,
-                confidence: params.confidence,
-                contextFactors: params.contextFactors,
-                inputContext: params.inputContext,
-            }),
-        });
+        // Create a span for the decision (skip in offline mode)
+        if (!this.options.offline) {
+            const spanId = `decision-${this.decisions.length}-${this.generateId()}`;
+            const timestamp = new Date().toISOString();
+            await this.client.traces.createSpan(this.currentWorkflow.traceId, {
+                name: `Decision: ${params.agent} chose ${params.chosen}`,
+                spanId,
+                type: "decision",
+                startTime: timestamp,
+                endTime: timestamp,
+                durationMs: 0,
+                metadata: (0, context_1.mergeWithContext)({
+                    isDecisionPoint: true,
+                    agentName: params.agent,
+                    decisionType: params.type,
+                    chosen: params.chosen,
+                    alternatives: params.alternatives,
+                    reasoning: params.reasoning,
+                    confidence: params.confidence,
+                    contextFactors: params.contextFactors,
+                    inputContext: params.inputContext,
+                }),
+            });
+        }
         this.log("Recorded decision", {
             agent: params.agent,
             type: params.type,
@@ -375,8 +457,8 @@ class WorkflowTracer {
             totalCost: totalCost.toFixed(6),
         };
         this.costs.push(costRecord);
-        // Also record as a span if in an active workflow
-        if (this.currentWorkflow) {
+        // Also record as a span if in an active workflow (skip in offline mode)
+        if (this.currentWorkflow && !this.options.offline) {
             const spanId = `cost-${this.costs.length}-${this.generateId()}`;
             const timestamp = new Date().toISOString();
             await this.client.traces.createSpan(this.currentWorkflow.traceId, {

package/package.json CHANGED Viewed

@@ -1,118 +1,125 @@
 {
-	"name": "@evalgate/sdk",
-	"version": "2.2.3",
-	"publishConfig": {
-		"access": "public",
-		"registry": "https://registry.npmjs.org/"
-	},
-	"description": "EvalGate SDK - Complete API Coverage with Performance Optimizations",
-	"main": "dist/index.js",
-	"module": "dist/index.js",
-	"types": "dist/index.d.ts",
-	"sideEffects": false,
-	"files": [
-		"dist",
-		"README.md",
-		"CHANGELOG.md"
-	],
-	"bin": {
-		"evalgate": "dist/cli/index.js"
-	},
-	"engines": {
-		"node": ">=16.0.0"
-	},
-	"scripts": {
-		"build": "tsc",
-		"dev": "tsc --watch",
-		"test": "vitest run",
-		"test:watch": "vitest"
-	},
-	"keywords": [
-		"ai",
-		"evaluation",
-		"llm",
-		"testing",
-		"observability",
-		"tracing",
-		"monitoring",
-		"annotations",
-		"webhooks",
-		"developer-tools",
-		"openai",
-		"anthropic"
-	],
-	"author": "EvalGate Team",
-	"license": "MIT",
-	"repository": {
-		"type": "git",
-		"url": "git+https://github.com/pauly7610/ai-evaluation-platform.git",
-		"directory": "src/packages/sdk"
-	},
-	"homepage": "https://evalgate.com",
-	"bugs": {
-		"url": "https://github.com/pauly7610/ai-evaluation-platform/issues"
-	},
-	"dependencies": {
-		"commander": "^14.0.0"
-	},
-	"peerDependencies": {
-		"@anthropic-ai/sdk": "^0.20.0",
-		"openai": "^4.0.0"
-	},
-	"peerDependenciesMeta": {
-		"openai": {
-			"optional": true
-		},
-		"@anthropic-ai/sdk": {
-			"optional": true
-		}
-	},
-	"devDependencies": {
-		"@types/node": "^20.0.0",
-		"ts-node": "^10.9.2",
-		"typescript": "^5.0.0",
-		"vitest": "^1.0.0"
-	},
-	"exports": {
-		".": {
-			"import": "./dist/index.js",
-			"require": "./dist/index.js",
-			"types": "./dist/index.d.ts"
-		},
-		"./assertions": {
-			"import": "./dist/assertions.js",
-			"require": "./dist/assertions.js",
-			"types": "./dist/assertions.d.ts"
-		},
-		"./testing": {
-			"import": "./dist/testing.js",
-			"require": "./dist/testing.js",
-			"types": "./dist/testing.d.ts"
-		},
-		"./integrations/openai": {
-			"import": "./dist/integrations/openai.js",
-			"require": "./dist/integrations/openai.js",
-			"types": "./dist/integrations/openai.d.ts"
-		},
-		"./integrations/anthropic": {
-			"import": "./dist/integrations/anthropic.js",
-			"require": "./dist/integrations/anthropic.js",
-			"types": "./dist/integrations/anthropic.d.ts"
-		},
-		"./integrations/openai-eval": {
-			"import": "./dist/integrations/openai-eval.js",
-			"require": "./dist/integrations/openai-eval.js",
-			"types": "./dist/integrations/openai-eval.d.ts"
-		},
-		"./matchers": {
-			"import": "./dist/matchers/index.js",
-			"require": "./dist/matchers/index.js",
-			"types": "./dist/matchers/index.d.ts"
-		},
-		"./regression": {
-			"import": "./dist/regression.js",
-			"require": "./dist/regression.js",
-			"types": "./dist/regression.d.ts"
-		}
-	}
-}
+  "name": "@evalgate/sdk",
+  "version": "2.3.0",
+  "publishConfig": {
+    "access": "public",
+    "registry": "https://registry.npmjs.org/"
+  },
+  "description": "EvalGate SDK - Complete API Coverage with Performance Optimizations",
+  "main": "dist/index.js",
+  "module": "dist/index.js",
+  "types": "dist/index.d.ts",
+  "sideEffects": false,
+  "files": [
+    "dist",
+    "README.md",
+    "CHANGELOG.md"
+  ],
+  "bin": {
+    "evalgate": "dist/cli/index.js"
+  },
+  "engines": {
+    "node": ">=16.0.0"
+  },
+  "keywords": [
+    "ai",
+    "evaluation",
+    "llm",
+    "testing",
+    "observability",
+    "tracing",
+    "monitoring",
+    "annotations",
+    "webhooks",
+    "developer-tools",
+    "openai",
+    "anthropic"
+  ],
+  "author": "EvalGate Team",
+  "license": "MIT",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/pauly7610/ai-evaluation-platform.git",
+    "directory": "src/packages/sdk"
+  },
+  "homepage": "https://evalgate.com",
+  "bugs": {
+    "url": "https://github.com/pauly7610/ai-evaluation-platform/issues"
+  },
+  "dependencies": {
+    "commander": "^14.0.0"
+  },
+  "peerDependencies": {
+    "@anthropic-ai/sdk": "^0.20.0",
+    "openai": "^4.0.0"
+  },
+  "peerDependenciesMeta": {
+    "openai": {
+      "optional": true
+    },
+    "@anthropic-ai/sdk": {
+      "optional": true
+    }
+  },
+  "devDependencies": {
+    "@types/node": "^20.0.0",
+    "ts-node": "^10.9.2",
+    "typescript": "^5.0.0",
+    "vitest": "^1.0.0"
+  },
+  "exports": {
+    ".": {
+      "import": "./dist/index.js",
+      "require": "./dist/index.js",
+      "types": "./dist/index.d.ts"
+    },
+    "./assertions": {
+      "import": "./dist/assertions.js",
+      "require": "./dist/assertions.js",
+      "types": "./dist/assertions.d.ts"
+    },
+    "./testing": {
+      "import": "./dist/testing.js",
+      "require": "./dist/testing.js",
+      "types": "./dist/testing.d.ts"
+    },
+    "./integrations/openai": {
+      "import": "./dist/integrations/openai.js",
+      "require": "./dist/integrations/openai.js",
+      "types": "./dist/integrations/openai.d.ts"
+    },
+    "./integrations/anthropic": {
+      "import": "./dist/integrations/anthropic.js",
+      "require": "./dist/integrations/anthropic.js",
+      "types": "./dist/integrations/anthropic.d.ts"
+    },
+    "./integrations/openai-eval": {
+      "import": "./dist/integrations/openai-eval.js",
+      "require": "./dist/integrations/openai-eval.js",
+      "types": "./dist/integrations/openai-eval.d.ts"
+    },
+    "./matchers": {
+      "import": "./dist/matchers/index.js",
+      "require": "./dist/matchers/index.js",
+      "types": "./dist/matchers/index.d.ts"
+    },
+    "./regression": {
+      "import": "./dist/regression.js",
+      "require": "./dist/regression.js",
+      "types": "./dist/regression.d.ts"
+    },
+    "./otel": {
+      "import": "./dist/otel.js",
+      "require": "./dist/otel.js",
+      "types": "./dist/otel.d.ts"
+    }
+  },
+  "scripts": {
+    "build": "tsc",
+    "dev": "tsc --watch",
+    "test": "vitest run",
+    "test:dist": "tsc && vitest run src/__tests__/dist-smoke.test.ts",
+    "otel:test": "bash otel-integration/run-test.sh",
+    "test:watch": "vitest"
+  }
+}