npm - orchestrated - Versions diffs - 0.1.9 → 0.1.10 - Mend

orchestrated 0.1.9 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/index.d.ts CHANGED

@@ -10,6 +10,7 @@
 import type { Batch } from 'openai/resources';
 import type { ChatCompletionCreateParamsBase } from 'openai/resources/chat/completions.mjs';
 import { ClientOptions } from 'openai';
+import { default as default_2 } from 'openai';
 import type { Metadata } from 'openai/resources';
 import OpenAI from 'openai';
 import type { z } from 'zod';
@@ -175,7 +176,9 @@ declare interface BatchResult {
     };
 }
-export declare const Behavioral: (args: unknown) => Promise<Score>;
+export declare const Behavioral: ((args: unknown) => Promise<Score>) & {
+    definition?: SerializableScorerDefinition;
+};
 /**
  * Bundle reference with fingerprint
@@ -204,7 +207,9 @@ export declare const colors: {
     yellow: string;
 };
-export declare const ContentSafety: (args: unknown) => Promise<Score>;
+export declare const ContentSafety: ((args: unknown) => Promise<Score>) & {
+    definition?: SerializableScorerDefinition;
+};
 /**
  * Context builder with progressive type refinement
@@ -284,7 +289,9 @@ declare interface DataSourceMetadata {
     ctx: ResolvedDataCtx;
 }
-export declare const Effectiveness: (args: unknown) => Promise<Score>;
+export declare const Effectiveness: ((args: unknown) => Promise<Score>) & {
+    definition?: SerializableScorerDefinition;
+};
 /**
  * Eval overload for SerializableEvaluation (from API responses or definitions.json)
@@ -421,16 +428,36 @@ export declare interface EvalOptions<EvalReport = boolean> {
      * Callback for pending batch UI (optional, CLI injects ink renderer)
      */
     onPendingBatch?: PendingBatchCallback;
+    /**
+     * Execution mode for evaluations:
+     * - "batch": Use batching for expensive LLM scorers (default, cost-effective)
+     * - "sync": Execute scorers synchronously without batching (faster, real-time)
+     */
+    execute?: "batch" | "sync";
+    /**
+     * Callback for streaming individual results as they complete (sync mode only)
+     * Called after each test case is evaluated
+     * Useful for real-time progress updates and streaming to clients
+     */
+    onResult?: (result: EvalResult) => void | Promise<void>;
     /**
      * Optional batch client for managing batch requests.
+     * Only used when execute is "batch".
      * If not provided, a default BatchClient will be created.
      */
     batchClient?: BatchClient;
     /**
      * Optional eval client for LLM scoring operations.
+     * Only used when execute is "batch".
      * If not provided, a default EvalClient will be created.
      */
     evalClient?: EvalClient;
+    /**
+     * Optional OpenAI client for sync LLM operations.
+     * Only used when execute is "sync".
+     * If not provided, a default OpenAI client will be created.
+     */
+    openaiClient?: default_2;
     __schedule?: string;
 }
@@ -446,6 +473,9 @@ export declare interface EvalResult<Input = any, Output = any, Expected = any> {
     tags?: string[];
     id?: string;
     hasPendingBatch?: boolean;
+    ctx: {
+        [k: string]: any;
+    };
 }
 /**

package/index.js CHANGED

@@ -21089,7 +21089,7 @@ async function initState(partial2 = {}, skipAuth = false) {
     throw new Error("State is immutable and already initialized. Use resetState() for testing.");
   }
   if (globalThis.__ORCHESTRATED_SHARED_STATE__) {
-    const sharedState = globalThis.__ORCHESTRATED_SHARED_STATE__;
+    const { lazyLoad, ...sharedState } = globalThis.__ORCHESTRATED_SHARED_STATE__;
     globalState = Object.freeze({ ...sharedState });
     isInitialized = true;
     return;
@@ -21148,7 +21148,8 @@ async function initState(partial2 = {}, skipAuth = false) {
 }
 function getState() {
   if (!isInitialized && globalThis.__ORCHESTRATED_SHARED_STATE__) {
-    return globalThis.__ORCHESTRATED_SHARED_STATE__;
+    const { lazyLoad, ...shareableState } = globalThis.__ORCHESTRATED_SHARED_STATE__;
+    return shareableState;
   }
   if (!isInitialized) {
     throw new Error("State not initialized. Call await initState() before using getState().");
@@ -21242,10 +21243,10 @@ var init_data_source = __esm(() => {
       id: exports_external.string(),
       ctx: exports_external.object({
         systemPrompt: exports_external.string().optional()
-      }),
+      }).loose(),
       input: exports_external.string(),
       output: exports_external.string()
-    }))
+    }).loose())
   }).passthrough();
 });
@@ -88550,10 +88551,34 @@ function buildPromptScorer(config2) {
     enumerable: false,
     configurable: true
   });
+  const serializedSchema = config2.parameters ? serializeZodSchema(config2.parameters) : {
+    type: "zod",
+    definition: JSON.stringify({ type: "object" })
+  };
+  const definition = {
+    type: "prompt",
+    name: config2.name,
+    slug: config2.slug || config2.name.toLowerCase().replace(/[_\s]+/g, "-"),
+    description: config2.description || `Prompt-based scorer: ${config2.name}`,
+    schema: serializedSchema,
+    promptTemplate: config2.promptTemplate,
+    choiceScores: config2.choiceScores,
+    model: config2.model,
+    useCoT: config2.useCoT,
+    temperature: config2.temperature,
+    metadata: config2.metadata
+  };
+  Object.defineProperty(scorerFunction, "definition", {
+    value: definition,
+    writable: false,
+    enumerable: false,
+    configurable: true
+  });
   return scorerFunction;
 }
 var init_scorer = __esm(() => {
   init_jsdist();
+  init_schema_serializer();
 });
 // src/serialization/types.ts
@@ -117088,6 +117113,22 @@ async function serializeScorer(scorer, evalName, index) {
   }
   if (typeof scorer === "function") {
     const scorerName = scorer.name;
+    console.log({
+      scorerName,
+      scorer
+    });
+    const definition = scorer.definition;
+    if (definition && typeof definition === "object" && definition.type) {
+      const alreadyRegistered = registry2.scorers.find((s) => s.name === definition.name);
+      if (!alreadyRegistered) {
+        registry2.scorers.push(definition);
+      }
+      return {
+        type: "custom_scorer",
+        slug: definition.slug || definition.name.toLowerCase(),
+        fingerprint: definition.fingerprint
+      };
+    }
     const registered = registry2.scorers.find((s) => s.name === scorerName);
     if (registered) {
       if (registered.type === "custom_scorer") {
@@ -117108,8 +117149,8 @@ async function serializeScorer(scorer, evalName, index) {
         fingerprint: registered.fingerprint
       };
     }
-    const inlineName = `${evalName}_Scorer_${index}`;
-    const slug = inlineName.toLowerCase().replace(/_/g, "-");
+    const inlineName = scorerName || `${evalName}_Scorer_${index}`;
+    const slug = inlineName.toLowerCase().replace(/[_\s]+/g, "-");
     const handlerName = `${inlineName}Handler`;
     registerHandler(handlerName, scorer, {
       location: "eval",
@@ -117626,8 +117667,37 @@ var traced = {
 };
 // src/evaluator/core.ts
+function deterministicStringify(obj) {
+  if (obj === null)
+    return "null";
+  if (obj === undefined)
+    return "undefined";
+  if (typeof obj === "string")
+    return JSON.stringify(obj);
+  if (typeof obj === "number" || typeof obj === "boolean")
+    return String(obj);
+  if (obj instanceof Date)
+    return obj.toISOString();
+  if (Array.isArray(obj)) {
+    const items = obj.map(deterministicStringify);
+    return `[${items.join(",")}]`;
+  }
+  if (typeof obj === "object") {
+    const keys = Object.keys(obj).sort();
+    const pairs2 = keys.map((key) => `${JSON.stringify(key)}:${deterministicStringify(obj[key])}`);
+    return `{${pairs2.join(",")}}`;
+  }
+  return String(obj);
+}
 function generateTestCasesChecksum(testCases) {
-  const content = JSON.stringify(testCases, null, 0);
+  const stableData = testCases.map((testCase) => ({
+    id: testCase.id,
+    input: testCase.input,
+    output: testCase.output,
+    expected: testCase.expected,
+    ctx: testCase.ctx
+  }));
+  const content = deterministicStringify(stableData);
   return createHash2("sha256").update(content).digest("hex");
 }
 function generateCaseId(dataCase) {
@@ -117745,10 +117815,20 @@ function getEvaluationOptions(name, options, state) {
   const returnResults = merged.returnResults ?? true;
   const scorerFailAsZero = merged.scorerFailAsZero ?? false;
   const progress = !jsonl && createProgressTracker ? createProgressTracker(name) : new NullProgressTracker;
-  const batchClient = merged.batchClient || new BatchClient;
-  const evalClient = merged.evalClient || new EvalClient({
-    batchClient
-  });
+  const execute = merged.execute ?? "batch";
+  let batchClient;
+  let evalClient;
+  let openaiClient;
+  if (execute === "batch") {
+    batchClient = merged.batchClient || new BatchClient;
+    evalClient = merged.evalClient || new EvalClient({
+      batchClient
+    });
+  } else {
+    const OpenAI4 = __require("openai").default;
+    openaiClient = merged.openaiClient || new OpenAI4;
+  }
+  const onResult = merged.onResult;
   return {
     state,
     reporter,
@@ -117759,12 +117839,15 @@ function getEvaluationOptions(name, options, state) {
     progress,
     createProgressTracker: createProgressTracker || (() => new NullProgressTracker),
     onPendingBatch,
+    execute,
     batchClient,
-    evalClient
+    evalClient,
+    openaiClient,
+    onResult
   };
 }
 async function evaluateDataCase(dataCase, evaluator, _ctx, options) {
-  const evalClient = options.evalClient;
+  const client2 = options.execute === "sync" ? options.openaiClient : options.evalClient;
   const caseId = generateCaseId(dataCase);
   const dataCaseCtx = dataCase.ctx;
   const ctx = _ctx.mutate({
@@ -117787,7 +117870,7 @@ async function evaluateDataCase(dataCase, evaluator, _ctx, options) {
       expected: dataCase.expected,
       tags: dataCase.tags,
       id: caseId,
-      client: evalClient
+      client: client2
     };
     const fieldsToExclude = new Set(["state", "ctx", "tags", "id", "client"]);
     const argsForStorage = Object.fromEntries(Object.entries(scorerArgs).filter(([key]) => !fieldsToExclude.has(key)));
@@ -117801,7 +117884,8 @@ async function evaluateDataCase(dataCase, evaluator, _ctx, options) {
       tags: dataCase.tags,
       id: caseId,
       hasPendingBatch: result.hasPendingBatch,
-      error: result.error
+      error: result.error,
+      ctx: argsForStorage
     };
     if (verbose) {
       console.dir(result.scores, { depth: null });
@@ -117817,7 +117901,25 @@ function getScorerName(scorer2, index) {
     return scorer2;
   }
   if (typeof scorer2 === "function") {
-    return scorer2.name;
+    return scorer2.name || `Scorer ${index ?? 0}`;
+  }
+  if (scorer2 && typeof scorer2 === "object") {
+    if (scorer2.type === "internal" && scorer2.name) {
+      return scorer2.name;
+    }
+    if (scorer2.type === "custom_scorer" && scorer2.slug) {
+      try {
+        const registry2 = getRegistry2();
+        const scorerDef = registry2.scorers.find((s) => s.slug === scorer2.slug);
+        if (scorerDef && scorerDef.name) {
+          return scorerDef.name;
+        }
+      } catch (e) {}
+      return scorer2.slug.split("-").map((word) => word.charAt(0).toUpperCase() + word.slice(1)).join(" ");
+    }
+    if (scorer2.name) {
+      return scorer2.name;
+    }
   }
   return `Scorer ${index ?? 0}`;
 }
@@ -118011,7 +118113,15 @@ async function executeScorer(scorer2, scorerArgs) {
   return scorerFn(scorerArgs);
 }
 async function runEval(name, evaluator, options) {
-  const { state, verbose, jsonl, returnResults, progress, batchClient } = options;
+  const {
+    state,
+    verbose,
+    jsonl,
+    returnResults,
+    progress,
+    execute,
+    batchClient
+  } = options;
   const ctx = init({
     ...evaluator.ctx ?? {},
     state,
@@ -118023,9 +118133,11 @@ async function runEval(name, evaluator, options) {
       [ATTR_DATASET_SOURCE_TYPE]: data.ctx.sourceType,
       [ATTR_EVAL_EXECUTION_METADATA_TEST_CASE_COUNT]: data.ctx.caseCount
     });
-    await batchClient.initialize(name, data.ctx.checksum, data.ctx.dataSourceType);
-    if (batchClient.hasPendingBatch) {
-      return batchClient.getPending();
+    if (execute === "batch" && batchClient) {
+      await batchClient.initialize(name, data.ctx.checksum, data.ctx.dataSourceType);
+      if (batchClient.hasPendingBatch) {
+        return batchClient.getPending();
+      }
     }
     const results = [];
     const errors6 = new Map;
@@ -118036,6 +118148,9 @@ async function runEval(name, evaluator, options) {
       }), options);
       results.push(caseResult.result);
       aggregateScorerErrors(errors6, caseResult.errors);
+      if (execute === "sync" && options.onResult) {
+        await options.onResult(caseResult.result);
+      }
       if (!jsonl) {
         progress.increment();
       }
@@ -118044,9 +118159,12 @@ async function runEval(name, evaluator, options) {
     if (!jsonl) {
       progress.stop();
     }
-    const batch = await batchClient.submit();
-    if (verbose) {
-      console.log(batch);
+    let batch;
+    if (execute === "batch" && batchClient) {
+      batch = await batchClient.submit();
+      if (verbose) {
+        console.log(batch);
+      }
     }
     const summary = createEvaluationSummary(name, results, errors6);
     const result = {
@@ -118262,12 +118380,15 @@ class ScorerRegistry {
     }
     return buildPromptScorer({
       name: config2.name,
+      slug: config2.slug,
+      description: config2.description,
       promptTemplate: config2.promptTemplate,
       choiceScores: config2.choiceScores,
       model: config2.model,
       useCoT: config2.useCoT,
       temperature: config2.temperature,
-      parameters: config2.parameters
+      parameters: config2.parameters,
+      metadata: config2.metadata
     });
   }
   createCustomScorer(config2, inLazyMode) {
@@ -118430,4 +118551,4 @@ export {
   Behavioral
 };
-//# debugId=7CBAC3512DE449E364756E2164756E21
+//# debugId=5F5616B0EFE8647164756E2164756E21