npm - @objectstack/service-ai - Versions diffs - 7.0.0 → 7.2.0 - Mend

@objectstack/service-ai 7.0.0 → 7.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/index.cjs CHANGED Viewed

@@ -42,6 +42,77 @@ function buildEngineContext(ctx) {
   }
   return { roles: [], permissions: [], isSystem: true };
 }
+async function resolveObjectFieldNames(ctx, objectName) { // Resolves to a Set of field names for objectName, or null when no object definition can be found.
+  let def;
+  if (ctx.metadataService) { // First source: ctx.metadataService.getObject, used only when a metadata service is present.
+    try {
+      def = await ctx.metadataService.getObject(objectName);
+    } catch { // A throwing lookup is treated like "not found"; the error itself is discarded.
+      def = void 0;
+    }
+  }
+  if (!def && ctx.protocol?.getMetaItems) { // Second source: search the items returned by ctx.protocol.getMetaItems({ type: "object" }).
+    try {
+      const all = await ctx.protocol.getMetaItems({ type: "object" });
+      const arr = Array.isArray(all) ? all : all && typeof all === "object" && Array.isArray(all.items) ? all.items : []; // Accepts a bare array or an { items: [...] } wrapper; any other shape yields [].
+      def = arr.find((o) => o?.name === objectName);
+    } catch {
+      def = void 0;
+    }
+  }
+  if (!def) return null; // null means "schema unavailable"; validateFieldReferences then skips validation.
+  const fields = def.fields ?? {};
+  const names = /* @__PURE__ */ new Set(["id", ...Object.keys(fields)]); // "id" is seeded explicitly, so it is accepted even when def.fields does not list it.
+  return names;
+}
+function collectWhereFields(where, acc) { // Recursively adds every field name referenced by a where clause to the Set acc; returns nothing.
+  if (!where || typeof where !== "object") return; // Primitives and null/undefined contribute no field names.
+  if (Array.isArray(where)) { // Arrays are walked element by element.
+    for (const item of where) collectWhereFields(item, acc);
+    return;
+  }
+  for (const [key, value] of Object.entries(where)) {
+    if (WHERE_OPERATOR_KEYS.has(key)) { // Operator keys are not field names; only their operands are searched.
+      collectWhereFields(value, acc);
+    } else {
+      acc.add(key); // NOTE(review): any key missing from WHERE_OPERATOR_KEYS lands here, including "$"-prefixed ones such as "$options" — confirm that is intended.
+      if (value && typeof value === "object" && !Array.isArray(value)) { // Descend into plain-object values (e.g. { "$gt": 100 }); array values are not walked on this path.
+        collectWhereFields(value, acc);
+      }
+    }
+  }
+}
+function unknownFieldError(objectName, unknown, available) { // Returns a JSON string describing the unknown fields; available is read as a Set (iterated, and .size is used).
+  const sample = [...available].slice(0, 40); // At most the first 40 available names, in iteration order, are included.
+  const truncated = available.size > sample.length; // True when the sample above omits some of the available names.
+  return JSON.stringify({
+    error: `Unknown field(s) ${JSON.stringify(unknown)} on "${objectName}". Call describe_object first to see the real schema \u2014 do not guess generic fields like \`status\`, \`is_active\`, or \`deleted_at\`.`,
+    objectName,
+    unknownFields: unknown,
+    availableFields: sample,
+    availableFieldsTruncated: truncated,
+    totalAvailable: available.size, // Full count, so the reader can tell how much the sample omits.
+    hint: "Use the describe_object tool to fetch the authoritative field list."
+  });
+}
+async function validateFieldReferences(ctx, objectName, refs) { // Resolves to null when nothing is wrong (or nothing can be checked), otherwise to the JSON string built by unknownFieldError.
+  const available = await resolveObjectFieldNames(ctx, objectName);
+  if (!available) return null; // No field list could be resolved, so no validation is performed.
+  const referenced = /* @__PURE__ */ new Set();
+  collectWhereFields(refs.where, referenced);
+  for (const f of refs.fields ?? []) referenced.add(f); // Each of where / fields / orderBy / groupBy / aggregations is optional on refs.
+  for (const o of refs.orderBy ?? []) if (o?.field) referenced.add(o.field); // orderBy entries are read as objects carrying a .field property.
+  for (const g of refs.groupBy ?? []) referenced.add(g);
+  for (const a of refs.aggregations ?? []) {
+    if (a?.field) referenced.add(a.field); // Aggregations without a field contribute nothing.
+  }
+  const unknown = [];
+  for (const ref of referenced) {
+    if (!available.has(ref)) unknown.push(ref); // Exact-match membership test against the resolved names.
+  }
+  if (unknown.length === 0) return null;
+  return unknownFieldError(objectName, unknown, available);
+}
 function createQueryRecordsHandler(ctx) {
   return async (args, execCtx) => {
     const {
@@ -52,6 +123,12 @@ function createQueryRecordsHandler(ctx) {
       limit,
       offset
     } = args;
+    const validationError = await validateFieldReferences(ctx, objectName, {
+      where,
+      fields,
+      orderBy
+    });
+    if (validationError) return validationError;
     const rawLimit = limit ?? DEFAULT_QUERY_LIMIT;
     const safeLimit = Number.isFinite(rawLimit) && rawLimit > 0 ? Math.min(Math.floor(rawLimit), MAX_QUERY_LIMIT) : DEFAULT_QUERY_LIMIT;
     const safeOffset = Number.isFinite(offset) && offset >= 0 ? Math.floor(offset) : void 0;
@@ -69,6 +146,8 @@ function createQueryRecordsHandler(ctx) {
 function createGetRecordHandler(ctx) {
   return async (args, execCtx) => {
     const { objectName, recordId, fields } = args;
+    const validationError = await validateFieldReferences(ctx, objectName, { fields });
+    if (validationError) return validationError;
     const record = await ctx.dataEngine.findOne(objectName, {
       where: { id: recordId },
       fields,
@@ -90,6 +169,12 @@ function createAggregateDataHandler(ctx) {
         });
       }
     }
+    const validationError = await validateFieldReferences(ctx, objectName, {
+      where,
+      groupBy,
+      aggregations
+    });
+    if (validationError) return validationError;
     const result = await ctx.dataEngine.aggregate(objectName, {
       where,
       groupBy,
@@ -108,7 +193,7 @@ function registerDataTools(registry, context) {
   registry.register(GET_RECORD_TOOL, createGetRecordHandler(context));
   registry.register(AGGREGATE_DATA_TOOL, createAggregateDataHandler(context));
 }
-var MAX_QUERY_LIMIT, DEFAULT_QUERY_LIMIT, QUERY_RECORDS_TOOL, GET_RECORD_TOOL, AGGREGATE_DATA_TOOL, DATA_TOOL_DEFINITIONS, VALID_AGG_FUNCTIONS;
+var MAX_QUERY_LIMIT, DEFAULT_QUERY_LIMIT, QUERY_RECORDS_TOOL, GET_RECORD_TOOL, AGGREGATE_DATA_TOOL, DATA_TOOL_DEFINITIONS, WHERE_OPERATOR_KEYS, VALID_AGG_FUNCTIONS;
 var init_data_tools = __esm({
   "src/tools/data-tools.ts"() {
     "use strict";
@@ -126,7 +211,7 @@ var init_data_tools = __esm({
           },
           where: {
             type: "object",
-            description: 'Filter conditions as key-value pairs (e.g. { "status": "active" }) or MongoDB-style operators (e.g. { "amount": { "$gt": 100 } })'
+            description: 'Filter conditions. Keys MUST be real field names obtained from describe_object \u2014 do NOT assume generic fields like `status`, `is_active`, or `deleted_at` exist on every object. Values are equality matches, or MongoDB-style operators (`{ "$gt": 100 }`, `{ "$in": [...] }`, etc.). Logical combinators: `$and` / `$or` / `$not` with nested clauses.'
           },
           fields: {
             type: "array",
@@ -224,7 +309,7 @@ var init_data_tools = __esm({
           },
           where: {
             type: "object",
-            description: "Filter conditions applied before aggregation"
+            description: "Filter applied before aggregation. Same rules as query_records: keys MUST be real field names obtained from describe_object \u2014 do NOT guess generic fields like `status` or `is_active`."
           }
         },
         required: ["objectName", "aggregations"],
@@ -236,6 +321,28 @@ var init_data_tools = __esm({
       GET_RECORD_TOOL,
       AGGREGATE_DATA_TOOL
     ];
+    WHERE_OPERATOR_KEYS = /* @__PURE__ */ new Set([
+      "$and",
+      "$or",
+      "$not",
+      "$nor",
+      "$eq",
+      "$ne",
+      "$gt",
+      "$gte",
+      "$lt",
+      "$lte",
+      "$in",
+      "$nin",
+      "$exists",
+      "$regex",
+      "$like",
+      "$ilike",
+      "$contains",
+      "$startsWith",
+      "$endsWith",
+      "$between"
+    ]);
     VALID_AGG_FUNCTIONS = /* @__PURE__ */ new Set([
       "count",
       "sum",
@@ -1415,7 +1522,7 @@ var InMemoryConversationService = class {
     }
     return results;
   }
-  async addMessage(conversationId, message) {
+  async addMessage(conversationId, message, _extras) {
     const conversation = this.store.get(conversationId);
     if (!conversation) {
       throw new Error(`Conversation "${conversationId}" not found`);
@@ -1721,9 +1828,9 @@ ${assistantText.slice(0, 800)}` : "")
    * must never fail because the history write failed. Mirrors the
    * precedent set by `ObjectQLTraceRecorder.record`.
    */
-  async persistMessage(conversationId, message) {
+  async persistMessage(conversationId, message, extras) {
     try {
-      await this.conversationService.addMessage(conversationId, message);
+      await this.conversationService.addMessage(conversationId, message, extras);
     } catch (err) {
       this.logger.warn("[AI] persist message failed", {
         conversationId,
@@ -1732,6 +1839,25 @@ ${assistantText.slice(0, 800)}` : "")
       });
     }
   }
+  /**
+   * Build a {@link MessageObservability} payload from an LLM-call result
+   * and the `Date.now()` timestamp taken just before the call. Returns
+   * `undefined` when `result` is falsy, or when it has no model, no usage
+   * and no `startedAt` was given, so callers need no special-casing.
+   */
+  static buildObservability(result, startedAt) {
+    if (!result) return void 0;
+    const usage = result.usage;
+    const latencyMs = startedAt != null ? Date.now() - startedAt : void 0; // Elapsed ms since startedAt; undefined when no start time was supplied.
+    if (!result.model && !usage && latencyMs == null) return void 0;
+    return {
+      model: result.model,
+      promptTokens: usage?.promptTokens, // Token counts stay undefined when the result carries no usage.
+      completionTokens: usage?.completionTokens,
+      totalTokens: usage?.totalTokens,
+      latencyMs
+    };
+  }
   /**
    * Run an adapter call and emit a trace event.
    *
@@ -1883,14 +2009,20 @@ ${assistantText.slice(0, 800)}` : "")
     });
     let abortedByCallback = false;
     for (let iteration = 0; iteration < maxIterations; iteration++) {
+      const turnStartedAt = Date.now();
       const result = await this.adapter.chat(conversation, chatOptions);
+      const turnObservability = _AIService.buildObservability(result, turnStartedAt);
       if (!result.toolCalls || result.toolCalls.length === 0) {
         this.logger.debug("[AI] chatWithTools finished", { iteration, content: result.content.slice(0, 80) });
         if (conversationId) {
-          await this.persistMessage(conversationId, {
-            role: "assistant",
-            content: result.content
-          });
+          await this.persistMessage(
+            conversationId,
+            {
+              role: "assistant",
+              content: result.content
+            },
+            turnObservability
+          );
           void this.summarizeConversation(conversationId);
         }
         return autoCreatedConversationId ? { ...result, conversationId: autoCreatedConversationId } : result;
@@ -1908,7 +2040,7 @@ ${assistantText.slice(0, 800)}` : "")
       };
       conversation.push(assistantTurn);
       if (conversationId) {
-        await this.persistMessage(conversationId, assistantTurn);
+        await this.persistMessage(conversationId, assistantTurn, turnObservability);
       }
       const toolResults = await this.toolRegistry.executeAll(
         result.toolCalls,
@@ -1949,16 +2081,22 @@ ${assistantText.slice(0, 800)}` : "")
         toolErrors: toolErrors.length > 0 ? toolErrors : void 0
       });
     }
+    const finalStartedAt = Date.now();
     const finalResult = await this.adapter.chat(conversation, {
       ...chatOptions,
       tools: void 0,
       toolChoice: void 0
     });
+    const finalObservability = _AIService.buildObservability(finalResult, finalStartedAt);
     if (conversationId) {
-      await this.persistMessage(conversationId, {
-        role: "assistant",
-        content: finalResult.content
-      });
+      await this.persistMessage(
+        conversationId,
+        {
+          role: "assistant",
+          content: finalResult.content
+        },
+        finalObservability
+      );
       void this.summarizeConversation(conversationId);
     }
     return autoCreatedConversationId ? { ...finalResult, conversationId: autoCreatedConversationId } : finalResult;
@@ -2011,13 +2149,19 @@ ${assistantText.slice(0, 800)}` : "")
       }
     }
     for (let iteration = 0; iteration < maxIterations; iteration++) {
+      const turnStartedAt = Date.now();
       const result2 = await this.adapter.chat(conversation, chatOptions);
+      const turnObservability = _AIService.buildObservability(result2, turnStartedAt);
       if (!result2.toolCalls || result2.toolCalls.length === 0) {
         if (conversationId) {
-          await this.persistMessage(conversationId, {
-            role: "assistant",
-            content: result2.content
-          });
+          await this.persistMessage(
+            conversationId,
+            {
+              role: "assistant",
+              content: result2.content
+            },
+            turnObservability
+          );
           void this.summarizeConversation(conversationId);
         }
         yield textDeltaPart("stream", result2.content);
@@ -2036,7 +2180,7 @@ ${assistantText.slice(0, 800)}` : "")
       };
       conversation.push(assistantTurn);
       if (conversationId) {
-        await this.persistMessage(conversationId, assistantTurn);
+        await this.persistMessage(conversationId, assistantTurn, turnObservability);
       }
       const toolResults = await this.toolRegistry.executeAll(
         result2.toolCalls,
@@ -2078,12 +2222,18 @@ ${assistantText.slice(0, 800)}` : "")
       this.logger.warn("[AI] streamChatWithTools max iterations reached");
     }
     const finalOptions = { ...chatOptions, tools: void 0, toolChoice: void 0 };
+    const finalStartedAt = Date.now();
     const result = await this.adapter.chat(conversation, finalOptions);
+    const finalObservability = _AIService.buildObservability(result, finalStartedAt);
     if (conversationId) {
-      await this.persistMessage(conversationId, {
-        role: "assistant",
-        content: result.content
-      });
+      await this.persistMessage(
+        conversationId,
+        {
+          role: "assistant",
+          content: result.content
+        },
+        finalObservability
+      );
       void this.summarizeConversation(conversationId);
     }
     yield textDeltaPart("stream", result.content);
@@ -3342,6 +3492,39 @@ function buildPendingActionRoutes(aiService, logger) {
   ];
 }
+// src/routes/eval-routes.ts
+function buildEvalRoutes(evalRunner, logger) { // Returns the eval route table: a single POST route that runs one eval case through evalRunner.run.
+  return [
+    {
+      method: "POST",
+      path: "/api/v1/ai/evals/runs",
+      description: "Execute an AI eval case and persist the run record",
+      auth: true,
+      permissions: ["ai:admin"],
+      handler: async (req) => {
+        const body = req.body ?? {};
+        if (!body.caseId || typeof body.caseId !== "string") { // Only caseId is validated here; the other options are forwarded as received.
+          return { status: 400, body: { error: "caseId is required" } };
+        }
+        try {
+          const result = await evalRunner.run({
+            caseId: body.caseId,
+            agentId: body.agentId,
+            model: body.model,
+            judgeModel: body.judgeModel,
+            persist: body.persist
+          });
+          return { status: 200, body: result };
+        } catch (err) {
+          const message = err instanceof Error ? err.message : String(err);
+          logger.error("[AI Route] /ai/evals/runs error", err instanceof Error ? err : void 0); // Non-Error throwables are logged without a payload.
+          return { status: 500, body: { error: message } }; // NOTE(review): the raw error message is returned to the caller — confirm that is intended.
+        }
+      }
+    }
+  ];
+}
 // src/conversation/objectql-conversation-service.ts
 var import_node_crypto2 = require("crypto");
 var CONVERSATIONS_OBJECT = "ai_conversations";
@@ -3425,7 +3608,7 @@ var ObjectQLConversationService = class {
     );
     return conversations;
   }
-  async addMessage(conversationId, message) {
+  async addMessage(conversationId, message, extras) {
     const row = await this.engine.findOne(CONVERSATIONS_OBJECT, {
       where: { id: conversationId }
     });
@@ -3463,6 +3646,11 @@ var ObjectQLConversationService = class {
       content: contentStr,
       tool_calls: toolCallsJson,
       tool_call_id: toolCallId,
+      model: extras?.model ?? null,
+      prompt_tokens: extras?.promptTokens ?? null,
+      completion_tokens: extras?.completionTokens ?? null,
+      total_tokens: extras?.totalTokens ?? null,
+      latency_ms: extras?.latencyMs ?? null,
       created_at: now
     });
     await this.engine.update(CONVERSATIONS_OBJECT, { id: conversationId, updated_at: now }, {
@@ -3686,6 +3874,38 @@ var AiMessageObject = import_data2.ObjectSchema.create({
       maxLength: 255,
       description: "ID of the tool call this message responds to (when role=tool)"
     }),
+    // ── Per-message observability ────────────────────────────────────
+    // Populated when this message is the output of an LLM call (most
+    // assistant turns). User and tool messages leave them null. Lets
+    // analytics surfaces (cost per turn, latency histograms, A/B model
+    // comparisons) query a single table instead of joining ai_traces
+    // by timestamp.
+    model: import_data2.Field.text({
+      label: "Model",
+      required: false,
+      maxLength: 128,
+      description: "Model id reported by the adapter for the call that produced this message"
+    }),
+    prompt_tokens: import_data2.Field.number({
+      label: "Prompt Tokens",
+      required: false,
+      description: "Tokens in the request that produced this message"
+    }),
+    completion_tokens: import_data2.Field.number({
+      label: "Completion Tokens",
+      required: false,
+      description: "Tokens generated in this message"
+    }),
+    total_tokens: import_data2.Field.number({
+      label: "Total Tokens",
+      required: false,
+      description: "prompt + completion for the producing call"
+    }),
+    latency_ms: import_data2.Field.number({
+      label: "Latency (ms)",
+      required: false,
+      description: "Wall-clock duration of the LLM call that produced this message"
+    }),
     created_at: import_data2.Field.datetime({
       label: "Created At",
       required: true,
@@ -3695,7 +3915,8 @@ var AiMessageObject = import_data2.ObjectSchema.create({
   },
   indexes: [
     { fields: ["conversation_id"] },
-    { fields: ["conversation_id", "created_at"] }
+    { fields: ["conversation_id", "created_at"] },
+    { fields: ["model"] }
   ],
   enable: {
     trackHistory: false,
@@ -3984,6 +4205,195 @@ var AiPendingActionObject = import_data4.ObjectSchema.create({
   }
 });
+// src/objects/ai-eval-case.object.ts
+var import_data5 = require("@objectstack/spec/data");
+var AiEvalCaseObject = import_data5.ObjectSchema.create({ // Schema for the ai_eval_cases object: one record per golden test case.
+  name: "ai_eval_cases",
+  label: "AI Eval Case",
+  pluralLabel: "AI Eval Cases",
+  icon: "flask-conical",
+  isSystem: true,
+  description: "Golden test cases that pin down expected AI behavior",
+  fields: {
+    id: import_data5.Field.text({
+      label: "Case ID",
+      required: true,
+      readonly: true
+    }),
+    name: import_data5.Field.text({
+      label: "Name",
+      required: true,
+      maxLength: 255,
+      description: "Human-readable case name"
+    }),
+    agent_id: import_data5.Field.text({
+      label: "Agent ID",
+      required: true,
+      maxLength: 255,
+      description: "Target agent to invoke (resolved via ai_agents)"
+    }),
+    description: import_data5.Field.textarea({
+      label: "Description",
+      required: false,
+      description: "What this case validates and why it matters"
+    }),
+    input: import_data5.Field.textarea({ // Stored as text; per the description the value is a JSON-serialized message array.
+      label: "Input Messages",
+      required: true,
+      description: "JSON-serialized ModelMessage[] (the user prompt(s) to feed the agent)"
+    }),
+    expected_contains: import_data5.Field.text({ // The three fields that follow are all optional: substring check, regex check, judge rubric.
+      label: "Expected Substring",
+      required: false,
+      maxLength: 1024,
+      description: "If set, response must contain this substring (case-sensitive). Skipped when expected_regex is set."
+    }),
+    expected_regex: import_data5.Field.text({
+      label: "Expected Regex",
+      required: false,
+      maxLength: 1024,
+      description: "If set, response must match this JavaScript regex. Takes precedence over expected_contains."
+    }),
+    judge_instructions: import_data5.Field.textarea({
+      label: "Judge Instructions",
+      required: false,
+      description: "Extra rubric passed to the judge model when no expected_* is set"
+    }),
+    enabled: import_data5.Field.boolean({
+      label: "Enabled",
+      required: false,
+      defaultValue: true,
+      description: "Disabled cases are skipped by batch runs"
+    }),
+    created_at: import_data5.Field.datetime({
+      label: "Created At",
+      required: true,
+      defaultValue: "NOW()",
+      readonly: true
+    }),
+    updated_at: import_data5.Field.datetime({
+      label: "Updated At",
+      required: false
+    })
+  },
+  indexes: [ // Single-field indexes on agent_id and on enabled.
+    { fields: ["agent_id"] },
+    { fields: ["enabled"] }
+  ],
+  enable: {
+    trackHistory: true,
+    searchable: true,
+    apiEnabled: true,
+    trash: true,
+    mru: true
+  }
+});
+// src/objects/ai-eval-run.object.ts
+var import_data6 = require("@objectstack/spec/data");
+var AiEvalRunObject = import_data6.ObjectSchema.create({ // Schema for the ai_eval_runs object: one record per execution of an eval case.
+  name: "ai_eval_runs",
+  label: "AI Eval Run",
+  pluralLabel: "AI Eval Runs",
+  icon: "gauge",
+  isSystem: true,
+  description: "One execution of an eval case (used for regression tracking and model A/B comparisons)",
+  fields: {
+    id: import_data6.Field.text({
+      label: "Run ID",
+      required: true,
+      readonly: true
+    }),
+    case_id: import_data6.Field.lookup("ai_eval_cases", { // Lookup field targeting the ai_eval_cases object.
+      label: "Case",
+      required: true
+    }),
+    agent_id: import_data6.Field.text({
+      label: "Agent ID",
+      required: true,
+      maxLength: 255,
+      description: "Agent that was invoked (denormalized for fast filtering)"
+    }),
+    model: import_data6.Field.text({
+      label: "Model",
+      required: true,
+      maxLength: 128,
+      description: "Model id used for the eval (denormalized for A/B comparison)"
+    }),
+    status: import_data6.Field.select({ // Required select with three outcomes: pass, fail, error.
+      label: "Status",
+      required: true,
+      options: [
+        { label: "Pass", value: "pass" },
+        { label: "Fail", value: "fail" },
+        { label: "Error", value: "error" }
+      ]
+    }),
+    score: import_data6.Field.number({
+      label: "Score (0\u2013100)",
+      required: false,
+      description: "100 for pass, 0 for fail when using substring/regex check; judge score otherwise"
+    }),
+    response: import_data6.Field.textarea({
+      label: "Response",
+      required: false,
+      description: "The assistant response that was scored"
+    }),
+    error: import_data6.Field.textarea({
+      label: "Error",
+      required: false,
+      description: "Adapter error stack when status=error"
+    }),
+    judge_model: import_data6.Field.text({
+      label: "Judge Model",
+      required: false,
+      maxLength: 128,
+      description: "Model id of the judge (null if check was rule-based)"
+    }),
+    judge_reasoning: import_data6.Field.textarea({
+      label: "Judge Reasoning",
+      required: false,
+      description: "Free-form explanation from the judge model"
+    }),
+    prompt_tokens: import_data6.Field.number({ // The four numeric fields from here to latency_ms are optional and carry no description.
+      label: "Prompt Tokens",
+      required: false
+    }),
+    completion_tokens: import_data6.Field.number({
+      label: "Completion Tokens",
+      required: false
+    }),
+    total_tokens: import_data6.Field.number({
+      label: "Total Tokens",
+      required: false
+    }),
+    latency_ms: import_data6.Field.number({
+      label: "Latency (ms)",
+      required: false
+    }),
+    run_at: import_data6.Field.datetime({
+      label: "Run At",
+      required: true,
+      defaultValue: "NOW()",
+      readonly: true
+    })
+  },
+  indexes: [ // Three single-field indexes plus two composites (case_id + run_at, agent_id + model).
+    { fields: ["case_id"] },
+    { fields: ["model"] },
+    { fields: ["status"] },
+    { fields: ["case_id", "run_at"] },
+    { fields: ["agent_id", "model"] }
+  ],
+  enable: {
+    trackHistory: false,
+    searchable: false,
+    apiEnabled: true,
+    trash: false,
+    mru: false
+  }
+});
 // src/views/ai-trace.view.ts
 var import_spec = require("@objectstack/spec");
 var AiTraceView = (0, import_spec.defineView)({
@@ -4041,9 +4451,85 @@ var AiTraceView = (0, import_spec.defineView)({
   }
 });
-// src/views/ai-pending-action.view.ts
+// src/views/ai-message.view.ts
 var import_spec2 = require("@objectstack/spec");
-var AiPendingActionView = (0, import_spec2.defineView)({
+var AiMessageView = (0, import_spec2.defineView)({ // View definition over ai_messages: a default grid plus three named list views.
+  list: {
+    type: "grid",
+    data: { provider: "object", object: "ai_messages" },
+    columns: [
+      { field: "created_at", label: "Time" },
+      { field: "conversation_id", label: "Conversation" },
+      { field: "role" },
+      { field: "model" },
+      { field: "prompt_tokens", label: "Prompt" },
+      { field: "completion_tokens", label: "Output" },
+      { field: "total_tokens", label: "Total" },
+      { field: "latency_ms", label: "Latency (ms)" }
+    ],
+    sort: [{ field: "created_at", order: "desc" }],
+    pagination: { pageSize: 50 },
+    searchableFields: ["conversation_id", "content", "tool_call_id"],
+    filterableFields: ["role", "model", "conversation_id"]
+  },
+  listViews: {
+    assistants_only: {
+      label: "Assistant turns",
+      type: "grid",
+      data: { provider: "object", object: "ai_messages" },
+      columns: [
+        { field: "created_at", label: "Time" },
+        { field: "conversation_id", label: "Conversation" },
+        { field: "model" },
+        { field: "prompt_tokens", label: "Prompt" },
+        { field: "completion_tokens", label: "Output" },
+        { field: "total_tokens", label: "Total" },
+        { field: "latency_ms", label: "Latency (ms)" },
+        { field: "content", label: "Reply (preview)" }
+      ],
+      filter: [{ field: "role", operator: "=", value: "assistant" }], // All three named views restrict rows to role = "assistant".
+      sort: [{ field: "created_at", order: "desc" }]
+    },
+    by_model: {
+      label: "By model",
+      type: "grid",
+      data: { provider: "object", object: "ai_messages" },
+      columns: [
+        { field: "model" },
+        { field: "created_at", label: "Time" },
+        { field: "latency_ms", label: "Latency (ms)" },
+        { field: "total_tokens", label: "Tokens" },
+        { field: "conversation_id", label: "Conversation" }
+      ],
+      filter: [{ field: "role", operator: "=", value: "assistant" }],
+      sort: [ // Ordered by model ascending, then created_at descending.
+        { field: "model", order: "asc" },
+        { field: "created_at", order: "desc" }
+      ]
+    },
+    slow: {
+      label: "Slow turns (>5s)",
+      type: "grid",
+      data: { provider: "object", object: "ai_messages" },
+      columns: [
+        { field: "created_at", label: "Time" },
+        { field: "model" },
+        { field: "latency_ms", label: "Latency (ms)" },
+        { field: "total_tokens", label: "Tokens" },
+        { field: "conversation_id", label: "Conversation" }
+      ],
+      filter: [
+        { field: "role", operator: "=", value: "assistant" },
+        { field: "latency_ms", operator: ">", value: 5e3 } // 5e3 = 5000 ms, the ">5s" threshold named in the label.
+      ],
+      sort: [{ field: "latency_ms", order: "desc" }]
+    }
+  }
+});
+// src/views/ai-pending-action.view.ts
+var import_spec3 = require("@objectstack/spec");
+var AiPendingActionView = (0, import_spec3.defineView)({
   list: {
     type: "grid",
     data: { provider: "object", object: "ai_pending_actions" },
@@ -4273,12 +4759,325 @@ var AiPendingActionView = (0, import_spec2.defineView)({
   }
 });
+// src/views/ai-eval.view.ts
+var import_spec4 = require("@objectstack/spec");
+var AiEvalRunView = (0, import_spec4.defineView)({ // View definitions for the "ai_eval_runs" object: one default list plus three named list views.
+  list: { // Default grid: every run, newest first.
+    type: "grid",
+    data: { provider: "object", object: "ai_eval_runs" },
+    columns: [
+      { field: "run_at", label: "Run At" },
+      { field: "case_id", label: "Case" },
+      { field: "agent_id", label: "Agent" },
+      { field: "model" },
+      { field: "status" },
+      { field: "score" },
+      { field: "latency_ms", label: "Latency (ms)" },
+      { field: "total_tokens", label: "Tokens" }
+    ],
+    sort: [{ field: "run_at", order: "desc" }],
+    pagination: { pageSize: 50 },
+    filterableFields: ["status", "model", "agent_id", "case_id"], // presumably the fields offered as filters in the UI; confirm against @objectstack/spec
+    searchableFields: ["response", "judge_reasoning"] // presumably the free-text search targets; confirm against @objectstack/spec
+  },
+  listViews: { // Named alternative grids over the same object.
+    failures: { // Only runs whose status is "fail" or "error"; adds the error and judge_reasoning columns.
+      label: "Failures & errors",
+      type: "grid",
+      data: { provider: "object", object: "ai_eval_runs" },
+      columns: [
+        { field: "run_at", label: "Run At" },
+        { field: "case_id", label: "Case" },
+        { field: "model" },
+        { field: "status" },
+        { field: "score" },
+        { field: "error" },
+        { field: "judge_reasoning" }
+      ],
+      filter: [{ field: "status", operator: "in", value: ["fail", "error"] }],
+      sort: [{ field: "run_at", order: "desc" }]
+    },
+    by_model: { // Unfiltered; ordered by model name, then newest first within each model.
+      label: "By model",
+      type: "grid",
+      data: { provider: "object", object: "ai_eval_runs" },
+      columns: [
+        { field: "model" },
+        { field: "case_id", label: "Case" },
+        { field: "status" },
+        { field: "score" },
+        { field: "latency_ms", label: "Latency (ms)" },
+        { field: "total_tokens", label: "Tokens" },
+        { field: "run_at", label: "Run At" }
+      ],
+      sort: [
+        { field: "model", order: "asc" },
+        { field: "run_at", order: "desc" }
+      ]
+    },
+    latest_per_case: { // Ordered by case, newest first within each case. NOTE(review): no filter or de-duplication is declared here, so every run of a case is listed, not only the latest one; confirm intent.
+      label: "Latest per case",
+      type: "grid",
+      data: { provider: "object", object: "ai_eval_runs" },
+      columns: [
+        { field: "case_id", label: "Case" },
+        { field: "model" },
+        { field: "status" },
+        { field: "score" },
+        { field: "latency_ms", label: "Latency (ms)" },
+        { field: "run_at", label: "Run At" }
+      ],
+      sort: [
+        { field: "case_id", order: "asc" },
+        { field: "run_at", order: "desc" }
+      ]
+    }
+  }
+});
+var AiEvalCaseView = (0, import_spec4.defineView)({ // View definition for the "ai_eval_cases" object: a single default grid, no named list views.
+  list: {
+    type: "grid",
+    data: { provider: "object", object: "ai_eval_cases" },
+    columns: [
+      { field: "name" },
+      { field: "agent_id", label: "Agent" },
+      { field: "enabled" },
+      { field: "expected_contains", label: "Expected (substring)" },
+      { field: "expected_regex", label: "Expected (regex)" },
+      { field: "updated_at" }
+    ],
+    sort: [{ field: "updated_at", order: "desc" }], // most recently updated case first
+    pagination: { pageSize: 50 },
+    filterableFields: ["agent_id", "enabled"], // presumably the fields offered as filters in the UI; confirm against @objectstack/spec
+    searchableFields: ["name", "description", "input"] // presumably the free-text search targets; confirm against @objectstack/spec
+  }
+});
+// src/eval/eval-runner.ts
+var import_node_crypto3 = require("crypto");
+var import_zod = require("zod");
+var EVAL_CASES_OBJECT = "ai_eval_cases"; // object name EvalRunner.loadCase reads case rows from
+var EVAL_RUNS_OBJECT = "ai_eval_runs"; // object name EvalRunner.persist inserts run rows into
+var JudgeOutputSchema = import_zod.z.object({ // Shape the LLM judge's verdict is validated against in EvalRunner.runJudge.
+  score: import_zod.z.number().min(0).max(100), // numeric grade, bounded to 0..100
+  reasoning: import_zod.z.string().min(1) // judge's explanation; must be a non-empty string
+});
+var EvalRunner = class {
+  constructor(metadataService, dataEngine, aiService, agentRuntime) {
+    this.metadataService = metadataService;
+    this.dataEngine = dataEngine;
+    this.aiService = aiService;
+    this.agentRuntime = agentRuntime;
+  }
+  async run(options) {
+    const caseRow = await this.loadCase(options.caseId);
+    const agentId = options.agentId ?? caseRow.agent_id;
+    const agent = await this.agentRuntime.loadAgent(agentId);
+    if (!agent) {
+      throw new Error(`EvalRunner: agent "${agentId}" not found`);
+    }
+    const userMessages = this.parseInput(caseRow.input);
+    const activeSkills = await this.agentRuntime.resolveActiveSkills(
+      agent,
+      options.agentContext
+    );
+    const systemMessages = this.agentRuntime.buildSystemMessages(
+      agent,
+      options.agentContext,
+      activeSkills
+    );
+    const toolDefs = this.aiService.toolRegistry.getAll();
+    const agentOptions = this.agentRuntime.buildRequestOptions(
+      agent,
+      toolDefs,
+      activeSkills
+    );
+    const fullMessages = [...systemMessages, ...userMessages];
+    const effectiveModel = options.model ?? agentOptions.model ?? "(adapter default)";
+    const startedAt = Date.now();
+    let responseText = "";
+    let errorMessage = null;
+    let promptTokens = null;
+    let completionTokens = null;
+    let totalTokens = null;
+    try {
+      const result2 = await this.aiService.chatWithTools(fullMessages, {
+        ...agentOptions,
+        model: options.model ?? agentOptions.model,
+        maxIterations: agent.planning?.maxIterations
+      });
+      responseText = result2.content ?? "";
+      const usage = result2.usage;
+      if (usage) {
+        promptTokens = usage.promptTokens ?? null;
+        completionTokens = usage.completionTokens ?? null;
+        totalTokens = usage.totalTokens ?? null;
+      }
+    } catch (err) {
+      errorMessage = err instanceof Error ? err.stack ?? err.message : String(err);
+    }
+    const latencyMs = Date.now() - startedAt;
+    let status = "error";
+    let score = null;
+    let judgeModel = null;
+    let judgeReasoning = null;
+    if (errorMessage) {
+      status = "error";
+    } else if (caseRow.expected_regex) {
+      let regex = null;
+      try {
+        regex = new RegExp(caseRow.expected_regex);
+      } catch (re) {
+        status = "error";
+        errorMessage = `Invalid expected_regex: ${re instanceof Error ? re.message : String(re)}`;
+      }
+      if (regex) {
+        const matched = regex.test(responseText);
+        status = matched ? "pass" : "fail";
+        score = matched ? 100 : 0;
+      }
+    } else if (caseRow.expected_contains) {
+      const matched = responseText.includes(caseRow.expected_contains);
+      status = matched ? "pass" : "fail";
+      score = matched ? 100 : 0;
+    } else {
+      judgeModel = options.judgeModel ?? options.model ?? agentOptions.model ?? null;
+      try {
+        const judgement = await this.runJudge({
+          model: judgeModel,
+          caseRow,
+          response: responseText
+        });
+        score = judgement.score;
+        judgeReasoning = judgement.reasoning;
+        status = judgement.score >= 70 ? "pass" : "fail";
+      } catch (je) {
+        status = "error";
+        errorMessage = je instanceof Error ? je.stack ?? je.message : String(je);
+      }
+    }
+    const result = {
+      id: (0, import_node_crypto3.randomUUID)(),
+      caseId: caseRow.id,
+      agentId,
+      model: effectiveModel,
+      status,
+      score,
+      response: responseText,
+      error: errorMessage,
+      judgeModel,
+      judgeReasoning,
+      promptTokens,
+      completionTokens,
+      totalTokens,
+      latencyMs
+    };
+    if (options.persist !== false) {
+      await this.persist(result);
+    }
+    return result;
+  }
+  // ── Helpers ──────────────────────────────────────────────────────
+  async loadCase(caseId) {
+    const row = await this.dataEngine.findOne(EVAL_CASES_OBJECT, {
+      where: { id: caseId }
+    });
+    if (!row) {
+      throw new Error(`EvalRunner: case "${caseId}" not found`);
+    }
+    if (row.enabled === false) {
+      throw new Error(`EvalRunner: case "${caseId}" is disabled`);
+    }
+    return row;
+  }
+  parseInput(input) {
+    const trimmed = input.trim();
+    if (!trimmed.startsWith("[") && !trimmed.startsWith("{") && !trimmed.startsWith('"')) {
+      return [{ role: "user", content: input }];
+    }
+    let parsed;
+    try {
+      parsed = JSON.parse(trimmed);
+    } catch {
+      return [{ role: "user", content: input }];
+    }
+    if (Array.isArray(parsed)) {
+      return parsed;
+    }
+    if (typeof parsed === "string") {
+      return [{ role: "user", content: parsed }];
+    }
+    if (parsed && typeof parsed === "object" && "role" in parsed) {
+      return [parsed];
+    }
+    throw new Error("input must be a string, ModelMessage, or ModelMessage[]");
+  }
+  async runJudge(args) {
+    const rubric = args.caseRow.judge_instructions?.trim() || "Decide whether the assistant response correctly and helpfully answers the user request.";
+    const judgeMessages = [
+      {
+        role: "system",
+        content: "You are an impartial grader for an AI evaluation harness. Score the candidate response from 0 to 100 where 100 means it fully and correctly satisfies the rubric and 0 means it does not. Reply with structured JSON only."
+      },
+      {
+        role: "user",
+        content: [
+          `# Rubric
+${rubric}`,
+          `# Case name
+${args.caseRow.name}`,
+          args.caseRow.description ? `# Case description
+${args.caseRow.description}` : "",
+          `# Original user input
+${args.caseRow.input}`,
+          `# Candidate response
+${args.response || "(empty)"}`
+        ].filter(Boolean).join("\n\n")
+      }
+    ];
+    if (typeof this.aiService.generateObject === "function") {
+      const out = await this.aiService.generateObject(judgeMessages, JudgeOutputSchema, {
+        model: args.model ?? void 0
+      });
+      return JudgeOutputSchema.parse(out.object);
+    }
+    const judged = await this.aiService.chatWithTools(judgeMessages, {
+      model: args.model ?? void 0
+    });
+    const text = judged.content ?? "";
+    const match = text.match(/\{[\s\S]*\}/);
+    if (!match) {
+      throw new Error(`Judge response did not contain JSON: ${text.slice(0, 200)}`);
+    }
+    return JudgeOutputSchema.parse(JSON.parse(match[0]));
+  }
+  async persist(run) {
+    await this.dataEngine.insert(EVAL_RUNS_OBJECT, {
+      id: run.id,
+      case_id: run.caseId,
+      agent_id: run.agentId,
+      model: run.model,
+      status: run.status,
+      score: run.score,
+      response: run.response,
+      error: run.error,
+      judge_model: run.judgeModel,
+      judge_reasoning: run.judgeReasoning,
+      prompt_tokens: run.promptTokens,
+      completion_tokens: run.completionTokens,
+      total_tokens: run.totalTokens,
+      latency_ms: run.latencyMs,
+      run_at: (/* @__PURE__ */ new Date()).toISOString()
+    });
+  }
+};
 // src/plugin.ts
 init_data_tools();
 init_metadata_tools();
 // src/tools/query-data.tool.ts
-var import_zod = require("zod");
+var import_zod2 = require("zod");
 // src/schema-retriever.ts
 var SchemaRetriever = class {
@@ -4453,19 +5252,19 @@ function buildAiEngineContext(ctx) {
   }
   return { roles: [], permissions: [], isSystem: true };
 }
-var QueryPlanSchema = import_zod.z.object({
-  objectName: import_zod.z.string().min(1).describe('The snake_case object name to query (e.g. "task", "account").'),
-  whereJson: import_zod.z.string().nullable().describe(
+var QueryPlanSchema = import_zod2.z.object({
+  objectName: import_zod2.z.string().min(1).describe('The snake_case object name to query (e.g. "task", "account").'),
+  whereJson: import_zod2.z.string().nullable().describe(
     'Filter conditions encoded as a JSON object string. Examples: `{"status":"completed"}`, `{"subject":{"$contains":"Build"}}`, `{"amount":{"$gt":100}}`. Pass null to match all records.'
   ),
-  fields: import_zod.z.array(import_zod.z.string()).nullable().describe("Field names to return. Pass null to return all fields."),
-  orderBy: import_zod.z.array(
-    import_zod.z.object({
-      field: import_zod.z.string(),
-      order: import_zod.z.enum(["asc", "desc"])
+  fields: import_zod2.z.array(import_zod2.z.string()).nullable().describe("Field names to return. Pass null to return all fields."),
+  orderBy: import_zod2.z.array(
+    import_zod2.z.object({
+      field: import_zod2.z.string(),
+      order: import_zod2.z.enum(["asc", "desc"])
     })
   ).nullable().describe("Sort order. First entry is primary sort key. Pass null for no sort."),
-  limit: import_zod.z.number().int().min(1).max(200).nullable().describe("Maximum number of records (default 20, max 200). Pass null for default.")
+  limit: import_zod2.z.number().int().min(1).max(200).nullable().describe("Maximum number of records (default 20, max 200). Pass null for default.")
 });
 var QUERY_DATA_TOOL = {
   name: "query_data",
@@ -5434,13 +6233,14 @@ Capabilities:
 Guidelines:
 1. Always use the describe_object tool first to understand a table's structure before querying it.
-2. Respect the user's current context \u2014 if they are viewing a specific object or record, use that as the default scope.
-3. When presenting data, format it in a clear and readable way using markdown tables or bullet lists.
-4. For large result sets, summarize the data and mention the total count.
-5. When performing aggregations, explain the results in plain language.
-6. If a query returns no results, suggest possible reasons and alternative queries.
-7. Never expose internal IDs unless the user explicitly asks for them.
-8. Always answer in the same language the user is using.`,
+2. Do NOT assume generic fields like \`status\`, \`is_active\`, \`deleted_at\`, \`type\`, or \`enabled\` exist on every object \u2014 they almost never do. Field names in \`where\`, \`fields\`, \`orderBy\`, \`groupBy\`, and aggregations MUST come from describe_object output. If the tool returns an "Unknown field" error, call describe_object on that object and retry with real field names.
+3. Respect the user's current context \u2014 if they are viewing a specific object or record, use that as the default scope.
+4. When presenting data, format it in a clear and readable way using markdown tables or bullet lists.
+5. For large result sets, summarize the data and mention the total count.
+6. When performing aggregations, explain the results in plain language.
+7. If a query returns no results, suggest possible reasons and alternative queries.
+8. Never expose internal IDs unless the user explicitly asks for them.
+9. Always answer in the same language the user is using.`,
   tools: [
     "query_data",
     "list_objects",
@@ -5552,10 +6352,11 @@ Guidelines:
 // src/adapters/vercel-adapter.ts
 var import_ai9 = require("ai");
-function buildVercelOptions(options) {
+function buildVercelOptions(options, modelId) {
   if (!options) return {};
   const opts = {};
-  if (options.temperature != null) opts.temperature = options.temperature;
+  const reasoning = isReasoningModel(modelId);
+  if (options.temperature != null && !reasoning) opts.temperature = options.temperature;
   if (options.maxTokens != null) opts.maxTokens = options.maxTokens;
   if (options.stop?.length) opts.stopSequences = options.stop;
   if (options.tools?.length) {
@@ -5573,6 +6374,11 @@ function buildVercelOptions(options) {
   }
   return opts;
 }
+function isReasoningModel(modelId) {
+  if (!modelId) return false;
+  const id = modelId.includes("/") ? modelId.slice(modelId.lastIndexOf("/") + 1) : modelId;
+  return /^(o[134](?:-|$)|gpt-5(?:-|$)|o4-mini)/i.test(id);
+}
 var VercelLLMAdapter = class {
   constructor(config) {
     this.name = "vercel";
@@ -5582,7 +6388,7 @@ var VercelLLMAdapter = class {
     const result = await (0, import_ai9.generateText)({
       model: this.model,
       messages,
-      ...buildVercelOptions(options)
+      ...buildVercelOptions(options, this.model.modelId)
     });
     return {
       content: result.text,
@@ -5599,7 +6405,7 @@ var VercelLLMAdapter = class {
     const result = await (0, import_ai9.generateText)({
       model: this.model,
       prompt,
-      ...buildVercelOptions(options)
+      ...buildVercelOptions(options, this.model.modelId)
     });
     return {
       content: result.text,
@@ -5615,7 +6421,7 @@ var VercelLLMAdapter = class {
     const result = (0, import_ai9.streamText)({
       model: this.model,
       messages,
-      ...buildVercelOptions(options)
+      ...buildVercelOptions(options, this.model.modelId)
     });
     try {
       for await (const part of result.fullStream) {
@@ -5641,7 +6447,7 @@ var VercelLLMAdapter = class {
       schema,
       schemaName,
       schemaDescription,
-      ...buildVercelOptions(rest)
+      ...buildVercelOptions(rest, this.model.modelId)
     });
     return {
       object: result.object,
@@ -5730,7 +6536,7 @@ function computeCost(pricing, usage) {
 }
 // src/plugin.ts
-var AIServicePlugin = class {
+var _AIServicePlugin = class _AIServicePlugin {
   constructor(options = {}) {
     this.name = "com.objectstack.service-ai";
     this.version = "1.0.0";
@@ -5738,13 +6544,44 @@ var AIServicePlugin = class {
     this.dependencies = ["com.objectstack.engine.objectql"];
     this.options = options;
   }
+  /**
+   * Normalise OpenAI-compatible preset providers (DeepSeek / DashScope /
+   * Cloudflare / SiliconFlow / OpenRouter) into the `provider=openai` shape
+   * with the appropriate base URL pre-filled. Returns the rewritten values
+   * map; non-preset providers pass through unchanged.
+   */
+  normalisePresetProvider(values) {
+    const provider = String(values.provider ?? "memory");
+    if (provider === "cloudflare") {
+      const accountId = String(values.cloudflare_account_id ?? "").trim();
+      const gatewayId = String(values.cloudflare_gateway_id ?? "default").trim() || "default";
+      if (!accountId) return values;
+      return {
+        ...values,
+        provider: "openai",
+        openai_api_key: values.cloudflare_api_key,
+        openai_base_url: `https://gateway.ai.cloudflare.com/v1/${accountId}/${gatewayId}/compat`,
+        openai_model: values.cloudflare_model ?? "openai/gpt-4o-mini"
+      };
+    }
+    const preset = _AIServicePlugin.OPENAI_COMPATIBLE_PRESETS[provider];
+    if (!preset) return values;
+    return {
+      ...values,
+      provider: "openai",
+      openai_api_key: values[`${provider}_api_key`],
+      openai_base_url: preset.baseURL,
+      openai_model: values[`${provider}_model`] ?? preset.defaultModel
+    };
+  }
   /**
    * Build an LLM adapter from a provider/key/model triple. Used both
    * by the boot-time auto-detect path and by the live `settings:changed`
    * rebuild path. Returns `null` if the requested provider cannot be
    * loaded or required credentials are missing.
    */
-  async buildAdapterFromValues(ctx, values) {
+  async buildAdapterFromValues(ctx, rawValues) {
+    const values = this.normalisePresetProvider(rawValues);
     const provider = String(values.provider ?? "memory");
     if (provider === "memory") {
       return { adapter: new MemoryLLMAdapter(), description: "MemoryLLMAdapter (echo mode)" };
@@ -6042,8 +6879,8 @@ var AIServicePlugin = class {
       type: "plugin",
       scope: "project",
       namespace: "ai",
-      objects: [AiConversationObject, AiMessageObject, AiTraceObject, AiPendingActionObject],
-      views: [AiTraceView, AiPendingActionView]
+      objects: [AiConversationObject, AiMessageObject, AiTraceObject, AiPendingActionObject, AiEvalCaseObject, AiEvalRunObject],
+      views: [AiTraceView, AiMessageView, AiPendingActionView, AiEvalCaseView, AiEvalRunView]
     });
     if (this.options.debug) {
       ctx.hook("ai:beforeChat", async (messages) => {
@@ -6080,7 +6917,11 @@ var AIServicePlugin = class {
     try {
       const dataEngine = ctx.getService("data");
       if (dataEngine) {
-        registerDataTools(this.service.toolRegistry, { dataEngine });
+        registerDataTools(this.service.toolRegistry, {
+          dataEngine,
+          metadataService,
+          protocol: protocolService
+        });
         ctx.logger.info("[AI] Built-in data tools registered");
         if (metadataService) {
           registerQueryDataTool(this.service.toolRegistry, {
@@ -6301,6 +7142,20 @@ var AIServicePlugin = class {
       const assistantRoutes = buildAssistantRoutes(this.service, agentRuntime, skillRegistry, ctx.logger);
       routes.push(...assistantRoutes);
       ctx.logger.info(`[AI] Assistant (ambient) routes registered (${assistantRoutes.length} routes)`);
+      const evalDataEngine = ctx.getService("data");
+      if (evalDataEngine && typeof evalDataEngine.insert === "function") {
+        const evalRunner = new EvalRunner(
+          metadataService,
+          evalDataEngine,
+          this.service,
+          agentRuntime
+        );
+        const evalRoutes = buildEvalRoutes(evalRunner, ctx.logger);
+        routes.push(...evalRoutes);
+        ctx.logger.info(`[AI] Eval routes registered (${evalRoutes.length} routes)`);
+      } else {
+        ctx.logger.debug("[AI] IDataEngine not available, skipping eval routes");
+      }
     } else {
       ctx.logger.debug("[AI] Metadata service not available, skipping agent and assistant routes");
     }
@@ -6529,6 +7384,20 @@ var AIServicePlugin = class {
     this.service = void 0;
   }
 };
+/**
+ * OpenAI-compatible preset providers — these all expose `/v1/chat/completions`
+ * in OpenAI shape, so we re-use the `@ai-sdk/openai` SDK with a preset
+ * base URL. Centralising the mapping here keeps the settings UI ergonomic
+ * (operators pick "DeepSeek", not "openai" + a base URL they have to look up)
+ * without bloating buildAdapterFromValues with a switch per provider.
+ */
+_AIServicePlugin.OPENAI_COMPATIBLE_PRESETS = { // Keyed by the `provider` setting value; normalisePresetProvider copies baseURL into openai_base_url and uses defaultModel when no `<provider>_model` is set.
+  deepseek: { baseURL: "https://api.deepseek.com", defaultModel: "deepseek-chat" },
+  dashscope: { baseURL: "https://dashscope.aliyuncs.com/compatible-mode/v1", defaultModel: "qwen-plus" },
+  siliconflow: { baseURL: "https://api.siliconflow.cn/v1", defaultModel: "Qwen/Qwen2.5-7B-Instruct" },
+  openrouter: { baseURL: "https://openrouter.ai/api/v1", defaultModel: "openai/gpt-4o-mini" }
+}; // "cloudflare" is intentionally absent: its URL is built per account in normalisePresetProvider.
+var AIServicePlugin = _AIServicePlugin;
 function extractOverrides(payload) {
   if (!payload || typeof payload !== "object") return {};
   const p = payload;