@objectstack/service-ai 7.0.0 → 7.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -42,6 +42,77 @@ function buildEngineContext(ctx) {
42
42
  }
43
43
  return { roles: [], permissions: [], isSystem: true };
44
44
  }
45
/**
 * Resolve the set of field names declared on `objectName`.
 *
 * Lookup order:
 *   1. `ctx.metadataService.getObject(objectName)` when a metadata service is present.
 *   2. `ctx.protocol.getMetaItems({ type: "object" })`, searching the returned
 *      array (or its `items` array) for an entry whose `name` matches.
 *
 * Both lookups are best-effort: a throwing lookup is treated as "not found".
 *
 * @param {object} ctx - Tool context; `metadataService` and `protocol` are optional.
 * @param {string} objectName - Name of the object whose fields are wanted.
 * @returns {Promise<Set<string> | null>} `"id"` plus every declared field name,
 *   or `null` when the schema cannot be determined. Callers treat `null` as
 *   "skip validation".
 */
async function resolveObjectFieldNames(ctx, objectName) {
  let def;
  if (ctx.metadataService) {
    try {
      def = await ctx.metadataService.getObject(objectName);
    } catch {
      // Best-effort: fall through to the protocol lookup below.
      def = void 0;
    }
  }
  if (!def && ctx.protocol?.getMetaItems) {
    try {
      const all = await ctx.protocol.getMetaItems({ type: "object" });
      const items = Array.isArray(all) ? all : Array.isArray(all?.items) ? all.items : [];
      def = items.find((o) => o?.name === objectName);
    } catch {
      def = void 0;
    }
  }
  if (!def) return null;

  const { fields } = def;
  let declared = [];
  if (Array.isArray(fields)) {
    // Array-form schema: entries are field names or `{ name }` descriptors.
    // Object.keys() here would yield "0", "1", ... instead of field names.
    declared = fields
      .map((f) => (typeof f === "string" ? f : f?.name))
      .filter((n) => typeof n === "string" && n.length > 0);
  } else if (fields && typeof fields === "object") {
    declared = Object.keys(fields);
  }
  // A definition without any usable field names would make every reference
  // look unknown; fail open like the other "schema unavailable" paths.
  if (declared.length === 0) return null;
  return /* @__PURE__ */ new Set(["id", ...declared]);
}
68
/**
 * Walk a `where` filter and add every referenced field name to `acc`.
 *
 * Keys starting with `$` are treated as operators / logical combinators:
 * they are never recorded as fields, but their values are still walked so
 * nested clauses (`$and: [{ a: 1 }]`) contribute their field names. Using
 * the `$` prefix rather than a fixed whitelist keeps operators that are not
 * enumerated (e.g. `$all`, `$elemMatch`) from being reported as field names.
 *
 * Any other key is recorded as a field; when its value is a plain object
 * (an operator map such as `{ $gt: 1 }`) that object is walked as well.
 *
 * @param {unknown} where - Filter object, array of clauses, or anything else (ignored).
 * @param {Set<string>} acc - Accumulator that receives the field names (mutated).
 * @returns {void}
 */
function collectWhereFields(where, acc) {
  if (!where || typeof where !== "object") return;
  if (Array.isArray(where)) {
    for (const clause of where) collectWhereFields(clause, acc);
    return;
  }
  for (const [key, value] of Object.entries(where)) {
    if (key.startsWith("$")) {
      collectWhereFields(value, acc);
      continue;
    }
    acc.add(key);
    if (value && typeof value === "object" && !Array.isArray(value)) {
      collectWhereFields(value, acc);
    }
  }
}
85
/**
 * Serialize the tool-facing error returned when a query references fields
 * that are not part of the object's schema.
 *
 * @param {string} objectName - Object the query targeted.
 * @param {string[]} unknown - Referenced names that are not declared fields.
 * @param {Set<string>} available - All declared field names.
 * @returns {string} JSON string carrying the message, the unknown names, up to
 *   40 available names, a truncation flag, the total count, and a hint.
 */
function unknownFieldError(objectName, unknown, available) {
  // Only the first 40 names are echoed back to keep the payload small.
  const preview = [];
  for (const fieldName of available) {
    if (preview.length === 40) break;
    preview.push(fieldName);
  }
  const payload = {
    error: `Unknown field(s) ${JSON.stringify(unknown)} on "${objectName}". Call describe_object first to see the real schema \u2014 do not guess generic fields like \`status\`, \`is_active\`, or \`deleted_at\`.`,
    objectName,
    unknownFields: unknown,
    availableFields: preview,
    availableFieldsTruncated: available.size > preview.length,
    totalAvailable: available.size,
    hint: "Use the describe_object tool to fetch the authoritative field list."
  };
  return JSON.stringify(payload);
}
98
/**
 * Check every field referenced by a data-tool call against the object's schema.
 *
 * References are gathered from `refs.where` (via `collectWhereFields`),
 * `refs.fields`, `refs.orderBy[].field`, `refs.groupBy` and
 * `refs.aggregations[].field`.
 *
 * The check is advisory. When the schema cannot be resolved it is skipped, and
 * malformed argument shapes (a non-array where an array is expected, or a
 * non-string entry) are ignored rather than iterated, so they are not reported
 * as bogus field names and cannot raise a TypeError here.
 *
 * @param {object} ctx - Tool context forwarded to `resolveObjectFieldNames`.
 * @param {string} objectName - Object being queried.
 * @param {{ where?: unknown, fields?: unknown, orderBy?: unknown, groupBy?: unknown, aggregations?: unknown }} refs
 *   Field-bearing parts of the tool arguments.
 * @returns {Promise<string | null>} JSON error string from `unknownFieldError`
 *   when at least one reference is unknown, otherwise `null`.
 */
async function validateFieldReferences(ctx, objectName, refs) {
  const available = await resolveObjectFieldNames(ctx, objectName);
  if (!available) return null;

  const referenced = /* @__PURE__ */ new Set();
  const asArray = (value) => (Array.isArray(value) ? value : []);
  const addName = (name) => {
    // NOTE(review): "*" is skipped on the assumption that it is a wildcard
    // (e.g. count over all rows) and never a declared field name; confirm
    // against the aggregate tool contract.
    if (typeof name === "string" && name !== "*") referenced.add(name);
  };

  collectWhereFields(refs.where, referenced);
  for (const name of asArray(refs.fields)) addName(name);
  for (const entry of asArray(refs.orderBy)) addName(entry?.field);
  for (const name of asArray(refs.groupBy)) addName(name);
  for (const entry of asArray(refs.aggregations)) addName(entry?.field);

  const unknown = [...referenced].filter((name) => !available.has(name));
  return unknown.length === 0 ? null : unknownFieldError(objectName, unknown, available);
}
45
116
  function createQueryRecordsHandler(ctx) {
46
117
  return async (args, execCtx) => {
47
118
  const {
@@ -52,6 +123,12 @@ function createQueryRecordsHandler(ctx) {
52
123
  limit,
53
124
  offset
54
125
  } = args;
126
+ const validationError = await validateFieldReferences(ctx, objectName, {
127
+ where,
128
+ fields,
129
+ orderBy
130
+ });
131
+ if (validationError) return validationError;
55
132
  const rawLimit = limit ?? DEFAULT_QUERY_LIMIT;
56
133
  const safeLimit = Number.isFinite(rawLimit) && rawLimit > 0 ? Math.min(Math.floor(rawLimit), MAX_QUERY_LIMIT) : DEFAULT_QUERY_LIMIT;
57
134
  const safeOffset = Number.isFinite(offset) && offset >= 0 ? Math.floor(offset) : void 0;
@@ -69,6 +146,8 @@ function createQueryRecordsHandler(ctx) {
69
146
  function createGetRecordHandler(ctx) {
70
147
  return async (args, execCtx) => {
71
148
  const { objectName, recordId, fields } = args;
149
+ const validationError = await validateFieldReferences(ctx, objectName, { fields });
150
+ if (validationError) return validationError;
72
151
  const record = await ctx.dataEngine.findOne(objectName, {
73
152
  where: { id: recordId },
74
153
  fields,
@@ -90,6 +169,12 @@ function createAggregateDataHandler(ctx) {
90
169
  });
91
170
  }
92
171
  }
172
+ const validationError = await validateFieldReferences(ctx, objectName, {
173
+ where,
174
+ groupBy,
175
+ aggregations
176
+ });
177
+ if (validationError) return validationError;
93
178
  const result = await ctx.dataEngine.aggregate(objectName, {
94
179
  where,
95
180
  groupBy,
@@ -108,7 +193,7 @@ function registerDataTools(registry, context) {
108
193
  registry.register(GET_RECORD_TOOL, createGetRecordHandler(context));
109
194
  registry.register(AGGREGATE_DATA_TOOL, createAggregateDataHandler(context));
110
195
  }
111
- var MAX_QUERY_LIMIT, DEFAULT_QUERY_LIMIT, QUERY_RECORDS_TOOL, GET_RECORD_TOOL, AGGREGATE_DATA_TOOL, DATA_TOOL_DEFINITIONS, VALID_AGG_FUNCTIONS;
196
+ var MAX_QUERY_LIMIT, DEFAULT_QUERY_LIMIT, QUERY_RECORDS_TOOL, GET_RECORD_TOOL, AGGREGATE_DATA_TOOL, DATA_TOOL_DEFINITIONS, WHERE_OPERATOR_KEYS, VALID_AGG_FUNCTIONS;
112
197
  var init_data_tools = __esm({
113
198
  "src/tools/data-tools.ts"() {
114
199
  "use strict";
@@ -126,7 +211,7 @@ var init_data_tools = __esm({
126
211
  },
127
212
  where: {
128
213
  type: "object",
129
- description: 'Filter conditions as key-value pairs (e.g. { "status": "active" }) or MongoDB-style operators (e.g. { "amount": { "$gt": 100 } })'
214
+ description: 'Filter conditions. Keys MUST be real field names obtained from describe_object \u2014 do NOT assume generic fields like `status`, `is_active`, or `deleted_at` exist on every object. Values are equality matches, or MongoDB-style operators (`{ "$gt": 100 }`, `{ "$in": [...] }`, etc.). Logical combinators: `$and` / `$or` / `$not` with nested clauses.'
130
215
  },
131
216
  fields: {
132
217
  type: "array",
@@ -224,7 +309,7 @@ var init_data_tools = __esm({
224
309
  },
225
310
  where: {
226
311
  type: "object",
227
- description: "Filter conditions applied before aggregation"
312
+ description: "Filter applied before aggregation. Same rules as query_records: keys MUST be real field names obtained from describe_object \u2014 do NOT guess generic fields like `status` or `is_active`."
228
313
  }
229
314
  },
230
315
  required: ["objectName", "aggregations"],
@@ -236,6 +321,28 @@ var init_data_tools = __esm({
236
321
  GET_RECORD_TOOL,
237
322
  AGGREGATE_DATA_TOOL
238
323
  ];
324
+ WHERE_OPERATOR_KEYS = /* @__PURE__ */ new Set([
325
+ "$and",
326
+ "$or",
327
+ "$not",
328
+ "$nor",
329
+ "$eq",
330
+ "$ne",
331
+ "$gt",
332
+ "$gte",
333
+ "$lt",
334
+ "$lte",
335
+ "$in",
336
+ "$nin",
337
+ "$exists",
338
+ "$regex",
339
+ "$like",
340
+ "$ilike",
341
+ "$contains",
342
+ "$startsWith",
343
+ "$endsWith",
344
+ "$between"
345
+ ]);
239
346
  VALID_AGG_FUNCTIONS = /* @__PURE__ */ new Set([
240
347
  "count",
241
348
  "sum",
@@ -1415,7 +1522,7 @@ var InMemoryConversationService = class {
1415
1522
  }
1416
1523
  return results;
1417
1524
  }
1418
- async addMessage(conversationId, message) {
1525
+ async addMessage(conversationId, message, _extras) {
1419
1526
  const conversation = this.store.get(conversationId);
1420
1527
  if (!conversation) {
1421
1528
  throw new Error(`Conversation "${conversationId}" not found`);
@@ -1721,9 +1828,9 @@ ${assistantText.slice(0, 800)}` : "")
1721
1828
  * must never fail because the history write failed. Mirrors the
1722
1829
  * precedent set by `ObjectQLTraceRecorder.record`.
1723
1830
  */
1724
- async persistMessage(conversationId, message) {
1831
+ async persistMessage(conversationId, message, extras) {
1725
1832
  try {
1726
- await this.conversationService.addMessage(conversationId, message);
1833
+ await this.conversationService.addMessage(conversationId, message, extras);
1727
1834
  } catch (err) {
1728
1835
  this.logger.warn("[AI] persist message failed", {
1729
1836
  conversationId,
@@ -1732,6 +1839,25 @@ ${assistantText.slice(0, 800)}` : "")
1732
1839
  });
1733
1840
  }
1734
1841
  }
1842
+ /**
1843
+ * Build a {@link MessageObservability} payload from an LLM-call result
1844
+ * and the wall-clock time it took. Returns `undefined` when there's
1845
+ * nothing useful to persist (no usage and no latency) so callers don't
1846
+ * need to special-case empty results.
1847
+ */
1848
+ static buildObservability(result, startedAt) {
1849
+ if (!result) return void 0;
1850
+ const usage = result.usage;
1851
+ const latencyMs = startedAt != null ? Date.now() - startedAt : void 0;
1852
+ if (!result.model && !usage && latencyMs == null) return void 0;
1853
+ return {
1854
+ model: result.model,
1855
+ promptTokens: usage?.promptTokens,
1856
+ completionTokens: usage?.completionTokens,
1857
+ totalTokens: usage?.totalTokens,
1858
+ latencyMs
1859
+ };
1860
+ }
1735
1861
  /**
1736
1862
  * Run an adapter call and emit a trace event.
1737
1863
  *
@@ -1883,14 +2009,20 @@ ${assistantText.slice(0, 800)}` : "")
1883
2009
  });
1884
2010
  let abortedByCallback = false;
1885
2011
  for (let iteration = 0; iteration < maxIterations; iteration++) {
2012
+ const turnStartedAt = Date.now();
1886
2013
  const result = await this.adapter.chat(conversation, chatOptions);
2014
+ const turnObservability = _AIService.buildObservability(result, turnStartedAt);
1887
2015
  if (!result.toolCalls || result.toolCalls.length === 0) {
1888
2016
  this.logger.debug("[AI] chatWithTools finished", { iteration, content: result.content.slice(0, 80) });
1889
2017
  if (conversationId) {
1890
- await this.persistMessage(conversationId, {
1891
- role: "assistant",
1892
- content: result.content
1893
- });
2018
+ await this.persistMessage(
2019
+ conversationId,
2020
+ {
2021
+ role: "assistant",
2022
+ content: result.content
2023
+ },
2024
+ turnObservability
2025
+ );
1894
2026
  void this.summarizeConversation(conversationId);
1895
2027
  }
1896
2028
  return autoCreatedConversationId ? { ...result, conversationId: autoCreatedConversationId } : result;
@@ -1908,7 +2040,7 @@ ${assistantText.slice(0, 800)}` : "")
1908
2040
  };
1909
2041
  conversation.push(assistantTurn);
1910
2042
  if (conversationId) {
1911
- await this.persistMessage(conversationId, assistantTurn);
2043
+ await this.persistMessage(conversationId, assistantTurn, turnObservability);
1912
2044
  }
1913
2045
  const toolResults = await this.toolRegistry.executeAll(
1914
2046
  result.toolCalls,
@@ -1949,16 +2081,22 @@ ${assistantText.slice(0, 800)}` : "")
1949
2081
  toolErrors: toolErrors.length > 0 ? toolErrors : void 0
1950
2082
  });
1951
2083
  }
2084
+ const finalStartedAt = Date.now();
1952
2085
  const finalResult = await this.adapter.chat(conversation, {
1953
2086
  ...chatOptions,
1954
2087
  tools: void 0,
1955
2088
  toolChoice: void 0
1956
2089
  });
2090
+ const finalObservability = _AIService.buildObservability(finalResult, finalStartedAt);
1957
2091
  if (conversationId) {
1958
- await this.persistMessage(conversationId, {
1959
- role: "assistant",
1960
- content: finalResult.content
1961
- });
2092
+ await this.persistMessage(
2093
+ conversationId,
2094
+ {
2095
+ role: "assistant",
2096
+ content: finalResult.content
2097
+ },
2098
+ finalObservability
2099
+ );
1962
2100
  void this.summarizeConversation(conversationId);
1963
2101
  }
1964
2102
  return autoCreatedConversationId ? { ...finalResult, conversationId: autoCreatedConversationId } : finalResult;
@@ -2011,13 +2149,19 @@ ${assistantText.slice(0, 800)}` : "")
2011
2149
  }
2012
2150
  }
2013
2151
  for (let iteration = 0; iteration < maxIterations; iteration++) {
2152
+ const turnStartedAt = Date.now();
2014
2153
  const result2 = await this.adapter.chat(conversation, chatOptions);
2154
+ const turnObservability = _AIService.buildObservability(result2, turnStartedAt);
2015
2155
  if (!result2.toolCalls || result2.toolCalls.length === 0) {
2016
2156
  if (conversationId) {
2017
- await this.persistMessage(conversationId, {
2018
- role: "assistant",
2019
- content: result2.content
2020
- });
2157
+ await this.persistMessage(
2158
+ conversationId,
2159
+ {
2160
+ role: "assistant",
2161
+ content: result2.content
2162
+ },
2163
+ turnObservability
2164
+ );
2021
2165
  void this.summarizeConversation(conversationId);
2022
2166
  }
2023
2167
  yield textDeltaPart("stream", result2.content);
@@ -2036,7 +2180,7 @@ ${assistantText.slice(0, 800)}` : "")
2036
2180
  };
2037
2181
  conversation.push(assistantTurn);
2038
2182
  if (conversationId) {
2039
- await this.persistMessage(conversationId, assistantTurn);
2183
+ await this.persistMessage(conversationId, assistantTurn, turnObservability);
2040
2184
  }
2041
2185
  const toolResults = await this.toolRegistry.executeAll(
2042
2186
  result2.toolCalls,
@@ -2078,12 +2222,18 @@ ${assistantText.slice(0, 800)}` : "")
2078
2222
  this.logger.warn("[AI] streamChatWithTools max iterations reached");
2079
2223
  }
2080
2224
  const finalOptions = { ...chatOptions, tools: void 0, toolChoice: void 0 };
2225
+ const finalStartedAt = Date.now();
2081
2226
  const result = await this.adapter.chat(conversation, finalOptions);
2227
+ const finalObservability = _AIService.buildObservability(result, finalStartedAt);
2082
2228
  if (conversationId) {
2083
- await this.persistMessage(conversationId, {
2084
- role: "assistant",
2085
- content: result.content
2086
- });
2229
+ await this.persistMessage(
2230
+ conversationId,
2231
+ {
2232
+ role: "assistant",
2233
+ content: result.content
2234
+ },
2235
+ finalObservability
2236
+ );
2087
2237
  void this.summarizeConversation(conversationId);
2088
2238
  }
2089
2239
  yield textDeltaPart("stream", result.content);
@@ -3342,6 +3492,39 @@ function buildPendingActionRoutes(aiService, logger) {
3342
3492
  ];
3343
3493
  }
3344
3494
 
3495
+ // src/routes/eval-routes.ts
3496
/**
 * Build the HTTP route definitions for AI eval execution.
 *
 * Currently exposes a single route, `POST /api/v1/ai/evals/runs`, which
 * requires auth and the `ai:admin` permission. The handler:
 *   - responds 400 when `body.caseId` is missing or not a string;
 *   - otherwise forwards `caseId`, `agentId`, `model`, `judgeModel` and
 *     `persist` to `evalRunner.run` and responds 200 with its result;
 *   - responds 500 with the error message when the runner throws.
 *
 * @param {{ run: (options: object) => Promise<unknown> }} evalRunner - Executes an eval case.
 * @param {{ error: (message: string, error?: Error) => void }} logger - Receives route failures.
 * @returns {Array<object>} Route definitions.
 */
function buildEvalRoutes(evalRunner, logger) {
  const runEvalCase = async (req) => {
    const body = req.body ?? {};
    if (!body.caseId || typeof body.caseId !== "string") {
      return { status: 400, body: { error: "caseId is required" } };
    }
    try {
      const { caseId, agentId, model, judgeModel, persist } = body;
      const result = await evalRunner.run({ caseId, agentId, model, judgeModel, persist });
      return { status: 200, body: result };
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      // Non-Error throwables (strings, plain objects) are wrapped so the
      // logger still receives the failure cause instead of `undefined`.
      logger.error("[AI Route] /ai/evals/runs error", err instanceof Error ? err : new Error(message));
      return { status: 500, body: { error: message } };
    }
  };

  return [
    {
      method: "POST",
      path: "/api/v1/ai/evals/runs",
      description: "Execute an AI eval case and persist the run record",
      auth: true,
      permissions: ["ai:admin"],
      handler: runEvalCase
    }
  ];
}
3527
+
3345
3528
  // src/conversation/objectql-conversation-service.ts
3346
3529
  var import_node_crypto2 = require("crypto");
3347
3530
  var CONVERSATIONS_OBJECT = "ai_conversations";
@@ -3425,7 +3608,7 @@ var ObjectQLConversationService = class {
3425
3608
  );
3426
3609
  return conversations;
3427
3610
  }
3428
- async addMessage(conversationId, message) {
3611
+ async addMessage(conversationId, message, extras) {
3429
3612
  const row = await this.engine.findOne(CONVERSATIONS_OBJECT, {
3430
3613
  where: { id: conversationId }
3431
3614
  });
@@ -3463,6 +3646,11 @@ var ObjectQLConversationService = class {
3463
3646
  content: contentStr,
3464
3647
  tool_calls: toolCallsJson,
3465
3648
  tool_call_id: toolCallId,
3649
+ model: extras?.model ?? null,
3650
+ prompt_tokens: extras?.promptTokens ?? null,
3651
+ completion_tokens: extras?.completionTokens ?? null,
3652
+ total_tokens: extras?.totalTokens ?? null,
3653
+ latency_ms: extras?.latencyMs ?? null,
3466
3654
  created_at: now
3467
3655
  });
3468
3656
  await this.engine.update(CONVERSATIONS_OBJECT, { id: conversationId, updated_at: now }, {
@@ -3686,6 +3874,38 @@ var AiMessageObject = import_data2.ObjectSchema.create({
3686
3874
  maxLength: 255,
3687
3875
  description: "ID of the tool call this message responds to (when role=tool)"
3688
3876
  }),
3877
+ // ── Per-message observability ────────────────────────────────────
3878
+ // Populated when this message is the output of an LLM call (most
3879
+ // assistant turns). User and tool messages leave them null. Lets
3880
+ // analytics surfaces (cost per turn, latency histograms, A/B model
3881
+ // comparisons) query a single table instead of joining ai_traces
3882
+ // by timestamp.
3883
+ model: import_data2.Field.text({
3884
+ label: "Model",
3885
+ required: false,
3886
+ maxLength: 128,
3887
+ description: "Model id reported by the adapter for the call that produced this message"
3888
+ }),
3889
+ prompt_tokens: import_data2.Field.number({
3890
+ label: "Prompt Tokens",
3891
+ required: false,
3892
+ description: "Tokens in the request that produced this message"
3893
+ }),
3894
+ completion_tokens: import_data2.Field.number({
3895
+ label: "Completion Tokens",
3896
+ required: false,
3897
+ description: "Tokens generated in this message"
3898
+ }),
3899
+ total_tokens: import_data2.Field.number({
3900
+ label: "Total Tokens",
3901
+ required: false,
3902
+ description: "prompt + completion for the producing call"
3903
+ }),
3904
+ latency_ms: import_data2.Field.number({
3905
+ label: "Latency (ms)",
3906
+ required: false,
3907
+ description: "Wall-clock duration of the LLM call that produced this message"
3908
+ }),
3689
3909
  created_at: import_data2.Field.datetime({
3690
3910
  label: "Created At",
3691
3911
  required: true,
@@ -3695,7 +3915,8 @@ var AiMessageObject = import_data2.ObjectSchema.create({
3695
3915
  },
3696
3916
  indexes: [
3697
3917
  { fields: ["conversation_id"] },
3698
- { fields: ["conversation_id", "created_at"] }
3918
+ { fields: ["conversation_id", "created_at"] },
3919
+ { fields: ["model"] }
3699
3920
  ],
3700
3921
  enable: {
3701
3922
  trackHistory: false,
@@ -3984,6 +4205,195 @@ var AiPendingActionObject = import_data4.ObjectSchema.create({
3984
4205
  }
3985
4206
  });
3986
4207
 
4208
+ // src/objects/ai-eval-case.object.ts
4209
+ var import_data5 = require("@objectstack/spec/data");
4210
/**
 * Object schema for `ai_eval_cases`: golden test cases that pin down
 * expected AI behavior. Each case names a target agent, carries the input
 * messages as serialized JSON, and optionally an expected substring, an
 * expected regex, or judge instructions.
 */
var AiEvalCaseObject = (() => {
  const { Field, ObjectSchema } = import_data5;

  const fields = {
    id: Field.text({
      label: "Case ID",
      required: true,
      readonly: true
    }),
    name: Field.text({
      label: "Name",
      required: true,
      maxLength: 255,
      description: "Human-readable case name"
    }),
    agent_id: Field.text({
      label: "Agent ID",
      required: true,
      maxLength: 255,
      description: "Target agent to invoke (resolved via ai_agents)"
    }),
    description: Field.textarea({
      label: "Description",
      required: false,
      description: "What this case validates and why it matters"
    }),
    input: Field.textarea({
      label: "Input Messages",
      required: true,
      description: "JSON-serialized ModelMessage[] (the user prompt(s) to feed the agent)"
    }),
    // Rule-based expectations; per the descriptions, the regex wins when both are set.
    expected_contains: Field.text({
      label: "Expected Substring",
      required: false,
      maxLength: 1024,
      description: "If set, response must contain this substring (case-sensitive). Skipped when expected_regex is set."
    }),
    expected_regex: Field.text({
      label: "Expected Regex",
      required: false,
      maxLength: 1024,
      description: "If set, response must match this JavaScript regex. Takes precedence over expected_contains."
    }),
    judge_instructions: Field.textarea({
      label: "Judge Instructions",
      required: false,
      description: "Extra rubric passed to the judge model when no expected_* is set"
    }),
    enabled: Field.boolean({
      label: "Enabled",
      required: false,
      defaultValue: true,
      description: "Disabled cases are skipped by batch runs"
    }),
    created_at: Field.datetime({
      label: "Created At",
      required: true,
      defaultValue: "NOW()",
      readonly: true
    }),
    updated_at: Field.datetime({
      label: "Updated At",
      required: false
    })
  };

  return ObjectSchema.create({
    name: "ai_eval_cases",
    label: "AI Eval Case",
    pluralLabel: "AI Eval Cases",
    icon: "flask-conical",
    isSystem: true,
    description: "Golden test cases that pin down expected AI behavior",
    fields,
    indexes: [
      { fields: ["agent_id"] },
      { fields: ["enabled"] }
    ],
    enable: {
      trackHistory: true,
      searchable: true,
      apiEnabled: true,
      trash: true,
      mru: true
    }
  });
})();
4291
+
4292
+ // src/objects/ai-eval-run.object.ts
4293
+ var import_data6 = require("@objectstack/spec/data");
4294
/**
 * Object schema for `ai_eval_runs`: one row per execution of an eval case.
 * Stores the outcome (status, score, response or error), optional judge
 * details, and token/latency figures; `agent_id` and `model` are
 * denormalized onto the run for filtering and A/B comparison.
 */
var AiEvalRunObject = (() => {
  const { Field, ObjectSchema } = import_data6;

  const fields = {
    id: Field.text({
      label: "Run ID",
      required: true,
      readonly: true
    }),
    case_id: Field.lookup("ai_eval_cases", {
      label: "Case",
      required: true
    }),
    agent_id: Field.text({
      label: "Agent ID",
      required: true,
      maxLength: 255,
      description: "Agent that was invoked (denormalized for fast filtering)"
    }),
    model: Field.text({
      label: "Model",
      required: true,
      maxLength: 128,
      description: "Model id used for the eval (denormalized for A/B comparison)"
    }),
    status: Field.select({
      label: "Status",
      required: true,
      options: [
        { label: "Pass", value: "pass" },
        { label: "Fail", value: "fail" },
        { label: "Error", value: "error" }
      ]
    }),
    score: Field.number({
      label: "Score (0\u2013100)",
      required: false,
      description: "100 for pass, 0 for fail when using substring/regex check; judge score otherwise"
    }),
    response: Field.textarea({
      label: "Response",
      required: false,
      description: "The assistant response that was scored"
    }),
    error: Field.textarea({
      label: "Error",
      required: false,
      description: "Adapter error stack when status=error"
    }),
    judge_model: Field.text({
      label: "Judge Model",
      required: false,
      maxLength: 128,
      description: "Model id of the judge (null if check was rule-based)"
    }),
    judge_reasoning: Field.textarea({
      label: "Judge Reasoning",
      required: false,
      description: "Free-form explanation from the judge model"
    }),
    // Usage and timing figures for the run; all optional.
    prompt_tokens: Field.number({
      label: "Prompt Tokens",
      required: false
    }),
    completion_tokens: Field.number({
      label: "Completion Tokens",
      required: false
    }),
    total_tokens: Field.number({
      label: "Total Tokens",
      required: false
    }),
    latency_ms: Field.number({
      label: "Latency (ms)",
      required: false
    }),
    run_at: Field.datetime({
      label: "Run At",
      required: true,
      defaultValue: "NOW()",
      readonly: true
    })
  };

  return ObjectSchema.create({
    name: "ai_eval_runs",
    label: "AI Eval Run",
    pluralLabel: "AI Eval Runs",
    icon: "gauge",
    isSystem: true,
    description: "One execution of an eval case (used for regression tracking and model A/B comparisons)",
    fields,
    indexes: [
      { fields: ["case_id"] },
      { fields: ["model"] },
      { fields: ["status"] },
      { fields: ["case_id", "run_at"] },
      { fields: ["agent_id", "model"] }
    ],
    enable: {
      trackHistory: false,
      searchable: false,
      apiEnabled: true,
      trash: false,
      mru: false
    }
  });
})();
4396
+
3987
4397
  // src/views/ai-trace.view.ts
3988
4398
  var import_spec = require("@objectstack/spec");
3989
4399
  var AiTraceView = (0, import_spec.defineView)({
@@ -4041,9 +4451,85 @@ var AiTraceView = (0, import_spec.defineView)({
4041
4451
  }
4042
4452
  });
4043
4453
 
4044
- // src/views/ai-pending-action.view.ts
4454
+ // src/views/ai-message.view.ts
4045
4455
  var import_spec2 = require("@objectstack/spec");
4046
- var AiPendingActionView = (0, import_spec2.defineView)({
4456
/**
 * Grid views over `ai_messages`.
 *
 * `list` shows every message, newest first, with the per-message model,
 * token and latency columns. `listViews` adds three variants that all
 * filter on role = "assistant": `assistants_only`, `by_model` (sorted by
 * model, then newest first) and `slow` (latency_ms > 5000, slowest first).
 */
var AiMessageView = (() => {
  // Factories return a fresh object per use so no view shares a reference.
  const messagesSource = () => ({ provider: "object", object: "ai_messages" });
  const column = (field, label) => (label === void 0 ? { field } : { field, label });
  const assistantRole = () => ({ field: "role", operator: "=", value: "assistant" });
  const newestFirst = () => ({ field: "created_at", order: "desc" });

  return (0, import_spec2.defineView)({
    list: {
      type: "grid",
      data: messagesSource(),
      columns: [
        column("created_at", "Time"),
        column("conversation_id", "Conversation"),
        column("role"),
        column("model"),
        column("prompt_tokens", "Prompt"),
        column("completion_tokens", "Output"),
        column("total_tokens", "Total"),
        column("latency_ms", "Latency (ms)")
      ],
      sort: [newestFirst()],
      pagination: { pageSize: 50 },
      searchableFields: ["conversation_id", "content", "tool_call_id"],
      filterableFields: ["role", "model", "conversation_id"]
    },
    listViews: {
      assistants_only: {
        label: "Assistant turns",
        type: "grid",
        data: messagesSource(),
        columns: [
          column("created_at", "Time"),
          column("conversation_id", "Conversation"),
          column("model"),
          column("prompt_tokens", "Prompt"),
          column("completion_tokens", "Output"),
          column("total_tokens", "Total"),
          column("latency_ms", "Latency (ms)"),
          column("content", "Reply (preview)")
        ],
        filter: [assistantRole()],
        sort: [newestFirst()]
      },
      by_model: {
        label: "By model",
        type: "grid",
        data: messagesSource(),
        columns: [
          column("model"),
          column("created_at", "Time"),
          column("latency_ms", "Latency (ms)"),
          column("total_tokens", "Tokens"),
          column("conversation_id", "Conversation")
        ],
        filter: [assistantRole()],
        sort: [
          { field: "model", order: "asc" },
          newestFirst()
        ]
      },
      slow: {
        label: "Slow turns (>5s)",
        type: "grid",
        data: messagesSource(),
        columns: [
          column("created_at", "Time"),
          column("model"),
          column("latency_ms", "Latency (ms)"),
          column("total_tokens", "Tokens"),
          column("conversation_id", "Conversation")
        ],
        filter: [
          assistantRole(),
          { field: "latency_ms", operator: ">", value: 5e3 }
        ],
        sort: [{ field: "latency_ms", order: "desc" }]
      }
    }
  });
})();
4529
+
4530
+ // src/views/ai-pending-action.view.ts
4531
+ var import_spec3 = require("@objectstack/spec");
4532
+ var AiPendingActionView = (0, import_spec3.defineView)({
4047
4533
  list: {
4048
4534
  type: "grid",
4049
4535
  data: { provider: "object", object: "ai_pending_actions" },
@@ -4273,12 +4759,325 @@ var AiPendingActionView = (0, import_spec2.defineView)({
4273
4759
  }
4274
4760
  });
4275
4761
 
4762
+ // src/views/ai-eval.view.ts
4763
+ var import_spec4 = require("@objectstack/spec");
4764
/**
 * Grid views over the `ai_eval_runs` object.
 *
 * - `list`: every run, newest first, 50 rows per page.
 * - `listViews.failures`: only runs whose status is `fail` or `error`.
 * - `listViews.by_model`: runs ordered by model, then newest first.
 * - `listViews.latest_per_case`: runs ordered by case, then newest first.
 *   This is ordering only; the view declares no filter, so older runs of a
 *   case are still listed below the newest one.
 */
var AiEvalRunView = (0, import_spec4.defineView)((() => {
  // Every factory returns a brand-new object so no two views share a reference.
  const source = () => ({ provider: "object", object: "ai_eval_runs" });
  // Columns without an explicit label must not carry a `label` key at all.
  const col = (field, label) => (label === undefined ? { field } : { field, label });
  const sortBy = (field, order) => ({ field, order });

  const runAt = () => col("run_at", "Run At");
  const caseRef = () => col("case_id", "Case");
  const latency = () => col("latency_ms", "Latency (ms)");
  const tokens = () => col("total_tokens", "Tokens");

  return {
    list: {
      type: "grid",
      data: source(),
      columns: [
        runAt(),
        caseRef(),
        col("agent_id", "Agent"),
        col("model"),
        col("status"),
        col("score"),
        latency(),
        tokens()
      ],
      sort: [sortBy("run_at", "desc")],
      pagination: { pageSize: 50 },
      filterableFields: ["status", "model", "agent_id", "case_id"],
      searchableFields: ["response", "judge_reasoning"]
    },
    listViews: {
      failures: {
        label: "Failures & errors",
        type: "grid",
        data: source(),
        columns: [
          runAt(),
          caseRef(),
          col("model"),
          col("status"),
          col("score"),
          col("error"),
          col("judge_reasoning")
        ],
        filter: [{ field: "status", operator: "in", value: ["fail", "error"] }],
        sort: [sortBy("run_at", "desc")]
      },
      by_model: {
        label: "By model",
        type: "grid",
        data: source(),
        columns: [
          col("model"),
          caseRef(),
          col("status"),
          col("score"),
          latency(),
          tokens(),
          runAt()
        ],
        sort: [sortBy("model", "asc"), sortBy("run_at", "desc")]
      },
      latest_per_case: {
        label: "Latest per case",
        type: "grid",
        data: source(),
        columns: [
          caseRef(),
          col("model"),
          col("status"),
          col("score"),
          latency(),
          runAt()
        ],
        sort: [sortBy("case_id", "asc"), sortBy("run_at", "desc")]
      }
    }
  };
})());
4837
/**
 * Default grid view over the `ai_eval_cases` object: most recently updated
 * cases first, 50 rows per page, filterable by agent and enabled flag,
 * searchable by name, description and input.
 */
var AiEvalCaseView = (0, import_spec4.defineView)((() => {
  // Columns without an explicit label must not carry a `label` key at all.
  const col = (field, label) => (label === undefined ? { field } : { field, label });

  const columns = [
    col("name"),
    col("agent_id", "Agent"),
    col("enabled"),
    col("expected_contains", "Expected (substring)"),
    col("expected_regex", "Expected (regex)"),
    col("updated_at")
  ];

  return {
    list: {
      type: "grid",
      data: { provider: "object", object: "ai_eval_cases" },
      columns,
      sort: [{ field: "updated_at", order: "desc" }],
      pagination: { pageSize: 50 },
      filterableFields: ["agent_id", "enabled"],
      searchableFields: ["name", "description", "input"]
    }
  };
})());
4855
+
4856
+ // src/eval/eval-runner.ts
4857
+ var import_node_crypto3 = require("crypto");
4858
+ var import_zod = require("zod");
4859
// Object names the eval runner reads cases from and writes run rows to.
var EVAL_CASES_OBJECT = "ai_eval_cases";
var EVAL_RUNS_OBJECT = "ai_eval_runs";

/**
 * Shape the judge must return: a numeric `score` within 0..100 (inclusive)
 * and a non-empty `reasoning` string.
 */
var JudgeOutputSchema = (() => {
  const { z } = import_zod;
  const score = z.number().min(0).max(100);
  const reasoning = z.string().min(1);
  return z.object({ score, reasoning });
})();
4865
/**
 * Runs one stored evaluation case against an agent and grades the reply.
 *
 * Grading precedence, first match wins:
 *   1. the model call threw                -> status "error"
 *   2. the case has `expected_regex`       -> pass/fail on `RegExp.test`
 *   3. the case has `expected_contains`    -> pass/fail on substring match
 *   4. otherwise                           -> LLM judge, pass when score >= 70
 *
 * Deterministic graders (2, 3) score 100 on pass and 0 on fail.
 */
var EvalRunner = class {
  /**
   * @param metadataService Stored on the instance; not read by this class.
   * @param dataEngine      Must provide `findOne(object, query)` and `insert(object, row)`.
   * @param aiService       Must provide `toolRegistry.getAll()` and `chatWithTools()`;
   *                        `generateObject()` is used for judging when present.
   * @param agentRuntime    Must provide `loadAgent`, `resolveActiveSkills`,
   *                        `buildSystemMessages` and `buildRequestOptions`.
   */
  constructor(metadataService, dataEngine, aiService, agentRuntime) {
    this.metadataService = metadataService;
    this.dataEngine = dataEngine;
    this.aiService = aiService;
    this.agentRuntime = agentRuntime;
  }

  /**
   * Execute a case and return the run record.
   *
   * @param options `caseId` (required); optional `agentId`, `model`,
   *                `judgeModel`, `agentContext`; `persist: false` skips the insert.
   * @returns The run record (camelCase keys) that was, unless disabled, persisted.
   * @throws Error when the case is missing or disabled, the agent is missing,
   *         or the case input cannot be turned into messages.
   */
  async run(options) {
    const caseRow = await this.loadCase(options.caseId);
    const agentId = options.agentId ?? caseRow.agent_id;
    const agent = await this.agentRuntime.loadAgent(agentId);
    if (!agent) {
      throw new Error(`EvalRunner: agent "${agentId}" not found`);
    }

    const userMessages = this.parseInput(caseRow.input);
    const activeSkills = await this.agentRuntime.resolveActiveSkills(agent, options.agentContext);
    const systemMessages = this.agentRuntime.buildSystemMessages(
      agent,
      options.agentContext,
      activeSkills
    );
    const toolDefs = this.aiService.toolRegistry.getAll();
    const agentOptions = this.agentRuntime.buildRequestOptions(agent, toolDefs, activeSkills);
    const fullMessages = [...systemMessages, ...userMessages];
    const effectiveModel = options.model ?? agentOptions.model ?? "(adapter default)";

    const startedAt = Date.now();
    let responseText = "";
    let errorMessage = null;
    let promptTokens = null;
    let completionTokens = null;
    let totalTokens = null;
    try {
      const chat = await this.aiService.chatWithTools(fullMessages, {
        ...agentOptions,
        model: options.model ?? agentOptions.model,
        maxIterations: agent.planning?.maxIterations
      });
      responseText = chat.content ?? "";
      const usage = chat.usage;
      if (usage) {
        promptTokens = usage.promptTokens ?? null;
        completionTokens = usage.completionTokens ?? null;
        totalTokens = usage.totalTokens ?? null;
      }
    } catch (err) {
      errorMessage = this.describeError(err);
    }
    const latencyMs = Date.now() - startedAt;

    let status = "error";
    let score = null;
    let judgeModel = null;
    let judgeReasoning = null;
    // Compare against null, not truthiness: a failed call must never be graded,
    // whatever text the thrown value produced.
    if (errorMessage !== null) {
      status = "error";
    } else if (caseRow.expected_regex) {
      let regex = null;
      try {
        regex = new RegExp(caseRow.expected_regex);
      } catch (re) {
        status = "error";
        errorMessage = `Invalid expected_regex: ${re instanceof Error ? re.message : String(re)}`;
      }
      if (regex) {
        const matched = regex.test(responseText);
        status = matched ? "pass" : "fail";
        score = matched ? 100 : 0;
      }
    } else if (caseRow.expected_contains) {
      const matched = responseText.includes(caseRow.expected_contains);
      status = matched ? "pass" : "fail";
      score = matched ? 100 : 0;
    } else {
      judgeModel = options.judgeModel ?? options.model ?? agentOptions.model ?? null;
      try {
        const judgement = await this.runJudge({
          model: judgeModel,
          caseRow,
          response: responseText
        });
        score = judgement.score;
        judgeReasoning = judgement.reasoning;
        status = judgement.score >= 70 ? "pass" : "fail";
      } catch (je) {
        status = "error";
        errorMessage = this.describeError(je);
      }
    }

    const result = {
      id: (0, import_node_crypto3.randomUUID)(),
      caseId: caseRow.id,
      agentId,
      model: effectiveModel,
      status,
      score,
      response: responseText,
      error: errorMessage,
      judgeModel,
      judgeReasoning,
      promptTokens,
      completionTokens,
      totalTokens,
      latencyMs
    };
    if (options.persist !== false) {
      await this.persist(result);
    }
    return result;
  }

  // ── Helpers ──────────────────────────────────────────────────────

  /**
   * Fetch a case row by id.
   * @throws Error when no row exists or the row has `enabled === false`.
   */
  async loadCase(caseId) {
    const row = await this.dataEngine.findOne(EVAL_CASES_OBJECT, {
      where: { id: caseId }
    });
    if (!row) {
      throw new Error(`EvalRunner: case "${caseId}" not found`);
    }
    if (row.enabled === false) {
      throw new Error(`EvalRunner: case "${caseId}" is disabled`);
    }
    return row;
  }

  /**
   * Turn a case's `input` into a message array.
   *
   * String input that does not look like JSON (or fails to parse) becomes a
   * single user message carrying the original text. Parsed JSON and input
   * that is already a message or message array go through `coerceMessages`.
   *
   * @throws Error when the value is not a string, message or message array.
   */
  parseInput(input) {
    // Already-decoded input skips the string handling below, which would
    // otherwise fail on `.trim()`.
    if (typeof input !== "string") {
      return this.coerceMessages(input);
    }
    const trimmed = input.trim();
    const looksLikeJson =
      trimmed.startsWith("[") || trimmed.startsWith("{") || trimmed.startsWith('"');
    if (!looksLikeJson) {
      return [{ role: "user", content: input }];
    }
    let parsed;
    try {
      parsed = JSON.parse(trimmed);
    } catch {
      // Not valid JSON after all: treat it as plain prompt text.
      return [{ role: "user", content: input }];
    }
    return this.coerceMessages(parsed);
  }

  /**
   * Normalise a decoded value into a message array: arrays pass through
   * as-is, a string becomes one user message, an object with a `role` key
   * is wrapped in an array.
   *
   * @throws Error for anything else.
   */
  coerceMessages(value) {
    if (Array.isArray(value)) {
      return value;
    }
    if (typeof value === "string") {
      return [{ role: "user", content: value }];
    }
    if (value && typeof value === "object" && "role" in value) {
      return [value];
    }
    throw new Error("input must be a string, ModelMessage, or ModelMessage[]");
  }

  /**
   * Render a thrown value as non-empty text: the stack (or message) for
   * `Error` instances, `String(value)` otherwise, and "Unknown error" when
   * that yields an empty string.
   */
  describeError(err) {
    const text = err instanceof Error ? err.stack ?? err.message : String(err);
    return text || "Unknown error";
  }

  /**
   * Ask the model to grade a response against the case rubric.
   *
   * Uses `aiService.generateObject` when it exists; otherwise sends a plain
   * chat request and parses the first `{` to last `}` span of the reply.
   *
   * @param args `{ model, caseRow, response }`
   * @returns The value returned by `JudgeOutputSchema.parse`.
   * @throws When the reply contains no JSON, is not parseable, or fails the schema.
   */
  async runJudge(args) {
    const rubric =
      args.caseRow.judge_instructions?.trim() ||
      "Decide whether the assistant response correctly and helpfully answers the user request.";
    const rawInput = args.caseRow.input;
    // Structured input is serialised so the judge sees the messages rather
    // than "[object Object]".
    const inputText = typeof rawInput === "string" ? rawInput : JSON.stringify(rawInput);
    const sections = [
      `# Rubric\n${rubric}`,
      `# Case name\n${args.caseRow.name}`,
      args.caseRow.description ? `# Case description\n${args.caseRow.description}` : "",
      `# Original user input\n${inputText}`,
      `# Candidate response\n${args.response || "(empty)"}`
    ];
    const judgeMessages = [
      {
        role: "system",
        content: "You are an impartial grader for an AI evaluation harness. Score the candidate response from 0 to 100 where 100 means it fully and correctly satisfies the rubric and 0 means it does not. Reply with structured JSON only."
      },
      {
        role: "user",
        content: sections.filter(Boolean).join("\n\n")
      }
    ];
    const requestOptions = { model: args.model ?? void 0 };

    if (typeof this.aiService.generateObject === "function") {
      const out = await this.aiService.generateObject(judgeMessages, JudgeOutputSchema, requestOptions);
      return JudgeOutputSchema.parse(out.object);
    }

    const judged = await this.aiService.chatWithTools(judgeMessages, requestOptions);
    const text = judged.content ?? "";
    const match = text.match(/\{[\s\S]*\}/);
    if (!match) {
      throw new Error(`Judge response did not contain JSON: ${text.slice(0, 200)}`);
    }
    return JudgeOutputSchema.parse(JSON.parse(match[0]));
  }

  /**
   * Insert the run into the runs object, mapping camelCase keys to
   * snake_case columns and stamping `run_at` with the current time.
   */
  async persist(run) {
    await this.dataEngine.insert(EVAL_RUNS_OBJECT, {
      id: run.id,
      case_id: run.caseId,
      agent_id: run.agentId,
      model: run.model,
      status: run.status,
      score: run.score,
      response: run.response,
      error: run.error,
      judge_model: run.judgeModel,
      judge_reasoning: run.judgeReasoning,
      prompt_tokens: run.promptTokens,
      completion_tokens: run.completionTokens,
      total_tokens: run.totalTokens,
      latency_ms: run.latencyMs,
      run_at: (/* @__PURE__ */ new Date()).toISOString()
    });
  }
};
5074
+
4276
5075
  // src/plugin.ts
4277
5076
  init_data_tools();
4278
5077
  init_metadata_tools();
4279
5078
 
4280
5079
  // src/tools/query-data.tool.ts
4281
- var import_zod = require("zod");
5080
+ var import_zod2 = require("zod");
4282
5081
 
4283
5082
  // src/schema-retriever.ts
4284
5083
  var SchemaRetriever = class {
@@ -4453,19 +5252,19 @@ function buildAiEngineContext(ctx) {
4453
5252
  }
4454
5253
  return { roles: [], permissions: [], isSystem: true };
4455
5254
  }
4456
- var QueryPlanSchema = import_zod.z.object({
4457
- objectName: import_zod.z.string().min(1).describe('The snake_case object name to query (e.g. "task", "account").'),
4458
- whereJson: import_zod.z.string().nullable().describe(
5255
+ var QueryPlanSchema = import_zod2.z.object({
5256
+ objectName: import_zod2.z.string().min(1).describe('The snake_case object name to query (e.g. "task", "account").'),
5257
+ whereJson: import_zod2.z.string().nullable().describe(
4459
5258
  'Filter conditions encoded as a JSON object string. Examples: `{"status":"completed"}`, `{"subject":{"$contains":"Build"}}`, `{"amount":{"$gt":100}}`. Pass null to match all records.'
4460
5259
  ),
4461
- fields: import_zod.z.array(import_zod.z.string()).nullable().describe("Field names to return. Pass null to return all fields."),
4462
- orderBy: import_zod.z.array(
4463
- import_zod.z.object({
4464
- field: import_zod.z.string(),
4465
- order: import_zod.z.enum(["asc", "desc"])
5260
+ fields: import_zod2.z.array(import_zod2.z.string()).nullable().describe("Field names to return. Pass null to return all fields."),
5261
+ orderBy: import_zod2.z.array(
5262
+ import_zod2.z.object({
5263
+ field: import_zod2.z.string(),
5264
+ order: import_zod2.z.enum(["asc", "desc"])
4466
5265
  })
4467
5266
  ).nullable().describe("Sort order. First entry is primary sort key. Pass null for no sort."),
4468
- limit: import_zod.z.number().int().min(1).max(200).nullable().describe("Maximum number of records (default 20, max 200). Pass null for default.")
5267
+ limit: import_zod2.z.number().int().min(1).max(200).nullable().describe("Maximum number of records (default 20, max 200). Pass null for default.")
4469
5268
  });
4470
5269
  var QUERY_DATA_TOOL = {
4471
5270
  name: "query_data",
@@ -5434,13 +6233,14 @@ Capabilities:
5434
6233
 
5435
6234
  Guidelines:
5436
6235
  1. Always use the describe_object tool first to understand a table's structure before querying it.
5437
- 2. Respect the user's current context \u2014 if they are viewing a specific object or record, use that as the default scope.
5438
- 3. When presenting data, format it in a clear and readable way using markdown tables or bullet lists.
5439
- 4. For large result sets, summarize the data and mention the total count.
5440
- 5. When performing aggregations, explain the results in plain language.
5441
- 6. If a query returns no results, suggest possible reasons and alternative queries.
5442
- 7. Never expose internal IDs unless the user explicitly asks for them.
5443
- 8. Always answer in the same language the user is using.`,
6236
+ 2. Do NOT assume generic fields like \`status\`, \`is_active\`, \`deleted_at\`, \`type\`, or \`enabled\` exist on every object \u2014 they almost never do. Field names in \`where\`, \`fields\`, \`orderBy\`, \`groupBy\`, and aggregations MUST come from describe_object output. If the tool returns an "Unknown field" error, call describe_object on that object and retry with real field names.
6237
+ 3. Respect the user's current context \u2014 if they are viewing a specific object or record, use that as the default scope.
6238
+ 4. When presenting data, format it in a clear and readable way using markdown tables or bullet lists.
6239
+ 5. For large result sets, summarize the data and mention the total count.
6240
+ 6. When performing aggregations, explain the results in plain language.
6241
+ 7. If a query returns no results, suggest possible reasons and alternative queries.
6242
+ 8. Never expose internal IDs unless the user explicitly asks for them.
6243
+ 9. Always answer in the same language the user is using.`,
5444
6244
  tools: [
5445
6245
  "query_data",
5446
6246
  "list_objects",
@@ -5552,10 +6352,11 @@ Guidelines:
5552
6352
 
5553
6353
  // src/adapters/vercel-adapter.ts
5554
6354
  var import_ai9 = require("ai");
5555
- function buildVercelOptions(options) {
6355
+ function buildVercelOptions(options, modelId) {
5556
6356
  if (!options) return {};
5557
6357
  const opts = {};
5558
- if (options.temperature != null) opts.temperature = options.temperature;
6358
+ const reasoning = isReasoningModel(modelId);
6359
+ if (options.temperature != null && !reasoning) opts.temperature = options.temperature;
5559
6360
  if (options.maxTokens != null) opts.maxTokens = options.maxTokens;
5560
6361
  if (options.stop?.length) opts.stopSequences = options.stop;
5561
6362
  if (options.tools?.length) {
@@ -5573,6 +6374,11 @@ function buildVercelOptions(options) {
5573
6374
  }
5574
6375
  return opts;
5575
6376
  }
6377
/**
 * Report whether a model id names one of the model families for which
 * `buildVercelOptions` omits `temperature`: ids beginning with `o1`, `o3`,
 * `o4` or `gpt-5`, each followed by `-` or the end of the id
 * (case-insensitive). Any `vendor/` prefix is ignored; only the segment
 * after the last `/` is tested.
 *
 * @param modelId Model identifier; anything that is not a non-empty string
 *                yields `false` instead of throwing.
 * @returns `true` when the id matches one of the families above.
 */
function isReasoningModel(modelId) {
  if (typeof modelId !== "string" || modelId === "") return false;
  // lastIndexOf is -1 when there is no "/", so the slice keeps the whole id.
  const id = modelId.slice(modelId.lastIndexOf("/") + 1);
  return /^(?:o[134]|gpt-5)(?:-|$)/i.test(id);
}
5576
6382
  var VercelLLMAdapter = class {
5577
6383
  constructor(config) {
5578
6384
  this.name = "vercel";
@@ -5582,7 +6388,7 @@ var VercelLLMAdapter = class {
5582
6388
  const result = await (0, import_ai9.generateText)({
5583
6389
  model: this.model,
5584
6390
  messages,
5585
- ...buildVercelOptions(options)
6391
+ ...buildVercelOptions(options, this.model.modelId)
5586
6392
  });
5587
6393
  return {
5588
6394
  content: result.text,
@@ -5599,7 +6405,7 @@ var VercelLLMAdapter = class {
5599
6405
  const result = await (0, import_ai9.generateText)({
5600
6406
  model: this.model,
5601
6407
  prompt,
5602
- ...buildVercelOptions(options)
6408
+ ...buildVercelOptions(options, this.model.modelId)
5603
6409
  });
5604
6410
  return {
5605
6411
  content: result.text,
@@ -5615,7 +6421,7 @@ var VercelLLMAdapter = class {
5615
6421
  const result = (0, import_ai9.streamText)({
5616
6422
  model: this.model,
5617
6423
  messages,
5618
- ...buildVercelOptions(options)
6424
+ ...buildVercelOptions(options, this.model.modelId)
5619
6425
  });
5620
6426
  try {
5621
6427
  for await (const part of result.fullStream) {
@@ -5641,7 +6447,7 @@ var VercelLLMAdapter = class {
5641
6447
  schema,
5642
6448
  schemaName,
5643
6449
  schemaDescription,
5644
- ...buildVercelOptions(rest)
6450
+ ...buildVercelOptions(rest, this.model.modelId)
5645
6451
  });
5646
6452
  return {
5647
6453
  object: result.object,
@@ -5730,7 +6536,7 @@ function computeCost(pricing, usage) {
5730
6536
  }
5731
6537
 
5732
6538
  // src/plugin.ts
5733
- var AIServicePlugin = class {
6539
+ var _AIServicePlugin = class _AIServicePlugin {
5734
6540
  constructor(options = {}) {
5735
6541
  this.name = "com.objectstack.service-ai";
5736
6542
  this.version = "1.0.0";
@@ -5738,13 +6544,44 @@ var AIServicePlugin = class {
5738
6544
  this.dependencies = ["com.objectstack.engine.objectql"];
5739
6545
  this.options = options;
5740
6546
  }
6547
+ /**
6548
+ * Normalise OpenAI-compatible preset providers (DeepSeek / DashScope /
6549
+ * Cloudflare / SiliconFlow / OpenRouter) into the `provider=openai` shape
6550
+ * with the appropriate base URL pre-filled. Returns the rewritten values
6551
+ * map; non-preset providers pass through unchanged.
6552
+ */
6553
+ normalisePresetProvider(values) {
6554
+ const provider = String(values.provider ?? "memory");
6555
+ if (provider === "cloudflare") {
6556
+ const accountId = String(values.cloudflare_account_id ?? "").trim();
6557
+ const gatewayId = String(values.cloudflare_gateway_id ?? "default").trim() || "default";
6558
+ if (!accountId) return values;
6559
+ return {
6560
+ ...values,
6561
+ provider: "openai",
6562
+ openai_api_key: values.cloudflare_api_key,
6563
+ openai_base_url: `https://gateway.ai.cloudflare.com/v1/${accountId}/${gatewayId}/compat`,
6564
+ openai_model: values.cloudflare_model ?? "openai/gpt-4o-mini"
6565
+ };
6566
+ }
6567
+ const preset = _AIServicePlugin.OPENAI_COMPATIBLE_PRESETS[provider];
6568
+ if (!preset) return values;
6569
+ return {
6570
+ ...values,
6571
+ provider: "openai",
6572
+ openai_api_key: values[`${provider}_api_key`],
6573
+ openai_base_url: preset.baseURL,
6574
+ openai_model: values[`${provider}_model`] ?? preset.defaultModel
6575
+ };
6576
+ }
5741
6577
  /**
5742
6578
  * Build an LLM adapter from a provider/key/model triple. Used both
5743
6579
  * by the boot-time auto-detect path and by the live `settings:changed`
5744
6580
  * rebuild path. Returns `null` if the requested provider cannot be
5745
6581
  * loaded or required credentials are missing.
5746
6582
  */
5747
- async buildAdapterFromValues(ctx, values) {
6583
+ async buildAdapterFromValues(ctx, rawValues) {
6584
+ const values = this.normalisePresetProvider(rawValues);
5748
6585
  const provider = String(values.provider ?? "memory");
5749
6586
  if (provider === "memory") {
5750
6587
  return { adapter: new MemoryLLMAdapter(), description: "MemoryLLMAdapter (echo mode)" };
@@ -6042,8 +6879,8 @@ var AIServicePlugin = class {
6042
6879
  type: "plugin",
6043
6880
  scope: "project",
6044
6881
  namespace: "ai",
6045
- objects: [AiConversationObject, AiMessageObject, AiTraceObject, AiPendingActionObject],
6046
- views: [AiTraceView, AiPendingActionView]
6882
+ objects: [AiConversationObject, AiMessageObject, AiTraceObject, AiPendingActionObject, AiEvalCaseObject, AiEvalRunObject],
6883
+ views: [AiTraceView, AiMessageView, AiPendingActionView, AiEvalCaseView, AiEvalRunView]
6047
6884
  });
6048
6885
  if (this.options.debug) {
6049
6886
  ctx.hook("ai:beforeChat", async (messages) => {
@@ -6080,7 +6917,11 @@ var AIServicePlugin = class {
6080
6917
  try {
6081
6918
  const dataEngine = ctx.getService("data");
6082
6919
  if (dataEngine) {
6083
- registerDataTools(this.service.toolRegistry, { dataEngine });
6920
+ registerDataTools(this.service.toolRegistry, {
6921
+ dataEngine,
6922
+ metadataService,
6923
+ protocol: protocolService
6924
+ });
6084
6925
  ctx.logger.info("[AI] Built-in data tools registered");
6085
6926
  if (metadataService) {
6086
6927
  registerQueryDataTool(this.service.toolRegistry, {
@@ -6301,6 +7142,20 @@ var AIServicePlugin = class {
6301
7142
  const assistantRoutes = buildAssistantRoutes(this.service, agentRuntime, skillRegistry, ctx.logger);
6302
7143
  routes.push(...assistantRoutes);
6303
7144
  ctx.logger.info(`[AI] Assistant (ambient) routes registered (${assistantRoutes.length} routes)`);
7145
+ const evalDataEngine = ctx.getService("data");
7146
+ if (evalDataEngine && typeof evalDataEngine.insert === "function") {
7147
+ const evalRunner = new EvalRunner(
7148
+ metadataService,
7149
+ evalDataEngine,
7150
+ this.service,
7151
+ agentRuntime
7152
+ );
7153
+ const evalRoutes = buildEvalRoutes(evalRunner, ctx.logger);
7154
+ routes.push(...evalRoutes);
7155
+ ctx.logger.info(`[AI] Eval routes registered (${evalRoutes.length} routes)`);
7156
+ } else {
7157
+ ctx.logger.debug("[AI] IDataEngine not available, skipping eval routes");
7158
+ }
6304
7159
  } else {
6305
7160
  ctx.logger.debug("[AI] Metadata service not available, skipping agent and assistant routes");
6306
7161
  }
@@ -6529,6 +7384,20 @@ var AIServicePlugin = class {
6529
7384
  this.service = void 0;
6530
7385
  }
6531
7386
  };
7387
/**
 * Preset table for providers that are driven through the OpenAI-shaped
 * adapter. Each entry supplies the base URL and the model used when the
 * operator has not chosen one, so the settings UI can offer the provider by
 * name instead of asking for "openai" plus a hand-typed base URL.
 * `normalisePresetProvider` looks providers up here by key.
 */
_AIServicePlugin.OPENAI_COMPATIBLE_PRESETS = {
  deepseek: {
    baseURL: "https://api.deepseek.com",
    defaultModel: "deepseek-chat"
  },
  dashscope: {
    baseURL: "https://dashscope.aliyuncs.com/compatible-mode/v1",
    defaultModel: "qwen-plus"
  },
  siliconflow: {
    baseURL: "https://api.siliconflow.cn/v1",
    defaultModel: "Qwen/Qwen2.5-7B-Instruct"
  },
  openrouter: {
    baseURL: "https://openrouter.ai/api/v1",
    defaultModel: "openai/gpt-4o-mini"
  }
};
// Public name for the plugin class defined above.
var AIServicePlugin = _AIServicePlugin;
6532
7401
  function extractOverrides(payload) {
6533
7402
  if (!payload || typeof payload !== "object") return {};
6534
7403
  const p = payload;