@objectstack/service-ai 7.0.0 → 7.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -30,6 +30,77 @@ function buildEngineContext(ctx) {
30
30
  }
31
31
  return { roles: [], permissions: [], isSystem: true };
32
32
  }
33
/**
 * Resolve the set of field names declared on an object so that field
 * references can be checked before a query is sent to the data engine.
 *
 * Lookup order:
 *   1. `ctx.metadataService.getObject(objectName)`, when a metadata service
 *      is present on the context.
 *   2. `ctx.protocol.getMetaItems({ type: "object" })`, searched by `name`,
 *      when the first lookup produced nothing. The result may be a bare
 *      array or an object carrying an `items` array.
 *
 * A lookup that throws is treated as "definition not found": this function
 * is a best-effort helper and never rejects because metadata is unavailable.
 *
 * The definition's `fields` may be a map keyed by field name or an array of
 * field names / `{ name }` descriptors. Calling `Object.keys` on an array
 * would yield index strings ("0", "1", ...), so arrays are read by name.
 *
 * @param {object} ctx - Tool context; only `metadataService` and `protocol` are read.
 * @param {string} objectName - Name of the object whose schema is wanted.
 * @returns {Promise<Set<string> | null>} Declared field names (always
 *   including "id"), or `null` when no definition could be found.
 */
async function resolveObjectFieldNames(ctx, objectName) {
  let def;
  if (ctx.metadataService) {
    try {
      def = await ctx.metadataService.getObject(objectName);
    } catch {
      // Deliberate best-effort: fall through to the protocol lookup.
      def = undefined;
    }
  }
  if (!def && ctx.protocol?.getMetaItems) {
    try {
      const all = await ctx.protocol.getMetaItems({ type: "object" });
      let items = [];
      if (Array.isArray(all)) {
        items = all;
      } else if (all && typeof all === "object" && Array.isArray(all.items)) {
        items = all.items;
      }
      def = items.find((o) => o?.name === objectName);
    } catch {
      def = undefined;
    }
  }
  if (!def) return null;

  const names = new Set(["id"]);
  const fields = def.fields;
  if (Array.isArray(fields)) {
    for (const entry of fields) {
      const name = typeof entry === "string" ? entry : entry?.name;
      if (typeof name === "string" && name !== "") names.add(name);
    }
  } else if (fields && typeof fields === "object") {
    for (const name of Object.keys(fields)) names.add(name);
  }
  return names;
}
56
/**
 * Walk a where-clause and add every key that names a field to `acc`.
 *
 * - Non-objects (including `null`) contribute nothing.
 * - Arrays are walked element by element.
 * - Keys listed in the module-level `WHERE_OPERATOR_KEYS` set are operators:
 *   they are not recorded, but their value is walked so fields nested under
 *   logical combinators are still found.
 * - Any other `$`-prefixed key (for example `$options` next to `$regex`) is
 *   also an operator rather than a field. It is skipped without descending,
 *   because the shape of an unrecognised operator's operand is unknown.
 * - Every remaining key is recorded as a field; a plain-object value is
 *   walked too, which is how `{ amount: { $gt: 100 } }` is handled.
 *
 * @param {unknown} where - Where-clause fragment to inspect.
 * @param {Set<string>} acc - Accumulator that receives field names (mutated).
 * @returns {void}
 */
function collectWhereFields(where, acc) {
  if (!where || typeof where !== "object") return;
  if (Array.isArray(where)) {
    for (const item of where) collectWhereFields(item, acc);
    return;
  }
  for (const [key, value] of Object.entries(where)) {
    if (WHERE_OPERATOR_KEYS.has(key)) {
      collectWhereFields(value, acc);
      continue;
    }
    if (key.startsWith("$")) {
      // Unrecognised operator: never a field name, so do not report it.
      continue;
    }
    acc.add(key);
    if (value && typeof value === "object" && !Array.isArray(value)) {
      collectWhereFields(value, acc);
    }
  }
}
73
/**
 * Serialize the tool response returned when a query references fields that
 * the object does not declare.
 *
 * At most the first 40 available field names are listed; the payload says
 * whether the list was cut short and how many fields exist in total.
 *
 * @param {string} objectName - Object the query targeted.
 * @param {string[]} unknown - Referenced names that are not declared.
 * @param {Set<string>} available - Declared field names.
 * @returns {string} JSON text of the error payload.
 */
function unknownFieldError(objectName, unknown, available) {
  const MAX_LISTED = 40;
  const listed = Array.from(available).slice(0, MAX_LISTED);
  const payload = {
    error: `Unknown field(s) ${JSON.stringify(unknown)} on "${objectName}". Call describe_object first to see the real schema \u2014 do not guess generic fields like \`status\`, \`is_active\`, or \`deleted_at\`.`,
    objectName,
    unknownFields: unknown,
    availableFields: listed,
    availableFieldsTruncated: listed.length < available.size,
    totalAvailable: available.size,
    hint: "Use the describe_object tool to fetch the authoritative field list."
  };
  return JSON.stringify(payload);
}
86
/**
 * Check every field a tool call refers to against the object's declared
 * fields.
 *
 * References are gathered, in this order, from `refs.where` (through
 * `collectWhereFields`), `refs.fields`, the `field` of each `refs.orderBy`
 * entry, `refs.groupBy`, and the `field` of each `refs.aggregations` entry.
 * Entries without a truthy `field` are ignored.
 *
 * @param {object} ctx - Tool context passed on to `resolveObjectFieldNames`.
 * @param {string} objectName - Object being queried.
 * @param {{where?: unknown, fields?: Iterable<string>, orderBy?: Iterable<{field?: string}>,
 *   groupBy?: Iterable<string>, aggregations?: Iterable<{field?: string}>}} refs
 * @returns {Promise<string | null>} The serialized error from
 *   `unknownFieldError` when at least one reference is undeclared; `null`
 *   when everything is declared or the schema could not be resolved.
 */
async function validateFieldReferences(ctx, objectName, refs) {
  const declared = await resolveObjectFieldNames(ctx, objectName);
  if (!declared) {
    // No schema to compare against: skip validation instead of blocking.
    return null;
  }

  const mentioned = new Set();
  collectWhereFields(refs.where, mentioned);

  const sortFields = [...(refs.orderBy ?? [])]
    .map((entry) => entry?.field)
    .filter(Boolean);
  const aggregateFields = [...(refs.aggregations ?? [])]
    .map((entry) => entry?.field)
    .filter(Boolean);
  const direct = [
    ...(refs.fields ?? []),
    ...sortFields,
    ...(refs.groupBy ?? []),
    ...aggregateFields
  ];
  for (const name of direct) {
    mentioned.add(name);
  }

  const missing = [...mentioned].filter((name) => !declared.has(name));
  return missing.length > 0 ? unknownFieldError(objectName, missing, declared) : null;
}
33
104
  function createQueryRecordsHandler(ctx) {
34
105
  return async (args, execCtx) => {
35
106
  const {
@@ -40,6 +111,12 @@ function createQueryRecordsHandler(ctx) {
40
111
  limit,
41
112
  offset
42
113
  } = args;
114
+ const validationError = await validateFieldReferences(ctx, objectName, {
115
+ where,
116
+ fields,
117
+ orderBy
118
+ });
119
+ if (validationError) return validationError;
43
120
  const rawLimit = limit ?? DEFAULT_QUERY_LIMIT;
44
121
  const safeLimit = Number.isFinite(rawLimit) && rawLimit > 0 ? Math.min(Math.floor(rawLimit), MAX_QUERY_LIMIT) : DEFAULT_QUERY_LIMIT;
45
122
  const safeOffset = Number.isFinite(offset) && offset >= 0 ? Math.floor(offset) : void 0;
@@ -57,6 +134,8 @@ function createQueryRecordsHandler(ctx) {
57
134
  function createGetRecordHandler(ctx) {
58
135
  return async (args, execCtx) => {
59
136
  const { objectName, recordId, fields } = args;
137
+ const validationError = await validateFieldReferences(ctx, objectName, { fields });
138
+ if (validationError) return validationError;
60
139
  const record = await ctx.dataEngine.findOne(objectName, {
61
140
  where: { id: recordId },
62
141
  fields,
@@ -78,6 +157,12 @@ function createAggregateDataHandler(ctx) {
78
157
  });
79
158
  }
80
159
  }
160
+ const validationError = await validateFieldReferences(ctx, objectName, {
161
+ where,
162
+ groupBy,
163
+ aggregations
164
+ });
165
+ if (validationError) return validationError;
81
166
  const result = await ctx.dataEngine.aggregate(objectName, {
82
167
  where,
83
168
  groupBy,
@@ -96,7 +181,7 @@ function registerDataTools(registry, context) {
96
181
  registry.register(GET_RECORD_TOOL, createGetRecordHandler(context));
97
182
  registry.register(AGGREGATE_DATA_TOOL, createAggregateDataHandler(context));
98
183
  }
99
- var MAX_QUERY_LIMIT, DEFAULT_QUERY_LIMIT, QUERY_RECORDS_TOOL, GET_RECORD_TOOL, AGGREGATE_DATA_TOOL, DATA_TOOL_DEFINITIONS, VALID_AGG_FUNCTIONS;
184
+ var MAX_QUERY_LIMIT, DEFAULT_QUERY_LIMIT, QUERY_RECORDS_TOOL, GET_RECORD_TOOL, AGGREGATE_DATA_TOOL, DATA_TOOL_DEFINITIONS, WHERE_OPERATOR_KEYS, VALID_AGG_FUNCTIONS;
100
185
  var init_data_tools = __esm({
101
186
  "src/tools/data-tools.ts"() {
102
187
  "use strict";
@@ -114,7 +199,7 @@ var init_data_tools = __esm({
114
199
  },
115
200
  where: {
116
201
  type: "object",
117
- description: 'Filter conditions as key-value pairs (e.g. { "status": "active" }) or MongoDB-style operators (e.g. { "amount": { "$gt": 100 } })'
202
+ description: 'Filter conditions. Keys MUST be real field names obtained from describe_object \u2014 do NOT assume generic fields like `status`, `is_active`, or `deleted_at` exist on every object. Values are equality matches, or MongoDB-style operators (`{ "$gt": 100 }`, `{ "$in": [...] }`, etc.). Logical combinators: `$and` / `$or` / `$not` with nested clauses.'
118
203
  },
119
204
  fields: {
120
205
  type: "array",
@@ -212,7 +297,7 @@ var init_data_tools = __esm({
212
297
  },
213
298
  where: {
214
299
  type: "object",
215
- description: "Filter conditions applied before aggregation"
300
+ description: "Filter applied before aggregation. Same rules as query_records: keys MUST be real field names obtained from describe_object \u2014 do NOT guess generic fields like `status` or `is_active`."
216
301
  }
217
302
  },
218
303
  required: ["objectName", "aggregations"],
@@ -224,6 +309,28 @@ var init_data_tools = __esm({
224
309
  GET_RECORD_TOOL,
225
310
  AGGREGATE_DATA_TOOL
226
311
  ];
312
+ WHERE_OPERATOR_KEYS = /* @__PURE__ */ new Set([
313
+ "$and",
314
+ "$or",
315
+ "$not",
316
+ "$nor",
317
+ "$eq",
318
+ "$ne",
319
+ "$gt",
320
+ "$gte",
321
+ "$lt",
322
+ "$lte",
323
+ "$in",
324
+ "$nin",
325
+ "$exists",
326
+ "$regex",
327
+ "$like",
328
+ "$ilike",
329
+ "$contains",
330
+ "$startsWith",
331
+ "$endsWith",
332
+ "$between"
333
+ ]);
227
334
  VALID_AGG_FUNCTIONS = /* @__PURE__ */ new Set([
228
335
  "count",
229
336
  "sum",
@@ -1341,7 +1448,7 @@ var InMemoryConversationService = class {
1341
1448
  }
1342
1449
  return results;
1343
1450
  }
1344
- async addMessage(conversationId, message) {
1451
+ async addMessage(conversationId, message, _extras) {
1345
1452
  const conversation = this.store.get(conversationId);
1346
1453
  if (!conversation) {
1347
1454
  throw new Error(`Conversation "${conversationId}" not found`);
@@ -1647,9 +1754,9 @@ ${assistantText.slice(0, 800)}` : "")
1647
1754
  * must never fail because the history write failed. Mirrors the
1648
1755
  * precedent set by `ObjectQLTraceRecorder.record`.
1649
1756
  */
1650
- async persistMessage(conversationId, message) {
1757
+ async persistMessage(conversationId, message, extras) {
1651
1758
  try {
1652
- await this.conversationService.addMessage(conversationId, message);
1759
+ await this.conversationService.addMessage(conversationId, message, extras);
1653
1760
  } catch (err) {
1654
1761
  this.logger.warn("[AI] persist message failed", {
1655
1762
  conversationId,
@@ -1658,6 +1765,25 @@ ${assistantText.slice(0, 800)}` : "")
1658
1765
  });
1659
1766
  }
1660
1767
  }
1768
+ /**
1769
+ * Build a {@link MessageObservability} payload from an LLM-call result
1770
+ * and the wall-clock time it took. Returns `undefined` when there's
1771
+ * nothing useful to persist (no usage and no latency) so callers don't
1772
+ * need to special-case empty results.
1773
+ */
1774
+ static buildObservability(result, startedAt) {
1775
+ if (!result) return void 0;
1776
+ const usage = result.usage;
1777
+ const latencyMs = startedAt != null ? Date.now() - startedAt : void 0;
1778
+ if (!result.model && !usage && latencyMs == null) return void 0;
1779
+ return {
1780
+ model: result.model,
1781
+ promptTokens: usage?.promptTokens,
1782
+ completionTokens: usage?.completionTokens,
1783
+ totalTokens: usage?.totalTokens,
1784
+ latencyMs
1785
+ };
1786
+ }
1661
1787
  /**
1662
1788
  * Run an adapter call and emit a trace event.
1663
1789
  *
@@ -1809,14 +1935,20 @@ ${assistantText.slice(0, 800)}` : "")
1809
1935
  });
1810
1936
  let abortedByCallback = false;
1811
1937
  for (let iteration = 0; iteration < maxIterations; iteration++) {
1938
+ const turnStartedAt = Date.now();
1812
1939
  const result = await this.adapter.chat(conversation, chatOptions);
1940
+ const turnObservability = _AIService.buildObservability(result, turnStartedAt);
1813
1941
  if (!result.toolCalls || result.toolCalls.length === 0) {
1814
1942
  this.logger.debug("[AI] chatWithTools finished", { iteration, content: result.content.slice(0, 80) });
1815
1943
  if (conversationId) {
1816
- await this.persistMessage(conversationId, {
1817
- role: "assistant",
1818
- content: result.content
1819
- });
1944
+ await this.persistMessage(
1945
+ conversationId,
1946
+ {
1947
+ role: "assistant",
1948
+ content: result.content
1949
+ },
1950
+ turnObservability
1951
+ );
1820
1952
  void this.summarizeConversation(conversationId);
1821
1953
  }
1822
1954
  return autoCreatedConversationId ? { ...result, conversationId: autoCreatedConversationId } : result;
@@ -1834,7 +1966,7 @@ ${assistantText.slice(0, 800)}` : "")
1834
1966
  };
1835
1967
  conversation.push(assistantTurn);
1836
1968
  if (conversationId) {
1837
- await this.persistMessage(conversationId, assistantTurn);
1969
+ await this.persistMessage(conversationId, assistantTurn, turnObservability);
1838
1970
  }
1839
1971
  const toolResults = await this.toolRegistry.executeAll(
1840
1972
  result.toolCalls,
@@ -1875,16 +2007,22 @@ ${assistantText.slice(0, 800)}` : "")
1875
2007
  toolErrors: toolErrors.length > 0 ? toolErrors : void 0
1876
2008
  });
1877
2009
  }
2010
+ const finalStartedAt = Date.now();
1878
2011
  const finalResult = await this.adapter.chat(conversation, {
1879
2012
  ...chatOptions,
1880
2013
  tools: void 0,
1881
2014
  toolChoice: void 0
1882
2015
  });
2016
+ const finalObservability = _AIService.buildObservability(finalResult, finalStartedAt);
1883
2017
  if (conversationId) {
1884
- await this.persistMessage(conversationId, {
1885
- role: "assistant",
1886
- content: finalResult.content
1887
- });
2018
+ await this.persistMessage(
2019
+ conversationId,
2020
+ {
2021
+ role: "assistant",
2022
+ content: finalResult.content
2023
+ },
2024
+ finalObservability
2025
+ );
1888
2026
  void this.summarizeConversation(conversationId);
1889
2027
  }
1890
2028
  return autoCreatedConversationId ? { ...finalResult, conversationId: autoCreatedConversationId } : finalResult;
@@ -1937,13 +2075,19 @@ ${assistantText.slice(0, 800)}` : "")
1937
2075
  }
1938
2076
  }
1939
2077
  for (let iteration = 0; iteration < maxIterations; iteration++) {
2078
+ const turnStartedAt = Date.now();
1940
2079
  const result2 = await this.adapter.chat(conversation, chatOptions);
2080
+ const turnObservability = _AIService.buildObservability(result2, turnStartedAt);
1941
2081
  if (!result2.toolCalls || result2.toolCalls.length === 0) {
1942
2082
  if (conversationId) {
1943
- await this.persistMessage(conversationId, {
1944
- role: "assistant",
1945
- content: result2.content
1946
- });
2083
+ await this.persistMessage(
2084
+ conversationId,
2085
+ {
2086
+ role: "assistant",
2087
+ content: result2.content
2088
+ },
2089
+ turnObservability
2090
+ );
1947
2091
  void this.summarizeConversation(conversationId);
1948
2092
  }
1949
2093
  yield textDeltaPart("stream", result2.content);
@@ -1962,7 +2106,7 @@ ${assistantText.slice(0, 800)}` : "")
1962
2106
  };
1963
2107
  conversation.push(assistantTurn);
1964
2108
  if (conversationId) {
1965
- await this.persistMessage(conversationId, assistantTurn);
2109
+ await this.persistMessage(conversationId, assistantTurn, turnObservability);
1966
2110
  }
1967
2111
  const toolResults = await this.toolRegistry.executeAll(
1968
2112
  result2.toolCalls,
@@ -2004,12 +2148,18 @@ ${assistantText.slice(0, 800)}` : "")
2004
2148
  this.logger.warn("[AI] streamChatWithTools max iterations reached");
2005
2149
  }
2006
2150
  const finalOptions = { ...chatOptions, tools: void 0, toolChoice: void 0 };
2151
+ const finalStartedAt = Date.now();
2007
2152
  const result = await this.adapter.chat(conversation, finalOptions);
2153
+ const finalObservability = _AIService.buildObservability(result, finalStartedAt);
2008
2154
  if (conversationId) {
2009
- await this.persistMessage(conversationId, {
2010
- role: "assistant",
2011
- content: result.content
2012
- });
2155
+ await this.persistMessage(
2156
+ conversationId,
2157
+ {
2158
+ role: "assistant",
2159
+ content: result.content
2160
+ },
2161
+ finalObservability
2162
+ );
2013
2163
  void this.summarizeConversation(conversationId);
2014
2164
  }
2015
2165
  yield textDeltaPart("stream", result.content);
@@ -3268,6 +3418,39 @@ function buildPendingActionRoutes(aiService, logger) {
3268
3418
  ];
3269
3419
  }
3270
3420
 
3421
+ // src/routes/eval-routes.ts
3422
/**
 * Build the route table for AI eval execution.
 *
 * Produces a single authenticated `POST /api/v1/ai/evals/runs` route that
 * requires the `ai:admin` permission. Its handler:
 *   - answers 400 when `caseId` is missing or not a string;
 *   - otherwise forwards `caseId`, `agentId`, `model`, `judgeModel` and
 *     `persist` from the request body to `evalRunner.run` and answers 200
 *     with whatever the runner returns;
 *   - answers 500 with the error message when the runner throws, after
 *     logging through `logger.error`.
 *
 * @param {{run: Function}} evalRunner - Executes one eval case.
 * @param {{error: Function}} logger - Receives handler failures.
 * @returns {Array<object>} Route definitions.
 */
function buildEvalRoutes(evalRunner, logger) {
  const handler = async (req) => {
    const { caseId, agentId, model, judgeModel, persist } = req.body ?? {};
    if (!caseId || typeof caseId !== "string") {
      return { status: 400, body: { error: "caseId is required" } };
    }
    try {
      const result = await evalRunner.run({ caseId, agentId, model, judgeModel, persist });
      return { status: 200, body: result };
    } catch (err) {
      const isError = err instanceof Error;
      const message = isError ? err.message : String(err);
      // Only genuine Error instances are handed to the logger.
      logger.error("[AI Route] /ai/evals/runs error", isError ? err : undefined);
      return { status: 500, body: { error: message } };
    }
  };

  const runRoute = {
    method: "POST",
    path: "/api/v1/ai/evals/runs",
    description: "Execute an AI eval case and persist the run record",
    auth: true,
    permissions: ["ai:admin"],
    handler
  };
  return [runRoute];
}
3453
+
3271
3454
  // src/conversation/objectql-conversation-service.ts
3272
3455
  import { randomUUID as randomUUID2 } from "crypto";
3273
3456
  var CONVERSATIONS_OBJECT = "ai_conversations";
@@ -3351,7 +3534,7 @@ var ObjectQLConversationService = class {
3351
3534
  );
3352
3535
  return conversations;
3353
3536
  }
3354
- async addMessage(conversationId, message) {
3537
+ async addMessage(conversationId, message, extras) {
3355
3538
  const row = await this.engine.findOne(CONVERSATIONS_OBJECT, {
3356
3539
  where: { id: conversationId }
3357
3540
  });
@@ -3389,6 +3572,11 @@ var ObjectQLConversationService = class {
3389
3572
  content: contentStr,
3390
3573
  tool_calls: toolCallsJson,
3391
3574
  tool_call_id: toolCallId,
3575
+ model: extras?.model ?? null,
3576
+ prompt_tokens: extras?.promptTokens ?? null,
3577
+ completion_tokens: extras?.completionTokens ?? null,
3578
+ total_tokens: extras?.totalTokens ?? null,
3579
+ latency_ms: extras?.latencyMs ?? null,
3392
3580
  created_at: now
3393
3581
  });
3394
3582
  await this.engine.update(CONVERSATIONS_OBJECT, { id: conversationId, updated_at: now }, {
@@ -3612,6 +3800,38 @@ var AiMessageObject = ObjectSchema2.create({
3612
3800
  maxLength: 255,
3613
3801
  description: "ID of the tool call this message responds to (when role=tool)"
3614
3802
  }),
3803
+ // ── Per-message observability ────────────────────────────────────
3804
+ // Populated when this message is the output of an LLM call (most
3805
+ // assistant turns). User and tool messages leave them null. Lets
3806
+ // analytics surfaces (cost per turn, latency histograms, A/B model
3807
+ // comparisons) query a single table instead of joining ai_traces
3808
+ // by timestamp.
3809
+ model: Field2.text({
3810
+ label: "Model",
3811
+ required: false,
3812
+ maxLength: 128,
3813
+ description: "Model id reported by the adapter for the call that produced this message"
3814
+ }),
3815
+ prompt_tokens: Field2.number({
3816
+ label: "Prompt Tokens",
3817
+ required: false,
3818
+ description: "Tokens in the request that produced this message"
3819
+ }),
3820
+ completion_tokens: Field2.number({
3821
+ label: "Completion Tokens",
3822
+ required: false,
3823
+ description: "Tokens generated in this message"
3824
+ }),
3825
+ total_tokens: Field2.number({
3826
+ label: "Total Tokens",
3827
+ required: false,
3828
+ description: "prompt + completion for the producing call"
3829
+ }),
3830
+ latency_ms: Field2.number({
3831
+ label: "Latency (ms)",
3832
+ required: false,
3833
+ description: "Wall-clock duration of the LLM call that produced this message"
3834
+ }),
3615
3835
  created_at: Field2.datetime({
3616
3836
  label: "Created At",
3617
3837
  required: true,
@@ -3621,7 +3841,8 @@ var AiMessageObject = ObjectSchema2.create({
3621
3841
  },
3622
3842
  indexes: [
3623
3843
  { fields: ["conversation_id"] },
3624
- { fields: ["conversation_id", "created_at"] }
3844
+ { fields: ["conversation_id", "created_at"] },
3845
+ { fields: ["model"] }
3625
3846
  ],
3626
3847
  enable: {
3627
3848
  trackHistory: false,
@@ -3910,6 +4131,195 @@ var AiPendingActionObject = ObjectSchema4.create({
3910
4131
  }
3911
4132
  });
3912
4133
 
4134
+ // src/objects/ai-eval-case.object.ts
4135
+ import { ObjectSchema as ObjectSchema5, Field as Field5 } from "@objectstack/spec/data";
4136
/**
 * Schema for the `ai_eval_cases` system object: stored test cases for AI
 * behavior. Each case names a target agent (`agent_id`) and carries the
 * `input` to send, plus optional checks on the response.
 *
 * Per the field descriptions below, `expected_regex` takes precedence over
 * `expected_contains`, and `judge_instructions` applies when neither
 * `expected_*` field is set. `enabled` defaults to true.
 *
 * Indexed on `agent_id` and on `enabled`. History tracking, search, API
 * access, trash and MRU are all switched on for this object.
 */
+ var AiEvalCaseObject = ObjectSchema5.create({
4137
+ name: "ai_eval_cases",
4138
+ label: "AI Eval Case",
4139
+ pluralLabel: "AI Eval Cases",
4140
+ icon: "flask-conical",
4141
+ isSystem: true,
4142
+ description: "Golden test cases that pin down expected AI behavior",
4143
+ fields: {
4144
+ id: Field5.text({
4145
+ label: "Case ID",
4146
+ required: true,
4147
+ readonly: true
4148
+ }),
4149
+ name: Field5.text({
4150
+ label: "Name",
4151
+ required: true,
4152
+ maxLength: 255,
4153
+ description: "Human-readable case name"
4154
+ }),
4155
+ agent_id: Field5.text({
4156
+ label: "Agent ID",
4157
+ required: true,
4158
+ maxLength: 255,
4159
+ description: "Target agent to invoke (resolved via ai_agents)"
4160
+ }),
4161
+ description: Field5.textarea({
4162
+ label: "Description",
4163
+ required: false,
4164
+ description: "What this case validates and why it matters"
4165
+ }),
4166
+ input: Field5.textarea({
4167
+ label: "Input Messages",
4168
+ required: true,
4169
+ description: "JSON-serialized ModelMessage[] (the user prompt(s) to feed the agent)"
4170
+ }),
4171
+ expected_contains: Field5.text({
4172
+ label: "Expected Substring",
4173
+ required: false,
4174
+ maxLength: 1024,
4175
+ description: "If set, response must contain this substring (case-sensitive). Skipped when expected_regex is set."
4176
+ }),
4177
+ expected_regex: Field5.text({
4178
+ label: "Expected Regex",
4179
+ required: false,
4180
+ maxLength: 1024,
4181
+ description: "If set, response must match this JavaScript regex. Takes precedence over expected_contains."
4182
+ }),
4183
+ judge_instructions: Field5.textarea({
4184
+ label: "Judge Instructions",
4185
+ required: false,
4186
+ description: "Extra rubric passed to the judge model when no expected_* is set"
4187
+ }),
4188
+ enabled: Field5.boolean({
4189
+ label: "Enabled",
4190
+ required: false,
4191
+ defaultValue: true,
4192
+ description: "Disabled cases are skipped by batch runs"
4193
+ }),
4194
+ created_at: Field5.datetime({
4195
+ label: "Created At",
4196
+ required: true,
4197
+ defaultValue: "NOW()",
4198
+ readonly: true
4199
+ }),
4200
+ updated_at: Field5.datetime({
4201
+ label: "Updated At",
4202
+ required: false
4203
+ })
4204
+ },
4205
+ indexes: [
4206
+ { fields: ["agent_id"] },
4207
+ { fields: ["enabled"] }
4208
+ ],
4209
+ enable: {
4210
+ trackHistory: true,
4211
+ searchable: true,
4212
+ apiEnabled: true,
4213
+ trash: true,
4214
+ mru: true
4215
+ }
4216
+ });
4217
+
4218
+ // src/objects/ai-eval-run.object.ts
4219
+ import { ObjectSchema as ObjectSchema6, Field as Field6 } from "@objectstack/spec/data";
4220
/**
 * Schema for the `ai_eval_runs` system object: one row per execution of an
 * eval case.
 *
 * `case_id` is a lookup to `ai_eval_cases`. `agent_id` and `model` are also
 * stored on the row (the field descriptions call this denormalization, for
 * filtering and A/B comparison). `status` is one of `pass`, `fail` or
 * `error`. Score, response, error text, judge details, token counts and
 * latency are all optional.
 *
 * Indexed on `case_id`, `model`, `status`, (`case_id`, `run_at`) and
 * (`agent_id`, `model`). Only API access is enabled; history tracking,
 * search, trash and MRU are off.
 */
+ var AiEvalRunObject = ObjectSchema6.create({
4221
+ name: "ai_eval_runs",
4222
+ label: "AI Eval Run",
4223
+ pluralLabel: "AI Eval Runs",
4224
+ icon: "gauge",
4225
+ isSystem: true,
4226
+ description: "One execution of an eval case (used for regression tracking and model A/B comparisons)",
4227
+ fields: {
4228
+ id: Field6.text({
4229
+ label: "Run ID",
4230
+ required: true,
4231
+ readonly: true
4232
+ }),
4233
+ case_id: Field6.lookup("ai_eval_cases", {
4234
+ label: "Case",
4235
+ required: true
4236
+ }),
4237
+ agent_id: Field6.text({
4238
+ label: "Agent ID",
4239
+ required: true,
4240
+ maxLength: 255,
4241
+ description: "Agent that was invoked (denormalized for fast filtering)"
4242
+ }),
4243
+ model: Field6.text({
4244
+ label: "Model",
4245
+ required: true,
4246
+ maxLength: 128,
4247
+ description: "Model id used for the eval (denormalized for A/B comparison)"
4248
+ }),
4249
+ status: Field6.select({
4250
+ label: "Status",
4251
+ required: true,
4252
+ options: [
4253
+ { label: "Pass", value: "pass" },
4254
+ { label: "Fail", value: "fail" },
4255
+ { label: "Error", value: "error" }
4256
+ ]
4257
+ }),
4258
+ score: Field6.number({
4259
+ label: "Score (0\u2013100)",
4260
+ required: false,
4261
+ description: "100 for pass, 0 for fail when using substring/regex check; judge score otherwise"
4262
+ }),
4263
+ response: Field6.textarea({
4264
+ label: "Response",
4265
+ required: false,
4266
+ description: "The assistant response that was scored"
4267
+ }),
4268
+ error: Field6.textarea({
4269
+ label: "Error",
4270
+ required: false,
4271
+ description: "Adapter error stack when status=error"
4272
+ }),
4273
+ judge_model: Field6.text({
4274
+ label: "Judge Model",
4275
+ required: false,
4276
+ maxLength: 128,
4277
+ description: "Model id of the judge (null if check was rule-based)"
4278
+ }),
4279
+ judge_reasoning: Field6.textarea({
4280
+ label: "Judge Reasoning",
4281
+ required: false,
4282
+ description: "Free-form explanation from the judge model"
4283
+ }),
4284
+ prompt_tokens: Field6.number({
4285
+ label: "Prompt Tokens",
4286
+ required: false
4287
+ }),
4288
+ completion_tokens: Field6.number({
4289
+ label: "Completion Tokens",
4290
+ required: false
4291
+ }),
4292
+ total_tokens: Field6.number({
4293
+ label: "Total Tokens",
4294
+ required: false
4295
+ }),
4296
+ latency_ms: Field6.number({
4297
+ label: "Latency (ms)",
4298
+ required: false
4299
+ }),
4300
+ run_at: Field6.datetime({
4301
+ label: "Run At",
4302
+ required: true,
4303
+ defaultValue: "NOW()",
4304
+ readonly: true
4305
+ })
4306
+ },
4307
+ indexes: [
4308
+ { fields: ["case_id"] },
4309
+ { fields: ["model"] },
4310
+ { fields: ["status"] },
4311
+ { fields: ["case_id", "run_at"] },
4312
+ { fields: ["agent_id", "model"] }
4313
+ ],
4314
+ enable: {
4315
+ trackHistory: false,
4316
+ searchable: false,
4317
+ apiEnabled: true,
4318
+ trash: false,
4319
+ mru: false
4320
+ }
4321
+ });
4322
+
3913
4323
  // src/views/ai-trace.view.ts
3914
4324
  import { defineView } from "@objectstack/spec";
3915
4325
  var AiTraceView = defineView({
@@ -3967,9 +4377,85 @@ var AiTraceView = defineView({
3967
4377
  }
3968
4378
  });
3969
4379
 
3970
- // src/views/ai-pending-action.view.ts
4380
+ // src/views/ai-message.view.ts
3971
4381
  import { defineView as defineView2 } from "@objectstack/spec";
3972
- var AiPendingActionView = defineView2({
4382
/**
 * Grid views over the `ai_messages` object.
 *
 * `list` is the default grid: newest first, 50 rows per page, with the
 * model, token-count and latency columns shown next to conversation and
 * role.
 *
 * `listViews` adds three named grids, each filtered to role = "assistant":
 *   - `assistants_only`: same metrics plus the message content, newest first.
 *   - `by_model`: sorted by model ascending, then newest first.
 *   - `slow`: additionally requires latency_ms > 5e3 (5000), slowest first.
 */
+ var AiMessageView = defineView2({
4383
+ list: {
4384
+ type: "grid",
4385
+ data: { provider: "object", object: "ai_messages" },
4386
+ columns: [
4387
+ { field: "created_at", label: "Time" },
4388
+ { field: "conversation_id", label: "Conversation" },
4389
+ { field: "role" },
4390
+ { field: "model" },
4391
+ { field: "prompt_tokens", label: "Prompt" },
4392
+ { field: "completion_tokens", label: "Output" },
4393
+ { field: "total_tokens", label: "Total" },
4394
+ { field: "latency_ms", label: "Latency (ms)" }
4395
+ ],
4396
+ sort: [{ field: "created_at", order: "desc" }],
4397
+ pagination: { pageSize: 50 },
4398
+ searchableFields: ["conversation_id", "content", "tool_call_id"],
4399
+ filterableFields: ["role", "model", "conversation_id"]
4400
+ },
4401
+ listViews: {
4402
+ assistants_only: {
4403
+ label: "Assistant turns",
4404
+ type: "grid",
4405
+ data: { provider: "object", object: "ai_messages" },
4406
+ columns: [
4407
+ { field: "created_at", label: "Time" },
4408
+ { field: "conversation_id", label: "Conversation" },
4409
+ { field: "model" },
4410
+ { field: "prompt_tokens", label: "Prompt" },
4411
+ { field: "completion_tokens", label: "Output" },
4412
+ { field: "total_tokens", label: "Total" },
4413
+ { field: "latency_ms", label: "Latency (ms)" },
4414
+ { field: "content", label: "Reply (preview)" }
4415
+ ],
4416
+ filter: [{ field: "role", operator: "=", value: "assistant" }],
4417
+ sort: [{ field: "created_at", order: "desc" }]
4418
+ },
4419
+ by_model: {
4420
+ label: "By model",
4421
+ type: "grid",
4422
+ data: { provider: "object", object: "ai_messages" },
4423
+ columns: [
4424
+ { field: "model" },
4425
+ { field: "created_at", label: "Time" },
4426
+ { field: "latency_ms", label: "Latency (ms)" },
4427
+ { field: "total_tokens", label: "Tokens" },
4428
+ { field: "conversation_id", label: "Conversation" }
4429
+ ],
4430
+ filter: [{ field: "role", operator: "=", value: "assistant" }],
4431
+ sort: [
4432
+ { field: "model", order: "asc" },
4433
+ { field: "created_at", order: "desc" }
4434
+ ]
4435
+ },
4436
+ slow: {
4437
+ label: "Slow turns (>5s)",
4438
+ type: "grid",
4439
+ data: { provider: "object", object: "ai_messages" },
4440
+ columns: [
4441
+ { field: "created_at", label: "Time" },
4442
+ { field: "model" },
4443
+ { field: "latency_ms", label: "Latency (ms)" },
4444
+ { field: "total_tokens", label: "Tokens" },
4445
+ { field: "conversation_id", label: "Conversation" }
4446
+ ],
4447
+ filter: [
4448
+ { field: "role", operator: "=", value: "assistant" },
4449
+ { field: "latency_ms", operator: ">", value: 5e3 }
4450
+ ],
4451
+ sort: [{ field: "latency_ms", order: "desc" }]
4452
+ }
4453
+ }
4454
+ });
4455
+
4456
+ // src/views/ai-pending-action.view.ts
4457
+ import { defineView as defineView3 } from "@objectstack/spec";
4458
+ var AiPendingActionView = defineView3({
3973
4459
  list: {
3974
4460
  type: "grid",
3975
4461
  data: { provider: "object", object: "ai_pending_actions" },
@@ -4199,12 +4685,325 @@ var AiPendingActionView = defineView2({
4199
4685
  }
4200
4686
  });
4201
4687
 
4688
+ // src/views/ai-eval.view.ts
4689
+ import { defineView as defineView4 } from "@objectstack/spec";
4690
/**
 * Grid views over the `ai_eval_runs` object.
 *
 * `list` is the default grid (newest run first, 50 rows per page). The named
 * `listViews` narrow or re-sort the same object:
 *   - `failures`        — only rows whose status is "fail" or "error"
 *   - `by_model`        — sorted by model, then newest run first
 *   - `latest_per_case` — sorted by case, then newest run first
 */
var AiEvalRunView = defineView4((() => {
  // Column headings for fields that do not use their default label.
  const headings = new Map([
    ["run_at", "Run At"],
    ["case_id", "Case"],
    ["agent_id", "Agent"],
    ["latency_ms", "Latency (ms)"],
    ["total_tokens", "Tokens"]
  ]);
  const toColumn = (field) => headings.has(field) ? { field, label: headings.get(field) } : { field };
  const columnsFor = (...fields) => fields.map(toColumn);
  const sortBy = (...pairs) => pairs.map(([field, order]) => ({ field, order }));
  // A fresh object per view so no two views share a mutable `data` reference.
  const runsSource = () => ({ provider: "object", object: "ai_eval_runs" });
  const namedGrid = (label, rest) => ({ label, type: "grid", data: runsSource(), ...rest });

  return {
    list: {
      type: "grid",
      data: runsSource(),
      columns: columnsFor(
        "run_at",
        "case_id",
        "agent_id",
        "model",
        "status",
        "score",
        "latency_ms",
        "total_tokens"
      ),
      sort: sortBy(["run_at", "desc"]),
      pagination: { pageSize: 50 },
      filterableFields: ["status", "model", "agent_id", "case_id"],
      searchableFields: ["response", "judge_reasoning"]
    },
    listViews: {
      failures: namedGrid("Failures & errors", {
        columns: columnsFor(
          "run_at",
          "case_id",
          "model",
          "status",
          "score",
          "error",
          "judge_reasoning"
        ),
        filter: [{ field: "status", operator: "in", value: ["fail", "error"] }],
        sort: sortBy(["run_at", "desc"])
      }),
      by_model: namedGrid("By model", {
        columns: columnsFor(
          "model",
          "case_id",
          "status",
          "score",
          "latency_ms",
          "total_tokens",
          "run_at"
        ),
        sort: sortBy(["model", "asc"], ["run_at", "desc"])
      }),
      latest_per_case: namedGrid("Latest per case", {
        columns: columnsFor(
          "case_id",
          "model",
          "status",
          "score",
          "latency_ms",
          "run_at"
        ),
        sort: sortBy(["case_id", "asc"], ["run_at", "desc"])
      })
    }
  };
})());
4763
/**
 * Default grid view over the `ai_eval_cases` object: most recently updated
 * case first, 50 rows per page, filterable by agent and enabled flag.
 */
var AiEvalCaseView = defineView4((() => {
  // [field, optional heading] pairs, in display order.
  const columnSpecs = [
    ["name"],
    ["agent_id", "Agent"],
    ["enabled"],
    ["expected_contains", "Expected (substring)"],
    ["expected_regex", "Expected (regex)"],
    ["updated_at"]
  ];
  const columns = columnSpecs.map(
    ([field, label]) => label === undefined ? { field } : { field, label }
  );

  return {
    list: {
      type: "grid",
      data: { provider: "object", object: "ai_eval_cases" },
      columns,
      sort: [{ field: "updated_at", order: "desc" }],
      pagination: { pageSize: 50 },
      filterableFields: ["agent_id", "enabled"],
      searchableFields: ["name", "description", "input"]
    }
  };
})());
4781
+
4782
+ // src/eval/eval-runner.ts
4783
+ import { randomUUID as randomUUID3 } from "crypto";
4784
+ import { z } from "zod";
4785
/** Name of the data object that stores evaluation case definitions. */
var EVAL_CASES_OBJECT = "ai_eval_cases";

/** Name of the data object that stores one row per evaluation run. */
var EVAL_RUNS_OBJECT = "ai_eval_runs";

/**
 * Shape the LLM judge must return: a numeric `score` between 0 and 100
 * (inclusive) and a non-empty `reasoning` string.
 */
var JudgeOutputSchema = (() => {
  const score = z.number().min(0).max(100);
  const reasoning = z.string().min(1);
  return z.object({ score, reasoning });
})();
4791
/**
 * Runs one stored evaluation case against an agent and grades the reply.
 *
 * A run loads the case row, sends its input through the agent's tool-calling
 * chat path, and grades the response with the first strategy that applies:
 * `expected_regex`, then `expected_contains`, then an LLM judge. Unless
 * `options.persist === false`, the outcome is inserted into the eval-runs
 * object.
 */
var EvalRunner = class {
  /**
   * @param metadataService Kept on the instance; not read by this class.
   * @param dataEngine Must provide `findOne(object, query)` and `insert(object, row)`.
   * @param aiService Must provide `toolRegistry.getAll()` and `chatWithTools()`;
   *   `generateObject()` is used for judging when present.
   * @param agentRuntime Must provide `loadAgent`, `resolveActiveSkills`,
   *   `buildSystemMessages` and `buildRequestOptions`.
   */
  constructor(metadataService, dataEngine, aiService, agentRuntime) {
    this.metadataService = metadataService;
    this.dataEngine = dataEngine;
    this.aiService = aiService;
    this.agentRuntime = agentRuntime;
  }

  /**
   * Execute and grade a single evaluation case.
   *
   * @param options.caseId Id of the case row to run (required).
   * @param [options.agentId] Overrides the case's `agent_id`.
   * @param [options.model] Overrides the model from the agent's request options.
   * @param [options.judgeModel] Model for the LLM judge; falls back to the run model.
   * @param [options.agentContext] Passed through to skill resolution and system messages.
   * @param [options.persist] Pass `false` to skip inserting the run row.
   * @returns The run result (id, caseId, agentId, model, status, score, response,
   *   error, judgeModel, judgeReasoning, token counts, latencyMs).
   * @throws {Error} If the case is missing or disabled, the agent is missing,
   *   or the case input is not a string / message / message array.
   */
  async run(options) {
    const caseRow = await this.loadCase(options.caseId);
    const agentId = options.agentId ?? caseRow.agent_id;
    const agent = await this.agentRuntime.loadAgent(agentId);
    if (!agent) {
      throw new Error(`EvalRunner: agent "${agentId}" not found`);
    }

    const userMessages = this.parseInput(caseRow.input);
    const activeSkills = await this.agentRuntime.resolveActiveSkills(
      agent,
      options.agentContext
    );
    const systemMessages = this.agentRuntime.buildSystemMessages(
      agent,
      options.agentContext,
      activeSkills
    );
    const toolDefs = this.aiService.toolRegistry.getAll();
    const agentOptions = this.agentRuntime.buildRequestOptions(
      agent,
      toolDefs,
      activeSkills
    );
    const fullMessages = [...systemMessages, ...userMessages];
    const requestedModel = options.model ?? agentOptions.model;
    const effectiveModel = requestedModel ?? "(adapter default)";

    // Always yields a non-empty description so that a thrown value which
    // stringifies to "" is still recorded as an error rather than graded.
    const describeError = (err) => {
      const text = err instanceof Error ? err.stack || err.message : String(err);
      return text || "Unknown error";
    };

    const startedAt = Date.now();
    let responseText = "";
    let errorMessage = null;
    let promptTokens = null;
    let completionTokens = null;
    let totalTokens = null;
    try {
      const chatResult = await this.aiService.chatWithTools(fullMessages, {
        ...agentOptions,
        model: requestedModel,
        maxIterations: agent.planning?.maxIterations
      });
      responseText = chatResult.content ?? "";
      const usage = chatResult.usage;
      if (usage) {
        promptTokens = usage.promptTokens ?? null;
        completionTokens = usage.completionTokens ?? null;
        totalTokens = usage.totalTokens ?? null;
      }
    } catch (err) {
      errorMessage = describeError(err);
    }
    // Latency covers only the agent call, not grading or persistence.
    const latencyMs = Date.now() - startedAt;

    let status = "error";
    let score = null;
    let judgeModel = null;
    let judgeReasoning = null;
    if (errorMessage !== null) {
      status = "error";
    } else if (caseRow.expected_regex) {
      let regex = null;
      try {
        regex = new RegExp(caseRow.expected_regex);
      } catch (re) {
        status = "error";
        errorMessage = `Invalid expected_regex: ${re instanceof Error ? re.message : String(re)}`;
      }
      if (regex) {
        const matched = regex.test(responseText);
        status = matched ? "pass" : "fail";
        score = matched ? 100 : 0;
      }
    } else if (caseRow.expected_contains) {
      const matched = responseText.includes(caseRow.expected_contains);
      status = matched ? "pass" : "fail";
      score = matched ? 100 : 0;
    } else {
      // No deterministic expectation on the case: ask an LLM judge.
      judgeModel = options.judgeModel ?? options.model ?? agentOptions.model ?? null;
      try {
        const judgement = await this.runJudge({
          model: judgeModel,
          caseRow,
          response: responseText
        });
        score = judgement.score;
        judgeReasoning = judgement.reasoning;
        status = judgement.score >= 70 ? "pass" : "fail";
      } catch (je) {
        status = "error";
        errorMessage = describeError(je);
      }
    }

    const result = {
      id: randomUUID3(),
      caseId: caseRow.id,
      agentId,
      model: effectiveModel,
      status,
      score,
      response: responseText,
      error: errorMessage,
      judgeModel,
      judgeReasoning,
      promptTokens,
      completionTokens,
      totalTokens,
      latencyMs
    };
    if (options.persist !== false) {
      await this.persist(result);
    }
    return result;
  }

  // ── Helpers ──────────────────────────────────────────────────────

  /**
   * Fetch an evaluation case by id.
   *
   * @throws {Error} If no row matches or the row has `enabled === false`.
   */
  async loadCase(caseId) {
    const row = await this.dataEngine.findOne(EVAL_CASES_OBJECT, {
      where: { id: caseId }
    });
    if (!row) {
      throw new Error(`EvalRunner: case "${caseId}" not found`);
    }
    if (row.enabled === false) {
      throw new Error(`EvalRunner: case "${caseId}" is disabled`);
    }
    return row;
  }

  /**
   * Turn a case's `input` into a list of chat messages.
   *
   * Accepted forms:
   *   - plain text                         → one user message with that text
   *   - JSON string literal                → one user message with the decoded text
   *   - JSON (or already-parsed) message   → that single message
   *   - JSON (or already-parsed) message[] → the array as-is
   * Text that merely looks like JSON but fails to parse is treated as plain text.
   *
   * @throws {Error} If the value is none of the above (including null/undefined,
   *   an object without `role`, or an array containing non-message entries).
   */
  parseInput(input) {
    const asUserText = (text) => [{ role: "user", content: text }];
    const isMessage = (value) => value !== null && typeof value === "object" && !Array.isArray(value) && "role" in value;
    // Returns the message list for a structured value, or null when it is not one.
    const fromStructured = (value) => {
      if (typeof value === "string") return asUserText(value);
      if (Array.isArray(value)) return value.every(isMessage) ? value : null;
      return isMessage(value) ? [value] : null;
    };
    const invalidInput = () => new Error("input must be a string, ModelMessage, or ModelMessage[]");

    // The column may arrive already deserialised (or missing); never call
    // string methods on it blindly.
    if (typeof input !== "string") {
      const messages = fromStructured(input);
      if (!messages) throw invalidInput();
      return messages;
    }

    const trimmed = input.trim();
    const looksLikeJson = trimmed.startsWith("[") || trimmed.startsWith("{") || trimmed.startsWith('"');
    if (!looksLikeJson) {
      return asUserText(input);
    }
    let parsed;
    try {
      parsed = JSON.parse(trimmed);
    } catch {
      // e.g. "[URGENT] please help" — not JSON after all.
      return asUserText(input);
    }
    const messages = fromStructured(parsed);
    if (!messages) throw invalidInput();
    return messages;
  }

  /**
   * Ask an LLM to grade a response against the case's rubric.
   *
   * Uses `aiService.generateObject` when available; otherwise falls back to
   * `chatWithTools` and extracts the first `{...}` span from the reply text.
   *
   * @param args.model Judge model, or null to leave the model unspecified.
   * @param args.caseRow Case row (reads judge_instructions, name, description, input).
   * @param args.response Candidate response text to grade.
   * @returns `{ score, reasoning }` validated by `JudgeOutputSchema`.
   * @throws If the judge call fails, the reply has no JSON, or validation fails.
   */
  async runJudge(args) {
    const rubric = args.caseRow.judge_instructions?.trim() || "Decide whether the assistant response correctly and helpfully answers the user request.";
    const sections = [
      `# Rubric\n${rubric}`,
      `# Case name\n${args.caseRow.name}`,
      args.caseRow.description ? `# Case description\n${args.caseRow.description}` : "",
      `# Original user input\n${args.caseRow.input}`,
      `# Candidate response\n${args.response || "(empty)"}`
    ];
    const judgeMessages = [
      {
        role: "system",
        content: "You are an impartial grader for an AI evaluation harness. Score the candidate response from 0 to 100 where 100 means it fully and correctly satisfies the rubric and 0 means it does not. Reply with structured JSON only."
      },
      {
        role: "user",
        // Drop the empty description section before joining.
        content: sections.filter(Boolean).join("\n\n")
      }
    ];
    const judgeOptions = { model: args.model ?? void 0 };

    if (typeof this.aiService.generateObject === "function") {
      const out = await this.aiService.generateObject(judgeMessages, JudgeOutputSchema, judgeOptions);
      return JudgeOutputSchema.parse(out.object);
    }

    const judged = await this.aiService.chatWithTools(judgeMessages, judgeOptions);
    const text = judged.content ?? "";
    const match = text.match(/\{[\s\S]*\}/);
    if (!match) {
      throw new Error(`Judge response did not contain JSON: ${text.slice(0, 200)}`);
    }
    return JudgeOutputSchema.parse(JSON.parse(match[0]));
  }

  /**
   * Insert a run result into the eval-runs object, mapping camelCase result
   * fields to snake_case columns. `run_at` is the time of insertion.
   */
  async persist(run) {
    await this.dataEngine.insert(EVAL_RUNS_OBJECT, {
      id: run.id,
      case_id: run.caseId,
      agent_id: run.agentId,
      model: run.model,
      status: run.status,
      score: run.score,
      response: run.response,
      error: run.error,
      judge_model: run.judgeModel,
      judge_reasoning: run.judgeReasoning,
      prompt_tokens: run.promptTokens,
      completion_tokens: run.completionTokens,
      total_tokens: run.totalTokens,
      latency_ms: run.latencyMs,
      run_at: (/* @__PURE__ */ new Date()).toISOString()
    });
  }
};
5000
+
4202
5001
  // src/plugin.ts
4203
5002
  init_data_tools();
4204
5003
  init_metadata_tools();
4205
5004
 
4206
5005
  // src/tools/query-data.tool.ts
4207
- import { z } from "zod";
5006
+ import { z as z2 } from "zod";
4208
5007
 
4209
5008
  // src/schema-retriever.ts
4210
5009
  var SchemaRetriever = class {
@@ -4379,19 +5178,19 @@ function buildAiEngineContext(ctx) {
4379
5178
  }
4380
5179
  return { roles: [], permissions: [], isSystem: true };
4381
5180
  }
4382
- var QueryPlanSchema = z.object({
4383
- objectName: z.string().min(1).describe('The snake_case object name to query (e.g. "task", "account").'),
4384
- whereJson: z.string().nullable().describe(
5181
+ var QueryPlanSchema = z2.object({
5182
+ objectName: z2.string().min(1).describe('The snake_case object name to query (e.g. "task", "account").'),
5183
+ whereJson: z2.string().nullable().describe(
4385
5184
  'Filter conditions encoded as a JSON object string. Examples: `{"status":"completed"}`, `{"subject":{"$contains":"Build"}}`, `{"amount":{"$gt":100}}`. Pass null to match all records.'
4386
5185
  ),
4387
- fields: z.array(z.string()).nullable().describe("Field names to return. Pass null to return all fields."),
4388
- orderBy: z.array(
4389
- z.object({
4390
- field: z.string(),
4391
- order: z.enum(["asc", "desc"])
5186
+ fields: z2.array(z2.string()).nullable().describe("Field names to return. Pass null to return all fields."),
5187
+ orderBy: z2.array(
5188
+ z2.object({
5189
+ field: z2.string(),
5190
+ order: z2.enum(["asc", "desc"])
4392
5191
  })
4393
5192
  ).nullable().describe("Sort order. First entry is primary sort key. Pass null for no sort."),
4394
- limit: z.number().int().min(1).max(200).nullable().describe("Maximum number of records (default 20, max 200). Pass null for default.")
5193
+ limit: z2.number().int().min(1).max(200).nullable().describe("Maximum number of records (default 20, max 200). Pass null for default.")
4395
5194
  });
4396
5195
  var QUERY_DATA_TOOL = {
4397
5196
  name: "query_data",
@@ -5360,13 +6159,14 @@ Capabilities:
5360
6159
 
5361
6160
  Guidelines:
5362
6161
  1. Always use the describe_object tool first to understand a table's structure before querying it.
5363
- 2. Respect the user's current context \u2014 if they are viewing a specific object or record, use that as the default scope.
5364
- 3. When presenting data, format it in a clear and readable way using markdown tables or bullet lists.
5365
- 4. For large result sets, summarize the data and mention the total count.
5366
- 5. When performing aggregations, explain the results in plain language.
5367
- 6. If a query returns no results, suggest possible reasons and alternative queries.
5368
- 7. Never expose internal IDs unless the user explicitly asks for them.
5369
- 8. Always answer in the same language the user is using.`,
6162
+ 2. Do NOT assume generic fields like \`status\`, \`is_active\`, \`deleted_at\`, \`type\`, or \`enabled\` exist on every object \u2014 they almost never do. Field names in \`where\`, \`fields\`, \`orderBy\`, \`groupBy\`, and aggregations MUST come from describe_object output. If the tool returns an "Unknown field" error, call describe_object on that object and retry with real field names.
6163
+ 3. Respect the user's current context \u2014 if they are viewing a specific object or record, use that as the default scope.
6164
+ 4. When presenting data, format it in a clear and readable way using markdown tables or bullet lists.
6165
+ 5. For large result sets, summarize the data and mention the total count.
6166
+ 6. When performing aggregations, explain the results in plain language.
6167
+ 7. If a query returns no results, suggest possible reasons and alternative queries.
6168
+ 8. Never expose internal IDs unless the user explicitly asks for them.
6169
+ 9. Always answer in the same language the user is using.`,
5370
6170
  tools: [
5371
6171
  "query_data",
5372
6172
  "list_objects",
@@ -5478,10 +6278,11 @@ Guidelines:
5478
6278
 
5479
6279
  // src/adapters/vercel-adapter.ts
5480
6280
  import { generateText, streamText, generateObject, tool as vercelTool, jsonSchema } from "ai";
5481
- function buildVercelOptions(options) {
6281
+ function buildVercelOptions(options, modelId) {
5482
6282
  if (!options) return {};
5483
6283
  const opts = {};
5484
- if (options.temperature != null) opts.temperature = options.temperature;
6284
+ const reasoning = isReasoningModel(modelId);
6285
+ if (options.temperature != null && !reasoning) opts.temperature = options.temperature;
5485
6286
  if (options.maxTokens != null) opts.maxTokens = options.maxTokens;
5486
6287
  if (options.stop?.length) opts.stopSequences = options.stop;
5487
6288
  if (options.tools?.length) {
@@ -5499,6 +6300,11 @@ function buildVercelOptions(options) {
5499
6300
  }
5500
6301
  return opts;
5501
6302
  }
6303
/**
 * Report whether a model id names an OpenAI reasoning-family model.
 *
 * Any provider prefix up to the last "/" is ignored (e.g. "openai/o3-mini"),
 * and the remaining id matches, case-insensitively, when it is exactly
 * `o1`, `o3`, `o4` or `gpt-5`, or starts with one of those followed by "-".
 * `buildVercelOptions` uses the result to leave `temperature` out of the
 * request for these models.
 *
 * @param {string | null | undefined} modelId Model identifier; may be absent.
 * @returns {boolean} `false` for missing, empty or non-string ids.
 */
function isReasoningModel(modelId) {
  // Adapters may expose no (or a non-string) modelId; treat that as "not reasoning".
  if (typeof modelId !== "string" || modelId === "") return false;
  // lastIndexOf yields -1 when there is no "/", so slice(0) keeps the whole id.
  const bareId = modelId.slice(modelId.lastIndexOf("/") + 1);
  return /^(?:o[134]|gpt-5)(?:-|$)/i.test(bareId);
}
5502
6308
  var VercelLLMAdapter = class {
5503
6309
  constructor(config) {
5504
6310
  this.name = "vercel";
@@ -5508,7 +6314,7 @@ var VercelLLMAdapter = class {
5508
6314
  const result = await generateText({
5509
6315
  model: this.model,
5510
6316
  messages,
5511
- ...buildVercelOptions(options)
6317
+ ...buildVercelOptions(options, this.model.modelId)
5512
6318
  });
5513
6319
  return {
5514
6320
  content: result.text,
@@ -5525,7 +6331,7 @@ var VercelLLMAdapter = class {
5525
6331
  const result = await generateText({
5526
6332
  model: this.model,
5527
6333
  prompt,
5528
- ...buildVercelOptions(options)
6334
+ ...buildVercelOptions(options, this.model.modelId)
5529
6335
  });
5530
6336
  return {
5531
6337
  content: result.text,
@@ -5541,7 +6347,7 @@ var VercelLLMAdapter = class {
5541
6347
  const result = streamText({
5542
6348
  model: this.model,
5543
6349
  messages,
5544
- ...buildVercelOptions(options)
6350
+ ...buildVercelOptions(options, this.model.modelId)
5545
6351
  });
5546
6352
  try {
5547
6353
  for await (const part of result.fullStream) {
@@ -5567,7 +6373,7 @@ var VercelLLMAdapter = class {
5567
6373
  schema,
5568
6374
  schemaName,
5569
6375
  schemaDescription,
5570
- ...buildVercelOptions(rest)
6376
+ ...buildVercelOptions(rest, this.model.modelId)
5571
6377
  });
5572
6378
  return {
5573
6379
  object: result.object,
@@ -5656,7 +6462,7 @@ function computeCost(pricing, usage) {
5656
6462
  }
5657
6463
 
5658
6464
  // src/plugin.ts
5659
- var AIServicePlugin = class {
6465
+ var _AIServicePlugin = class _AIServicePlugin {
5660
6466
  constructor(options = {}) {
5661
6467
  this.name = "com.objectstack.service-ai";
5662
6468
  this.version = "1.0.0";
@@ -5664,13 +6470,44 @@ var AIServicePlugin = class {
5664
6470
  this.dependencies = ["com.objectstack.engine.objectql"];
5665
6471
  this.options = options;
5666
6472
  }
6473
+ /**
6474
+ * Normalise OpenAI-compatible preset providers (DeepSeek / DashScope /
6475
+ * Cloudflare / SiliconFlow / OpenRouter) into the `provider=openai` shape
6476
+ * with the appropriate base URL pre-filled. Returns the rewritten values
6477
+ * map; non-preset providers pass through unchanged.
6478
+ */
6479
+ normalisePresetProvider(values) {
6480
+ const provider = String(values.provider ?? "memory");
6481
+ if (provider === "cloudflare") {
6482
+ const accountId = String(values.cloudflare_account_id ?? "").trim();
6483
+ const gatewayId = String(values.cloudflare_gateway_id ?? "default").trim() || "default";
6484
+ if (!accountId) return values;
6485
+ return {
6486
+ ...values,
6487
+ provider: "openai",
6488
+ openai_api_key: values.cloudflare_api_key,
6489
+ openai_base_url: `https://gateway.ai.cloudflare.com/v1/${accountId}/${gatewayId}/compat`,
6490
+ openai_model: values.cloudflare_model ?? "openai/gpt-4o-mini"
6491
+ };
6492
+ }
6493
+ const preset = _AIServicePlugin.OPENAI_COMPATIBLE_PRESETS[provider];
6494
+ if (!preset) return values;
6495
+ return {
6496
+ ...values,
6497
+ provider: "openai",
6498
+ openai_api_key: values[`${provider}_api_key`],
6499
+ openai_base_url: preset.baseURL,
6500
+ openai_model: values[`${provider}_model`] ?? preset.defaultModel
6501
+ };
6502
+ }
5667
6503
  /**
5668
6504
  * Build an LLM adapter from a provider/key/model triple. Used both
5669
6505
  * by the boot-time auto-detect path and by the live `settings:changed`
5670
6506
  * rebuild path. Returns `null` if the requested provider cannot be
5671
6507
  * loaded or required credentials are missing.
5672
6508
  */
5673
- async buildAdapterFromValues(ctx, values) {
6509
+ async buildAdapterFromValues(ctx, rawValues) {
6510
+ const values = this.normalisePresetProvider(rawValues);
5674
6511
  const provider = String(values.provider ?? "memory");
5675
6512
  if (provider === "memory") {
5676
6513
  return { adapter: new MemoryLLMAdapter(), description: "MemoryLLMAdapter (echo mode)" };
@@ -5968,8 +6805,8 @@ var AIServicePlugin = class {
5968
6805
  type: "plugin",
5969
6806
  scope: "project",
5970
6807
  namespace: "ai",
5971
- objects: [AiConversationObject, AiMessageObject, AiTraceObject, AiPendingActionObject],
5972
- views: [AiTraceView, AiPendingActionView]
6808
+ objects: [AiConversationObject, AiMessageObject, AiTraceObject, AiPendingActionObject, AiEvalCaseObject, AiEvalRunObject],
6809
+ views: [AiTraceView, AiMessageView, AiPendingActionView, AiEvalCaseView, AiEvalRunView]
5973
6810
  });
5974
6811
  if (this.options.debug) {
5975
6812
  ctx.hook("ai:beforeChat", async (messages) => {
@@ -6006,7 +6843,11 @@ var AIServicePlugin = class {
6006
6843
  try {
6007
6844
  const dataEngine = ctx.getService("data");
6008
6845
  if (dataEngine) {
6009
- registerDataTools(this.service.toolRegistry, { dataEngine });
6846
+ registerDataTools(this.service.toolRegistry, {
6847
+ dataEngine,
6848
+ metadataService,
6849
+ protocol: protocolService
6850
+ });
6010
6851
  ctx.logger.info("[AI] Built-in data tools registered");
6011
6852
  if (metadataService) {
6012
6853
  registerQueryDataTool(this.service.toolRegistry, {
@@ -6227,6 +7068,20 @@ var AIServicePlugin = class {
6227
7068
  const assistantRoutes = buildAssistantRoutes(this.service, agentRuntime, skillRegistry, ctx.logger);
6228
7069
  routes.push(...assistantRoutes);
6229
7070
  ctx.logger.info(`[AI] Assistant (ambient) routes registered (${assistantRoutes.length} routes)`);
7071
+ const evalDataEngine = ctx.getService("data");
7072
+ if (evalDataEngine && typeof evalDataEngine.insert === "function") {
7073
+ const evalRunner = new EvalRunner(
7074
+ metadataService,
7075
+ evalDataEngine,
7076
+ this.service,
7077
+ agentRuntime
7078
+ );
7079
+ const evalRoutes = buildEvalRoutes(evalRunner, ctx.logger);
7080
+ routes.push(...evalRoutes);
7081
+ ctx.logger.info(`[AI] Eval routes registered (${evalRoutes.length} routes)`);
7082
+ } else {
7083
+ ctx.logger.debug("[AI] IDataEngine not available, skipping eval routes");
7084
+ }
6230
7085
  } else {
6231
7086
  ctx.logger.debug("[AI] Metadata service not available, skipping agent and assistant routes");
6232
7087
  }
@@ -6455,6 +7310,20 @@ var AIServicePlugin = class {
6455
7310
  this.service = void 0;
6456
7311
  }
6457
7312
  };
7313
/**
 * Preset providers that speak the OpenAI `/v1/chat/completions` protocol and
 * are therefore served through the `@ai-sdk/openai` SDK with a fixed base
 * URL. Keeping the table here lets operators pick a provider by name
 * (e.g. "DeepSeek") instead of entering "openai" plus a base URL, and keeps
 * buildAdapterFromValues free of a per-provider switch.
 *
 * Each entry maps a provider id to `{ baseURL, defaultModel }`.
 */
_AIServicePlugin.OPENAI_COMPATIBLE_PRESETS = Object.fromEntries(
  [
    // [provider id, base URL, default model]
    ["deepseek", "https://api.deepseek.com", "deepseek-chat"],
    ["dashscope", "https://dashscope.aliyuncs.com/compatible-mode/v1", "qwen-plus"],
    ["siliconflow", "https://api.siliconflow.cn/v1", "Qwen/Qwen2.5-7B-Instruct"],
    ["openrouter", "https://openrouter.ai/api/v1", "openai/gpt-4o-mini"]
  ].map(([providerId, baseURL, defaultModel]) => [providerId, { baseURL, defaultModel }])
);

// Public name for the plugin class; the underscore-prefixed binding is what
// the class body and the static assignment above refer to.
var AIServicePlugin = _AIServicePlugin;
6458
7327
  function extractOverrides(payload) {
6459
7328
  if (!payload || typeof payload !== "object") return {};
6460
7329
  const p = payload;