@objectstack/formula 10.2.0 → 11.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@objectstack/formula",
3
- "version": "10.2.0",
3
+ "version": "11.0.0",
4
4
  "license": "Apache-2.0",
5
5
  "description": "ObjectStack canonical expression engine — CEL (cel-js) + ObjectStack stdlib + dialect registry",
6
6
  "main": "dist/index.js",
@@ -14,7 +14,7 @@
14
14
  },
15
15
  "dependencies": {
16
16
  "@marcbachmann/cel-js": "^7.6.1",
17
- "@objectstack/spec": "10.2.0"
17
+ "@objectstack/spec": "11.0.0"
18
18
  },
19
19
  "devDependencies": {
20
20
  "typescript": "^6.0.3",
package/src/cel-engine.ts CHANGED
@@ -126,6 +126,42 @@ export function firstUndeclaredReference(
126
126
  return null;
127
127
  }
128
128
 
129
+ /**
130
+ * The result type cel-js's type-checker infers for a `value`/`predicate`
131
+ * expression — its raw CEL type name (`'int'`, `'double'`, `'string'`, `'bool'`,
132
+ * `'google.protobuf.Timestamp'`, `'dyn'`, …) — or `null` when the expression does
133
+ * not type-check, is empty, or fails to parse. Reuses the SAME record-scoped, stdlib-registered env as
134
+ * {@link firstUndeclaredReference}: namespace roots (`record`, `previous`, …) are
135
+ * declared `map` and `knownFields` are declared `dyn`, so both `record.<field>`
136
+ * and bare `<field>` references resolve while every stdlib call carries its
137
+ * declared return type.
138
+ *
139
+ * Deliberately conservative. A member access (`record.amount`) or a bare field is
140
+ * `dyn`, and an operator over two `dyn` operands stays `dyn` (cel-js cannot prove
141
+ * it numeric), so `record.a + record.b` — which could be string concatenation —
142
+ * infers `dyn`, not a number. A typed literal or a stdlib return DOES pin the
143
+ * type, so the common computed-number formulas resolve concretely:
144
+ * `daysBetween(start_date, end_date) + 1` → `int`, `amount * 0.1` → `double`. A
145
+ * caller keying off a concrete numeric type therefore never mis-classifies an
146
+ * ambiguous formula.
147
+ */
148
+ export function inferCelType(source: string, knownFields: readonly string[] = []): string | null {
149
+ if (typeof source !== 'string' || !source.trim()) return null;
150
+ try {
151
+ const env = knownFields.length === 0
152
+ ? (recordScopeEnv ??= buildScopedEnv([]))
153
+ : buildScopedEnv(knownFields);
154
+ const result = env.parse(source).check?.() as
155
+ | { valid?: boolean; type?: unknown }
156
+ | undefined;
157
+ if (!result || result.valid === false) return null;
158
+ return typeof result.type === 'string' ? result.type : null;
159
+ } catch {
160
+ // Parse/other faults mean we cannot prove a type — the conservative `null`.
161
+ return null;
162
+ }
163
+ }
164
+
129
165
  /** @deprecated use {@link firstUndeclaredReference} with no fields. */
130
166
  export function detectBareReference(source: string): string | null {
131
167
  return firstUndeclaredReference(source);
package/src/index.ts CHANGED
@@ -24,7 +24,7 @@ export type { CelFilterCompileResult, CelFilterCompileOptions, CelFilterFailReas
24
24
  export { matchesFilterCondition } from './matches-filter';
25
25
  // ADR-0032 — shared validator + introspection (one validator for build,
26
26
  // registration, and the agent-callable validate_expression tool).
27
- export { validateExpression, introspectScope, expectedDialect, CEL_STDLIB_FUNCTIONS } from './validate';
28
- export type { FieldRole, ExprSchemaHint, ExprValidationError, ExprValidationResult } from './validate';
27
+ export { validateExpression, introspectScope, expectedDialect, inferExpressionType, CEL_STDLIB_FUNCTIONS } from './validate';
28
+ export type { FieldRole, ExprInput, ExprSchemaHint, ExprValidationError, ExprValidationResult, InferredValueType } from './validate';
29
29
  export type { SeedValue, SeedPrimitive } from './seed-eval';
30
30
  export type { DialectEngine, EvalContext, EvalResult, EvalError } from './types';
package/src/stdlib.ts CHANGED
@@ -14,6 +14,7 @@
14
14
  import type { Environment } from '@marcbachmann/cel-js';
15
15
 
16
16
  import type { EvalContext } from './types';
17
+ import { createEvalUser, type EvalUser } from '@objectstack/spec';
17
18
 
18
19
  /**
19
20
  * Calendar-day parts (y/m/d) of an instant *as seen in a timezone*
@@ -266,6 +267,34 @@ export function registerNumericCoercions(env: Environment): Environment {
266
267
  return env;
267
268
  }
268
269
 
270
+ /**
271
+ * Normalize the loosely-typed EvalContext user into the canonical EvalUser
272
+ * (ADR-0068). `roles` is preferred; a legacy singular `role` is folded in so
273
+ * existing call sites keep working.
274
+ */
275
+ function toEvalUser(u: NonNullable<EvalContext['user']>): EvalUser {
276
+ const legacyRole = typeof u.role === 'string' && u.role ? [u.role] : [];
277
+ const roles = Array.isArray(u.roles) ? (u.roles as string[]) : [];
278
+ const canonical = createEvalUser({
279
+ id: u.id,
280
+ name: typeof u.name === 'string' ? u.name : undefined,
281
+ email: typeof u.email === 'string' ? u.email : undefined,
282
+ roles: [...roles, ...legacyRole],
283
+ organizationId:
284
+ typeof u.organizationId === 'string' || u.organizationId === null
285
+ ? (u.organizationId as string | null)
286
+ : undefined,
287
+ });
288
+ // Back-compat: keep the DEPRECATED singular `role` readable so existing
289
+ // predicates (`os.user.role`, `current_user.role`) keep resolving during the
290
+ // ADR-0068 migration window. `roles[]` is the canonical surface; the footgun
291
+ // ADR-0068 removes is the server-side OVERWRITE of `role`, not read access.
292
+ if (typeof u.role === 'string' && u.role) {
293
+ (canonical as EvalUser & { role?: string }).role = u.role;
294
+ }
295
+ return canonical;
296
+ }
297
+
269
298
  /**
270
299
  * Build the variable scope for a single evaluation. Absent fields are simply
271
300
  * not bound — CEL macros (`has(record.foo)`) handle missing-key safely.
@@ -279,7 +308,19 @@ export function buildScope(ctx: EvalContext): Record<string, unknown> {
279
308
 
280
309
  // Namespaced data — written as `os.user.id`, `os.env`, etc. in CEL.
281
310
  const os: Record<string, unknown> = {};
282
- if (ctx.user !== undefined) os.user = ctx.user;
311
+ if (ctx.user !== undefined) {
312
+ // ADR-0068: one canonical EvalUser under every alias (`current_user`,
313
+ // `user`, `ctx.user`, `os.user`) — the SAME object, so a predicate
314
+ // evaluates identically wherever it is authored.
315
+ const currentUser = toEvalUser(ctx.user);
316
+ scope.current_user = currentUser;
317
+ scope.user = currentUser;
318
+ scope.ctx = {
319
+ ...(typeof scope.ctx === 'object' && scope.ctx !== null ? scope.ctx : {}),
320
+ user: currentUser,
321
+ };
322
+ os.user = currentUser;
323
+ }
283
324
  if (ctx.org !== undefined) os.org = ctx.org;
284
325
  if (ctx.env !== undefined) os.env = ctx.env;
285
326
  if (Object.keys(os).length > 0) scope.os = os;
package/src/types.ts CHANGED
@@ -30,9 +30,22 @@ export interface EvalContext {
30
30
  * Defaults to `UTC` when unset. Calendar-day `date` rendering stays tz-naive.
31
31
  */
32
32
  timezone?: string;
33
- /** Current authenticated subject (hook / action / view contexts). */
33
+ /**
34
+ * Current authenticated subject (hook / action / view contexts).
35
+ *
36
+ * ADR-0068: the canonical user contract is {@link EvalUser} from
37
+ * `@objectstack/spec`, surfaced to predicates as `current_user` (aliases
38
+ * `user`, `ctx.user`, `os.user`). `roles: string[]` is the only canonical role field;
39
+ * the singular `role` is deprecated (its "overwritten to 'admin' on
40
+ * promotion" behavior is the footgun ADR-0068 eliminates).
41
+ */
34
42
  user?: {
35
43
  id: string;
44
+ /** CANONICAL (ADR-0068). Scope-resolved role names assigned to the user. */
45
+ roles?: string[];
46
+ /** Active organization ID (null = platform / unscoped). */
47
+ organizationId?: string | null;
48
+ /** @deprecated ADR-0068 — use {@link roles}. Retained for back-compat only. */
36
49
  role?: string;
37
50
  email?: string;
38
51
  [key: string]: unknown;
@@ -1,5 +1,5 @@
1
1
  import { describe, it, expect } from 'vitest';
2
- import { validateExpression, introspectScope, expectedDialect } from './validate';
2
+ import { validateExpression, introspectScope, expectedDialect, inferExpressionType } from './validate';
3
3
 
4
4
  describe('validateExpression (ADR-0032)', () => {
5
5
  describe('predicates (CEL)', () => {
@@ -165,3 +165,45 @@ describe('validateExpression (ADR-0032)', () => {
165
165
  });
166
166
  });
167
167
  });
168
+
169
+ describe('inferExpressionType — coarse value-type of a formula', () => {
170
+ // The host object's fields, so a bare `<field>` reference resolves the same as
171
+ // `record.<field>` (a stored formula may be written either way).
172
+ const fields = ['start_date', 'end_date', 'amount', 'rate', 'first', 'last', 'name', 'items'];
173
+
174
+ it('infers number for a computed-number formula (the leave_days repro)', () => {
175
+ // daysBetween(...): int, int + 1 → int → number. The exact case a "total
176
+ // leave days" dashboard card needs a SUM measure derived for.
177
+ expect(inferExpressionType('daysBetween(start_date, end_date) + 1', { fields })).toBe('number');
178
+ expect(inferExpressionType('daysBetween(record.start_date, record.end_date) + 1')).toBe('number');
179
+ expect(inferExpressionType('amount * 0.1', { fields })).toBe('number'); // dyn * double → double
180
+ expect(inferExpressionType('round(amount)', { fields })).toBe('number');
181
+ expect(inferExpressionType('len(items)', { fields })).toBe('number');
182
+ });
183
+
184
+ it('accepts the canonical Expression envelope as input', () => {
185
+ expect(inferExpressionType({ dialect: 'cel', source: 'amount * 0.1' }, { fields })).toBe('number');
186
+ });
187
+
188
+ it('infers text / boolean / date for non-numeric formulas', () => {
189
+ expect(inferExpressionType('upper(name)', { fields })).toBe('text');
190
+ expect(inferExpressionType('rate >= 0.5', { fields })).toBe('boolean');
191
+ expect(inferExpressionType('today()')).toBe('date');
192
+ });
193
+
194
+ it('is conservative — an ambiguous (dyn) result is unknown, never number', () => {
195
+ // `first + last` could be string concatenation OR numeric addition; with two
196
+ // untyped operands cel-js yields `dyn`, so we must NOT call it a number (else
197
+ // a dataset would SUM a text formula). This is the safety property.
198
+ expect(inferExpressionType('first + last', { fields })).toBe('unknown');
199
+ expect(inferExpressionType('amount + rate', { fields })).toBe('unknown');
200
+ });
201
+
202
+ it('returns unknown for empty, absent, or un-type-checkable expressions', () => {
203
+ expect(inferExpressionType('')).toBe('unknown');
204
+ expect(inferExpressionType(null)).toBe('unknown');
205
+ expect(inferExpressionType(undefined)).toBe('unknown');
206
+ expect(inferExpressionType('no_such_fn(amount)', { fields })).toBe('unknown'); // no overload
207
+ expect(inferExpressionType('undeclared_field + 1')).toBe('unknown'); // bare ref, no fields given
208
+ });
209
+ });
package/src/validate.ts CHANGED
@@ -17,7 +17,7 @@
17
17
  * This validator detects that specific mistake and returns the exact fix.
18
18
  */
19
19
 
20
- import { celEngine, firstUndeclaredReference } from './cel-engine';
20
+ import { celEngine, firstUndeclaredReference, inferCelType } from './cel-engine';
21
21
  import { templateEngine } from './template-engine';
22
22
 
23
23
  export type FieldRole = 'predicate' | 'value' | 'template';
@@ -53,6 +53,14 @@ export interface ExprSchemaHint {
53
53
  * did-you-mean *warning*. (Default.)
54
54
  */
55
55
  scope?: 'record' | 'flattened';
56
+ /**
57
+ * ADR-0068 D4 — the closed catalog of valid role names (built-in + declared).
58
+ * When supplied, a role-membership predicate testing a role NOT in this set
59
+ * (e.g. `'org_admni' in current_user.roles`) is flagged as an error. Closes
60
+ * the AI-hallucination hole where a model invents a plausible-but-nonexistent
61
+ * role that then silently never matches. Absent or empty => role checks are skipped.
62
+ */
63
+ roleCatalog?: readonly string[];
56
64
  }
57
65
 
58
66
  export interface ExprValidationError {
@@ -146,6 +154,51 @@ function levenshtein(a: string, b: string): number {
146
154
  return dp[m];
147
155
  }
148
156
 
157
+ // ADR-0068 D4 — role-membership predicate heads: a role NAME literal used in a
158
+ // membership test against a user subject's `.roles` (or the deprecated singular
159
+ // `.role`). Matched names are validated against the closed role catalog.
160
+ const ROLE_IN_RE = /(['"])([a-z0-9_]+)\1\s+in\s+(?:current_user|user|ctx\.user)\.roles\b/g;
161
+ const ROLE_CONTAINS_RE = /(?:current_user|user|ctx\.user)\.roles\s*\.\s*contains\(\s*(['"])([a-z0-9_]+)\1\s*\)/g;
162
+ // Bounded quantifiers ({0,N}, not * / *?) keep this linear: a CEL `exists`
163
+ // body is tiny in practice, and unbounded greedy/lazy scanners here backtrack
164
+ // polynomially (O(n^2)) on adversarial input like repeated `user.roles.exists(`
165
+ // (ADR-0068 D4 ReDoS hardening). The pre-`==` class excludes `=` so the bounded
166
+ // run stops cleanly before the operator without a lazy quantifier.
167
+ const ROLE_EXISTS_RE = /(?:current_user|user|ctx\.user)\.roles\s*\.\s*exists\s*\([^,)]{0,64},[^)=]{0,128}==\s*(['"])([a-z0-9_]+)\1/g;
168
+ const ROLE_EQ_RE = /(?:current_user|user|ctx\.user)\.role\s*==\s*(['"])([a-z0-9_]+)\1/g;
169
+
170
+ /**
171
+ * Flag role-membership predicates referencing a role outside the closed catalog
172
+ * (ADR-0068 D4 — anti-hallucination). No-op when `roleCatalog` is absent or empty.
173
+ */
174
+ function checkRoleCatalog(
175
+ source: string,
176
+ schema: ExprSchemaHint | undefined,
177
+ errors: ExprValidationError[],
178
+ ): void {
179
+ const catalog = schema?.roleCatalog;
180
+ if (!catalog || catalog.length === 0) return;
181
+ const known = new Set(catalog);
182
+ const seen = new Set<string>();
183
+ for (const re of [ROLE_IN_RE, ROLE_CONTAINS_RE, ROLE_EXISTS_RE, ROLE_EQ_RE]) {
184
+ re.lastIndex = 0;
185
+ let m: RegExpExecArray | null;
186
+ while ((m = re.exec(source)) !== null) {
187
+ const name = m[2];
188
+ if (known.has(name) || seen.has(name)) continue;
189
+ seen.add(name);
190
+ const suggestion = nearest(name, catalog);
191
+ errors.push({
192
+ source,
193
+ message:
194
+ `unknown role \`${name}\` — not a defined role` +
195
+ (suggestion ? `; did you mean \`${suggestion}\`?` : '.') +
196
+ ` Valid roles: ${catalog.join(', ')}.`,
197
+ });
198
+ }
199
+ }
200
+ }
201
+
149
202
  /**
150
203
  * Validate one expression for a given field role. Never throws — returns a
151
204
  * structured result. Call sites decide whether to throw (build/registration)
@@ -194,6 +247,7 @@ export function validateExpression(
194
247
  });
195
248
  } else {
196
249
  checkFieldExistence(source, schema, errors);
250
+ checkRoleCatalog(source, schema, errors);
197
251
  if (schema?.scope === 'record') {
198
252
  // In a `record`-scoped site a bare top-level identifier is a silent bug —
199
253
  // it must be `record.<field>` (#1928). Hard error.
@@ -246,16 +300,61 @@ export function introspectScope(role: FieldRole, schema?: ExprSchemaHint): {
246
300
  dialect: 'cel' | 'template';
247
301
  fields: string[];
248
302
  roots: string[];
303
+ roles: string[];
249
304
  functions: string[];
250
305
  } {
251
306
  return {
252
307
  dialect: expectedDialect(role),
253
308
  fields: [...(schema?.fields ?? [])],
254
- roots: ['record', 'previous', 'input', 'os', 'vars'],
309
+ roots: ['record', 'previous', 'input', 'os', 'current_user', 'user', 'vars'],
310
+ roles: [...(schema?.roleCatalog ?? [])],
255
311
  functions: CEL_STDLIB_FUNCTIONS,
256
312
  };
257
313
  }
258
314
 
315
+ /**
316
+ * Coarse value categories a `value`/formula expression can compute. `'unknown'`
317
+ * means cel-js could not prove a concrete type — either a `dyn` result (an
318
+ * ambiguous expression over untyped operands) or one that does not type-check.
319
+ */
320
+ export type InferredValueType = 'number' | 'text' | 'boolean' | 'date' | 'unknown';
321
+
322
+ /** Map a cel-js type-checker type name onto an ObjectStack field value category. */
323
+ function celTypeToValueType(celType: string | null): InferredValueType {
324
+ switch (celType) {
325
+ case 'int':
326
+ case 'uint':
327
+ case 'double':
328
+ return 'number';
329
+ case 'string':
330
+ return 'text';
331
+ case 'bool':
332
+ return 'boolean';
333
+ case 'google.protobuf.Timestamp':
334
+ return 'date';
335
+ default:
336
+ // `dyn`, `google.protobuf.Duration`, list/map, null, or un-type-checkable.
337
+ return 'unknown';
338
+ }
339
+ }
340
+
341
+ /**
342
+ * Infer the coarse value type a `value`/formula expression computes — `'number'`,
343
+ * `'text'`, `'boolean'`, `'date'`, or `'unknown'` when cel-js cannot prove a
344
+ * concrete type. `schema.fields` (the host object's field names) are declared so
345
+ * a bare `<field>` reference resolves the same as `record.<field>`.
346
+ *
347
+ * The motivating use is measure-eligibility: a dataset derives a SUM measure for
348
+ * a `formula` field ONLY when this returns `'number'`, so an ambiguous or
349
+ * non-numeric formula never yields an incoherent measure. Conservative by
350
+ * construction — see {@link inferCelType}.
351
+ */
352
+ export function inferExpressionType(input: ExprInput, schema?: ExprSchemaHint): InferredValueType {
353
+ const { source } = toSource(input);
354
+ if (!source.trim()) return 'unknown';
355
+ return celTypeToValueType(inferCelType(source, schema?.fields));
356
+ }
357
+
259
358
  /**
260
359
  * Public catalog of CEL functions available in expressions — what `introspectScope`
261
360
  * advertises to authors (incl. AI). Every entry MUST actually resolve at runtime: