@rudderjs/ai 1.18.2 → 1.18.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/README.md +16 -11
  2. package/dist/budget-orm/index.d.ts +95 -1
  3. package/dist/budget-orm/index.d.ts.map +1 -1
  4. package/dist/budget-orm/index.js +176 -4
  5. package/dist/budget-orm/index.js.map +1 -1
  6. package/dist/commands/ai-eval.d.ts +97 -1
  7. package/dist/commands/ai-eval.d.ts.map +1 -1
  8. package/dist/commands/ai-eval.js +379 -4
  9. package/dist/commands/ai-eval.js.map +1 -1
  10. package/dist/commands/make-agent.d.ts +2 -1
  11. package/dist/commands/make-agent.d.ts.map +1 -1
  12. package/dist/commands/make-agent.js +22 -4
  13. package/dist/commands/make-agent.js.map +1 -1
  14. package/dist/conversation-orm/index.d.ts +115 -1
  15. package/dist/conversation-orm/index.d.ts.map +1 -1
  16. package/dist/conversation-orm/index.js +214 -4
  17. package/dist/conversation-orm/index.js.map +1 -1
  18. package/dist/doctor.d.ts +1 -1
  19. package/dist/doctor.d.ts.map +1 -1
  20. package/dist/doctor.js +67 -4
  21. package/dist/doctor.js.map +1 -1
  22. package/dist/memory-embedding/index.d.ts +120 -1
  23. package/dist/memory-embedding/index.d.ts.map +1 -1
  24. package/dist/memory-embedding/index.js +228 -4
  25. package/dist/memory-embedding/index.js.map +1 -1
  26. package/dist/memory-orm/index.d.ts +117 -1
  27. package/dist/memory-orm/index.d.ts.map +1 -1
  28. package/dist/memory-orm/index.js +186 -4
  29. package/dist/memory-orm/index.js.map +1 -1
  30. package/dist/server/index.d.ts +1 -1
  31. package/dist/server/index.d.ts.map +1 -1
  32. package/dist/server/index.js +5 -4
  33. package/dist/server/index.js.map +1 -1
  34. package/dist/server/provider.d.ts +22 -0
  35. package/dist/server/provider.d.ts.map +1 -0
  36. package/dist/server/provider.js +179 -0
  37. package/dist/server/provider.js.map +1 -0
  38. package/package.json +12 -6
package/README.md CHANGED
@@ -1,29 +1,34 @@
1
1
  # @rudderjs/ai
2
2
 
3
- > Deprecated. The AI engine moved to [`@gemstack/ai-sdk`](https://www.npmjs.com/package/@gemstack/ai-sdk).
3
+ > The AI engine moved to [`@gemstack/ai-sdk`](https://www.npmjs.com/package/@gemstack/ai-sdk). This package re-exports the engine for backwards compatibility **and** is the home of the Rudder-specific AI bindings that intentionally did not graduate to the framework-agnostic engine.
4
4
 
5
- This package is now a thin compatibility shim that re-exports `@gemstack/ai-sdk` (and every one of its subpaths) so existing Rudder apps and the internal dependents (`telescope`, `orm-prisma`, `orm-drizzle`) keep working unchanged.
5
+ ## Two kinds of module here
6
6
 
7
- ## Migrate
8
-
9
- Replace the import specifier; the API is identical.
7
+ **1. Re-exported engine subpaths** — thin `export *` over `@gemstack/ai-sdk`. Migrate these by swapping the specifier; the API is identical.
10
8
 
11
9
  ```diff
12
10
  - import { Agent } from '@rudderjs/ai'
13
11
  + import { Agent } from '@gemstack/ai-sdk'
14
12
  ```
15
13
 
16
- Subpaths map one to one:
17
-
18
- | Old | New |
14
+ | `@rudderjs/ai` subpath | `@gemstack/ai-sdk` equivalent |
19
15
  |---|---|
20
16
  | `@rudderjs/ai` | `@gemstack/ai-sdk` |
21
- | `@rudderjs/ai/server` | `@gemstack/ai-sdk/server` |
22
17
  | `@rudderjs/ai/node` | `@gemstack/ai-sdk/node` |
23
18
  | `@rudderjs/ai/mcp` | `@gemstack/ai-sdk/mcp` |
24
19
  | `@rudderjs/ai/eval` | `@gemstack/ai-sdk/eval` |
25
20
  | `@rudderjs/ai/computer-use` | `@gemstack/ai-sdk/computer-use` |
26
21
  | `@rudderjs/ai/react` | `@gemstack/ai-sdk/react` |
27
- | `@rudderjs/ai/*` | `@gemstack/ai-sdk/*` |
22
+ | `@rudderjs/ai/observers`, `/chat-mentions`, `/gateway` | same subpath on `@gemstack/ai-sdk` |
23
+
24
+ **2. Rudder bindings** — real implementations that couple the agnostic engine to a Rudder package, so they live here (no `@gemstack/ai-sdk` equivalent). Keep importing them from `@rudderjs/ai`:
25
+
26
+ | Subpath | Couples to | What it is |
27
+ |---|---|---|
28
+ | `@rudderjs/ai/server` | `@rudderjs/core` | `AiProvider` — reads `config('ai')`, wires providers/stores into the container |
29
+ | `@rudderjs/ai/commands/make-agent` | `@rudderjs/console` | `make:agent` scaffolder spec |
30
+ | `@rudderjs/ai/commands/ai-eval` | `@rudderjs/core` | `ai:eval` CLI command (discovers + runs eval suites) |
31
+ | `@rudderjs/ai/doctor` | `@rudderjs/console` | `ai:provider-keys` doctor check |
32
+ | `@rudderjs/ai/{conversation,memory,budget}-orm`, `/memory-embedding` | `@rudderjs/orm` | ORM-backed stores implementing the engine's neutral contracts |
28
33
 
29
- See the `@gemstack/ai-sdk` README for full documentation.
34
+ See the `@gemstack/ai-sdk` README for full engine documentation.
@@ -1,2 +1,96 @@
1
- export * from '@gemstack/ai-sdk/budget-orm';
1
+ /**
2
+ * `@rudderjs/ai/budget-orm` — ORM-backed {@link BudgetStorage} for #A6 Phase 4.
3
+ *
4
+ * Production-grade replacement for `memoryBudgetStorage()` (which is
5
+ * single-process only). Persists per-user spend counters in a
6
+ * `BudgetUsage` table via the registered `@rudderjs/orm` adapter — works
7
+ * across queue workers, web processes, and horizontally-scaled deployments.
8
+ *
9
+ * Wire it into your AI middleware:
10
+ *
11
+ * ```ts
12
+ * import { withBudget } from '@gemstack/ai-sdk'
13
+ * import { ormBudgetStorage } from '@rudderjs/ai/budget-orm'
14
+ *
15
+ * const budgeted = withBudget({
16
+ * user: (ctx) => ctx.context as string,
17
+ * budget: () => ({ daily: 0.50, monthly: 10 }),
18
+ * storage: ormBudgetStorage(),
19
+ * })
20
+ * ```
21
+ *
22
+ * The schema lives at {@link budgetUsagePrismaSchema} — copy it into your
23
+ * Prisma schema (or a new `prisma/schema/<file>.prisma` if you use the
24
+ * multi-file setup). The `@@unique([userId, period, periodKey])`
25
+ * constraint is the one load-bearing index — without it, the
26
+ * find-or-create path can race and produce duplicate rows.
27
+ *
28
+ * # Atomicity caveat
29
+ *
30
+ * `checkAndDebit` does a read-then-conditional-increment. The increment
31
+ * itself is atomic (`UPDATE col = col + n`), but the cap check sits
32
+ * between the read and the write. Under high concurrency for a single
33
+ * user (more than one in-flight budgeted request at a time), total spend
34
+ * can briefly exceed `cap` by up to `costUsd × concurrency`. For typical
35
+ * apps this is a non-issue.
36
+ *
37
+ * Strict guarantees require a database transaction with serializable
38
+ * isolation or a Redis-backed counter — both planned as follow-ups. File
39
+ * an issue if you hit this in production.
40
+ */
41
+ import { Model } from '@rudderjs/orm';
42
+ import { type BudgetCheckOptions, type BudgetCheckResult, type BudgetPeriod, type BudgetStorage } from '@gemstack/ai-sdk';
43
+ /**
44
+ * Model row backing {@link OrmBudgetStorage}. Exposed so apps that
45
+ * want admin views (e.g. "show me top spenders this month") can use
46
+ * `BudgetUsageRecord.where(...).get()` instead of routing every read
47
+ * through the {@link BudgetStorage} interface.
48
+ *
49
+ * The `@@unique([userId, period, periodKey])` constraint is required —
50
+ * without it, two concurrent first-writes for the same user/period
51
+ * create duplicate rows and the cap accounting silently drifts.
52
+ */
53
+ export declare class BudgetUsageRecord extends Model {
54
+ static table: string;
55
+ static fillable: string[];
56
+ id: string; // primary key — `@id @default(cuid())` in the reference Prisma schema
57
+ userId: string; // the user whose spend this counter tracks
58
+ /** `'daily'` or `'monthly'`. */
59
+ period: string;
60
+ /** TZ-aware bucket key — `YYYY-MM-DD` (daily) or `YYYY-MM` (monthly). */
61
+ periodKey: string;
62
+ /** Cumulative USD spend in this period. */
63
+ spent: number;
64
+ createdAt: Date;
65
+ updatedAt: Date | null;
66
+ }
67
+ /**
68
+ * Production `BudgetStorage` backed by the registered `@rudderjs/orm`
69
+ * adapter. See the module JSDoc for setup + the atomicity caveat.
70
+ */
71
+ export declare class OrmBudgetStorage implements BudgetStorage {
72
+ checkAndDebit(opts: BudgetCheckOptions): Promise<BudgetCheckResult>; // rejects with an Error when `cap` or `costUsd` is negative or non-finite; a `costUsd` of 0 is a pure read
73
+ /** Apply the read-then-conditional-increment path on an existing row: deny without writing if the debit would exceed the cap, otherwise increment `spent`. */
74
+ private _applyIncrementPath;
75
+ reset(userId: string, period: BudgetPeriod, now?: Date, timezone?: string): Promise<void>; // deletes the usage row(s) for the bucket containing `now` (defaults to the current time)
76
+ }
77
+ /**
78
+ * Convenience factory — returns a fresh {@link OrmBudgetStorage}
79
+ * instance. Prefer this over `new OrmBudgetStorage()` for symmetry with
80
+ * `memoryBudgetStorage()`.
81
+ */
82
+ export declare function ormBudgetStorage(): BudgetStorage;
83
+ /**
84
+ * Reference Prisma schema for `OrmBudgetStorage`. Copy into your
85
+ * `prisma/schema/<file>.prisma` (or paste alongside an existing model).
86
+ *
87
+ * The `@@unique([userId, period, periodKey])` constraint is required —
88
+ * without it the find-or-create path can race and produce duplicate
89
+ * rows, breaking cap accounting.
90
+ *
91
+ * SQLite stores `Float` as `REAL`; Postgres / MySQL as `DOUBLE
92
+ * PRECISION` / `DOUBLE`. All three give 15+ significant digits — more
93
+ * than enough for sub-cent budget tracking.
94
+ */
95
+ export declare const budgetUsagePrismaSchema = "model BudgetUsage {\n id String @id @default(cuid())\n userId String\n /// 'daily' | 'monthly'\n period String\n /// YYYY-MM-DD (daily) or YYYY-MM (monthly), in the configured timezone\n periodKey String\n /// Cumulative USD spend in this period\n spent Float @default(0)\n createdAt DateTime @default(now())\n updatedAt DateTime @updatedAt\n\n @@unique([userId, period, periodKey])\n @@index([userId])\n}\n";
2
96
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/budget-orm/index.ts"],"names":[],"mappings":"AAGA,cAAc,6BAA6B,CAAA"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/budget-orm/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAuCG;AAEH,OAAO,EAAE,KAAK,EAAE,MAAM,eAAe,CAAA;AACrC,OAAO,EACL,KAAK,kBAAkB,EACvB,KAAK,iBAAiB,EACtB,KAAK,YAAY,EACjB,KAAK,aAAa,EAEnB,MAAM,kBAAkB,CAAA;AAIzB;;;;;;;;;GASG;AACH,qBAAa,iBAAkB,SAAQ,KAAK;IAC1C,OAAgB,KAAK,SAAmB;IACxC,OAAgB,QAAQ,WAA6C;IAE7D,EAAE,EAAS,MAAM,CAAA;IACjB,MAAM,EAAK,MAAM,CAAA;IACzB,gCAAgC;IACxB,MAAM,EAAK,MAAM,CAAA;IACzB,yEAAyE;IACjE,SAAS,EAAE,MAAM,CAAA;IACzB,2CAA2C;IACnC,KAAK,EAAM,MAAM,CAAA;IACjB,SAAS,EAAE,IAAI,CAAA;IACf,SAAS,EAAE,IAAI,GAAG,IAAI,CAAA;CAC/B;AAID;;;GAGG;AACH,qBAAa,gBAAiB,YAAW,aAAa;IAC9C,aAAa,CAAC,IAAI,EAAE,kBAAkB,GAAG,OAAO,CAAC,iBAAiB,CAAC;IAuDzE,yEAAyE;YAC3D,mBAAmB;IAsB3B,KAAK,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,GAAG,CAAC,EAAE,IAAI,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;CAQhG;AAED;;;;GAIG;AACH,wBAAgB,gBAAgB,IAAI,aAAa,CAEhD;AAID;;;;;;;;;;;GAWG;AACH,eAAO,MAAM,uBAAuB,8bAenC,CAAA"}
@@ -1,5 +1,177 @@
1
- // @rudderjs/ai is deprecated. The AI engine now lives in @gemstack/ai-sdk.
2
- // This module re-exports it for backwards compatibility; import from
3
- // '@gemstack/ai-sdk/budget-orm' directly in new code.
4
- export * from '@gemstack/ai-sdk/budget-orm';
1
+ /**
2
+ * `@rudderjs/ai/budget-orm` — ORM-backed {@link BudgetStorage} for #A6 Phase 4.
3
+ *
4
+ * Production-grade replacement for `memoryBudgetStorage()` (which is
5
+ * single-process only). Persists per-user spend counters in a
6
+ * `BudgetUsage` table via the registered `@rudderjs/orm` adapter — works
7
+ * across queue workers, web processes, and horizontally-scaled deployments.
8
+ *
9
+ * Wire it into your AI middleware:
10
+ *
11
+ * ```ts
12
+ * import { withBudget } from '@gemstack/ai-sdk'
13
+ * import { ormBudgetStorage } from '@rudderjs/ai/budget-orm'
14
+ *
15
+ * const budgeted = withBudget({
16
+ * user: (ctx) => ctx.context as string,
17
+ * budget: () => ({ daily: 0.50, monthly: 10 }),
18
+ * storage: ormBudgetStorage(),
19
+ * })
20
+ * ```
21
+ *
22
+ * The schema lives at {@link budgetUsagePrismaSchema} — copy it into your
23
+ * Prisma schema (or a new `prisma/schema/<file>.prisma` if you use the
24
+ * multi-file setup). The `@@unique([userId, period, periodKey])`
25
+ * constraint is the one load-bearing index — without it, the
26
+ * find-or-create path can race and produce duplicate rows.
27
+ *
28
+ * # Atomicity caveat
29
+ *
30
+ * `checkAndDebit` does a read-then-conditional-increment. The increment
31
+ * itself is atomic (`UPDATE col = col + n`), but the cap check sits
32
+ * between the read and the write. Under high concurrency for a single
33
+ * user (more than one in-flight budgeted request at a time), total spend
34
+ * can briefly exceed `cap` by up to `costUsd × concurrency`. For typical
35
+ * apps this is a non-issue.
36
+ *
37
+ * Strict guarantees require a database transaction with serializable
38
+ * isolation or a Redis-backed counter — both planned as follow-ups. File
39
+ * an issue if you hit this in production.
40
+ */
41
+ import { Model } from '@rudderjs/orm';
42
+ import { periodKey as buildPeriodKey, } from '@gemstack/ai-sdk';
43
+ // ─── ORM Model ────────────────────────────────────────────
44
+ /**
45
+ * Model row backing {@link OrmBudgetStorage}. Exposed so apps that
46
+ * want admin views (e.g. "show me top spenders this month") can use
47
+ * `BudgetUsageRecord.where(...).get()` instead of routing every read
48
+ * through the {@link BudgetStorage} interface.
49
+ *
50
+ * The `@@unique([userId, period, periodKey])` constraint is required —
51
+ * without it, two concurrent first-writes for the same user/period
52
+ * create duplicate rows and the cap accounting silently drifts.
53
+ */
54
+ export class BudgetUsageRecord extends Model {
55
+ static table = 'budgetUsage'; // NOTE(review): presumably the ORM table/delegate name for the Prisma `BudgetUsage` model — confirm against @rudderjs/orm
56
+ static fillable = ['userId', 'period', 'periodKey', 'spent']; // the four fields checkAndDebit passes to create(); NOTE(review): presumably a mass-assignment allow-list — confirm
57
+ }
58
+ // ─── BudgetStorage adapter ────────────────────────────────
59
+ /**
60
+ * Production `BudgetStorage` backed by the registered `@rudderjs/orm`
61
+ * adapter. See the module JSDoc for setup + the atomicity caveat.
62
+ */
63
+ export class OrmBudgetStorage {
64
+ async checkAndDebit(opts) {
65
+ if (!Number.isFinite(opts.cap) || opts.cap < 0) {
66
+ throw new Error(`[ai-sdk] BudgetStorage: cap must be a non-negative finite number, got ${opts.cap}`);
67
+ }
68
+ if (!Number.isFinite(opts.costUsd) || opts.costUsd < 0) {
69
+ throw new Error(`[ai-sdk] BudgetStorage: costUsd must be a non-negative finite number, got ${opts.costUsd}`);
70
+ }
71
+ const now = opts.now ?? new Date();
72
+ const key = buildPeriodKey(opts.period, now, opts.timezone);
73
+ const existing = await BudgetUsageRecord
74
+ .where('userId', opts.userId)
75
+ .where('period', opts.period)
76
+ .where('periodKey', key)
77
+ .first();
78
+ // ─── No row yet — first write for this period ─────────
79
+ if (!existing) {
80
+ // Pure-read on an empty bucket — still empty after.
81
+ if (opts.costUsd === 0) {
82
+ return { allowed: true, spent: 0, cap: opts.cap };
83
+ }
84
+ // Single debit larger than cap — refuse before creating the row,
85
+ // so we don't pollute storage with denied requests.
86
+ if (opts.costUsd > opts.cap) {
87
+ return { allowed: false, spent: 0, cap: opts.cap };
88
+ }
89
+ try {
90
+ await BudgetUsageRecord.create({
91
+ userId: opts.userId,
92
+ period: opts.period,
93
+ periodKey: key,
94
+ spent: opts.costUsd,
95
+ });
96
+ return { allowed: true, spent: opts.costUsd, cap: opts.cap };
97
+ }
98
+ catch (e) {
99
+ // Race: another caller created the row between our `first()` and
100
+ // `create()`. Re-read and fall through to the increment path.
101
+ // We deliberately don't sniff the error type — any create failure
102
+ // means the row may now exist; let the re-read decide.
103
+ const refetched = await BudgetUsageRecord
104
+ .where('userId', opts.userId)
105
+ .where('period', opts.period)
106
+ .where('periodKey', key)
107
+ .first();
108
+ if (!refetched)
109
+ throw e; // not a unique-constraint race; surface the original error
110
+ return this._applyIncrementPath(refetched, opts);
111
+ }
112
+ }
113
+ return this._applyIncrementPath(existing, opts);
114
+ }
115
+ /** Apply the read-then-conditional-increment path on an existing row. */
116
+ async _applyIncrementPath(row, opts) {
117
+ const current = Number(row.spent ?? 0);
118
+ // Pure read.
119
+ if (opts.costUsd === 0) {
120
+ return { allowed: true, spent: current, cap: opts.cap };
121
+ }
122
+ // Cap check — read-then-decide. Atomic under single-writer; under
123
+ // concurrent writers, see the module-level atomicity caveat.
124
+ if (current + opts.costUsd > opts.cap) {
125
+ return { allowed: false, spent: current, cap: opts.cap };
126
+ }
127
+ const updated = await BudgetUsageRecord.increment(row.id, 'spent', opts.costUsd);
128
+ const newSpent = Number(updated?.spent ?? current + opts.costUsd);
129
+ return { allowed: true, spent: newSpent, cap: opts.cap };
130
+ }
131
+ async reset(userId, period, now, timezone) {
132
+ const key = buildPeriodKey(period, now ?? new Date(), timezone);
133
+ await BudgetUsageRecord
134
+ .where('userId', userId)
135
+ .where('period', period)
136
+ .where('periodKey', key)
137
+ .deleteAll();
138
+ }
139
+ }
140
+ /**
141
+ * Convenience factory — returns a fresh {@link OrmBudgetStorage}
142
+ * instance. Prefer this over `new OrmBudgetStorage()` for symmetry with
143
+ * `memoryBudgetStorage()`.
144
+ */
145
+ export function ormBudgetStorage() {
146
+ return new OrmBudgetStorage(); // no constructor arguments — the class defines no constructor or instance fields
147
+ }
148
+ // ─── Schema reference ─────────────────────────────────────
149
+ /**
150
+ * Reference Prisma schema for `OrmBudgetStorage`. Copy into your
151
+ * `prisma/schema/<file>.prisma` (or paste alongside an existing model).
152
+ *
153
+ * The `@@unique([userId, period, periodKey])` constraint is required —
154
+ * without it the find-or-create path can race and produce duplicate
155
+ * rows, breaking cap accounting.
156
+ *
157
+ * SQLite stores `Float` as `REAL`; Postgres / MySQL as `DOUBLE
158
+ * PRECISION` / `DOUBLE`. All three give 15+ significant digits — more
159
+ * than enough for sub-cent budget tracking.
160
+ */
161
+ export const budgetUsagePrismaSchema = `model BudgetUsage {
162
+ id String @id @default(cuid())
163
+ userId String
164
+ /// 'daily' | 'monthly'
165
+ period String
166
+ /// YYYY-MM-DD (daily) or YYYY-MM (monthly), in the configured timezone
167
+ periodKey String
168
+ /// Cumulative USD spend in this period
169
+ spent Float @default(0)
170
+ createdAt DateTime @default(now())
171
+ updatedAt DateTime @updatedAt
172
+
173
+ @@unique([userId, period, periodKey])
174
+ @@index([userId])
175
+ }
176
+ `;
5
177
  //# sourceMappingURL=index.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/budget-orm/index.ts"],"names":[],"mappings":"AAAA,2EAA2E;AAC3E,qEAAqE;AACrE,sDAAsD;AACtD,cAAc,6BAA6B,CAAA"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/budget-orm/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAuCG;AAEH,OAAO,EAAE,KAAK,EAAE,MAAM,eAAe,CAAA;AACrC,OAAO,EAKL,SAAS,IAAI,cAAc,GAC5B,MAAM,kBAAkB,CAAA;AAEzB,6DAA6D;AAE7D;;;;;;;;;GASG;AACH,MAAM,OAAO,iBAAkB,SAAQ,KAAK;IAC1C,MAAM,CAAU,KAAK,GAAM,aAAa,CAAA;IACxC,MAAM,CAAU,QAAQ,GAAG,CAAC,QAAQ,EAAE,QAAQ,EAAE,WAAW,EAAE,OAAO,CAAC,CAAA;;AAcvE,6DAA6D;AAE7D;;;GAGG;AACH,MAAM,OAAO,gBAAgB;IAC3B,KAAK,CAAC,aAAa,CAAC,IAAwB;QAC1C,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,GAAG,GAAG,CAAC,EAAE,CAAC;YAC/C,MAAM,IAAI,KAAK,CAAC,yEAAyE,IAAI,CAAC,GAAG,EAAE,CAAC,CAAA;QACtG,CAAC;QACD,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,IAAI,CAAC,OAAO,GAAG,CAAC,EAAE,CAAC;YACvD,MAAM,IAAI,KAAK,CAAC,6EAA6E,IAAI,CAAC,OAAO,EAAE,CAAC,CAAA;QAC9G,CAAC;QAED,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,IAAI,IAAI,IAAI,EAAE,CAAA;QAClC,MAAM,GAAG,GAAG,cAAc,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAA;QAE3D,MAAM,QAAQ,GAAG,MAAM,iBAAiB;aACrC,KAAK,CAAC,QAAQ,EAAK,IAAI,CAAC,MAAM,CAAC;aAC/B,KAAK,CAAC,QAAQ,EAAK,IAAI,CAAC,MAAM,CAAC;aAC/B,KAAK,CAAC,WAAW,EAAE,GAAG,CAAC;aACvB,KAAK,EAAyC,CAAA;QAEjD,yDAAyD;QACzD,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,oDAAoD;YACpD,IAAI,IAAI,CAAC,OAAO,KAAK,CAAC,EAAE,CAAC;gBACvB,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,CAAA;YACnD,CAAC;YACD,iEAAiE;YACjE,oDAAoD;YACpD,IAAI,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;gBAC5B,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,CAAA;YACpD,CAAC;YAED,IAAI,CAAC;gBACH,MAAM,iBAAiB,CAAC,MAAM,CAAC;oBAC7B,MAAM,EAAK,IAAI,CAAC,MAAM;oBACtB,MAAM,EAAK,IAAI,CAAC,MAAM;oBACtB,SAAS,EAAE,GAAG;oBACd,KAAK,EAAM,IAAI,CAAC,OAAO;iBACxB,CAAC,CAAA;gBACF,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,CAAC,OAAO,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,CAAA;YAC9D,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,iEAAiE;gBACjE,8DAA8D;gBAC9D,kEAAkE;gBAClE,uDAAuD;gBACvD,MAAM,SAAS,GAAG,MAAM,iBAAiB;qBACtC,KAAK,CAAC,QA
AQ,EAAK,IAAI,CAAC,MAAM,CAAC;qBAC/B,KAAK,CAAC,QAAQ,EAAK,IAAI,CAAC,MAAM,CAAC;qBAC/B,KAAK,CAAC,WAAW,EAAE,GAAG,CAAC;qBACvB,KAAK,EAAyC,CAAA;gBACjD,IAAI,CAAC,SAAS;oBAAE,MAAM,CAAC,CAAA,CAAE,2DAA2D;gBACpF,OAAO,IAAI,CAAC,mBAAmB,CAAC,SAAS,EAAE,IAAI,CAAC,CAAA;YAClD,CAAC;QACH,CAAC;QAED,OAAO,IAAI,CAAC,mBAAmB,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAA;IACjD,CAAC;IAED,yEAAyE;IACjE,KAAK,CAAC,mBAAmB,CAC/B,GAAuB,EACvB,IAAwB;QAExB,MAAM,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,KAAK,IAAI,CAAC,CAAC,CAAA;QAEtC,aAAa;QACb,IAAI,IAAI,CAAC,OAAO,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,CAAA;QACzD,CAAC;QAED,kEAAkE;QAClE,6DAA6D;QAC7D,IAAI,OAAO,GAAG,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YACtC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,CAAA;QAC1D,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,iBAAiB,CAAC,SAAS,CAAC,GAAG,CAAC,EAAE,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,CAAiC,CAAA;QAChH,MAAM,QAAQ,GAAG,MAAM,CAAC,OAAO,EAAE,KAAK,IAAI,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,CAAA;QACjE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,CAAA;IAC1D,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,MAAc,EAAE,MAAoB,EAAE,GAAU,EAAE,QAAiB;QAC7E,MAAM,GAAG,GAAG,cAAc,CAAC,MAAM,EAAE,GAAG,IAAI,IAAI,IAAI,EAAE,EAAE,QAAQ,CAAC,CAAA;QAC/D,MAAM,iBAAiB;aACpB,KAAK,CAAC,QAAQ,EAAK,MAAM,CAAC;aAC1B,KAAK,CAAC,QAAQ,EAAK,MAAM,CAAC;aAC1B,KAAK,CAAC,WAAW,EAAE,GAAG,CAAC;aACvB,SAAS,EAAE,CAAA;IAChB,CAAC;CACF;AAED;;;;GAIG;AACH,MAAM,UAAU,gBAAgB;IAC9B,OAAO,IAAI,gBAAgB,EAAE,CAAA;AAC/B,CAAC;AAED,6DAA6D;AAE7D;;;;;;;;;;;GAWG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAG;;;;;;;;;;;;;;;CAetC,CAAA"}
@@ -1,2 +1,98 @@
1
- export * from '@gemstack/ai-sdk/commands/ai-eval';
1
+ /**
2
+ * `pnpm rudder ai:eval` — discover `evals/**\/*.eval.ts` suites,
3
+ * run each, and report. Console reporter by default; `--json` emits
4
+ * a machine-readable envelope to stdout for CI.
5
+ *
6
+ * This is a Rudder CLI binding: it reads `config('ai')` via
7
+ * `@rudderjs/core` and registers onto the Rudder runner, so it lives
8
+ * on the Rudder side. The eval framework it drives (`runSuite`,
9
+ * reporters, fixtures) lives in the agnostic `@gemstack/ai-sdk/eval`.
10
+ *
11
+ * Registered from the CLI loader (`packages/cli/src/index.ts`)
12
+ * — the AiProvider doesn't own this so it surfaces even when the
13
+ * user app fails to boot, matching the `command:list --json`
14
+ * graceful-degradation pattern from #349.
15
+ */
16
+ import type { EvalSuite } from '@gemstack/ai-sdk/eval';
17
+ type Rudder = { // local (non-exported) structural type for the runner argument of registerAiEvalCommand
18
+ command(name: string, handler: (args: string[]) => void | Promise<void>): {
19
+ description(text: string): unknown;
20
+ };
21
+ };
22
+ /** CLI flags + positional name filter. */
23
+ export interface AiEvalOptions {
24
+ /** Substring filter (case-insensitive) applied to suite names. */
25
+ filter?: string;
26
+ /** Stop on the first failing suite. */
27
+ bail: boolean;
28
+ /** Emit `{ suites: [...] }` JSON to stdout. */
29
+ json: boolean;
30
+ /**
31
+ * Run against the real provider, capture each case's assistant
32
+ * turns to `evals/__fixtures__/<suite>/<case>.json`. Existing
33
+ * fixtures are overwritten — diff in your VCS to see what changed.
34
+ * Default `false`.
35
+ */
36
+ record?: boolean;
37
+ /**
38
+ * Swap the runtime with `AiFake.fake()` and feed each case its
39
+ * recorded fixture via `respondWithSequence`. Zero API calls,
40
+ * deterministic regression tests. Cases without a fixture fall
41
+ * through to a normal run with a stderr warning. Default `false`.
42
+ */
43
+ replay?: boolean;
44
+ /**
45
+ * Path for a self-contained HTML report (#A5 Phase 5). Pasteable
46
+ * into PR comments / Slack threads. Coexists with `--json` (JSON
47
+ * still goes to stdout, HTML goes to disk).
48
+ */
49
+ html?: string;
50
+ }
51
+ /**
52
+ * Test seam — every external dependency gets an injectable
53
+ * override. The CLI handler defaults each to its real impl.
54
+ */
55
+ export interface AiEvalDeps {
56
+ cwd?: string;
57
+ stdout?: {
58
+ write(s: string): boolean | void;
59
+ };
60
+ stderr?: {
61
+ write(s: string): boolean | void;
62
+ };
63
+ /** Override the file walk (test harness returns a virtual list). */
64
+ discover?: (cwd: string, pattern: string) => Promise<string[]>;
65
+ /** Override file → suite loader (test harness uses an in-memory map). */
66
+ loadSuite?: (absPath: string) => Promise<EvalSuite | null>;
67
+ /** Override config lookup (test harness skips `@rudderjs/core`). */
68
+ configPattern?: () => string | null | Promise<string | null>;
69
+ /**
70
+ * Override fixtures directory (defaults to `<cwd>/evals/__fixtures__`).
71
+ * Tests point to a tmpdir to keep round-trips off the source tree.
72
+ */
73
+ fixturesDir?: string;
74
+ }
75
+ /** Register the `ai:eval` command on the rudder runner. */
76
+ export declare function registerAiEvalCommand(rudder: Rudder): void;
77
+ /**
78
+ * Parse the rest-of-line. Recognizes:
79
+ * - boolean flags: `--bail`, `--json`, `--record`, `--replay`
80
+ * - value flags : `--html <path>` or `--html=<path>`
81
+ * - one positional name filter (anything not consumed above)
82
+ */
83
+ export declare function parseArgs(args: string[]): AiEvalOptions;
84
+ /**
85
+ * Execute the CLI flow. Returns the process exit code (0 = all pass,
86
+ * 1 = at least one suite had a failure or no suites discovered).
87
+ *
88
+ * The handler is `process.exit`-free so tests can drive it directly.
89
+ */
90
+ export declare function runEvalCli(opts: AiEvalOptions, deps?: AiEvalDeps): Promise<number>;
91
+ /**
92
+ * Recursive walk constrained to a `<dir>/**\/*<suffix>` shape.
93
+ * Returns absolute paths sorted lexicographically for stable test
94
+ * output and predictable `--bail` ordering.
95
+ */
96
+ export declare function discoverSuiteFiles(cwd: string, pattern: string): Promise<string[]>;
97
+ export {};
2
98
  //# sourceMappingURL=ai-eval.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"ai-eval.d.ts","sourceRoot":"","sources":["../../src/commands/ai-eval.ts"],"names":[],"mappings":"AAGA,cAAc,mCAAmC,CAAA"}
1
+ {"version":3,"file":"ai-eval.d.ts","sourceRoot":"","sources":["../../src/commands/ai-eval.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAgBH,OAAO,KAAK,EAAE,SAAS,EAA4C,MAAM,uBAAuB,CAAA;AAQhG,KAAK,MAAM,GAAG;IACZ,OAAO,CACL,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,GAChD;QAAE,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAA;KAAE,CAAA;CAC1C,CAAA;AAED,0CAA0C;AAC1C,MAAM,WAAW,aAAa;IAC5B,kEAAkE;IAClE,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,uCAAuC;IACvC,IAAI,EAAK,OAAO,CAAA;IAChB,+CAA+C;IAC/C,IAAI,EAAK,OAAO,CAAA;IAChB;;;;;OAKG;IACH,MAAM,CAAC,EAAE,OAAO,CAAA;IAChB;;;;;OAKG;IACH,MAAM,CAAC,EAAE,OAAO,CAAA;IAChB;;;;OAIG;IACH,IAAI,CAAC,EAAE,MAAM,CAAA;CACd;AAED;;;GAGG;AACH,MAAM,WAAW,UAAU;IACzB,GAAG,CAAC,EAAS,MAAM,CAAA;IACnB,MAAM,CAAC,EAAM;QAAE,KAAK,CAAC,CAAC,EAAE,MAAM,GAAG,OAAO,GAAG,IAAI,CAAA;KAAE,CAAA;IACjD,MAAM,CAAC,EAAM;QAAE,KAAK,CAAC,CAAC,EAAE,MAAM,GAAG,OAAO,GAAG,IAAI,CAAA;KAAE,CAAA;IACjD,oEAAoE;IACpE,QAAQ,CAAC,EAAI,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,EAAE,CAAC,CAAA;IAChE,yEAAyE;IACzE,SAAS,CAAC,EAAG,CAAC,OAAO,EAAE,MAAM,KAAK,OAAO,CAAC,SAAS,GAAG,IAAI,CAAC,CAAA;IAC3D,oEAAoE;IACpE,aAAa,CAAC,EAAE,MAAM,MAAM,GAAG,IAAI,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAAA;IAC5D;;;OAGG;IACH,WAAW,CAAC,EAAE,MAAM,CAAA;CACrB;AAED,2DAA2D;AAC3D,wBAAgB,qBAAqB,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI,CAO1D;AAMD;;;;;GAKG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,aAAa,CA8BvD;AAID;;;;;GAKG;AACH,wBAAsB,UAAU,CAAC,IAAI,EAAE,aAAa,EAAE,IAAI,GAAE,UAAe,GAAG,OAAO,CAAC,MAAM,CAAC,CAsE5F;AA4JD;;;;GAIG;AACH,wBAAsB,kBAAkB,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAMxF"}