@gscdump/cloudflare 0.25.13 → 0.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,5 @@
1
1
  import { ArchetypeQuery, ArchetypeResult, ArchetypeResultRow } from "@gscdump/sdk";
2
+ import { Result } from "gscdump/result";
2
3
  import { IcebergTableName } from "@gscdump/engine/iceberg";
3
4
  import { ServerTailDirective } from "@gscdump/contracts";
4
5
  /** Placeholder substituted for the engine-specific table reference. */
@@ -69,6 +70,16 @@ declare class DuckDbIcebergTimeoutError extends Error {
69
70
  name: string;
70
71
  constructor(timeoutMs: number);
71
72
  }
73
+ /**
74
+ * The modelled, caller-actionable failure channel for a DuckDB-over-Iceberg
75
+ * query. As with the R2 SQL client, callers branch on which class came back: a
76
+ * `DuckDbIcebergTimeoutError` is the retry-able deadline overrun, a
77
+ * `DuckDbIcebergError` is a hard sibling-RPC failure (or the `aux-cloud-only`
78
+ * routing reject). The error variant IS the existing throwable class, so the
79
+ * throwing wrappers preserve the identity/message tests assert
80
+ * (`rejects.toThrow(/OOM in sibling/)`, `rejects.toThrow(DuckDbIcebergError)`).
81
+ */
82
+ type DuckDbIcebergQueryError = DuckDbIcebergError | DuckDbIcebergTimeoutError;
72
83
  /** A configured DuckDB-over-Iceberg executor. */
73
84
  interface DuckDbIcebergExecutor {
74
85
  /** Run a raw SQL string with `{{TABLE_<name>}}` placeholders resolved. */
@@ -77,6 +88,16 @@ interface DuckDbIcebergExecutor {
77
88
  runPlan: (plan: ArchetypeSqlPlan) => Promise<DuckDbIcebergResult>;
78
89
  /** Translate + run an archetype query. Handles `arbitrary-sql` verbatim. */
79
90
  runArchetype: (query: ArchetypeQuery) => Promise<DuckDbIcebergResult>;
91
+ /**
92
+ * Errors-as-values core for {@link DuckDbIcebergExecutor.runArchetype}:
93
+ * returns the modelled timeout-vs-hard-fail `DuckDbIcebergQueryError` instead
94
+ * of throwing, so the dispatcher can branch on retry-ability (a timeout may be
95
+ * worth a fallback) without `instanceof` over a `catch`. Optional so a
96
+ * hand-rolled executor (e.g. a host app's own service-binding executor) can
97
+ * implement only the throwing surface; {@link createDuckDbIcebergExecutor}
98
+ * always provides it.
99
+ */
100
+ runArchetypeResult?: (query: ArchetypeQuery) => Promise<Result<DuckDbIcebergResult, DuckDbIcebergQueryError>>;
80
101
  }
81
102
  /**
82
103
  * Create a DuckDB-over-Iceberg-files executor.
@@ -124,10 +145,31 @@ declare class R2SqlTimeoutError extends Error {
124
145
  name: string;
125
146
  constructor(timeoutMs: number);
126
147
  }
148
+ /**
149
+ * The modelled, caller-actionable failure channel for an R2 SQL query. Callers
150
+ * branch on which class came back — a `R2SqlTimeoutError` is a transient retry
151
+ * candidate (the query outran the per-query deadline), while a `R2SqlError`
152
+ * (HTTP 4xx/5xx, a rejected envelope, a transport blow-up) is a hard failure.
153
+ * The error variant IS the existing throwable class, so the throwing wrapper
154
+ * preserves the exact identity/message tests assert (`rejects.toThrow(/HTTP 403/)`,
155
+ * `rejects.toBeInstanceOf(R2SqlTimeoutError)`). Defects — a programmer handing
156
+ * the client malformed params (`escapeSqlValue` / `inlineParams`) — are NOT
157
+ * modelled here; they keep throwing `R2SqlError` synchronously.
158
+ */
159
+ type R2SqlQueryError = R2SqlError | R2SqlTimeoutError;
127
160
  /** A configured R2 SQL client. */
128
161
  interface R2SqlClient {
129
162
  /** Run a raw SQL string (table reference already resolved). */
130
163
  query: (sql: string) => Promise<R2SqlResult>;
164
+ /**
165
+ * Errors-as-values core for {@link R2SqlClient.query}: returns the modelled
166
+ * timeout-vs-hard-fail `R2SqlQueryError` instead of throwing, so callers can
167
+ * branch on retry-ability without `instanceof` over a `catch`. Optional so a
168
+ * hand-rolled `R2SqlClient` (e.g. a host app's own endpoint-backed client) can
169
+ * implement only the throwing surface; {@link createR2SqlClient} always
170
+ * provides it.
171
+ */
172
+ queryResult?: (sql: string) => Promise<Result<R2SqlResult, R2SqlQueryError>>;
131
173
  /** Run a dialect-neutral plan: resolve `{{TABLE}}`, inline params, send. */
132
174
  runPlan: (plan: ArchetypeSqlPlan) => Promise<R2SqlResult>;
133
175
  /** Translate + run an archetype query end to end. */
@@ -148,10 +190,19 @@ interface ServerTailDispatcherConfig {
148
190
  declare class ServerTailRoutingError extends Error {
149
191
  name: string;
150
192
  }
193
+ /**
194
+ * Errors-as-values core for {@link resolveServerTailEngine}: returns a
195
+ * `ServerTailRoutingError` instead of throwing when an archetype is `cloud-only`
196
+ * (the one caller-actionable routing failure — the consumer must route that
197
+ * query through the cloud endpoints, not the server tail). Pure — no I/O.
198
+ */
199
+ declare function resolveServerTailEngineResult(query: ArchetypeQuery): Result<ServerTailEngine, ServerTailRoutingError>;
151
200
  /**
152
201
  * Decide which engine answers an archetype query. Pure — no I/O. Exposed so
153
202
  * the file-resolution endpoint can compute the `ServerTailDirective.engine`
154
- * with the SAME logic the dispatcher uses at execution time.
203
+ * with the SAME logic the dispatcher uses at execution time. Throws
204
+ * `ServerTailRoutingError` for a `cloud-only` archetype; see
205
+ * {@link resolveServerTailEngineResult} for the errors-as-values core.
155
206
  */
156
207
  declare function resolveServerTailEngine(query: ArchetypeQuery): ServerTailEngine;
157
208
  /** A configured server-tail dispatcher. */
@@ -170,4 +221,4 @@ interface ServerTailDispatcher {
170
221
  * executor and routes every `ArchetypeQuery` to one of them.
171
222
  */
172
223
  declare function createServerTailDispatcher(config: ServerTailDispatcherConfig): ServerTailDispatcher;
173
- export { type ArchetypeSqlPlan, DuckDbIcebergError, type DuckDbIcebergExecutor, type DuckDbIcebergExecutorConfig, type DuckDbIcebergResult, type DuckDbIcebergRow, DuckDbIcebergTimeoutError, type DuckDbSvc, type R2SqlClient, type R2SqlClientConfig, R2SqlError, type R2SqlResult, type R2SqlRow, R2SqlTimeoutError, type ServerTailDispatcher, type ServerTailDispatcherConfig, type ServerTailEngine, ServerTailRoutingError, TABLE_PLACEHOLDER, buildArchetypeSql, createDuckDbIcebergExecutor, createR2SqlClient, createServerTailDispatcher, resolveServerTailEngine };
224
+ export { type ArchetypeSqlPlan, DuckDbIcebergError, type DuckDbIcebergExecutor, type DuckDbIcebergExecutorConfig, type DuckDbIcebergQueryError, type DuckDbIcebergResult, type DuckDbIcebergRow, DuckDbIcebergTimeoutError, type DuckDbSvc, type R2SqlClient, type R2SqlClientConfig, R2SqlError, type R2SqlQueryError, type R2SqlResult, type R2SqlRow, R2SqlTimeoutError, type ServerTailDispatcher, type ServerTailDispatcherConfig, type ServerTailEngine, ServerTailRoutingError, TABLE_PLACEHOLDER, buildArchetypeSql, createDuckDbIcebergExecutor, createR2SqlClient, createServerTailDispatcher, resolveServerTailEngine, resolveServerTailEngineResult };
@@ -1,5 +1,6 @@
1
1
  import { bindLiterals, inferTable } from "@gscdump/engine";
2
2
  import { ARCHETYPE_EXECUTION_CLASS } from "@gscdump/sdk";
3
+ import { err, ok, unwrapResult } from "gscdump/result";
3
4
  const TABLE_PLACEHOLDER = "{{TABLE}}";
4
5
  function dimColumn(dim) {
5
6
  if (dim === "page") return "url";
@@ -133,7 +134,7 @@ function buildTopNBreakdown(q) {
133
134
  const metrics = (q.metrics.includes(q.orderBy.metric) ? q.metrics : [...q.metrics, q.orderBy.metric]).map(metricExpr).join(", ");
134
135
  const order = `${q.orderBy.metric} ${q.orderBy.dir.toUpperCase()}`;
135
136
  const facet = facetPredicate(q);
136
- let sql = `SELECT ${col}, ${metrics} FROM ${TABLE_PLACEHOLDER} WHERE ${w.clause}${facet.sql} GROUP BY ${col} ORDER BY ${order} LIMIT ${Math.max(0, Math.floor(q.limit))}`;
137
+ let sql = `SELECT ${col}, ${metrics}${q.includeTotal ? ", COUNT(*) OVER() AS __total" : ""} FROM ${TABLE_PLACEHOLDER} WHERE ${w.clause}${facet.sql} GROUP BY ${col} ORDER BY ${order} LIMIT ${Math.max(0, Math.floor(q.limit))}`;
137
138
  if (q.offset && q.offset > 0) sql += ` OFFSET ${Math.floor(q.offset)}`;
138
139
  return {
139
140
  table,
@@ -220,18 +221,36 @@ function buildArchetypeSql(query) {
220
221
  var ServerTailRoutingError = class extends Error {
221
222
  name = "ServerTailRoutingError";
222
223
  };
223
- function resolveServerTailEngine(query) {
224
+ function routingErrorToException(error) {
225
+ return error;
226
+ }
227
+ function resolveServerTailEngineResult(query) {
224
228
  const cls = ARCHETYPE_EXECUTION_CLASS[query.archetype];
225
- if (cls === "cloud-only") throw new ServerTailRoutingError(`archetype '${query.archetype}' is cloud-only — not a server-tail query`);
226
- if (cls === "duckdb") return "duckdb";
227
- if (query.archetype === "top-n-breakdown" && query.offset && query.offset > 0) return "duckdb";
229
+ if (cls === "cloud-only") return err(new ServerTailRoutingError(`archetype '${query.archetype}' is cloud-only — not a server-tail query`));
230
+ if (cls === "duckdb") return ok("duckdb");
231
+ if (query.archetype === "top-n-breakdown" && query.offset && query.offset > 0) return ok("duckdb");
232
+ if (query.archetype === "top-n-breakdown" && query.includeTotal) return ok("duckdb");
228
233
  const facets = query.facets;
229
- if (facets && facets.length > 0) return "duckdb";
230
- return "r2-sql";
234
+ if (facets && facets.length > 0) return ok("duckdb");
235
+ return ok("r2-sql");
236
+ }
237
+ function resolveServerTailEngine(query) {
238
+ return unwrapResult(resolveServerTailEngineResult(query), routingErrorToException);
231
239
  }
232
240
  function sourceFor(engine) {
233
241
  return engine === "r2-sql" ? "server-r2-sql" : "server-duckdb";
234
242
  }
243
+ function extractTotal(rows) {
244
+ if (!rows.length || !("__total" in rows[0])) return { rows };
245
+ const totalRows = Number(rows[0].__total) || 0;
246
+ return {
247
+ rows: rows.map((r) => {
248
+ const { __total, ...rest } = r;
249
+ return rest;
250
+ }),
251
+ totalRows
252
+ };
253
+ }
235
254
  function createServerTailDispatcher(config) {
236
255
  function route(query) {
237
256
  return resolveServerTailEngine(query);
@@ -255,13 +274,15 @@ function createServerTailDispatcher(config) {
255
274
  };
256
275
  }
257
276
  const res = await config.duckdb.runArchetype(query);
277
+ const { rows, totalRows } = extractTotal(res.rows);
258
278
  return {
259
279
  archetype: query.archetype,
260
- rows: res.rows,
280
+ rows,
261
281
  source: sourceFor("duckdb"),
262
282
  meta: {
263
- rowCount: res.rows.length,
264
- queryMs: res.queryMs
283
+ rowCount: rows.length,
284
+ queryMs: res.queryMs,
285
+ ...totalRows !== void 0 ? { totalRows } : {}
265
286
  }
266
287
  };
267
288
  }
@@ -279,6 +300,9 @@ var DuckDbIcebergTimeoutError = class extends Error {
279
300
  super(`DuckDB-over-Iceberg query exceeded ${timeoutMs}ms deadline`);
280
301
  }
281
302
  };
303
+ function duckDbIcebergErrorToException(error) {
304
+ return error;
305
+ }
282
306
  const DEFAULT_TIMEOUT_MS$1 = 25e3;
283
307
  function icebergTableRef(config, table) {
284
308
  if (config.tableRefStyle === "catalog") return `${config.namespace}.${table}`;
@@ -295,17 +319,22 @@ function resolveTablePlaceholders(sql, config) {
295
319
  }
296
320
  function createDuckDbIcebergExecutor(config) {
297
321
  const timeoutMs = config.timeoutMs ?? DEFAULT_TIMEOUT_MS$1;
298
- async function send(sql) {
322
+ async function sendResult(sql) {
299
323
  const started = Date.now();
300
- const result = await withDeadline(config.svc.runSQL({ sql }), timeoutMs).catch((err) => {
301
- if (err instanceof DuckDbIcebergTimeoutError) throw err;
302
- throw new DuckDbIcebergError(`DUCKDB_SVC.runSQL failed: ${err.message}`);
303
- });
304
- return {
324
+ const raced = await withDeadline(config.svc.runSQL({ sql }), timeoutMs).then((value) => ok(value)).catch((error) => error instanceof DuckDbIcebergTimeoutError ? err(error) : err(new DuckDbIcebergError(`DUCKDB_SVC.runSQL failed: ${error.message}`)));
325
+ if (!raced.ok) return raced;
326
+ const result = raced.value;
327
+ return ok({
305
328
  rows: result.rows ?? [],
306
329
  sql: result.sql ?? sql,
307
330
  queryMs: Date.now() - started
308
- };
331
+ });
332
+ }
333
+ async function send(sql) {
334
+ return unwrapResult(await sendResult(sql), duckDbIcebergErrorToException);
335
+ }
336
+ function runSqlResult(sql, params = []) {
337
+ return sendResult(bindLiterals(resolveTablePlaceholders(sql, config), params));
309
338
  }
310
339
  function runSql(sql, params = []) {
311
340
  return send(bindLiterals(resolveTablePlaceholders(sql, config), params));
@@ -313,15 +342,22 @@ function createDuckDbIcebergExecutor(config) {
313
342
  function runPlan(plan) {
314
343
  return send(bindLiterals(plan.sql.split(TABLE_PLACEHOLDER).join(icebergTableRef(config, plan.table)), plan.params));
315
344
  }
345
+ function runPlanResult(plan) {
346
+ return sendResult(bindLiterals(plan.sql.split(TABLE_PLACEHOLDER).join(icebergTableRef(config, plan.table)), plan.params));
347
+ }
348
+ function runArchetypeResult(query) {
349
+ if (query.archetype === "arbitrary-sql") return runSqlResult(query.sql, query.params ?? []);
350
+ if (query.archetype === "aux-cloud-only") return Promise.resolve(err(new DuckDbIcebergError("aux-cloud-only is not an Iceberg query")));
351
+ return runPlanResult(buildArchetypeSql(query));
352
+ }
316
353
  async function runArchetype(query) {
317
- if (query.archetype === "arbitrary-sql") return runSql(query.sql, query.params ?? []);
318
- if (query.archetype === "aux-cloud-only") throw new DuckDbIcebergError("aux-cloud-only is not an Iceberg query");
319
- return runPlan(buildArchetypeSql(query));
354
+ return unwrapResult(await runArchetypeResult(query), duckDbIcebergErrorToException);
320
355
  }
321
356
  return {
322
357
  runSql,
323
358
  runPlan,
324
- runArchetype
359
+ runArchetype,
360
+ runArchetypeResult
325
361
  };
326
362
  }
327
363
  function r2TableRef(namespace, table) {
@@ -341,6 +377,9 @@ var R2SqlTimeoutError = class extends Error {
341
377
  super(`R2 SQL query exceeded ${timeoutMs}ms deadline`);
342
378
  }
343
379
  };
380
+ function r2SqlErrorToException(error) {
381
+ return error;
382
+ }
344
383
  const DEFAULT_API_BASE = "https://api.sql.cloudflarestorage.com/api/v1";
345
384
  const DEFAULT_TIMEOUT_MS = 25e3;
346
385
  const PARTITION_PREDICATE_RE = /\b(site_id|search_type)(\s*=)/g;
@@ -403,7 +442,7 @@ function createR2SqlClient(config) {
403
442
  const apiBase = config.apiBase ?? DEFAULT_API_BASE;
404
443
  const timeoutMs = config.timeoutMs ?? DEFAULT_TIMEOUT_MS;
405
444
  const endpoint = `${apiBase}/accounts/${config.accountId}/r2-sql/query/${config.bucket}`;
406
- async function query(sql) {
445
+ async function queryResult(sql) {
407
446
  const started = Date.now();
408
447
  const controller = new AbortController();
409
448
  const timer = setTimeout(() => controller.abort(new R2SqlTimeoutError(timeoutMs)), timeoutMs);
@@ -419,23 +458,26 @@ function createR2SqlClient(config) {
419
458
  body: JSON.stringify({ query: sql }),
420
459
  signal: controller.signal
421
460
  });
422
- } catch (err) {
423
- if (err instanceof R2SqlTimeoutError || err?.name === "AbortError") throw new R2SqlTimeoutError(timeoutMs);
424
- throw new R2SqlError(`R2 SQL request failed: ${err.message}`);
461
+ } catch (error) {
462
+ if (error instanceof R2SqlTimeoutError || error?.name === "AbortError") return err(new R2SqlTimeoutError(timeoutMs));
463
+ return err(new R2SqlError(`R2 SQL request failed: ${error.message}`));
425
464
  } finally {
426
465
  clearTimeout(timer);
427
466
  }
428
467
  if (!response.ok) {
429
468
  const text = await response.text().catch(() => "");
430
- throw new R2SqlError(`R2 SQL HTTP ${response.status}: ${text}`, response.status);
469
+ return err(new R2SqlError(`R2 SQL HTTP ${response.status}: ${text}`, response.status));
431
470
  }
432
471
  const envelope = await response.json();
433
- if (!envelope.success) throw new R2SqlError(`R2 SQL query rejected: ${envelope.errors?.map((e) => e.message).join("; ") ?? "unknown R2 SQL error"}`);
434
- return {
472
+ if (!envelope.success) return err(new R2SqlError(`R2 SQL query rejected: ${envelope.errors?.map((e) => e.message).join("; ") ?? "unknown R2 SQL error"}`));
473
+ return ok({
435
474
  rows: normalizeRows(envelope.result),
436
475
  sql,
437
476
  queryMs: Date.now() - started
438
- };
477
+ });
478
+ }
479
+ async function query(sql) {
480
+ return unwrapResult(await queryResult(sql), r2SqlErrorToException);
439
481
  }
440
482
  function runPlan(plan) {
441
483
  const tableRef = r2TableRef(config.namespace, plan.table);
@@ -446,8 +488,9 @@ function createR2SqlClient(config) {
446
488
  }
447
489
  return {
448
490
  query,
491
+ queryResult,
449
492
  runPlan,
450
493
  runArchetype
451
494
  };
452
495
  }
453
- export { DuckDbIcebergError, DuckDbIcebergTimeoutError, R2SqlError, R2SqlTimeoutError, ServerTailRoutingError, TABLE_PLACEHOLDER, buildArchetypeSql, createDuckDbIcebergExecutor, createR2SqlClient, createServerTailDispatcher, resolveServerTailEngine };
496
+ export { DuckDbIcebergError, DuckDbIcebergTimeoutError, R2SqlError, R2SqlTimeoutError, ServerTailRoutingError, TABLE_PLACEHOLDER, buildArchetypeSql, createDuckDbIcebergExecutor, createR2SqlClient, createServerTailDispatcher, resolveServerTailEngine, resolveServerTailEngineResult };
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@gscdump/cloudflare",
3
3
  "type": "module",
4
- "version": "0.25.13",
4
+ "version": "0.26.0",
5
5
  "description": "Cloudflare-Workers-flavored helpers for the gscdump analytics stack: AnalyticsEnv binding contract, R2 SigV4 presigner, size-hint HMAC, DuckDB Workers shims, engine factory.",
6
6
  "author": {
7
7
  "name": "Harlan Wilton",
@@ -46,18 +46,18 @@
46
46
  "dependencies": {
47
47
  "@uwdata/flechette": "^2.5.0",
48
48
  "aws4fetch": "^1.0.20",
49
- "@gscdump/engine": "0.25.13",
50
- "@gscdump/contracts": "0.25.13",
51
- "@gscdump/sdk": "0.25.13",
52
- "gscdump": "0.25.13",
53
- "@gscdump/engine-sqlite": "0.25.13"
49
+ "@gscdump/contracts": "0.26.0",
50
+ "@gscdump/engine-sqlite": "0.26.0",
51
+ "@gscdump/sdk": "0.26.0",
52
+ "@gscdump/engine": "0.26.0",
53
+ "gscdump": "0.26.0"
54
54
  },
55
55
  "devDependencies": {
56
- "@cloudflare/vitest-pool-workers": "^0.16.10",
57
- "@cloudflare/workers-types": "^4.20260531.1",
56
+ "@cloudflare/vitest-pool-workers": "^0.16.12",
57
+ "@cloudflare/workers-types": "^4.20260603.1",
58
58
  "h3": "^1.15.11",
59
59
  "typescript": "^6.0.3",
60
- "wrangler": "^4.95.0"
60
+ "wrangler": "^4.97.0"
61
61
  },
62
62
  "scripts": {
63
63
  "build": "obuild",