@gscdump/cloudflare 0.20.3 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -1,5 +1,7 @@
1
- import { DuckDBFactory, ParquetCodec, QueryExecutor, Row, createStorageEngine } from "@gscdump/engine";
1
+ import { DuckDBFactory, IcebergTableName, ParquetCodec, QueryExecutor, Row, createStorageEngine } from "@gscdump/engine";
2
2
  import { H3Event } from "h3";
3
+ import { ArchetypeQuery, ArchetypeResult, ArchetypeResultRow } from "@gscdump/sdk";
4
+ import { ServerTailDirective } from "@gscdump/contracts";
3
5
  declare function getWasmDuckDBFactory(): DuckDBFactory;
4
6
  declare function resetWasmDuckDB(): void;
5
7
  interface AnalyticsEnv {
@@ -89,6 +91,187 @@ declare function createR2Presigner(env: AnalyticsEnv): ({
89
91
  bucket,
90
92
  expiresIn
91
93
  }: PresignOptions) => Promise<string>;
94
+ /** Placeholder substituted for the engine-specific table reference. */
95
+ declare const TABLE_PLACEHOLDER = "{{TABLE}}";
96
+ /** A dialect-neutral SQL plan. */
97
+ interface ArchetypeSqlPlan {
98
+ /** SQL with `{{TABLE}}` standing in for the table reference. */
99
+ sql: string;
100
+ /** Bound parameters, in `?`-order. */
101
+ params: unknown[];
102
+ /** The Iceberg fact table this query reads. */
103
+ table: IcebergTableName;
104
+ }
105
+ /**
106
+ * Translate an archetype query to a dialect-neutral SQL plan.
107
+ *
108
+ * Throws for `arbitrary-sql` (caller-supplied SQL, handled by the DuckDB executor directly) and `aux-cloud-only` (not an Iceberg query);
109
+ * also for an `entity-daily-sparkline` with an empty `entities` list and for a `preset-analyzer` preset other than `striking-distance`, `opportunity` or `zero-click`.
110
+ */
111
+ declare function buildArchetypeSql(query: ArchetypeQuery): ArchetypeSqlPlan;
112
+ /** Row returned by the DuckDB sibling. */
113
+ type DuckDbIcebergRow = Record<string, string | number | null>;
114
+ /**
115
+ * The minimal `DUCKDB_SVC` shape this executor needs — a structural subset of
116
+ * the binding in `workers-duckdb.ts` / `env.ts`. Any binding with `runSQL`
117
+ * satisfies it.
118
+ */
119
+ interface DuckDbSvc {
120
+ runSQL: (args: {
121
+ sql: string;
122
+ }) => Promise<{
123
+ rows: unknown[];
124
+ sql: string;
125
+ }>;
126
+ }
127
+ /** Configuration for the DuckDB-over-Iceberg executor. */
128
+ interface DuckDbIcebergExecutorConfig {
129
+ /** The DuckDB service binding (the sibling Worker RPC). */
130
+ svc: DuckDbSvc;
131
+ /**
132
+ * R2 Data Catalog warehouse identifier. The sibling resolves Iceberg table
133
+ * locations from `<warehouse>` + `<namespace>` + table name.
134
+ */
135
+ warehouse: string;
136
+ /** Iceberg namespace the 5 fact tables live in. */
137
+ namespace: string;
138
+ /**
139
+ * How the sibling addresses an Iceberg table in a `FROM` clause. `'path'` (used
140
+ * when unset) emits `iceberg_scan('<warehouse>/<namespace>/<table>')`; `'catalog'`
141
+ * emits the bare `<namespace>.<table>` reference, for a sibling that has the
142
+ * Iceberg REST catalog attached.
143
+ */
144
+ tableRefStyle?: 'path' | 'catalog';
145
+ /** Per-query wall-clock deadline (ms). Default 25s. */
146
+ timeoutMs?: number;
147
+ }
148
+ /** Result of a DuckDB-over-Iceberg query. */
149
+ interface DuckDbIcebergResult {
150
+ rows: DuckDbIcebergRow[];
151
+ /** The exact SQL sent to the sibling. */
152
+ sql: string;
153
+ queryMs: number;
154
+ }
155
+ declare class DuckDbIcebergError extends Error {
156
+ name: string;
157
+ }
158
+ declare class DuckDbIcebergTimeoutError extends Error {
159
+ name: string;
160
+ constructor(timeoutMs: number);
161
+ }
162
+ /** A configured DuckDB-over-Iceberg executor. */
163
+ interface DuckDbIcebergExecutor {
164
+ /** Run a raw SQL string with `{{TABLE_<name>}}` placeholders resolved. */
165
+ runSql: (sql: string, params?: readonly unknown[]) => Promise<DuckDbIcebergResult>;
166
+ /** Run a dialect-neutral plan: resolve `{{TABLE}}`, bind params, send. */
167
+ runPlan: (plan: ArchetypeSqlPlan) => Promise<DuckDbIcebergResult>;
168
+ /** Translate + run an archetype query. Handles `arbitrary-sql` verbatim. */
169
+ runArchetype: (query: ArchetypeQuery) => Promise<DuckDbIcebergResult>;
170
+ }
171
+ /**
172
+ * Create a DuckDB-over-Iceberg-files executor.
173
+ */
174
+ declare function createDuckDbIcebergExecutor(config: DuckDbIcebergExecutorConfig): DuckDbIcebergExecutor;
175
+ /** Configuration for an R2 SQL client. */
176
+ interface R2SqlClientConfig {
177
+ /** Cloudflare account id. */
178
+ accountId: string;
179
+ /** R2 Data Catalog warehouse name (`<bucket>` or `<account>_<bucket>`). */
180
+ warehouse: string;
181
+ /** Iceberg namespace the 5 fact tables live in. */
182
+ namespace: string;
183
+ /** Cloudflare API token with R2 Data Catalog read scope. */
184
+ token: string;
185
+ /**
186
+ * Override the HTTP endpoint base. Defaults to the public CF API. Tests
187
+ * point this at a local recorder.
188
+ */
189
+ apiBase?: string;
190
+ /**
191
+ * Injectable fetch. Defaults to global `fetch`. Tests pass a fake that
192
+ * returns a recorded CF envelope without a network round-trip.
193
+ */
194
+ fetchImpl?: typeof fetch;
195
+ /** Per-query wall-clock deadline (ms). Default 25s — under the Worker CPU budget. */
196
+ timeoutMs?: number;
197
+ }
198
+ /** A row as returned by R2 SQL — flat dimension + metric values. */
199
+ type R2SqlRow = Record<string, string | number | null>;
200
+ /** Result of an R2 SQL query. */
201
+ interface R2SqlResult {
202
+ rows: R2SqlRow[];
203
+ /** The exact SQL sent (params already inlined). For diagnostics. */
204
+ sql: string;
205
+ /** Wall-clock duration of the HTTP round-trip. */
206
+ queryMs: number;
207
+ }
208
+ declare class R2SqlError extends Error {
209
+ readonly status?: number | undefined;
210
+ name: string;
211
+ constructor(message: string, status?: number | undefined);
212
+ }
213
+ declare class R2SqlTimeoutError extends Error {
214
+ name: string;
215
+ constructor(timeoutMs: number);
216
+ }
217
+ /**
218
+ * Escape a JS value for inline embedding in R2 SQL. R2 SQL has no bound-param
219
+ * channel, so `buildArchetypeSql`'s `?` placeholders are substituted here.
220
+ * Finite numbers and bigints go in bare (a non-finite number throws `R2SqlError`); booleans → `TRUE`/`FALSE`; null and undefined → `NULL`; anything else is stringified and single-quote-escaped.
221
+ */
222
+ declare function escapeSqlValue(value: unknown): string;
223
+ /**
224
+ * Inline a plan's `?`-bound params into its SQL, in order. R2 SQL accepts only
225
+ * a literal query string. Quote-aware so a `?` inside a string literal is not
226
+ * mistaken for a placeholder. Throws `R2SqlError` when the number of placeholders and the number of params differ.
227
+ */
228
+ declare function inlineParams(sql: string, params: readonly unknown[]): string;
229
+ /** A configured R2 SQL client. */
230
+ interface R2SqlClient {
231
+ /** Run a raw SQL string (table reference already resolved). */
232
+ query: (sql: string) => Promise<R2SqlResult>;
233
+ /** Run a dialect-neutral plan: resolve `{{TABLE}}`, inline params, send. */
234
+ runPlan: (plan: ArchetypeSqlPlan) => Promise<R2SqlResult>;
235
+ /** Translate + run an archetype query end to end. */
236
+ runArchetype: (query: ArchetypeQuery) => Promise<R2SqlResult>;
237
+ }
238
+ /**
239
+ * Create an R2 SQL client. The endpoint requires a real CF token in
240
+ * production; tests inject `fetchImpl` returning a recorded envelope.
241
+ */
242
+ declare function createR2SqlClient(config: R2SqlClientConfig): R2SqlClient;
243
+ /** The two engines the server tail can route to. */
244
+ type ServerTailEngine = 'r2-sql' | 'duckdb';
245
+ /** Executors the dispatcher routes between. */
246
+ interface ServerTailDispatcherConfig {
247
+ r2Sql: R2SqlClient;
248
+ duckdb: DuckDbIcebergExecutor;
249
+ }
250
+ declare class ServerTailRoutingError extends Error {
251
+ name: string;
252
+ }
253
+ /**
254
+ * Decide which engine answers an archetype query. Pure — no I/O. Exposed so
255
+ * the file-resolution endpoint can compute the `ServerTailDirective.engine`
256
+ * with the SAME logic the dispatcher uses at execution time.
257
+ */
258
+ declare function resolveServerTailEngine(query: ArchetypeQuery): ServerTailEngine;
259
+ /** A configured server-tail dispatcher. */
260
+ interface ServerTailDispatcher {
261
+ /** Decide the engine for a query without running it. */
262
+ route: (query: ArchetypeQuery) => ServerTailEngine;
263
+ /**
264
+ * Execute a query, routing by execution class. If `directive` is supplied
265
+ * its `engine` is honoured only when consistent with the archetype's class
266
+ * (a `duckdb`-class archetype always runs on DuckDB regardless).
267
+ */
268
+ execute: <R extends ArchetypeResultRow = ArchetypeResultRow>(query: ArchetypeQuery, directive?: ServerTailDirective) => Promise<ArchetypeResult<R>>;
269
+ }
270
+ /**
271
+ * Create the server-tail dispatcher. Holds an R2 SQL client and a DuckDB
272
+ * executor and routes every `ArchetypeQuery` to one of them.
273
+ */
274
+ declare function createServerTailDispatcher(config: ServerTailDispatcherConfig): ServerTailDispatcher;
92
275
  declare function signSizeHint(env: AnalyticsEnv, key: string, bytes: number): Promise<string>;
93
276
  declare function verifySizeHint(env: AnalyticsEnv, key: string, bytes: number, providedHex: string): Promise<boolean>;
94
277
  declare function createDucklingsCodec(_env: AnalyticsEnv): ParquetCodec;
@@ -98,4 +281,4 @@ interface DucklingsExecutorOptions {
98
281
  ipcTotalBytes?: number;
99
282
  }
100
283
  declare function createDucklingsExecutor(env: AnalyticsEnv, opts?: DucklingsExecutorOptions): QueryExecutor;
101
- export { type AnalyticsEngineHooks, type AnalyticsEnv, type HostedR2QueryKeyInput, type InflightDedupe, type PresignOptions, type Row, createDucklingsCodec, createDucklingsExecutor, createInflightDedupe, createR2Presigner, getAnalyticsEngine, getHostedR2QueryKey, getWasmDuckDBFactory, resetWasmDuckDB, signSizeHint, useAnalyticsEnv, verifySizeHint };
284
+ export { type AnalyticsEngineHooks, type AnalyticsEnv, type ArchetypeSqlPlan, DuckDbIcebergError, type DuckDbIcebergExecutor, type DuckDbIcebergExecutorConfig, type DuckDbIcebergResult, type DuckDbIcebergRow, DuckDbIcebergTimeoutError, type DuckDbSvc, type HostedR2QueryKeyInput, type InflightDedupe, type PresignOptions, type R2SqlClient, type R2SqlClientConfig, R2SqlError, type R2SqlResult, type R2SqlRow, R2SqlTimeoutError, type Row, type ServerTailDispatcher, type ServerTailDispatcherConfig, type ServerTailEngine, ServerTailRoutingError, TABLE_PLACEHOLDER, buildArchetypeSql, createDuckDbIcebergExecutor, createDucklingsCodec, createDucklingsExecutor, createInflightDedupe, createR2Presigner, createR2SqlClient, createServerTailDispatcher, escapeSqlValue, getAnalyticsEngine, getHostedR2QueryKey, getWasmDuckDBFactory, inlineParams, resetWasmDuckDB, resolveServerTailEngine, signSizeHint, useAnalyticsEnv, verifySizeHint };
package/dist/index.mjs CHANGED
@@ -1,10 +1,11 @@
1
- import { SCHEMAS, bindLiterals, coerceRow, createStorageEngine } from "@gscdump/engine";
1
+ import { SCHEMAS, bindLiterals, coerceRow, createStorageEngine, inferTable } from "@gscdump/engine";
2
2
  import { createD1ManifestStore } from "@gscdump/engine-sqlite";
3
3
  import { createR2DataSource } from "@gscdump/engine/r2";
4
4
  import { createHyparquetCodec, decodeParquetToRows } from "@gscdump/engine/hyparquet";
5
5
  import { float64, int32, int64, tableFromArrays, tableToIPC, utf8 } from "@uwdata/flechette";
6
6
  import { createError } from "h3";
7
7
  import { AwsClient } from "aws4fetch";
8
+ import { ARCHETYPE_EXECUTION_CLASS } from "@gscdump/sdk";
8
9
  let handle = null;
9
10
  async function initHandle() {
10
11
  throw new Error("DuckDB-WASM handle not wired for Cloudflare Workers yet. Complete duckdb-wasm-handle.ts before enabling dual-write (user.migration_phase != 'd1').");
@@ -443,6 +444,405 @@ function createR2Presigner(env) {
443
444
  function encodeKey(key) {
444
445
  return key.split("/").map(encodeURIComponent).join("/");
445
446
  }
447
+ const TABLE_PLACEHOLDER = "{{TABLE}}"; // Stand-in token the plan builders put in their SQL; each executor's runPlan swaps it for its own table reference.
448
/**
 * Map a query dimension name to the column name used in the generated SQL.
 *
 * `page` becomes `url` and `queryCanonical` becomes `query_canonical`; any
 * other dimension name is returned unchanged.
 *
 * @param dim Dimension name from the archetype query.
 * @returns The column name to use in SQL.
 */
function dimColumn(dim) {
  switch (dim) {
    case "page":
      return "url";
    case "queryCanonical":
      return "query_canonical";
    default:
      return dim;
  }
}
453
/**
 * Build the aliased aggregate SELECT expression for one metric.
 *
 * `ctr` and `position` are ratios of sums; `NULLIF(..., 0)` turns a zero
 * impression total into NULL so the division cannot fail.
 *
 * @param metric One of `clicks`, `impressions`, `ctr`, `position`.
 * @returns The aggregate with its alias, e.g. `SUM(clicks) AS clicks`.
 * @throws {Error} If `metric` is not one of the four known metrics. Without
 *   this guard the function returned `undefined`, which silently produced
 *   malformed SQL once joined into a SELECT list.
 */
function metricExpr(metric) {
  switch (metric) {
    case "clicks": return "SUM(clicks) AS clicks";
    case "impressions": return "SUM(impressions) AS impressions";
    case "ctr": return "SUM(clicks) / NULLIF(SUM(impressions), 0) AS ctr";
    case "position": return "SUM(sum_position) / NULLIF(SUM(impressions), 0) AS position";
    default: throw new Error(`metricExpr: unknown metric '${String(metric)}'`);
  }
}
461
/**
 * Build the un-aliased aggregate expression for one metric, for use in an
 * ORDER BY clause.
 *
 * @param metric One of `clicks`, `impressions`, `ctr`, `position`.
 * @returns The aggregate expression, e.g. `SUM(clicks)`.
 * @throws {Error} If `metric` is not one of the four known metrics. Without
 *   this guard the function returned `undefined`, which ended up as the
 *   literal text `undefined` inside the ORDER BY clause.
 */
function orderMetricExpr(metric) {
  switch (metric) {
    case "clicks": return "SUM(clicks)";
    case "impressions": return "SUM(impressions)";
    case "ctr": return "SUM(clicks) / NULLIF(SUM(impressions), 0)";
    case "position": return "SUM(sum_position) / NULLIF(SUM(impressions), 0)";
    default: throw new Error(`orderMetricExpr: unknown metric '${String(metric)}'`);
  }
}
469
/**
 * Render a string as a single-quoted SQL literal, doubling every embedded
 * single quote.
 *
 * @param value The string to quote.
 * @returns The quoted literal, e.g. `it's` becomes `'it''s'`.
 */
function sqlStringLiteral(value) {
  const escaped = value.replace(/'/g, "''");
  return "'" + escaped + "'";
}
472
/**
 * Build the WHERE fragment shared by every archetype: site, search type and
 * an inclusive date range, all as `?` placeholders.
 *
 * @param q Query carrying `siteId`, `searchType` and `range.start` / `range.end`.
 * @returns `{ clause, params }` where `params` lines up with the four `?`
 *   placeholders in `clause`, in order.
 */
function partitionWhere(q) {
  const { start, end } = q.range;
  const clause = "site_id = ? AND search_type = ? AND date BETWEEN ? AND ?";
  const params = [q.siteId, q.searchType, start, end];
  return { clause, params };
}
483
/**
 * Plan for `site-daily-timeseries`: the requested metrics aggregated per day
 * over the `dates` table, oldest day first.
 *
 * @param q Archetype query with `metrics` plus the partition fields.
 * @returns A plan whose params are exactly the partition params.
 */
function buildSiteDailyTimeseries(q) {
  const { clause, params } = partitionWhere(q);
  const selected = q.metrics.map((metric) => metricExpr(metric)).join(", ");
  const sql = [
    `SELECT date, ${selected}`,
    `FROM ${TABLE_PLACEHOLDER}`,
    `WHERE ${clause}`,
    "GROUP BY date",
    "ORDER BY date ASC"
  ].join(" ");
  return { table: "dates", params, sql };
}
492
/**
 * Plan for `entity-daily-timeseries`: the requested metrics per day for one
 * entity (a single value of one dimension), oldest day first.
 *
 * The fact table is chosen by `inferTable` from the entity's dimension; the
 * entity value is bound as the last `?` parameter.
 *
 * @param q Archetype query with `entity.dimension`, `entity.value`, `metrics`
 *   and the partition fields.
 * @returns The SQL plan.
 */
function buildEntityDailyTimeseries(q) {
  const { dimension, value } = q.entity;
  const table = inferTable([dimension]);
  const { clause, params: partitionParams } = partitionWhere(q);
  const column = dimColumn(dimension);
  const selected = q.metrics.map((metric) => metricExpr(metric)).join(", ");
  const sql = [
    `SELECT date, ${selected}`,
    `FROM ${TABLE_PLACEHOLDER}`,
    `WHERE ${clause} AND ${column} = ?`,
    "GROUP BY date",
    "ORDER BY date ASC"
  ].join(" ");
  return { table, params: [...partitionParams, value], sql };
}
503
/**
 * Plan for `entity-daily-sparkline`: one metric per day for each entity in a
 * pre-resolved list, oldest day first.
 *
 * The entity values are bound as `?` parameters (one placeholder per entity)
 * rather than spliced into the SQL as quoted literals. That keeps this builder
 * consistent with every other one, lets non-string values through, and means
 * an entity containing `?` (common in URLs) can never be mistaken for a
 * placeholder by whichever binder later inlines the params.
 *
 * @param q Archetype query with `dimension`, `entities`, `metric` and the
 *   partition fields.
 * @returns The SQL plan; `params` is the partition params followed by the
 *   entities, matching placeholder order.
 * @throws {Error} If `entities` is empty.
 */
function buildEntityDailySparkline(q) {
  const table = inferTable([q.dimension]);
  const w = partitionWhere(q);
  const col = dimColumn(q.dimension);
  if (q.entities.length === 0) throw new Error("entity-daily-sparkline: empty entities — resolver must pre-resolve the top-N list");
  // The IN list sits after the partition clause in the SQL text, so the
  // entity params must follow the partition params.
  const placeholders = q.entities.map(() => "?").join(", ");
  return {
    table,
    params: [...w.params, ...q.entities],
    sql: `SELECT date, ${col}, ${metricExpr(q.metric)} FROM ${TABLE_PLACEHOLDER} WHERE ${w.clause} AND ${col} IN (${placeholders}) GROUP BY date, ${col} ORDER BY date ASC`
  };
}
515
/**
 * Plan for `top-n-breakdown`: the requested metrics grouped by one dimension,
 * sorted by a metric, limited to N rows with an optional offset.
 *
 * `limit`, `offset` and the sort direction are written into the SQL text
 * rather than bound, so they are validated here first: the direction must be
 * ASC or DESC, and limit/offset must be finite. Without these checks a bad
 * value produced `LIMIT NaN` / `OFFSET Infinity`, or let an arbitrary
 * direction string into the statement.
 *
 * @param q Archetype query with `dimension`, `metrics`, `orderBy`, `limit`,
 *   optional `offset`, and the partition fields.
 * @returns The SQL plan; `params` is exactly the partition params.
 * @throws {Error} If `orderBy.dir` is not asc/desc (case-insensitive), or if
 *   `limit` or a positive `offset` is not a finite number.
 */
function buildTopNBreakdown(q) {
  const table = inferTable([q.dimension]);
  const w = partitionWhere(q);
  const col = dimColumn(q.dimension);
  const metrics = q.metrics.map(metricExpr).join(", ");

  const dir = String(q.orderBy.dir).toUpperCase();
  if (dir !== "ASC" && dir !== "DESC") throw new Error(`top-n-breakdown: invalid orderBy.dir '${String(q.orderBy.dir)}'`);

  const limit = Math.floor(q.limit);
  if (!Number.isFinite(limit)) throw new Error("top-n-breakdown: limit must be a finite number");

  let sql = `SELECT ${col}, ${metrics} FROM ${TABLE_PLACEHOLDER} WHERE ${w.clause} GROUP BY ${col} ORDER BY ${orderMetricExpr(q.orderBy.metric)} ${dir} LIMIT ${Math.max(0, limit)}`;

  // Missing, zero, negative or NaN offsets add no OFFSET clause.
  const offset = Number(q.offset ?? 0);
  if (offset > 0) {
    if (!Number.isFinite(offset)) throw new Error("top-n-breakdown: offset must be a finite number");
    sql += ` OFFSET ${Math.floor(offset)}`;
  }
  return {
    table,
    params: w.params,
    sql
  };
}
529
/**
 * Plan for `single-row-lookup`: the requested metrics for the row identified
 * by an exact match on every dimension in `q.match`.
 *
 * Each matched dimension adds an `AND <column> = ?` condition and appears in
 * both the SELECT list and the GROUP BY. With no matched dimensions the query
 * selects the metrics only and has no GROUP BY.
 *
 * @param q Archetype query with `match` (dimension → value), `metrics` and
 *   the partition fields.
 * @returns The SQL plan; `params` is the partition params followed by the
 *   match values in key order.
 */
function buildSingleRowLookup(q) {
  const dims = Object.keys(q.match);
  const table = inferTable(dims);
  const base = partitionWhere(q);

  const columns = dims.map((dim) => dimColumn(dim));
  const clause = columns.reduce((acc, column) => `${acc} AND ${column} = ?`, base.clause);
  const params = [...base.params];
  for (const dim of dims) params.push(q.match[dim]);

  const metrics = q.metrics.map((metric) => metricExpr(metric)).join(", ");
  const hasDims = columns.length > 0;
  const columnList = columns.join(", ");
  const selectList = hasDims ? `${columnList}, ${metrics}` : metrics;
  const groupBy = hasDims ? ` GROUP BY ${columnList}` : "";

  return {
    table,
    params,
    sql: `SELECT ${selectList} FROM ${TABLE_PLACEHOLDER} WHERE ${clause}${groupBy}`
  };
}
547
/**
 * Plan for `multi-series-stacked-daily`: one metric per day, split into one
 * series per value of `seriesDimension`, oldest day first.
 *
 * @param q Archetype query with `seriesDimension`, `metric` and the partition
 *   fields.
 * @returns The SQL plan; `params` is exactly the partition params.
 */
function buildMultiSeriesStackedDaily(q) {
  const { seriesDimension } = q;
  const table = inferTable([seriesDimension]);
  const { clause, params } = partitionWhere(q);
  const column = dimColumn(seriesDimension);
  const sql = [
    `SELECT date, ${column}, ${metricExpr(q.metric)}`,
    `FROM ${TABLE_PLACEHOLDER}`,
    `WHERE ${clause}`,
    `GROUP BY date, ${column}`,
    "ORDER BY date ASC"
  ].join(" ");
  return { table, params, sql };
}
557
/**
 * Plan for `preset-analyzer`: page × query aggregates from `page_queries`,
 * filtered by a preset-specific HAVING clause and sorted by impressions.
 *
 * Supported presets and their tunable params (defaults in parentheses):
 * - `striking-distance`: `minImpressions` (100), `minPosition` (11),
 *   `maxPosition` (20).
 * - `opportunity` and `zero-click`: `minImpressions` (100); both keep only
 *   groups with zero clicks.
 *   NOTE(review): these two presets produce identical SQL — confirm that is
 *   intended.
 * `limit` (1000) applies to every preset and is floored to an integer of at
 * least 1.
 *
 * Numeric params are validated up front. Without that, a non-numeric `limit`
 * produced `LIMIT NaN`, and NaN thresholds were passed on as bound params.
 *
 * @param q Archetype query with `presetId`, optional `params`, and the
 *   partition fields.
 * @returns The SQL plan; `params` is the partition params followed by the
 *   HAVING thresholds in placeholder order.
 * @throws {Error} If a numeric param is not a finite number, or if `presetId`
 *   is not one of the supported presets.
 */
function buildPresetAnalyzer(q) {
  const params = q.params ?? {};
  // Read a numeric param, falling back to its default when null/undefined.
  const numericParam = (name, fallback) => {
    const value = Number(params[name] ?? fallback);
    if (!Number.isFinite(value)) throw new Error(`preset-analyzer: param '${name}' must be a finite number`);
    return value;
  };
  const minImpressions = numericParam("minImpressions", 100);
  const limit = Math.max(1, Math.floor(numericParam("limit", 1e3)));
  const w = partitionWhere(q);
  const wp = [...w.params];
  let having;
  switch (q.presetId) {
    case "striking-distance":
      having = `HAVING SUM(impressions) >= ? AND (SUM(sum_position) / NULLIF(SUM(impressions), 0)) BETWEEN ? AND ?`;
      wp.push(minImpressions, numericParam("minPosition", 11), numericParam("maxPosition", 20));
      break;
    case "opportunity":
    case "zero-click":
      having = `HAVING SUM(impressions) >= ? AND SUM(clicks) = 0`;
      wp.push(minImpressions);
      break;
    default: throw new Error(`preset-analyzer: preset '${q.presetId}' is not R2-SQL-safe — window-function presets must be sent as archetype 'arbitrary-sql'`);
  }
  return {
    table: "page_queries",
    params: wp,
    sql: `SELECT url, query, SUM(clicks) AS clicks, SUM(impressions) AS impressions, SUM(clicks) / NULLIF(SUM(impressions), 0) AS ctr, SUM(sum_position) / NULLIF(SUM(impressions), 0) AS position FROM ${TABLE_PLACEHOLDER} WHERE ${w.clause} GROUP BY url, query ${having} ORDER BY SUM(impressions) DESC LIMIT ${limit}`
  };
}
585
/**
 * Plan for `two-dimension-detail`: the requested metrics grouped by page
 * (`url`) and query over `page_queries`, with optional exact-match filters,
 * ordering and limit.
 *
 * The sort direction and limit are written into the SQL text rather than
 * bound, so they are validated first: the direction must be ASC or DESC, and
 * a positive limit must be finite.
 *
 * @param q Archetype query with `metrics`, optional `filter.page` /
 *   `filter.query`, optional `orderBy`, optional `limit`, and the partition
 *   fields.
 * @returns The SQL plan; `params` is the partition params followed by the
 *   filter values that were applied.
 * @throws {Error} If `orderBy.dir` is not asc/desc (case-insensitive) or a
 *   positive `limit` is not finite.
 */
function buildTwoDimensionDetail(q) {
  const w = partitionWhere(q);
  const params = [...w.params];
  let clause = w.clause;
  if (q.filter?.page) {
    clause += ` AND url = ?`;
    params.push(q.filter.page);
  }
  if (q.filter?.query) {
    clause += ` AND query = ?`;
    params.push(q.filter.query);
  }
  let sql = `SELECT url, query, ${q.metrics.map(metricExpr).join(", ")} FROM ${TABLE_PLACEHOLDER} WHERE ${clause} GROUP BY url, query`;
  if (q.orderBy) {
    const dir = String(q.orderBy.dir).toUpperCase();
    if (dir !== "ASC" && dir !== "DESC") throw new Error(`two-dimension-detail: invalid orderBy.dir '${String(q.orderBy.dir)}'`);
    sql += ` ORDER BY ${orderMetricExpr(q.orderBy.metric)} ${dir}`;
  }
  if (q.limit && q.limit > 0) {
    const limit = Math.floor(q.limit);
    if (!Number.isFinite(limit)) throw new Error("two-dimension-detail: limit must be a finite number");
    sql += ` LIMIT ${limit}`;
  }
  return {
    table: "page_queries",
    params,
    sql
  };
}
606
/**
 * Translate an archetype query into a dialect-neutral SQL plan by dispatching
 * on `query.archetype` to the matching builder.
 *
 * @param query The archetype query.
 * @returns A plan with `{{TABLE}}`-templated SQL, `?`-ordered params and the
 *   fact table to read.
 * @throws {Error} For `arbitrary-sql` (the caller supplies the SQL), for
 *   `aux-cloud-only` (not an Iceberg query), and for an archetype that is not
 *   recognised. Without the last check an unknown archetype returned
 *   `undefined` and only failed later, when an executor read `plan.sql`.
 *   The individual builders may throw as well.
 */
function buildArchetypeSql(query) {
  switch (query.archetype) {
    case "site-daily-timeseries": return buildSiteDailyTimeseries(query);
    case "entity-daily-timeseries": return buildEntityDailyTimeseries(query);
    case "entity-daily-sparkline": return buildEntityDailySparkline(query);
    case "top-n-breakdown": return buildTopNBreakdown(query);
    case "single-row-lookup": return buildSingleRowLookup(query);
    case "multi-series-stacked-daily": return buildMultiSeriesStackedDaily(query);
    case "preset-analyzer": return buildPresetAnalyzer(query);
    case "two-dimension-detail": return buildTwoDimensionDetail(query);
    case "arbitrary-sql": throw new Error("buildArchetypeSql: arbitrary-sql carries caller SQL — the DuckDB executor runs it verbatim");
    case "aux-cloud-only": throw new Error("buildArchetypeSql: aux-cloud-only is not an Iceberg query");
    default: throw new Error(`buildArchetypeSql: unknown archetype '${String(query.archetype)}'`);
  }
}
620
+ var ServerTailRoutingError = class extends Error { // Thrown by resolveServerTailEngine when an archetype's execution class is "cloud-only".
621
+ name = "ServerTailRoutingError"; // Instance field, so err.name reports the subclass rather than "Error".
622
+ };
623
/**
 * Decide which server-tail engine answers an archetype query. Performs no I/O.
 *
 * The archetype's entry in `ARCHETYPE_EXECUTION_CLASS` decides first:
 * `cloud-only` is rejected and `duckdb` goes to DuckDB. Anything else goes to
 * R2 SQL, except a `top-n-breakdown` with a positive offset, which also goes
 * to DuckDB.
 *
 * @param query The archetype query.
 * @returns `"duckdb"` or `"r2-sql"`.
 * @throws {ServerTailRoutingError} If the archetype is cloud-only.
 */
function resolveServerTailEngine(query) {
  const { archetype } = query;
  switch (ARCHETYPE_EXECUTION_CLASS[archetype]) {
    case "cloud-only":
      throw new ServerTailRoutingError(`archetype '${archetype}' is cloud-only — not a server-tail query`);
    case "duckdb":
      return "duckdb";
  }
  const isPagedTopN = archetype === "top-n-breakdown" && Boolean(query.offset) && query.offset > 0;
  return isPagedTopN ? "duckdb" : "r2-sql";
}
630
/**
 * Map an engine name to the `source` label put on an archetype result.
 *
 * @param engine `"r2-sql"` or `"duckdb"`.
 * @returns `"server-r2-sql"` for `"r2-sql"`, otherwise `"server-duckdb"`.
 */
function sourceFor(engine) {
  if (engine !== "r2-sql") return "server-duckdb";
  return "server-r2-sql";
}
633
/**
 * Create the server-tail dispatcher, which routes each archetype query to the
 * R2 SQL client or the DuckDB executor held in `config`.
 *
 * @param config `{ r2Sql, duckdb }`, each exposing `runArchetype(query)`.
 * @returns `{ route, execute }`:
 *   - `route(query)` returns the engine `resolveServerTailEngine` picks.
 *   - `execute(query, directive?)` runs the query on that engine. The one
 *     override: if a directive is given, its `engine` differs from the routed
 *     one, and the routed engine is `"r2-sql"`, the query runs on DuckDB.
 *     A query routed to DuckDB always runs on DuckDB.
 */
function createServerTailDispatcher(config) {
  const route = (query) => resolveServerTailEngine(query);

  // Run the query on the chosen engine and wrap the rows in the result shape.
  async function runOn(engine, query) {
    const onR2Sql = engine === "r2-sql";
    const executor = onR2Sql ? config.r2Sql : config.duckdb;
    const res = await executor.runArchetype(query);
    return {
      archetype: query.archetype,
      rows: res.rows,
      source: sourceFor(onR2Sql ? "r2-sql" : "duckdb"),
      meta: {
        rowCount: res.rows.length,
        queryMs: res.queryMs
      }
    };
  }

  async function execute(query, directive) {
    const routed = route(query);
    const escalate = Boolean(directive) && directive.engine !== routed && routed === "r2-sql";
    return runOn(escalate ? "duckdb" : routed, query);
  }

  return { route, execute };
}
671
+ var DuckDbIcebergError = class extends Error { // Thrown by the DuckDB executor when DUCKDB_SVC.runSQL fails (other than by timeout) and for aux-cloud-only queries.
672
+ name = "DuckDbIcebergError"; // Instance field, so err.name reports the subclass rather than "Error".
673
+ };
674
+ var DuckDbIcebergTimeoutError = class extends Error { // Rejection raised by withDeadline when the deadline timer fires before the operation settles.
675
+ name = "DuckDbIcebergTimeoutError"; // Instance field, so err.name reports the subclass rather than "Error".
676
+ constructor(timeoutMs) { // timeoutMs: the deadline in milliseconds; only used to build the message.
677
+ super(`DuckDB-over-Iceberg query exceeded ${timeoutMs}ms deadline`);
678
+ }
679
+ };
680
+ const DEFAULT_TIMEOUT_MS$1 = 25e3; // 25 000 ms; used by createDuckDbIcebergExecutor when config.timeoutMs is unset.
681
/**
 * Build the FROM-clause reference for an Iceberg table on the DuckDB side.
 *
 * With `tableRefStyle === "catalog"` the result is the bare
 * `<namespace>.<table>`. For any other style (including unset) it is
 * `iceberg_scan('<warehouse>/<namespace>/<table>')`.
 *
 * @param config Executor config (`warehouse`, `namespace`, `tableRefStyle`).
 * @param table Table name.
 * @returns The table reference text.
 */
function icebergTableRef(config, table) {
  const { tableRefStyle, namespace } = config;
  return tableRefStyle === "catalog"
    ? `${namespace}.${table}`
    : `iceberg_scan('${config.warehouse}/${namespace}/${table}')`;
}
685
/**
 * Settle with `op`'s outcome, or reject with a `DuckDbIcebergTimeoutError` if
 * `op` has not settled within `timeoutMs` milliseconds.
 *
 * The timer is cleared once `op` settles. `op` itself is not cancelled when
 * the deadline fires; its eventual outcome is simply ignored.
 *
 * @param op The promise to guard.
 * @param timeoutMs Deadline in milliseconds.
 * @returns A promise mirroring `op`, subject to the deadline.
 */
function withDeadline(op, timeoutMs) {
  return new Promise((settle, fail) => {
    const onTimeout = () => fail(new DuckDbIcebergTimeoutError(timeoutMs));
    const handle = setTimeout(onTimeout, timeoutMs);
    const stopTimer = () => clearTimeout(handle);
    op.then(settle, fail).finally(stopTimer);
  });
}
691
/**
 * Replace table placeholders in caller-supplied SQL with Iceberg table
 * references.
 *
 * Both `{{TABLE_<name>}}` (the form the public `runSql` contract documents)
 * and the bare `{{<name>}}` are accepted; each becomes
 * `icebergTableRef(config, <name>)`. Previously the whole word between the
 * braces was used as the table name, so `{{TABLE_pages}}` resolved to a table
 * literally called `TABLE_pages`.
 * NOTE(review): this assumes no real table name starts with `TABLE_` —
 * confirm against `IcebergTableName`.
 *
 * @param sql SQL text containing placeholders.
 * @param config Executor config passed through to `icebergTableRef`.
 * @returns The SQL with every placeholder substituted.
 */
function resolveTablePlaceholders(sql, config) {
  return sql.replace(/\{\{(?:TABLE_)?(\w+)\}\}/g, (_, table) => icebergTableRef(config, table));
}
694
/**
 * Create a DuckDB-over-Iceberg executor that sends SQL to the `config.svc`
 * binding under a wall-clock deadline.
 *
 * @param config Executor config: `svc` (must expose `runSQL({ sql })`),
 *   `warehouse`, `namespace`, optional `tableRefStyle` and `timeoutMs`
 *   (defaults to `DEFAULT_TIMEOUT_MS$1`).
 * @returns `{ runSql, runPlan, runArchetype }`:
 *   - `runSql(sql, params?)` resolves table placeholders, binds params, sends.
 *   - `runPlan(plan)` swaps `{{TABLE}}` for the plan's table reference, binds
 *     params, sends.
 *   - `runArchetype(query)` runs `arbitrary-sql` through `runSql`, rejects
 *     `aux-cloud-only`, and builds a plan for everything else.
 *   Each resolves to `{ rows, sql, queryMs }`.
 */
function createDuckDbIcebergExecutor(config) {
  const timeoutMs = config.timeoutMs ?? DEFAULT_TIMEOUT_MS$1;

  /**
   * Send final SQL to the service.
   * Timeouts are rethrown as-is. Every other failure, including a synchronous
   * throw from `runSQL` and non-Error rejection values, is wrapped in a
   * `DuckDbIcebergError` with the original attached as `cause`.
   */
  async function send(sql) {
    const started = Date.now();
    let result;
    try {
      result = await withDeadline(config.svc.runSQL({ sql }), timeoutMs);
    } catch (err) {
      if (err instanceof DuckDbIcebergTimeoutError) throw err;
      const reason = err instanceof Error ? err.message : String(err);
      throw new DuckDbIcebergError(`DUCKDB_SVC.runSQL failed: ${reason}`, { cause: err });
    }
    return {
      // Tolerate a service that returns nothing, or omits rows / sql.
      rows: result?.rows ?? [],
      sql: result?.sql ?? sql,
      queryMs: Date.now() - started
    };
  }

  function runSql(sql, params = []) {
    return send(bindLiterals(resolveTablePlaceholders(sql, config), params));
  }

  function runPlan(plan) {
    return send(bindLiterals(plan.sql.split(TABLE_PLACEHOLDER).join(icebergTableRef(config, plan.table)), plan.params));
  }

  async function runArchetype(query) {
    if (query.archetype === "arbitrary-sql") return runSql(query.sql, query.params ?? []);
    if (query.archetype === "aux-cloud-only") throw new DuckDbIcebergError("aux-cloud-only is not an Iceberg query");
    return runPlan(buildArchetypeSql(query));
  }

  return {
    runSql,
    runPlan,
    runArchetype
  };
}
725
/**
 * Build the R2 SQL table reference `<namespace>.<table>`.
 *
 * @param namespace Iceberg namespace.
 * @param table Table name.
 * @returns The dotted reference.
 */
function r2TableRef(namespace, table) {
  return "".concat(namespace, ".", table);
}
728
+ var R2SqlError = class extends Error { // Failure type used by the R2 SQL helpers: bad params, failed requests, non-OK HTTP responses and rejected queries.
729
+ status; // HTTP status code when one was passed to the constructor; otherwise undefined.
730
+ name = "R2SqlError"; // Instance field, so err.name reports the subclass rather than "Error".
731
+ constructor(message, status) { // status is optional.
732
+ super(message);
733
+ this.status = status;
734
+ }
735
+ };
736
+ var R2SqlTimeoutError = class extends Error { // Used as the abort reason, and thrown, when an R2 SQL request exceeds its deadline.
737
+ name = "R2SqlTimeoutError"; // Instance field, so err.name reports the subclass rather than "Error".
738
+ constructor(timeoutMs) { // timeoutMs: the deadline in milliseconds; only used to build the message.
739
+ super(`R2 SQL query exceeded ${timeoutMs}ms deadline`);
740
+ }
741
+ };
742
+ const DEFAULT_API_BASE = "https://api.cloudflare.com/client/v4"; // Used by createR2SqlClient when config.apiBase is unset.
743
+ const DEFAULT_TIMEOUT_MS = 25e3; // 25 000 ms; used by createR2SqlClient when config.timeoutMs is unset.
744
/**
 * Render a JS value as SQL text for inline embedding.
 *
 * - `null` / `undefined` become `NULL`
 * - finite numbers and bigints are written bare
 * - booleans become `TRUE` / `FALSE`
 * - everything else is stringified and single-quoted, with embedded single
 *   quotes doubled
 *
 * @param value The value to embed.
 * @returns The SQL text for the value.
 * @throws {R2SqlError} If `value` is a non-finite number (NaN, ±Infinity).
 */
function escapeSqlValue(value) {
  if (value === null || value === undefined) return "NULL";
  switch (typeof value) {
    case "number":
      if (Number.isFinite(value)) return String(value);
      throw new R2SqlError(`cannot embed non-finite number in SQL: ${value}`);
    case "bigint":
      return value.toString();
    case "boolean":
      return value ? "TRUE" : "FALSE";
    default: {
      const text = String(value).replace(/'/g, "''");
      return `'${text}'`;
    }
  }
}
754
/**
 * Substitute each `?` placeholder in `sql` with the escaped form of the
 * corresponding entry in `params`, in order.
 *
 * The scan tracks single-quoted string literals (with `''` as an escaped
 * quote), so a `?` inside a literal is copied through untouched.
 *
 * @param sql SQL text with `?` placeholders.
 * @param params Values to inline, in placeholder order.
 * @returns The SQL with all placeholders replaced.
 * @throws {R2SqlError} If the SQL has more placeholders than params, or if
 *   params are left over after the scan.
 */
function inlineParams(sql, params) {
  const pieces = [];
  let used = 0;
  let quoted = false;
  let pos = 0;
  while (pos < sql.length) {
    const ch = sql[pos];
    if (ch === "'" && quoted && sql[pos + 1] === "'") {
      // Escaped quote inside a literal: copy both characters, stay quoted.
      pieces.push("''");
      pos += 2;
    } else if (ch === "'") {
      quoted = !quoted;
      pieces.push(ch);
      pos += 1;
    } else if (ch === "?" && !quoted) {
      if (used >= params.length) throw new R2SqlError(`SQL has more ? placeholders than params (${params.length})`);
      pieces.push(escapeSqlValue(params[used]));
      used += 1;
      pos += 1;
    } else {
      pieces.push(ch);
      pos += 1;
    }
  }
  if (used !== params.length) throw new R2SqlError(`SQL has ${used} ? placeholders but ${params.length} params supplied`);
  return pieces.join("");
}
780
/**
 * Turn the `result` part of a response envelope into an array of row objects.
 *
 * Two shapes are understood: `{ rows: [...] }`, returned as-is, and
 * `{ columns: [...], data: [[...], ...] }`, where each tuple is zipped with
 * the column names and missing or nullish cells become `null`. Anything else,
 * including a falsy `result`, yields an empty array.
 *
 * @param result The envelope's `result` value.
 * @returns The rows.
 */
function normalizeRows(result) {
  if (!result) return [];
  const { rows, columns, data } = result;
  if (Array.isArray(rows)) return rows;
  if (!Array.isArray(columns) || !Array.isArray(data)) return [];
  return data.map((tuple) => columns.reduce((row, col, idx) => {
    row[col] = tuple[idx] ?? null;
    return row;
  }, {}));
}
795
/**
 * Create an R2 SQL client that POSTs literal SQL to
 * `<apiBase>/accounts/<accountId>/r2-catalog/<warehouse>/sql` with a bearer
 * token.
 * NOTE(review): confirm this endpoint path against the current Cloudflare
 * R2 SQL API documentation.
 *
 * @param config Client config: `accountId`, `warehouse`, `namespace`, `token`,
 *   optional `apiBase` (defaults to `DEFAULT_API_BASE`), `fetchImpl` (defaults
 *   to the global `fetch`) and `timeoutMs` (defaults to `DEFAULT_TIMEOUT_MS`).
 * @returns `{ query, runPlan, runArchetype }`:
 *   - `query(sql)` sends the SQL as-is.
 *   - `runPlan(plan)` swaps `{{TABLE}}` for `<namespace>.<table>`, inlines
 *     the params, sends.
 *   - `runArchetype(q)` builds a plan with `buildArchetypeSql` and runs it.
 *   Each resolves to `{ rows, sql, queryMs }`.
 */
function createR2SqlClient(config) {
  const fetchImpl = config.fetchImpl ?? globalThis.fetch;
  const apiBase = config.apiBase ?? DEFAULT_API_BASE;
  const timeoutMs = config.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const endpoint = `${apiBase}/accounts/${config.accountId}/r2-catalog/${config.warehouse}/sql`;

  /**
   * Send one SQL statement.
   *
   * The deadline covers the whole round-trip, reading the response body
   * included; previously the timer was cleared as soon as the headers arrived.
   *
   * @throws {R2SqlTimeoutError} If the deadline fires.
   * @throws {R2SqlError} On a network failure, a non-OK HTTP status (with
   *   `status` set), an unparseable body, or an envelope without `success`.
   */
  async function query(sql) {
    const started = Date.now();
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(new R2SqlTimeoutError(timeoutMs)), timeoutMs);
    let envelope;
    try {
      const response = await fetchImpl(endpoint, {
        method: "POST",
        headers: {
          "authorization": `Bearer ${config.token}`,
          "content-type": "application/json"
        },
        body: JSON.stringify({ query: sql }),
        signal: controller.signal
      });
      if (!response.ok) {
        const text = await response.text().catch(() => "");
        throw new R2SqlError(`R2 SQL HTTP ${response.status}: ${text}`, response.status);
      }
      envelope = await response.json();
    } catch (err) {
      // The HTTP-status error raised above passes through unchanged.
      if (err instanceof R2SqlError) throw err;
      // Only the deadline timer aborts this controller, so an aborted signal
      // means the failure was a timeout, whatever error the runtime surfaced.
      if (err instanceof R2SqlTimeoutError || err?.name === "AbortError" || controller.signal.aborted) throw new R2SqlTimeoutError(timeoutMs);
      const reason = err instanceof Error ? err.message : String(err);
      throw new R2SqlError(`R2 SQL request failed: ${reason}`);
    } finally {
      clearTimeout(timer);
    }
    if (!envelope?.success) {
      const details = Array.isArray(envelope?.errors)
        ? envelope.errors.map((e) => e?.message).filter(Boolean).join("; ")
        : "";
      throw new R2SqlError(`R2 SQL query rejected: ${details || "unknown R2 SQL error"}`);
    }
    return {
      rows: normalizeRows(envelope.result),
      sql,
      queryMs: Date.now() - started
    };
  }

  function runPlan(plan) {
    const tableRef = r2TableRef(config.namespace, plan.table);
    return query(inlineParams(plan.sql.split(TABLE_PLACEHOLDER).join(tableRef), plan.params));
  }

  function runArchetype(archetypeQuery) {
    return runPlan(buildArchetypeSql(archetypeQuery));
  }

  return {
    query,
    runPlan,
    runArchetype
  };
}
446
846
  const SIG_HEX_LEN = 16;
447
847
  const keyCache = /* @__PURE__ */ new WeakMap();
448
848
  const stringKeyCache = /* @__PURE__ */ new Map();
@@ -483,4 +883,4 @@ async function verifySizeHint(env, key, bytes, providedHex) {
483
883
  for (let i = 0; i < SIG_HEX_LEN; i++) diff |= expected.charCodeAt(i) ^ providedHex.charCodeAt(i);
484
884
  return diff === 0;
485
885
  }
486
- export { createDucklingsCodec, createDucklingsExecutor, createInflightDedupe, createR2Presigner, getAnalyticsEngine, getHostedR2QueryKey, getWasmDuckDBFactory, resetWasmDuckDB, signSizeHint, useAnalyticsEnv, verifySizeHint };
886
+ export { DuckDbIcebergError, DuckDbIcebergTimeoutError, R2SqlError, R2SqlTimeoutError, ServerTailRoutingError, TABLE_PLACEHOLDER, buildArchetypeSql, createDuckDbIcebergExecutor, createDucklingsCodec, createDucklingsExecutor, createInflightDedupe, createR2Presigner, createR2SqlClient, createServerTailDispatcher, escapeSqlValue, getAnalyticsEngine, getHostedR2QueryKey, getWasmDuckDBFactory, inlineParams, resetWasmDuckDB, resolveServerTailEngine, signSizeHint, useAnalyticsEnv, verifySizeHint };
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@gscdump/cloudflare",
3
3
  "type": "module",
4
- "version": "0.20.3",
4
+ "version": "0.21.0",
5
5
  "description": "Cloudflare-Workers-flavored helpers for the gscdump analytics stack: AnalyticsEnv binding contract, R2 SigV4 presigner, size-hint HMAC, DuckDB Workers shims, engine factory.",
6
6
  "author": {
7
7
  "name": "Harlan Wilton",
@@ -41,8 +41,11 @@
41
41
  "dependencies": {
42
42
  "@uwdata/flechette": "^2.5.0",
43
43
  "aws4fetch": "^1.0.20",
44
- "@gscdump/engine-sqlite": "0.20.3",
45
- "@gscdump/engine": "0.20.3"
44
+ "@gscdump/contracts": "0.21.0",
45
+ "@gscdump/engine-sqlite": "0.21.0",
46
+ "gscdump": "0.21.0",
47
+ "@gscdump/sdk": "0.21.0",
48
+ "@gscdump/engine": "0.21.0"
46
49
  },
47
50
  "devDependencies": {
48
51
  "@cloudflare/workers-types": "^4.20260520.1",