@gscdump/cloudflare 0.20.2 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +185 -2
- package/dist/index.mjs +402 -2
- package/package.json +6 -3
package/dist/index.d.mts
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
|
-
import { DuckDBFactory, ParquetCodec, QueryExecutor, Row, createStorageEngine } from "@gscdump/engine";
|
|
1
|
+
import { DuckDBFactory, IcebergTableName, ParquetCodec, QueryExecutor, Row, createStorageEngine } from "@gscdump/engine";
|
|
2
2
|
import { H3Event } from "h3";
|
|
3
|
+
import { ArchetypeQuery, ArchetypeResult, ArchetypeResultRow } from "@gscdump/sdk";
|
|
4
|
+
import { ServerTailDirective } from "@gscdump/contracts";
|
|
3
5
|
declare function getWasmDuckDBFactory(): DuckDBFactory;
|
|
4
6
|
declare function resetWasmDuckDB(): void;
|
|
5
7
|
interface AnalyticsEnv {
|
|
@@ -89,6 +91,187 @@ declare function createR2Presigner(env: AnalyticsEnv): ({
|
|
|
89
91
|
bucket,
|
|
90
92
|
expiresIn
|
|
91
93
|
}: PresignOptions) => Promise<string>;
|
|
94
|
+
/** Placeholder substituted for the engine-specific table reference. */
|
|
95
|
+
declare const TABLE_PLACEHOLDER = "{{TABLE}}";
|
|
96
|
+
/** A dialect-neutral SQL plan. */
|
|
97
|
+
interface ArchetypeSqlPlan {
|
|
98
|
+
/** SQL with `{{TABLE}}` standing in for the table reference. */
|
|
99
|
+
sql: string;
|
|
100
|
+
/** Bound parameters, in `?`-order. */
|
|
101
|
+
params: unknown[];
|
|
102
|
+
/** The Iceberg fact table this query reads. */
|
|
103
|
+
table: IcebergTableName;
|
|
104
|
+
}
|
|
105
|
+
/**
|
|
106
|
+
* Translate an archetype query to a dialect-neutral SQL plan.
|
|
107
|
+
*
|
|
108
|
+
* Throws for `arbitrary-sql` (caller-supplied SQL, handled by the DuckDB
|
|
109
|
+
* executor directly) and `aux-cloud-only` (not an Iceberg query).
|
|
110
|
+
*/
|
|
111
|
+
declare function buildArchetypeSql(query: ArchetypeQuery): ArchetypeSqlPlan;
|
|
112
|
+
/** Row returned by the DuckDB sibling. */
|
|
113
|
+
type DuckDbIcebergRow = Record<string, string | number | null>;
|
|
114
|
+
/**
|
|
115
|
+
* The minimal `DUCKDB_SVC` shape this executor needs — a structural subset of
|
|
116
|
+
* the binding in `workers-duckdb.ts` / `env.ts`. Any binding with `runSQL`
|
|
117
|
+
* satisfies it.
|
|
118
|
+
*/
|
|
119
|
+
interface DuckDbSvc {
|
|
120
|
+
runSQL: (args: {
|
|
121
|
+
sql: string;
|
|
122
|
+
}) => Promise<{
|
|
123
|
+
rows: unknown[];
|
|
124
|
+
sql: string;
|
|
125
|
+
}>;
|
|
126
|
+
}
|
|
127
|
+
/** Configuration for the DuckDB-over-Iceberg executor. */
|
|
128
|
+
interface DuckDbIcebergExecutorConfig {
|
|
129
|
+
/** The DuckDB service binding (the sibling Worker RPC). */
|
|
130
|
+
svc: DuckDbSvc;
|
|
131
|
+
/**
|
|
132
|
+
* R2 Data Catalog warehouse identifier. The sibling resolves Iceberg table
|
|
133
|
+
* locations from `<warehouse>` + `<namespace>` + table name.
|
|
134
|
+
*/
|
|
135
|
+
warehouse: string;
|
|
136
|
+
/** Iceberg namespace the 5 fact tables live in. */
|
|
137
|
+
namespace: string;
|
|
138
|
+
/**
|
|
139
|
+
* How the sibling addresses an Iceberg table in a `FROM` clause. Defaults to
|
|
140
|
+
* DuckDB's `iceberg_scan('<warehouse>/<namespace>/<table>')`. Overridable so
|
|
141
|
+
* a sibling configured with the Iceberg REST catalog can use
|
|
142
|
+
* `iceberg_scan('<namespace>.<table>')` or an attached-catalog reference.
|
|
143
|
+
*/
|
|
144
|
+
tableRefStyle?: 'path' | 'catalog';
|
|
145
|
+
/** Per-query wall-clock deadline (ms). Default 25s. */
|
|
146
|
+
timeoutMs?: number;
|
|
147
|
+
}
|
|
148
|
+
/** Result of a DuckDB-over-Iceberg query. */
|
|
149
|
+
interface DuckDbIcebergResult {
|
|
150
|
+
rows: DuckDbIcebergRow[];
|
|
151
|
+
/** The exact SQL sent to the sibling. */
|
|
152
|
+
sql: string;
|
|
153
|
+
queryMs: number;
|
|
154
|
+
}
|
|
155
|
+
declare class DuckDbIcebergError extends Error {
|
|
156
|
+
name: string;
|
|
157
|
+
}
|
|
158
|
+
declare class DuckDbIcebergTimeoutError extends Error {
|
|
159
|
+
name: string;
|
|
160
|
+
constructor(timeoutMs: number);
|
|
161
|
+
}
|
|
162
|
+
/** A configured DuckDB-over-Iceberg executor. */
|
|
163
|
+
interface DuckDbIcebergExecutor {
|
|
164
|
+
/** Run a raw SQL string with `{{TABLE_<name>}}` placeholders resolved. */
|
|
165
|
+
runSql: (sql: string, params?: readonly unknown[]) => Promise<DuckDbIcebergResult>;
|
|
166
|
+
/** Run a dialect-neutral plan: resolve `{{TABLE}}`, bind params, send. */
|
|
167
|
+
runPlan: (plan: ArchetypeSqlPlan) => Promise<DuckDbIcebergResult>;
|
|
168
|
+
/** Translate + run an archetype query. Handles `arbitrary-sql` verbatim. */
|
|
169
|
+
runArchetype: (query: ArchetypeQuery) => Promise<DuckDbIcebergResult>;
|
|
170
|
+
}
|
|
171
|
+
/**
|
|
172
|
+
* Create a DuckDB-over-Iceberg-files executor.
|
|
173
|
+
*/
|
|
174
|
+
declare function createDuckDbIcebergExecutor(config: DuckDbIcebergExecutorConfig): DuckDbIcebergExecutor;
|
|
175
|
+
/** Configuration for an R2 SQL client. */
|
|
176
|
+
interface R2SqlClientConfig {
|
|
177
|
+
/** Cloudflare account id. */
|
|
178
|
+
accountId: string;
|
|
179
|
+
/** R2 Data Catalog warehouse name (`<bucket>` or `<account>_<bucket>`). */
|
|
180
|
+
warehouse: string;
|
|
181
|
+
/** Iceberg namespace the 5 fact tables live in. */
|
|
182
|
+
namespace: string;
|
|
183
|
+
/** Cloudflare API token with R2 Data Catalog read scope. */
|
|
184
|
+
token: string;
|
|
185
|
+
/**
|
|
186
|
+
* Override the HTTP endpoint base. Defaults to the public CF API. Tests
|
|
187
|
+
* point this at a local recorder.
|
|
188
|
+
*/
|
|
189
|
+
apiBase?: string;
|
|
190
|
+
/**
|
|
191
|
+
* Injectable fetch. Defaults to global `fetch`. Tests pass a fake that
|
|
192
|
+
* returns a recorded CF envelope without a network round-trip.
|
|
193
|
+
*/
|
|
194
|
+
fetchImpl?: typeof fetch;
|
|
195
|
+
/** Per-query wall-clock deadline (ms). Default 25s — under the Worker CPU budget. */
|
|
196
|
+
timeoutMs?: number;
|
|
197
|
+
}
|
|
198
|
+
/** A row as returned by R2 SQL — flat dimension + metric values. */
|
|
199
|
+
type R2SqlRow = Record<string, string | number | null>;
|
|
200
|
+
/** Result of an R2 SQL query. */
|
|
201
|
+
interface R2SqlResult {
|
|
202
|
+
rows: R2SqlRow[];
|
|
203
|
+
/** The exact SQL sent (params already inlined). For diagnostics. */
|
|
204
|
+
sql: string;
|
|
205
|
+
/** Wall-clock duration of the HTTP round-trip. */
|
|
206
|
+
queryMs: number;
|
|
207
|
+
}
|
|
208
|
+
declare class R2SqlError extends Error {
|
|
209
|
+
readonly status?: number | undefined;
|
|
210
|
+
name: string;
|
|
211
|
+
constructor(message: string, status?: number | undefined);
|
|
212
|
+
}
|
|
213
|
+
declare class R2SqlTimeoutError extends Error {
|
|
214
|
+
name: string;
|
|
215
|
+
constructor(timeoutMs: number);
|
|
216
|
+
}
|
|
217
|
+
/**
|
|
218
|
+
* Escape a JS value for inline embedding in R2 SQL. R2 SQL has no bound-param
|
|
219
|
+
* channel, so `buildArchetypeSql`'s `?` placeholders are substituted here.
|
|
220
|
+
* Numbers go in bare; strings are single-quote-escaped; null → `NULL`.
|
|
221
|
+
*/
|
|
222
|
+
declare function escapeSqlValue(value: unknown): string;
|
|
223
|
+
/**
|
|
224
|
+
* Inline a plan's `?`-bound params into its SQL, in order. R2 SQL accepts only
|
|
225
|
+
* a literal query string. Quote-aware so a `?` inside a string literal is not
|
|
226
|
+
* mistaken for a placeholder.
|
|
227
|
+
*/
|
|
228
|
+
declare function inlineParams(sql: string, params: readonly unknown[]): string;
|
|
229
|
+
/** A configured R2 SQL client. */
|
|
230
|
+
interface R2SqlClient {
|
|
231
|
+
/** Run a raw SQL string (table reference already resolved). */
|
|
232
|
+
query: (sql: string) => Promise<R2SqlResult>;
|
|
233
|
+
/** Run a dialect-neutral plan: resolve `{{TABLE}}`, inline params, send. */
|
|
234
|
+
runPlan: (plan: ArchetypeSqlPlan) => Promise<R2SqlResult>;
|
|
235
|
+
/** Translate + run an archetype query end to end. */
|
|
236
|
+
runArchetype: (query: ArchetypeQuery) => Promise<R2SqlResult>;
|
|
237
|
+
}
|
|
238
|
+
/**
|
|
239
|
+
* Create an R2 SQL client. The endpoint requires a real CF token in
|
|
240
|
+
* production; tests inject `fetchImpl` returning a recorded envelope.
|
|
241
|
+
*/
|
|
242
|
+
declare function createR2SqlClient(config: R2SqlClientConfig): R2SqlClient;
|
|
243
|
+
/** The two engines the server tail can route to. */
|
|
244
|
+
type ServerTailEngine = 'r2-sql' | 'duckdb';
|
|
245
|
+
/** Executors the dispatcher routes between. */
|
|
246
|
+
interface ServerTailDispatcherConfig {
|
|
247
|
+
r2Sql: R2SqlClient;
|
|
248
|
+
duckdb: DuckDbIcebergExecutor;
|
|
249
|
+
}
|
|
250
|
+
declare class ServerTailRoutingError extends Error {
|
|
251
|
+
name: string;
|
|
252
|
+
}
|
|
253
|
+
/**
|
|
254
|
+
* Decide which engine answers an archetype query. Pure — no I/O. Exposed so
|
|
255
|
+
* the file-resolution endpoint can compute the `ServerTailDirective.engine`
|
|
256
|
+
* with the SAME logic the dispatcher uses at execution time.
|
|
257
|
+
*/
|
|
258
|
+
declare function resolveServerTailEngine(query: ArchetypeQuery): ServerTailEngine;
|
|
259
|
+
/** A configured server-tail dispatcher. */
|
|
260
|
+
interface ServerTailDispatcher {
|
|
261
|
+
/** Decide the engine for a query without running it. */
|
|
262
|
+
route: (query: ArchetypeQuery) => ServerTailEngine;
|
|
263
|
+
/**
|
|
264
|
+
* Execute a query, routing by execution class. If `directive` is supplied
|
|
265
|
+
* its `engine` is honoured only when consistent with the archetype's class
|
|
266
|
+
* (a `duckdb`-class archetype always runs on DuckDB regardless).
|
|
267
|
+
*/
|
|
268
|
+
execute: <R extends ArchetypeResultRow = ArchetypeResultRow>(query: ArchetypeQuery, directive?: ServerTailDirective) => Promise<ArchetypeResult<R>>;
|
|
269
|
+
}
|
|
270
|
+
/**
|
|
271
|
+
* Create the server-tail dispatcher. Holds an R2 SQL client and a DuckDB
|
|
272
|
+
* executor and routes every `ArchetypeQuery` to one of them.
|
|
273
|
+
*/
|
|
274
|
+
declare function createServerTailDispatcher(config: ServerTailDispatcherConfig): ServerTailDispatcher;
|
|
92
275
|
declare function signSizeHint(env: AnalyticsEnv, key: string, bytes: number): Promise<string>;
|
|
93
276
|
declare function verifySizeHint(env: AnalyticsEnv, key: string, bytes: number, providedHex: string): Promise<boolean>;
|
|
94
277
|
declare function createDucklingsCodec(_env: AnalyticsEnv): ParquetCodec;
|
|
@@ -98,4 +281,4 @@ interface DucklingsExecutorOptions {
|
|
|
98
281
|
ipcTotalBytes?: number;
|
|
99
282
|
}
|
|
100
283
|
declare function createDucklingsExecutor(env: AnalyticsEnv, opts?: DucklingsExecutorOptions): QueryExecutor;
|
|
101
|
-
export { type AnalyticsEngineHooks, type AnalyticsEnv, type HostedR2QueryKeyInput, type InflightDedupe, type PresignOptions, type Row, createDucklingsCodec, createDucklingsExecutor, createInflightDedupe, createR2Presigner, getAnalyticsEngine, getHostedR2QueryKey, getWasmDuckDBFactory, resetWasmDuckDB, signSizeHint, useAnalyticsEnv, verifySizeHint };
|
|
284
|
+
export { type AnalyticsEngineHooks, type AnalyticsEnv, type ArchetypeSqlPlan, DuckDbIcebergError, type DuckDbIcebergExecutor, type DuckDbIcebergExecutorConfig, type DuckDbIcebergResult, type DuckDbIcebergRow, DuckDbIcebergTimeoutError, type DuckDbSvc, type HostedR2QueryKeyInput, type InflightDedupe, type PresignOptions, type R2SqlClient, type R2SqlClientConfig, R2SqlError, type R2SqlResult, type R2SqlRow, R2SqlTimeoutError, type Row, type ServerTailDispatcher, type ServerTailDispatcherConfig, type ServerTailEngine, ServerTailRoutingError, TABLE_PLACEHOLDER, buildArchetypeSql, createDuckDbIcebergExecutor, createDucklingsCodec, createDucklingsExecutor, createInflightDedupe, createR2Presigner, createR2SqlClient, createServerTailDispatcher, escapeSqlValue, getAnalyticsEngine, getHostedR2QueryKey, getWasmDuckDBFactory, inlineParams, resetWasmDuckDB, resolveServerTailEngine, signSizeHint, useAnalyticsEnv, verifySizeHint };
|
package/dist/index.mjs
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
import { SCHEMAS, bindLiterals, coerceRow, createStorageEngine } from "@gscdump/engine";
|
|
1
|
+
import { SCHEMAS, bindLiterals, coerceRow, createStorageEngine, inferTable } from "@gscdump/engine";
|
|
2
2
|
import { createD1ManifestStore } from "@gscdump/engine-sqlite";
|
|
3
3
|
import { createR2DataSource } from "@gscdump/engine/r2";
|
|
4
4
|
import { createHyparquetCodec, decodeParquetToRows } from "@gscdump/engine/hyparquet";
|
|
5
5
|
import { float64, int32, int64, tableFromArrays, tableToIPC, utf8 } from "@uwdata/flechette";
|
|
6
6
|
import { createError } from "h3";
|
|
7
7
|
import { AwsClient } from "aws4fetch";
|
|
8
|
+
import { ARCHETYPE_EXECUTION_CLASS } from "@gscdump/sdk";
|
|
8
9
|
let handle = null;
|
|
9
10
|
async function initHandle() {
|
|
10
11
|
throw new Error("DuckDB-WASM handle not wired for Cloudflare Workers yet. Complete duckdb-wasm-handle.ts before enabling dual-write (user.migration_phase != 'd1').");
|
|
@@ -443,6 +444,405 @@ function createR2Presigner(env) {
|
|
|
443
444
|
/**
 * Percent-encode each segment of a slash-delimited key while leaving the
 * `/` separators themselves untouched.
 */
function encodeKey(key) {
  const encodedSegments = [];
  for (const segment of key.split("/")) {
    encodedSegments.push(encodeURIComponent(segment));
  }
  return encodedSegments.join("/");
}
|
|
447
|
+
/** Token the plan builders emit in place of the table reference; each executor substitutes its own reference before sending. */
const TABLE_PLACEHOLDER = "{{TABLE}}";
|
|
448
|
+
/**
 * Map a dimension name to the column it is stored under.
 * `page` → `url`, `queryCanonical` → `query_canonical`; any other value is
 * returned unchanged.
 */
function dimColumn(dim) {
  switch (dim) {
    case "page":
      return "url";
    case "queryCanonical":
      return "query_canonical";
    default:
      return dim;
  }
}
|
|
453
|
+
/**
 * Build the aliased aggregate `SELECT` expression for a metric.
 *
 * `ctr` and `position` are ratios over summed impressions; `NULLIF` keeps a
 * zero-impression group from dividing by zero.
 *
 * @param metric One of `clicks`, `impressions`, `ctr`, `position`.
 * @returns The SQL fragment, e.g. `SUM(clicks) AS clicks`.
 * @throws Error when the metric is not recognised. Without this the function
 *   returned `undefined`, which was then spliced into the SQL as the text
 *   "undefined".
 */
function metricExpr(metric) {
  switch (metric) {
    case "clicks": return "SUM(clicks) AS clicks";
    case "impressions": return "SUM(impressions) AS impressions";
    case "ctr": return "SUM(clicks) / NULLIF(SUM(impressions), 0) AS ctr";
    case "position": return "SUM(sum_position) / NULLIF(SUM(impressions), 0) AS position";
    default: throw new Error(`metricExpr: unknown metric '${String(metric)}'`);
  }
}
|
|
461
|
+
/**
 * Build the un-aliased aggregate expression for a metric, for use in
 * `ORDER BY`.
 *
 * @param metric One of `clicks`, `impressions`, `ctr`, `position`.
 * @returns The SQL fragment, e.g. `SUM(clicks)`.
 * @throws Error when the metric is not recognised. Without this the function
 *   returned `undefined`, producing `ORDER BY undefined …`.
 */
function orderMetricExpr(metric) {
  switch (metric) {
    case "clicks": return "SUM(clicks)";
    case "impressions": return "SUM(impressions)";
    case "ctr": return "SUM(clicks) / NULLIF(SUM(impressions), 0)";
    case "position": return "SUM(sum_position) / NULLIF(SUM(impressions), 0)";
    default: throw new Error(`orderMetricExpr: unknown metric '${String(metric)}'`);
  }
}
|
|
469
|
+
/**
 * Wrap a string in single quotes for inline use in SQL, doubling any
 * embedded single quote.
 */
function sqlStringLiteral(value) {
  const doubled = value.split("'").join("''");
  return "'" + doubled + "'";
}
|
|
472
|
+
/**
 * Build the predicate shared by every plan: site, search type and an
 * inclusive date range. Returns the clause text together with its four bound
 * values in `?`-order.
 */
function partitionWhere(q) {
  const { siteId, searchType, range } = q;
  const params = [siteId, searchType, range.start, range.end];
  const clause = "site_id = ? AND search_type = ? AND date BETWEEN ? AND ?";
  return { clause, params };
}
|
|
483
|
+
/**
 * Plan for `site-daily-timeseries`: the requested metrics aggregated per day
 * from the `dates` table, oldest day first.
 */
function buildSiteDailyTimeseries(q) {
  const { clause, params } = partitionWhere(q);
  const metricList = q.metrics.map((metric) => metricExpr(metric)).join(", ");
  const sql = `SELECT date, ${metricList} FROM ${TABLE_PLACEHOLDER} WHERE ${clause} GROUP BY date ORDER BY date ASC`;
  return { table: "dates", params, sql };
}
|
|
492
|
+
/**
 * Plan for `entity-daily-timeseries`: per-day metrics for a single entity
 * (one value of one dimension). The table is inferred from the entity's
 * dimension; the entity value is bound as the final parameter.
 */
function buildEntityDailyTimeseries(q) {
  const { dimension, value } = q.entity;
  const table = inferTable([dimension]);
  const base = partitionWhere(q);
  const column = dimColumn(dimension);
  const metricList = q.metrics.map((metric) => metricExpr(metric)).join(", ");
  const params = base.params.concat([value]);
  const sql = `SELECT date, ${metricList} FROM ${TABLE_PLACEHOLDER} WHERE ${base.clause} AND ${column} = ? GROUP BY date ORDER BY date ASC`;
  return { table, params, sql };
}
|
|
503
|
+
/**
 * Plan for `entity-daily-sparkline`: one metric per day for each entity in a
 * pre-resolved list. The entity values are inlined as quoted literals in an
 * `IN (...)` list rather than bound as parameters.
 *
 * @throws Error when `q.entities` is empty.
 */
function buildEntityDailySparkline(q) {
  const table = inferTable([q.dimension]);
  const base = partitionWhere(q);
  const column = dimColumn(q.dimension);
  const hasEntities = q.entities.length !== 0;
  if (!hasEntities) {
    throw new Error("entity-daily-sparkline: empty entities — resolver must pre-resolve the top-N list");
  }
  const literals = q.entities.map((entity) => sqlStringLiteral(entity));
  const sql = `SELECT date, ${column}, ${metricExpr(q.metric)} FROM ${TABLE_PLACEHOLDER} WHERE ${base.clause} AND ${column} IN (${literals.join(", ")}) GROUP BY date, ${column} ORDER BY date ASC`;
  return { table, params: base.params, sql };
}
|
|
515
|
+
/**
 * Plan for `top-n-breakdown`: metrics grouped by one dimension, ordered by a
 * metric and limited to `q.limit` rows, with an optional `q.offset`.
 *
 * `LIMIT`, `OFFSET` and the sort direction are spliced into the SQL text
 * rather than bound, so they are validated here first.
 *
 * @throws Error when the sort direction is not `asc`/`desc`, or when `limit`
 *   or a positive `offset` is not a finite number. These previously produced
 *   SQL such as `LIMIT NaN` or an arbitrary token after `ORDER BY`.
 */
function buildTopNBreakdown(q) {
  const table = inferTable([q.dimension]);
  const w = partitionWhere(q);
  const col = dimColumn(q.dimension);
  const metrics = q.metrics.map(metricExpr).join(", ");
  const dir = String(q.orderBy.dir).toUpperCase();
  if (dir !== "ASC" && dir !== "DESC") throw new Error(`top-n-breakdown: invalid order direction '${q.orderBy.dir}'`);
  const limit = Math.floor(Number(q.limit));
  if (!Number.isFinite(limit)) throw new Error(`top-n-breakdown: limit must be a finite number, got '${q.limit}'`);
  const order = `${orderMetricExpr(q.orderBy.metric)} ${dir}`;
  let sql = `SELECT ${col}, ${metrics} FROM ${TABLE_PLACEHOLDER} WHERE ${w.clause} GROUP BY ${col} ORDER BY ${order} LIMIT ${Math.max(0, limit)}`;
  if (q.offset && q.offset > 0) {
    const offset = Math.floor(q.offset);
    if (!Number.isFinite(offset)) throw new Error(`top-n-breakdown: offset must be a finite number, got '${q.offset}'`);
    sql += ` OFFSET ${offset}`;
  }
  return {
    table,
    params: w.params,
    sql
  };
}
|
|
529
|
+
/**
 * Plan for `single-row-lookup`: metrics for the row matching every
 * dimension/value pair in `q.match`. Each matched dimension adds an equality
 * predicate and a bound value, and is also selected and grouped on. With an
 * empty `q.match` only the metrics are selected and no `GROUP BY` is emitted.
 */
function buildSingleRowLookup(q) {
  const dims = Object.keys(q.match);
  const table = inferTable(dims);
  const base = partitionWhere(q);
  const columns = dims.map((dim) => dimColumn(dim));
  const predicates = [base.clause, ...columns.map((column) => `${column} = ?`)];
  const params = [...base.params, ...dims.map((dim) => q.match[dim])];
  const metricList = q.metrics.map((metric) => metricExpr(metric)).join(", ");
  const hasDims = dims.length > 0;
  const columnList = columns.join(", ");
  const selectList = hasDims ? `${columnList}, ${metricList}` : metricList;
  const groupBy = hasDims ? ` GROUP BY ${columnList}` : "";
  const sql = `SELECT ${selectList} FROM ${TABLE_PLACEHOLDER} WHERE ${predicates.join(" AND ")}${groupBy}`;
  return { table, params, sql };
}
|
|
547
|
+
/**
 * Plan for `multi-series-stacked-daily`: one metric per day, split by the
 * series dimension, oldest day first.
 */
function buildMultiSeriesStackedDaily(q) {
  const table = inferTable([q.seriesDimension]);
  const { clause, params } = partitionWhere(q);
  const seriesColumn = dimColumn(q.seriesDimension);
  const metric = metricExpr(q.metric);
  const sql = `SELECT date, ${seriesColumn}, ${metric} FROM ${TABLE_PLACEHOLDER} WHERE ${clause} GROUP BY date, ${seriesColumn} ORDER BY date ASC`;
  return { table, params, sql };
}
|
|
557
|
+
/**
 * Plan for `preset-analyzer`: url/query rows from `page_queries` filtered by
 * a preset-specific `HAVING` clause, ordered by impressions descending.
 *
 * Supported presets: `striking-distance` (impressions floor plus an average
 * position window, default 11–20) and `opportunity` / `zero-click` (both
 * currently: impressions floor and zero clicks).
 *
 * @param q Query with `presetId` and optional `params`
 *   (`minImpressions`, `limit`, `minPosition`, `maxPosition`).
 * @throws Error when the preset is not one of the above, or when a numeric
 *   param is not a finite number. A non-numeric `limit` previously produced
 *   `LIMIT NaN`.
 */
function buildPresetAnalyzer(q) {
  const params = q.params ?? {};
  // Read a numeric param with a default, rejecting NaN/Infinity up front.
  const numberParam = (name, fallback) => {
    const value = Number(params[name] ?? fallback);
    if (!Number.isFinite(value)) throw new Error(`preset-analyzer: param '${name}' must be a finite number`);
    return value;
  };
  const minImpressions = numberParam("minImpressions", 100);
  // LIMIT is spliced into the SQL text, so clamp it to a positive integer.
  const limit = Math.max(1, Math.floor(numberParam("limit", 1e3)));
  const w = partitionWhere(q);
  const wp = [...w.params];
  let having;
  switch (q.presetId) {
    case "striking-distance":
      having = `HAVING SUM(impressions) >= ? AND (SUM(sum_position) / NULLIF(SUM(impressions), 0)) BETWEEN ? AND ?`;
      wp.push(minImpressions, numberParam("minPosition", 11), numberParam("maxPosition", 20));
      break;
    case "opportunity":
    case "zero-click":
      having = `HAVING SUM(impressions) >= ? AND SUM(clicks) = 0`;
      wp.push(minImpressions);
      break;
    default: throw new Error(`preset-analyzer: preset '${q.presetId}' is not R2-SQL-safe — window-function presets must be sent as archetype 'arbitrary-sql'`);
  }
  return {
    table: "page_queries",
    params: wp,
    sql: `SELECT url, query, SUM(clicks) AS clicks, SUM(impressions) AS impressions, SUM(clicks) / NULLIF(SUM(impressions), 0) AS ctr, SUM(sum_position) / NULLIF(SUM(impressions), 0) AS position FROM ${TABLE_PLACEHOLDER} WHERE ${w.clause} GROUP BY url, query ${having} ORDER BY SUM(impressions) DESC LIMIT ${limit}`
  };
}
|
|
585
|
+
/**
 * Plan for `two-dimension-detail`: metrics grouped by url and query from
 * `page_queries`, optionally filtered to one page and/or one query, with
 * optional ordering and limit.
 *
 * The sort direction and `LIMIT` are spliced into the SQL text rather than
 * bound, so they are validated here first.
 *
 * @throws Error when `orderBy.dir` is not `asc`/`desc`, or when a positive
 *   `limit` is not finite (previously emitted e.g. `LIMIT Infinity`).
 */
function buildTwoDimensionDetail(q) {
  const w = partitionWhere(q);
  const params = [...w.params];
  let clause = w.clause;
  if (q.filter?.page) {
    clause += ` AND url = ?`;
    params.push(q.filter.page);
  }
  if (q.filter?.query) {
    clause += ` AND query = ?`;
    params.push(q.filter.query);
  }
  let sql = `SELECT url, query, ${q.metrics.map(metricExpr).join(", ")} FROM ${TABLE_PLACEHOLDER} WHERE ${clause} GROUP BY url, query`;
  if (q.orderBy) {
    const dir = String(q.orderBy.dir).toUpperCase();
    if (dir !== "ASC" && dir !== "DESC") throw new Error(`two-dimension-detail: invalid order direction '${q.orderBy.dir}'`);
    sql += ` ORDER BY ${orderMetricExpr(q.orderBy.metric)} ${dir}`;
  }
  if (q.limit && q.limit > 0) {
    const limit = Math.floor(q.limit);
    if (!Number.isFinite(limit)) throw new Error(`two-dimension-detail: limit must be a finite number, got '${q.limit}'`);
    sql += ` LIMIT ${limit}`;
  }
  return {
    table: "page_queries",
    params,
    sql
  };
}
|
|
606
|
+
/**
 * Translate an archetype query into a dialect-neutral SQL plan by
 * dispatching on `query.archetype`.
 *
 * @returns A plan `{ table, params, sql }` where `sql` contains the
 *   `{{TABLE}}` placeholder.
 * @throws Error for `arbitrary-sql` and `aux-cloud-only`, which have no plan,
 *   and for any unrecognised archetype. Without the final case the function
 *   fell through and returned `undefined`, so callers failed later while
 *   reading `plan.sql`.
 */
function buildArchetypeSql(query) {
  switch (query.archetype) {
    case "site-daily-timeseries": return buildSiteDailyTimeseries(query);
    case "entity-daily-timeseries": return buildEntityDailyTimeseries(query);
    case "entity-daily-sparkline": return buildEntityDailySparkline(query);
    case "top-n-breakdown": return buildTopNBreakdown(query);
    case "single-row-lookup": return buildSingleRowLookup(query);
    case "multi-series-stacked-daily": return buildMultiSeriesStackedDaily(query);
    case "preset-analyzer": return buildPresetAnalyzer(query);
    case "two-dimension-detail": return buildTwoDimensionDetail(query);
    case "arbitrary-sql": throw new Error("buildArchetypeSql: arbitrary-sql carries caller SQL — the DuckDB executor runs it verbatim");
    case "aux-cloud-only": throw new Error("buildArchetypeSql: aux-cloud-only is not an Iceberg query");
    default: throw new Error(`buildArchetypeSql: unknown archetype '${String(query.archetype)}'`);
  }
}
|
|
620
|
+
/** Thrown when a query cannot be routed to a server-tail engine. */
var ServerTailRoutingError = class ServerTailRoutingError extends Error {
  /** Overrides the inherited `Error` name so the error is identifiable in logs. */
  name = "ServerTailRoutingError";
};
|
|
623
|
+
/**
 * Decide which engine answers an archetype query, without running it.
 *
 * Routing is driven by the archetype's entry in `ARCHETYPE_EXECUTION_CLASS`:
 * `cloud-only` is rejected, `duckdb` goes to DuckDB. Anything else goes to
 * R2 SQL, except a `top-n-breakdown` with a positive offset, which is sent
 * to DuckDB.
 *
 * @throws ServerTailRoutingError for cloud-only archetypes.
 */
function resolveServerTailEngine(query) {
  const { archetype } = query;
  switch (ARCHETYPE_EXECUTION_CLASS[archetype]) {
    case "cloud-only":
      throw new ServerTailRoutingError(`archetype '${query.archetype}' is cloud-only — not a server-tail query`);
    case "duckdb":
      return "duckdb";
  }
  const isPagedTopN = archetype === "top-n-breakdown" && query.offset && query.offset > 0;
  return isPagedTopN ? "duckdb" : "r2-sql";
}
|
|
630
|
+
/** Label reported as a result's `source`: `server-r2-sql` for the R2 SQL engine, `server-duckdb` otherwise. */
function sourceFor(engine) {
  if (engine === "r2-sql") {
    return "server-r2-sql";
  }
  return "server-duckdb";
}
|
|
633
|
+
/**
 * Create the server-tail dispatcher, which routes each archetype query to
 * either the R2 SQL client (`config.r2Sql`) or the DuckDB executor
 * (`config.duckdb`) and wraps the outcome in a common result shape.
 */
function createServerTailDispatcher(config) {
  /** Decide the engine for a query without running it. */
  const route = (query) => resolveServerTailEngine(query);

  /** Run the query on the given engine and shape the result. */
  const runOn = async (engine, query) => {
    const onR2Sql = engine === "r2-sql";
    const executor = onR2Sql ? config.r2Sql : config.duckdb;
    const res = await executor.runArchetype(query);
    return {
      archetype: query.archetype,
      rows: res.rows,
      source: sourceFor(onR2Sql ? "r2-sql" : "duckdb"),
      meta: {
        rowCount: res.rows.length,
        queryMs: res.queryMs
      }
    };
  };

  /**
   * Execute a query. A directive can only move an R2-SQL-routed query over
   * to DuckDB; a query routed to DuckDB stays there whatever the directive
   * says.
   */
  const execute = async (query, directive) => {
    const routed = route(query);
    const directiveDisagrees = Boolean(directive) && directive.engine !== routed;
    if (directiveDisagrees && routed === "r2-sql") {
      return runOn("duckdb", query);
    }
    return runOn(routed, query);
  };

  return { route, execute };
}
|
|
671
|
+
/** Error raised by the DuckDB-over-Iceberg executor for non-timeout failures. */
var DuckDbIcebergError = class DuckDbIcebergError extends Error {
  /** Overrides the inherited `Error` name so the error is identifiable in logs. */
  name = "DuckDbIcebergError";
};
|
|
674
|
+
/** Raised when a DuckDB-over-Iceberg query does not settle before its deadline. */
var DuckDbIcebergTimeoutError = class extends Error {
  /**
   * @param timeoutMs The deadline, in milliseconds, that was exceeded.
   */
  constructor(timeoutMs) {
    const message = `DuckDB-over-Iceberg query exceeded ${timeoutMs}ms deadline`;
    super(message);
    this.name = "DuckDbIcebergTimeoutError";
  }
};
|
|
680
|
+
const DEFAULT_TIMEOUT_MS$1 = 25e3;
|
|
681
|
+
/**
 * Build the `FROM` reference for an Iceberg table.
 * With `tableRefStyle: "catalog"` this is `<namespace>.<table>`; otherwise it
 * is an `iceberg_scan('<warehouse>/<namespace>/<table>')` call.
 */
function icebergTableRef(config, table) {
  const { namespace } = config;
  const useCatalog = config.tableRefStyle === "catalog";
  return useCatalog
    ? `${namespace}.${table}`
    : `iceberg_scan('${config.warehouse}/${namespace}/${table}')`;
}
|
|
685
|
+
/**
 * Settle with `op`, or reject with `DuckDbIcebergTimeoutError` if `op` has
 * not settled within `timeoutMs`. The timer is cleared once `op` settles.
 * `op` itself is not cancelled on timeout.
 */
function withDeadline(op, timeoutMs) {
  return new Promise((resolve, reject) => {
    const onExpire = () => {
      reject(new DuckDbIcebergTimeoutError(timeoutMs));
    };
    const timer = setTimeout(onExpire, timeoutMs);
    const stopTimer = () => {
      clearTimeout(timer);
    };
    op.then(resolve, reject).finally(stopTimer);
  });
}
|
|
691
|
+
/**
 * Replace table placeholders in raw SQL with engine-specific table
 * references.
 *
 * Two spellings are accepted: `{{TABLE_<name>}}` and the bare `{{<name>}}`.
 * Previously only the bare form worked, because the whole word inside the
 * braces was used as the table name, so `{{TABLE_pages}}` resolved to a
 * table called `TABLE_pages`.
 *
 * @param sql Raw SQL containing zero or more placeholders.
 * @param config Executor config passed to `icebergTableRef`.
 * @returns The SQL with every placeholder substituted.
 */
function resolveTablePlaceholders(sql, config) {
  // The optional, non-capturing `TABLE_` prefix is dropped; the capture group is the table name.
  return sql.replace(/\{\{(?:TABLE_)?(\w+)\}\}/g, (_, table) => icebergTableRef(config, table));
}
|
|
694
|
+
/**
 * Create a DuckDB-over-Iceberg executor around the `config.svc.runSQL`
 * binding.
 *
 * @param config Service binding, warehouse/namespace, optional
 *   `tableRefStyle` and `timeoutMs` (defaults to `DEFAULT_TIMEOUT_MS$1`).
 * @returns `{ runSql, runPlan, runArchetype }`.
 */
function createDuckDbIcebergExecutor(config) {
  const timeoutMs = config.timeoutMs ?? DEFAULT_TIMEOUT_MS$1;

  /**
   * Send final SQL to the service under the deadline.
   *
   * @throws DuckDbIcebergTimeoutError when the deadline passes.
   * @throws DuckDbIcebergError for any other failure, including a
   *   synchronous throw from `runSQL` and rejections that are not `Error`
   *   instances (both previously escaped unwrapped or with an "undefined"
   *   message).
   */
  async function send(sql) {
    const started = Date.now();
    let result;
    try {
      result = await withDeadline(config.svc.runSQL({ sql }), timeoutMs);
    } catch (err) {
      if (err instanceof DuckDbIcebergTimeoutError) throw err;
      const detail = err instanceof Error ? err.message : String(err);
      throw new DuckDbIcebergError(`DUCKDB_SVC.runSQL failed: ${detail}`);
    }
    return {
      // Tolerate a service that resolves with null/undefined or omits fields.
      rows: result?.rows ?? [],
      sql: result?.sql ?? sql,
      queryMs: Date.now() - started
    };
  }

  /** Run raw SQL: resolve table placeholders, bind params as literals, send. */
  function runSql(sql, params = []) {
    return send(bindLiterals(resolveTablePlaceholders(sql, config), params));
  }

  /** Run a plan: substitute `{{TABLE}}` with the plan's table reference, bind params, send. */
  function runPlan(plan) {
    return send(bindLiterals(plan.sql.split(TABLE_PLACEHOLDER).join(icebergTableRef(config, plan.table)), plan.params));
  }

  /**
   * Translate and run an archetype query. `arbitrary-sql` is passed to
   * `runSql` with its own SQL and params; `aux-cloud-only` is rejected.
   */
  async function runArchetype(query) {
    if (query.archetype === "arbitrary-sql") return runSql(query.sql, query.params ?? []);
    if (query.archetype === "aux-cloud-only") throw new DuckDbIcebergError("aux-cloud-only is not an Iceberg query");
    return runPlan(buildArchetypeSql(query));
  }

  return {
    runSql,
    runPlan,
    runArchetype
  };
}
|
|
725
|
+
/** Table reference used in R2 SQL: `<namespace>.<table>`. */
function r2TableRef(namespace, table) {
  return "".concat(namespace, ".", table);
}
|
|
728
|
+
/** Error raised by the R2 SQL client; carries the HTTP status when one is supplied. */
var R2SqlError = class extends Error {
  /** HTTP status of the failed response, or `undefined` when none was given. */
  status;
  /**
   * @param message Human-readable description of the failure.
   * @param status Optional HTTP status code.
   */
  constructor(message, status) {
    super(message);
    this.status = status;
    this.name = "R2SqlError";
  }
};
|
|
736
|
+
/** Raised when an R2 SQL query does not complete before its deadline. */
var R2SqlTimeoutError = class extends Error {
  /**
   * @param timeoutMs The deadline, in milliseconds, that was exceeded.
   */
  constructor(timeoutMs) {
    const message = `R2 SQL query exceeded ${timeoutMs}ms deadline`;
    super(message);
    this.name = "R2SqlTimeoutError";
  }
};
|
|
742
|
+
const DEFAULT_API_BASE = "https://api.cloudflare.com/client/v4";
|
|
743
|
+
const DEFAULT_TIMEOUT_MS = 25e3;
|
|
744
|
+
/**
 * Render a JS value as an inline SQL literal.
 *
 * `null`/`undefined` → `NULL`; finite numbers and bigints go in bare;
 * booleans → `TRUE`/`FALSE`; everything else is stringified and
 * single-quoted, with embedded quotes doubled.
 *
 * @throws R2SqlError for `NaN` or ±`Infinity`.
 */
function escapeSqlValue(value) {
  if (value == null) return "NULL";
  switch (typeof value) {
    case "number":
      if (Number.isFinite(value)) return String(value);
      throw new R2SqlError(`cannot embed non-finite number in SQL: ${value}`);
    case "bigint":
      return value.toString();
    case "boolean":
      return value ? "TRUE" : "FALSE";
    default:
      return `'${String(value).replace(/'/g, "''")}'`;
  }
}
|
|
754
|
+
/**
 * Substitute each `?` placeholder in `sql` with the matching param, rendered
 * by `escapeSqlValue`, in order.
 *
 * The scan tracks single-quoted string literals so a `?` inside one is left
 * alone; a doubled quote (`''`) inside a literal is copied through as an
 * escaped quote and does not end the literal.
 *
 * @throws R2SqlError when the number of placeholders and params differ.
 */
function inlineParams(sql, params) {
  const pieces = [];
  let used = 0;
  let quoted = false;
  let pos = 0;
  while (pos < sql.length) {
    const ch = sql[pos];
    if (ch === "'") {
      if (quoted && sql[pos + 1] === "'") {
        // Escaped quote inside a literal: emit both characters, stay inside.
        pieces.push("''");
        pos += 2;
      } else {
        quoted = !quoted;
        pieces.push(ch);
        pos += 1;
      }
    } else if (ch === "?" && !quoted) {
      if (used >= params.length) throw new R2SqlError(`SQL has more ? placeholders than params (${params.length})`);
      pieces.push(escapeSqlValue(params[used]));
      used += 1;
      pos += 1;
    } else {
      pieces.push(ch);
      pos += 1;
    }
  }
  if (used !== params.length) throw new R2SqlError(`SQL has ${used} ? placeholders but ${params.length} params supplied`);
  return pieces.join("");
}
|
|
780
|
+
/**
 * Coerce a query result into an array of row objects.
 *
 * A `rows` array is returned as-is. A `columns` + `data` pair is zipped into
 * one object per tuple, with missing cells set to `null`. Anything else
 * (including a falsy result) yields an empty array.
 */
function normalizeRows(result) {
  if (!result) return [];
  if (Array.isArray(result.rows)) return result.rows;
  const isColumnar = Array.isArray(result.columns) && Array.isArray(result.data);
  if (!isColumnar) return [];
  const names = result.columns;
  return result.data.map((tuple) => {
    const row = {};
    names.forEach((name, position) => {
      const cell = tuple[position];
      row[name] = cell ?? null;
    });
    return row;
  });
}
|
|
795
|
+
/**
 * Create an R2 SQL client that POSTs queries to
 * `<apiBase>/accounts/<accountId>/r2-catalog/<warehouse>/sql`.
 *
 * @param config Account id, warehouse, namespace and token, plus optional
 *   `apiBase`, `fetchImpl` (defaults to global `fetch`) and `timeoutMs`.
 * @returns `{ query, runPlan, runArchetype }`.
 */
function createR2SqlClient(config) {
  const fetchImpl = config.fetchImpl ?? globalThis.fetch;
  const apiBase = config.apiBase ?? DEFAULT_API_BASE;
  const timeoutMs = config.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const endpoint = `${apiBase}/accounts/${config.accountId}/r2-catalog/${config.warehouse}/sql`;

  /**
   * Send a literal SQL string and return normalized rows.
   *
   * The deadline covers the whole exchange, including reading the response
   * body; previously the timer was cleared as soon as headers arrived, so a
   * stalled body could outlive the deadline.
   *
   * @throws R2SqlTimeoutError when the deadline passes.
   * @throws R2SqlError for transport failures, non-2xx responses (with
   *   `status`), unreadable bodies, and envelopes without `success`.
   */
  async function query(sql) {
    const started = Date.now();
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(new R2SqlTimeoutError(timeoutMs)), timeoutMs);
    // Map a caught failure to a timeout if the deadline fired, otherwise wrap it.
    const wrapFailure = (err, prefix) => {
      if (controller.signal.aborted || err instanceof R2SqlTimeoutError || err?.name === "AbortError") return new R2SqlTimeoutError(timeoutMs);
      const detail = err instanceof Error ? err.message : String(err);
      return new R2SqlError(`${prefix}: ${detail}`);
    };
    try {
      let response;
      try {
        response = await fetchImpl(endpoint, {
          method: "POST",
          headers: {
            "authorization": `Bearer ${config.token}`,
            "content-type": "application/json"
          },
          body: JSON.stringify({ query: sql }),
          signal: controller.signal
        });
      } catch (err) {
        throw wrapFailure(err, "R2 SQL request failed");
      }
      if (!response.ok) {
        // Best effort: the body is diagnostic only, so a read failure is ignored.
        const text = await response.text().catch(() => "");
        throw new R2SqlError(`R2 SQL HTTP ${response.status}: ${text}`, response.status);
      }
      let envelope;
      try {
        envelope = await response.json();
      } catch (err) {
        throw wrapFailure(err, "R2 SQL response could not be read");
      }
      if (!envelope?.success) throw new R2SqlError(`R2 SQL query rejected: ${envelope?.errors?.map((e) => e.message).join("; ") ?? "unknown R2 SQL error"}`);
      return {
        rows: normalizeRows(envelope.result),
        sql,
        queryMs: Date.now() - started
      };
    } finally {
      clearTimeout(timer);
    }
  }

  /** Run a plan: substitute `{{TABLE}}` with `<namespace>.<table>`, inline params, send. */
  function runPlan(plan) {
    const tableRef = r2TableRef(config.namespace, plan.table);
    return query(inlineParams(plan.sql.split(TABLE_PLACEHOLDER).join(tableRef), plan.params));
  }

  /** Translate an archetype query to a plan and run it. */
  function runArchetype(archetypeQuery) {
    return runPlan(buildArchetypeSql(archetypeQuery));
  }

  return {
    query,
    runPlan,
    runArchetype
  };
}
|
|
446
846
|
const SIG_HEX_LEN = 16;
|
|
447
847
|
const keyCache = /* @__PURE__ */ new WeakMap();
|
|
448
848
|
const stringKeyCache = /* @__PURE__ */ new Map();
|
|
@@ -483,4 +883,4 @@ async function verifySizeHint(env, key, bytes, providedHex) {
|
|
|
483
883
|
for (let i = 0; i < SIG_HEX_LEN; i++) diff |= expected.charCodeAt(i) ^ providedHex.charCodeAt(i);
|
|
484
884
|
return diff === 0;
|
|
485
885
|
}
|
|
486
|
-
export { createDucklingsCodec, createDucklingsExecutor, createInflightDedupe, createR2Presigner, getAnalyticsEngine, getHostedR2QueryKey, getWasmDuckDBFactory, resetWasmDuckDB, signSizeHint, useAnalyticsEnv, verifySizeHint };
|
|
886
|
+
export { DuckDbIcebergError, DuckDbIcebergTimeoutError, R2SqlError, R2SqlTimeoutError, ServerTailRoutingError, TABLE_PLACEHOLDER, buildArchetypeSql, createDuckDbIcebergExecutor, createDucklingsCodec, createDucklingsExecutor, createInflightDedupe, createR2Presigner, createR2SqlClient, createServerTailDispatcher, escapeSqlValue, getAnalyticsEngine, getHostedR2QueryKey, getWasmDuckDBFactory, inlineParams, resetWasmDuckDB, resolveServerTailEngine, signSizeHint, useAnalyticsEnv, verifySizeHint };
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@gscdump/cloudflare",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.
|
|
4
|
+
"version": "0.21.0",
|
|
5
5
|
"description": "Cloudflare-Workers-flavored helpers for the gscdump analytics stack: AnalyticsEnv binding contract, R2 SigV4 presigner, size-hint HMAC, DuckDB Workers shims, engine factory.",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "Harlan Wilton",
|
|
@@ -41,8 +41,11 @@
|
|
|
41
41
|
"dependencies": {
|
|
42
42
|
"@uwdata/flechette": "^2.5.0",
|
|
43
43
|
"aws4fetch": "^1.0.20",
|
|
44
|
-
"@gscdump/
|
|
45
|
-
"@gscdump/engine-sqlite": "0.
|
|
44
|
+
"@gscdump/contracts": "0.21.0",
|
|
45
|
+
"@gscdump/engine-sqlite": "0.21.0",
|
|
46
|
+
"gscdump": "0.21.0",
|
|
47
|
+
"@gscdump/sdk": "0.21.0",
|
|
48
|
+
"@gscdump/engine": "0.21.0"
|
|
46
49
|
},
|
|
47
50
|
"devDependencies": {
|
|
48
51
|
"@cloudflare/workers-types": "^4.20260520.1",
|