@gscdump/cloudflare 0.23.2 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +2 -185
- package/dist/index.mjs +17 -462
- package/dist/server-tail/index.d.mts +164 -0
- package/dist/server-tail/index.mjs +448 -0
- package/package.json +16 -8
package/dist/index.d.mts
CHANGED
|
@@ -1,7 +1,5 @@
|
|
|
1
|
-
import { DuckDBFactory, IcebergTableName, ParquetCodec, QueryExecutor, Row, createStorageEngine } from "@gscdump/engine";
|
|
1
|
+
import { DuckDBFactory, ParquetCodec, QueryExecutor, Row, createStorageEngine } from "@gscdump/engine";
|
|
2
2
|
import { H3Event } from "h3";
|
|
3
|
-
import { ArchetypeQuery, ArchetypeResult, ArchetypeResultRow } from "@gscdump/sdk";
|
|
4
|
-
import { ServerTailDirective } from "@gscdump/contracts";
|
|
5
3
|
declare function getWasmDuckDBFactory(): DuckDBFactory;
|
|
6
4
|
declare function resetWasmDuckDB(): void;
|
|
7
5
|
interface AnalyticsEnv {
|
|
@@ -91,187 +89,6 @@ declare function createR2Presigner(env: AnalyticsEnv): ({
|
|
|
91
89
|
bucket,
|
|
92
90
|
expiresIn
|
|
93
91
|
}: PresignOptions) => Promise<string>;
|
|
94
|
-
/** Placeholder substituted for the engine-specific table reference. */
|
|
95
|
-
declare const TABLE_PLACEHOLDER = "{{TABLE}}";
|
|
96
|
-
/** A dialect-neutral SQL plan. */
|
|
97
|
-
interface ArchetypeSqlPlan {
|
|
98
|
-
/** SQL with `{{TABLE}}` standing in for the table reference. */
|
|
99
|
-
sql: string;
|
|
100
|
-
/** Bound parameters, in `?`-order. */
|
|
101
|
-
params: unknown[];
|
|
102
|
-
/** The Iceberg fact table this query reads. */
|
|
103
|
-
table: IcebergTableName;
|
|
104
|
-
}
|
|
105
|
-
/**
|
|
106
|
-
* Translate an archetype query to a dialect-neutral SQL plan.
|
|
107
|
-
*
|
|
108
|
-
* Throws for `arbitrary-sql` (caller-supplied SQL, handled by the DuckDB
|
|
109
|
-
* executor directly) and `aux-cloud-only` (not an Iceberg query).
|
|
110
|
-
*/
|
|
111
|
-
declare function buildArchetypeSql(query: ArchetypeQuery): ArchetypeSqlPlan;
|
|
112
|
-
/** Row returned by the DuckDB sibling. */
|
|
113
|
-
type DuckDbIcebergRow = Record<string, string | number | null>;
|
|
114
|
-
/**
|
|
115
|
-
* The minimal `DUCKDB_SVC` shape this executor needs — a structural subset of
|
|
116
|
-
* the binding in `workers-duckdb.ts` / `env.ts`. Any binding with `runSQL`
|
|
117
|
-
* satisfies it.
|
|
118
|
-
*/
|
|
119
|
-
interface DuckDbSvc {
|
|
120
|
-
runSQL: (args: {
|
|
121
|
-
sql: string;
|
|
122
|
-
}) => Promise<{
|
|
123
|
-
rows: unknown[];
|
|
124
|
-
sql: string;
|
|
125
|
-
}>;
|
|
126
|
-
}
|
|
127
|
-
/** Configuration for the DuckDB-over-Iceberg executor. */
|
|
128
|
-
interface DuckDbIcebergExecutorConfig {
|
|
129
|
-
/** The DuckDB service binding (the sibling Worker RPC). */
|
|
130
|
-
svc: DuckDbSvc;
|
|
131
|
-
/**
|
|
132
|
-
* R2 Data Catalog warehouse identifier. The sibling resolves Iceberg table
|
|
133
|
-
* locations from `<warehouse>` + `<namespace>` + table name.
|
|
134
|
-
*/
|
|
135
|
-
warehouse: string;
|
|
136
|
-
/** Iceberg namespace the 5 fact tables live in. */
|
|
137
|
-
namespace: string;
|
|
138
|
-
/**
|
|
139
|
-
* How the sibling addresses an Iceberg table in a `FROM` clause. Defaults to
|
|
140
|
-
* DuckDB's `iceberg_scan('<warehouse>/<namespace>/<table>')`. Overridable so
|
|
141
|
-
* a sibling configured with the Iceberg REST catalog can use
|
|
142
|
-
* `iceberg_scan('<namespace>.<table>')` or an attached-catalog reference.
|
|
143
|
-
*/
|
|
144
|
-
tableRefStyle?: 'path' | 'catalog';
|
|
145
|
-
/** Per-query wall-clock deadline (ms). Default 25s. */
|
|
146
|
-
timeoutMs?: number;
|
|
147
|
-
}
|
|
148
|
-
/** Result of a DuckDB-over-Iceberg query. */
|
|
149
|
-
interface DuckDbIcebergResult {
|
|
150
|
-
rows: DuckDbIcebergRow[];
|
|
151
|
-
/** The exact SQL sent to the sibling. */
|
|
152
|
-
sql: string;
|
|
153
|
-
queryMs: number;
|
|
154
|
-
}
|
|
155
|
-
declare class DuckDbIcebergError extends Error {
|
|
156
|
-
name: string;
|
|
157
|
-
}
|
|
158
|
-
declare class DuckDbIcebergTimeoutError extends Error {
|
|
159
|
-
name: string;
|
|
160
|
-
constructor(timeoutMs: number);
|
|
161
|
-
}
|
|
162
|
-
/** A configured DuckDB-over-Iceberg executor. */
|
|
163
|
-
interface DuckDbIcebergExecutor {
|
|
164
|
-
/** Run a raw SQL string with `{{TABLE_<name>}}` placeholders resolved. */
|
|
165
|
-
runSql: (sql: string, params?: readonly unknown[]) => Promise<DuckDbIcebergResult>;
|
|
166
|
-
/** Run a dialect-neutral plan: resolve `{{TABLE}}`, bind params, send. */
|
|
167
|
-
runPlan: (plan: ArchetypeSqlPlan) => Promise<DuckDbIcebergResult>;
|
|
168
|
-
/** Translate + run an archetype query. Handles `arbitrary-sql` verbatim. */
|
|
169
|
-
runArchetype: (query: ArchetypeQuery) => Promise<DuckDbIcebergResult>;
|
|
170
|
-
}
|
|
171
|
-
/**
|
|
172
|
-
* Create a DuckDB-over-Iceberg-files executor.
|
|
173
|
-
*/
|
|
174
|
-
declare function createDuckDbIcebergExecutor(config: DuckDbIcebergExecutorConfig): DuckDbIcebergExecutor;
|
|
175
|
-
/** Configuration for an R2 SQL client. */
|
|
176
|
-
interface R2SqlClientConfig {
|
|
177
|
-
/** Cloudflare account id. */
|
|
178
|
-
accountId: string;
|
|
179
|
-
/** R2 Data Catalog warehouse name (`<bucket>` or `<account>_<bucket>`). */
|
|
180
|
-
warehouse: string;
|
|
181
|
-
/** Iceberg namespace the 5 fact tables live in. */
|
|
182
|
-
namespace: string;
|
|
183
|
-
/** Cloudflare API token with R2 Data Catalog read scope. */
|
|
184
|
-
token: string;
|
|
185
|
-
/**
|
|
186
|
-
* Override the HTTP endpoint base. Defaults to the public CF API. Tests
|
|
187
|
-
* point this at a local recorder.
|
|
188
|
-
*/
|
|
189
|
-
apiBase?: string;
|
|
190
|
-
/**
|
|
191
|
-
* Injectable fetch. Defaults to global `fetch`. Tests pass a fake that
|
|
192
|
-
* returns a recorded CF envelope without a network round-trip.
|
|
193
|
-
*/
|
|
194
|
-
fetchImpl?: typeof fetch;
|
|
195
|
-
/** Per-query wall-clock deadline (ms). Default 25s — under the Worker CPU budget. */
|
|
196
|
-
timeoutMs?: number;
|
|
197
|
-
}
|
|
198
|
-
/** A row as returned by R2 SQL — flat dimension + metric values. */
|
|
199
|
-
type R2SqlRow = Record<string, string | number | null>;
|
|
200
|
-
/** Result of an R2 SQL query. */
|
|
201
|
-
interface R2SqlResult {
|
|
202
|
-
rows: R2SqlRow[];
|
|
203
|
-
/** The exact SQL sent (params already inlined). For diagnostics. */
|
|
204
|
-
sql: string;
|
|
205
|
-
/** Wall-clock duration of the HTTP round-trip. */
|
|
206
|
-
queryMs: number;
|
|
207
|
-
}
|
|
208
|
-
declare class R2SqlError extends Error {
|
|
209
|
-
readonly status?: number | undefined;
|
|
210
|
-
name: string;
|
|
211
|
-
constructor(message: string, status?: number | undefined);
|
|
212
|
-
}
|
|
213
|
-
declare class R2SqlTimeoutError extends Error {
|
|
214
|
-
name: string;
|
|
215
|
-
constructor(timeoutMs: number);
|
|
216
|
-
}
|
|
217
|
-
/**
|
|
218
|
-
* Escape a JS value for inline embedding in R2 SQL. R2 SQL has no bound-param
|
|
219
|
-
* channel, so `buildArchetypeSql`'s `?` placeholders are substituted here.
|
|
220
|
-
* Numbers go in bare; strings are single-quote-escaped; null → `NULL`.
|
|
221
|
-
*/
|
|
222
|
-
declare function escapeSqlValue(value: unknown): string;
|
|
223
|
-
/**
|
|
224
|
-
* Inline a plan's `?`-bound params into its SQL, in order. R2 SQL accepts only
|
|
225
|
-
* a literal query string. Quote-aware so a `?` inside a string literal is not
|
|
226
|
-
* mistaken for a placeholder.
|
|
227
|
-
*/
|
|
228
|
-
declare function inlineParams(sql: string, params: readonly unknown[]): string;
|
|
229
|
-
/** A configured R2 SQL client. */
|
|
230
|
-
interface R2SqlClient {
|
|
231
|
-
/** Run a raw SQL string (table reference already resolved). */
|
|
232
|
-
query: (sql: string) => Promise<R2SqlResult>;
|
|
233
|
-
/** Run a dialect-neutral plan: resolve `{{TABLE}}`, inline params, send. */
|
|
234
|
-
runPlan: (plan: ArchetypeSqlPlan) => Promise<R2SqlResult>;
|
|
235
|
-
/** Translate + run an archetype query end to end. */
|
|
236
|
-
runArchetype: (query: ArchetypeQuery) => Promise<R2SqlResult>;
|
|
237
|
-
}
|
|
238
|
-
/**
|
|
239
|
-
* Create an R2 SQL client. The endpoint requires a real CF token in
|
|
240
|
-
* production; tests inject `fetchImpl` returning a recorded envelope.
|
|
241
|
-
*/
|
|
242
|
-
declare function createR2SqlClient(config: R2SqlClientConfig): R2SqlClient;
|
|
243
|
-
/** The two engines the server tail can route to. */
|
|
244
|
-
type ServerTailEngine = 'r2-sql' | 'duckdb';
|
|
245
|
-
/** Executors the dispatcher routes between. */
|
|
246
|
-
interface ServerTailDispatcherConfig {
|
|
247
|
-
r2Sql: R2SqlClient;
|
|
248
|
-
duckdb: DuckDbIcebergExecutor;
|
|
249
|
-
}
|
|
250
|
-
declare class ServerTailRoutingError extends Error {
|
|
251
|
-
name: string;
|
|
252
|
-
}
|
|
253
|
-
/**
|
|
254
|
-
* Decide which engine answers an archetype query. Pure — no I/O. Exposed so
|
|
255
|
-
* the file-resolution endpoint can compute the `ServerTailDirective.engine`
|
|
256
|
-
* with the SAME logic the dispatcher uses at execution time.
|
|
257
|
-
*/
|
|
258
|
-
declare function resolveServerTailEngine(query: ArchetypeQuery): ServerTailEngine;
|
|
259
|
-
/** A configured server-tail dispatcher. */
|
|
260
|
-
interface ServerTailDispatcher {
|
|
261
|
-
/** Decide the engine for a query without running it. */
|
|
262
|
-
route: (query: ArchetypeQuery) => ServerTailEngine;
|
|
263
|
-
/**
|
|
264
|
-
* Execute a query, routing by execution class. If `directive` is supplied
|
|
265
|
-
* its `engine` is honoured only when consistent with the archetype's class
|
|
266
|
-
* (a `duckdb`-class archetype always runs on DuckDB regardless).
|
|
267
|
-
*/
|
|
268
|
-
execute: <R extends ArchetypeResultRow = ArchetypeResultRow>(query: ArchetypeQuery, directive?: ServerTailDirective) => Promise<ArchetypeResult<R>>;
|
|
269
|
-
}
|
|
270
|
-
/**
|
|
271
|
-
* Create the server-tail dispatcher. Holds an R2 SQL client and a DuckDB
|
|
272
|
-
* executor and routes every `ArchetypeQuery` to one of them.
|
|
273
|
-
*/
|
|
274
|
-
declare function createServerTailDispatcher(config: ServerTailDispatcherConfig): ServerTailDispatcher;
|
|
275
92
|
declare function signSizeHint(env: AnalyticsEnv, key: string, bytes: number): Promise<string>;
|
|
276
93
|
declare function verifySizeHint(env: AnalyticsEnv, key: string, bytes: number, providedHex: string): Promise<boolean>;
|
|
277
94
|
declare function createDucklingsCodec(_env: AnalyticsEnv): ParquetCodec;
|
|
@@ -281,4 +98,4 @@ interface DucklingsExecutorOptions {
|
|
|
281
98
|
ipcTotalBytes?: number;
|
|
282
99
|
}
|
|
283
100
|
declare function createDucklingsExecutor(env: AnalyticsEnv, opts?: DucklingsExecutorOptions): QueryExecutor;
|
|
284
|
-
export { type AnalyticsEngineHooks, type AnalyticsEnv, type
|
|
101
|
+
export { type AnalyticsEngineHooks, type AnalyticsEnv, type HostedR2QueryKeyInput, type InflightDedupe, type PresignOptions, type Row, createDucklingsCodec, createDucklingsExecutor, createInflightDedupe, createR2Presigner, getAnalyticsEngine, getHostedR2QueryKey, getWasmDuckDBFactory, resetWasmDuckDB, signSizeHint, useAnalyticsEnv, verifySizeHint };
|
package/dist/index.mjs
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
|
-
import { SCHEMAS, bindLiterals, coerceRow, createStorageEngine, inferTable } from "@gscdump/engine";
|
|
1
|
+
import { SCHEMAS, bindLiterals, coerceRow, createStorageEngine } from "@gscdump/engine";
|
|
2
2
|
import { createD1ManifestStore } from "@gscdump/engine-sqlite";
|
|
3
3
|
import { createR2DataSource } from "@gscdump/engine/r2";
|
|
4
4
|
import { createHyparquetCodec, decodeParquetToRows } from "@gscdump/engine/hyparquet";
|
|
5
5
|
import { float64, int32, int64, tableFromArrays, tableToIPC, utf8 } from "@uwdata/flechette";
|
|
6
6
|
import { createError } from "h3";
|
|
7
7
|
import { AwsClient } from "aws4fetch";
|
|
8
|
-
import { ARCHETYPE_EXECUTION_CLASS } from "@gscdump/sdk";
|
|
9
8
|
let handle = null;
|
|
10
9
|
async function initHandle() {
|
|
11
10
|
throw new Error("DuckDB-WASM handle not wired for Cloudflare Workers yet. Complete duckdb-wasm-handle.ts before enabling dual-write (user.migration_phase != 'd1').");
|
|
@@ -399,23 +398,16 @@ function stableStringify(value) {
|
|
|
399
398
|
}).join(",")}}`;
|
|
400
399
|
return JSON.stringify(value);
|
|
401
400
|
}
|
|
402
|
-
function hashString(value) {
|
|
403
|
-
let hash = 0;
|
|
404
|
-
for (let i = 0; i < value.length; i++) {
|
|
405
|
-
hash = (hash << 5) - hash + value.charCodeAt(i);
|
|
406
|
-
hash |= 0;
|
|
407
|
-
}
|
|
408
|
-
return Math.abs(hash).toString(36);
|
|
409
|
-
}
|
|
410
401
|
function getHostedR2QueryKey(input) {
|
|
411
|
-
return [
|
|
402
|
+
return JSON.stringify([
|
|
412
403
|
input.userId,
|
|
413
404
|
input.siteId,
|
|
414
|
-
|
|
415
|
-
input.comparison ?
|
|
416
|
-
input.comparisonFilter ??
|
|
417
|
-
]
|
|
405
|
+
stableStringify(input.state),
|
|
406
|
+
input.comparison === void 0 ? null : stableStringify(input.comparison),
|
|
407
|
+
input.comparisonFilter ?? null
|
|
408
|
+
]);
|
|
418
409
|
}
|
|
410
|
+
const MAX_EXPIRES_IN = 604800;
|
|
419
411
|
function createR2Presigner(env) {
|
|
420
412
|
if (!env.R2_ACCESS_KEY_ID || !env.R2_SECRET_ACCESS_KEY) throw createError({
|
|
421
413
|
statusCode: 500,
|
|
@@ -433,6 +425,14 @@ function createR2Presigner(env) {
|
|
|
433
425
|
});
|
|
434
426
|
const endpoint = `https://${env.CLOUDFLARE_ACCOUNT_ID}.r2.cloudflarestorage.com`;
|
|
435
427
|
return async function presignGet({ key, bucket, expiresIn = 3600 }) {
|
|
428
|
+
if (!Number.isInteger(expiresIn) || expiresIn <= 0) throw createError({
|
|
429
|
+
statusCode: 400,
|
|
430
|
+
message: `expiresIn must be a positive integer (got ${expiresIn})`
|
|
431
|
+
});
|
|
432
|
+
if (expiresIn > MAX_EXPIRES_IN) throw createError({
|
|
433
|
+
statusCode: 400,
|
|
434
|
+
message: `expiresIn exceeds the ${MAX_EXPIRES_IN}s (7 day) S3 SigV4 maximum (got ${expiresIn})`
|
|
435
|
+
});
|
|
436
436
|
const url = new URL(`${endpoint}/${bucket}/${encodeKey(key)}`);
|
|
437
437
|
url.searchParams.set("X-Amz-Expires", String(expiresIn));
|
|
438
438
|
return (await aws.sign(url.toString(), {
|
|
@@ -444,452 +444,7 @@ function createR2Presigner(env) {
|
|
|
444
444
|
function encodeKey(key) {
|
|
445
445
|
return key.split("/").map(encodeURIComponent).join("/");
|
|
446
446
|
}
|
|
447
|
-
const TABLE_PLACEHOLDER = "{{TABLE}}";
|
|
448
|
-
function dimColumn(dim) {
|
|
449
|
-
if (dim === "page") return "url";
|
|
450
|
-
if (dim === "queryCanonical") return "query_canonical";
|
|
451
|
-
return dim;
|
|
452
|
-
}
|
|
453
|
-
function metricExpr(metric) {
|
|
454
|
-
switch (metric) {
|
|
455
|
-
case "clicks": return "SUM(clicks) AS clicks";
|
|
456
|
-
case "impressions": return "SUM(impressions) AS impressions";
|
|
457
|
-
case "ctr": return "SUM(clicks) / NULLIF(SUM(impressions), 0) AS ctr";
|
|
458
|
-
case "position": return "SUM(sum_position) / NULLIF(SUM(impressions), 0) AS position";
|
|
459
|
-
}
|
|
460
|
-
}
|
|
461
|
-
function orderMetricExpr(metric) {
|
|
462
|
-
switch (metric) {
|
|
463
|
-
case "clicks": return "SUM(clicks)";
|
|
464
|
-
case "impressions": return "SUM(impressions)";
|
|
465
|
-
case "ctr": return "SUM(clicks) / NULLIF(SUM(impressions), 0)";
|
|
466
|
-
case "position": return "SUM(sum_position) / NULLIF(SUM(impressions), 0)";
|
|
467
|
-
}
|
|
468
|
-
}
|
|
469
|
-
const DEVICE_SUFFIXES = [
|
|
470
|
-
"desktop",
|
|
471
|
-
"mobile",
|
|
472
|
-
"tablet"
|
|
473
|
-
];
|
|
474
|
-
function metricExprForSource(metric, source) {
|
|
475
|
-
switch (metric) {
|
|
476
|
-
case "clicks": return `SUM(${source.clicks}) AS clicks`;
|
|
477
|
-
case "impressions": return `SUM(${source.impressions}) AS impressions`;
|
|
478
|
-
case "ctr": return `SUM(${source.clicks}) / NULLIF(SUM(${source.impressions}), 0) AS ctr`;
|
|
479
|
-
case "position": return `SUM(${source.sumPosition}) / NULLIF(SUM(${source.impressions}), 0) AS position`;
|
|
480
|
-
}
|
|
481
|
-
}
|
|
482
|
-
function deviceSource(suffix) {
|
|
483
|
-
return {
|
|
484
|
-
clicks: `clicks_${suffix}`,
|
|
485
|
-
impressions: `impressions_${suffix}`,
|
|
486
|
-
sumPosition: `sum_position_${suffix}`
|
|
487
|
-
};
|
|
488
|
-
}
|
|
489
|
-
function sqlStringLiteral(value) {
|
|
490
|
-
return `'${value.replace(/'/g, "''")}'`;
|
|
491
|
-
}
|
|
492
|
-
function partitionWhere(q) {
|
|
493
|
-
return {
|
|
494
|
-
clause: "site_id = ? AND search_type = ? AND date BETWEEN ? AND ?",
|
|
495
|
-
params: [
|
|
496
|
-
q.siteId,
|
|
497
|
-
q.searchType,
|
|
498
|
-
q.range.start,
|
|
499
|
-
q.range.end
|
|
500
|
-
]
|
|
501
|
-
};
|
|
502
|
-
}
|
|
503
|
-
function buildSiteDailyTimeseries(q) {
|
|
504
|
-
const w = partitionWhere(q);
|
|
505
|
-
const metrics = q.metrics.map(metricExpr).join(", ");
|
|
506
|
-
return {
|
|
507
|
-
table: "dates",
|
|
508
|
-
params: w.params,
|
|
509
|
-
sql: `SELECT date, ${metrics} FROM ${TABLE_PLACEHOLDER} WHERE ${w.clause} GROUP BY date ORDER BY date ASC`
|
|
510
|
-
};
|
|
511
|
-
}
|
|
512
|
-
function buildEntityDailyTimeseries(q) {
|
|
513
|
-
const table = inferTable([q.entity.dimension]);
|
|
514
|
-
const w = partitionWhere(q);
|
|
515
|
-
const col = dimColumn(q.entity.dimension);
|
|
516
|
-
const metrics = q.metrics.map(metricExpr).join(", ");
|
|
517
|
-
return {
|
|
518
|
-
table,
|
|
519
|
-
params: [...w.params, q.entity.value],
|
|
520
|
-
sql: `SELECT date, ${metrics} FROM ${TABLE_PLACEHOLDER} WHERE ${w.clause} AND ${col} = ? GROUP BY date ORDER BY date ASC`
|
|
521
|
-
};
|
|
522
|
-
}
|
|
523
|
-
function buildEntityDailySparkline(q) {
|
|
524
|
-
const table = inferTable([q.dimension]);
|
|
525
|
-
const w = partitionWhere(q);
|
|
526
|
-
const col = dimColumn(q.dimension);
|
|
527
|
-
if (q.entities.length === 0) throw new Error("entity-daily-sparkline: empty entities — resolver must pre-resolve the top-N list");
|
|
528
|
-
const inList = q.entities.map(sqlStringLiteral).join(", ");
|
|
529
|
-
return {
|
|
530
|
-
table,
|
|
531
|
-
params: w.params,
|
|
532
|
-
sql: `SELECT date, ${col}, ${metricExpr(q.metric)} FROM ${TABLE_PLACEHOLDER} WHERE ${w.clause} AND ${col} IN (${inList}) GROUP BY date, ${col} ORDER BY date ASC`
|
|
533
|
-
};
|
|
534
|
-
}
|
|
535
|
-
function buildTopNBreakdown(q) {
|
|
536
|
-
const table = inferTable([q.dimension]);
|
|
537
|
-
const w = partitionWhere(q);
|
|
538
|
-
if (q.dimension === "device") {
|
|
539
|
-
const metricList = q.metrics.includes(q.orderBy.metric) ? q.metrics : [...q.metrics, q.orderBy.metric];
|
|
540
|
-
const order = `${q.orderBy.metric} ${q.orderBy.dir.toUpperCase()}`;
|
|
541
|
-
let sql = `${DEVICE_SUFFIXES.map((suffix) => {
|
|
542
|
-
const source = deviceSource(suffix);
|
|
543
|
-
const metrics = metricList.map((m) => metricExprForSource(m, source)).join(", ");
|
|
544
|
-
return `SELECT '${suffix.toUpperCase()}' AS device, ${metrics} FROM ${TABLE_PLACEHOLDER} WHERE ${w.clause}`;
|
|
545
|
-
}).join(" UNION ALL ")} ORDER BY ${order} LIMIT ${Math.max(0, Math.floor(q.limit))}`;
|
|
546
|
-
if (q.offset && q.offset > 0) sql += ` OFFSET ${Math.floor(q.offset)}`;
|
|
547
|
-
return {
|
|
548
|
-
table,
|
|
549
|
-
params: DEVICE_SUFFIXES.flatMap(() => w.params),
|
|
550
|
-
sql
|
|
551
|
-
};
|
|
552
|
-
}
|
|
553
|
-
const col = dimColumn(q.dimension);
|
|
554
|
-
const metrics = q.metrics.map(metricExpr).join(", ");
|
|
555
|
-
const order = `${orderMetricExpr(q.orderBy.metric)} ${q.orderBy.dir.toUpperCase()}`;
|
|
556
|
-
let sql = `SELECT ${col}, ${metrics} FROM ${TABLE_PLACEHOLDER} WHERE ${w.clause} GROUP BY ${col} ORDER BY ${order} LIMIT ${Math.max(0, Math.floor(q.limit))}`;
|
|
557
|
-
if (q.offset && q.offset > 0) sql += ` OFFSET ${Math.floor(q.offset)}`;
|
|
558
|
-
return {
|
|
559
|
-
table,
|
|
560
|
-
params: w.params,
|
|
561
|
-
sql
|
|
562
|
-
};
|
|
563
|
-
}
|
|
564
|
-
function buildSingleRowLookup(q) {
|
|
565
|
-
const dims = Object.keys(q.match);
|
|
566
|
-
const table = inferTable(dims);
|
|
567
|
-
const w = partitionWhere(q);
|
|
568
|
-
const params = [...w.params];
|
|
569
|
-
let clause = w.clause;
|
|
570
|
-
for (const dim of dims) {
|
|
571
|
-
clause += ` AND ${dimColumn(dim)} = ?`;
|
|
572
|
-
params.push(q.match[dim]);
|
|
573
|
-
}
|
|
574
|
-
const metrics = q.metrics.map(metricExpr).join(", ");
|
|
575
|
-
const groupBy = dims.length > 0 ? ` GROUP BY ${dims.map(dimColumn).join(", ")}` : "";
|
|
576
|
-
return {
|
|
577
|
-
table,
|
|
578
|
-
params,
|
|
579
|
-
sql: `SELECT ${dims.length > 0 ? `${dims.map(dimColumn).join(", ")}, ${metrics}` : metrics} FROM ${TABLE_PLACEHOLDER} WHERE ${clause}${groupBy}`
|
|
580
|
-
};
|
|
581
|
-
}
|
|
582
|
-
function buildMultiSeriesStackedDaily(q) {
|
|
583
|
-
const table = inferTable([q.seriesDimension]);
|
|
584
|
-
const w = partitionWhere(q);
|
|
585
|
-
if (q.seriesDimension === "device") {
|
|
586
|
-
const selects = DEVICE_SUFFIXES.map((suffix) => {
|
|
587
|
-
const source = deviceSource(suffix);
|
|
588
|
-
return `SELECT date, '${suffix.toUpperCase()}' AS device, ${metricExprForSource(q.metric, source)} FROM ${TABLE_PLACEHOLDER} WHERE ${w.clause} GROUP BY date`;
|
|
589
|
-
});
|
|
590
|
-
return {
|
|
591
|
-
table,
|
|
592
|
-
params: DEVICE_SUFFIXES.flatMap(() => w.params),
|
|
593
|
-
sql: `${selects.join(" UNION ALL ")} ORDER BY date ASC, device ASC`
|
|
594
|
-
};
|
|
595
|
-
}
|
|
596
|
-
const col = dimColumn(q.seriesDimension);
|
|
597
|
-
return {
|
|
598
|
-
table,
|
|
599
|
-
params: w.params,
|
|
600
|
-
sql: `SELECT date, ${col}, ${metricExpr(q.metric)} FROM ${TABLE_PLACEHOLDER} WHERE ${w.clause} GROUP BY date, ${col} ORDER BY date ASC`
|
|
601
|
-
};
|
|
602
|
-
}
|
|
603
|
-
function buildPresetAnalyzer(q) {
|
|
604
|
-
const params = q.params ?? {};
|
|
605
|
-
const minImpressions = Number(params.minImpressions ?? 100);
|
|
606
|
-
const limit = Math.max(1, Math.floor(Number(params.limit ?? 1e3)));
|
|
607
|
-
const w = partitionWhere(q);
|
|
608
|
-
const wp = [...w.params];
|
|
609
|
-
let having;
|
|
610
|
-
switch (q.presetId) {
|
|
611
|
-
case "striking-distance":
|
|
612
|
-
having = `HAVING SUM(impressions) >= ? AND (SUM(sum_position) / NULLIF(SUM(impressions), 0)) BETWEEN ? AND ?`;
|
|
613
|
-
wp.push(minImpressions, Number(params.minPosition ?? 11), Number(params.maxPosition ?? 20));
|
|
614
|
-
break;
|
|
615
|
-
case "opportunity":
|
|
616
|
-
having = `HAVING SUM(impressions) >= ? AND SUM(clicks) = 0`;
|
|
617
|
-
wp.push(minImpressions);
|
|
618
|
-
break;
|
|
619
|
-
case "zero-click":
|
|
620
|
-
having = `HAVING SUM(impressions) >= ? AND SUM(clicks) = 0`;
|
|
621
|
-
wp.push(minImpressions);
|
|
622
|
-
break;
|
|
623
|
-
default: throw new Error(`preset-analyzer: preset '${q.presetId}' is not R2-SQL-safe — window-function presets must be sent as archetype 'arbitrary-sql'`);
|
|
624
|
-
}
|
|
625
|
-
return {
|
|
626
|
-
table: "page_queries",
|
|
627
|
-
params: wp,
|
|
628
|
-
sql: `SELECT url, query, SUM(clicks) AS clicks, SUM(impressions) AS impressions, SUM(clicks) / NULLIF(SUM(impressions), 0) AS ctr, SUM(sum_position) / NULLIF(SUM(impressions), 0) AS position FROM ${TABLE_PLACEHOLDER} WHERE ${w.clause} GROUP BY url, query ${having} ORDER BY SUM(impressions) DESC LIMIT ${limit}`
|
|
629
|
-
};
|
|
630
|
-
}
|
|
631
|
-
function buildTwoDimensionDetail(q) {
|
|
632
|
-
const w = partitionWhere(q);
|
|
633
|
-
const params = [...w.params];
|
|
634
|
-
let clause = w.clause;
|
|
635
|
-
if (q.filter?.page) {
|
|
636
|
-
clause += ` AND url = ?`;
|
|
637
|
-
params.push(q.filter.page);
|
|
638
|
-
}
|
|
639
|
-
if (q.filter?.query) {
|
|
640
|
-
clause += ` AND query = ?`;
|
|
641
|
-
params.push(q.filter.query);
|
|
642
|
-
}
|
|
643
|
-
let sql = `SELECT url, query, ${q.metrics.map(metricExpr).join(", ")} FROM ${TABLE_PLACEHOLDER} WHERE ${clause} GROUP BY url, query`;
|
|
644
|
-
if (q.orderBy) sql += ` ORDER BY ${orderMetricExpr(q.orderBy.metric)} ${q.orderBy.dir.toUpperCase()}`;
|
|
645
|
-
if (q.limit && q.limit > 0) sql += ` LIMIT ${Math.floor(q.limit)}`;
|
|
646
|
-
return {
|
|
647
|
-
table: "page_queries",
|
|
648
|
-
params,
|
|
649
|
-
sql
|
|
650
|
-
};
|
|
651
|
-
}
|
|
652
|
-
function buildArchetypeSql(query) {
|
|
653
|
-
switch (query.archetype) {
|
|
654
|
-
case "site-daily-timeseries": return buildSiteDailyTimeseries(query);
|
|
655
|
-
case "entity-daily-timeseries": return buildEntityDailyTimeseries(query);
|
|
656
|
-
case "entity-daily-sparkline": return buildEntityDailySparkline(query);
|
|
657
|
-
case "top-n-breakdown": return buildTopNBreakdown(query);
|
|
658
|
-
case "single-row-lookup": return buildSingleRowLookup(query);
|
|
659
|
-
case "multi-series-stacked-daily": return buildMultiSeriesStackedDaily(query);
|
|
660
|
-
case "preset-analyzer": return buildPresetAnalyzer(query);
|
|
661
|
-
case "two-dimension-detail": return buildTwoDimensionDetail(query);
|
|
662
|
-
case "arbitrary-sql": throw new Error("buildArchetypeSql: arbitrary-sql carries caller SQL — the DuckDB executor runs it verbatim");
|
|
663
|
-
case "aux-cloud-only": throw new Error("buildArchetypeSql: aux-cloud-only is not an Iceberg query");
|
|
664
|
-
}
|
|
665
|
-
}
|
|
666
|
-
var ServerTailRoutingError = class extends Error {
|
|
667
|
-
name = "ServerTailRoutingError";
|
|
668
|
-
};
|
|
669
|
-
function resolveServerTailEngine(query) {
|
|
670
|
-
const cls = ARCHETYPE_EXECUTION_CLASS[query.archetype];
|
|
671
|
-
if (cls === "cloud-only") throw new ServerTailRoutingError(`archetype '${query.archetype}' is cloud-only — not a server-tail query`);
|
|
672
|
-
if (cls === "duckdb") return "duckdb";
|
|
673
|
-
if (query.archetype === "top-n-breakdown" && query.offset && query.offset > 0) return "duckdb";
|
|
674
|
-
return "r2-sql";
|
|
675
|
-
}
|
|
676
|
-
function sourceFor(engine) {
|
|
677
|
-
return engine === "r2-sql" ? "server-r2-sql" : "server-duckdb";
|
|
678
|
-
}
|
|
679
|
-
function createServerTailDispatcher(config) {
|
|
680
|
-
function route(query) {
|
|
681
|
-
return resolveServerTailEngine(query);
|
|
682
|
-
}
|
|
683
|
-
async function execute(query, directive) {
|
|
684
|
-
const engine = route(query);
|
|
685
|
-
if (directive && directive.engine !== engine && engine === "r2-sql") return runOn("duckdb", query);
|
|
686
|
-
return runOn(engine, query);
|
|
687
|
-
}
|
|
688
|
-
async function runOn(engine, query) {
|
|
689
|
-
if (engine === "r2-sql") {
|
|
690
|
-
const res = await config.r2Sql.runArchetype(query);
|
|
691
|
-
return {
|
|
692
|
-
archetype: query.archetype,
|
|
693
|
-
rows: res.rows,
|
|
694
|
-
source: sourceFor("r2-sql"),
|
|
695
|
-
meta: {
|
|
696
|
-
rowCount: res.rows.length,
|
|
697
|
-
queryMs: res.queryMs
|
|
698
|
-
}
|
|
699
|
-
};
|
|
700
|
-
}
|
|
701
|
-
const res = await config.duckdb.runArchetype(query);
|
|
702
|
-
return {
|
|
703
|
-
archetype: query.archetype,
|
|
704
|
-
rows: res.rows,
|
|
705
|
-
source: sourceFor("duckdb"),
|
|
706
|
-
meta: {
|
|
707
|
-
rowCount: res.rows.length,
|
|
708
|
-
queryMs: res.queryMs
|
|
709
|
-
}
|
|
710
|
-
};
|
|
711
|
-
}
|
|
712
|
-
return {
|
|
713
|
-
route,
|
|
714
|
-
execute
|
|
715
|
-
};
|
|
716
|
-
}
|
|
717
|
-
var DuckDbIcebergError = class extends Error {
|
|
718
|
-
name = "DuckDbIcebergError";
|
|
719
|
-
};
|
|
720
|
-
var DuckDbIcebergTimeoutError = class extends Error {
|
|
721
|
-
name = "DuckDbIcebergTimeoutError";
|
|
722
|
-
constructor(timeoutMs) {
|
|
723
|
-
super(`DuckDB-over-Iceberg query exceeded ${timeoutMs}ms deadline`);
|
|
724
|
-
}
|
|
725
|
-
};
|
|
726
|
-
const DEFAULT_TIMEOUT_MS$1 = 25e3;
|
|
727
|
-
function icebergTableRef(config, table) {
|
|
728
|
-
if (config.tableRefStyle === "catalog") return `${config.namespace}.${table}`;
|
|
729
|
-
return `iceberg_scan('${config.warehouse}/${config.namespace}/${table}')`;
|
|
730
|
-
}
|
|
731
|
-
function withDeadline(op, timeoutMs) {
|
|
732
|
-
return new Promise((resolve, reject) => {
|
|
733
|
-
const timer = setTimeout(() => reject(new DuckDbIcebergTimeoutError(timeoutMs)), timeoutMs);
|
|
734
|
-
op.then(resolve, reject).finally(() => clearTimeout(timer));
|
|
735
|
-
});
|
|
736
|
-
}
|
|
737
|
-
function resolveTablePlaceholders(sql, config) {
|
|
738
|
-
return sql.replace(/\{\{(\w+)\}\}/g, (_, table) => icebergTableRef(config, table));
|
|
739
|
-
}
|
|
740
|
-
function createDuckDbIcebergExecutor(config) {
|
|
741
|
-
const timeoutMs = config.timeoutMs ?? DEFAULT_TIMEOUT_MS$1;
|
|
742
|
-
async function send(sql) {
|
|
743
|
-
const started = Date.now();
|
|
744
|
-
const result = await withDeadline(config.svc.runSQL({ sql }), timeoutMs).catch((err) => {
|
|
745
|
-
if (err instanceof DuckDbIcebergTimeoutError) throw err;
|
|
746
|
-
throw new DuckDbIcebergError(`DUCKDB_SVC.runSQL failed: ${err.message}`);
|
|
747
|
-
});
|
|
748
|
-
return {
|
|
749
|
-
rows: result.rows ?? [],
|
|
750
|
-
sql: result.sql ?? sql,
|
|
751
|
-
queryMs: Date.now() - started
|
|
752
|
-
};
|
|
753
|
-
}
|
|
754
|
-
function runSql(sql, params = []) {
|
|
755
|
-
return send(bindLiterals(resolveTablePlaceholders(sql, config), params));
|
|
756
|
-
}
|
|
757
|
-
function runPlan(plan) {
|
|
758
|
-
return send(bindLiterals(plan.sql.split(TABLE_PLACEHOLDER).join(icebergTableRef(config, plan.table)), plan.params));
|
|
759
|
-
}
|
|
760
|
-
async function runArchetype(query) {
|
|
761
|
-
if (query.archetype === "arbitrary-sql") return runSql(query.sql, query.params ?? []);
|
|
762
|
-
if (query.archetype === "aux-cloud-only") throw new DuckDbIcebergError("aux-cloud-only is not an Iceberg query");
|
|
763
|
-
return runPlan(buildArchetypeSql(query));
|
|
764
|
-
}
|
|
765
|
-
return {
|
|
766
|
-
runSql,
|
|
767
|
-
runPlan,
|
|
768
|
-
runArchetype
|
|
769
|
-
};
|
|
770
|
-
}
|
|
771
|
-
function r2TableRef(namespace, table) {
|
|
772
|
-
return `${namespace}.${table}`;
|
|
773
|
-
}
|
|
774
|
-
var R2SqlError = class extends Error {
|
|
775
|
-
status;
|
|
776
|
-
name = "R2SqlError";
|
|
777
|
-
constructor(message, status) {
|
|
778
|
-
super(message);
|
|
779
|
-
this.status = status;
|
|
780
|
-
}
|
|
781
|
-
};
|
|
782
|
-
var R2SqlTimeoutError = class extends Error {
|
|
783
|
-
name = "R2SqlTimeoutError";
|
|
784
|
-
constructor(timeoutMs) {
|
|
785
|
-
super(`R2 SQL query exceeded ${timeoutMs}ms deadline`);
|
|
786
|
-
}
|
|
787
|
-
};
|
|
788
|
-
const DEFAULT_API_BASE = "https://api.cloudflare.com/client/v4";
|
|
789
|
-
const DEFAULT_TIMEOUT_MS = 25e3;
|
|
790
|
-
function escapeSqlValue(value) {
|
|
791
|
-
if (value === null || value === void 0) return "NULL";
|
|
792
|
-
if (typeof value === "number") {
|
|
793
|
-
if (!Number.isFinite(value)) throw new R2SqlError(`cannot embed non-finite number in SQL: ${value}`);
|
|
794
|
-
return String(value);
|
|
795
|
-
}
|
|
796
|
-
if (typeof value === "bigint") return value.toString();
|
|
797
|
-
if (typeof value === "boolean") return value ? "TRUE" : "FALSE";
|
|
798
|
-
return `'${String(value).replace(/'/g, "''")}'`;
|
|
799
|
-
}
|
|
800
|
-
function inlineParams(sql, params) {
|
|
801
|
-
let out = "";
|
|
802
|
-
let paramIndex = 0;
|
|
803
|
-
let inString = false;
|
|
804
|
-
for (let i = 0; i < sql.length; i++) {
|
|
805
|
-
const ch = sql[i];
|
|
806
|
-
if (ch === "'") {
|
|
807
|
-
if (inString && sql[i + 1] === "'") {
|
|
808
|
-
out += "''";
|
|
809
|
-
i++;
|
|
810
|
-
continue;
|
|
811
|
-
}
|
|
812
|
-
inString = !inString;
|
|
813
|
-
out += ch;
|
|
814
|
-
continue;
|
|
815
|
-
}
|
|
816
|
-
if (ch === "?" && !inString) {
|
|
817
|
-
if (paramIndex >= params.length) throw new R2SqlError(`SQL has more ? placeholders than params (${params.length})`);
|
|
818
|
-
out += escapeSqlValue(params[paramIndex++]);
|
|
819
|
-
continue;
|
|
820
|
-
}
|
|
821
|
-
out += ch;
|
|
822
|
-
}
|
|
823
|
-
if (paramIndex !== params.length) throw new R2SqlError(`SQL has ${paramIndex} ? placeholders but ${params.length} params supplied`);
|
|
824
|
-
return out;
|
|
825
|
-
}
|
|
826
|
-
function normalizeRows(result) {
|
|
827
|
-
if (!result) return [];
|
|
828
|
-
if (Array.isArray(result.rows)) return result.rows;
|
|
829
|
-
if (Array.isArray(result.columns) && Array.isArray(result.data)) {
|
|
830
|
-
const cols = result.columns;
|
|
831
|
-
return result.data.map((tuple) => {
|
|
832
|
-
const row = {};
|
|
833
|
-
cols.forEach((col, idx) => {
|
|
834
|
-
row[col] = tuple[idx] ?? null;
|
|
835
|
-
});
|
|
836
|
-
return row;
|
|
837
|
-
});
|
|
838
|
-
}
|
|
839
|
-
return [];
|
|
840
|
-
}
|
|
841
|
-
function createR2SqlClient(config) {
|
|
842
|
-
const fetchImpl = config.fetchImpl ?? globalThis.fetch;
|
|
843
|
-
const apiBase = config.apiBase ?? DEFAULT_API_BASE;
|
|
844
|
-
const timeoutMs = config.timeoutMs ?? DEFAULT_TIMEOUT_MS;
|
|
845
|
-
const endpoint = `${apiBase}/accounts/${config.accountId}/r2-catalog/${config.warehouse}/sql`;
|
|
846
|
-
async function query(sql) {
|
|
847
|
-
const started = Date.now();
|
|
848
|
-
const controller = new AbortController();
|
|
849
|
-
const timer = setTimeout(() => controller.abort(new R2SqlTimeoutError(timeoutMs)), timeoutMs);
|
|
850
|
-
let response;
|
|
851
|
-
try {
|
|
852
|
-
response = await fetchImpl(endpoint, {
|
|
853
|
-
method: "POST",
|
|
854
|
-
headers: {
|
|
855
|
-
"authorization": `Bearer ${config.token}`,
|
|
856
|
-
"content-type": "application/json"
|
|
857
|
-
},
|
|
858
|
-
body: JSON.stringify({ query: sql }),
|
|
859
|
-
signal: controller.signal
|
|
860
|
-
});
|
|
861
|
-
} catch (err) {
|
|
862
|
-
if (err instanceof R2SqlTimeoutError || err?.name === "AbortError") throw new R2SqlTimeoutError(timeoutMs);
|
|
863
|
-
throw new R2SqlError(`R2 SQL request failed: ${err.message}`);
|
|
864
|
-
} finally {
|
|
865
|
-
clearTimeout(timer);
|
|
866
|
-
}
|
|
867
|
-
if (!response.ok) {
|
|
868
|
-
const text = await response.text().catch(() => "");
|
|
869
|
-
throw new R2SqlError(`R2 SQL HTTP ${response.status}: ${text}`, response.status);
|
|
870
|
-
}
|
|
871
|
-
const envelope = await response.json();
|
|
872
|
-
if (!envelope.success) throw new R2SqlError(`R2 SQL query rejected: ${envelope.errors?.map((e) => e.message).join("; ") ?? "unknown R2 SQL error"}`);
|
|
873
|
-
return {
|
|
874
|
-
rows: normalizeRows(envelope.result),
|
|
875
|
-
sql,
|
|
876
|
-
queryMs: Date.now() - started
|
|
877
|
-
};
|
|
878
|
-
}
|
|
879
|
-
function runPlan(plan) {
|
|
880
|
-
const tableRef = r2TableRef(config.namespace, plan.table);
|
|
881
|
-
return query(inlineParams(plan.sql.split(TABLE_PLACEHOLDER).join(tableRef), plan.params));
|
|
882
|
-
}
|
|
883
|
-
function runArchetype(archetypeQuery) {
|
|
884
|
-
return runPlan(buildArchetypeSql(archetypeQuery));
|
|
885
|
-
}
|
|
886
|
-
return {
|
|
887
|
-
query,
|
|
888
|
-
runPlan,
|
|
889
|
-
runArchetype
|
|
890
|
-
};
|
|
891
|
-
}
|
|
892
|
-
const SIG_HEX_LEN = 16;
|
|
447
|
+
const SIG_HEX_LEN = 64;
|
|
893
448
|
const keyCache = /* @__PURE__ */ new WeakMap();
|
|
894
449
|
const stringKeyCache = /* @__PURE__ */ new Map();
|
|
895
450
|
async function getKey(env) {
|
|
@@ -929,4 +484,4 @@ async function verifySizeHint(env, key, bytes, providedHex) {
|
|
|
929
484
|
for (let i = 0; i < SIG_HEX_LEN; i++) diff |= expected.charCodeAt(i) ^ providedHex.charCodeAt(i);
|
|
930
485
|
return diff === 0;
|
|
931
486
|
}
|
|
932
|
-
export {
|
|
487
|
+
export { createDucklingsCodec, createDucklingsExecutor, createInflightDedupe, createR2Presigner, getAnalyticsEngine, getHostedR2QueryKey, getWasmDuckDBFactory, resetWasmDuckDB, signSizeHint, useAnalyticsEnv, verifySizeHint };
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
import { IcebergTableName } from "@gscdump/engine";
|
|
2
|
+
import { ArchetypeQuery, ArchetypeResult, ArchetypeResultRow } from "@gscdump/sdk";
|
|
3
|
+
import { ServerTailDirective } from "@gscdump/contracts";
|
|
4
|
+
/** A dialect-neutral SQL plan. */
|
|
5
|
+
interface ArchetypeSqlPlan {
|
|
6
|
+
/** SQL with `{{TABLE}}` standing in for the table reference. */
|
|
7
|
+
sql: string;
|
|
8
|
+
/** Bound parameters, in `?`-order. */
|
|
9
|
+
params: unknown[];
|
|
10
|
+
/** The Iceberg fact table this query reads. */
|
|
11
|
+
table: IcebergTableName;
|
|
12
|
+
}
|
|
13
|
+
/** Row returned by the DuckDB sibling. */
|
|
14
|
+
type DuckDbIcebergRow = Record<string, string | number | null>;
|
|
15
|
+
/**
|
|
16
|
+
* The minimal `DUCKDB_SVC` shape this executor needs — a structural subset of
|
|
17
|
+
* the binding in `workers-duckdb.ts` / `env.ts`. Any binding with `runSQL`
|
|
18
|
+
* satisfies it.
|
|
19
|
+
*/
|
|
20
|
+
interface DuckDbSvc {
|
|
21
|
+
runSQL: (args: {
|
|
22
|
+
sql: string;
|
|
23
|
+
}) => Promise<{
|
|
24
|
+
rows: unknown[];
|
|
25
|
+
sql: string;
|
|
26
|
+
}>;
|
|
27
|
+
}
|
|
28
|
+
/** Configuration for the DuckDB-over-Iceberg executor. */
|
|
29
|
+
interface DuckDbIcebergExecutorConfig {
|
|
30
|
+
/** The DuckDB service binding (the sibling Worker RPC). */
|
|
31
|
+
svc: DuckDbSvc;
|
|
32
|
+
/**
|
|
33
|
+
* R2 Data Catalog warehouse identifier. The sibling resolves Iceberg table
|
|
34
|
+
* locations from `<warehouse>` + `<namespace>` + table name.
|
|
35
|
+
*/
|
|
36
|
+
warehouse: string;
|
|
37
|
+
/** Iceberg namespace the 5 fact tables live in. */
|
|
38
|
+
namespace: string;
|
|
39
|
+
/**
|
|
40
|
+
* How the sibling addresses an Iceberg table in a `FROM` clause. Defaults to
|
|
41
|
+
* DuckDB's `iceberg_scan('<warehouse>/<namespace>/<table>')`. Overridable so
|
|
42
|
+
* a sibling configured with the Iceberg REST catalog can use
|
|
43
|
+
* `iceberg_scan('<namespace>.<table>')` or an attached-catalog reference.
|
|
44
|
+
*/
|
|
45
|
+
tableRefStyle?: 'path' | 'catalog';
|
|
46
|
+
/** Per-query wall-clock deadline (ms). Default 25s. */
|
|
47
|
+
timeoutMs?: number;
|
|
48
|
+
}
|
|
49
|
+
/** Result of a DuckDB-over-Iceberg query. */
|
|
50
|
+
interface DuckDbIcebergResult {
|
|
51
|
+
rows: DuckDbIcebergRow[];
|
|
52
|
+
/** The exact SQL sent to the sibling. */
|
|
53
|
+
sql: string;
|
|
54
|
+
queryMs: number;
|
|
55
|
+
}
|
|
56
|
+
declare class DuckDbIcebergError extends Error {
|
|
57
|
+
name: string;
|
|
58
|
+
}
|
|
59
|
+
declare class DuckDbIcebergTimeoutError extends Error {
|
|
60
|
+
name: string;
|
|
61
|
+
constructor(timeoutMs: number);
|
|
62
|
+
}
|
|
63
|
+
/** A configured DuckDB-over-Iceberg executor. */
|
|
64
|
+
interface DuckDbIcebergExecutor {
|
|
65
|
+
/** Run a raw SQL string with `{{TABLE_<name>}}` placeholders resolved. */
|
|
66
|
+
runSql: (sql: string, params?: readonly unknown[]) => Promise<DuckDbIcebergResult>;
|
|
67
|
+
/** Run a dialect-neutral plan: resolve `{{TABLE}}`, bind params, send. */
|
|
68
|
+
runPlan: (plan: ArchetypeSqlPlan) => Promise<DuckDbIcebergResult>;
|
|
69
|
+
/** Translate + run an archetype query. Handles `arbitrary-sql` verbatim. */
|
|
70
|
+
runArchetype: (query: ArchetypeQuery) => Promise<DuckDbIcebergResult>;
|
|
71
|
+
}
|
|
72
|
+
/**
|
|
73
|
+
* Create a DuckDB-over-Iceberg-files executor.
|
|
74
|
+
*/
|
|
75
|
+
declare function createDuckDbIcebergExecutor(config: DuckDbIcebergExecutorConfig): DuckDbIcebergExecutor;
|
|
76
|
+
/** Configuration for an R2 SQL client. */
|
|
77
|
+
interface R2SqlClientConfig {
|
|
78
|
+
/** Cloudflare account id. */
|
|
79
|
+
accountId: string;
|
|
80
|
+
/** R2 Data Catalog warehouse name (`<bucket>` or `<account>_<bucket>`). */
|
|
81
|
+
warehouse: string;
|
|
82
|
+
/** Iceberg namespace the 5 fact tables live in. */
|
|
83
|
+
namespace: string;
|
|
84
|
+
/** Cloudflare API token with R2 Data Catalog read scope. */
|
|
85
|
+
token: string;
|
|
86
|
+
/**
|
|
87
|
+
* Override the HTTP endpoint base. Defaults to the public CF API. Tests
|
|
88
|
+
* point this at a local recorder.
|
|
89
|
+
*/
|
|
90
|
+
apiBase?: string;
|
|
91
|
+
/**
|
|
92
|
+
* Injectable fetch. Defaults to global `fetch`. Tests pass a fake that
|
|
93
|
+
* returns a recorded CF envelope without a network round-trip.
|
|
94
|
+
*/
|
|
95
|
+
fetchImpl?: typeof fetch;
|
|
96
|
+
/** Per-query wall-clock deadline (ms). Default 25s — under the Worker CPU budget. */
|
|
97
|
+
timeoutMs?: number;
|
|
98
|
+
}
|
|
99
|
+
/** A row as returned by R2 SQL — flat dimension + metric values. */
|
|
100
|
+
type R2SqlRow = Record<string, string | number | null>;
|
|
101
|
+
/** Result of an R2 SQL query. */
|
|
102
|
+
interface R2SqlResult {
|
|
103
|
+
rows: R2SqlRow[];
|
|
104
|
+
/** The exact SQL sent (params already inlined). For diagnostics. */
|
|
105
|
+
sql: string;
|
|
106
|
+
/** Wall-clock duration of the HTTP round-trip. */
|
|
107
|
+
queryMs: number;
|
|
108
|
+
}
|
|
109
|
+
declare class R2SqlError extends Error {
|
|
110
|
+
readonly status?: number | undefined;
|
|
111
|
+
name: string;
|
|
112
|
+
constructor(message: string, status?: number | undefined);
|
|
113
|
+
}
|
|
114
|
+
declare class R2SqlTimeoutError extends Error {
|
|
115
|
+
name: string;
|
|
116
|
+
constructor(timeoutMs: number);
|
|
117
|
+
}
|
|
118
|
+
/** A configured R2 SQL client. */
|
|
119
|
+
interface R2SqlClient {
|
|
120
|
+
/** Run a raw SQL string (table reference already resolved). */
|
|
121
|
+
query: (sql: string) => Promise<R2SqlResult>;
|
|
122
|
+
/** Run a dialect-neutral plan: resolve `{{TABLE}}`, inline params, send. */
|
|
123
|
+
runPlan: (plan: ArchetypeSqlPlan) => Promise<R2SqlResult>;
|
|
124
|
+
/** Translate + run an archetype query end to end. */
|
|
125
|
+
runArchetype: (query: ArchetypeQuery) => Promise<R2SqlResult>;
|
|
126
|
+
}
|
|
127
|
+
/**
|
|
128
|
+
* Create an R2 SQL client. The endpoint requires a real CF token in
|
|
129
|
+
* production; tests inject `fetchImpl` returning a recorded envelope.
|
|
130
|
+
*/
|
|
131
|
+
declare function createR2SqlClient(config: R2SqlClientConfig): R2SqlClient;
|
|
132
|
+
/** The two engines the server tail can route to. */
|
|
133
|
+
type ServerTailEngine = 'r2-sql' | 'duckdb';
|
|
134
|
+
/** Executors the dispatcher routes between. */
|
|
135
|
+
interface ServerTailDispatcherConfig {
|
|
136
|
+
r2Sql: R2SqlClient;
|
|
137
|
+
duckdb: DuckDbIcebergExecutor;
|
|
138
|
+
}
|
|
139
|
+
declare class ServerTailRoutingError extends Error {
|
|
140
|
+
name: string;
|
|
141
|
+
}
|
|
142
|
+
/**
|
|
143
|
+
* Decide which engine answers an archetype query. Pure — no I/O. Exposed so
|
|
144
|
+
* the file-resolution endpoint can compute the `ServerTailDirective.engine`
|
|
145
|
+
* with the SAME logic the dispatcher uses at execution time.
|
|
146
|
+
*/
|
|
147
|
+
declare function resolveServerTailEngine(query: ArchetypeQuery): ServerTailEngine;
|
|
148
|
+
/** A configured server-tail dispatcher. */
|
|
149
|
+
interface ServerTailDispatcher {
|
|
150
|
+
/** Decide the engine for a query without running it. */
|
|
151
|
+
route: (query: ArchetypeQuery) => ServerTailEngine;
|
|
152
|
+
/**
|
|
153
|
+
* Execute a query, routing by execution class. If `directive` is supplied
|
|
154
|
+
* its `engine` is honoured only when consistent with the archetype's class
|
|
155
|
+
* (a `duckdb`-class archetype always runs on DuckDB regardless).
|
|
156
|
+
*/
|
|
157
|
+
execute: <R extends ArchetypeResultRow = ArchetypeResultRow>(query: ArchetypeQuery, directive?: ServerTailDirective) => Promise<ArchetypeResult<R>>;
|
|
158
|
+
}
|
|
159
|
+
/**
|
|
160
|
+
* Create the server-tail dispatcher. Holds an R2 SQL client and a DuckDB
|
|
161
|
+
* executor and routes every `ArchetypeQuery` to one of them.
|
|
162
|
+
*/
|
|
163
|
+
declare function createServerTailDispatcher(config: ServerTailDispatcherConfig): ServerTailDispatcher;
|
|
164
|
+
export { type ArchetypeSqlPlan, DuckDbIcebergError, type DuckDbIcebergExecutor, type DuckDbIcebergExecutorConfig, type DuckDbIcebergResult, type DuckDbIcebergRow, DuckDbIcebergTimeoutError, type DuckDbSvc, type R2SqlClient, type R2SqlClientConfig, R2SqlError, type R2SqlResult, type R2SqlRow, R2SqlTimeoutError, type ServerTailDispatcher, type ServerTailDispatcherConfig, type ServerTailEngine, ServerTailRoutingError, createDuckDbIcebergExecutor, createR2SqlClient, createServerTailDispatcher, resolveServerTailEngine };
|
|
@@ -0,0 +1,448 @@
|
|
|
1
|
+
import { bindLiterals, inferTable } from "@gscdump/engine";
|
|
2
|
+
import { ARCHETYPE_EXECUTION_CLASS } from "@gscdump/sdk";
|
|
3
|
+
/**
 * Error thrown by `resolveServerTailEngine` when an archetype cannot be
 * answered by either server-tail engine.
 */
var ServerTailRoutingError = class extends Error {
	/** Own `name` property so the error is identifiable without `instanceof`. */
	name = "ServerTailRoutingError";
};
|
|
6
|
+
/**
 * Pick the engine that answers an archetype query. Performs no I/O.
 *
 * @param query Archetype query; `archetype` and (for `top-n-breakdown`) `offset` are read.
 * @returns `"duckdb"` or `"r2-sql"`.
 * @throws ServerTailRoutingError when the archetype's execution class is `cloud-only`.
 */
function resolveServerTailEngine(query) {
	const { archetype } = query;
	switch (ARCHETYPE_EXECUTION_CLASS[archetype]) {
		case "cloud-only":
			throw new ServerTailRoutingError(`archetype '${archetype}' is cloud-only — not a server-tail query`);
		case "duckdb":
			return "duckdb";
	}
	// A paginated top-N breakdown is sent to DuckDB rather than R2 SQL.
	const paginatedTopN = archetype === "top-n-breakdown" && Boolean(query.offset) && query.offset > 0;
	return paginatedTopN ? "duckdb" : "r2-sql";
}
|
|
13
|
+
/**
 * Translate an engine id into the `source` tag placed on a result.
 * Anything other than `"r2-sql"` maps to `"server-duckdb"`.
 */
function sourceFor(engine) {
	if (engine === "r2-sql") return "server-r2-sql";
	return "server-duckdb";
}
|
|
16
|
+
/**
 * Build a dispatcher that routes archetype queries between the two executors.
 *
 * @param config `{ r2Sql, duckdb }` — each must expose `runArchetype(query)`.
 * @returns `{ route, execute }`.
 */
function createServerTailDispatcher(config) {
	/** Decide the engine for a query without running it. */
	function route(query) {
		return resolveServerTailEngine(query);
	}
	/** Run `query` on the chosen engine and wrap the rows in the result envelope. */
	async function runOn(engine, query) {
		const onR2Sql = engine === "r2-sql";
		const executor = onR2Sql ? config.r2Sql : config.duckdb;
		const res = await executor.runArchetype(query);
		return {
			archetype: query.archetype,
			rows: res.rows,
			source: sourceFor(onR2Sql ? "r2-sql" : "duckdb"),
			meta: {
				rowCount: res.rows.length,
				queryMs: res.queryMs
			}
		};
	}
	/**
	 * Execute a query. A directive can only move an R2-SQL-routed query onto
	 * DuckDB; a query routed to DuckDB always stays there.
	 */
	async function execute(query, directive) {
		const routed = route(query);
		const movedToDuckDb = Boolean(directive) && routed === "r2-sql" && directive.engine !== routed;
		return runOn(movedToDuckDb ? "duckdb" : routed, query);
	}
	return {
		route,
		execute
	};
}
|
|
54
|
+
const TABLE_PLACEHOLDER = "{{TABLE}}";
|
|
55
|
+
/**
 * Map a query dimension to its column name. `page` and `queryCanonical` are
 * renamed; every other dimension is returned unchanged.
 */
function dimColumn(dim) {
	switch (dim) {
		case "page":
			return "url";
		case "queryCanonical":
			return "query_canonical";
		default:
			return dim;
	}
}
|
|
60
|
+
/**
 * Build the aliased SELECT expression for a metric.
 *
 * @param metric One of `clicks`, `impressions`, `ctr`, `position`.
 * @returns SQL fragment of the form `<aggregate> AS <metric>`.
 * @throws Error for any other metric. Previously the function fell through
 *   and returned `undefined`, which callers spliced into SQL as the literal
 *   text "undefined".
 */
function metricExpr(metric) {
	switch (metric) {
		case "clicks": return "SUM(clicks) AS clicks";
		case "impressions": return "SUM(impressions) AS impressions";
		// NULLIF guards the division when there are no impressions.
		case "ctr": return "SUM(clicks) / NULLIF(SUM(impressions), 0) AS ctr";
		case "position": return "SUM(sum_position) / NULLIF(SUM(impressions), 0) AS position";
		default: throw new Error(`metricExpr: unsupported metric '${String(metric)}'`);
	}
}
|
|
68
|
+
/**
 * Build the un-aliased aggregate for a metric, for use in ORDER BY.
 *
 * @param metric One of `clicks`, `impressions`, `ctr`, `position`.
 * @returns The aggregate expression without an `AS` alias.
 * @throws Error for any other metric, instead of returning `undefined` and
 *   producing `ORDER BY undefined`.
 */
function orderMetricExpr(metric) {
	switch (metric) {
		case "clicks": return "SUM(clicks)";
		case "impressions": return "SUM(impressions)";
		case "ctr": return "SUM(clicks) / NULLIF(SUM(impressions), 0)";
		case "position": return "SUM(sum_position) / NULLIF(SUM(impressions), 0)";
		default: throw new Error(`orderMetricExpr: unsupported metric '${String(metric)}'`);
	}
}
|
|
76
|
+
const DEVICE_SUFFIXES = [
|
|
77
|
+
"desktop",
|
|
78
|
+
"mobile",
|
|
79
|
+
"tablet"
|
|
80
|
+
];
|
|
81
|
+
/**
 * Like `metricExpr`, but aggregates over caller-supplied source columns.
 *
 * @param metric One of `clicks`, `impressions`, `ctr`, `position`.
 * @param source `{ clicks, impressions, sumPosition }` column names to aggregate.
 * @returns SQL fragment aliased to the metric name.
 * @throws Error for any other metric, instead of returning `undefined`.
 */
function metricExprForSource(metric, source) {
	switch (metric) {
		case "clicks": return `SUM(${source.clicks}) AS clicks`;
		case "impressions": return `SUM(${source.impressions}) AS impressions`;
		case "ctr": return `SUM(${source.clicks}) / NULLIF(SUM(${source.impressions}), 0) AS ctr`;
		case "position": return `SUM(${source.sumPosition}) / NULLIF(SUM(${source.impressions}), 0) AS position`;
		default: throw new Error(`metricExprForSource: unsupported metric '${String(metric)}'`);
	}
}
|
|
89
|
+
/**
 * Name the per-device metric columns for a device suffix,
 * e.g. `mobile` -> `clicks_mobile`, `impressions_mobile`, `sum_position_mobile`.
 */
function deviceSource(suffix) {
	const column = (base) => `${base}_${suffix}`;
	return {
		clicks: column("clicks"),
		impressions: column("impressions"),
		sumPosition: column("sum_position")
	};
}
|
|
96
|
+
/**
 * Render a string as a single-quoted SQL literal, doubling every embedded
 * single quote.
 */
function sqlStringLiteral(value) {
	const escaped = value.split("'").join("''");
	return "'" + escaped + "'";
}
|
|
99
|
+
/**
 * Build the predicate every archetype query shares: site, search type and
 * an inclusive date range.
 *
 * @param q Query carrying `siteId`, `searchType` and `range.{start,end}`.
 * @returns `{ clause, params }` — four `?` placeholders and their values, in order.
 */
function partitionWhere(q) {
	const clause = "site_id = ? AND search_type = ? AND date BETWEEN ? AND ?";
	const { start, end } = q.range;
	const params = [q.siteId, q.searchType, start, end];
	return { clause, params };
}
|
|
110
|
+
/**
 * Plan for the site-wide daily series: one row per date from the `dates`
 * table, carrying the requested metrics.
 */
function buildSiteDailyTimeseries(q) {
	const { clause, params } = partitionWhere(q);
	const metricList = q.metrics.map((metric) => metricExpr(metric)).join(", ");
	const sql = [
		`SELECT date, ${metricList}`,
		`FROM ${TABLE_PLACEHOLDER}`,
		`WHERE ${clause}`,
		"GROUP BY date",
		"ORDER BY date ASC"
	].join(" ");
	return { table: "dates", params, sql };
}
|
|
119
|
+
/**
 * Plan for the daily series of a single entity (`q.entity.dimension` equal
 * to `q.entity.value`). The table is inferred from the entity's dimension.
 */
function buildEntityDailyTimeseries(q) {
	const { dimension, value } = q.entity;
	const table = inferTable([dimension]);
	const scope = partitionWhere(q);
	const column = dimColumn(dimension);
	const metricList = q.metrics.map((metric) => metricExpr(metric)).join(", ");
	const sql = [
		`SELECT date, ${metricList}`,
		`FROM ${TABLE_PLACEHOLDER}`,
		`WHERE ${scope.clause} AND ${column} = ?`,
		"GROUP BY date",
		"ORDER BY date ASC"
	].join(" ");
	// The entity value is bound last, after the four partition parameters.
	return { table, params: [...scope.params, value], sql };
}
|
|
130
|
+
/**
 * Plan for per-entity daily sparklines: one metric, grouped by date and
 * entity, restricted to an explicit entity list.
 *
 * @throws Error when `q.entities` is empty.
 */
function buildEntityDailySparkline(q) {
	const table = inferTable([q.dimension]);
	const { clause, params } = partitionWhere(q);
	const column = dimColumn(q.dimension);
	if (q.entities.length === 0) {
		throw new Error("entity-daily-sparkline: empty entities — resolver must pre-resolve the top-N list");
	}
	// Entity names are inlined as escaped literals, not bound parameters.
	const literals = q.entities.map((entity) => sqlStringLiteral(entity)).join(", ");
	const sql = [
		`SELECT date, ${column}, ${metricExpr(q.metric)}`,
		`FROM ${TABLE_PLACEHOLDER}`,
		`WHERE ${clause} AND ${column} IN (${literals})`,
		`GROUP BY date, ${column}`,
		"ORDER BY date ASC"
	].join(" ");
	return { table, params, sql };
}
|
|
142
|
+
/**
 * Plan for a top-N breakdown by one dimension.
 *
 * For `device` there is no grouping column: one SELECT per device suffix
 * reads the suffixed metric columns, and the three are combined with
 * UNION ALL. Every other dimension is a plain GROUP BY.
 *
 * `orderBy.metric`, `orderBy.dir`, `limit` and `offset` end up as raw SQL
 * text (they are not bound parameters), so each is validated before being
 * spliced in.
 *
 * @throws Error when the order metric or direction is not recognised, or
 *   when `limit` / `offset` is not a finite number.
 */
function buildTopNBreakdown(q) {
	const table = inferTable([q.dimension]);
	const w = partitionWhere(q);
	const orderMetric = q.orderBy.metric;
	if (!["clicks", "impressions", "ctr", "position"].includes(orderMetric)) {
		throw new Error(`top-n-breakdown: unsupported orderBy.metric '${String(orderMetric)}'`);
	}
	const dir = String(q.orderBy.dir).toUpperCase();
	if (dir !== "ASC" && dir !== "DESC") {
		throw new Error(`top-n-breakdown: orderBy.dir must be 'asc' or 'desc', got '${String(q.orderBy.dir)}'`);
	}
	const limit = Math.floor(Number(q.limit));
	// Without this check a missing limit rendered as `LIMIT NaN`.
	if (!Number.isFinite(limit)) {
		throw new Error(`top-n-breakdown: limit must be a finite number, got '${String(q.limit)}'`);
	}
	let paging = ` LIMIT ${Math.max(0, limit)}`;
	if (q.offset && q.offset > 0) {
		const offset = Math.floor(Number(q.offset));
		if (!Number.isFinite(offset)) {
			throw new Error(`top-n-breakdown: offset must be a finite number, got '${String(q.offset)}'`);
		}
		paging += ` OFFSET ${offset}`;
	}
	if (q.dimension === "device") {
		// ORDER BY refers to the metric's alias, so it must be in the SELECT list.
		const metricList = q.metrics.includes(orderMetric) ? q.metrics : [...q.metrics, orderMetric];
		const selects = DEVICE_SUFFIXES.map((suffix) => {
			const source = deviceSource(suffix);
			const metrics = metricList.map((m) => metricExprForSource(m, source)).join(", ");
			return `SELECT '${suffix.toUpperCase()}' AS device, ${metrics} FROM ${TABLE_PLACEHOLDER} WHERE ${w.clause}`;
		});
		return {
			table,
			// Each per-device SELECT repeats the partition placeholders.
			params: DEVICE_SUFFIXES.flatMap(() => w.params),
			sql: `${selects.join(" UNION ALL ")} ORDER BY ${orderMetric} ${dir}${paging}`
		};
	}
	const col = dimColumn(q.dimension);
	const metrics = q.metrics.map(metricExpr).join(", ");
	return {
		table,
		params: w.params,
		sql: `SELECT ${col}, ${metrics} FROM ${TABLE_PLACEHOLDER} WHERE ${w.clause} GROUP BY ${col} ORDER BY ${orderMetricExpr(orderMetric)} ${dir}${paging}`
	};
}
|
|
171
|
+
/**
 * Plan for a lookup that matches every dimension in `q.match` by equality.
 * The matched dimensions are selected and grouped on; with no dimensions the
 * query selects only the metrics and has no GROUP BY.
 */
function buildSingleRowLookup(q) {
	const dims = Object.keys(q.match);
	const table = inferTable(dims);
	const scope = partitionWhere(q);
	const columns = dims.map((dim) => dimColumn(dim));
	// One equality predicate and one bound value per matched dimension, in key order.
	const clause = [scope.clause, ...columns.map((column) => `${column} = ?`)].join(" AND ");
	const params = [...scope.params, ...dims.map((dim) => q.match[dim])];
	const metricList = q.metrics.map((metric) => metricExpr(metric)).join(", ");
	const columnList = columns.join(", ");
	const hasDims = dims.length > 0;
	const selectList = hasDims ? `${columnList}, ${metricList}` : metricList;
	const groupBy = hasDims ? ` GROUP BY ${columnList}` : "";
	return {
		table,
		params,
		sql: `SELECT ${selectList} FROM ${TABLE_PLACEHOLDER} WHERE ${clause}${groupBy}`
	};
}
|
|
189
|
+
/**
 * Plan for a daily series split by `q.seriesDimension`. `device` is built
 * as a UNION ALL over the per-device metric columns; any other dimension is
 * grouped by date and the dimension's column.
 */
function buildMultiSeriesStackedDaily(q) {
	const table = inferTable([q.seriesDimension]);
	const scope = partitionWhere(q);
	if (q.seriesDimension !== "device") {
		const column = dimColumn(q.seriesDimension);
		return {
			table,
			params: scope.params,
			sql: `SELECT date, ${column}, ${metricExpr(q.metric)} FROM ${TABLE_PLACEHOLDER} WHERE ${scope.clause} GROUP BY date, ${column} ORDER BY date ASC`
		};
	}
	const perDevice = [];
	for (const suffix of DEVICE_SUFFIXES) {
		const metric = metricExprForSource(q.metric, deviceSource(suffix));
		perDevice.push(`SELECT date, '${suffix.toUpperCase()}' AS device, ${metric} FROM ${TABLE_PLACEHOLDER} WHERE ${scope.clause} GROUP BY date`);
	}
	return {
		table,
		// The partition parameters are repeated once per device SELECT.
		params: DEVICE_SUFFIXES.flatMap(() => scope.params),
		sql: `${perDevice.join(" UNION ALL ")} ORDER BY date ASC, device ASC`
	};
}
|
|
210
|
+
/**
 * Plan for a preset analyzer over `page_queries`, expressed as a HAVING
 * filter on per-(url, query) aggregates.
 *
 * Supported presets: `striking-distance`, `opportunity`, `zero-click`.
 * In this build the last two share the same filter.
 *
 * @param q Query with `presetId` and optional `params`
 *   (`minImpressions` default 100, `limit` default 1000,
 *   `minPosition` / `maxPosition` defaults 11 / 20).
 * @throws Error for any other preset id.
 */
function buildPresetAnalyzer(q) {
	const params = q.params ?? {};
	const minImpressions = Number(params.minImpressions ?? 100);
	const requestedLimit = Number(params.limit ?? 1e3);
	// A non-numeric limit used to render as `LIMIT NaN`; fall back to the default.
	const limit = Number.isFinite(requestedLimit) ? Math.max(1, Math.floor(requestedLimit)) : 1e3;
	const w = partitionWhere(q);
	const wp = [...w.params];
	let having;
	switch (q.presetId) {
		case "striking-distance":
			having = `HAVING SUM(impressions) >= ? AND (SUM(sum_position) / NULLIF(SUM(impressions), 0)) BETWEEN ? AND ?`;
			wp.push(minImpressions, Number(params.minPosition ?? 11), Number(params.maxPosition ?? 20));
			break;
		case "opportunity":
		case "zero-click":
			having = `HAVING SUM(impressions) >= ? AND SUM(clicks) = 0`;
			wp.push(minImpressions);
			break;
		default: throw new Error(`preset-analyzer: preset '${q.presetId}' is not R2-SQL-safe — window-function presets must be sent as archetype 'arbitrary-sql'`);
	}
	return {
		table: "page_queries",
		params: wp,
		sql: `SELECT url, query, SUM(clicks) AS clicks, SUM(impressions) AS impressions, SUM(clicks) / NULLIF(SUM(impressions), 0) AS ctr, SUM(sum_position) / NULLIF(SUM(impressions), 0) AS position FROM ${TABLE_PLACEHOLDER} WHERE ${w.clause} GROUP BY url, query ${having} ORDER BY SUM(impressions) DESC LIMIT ${limit}`
	};
}
|
|
238
|
+
/**
 * Plan for the (url, query) detail table, optionally filtered to one page
 * and/or one query, ordered and limited.
 *
 * The ORDER BY direction and the LIMIT value are raw SQL text, so both are
 * validated before being spliced in.
 *
 * @throws Error when `orderBy.metric` or `orderBy.dir` is not recognised,
 *   or when a positive `limit` is not finite.
 */
function buildTwoDimensionDetail(q) {
	const w = partitionWhere(q);
	const params = [...w.params];
	let clause = w.clause;
	if (q.filter?.page) {
		clause += ` AND url = ?`;
		params.push(q.filter.page);
	}
	if (q.filter?.query) {
		clause += ` AND query = ?`;
		params.push(q.filter.query);
	}
	let sql = `SELECT url, query, ${q.metrics.map(metricExpr).join(", ")} FROM ${TABLE_PLACEHOLDER} WHERE ${clause} GROUP BY url, query`;
	if (q.orderBy) {
		const orderExpr = orderMetricExpr(q.orderBy.metric);
		// Guards against an unknown metric yielding `ORDER BY undefined`.
		if (typeof orderExpr !== "string") {
			throw new Error(`two-dimension-detail: unsupported orderBy.metric '${String(q.orderBy.metric)}'`);
		}
		const dir = String(q.orderBy.dir).toUpperCase();
		if (dir !== "ASC" && dir !== "DESC") {
			throw new Error(`two-dimension-detail: orderBy.dir must be 'asc' or 'desc', got '${String(q.orderBy.dir)}'`);
		}
		sql += ` ORDER BY ${orderExpr} ${dir}`;
	}
	if (q.limit && q.limit > 0) {
		const limit = Math.floor(Number(q.limit));
		if (!Number.isFinite(limit)) {
			throw new Error(`two-dimension-detail: limit must be a finite number, got '${String(q.limit)}'`);
		}
		sql += ` LIMIT ${limit}`;
	}
	return {
		table: "page_queries",
		params,
		sql
	};
}
|
|
259
|
+
/**
 * Translate an archetype query into a dialect-neutral SQL plan
 * (`{ table, params, sql }`, with `{{TABLE}}` standing in for the table).
 *
 * @throws Error for `arbitrary-sql` and `aux-cloud-only`, which have no plan,
 *   and for any archetype this builder does not know. Previously an unknown
 *   archetype returned `undefined`, which callers then dereferenced.
 */
function buildArchetypeSql(query) {
	switch (query.archetype) {
		case "site-daily-timeseries": return buildSiteDailyTimeseries(query);
		case "entity-daily-timeseries": return buildEntityDailyTimeseries(query);
		case "entity-daily-sparkline": return buildEntityDailySparkline(query);
		case "top-n-breakdown": return buildTopNBreakdown(query);
		case "single-row-lookup": return buildSingleRowLookup(query);
		case "multi-series-stacked-daily": return buildMultiSeriesStackedDaily(query);
		case "preset-analyzer": return buildPresetAnalyzer(query);
		case "two-dimension-detail": return buildTwoDimensionDetail(query);
		case "arbitrary-sql": throw new Error("buildArchetypeSql: arbitrary-sql carries caller SQL — the DuckDB executor runs it verbatim");
		case "aux-cloud-only": throw new Error("buildArchetypeSql: aux-cloud-only is not an Iceberg query");
		default: throw new Error(`buildArchetypeSql: unsupported archetype '${String(query.archetype)}'`);
	}
}
|
|
273
|
+
/**
 * Error raised by the DuckDB-over-Iceberg executor for failures other than
 * a deadline overrun.
 */
var DuckDbIcebergError = class extends Error {
	/** Own `name` property so the error is identifiable without `instanceof`. */
	name = "DuckDbIcebergError";
};
|
|
276
|
+
/**
 * Error raised when a DuckDB-over-Iceberg query overruns its deadline.
 * The message embeds the deadline in milliseconds.
 */
var DuckDbIcebergTimeoutError = class extends Error {
	constructor(timeoutMs) {
		super(`DuckDB-over-Iceberg query exceeded ${timeoutMs}ms deadline`);
		this.name = "DuckDbIcebergTimeoutError";
	}
};
|
|
282
|
+
const DEFAULT_TIMEOUT_MS$1 = 25e3;
|
|
283
|
+
/**
 * Render the FROM-clause reference for an Iceberg table.
 *
 * @param config Reads `tableRefStyle`, `warehouse` and `namespace`.
 * @param table Table name.
 * @returns `<namespace>.<table>` for the `catalog` style, otherwise
 *   `iceberg_scan('<warehouse>/<namespace>/<table>')`.
 */
function icebergTableRef(config, table) {
	if (config.tableRefStyle === "catalog") return `${config.namespace}.${table}`;
	const location = `${config.warehouse}/${config.namespace}/${table}`;
	// The path sits inside a single-quoted literal: double any embedded quote
	// so it cannot terminate the literal early.
	return `iceberg_scan('${location.replace(/'/g, "''")}')`;
}
|
|
287
|
+
/**
 * Settle with `op`'s outcome, or reject with `DuckDbIcebergTimeoutError` if
 * `op` has not settled within `timeoutMs`. The timer is always cleared once
 * the returned promise settles. `op` itself is not cancelled.
 */
async function withDeadline(op, timeoutMs) {
	let timer;
	try {
		return await new Promise((resolve, reject) => {
			timer = setTimeout(() => reject(new DuckDbIcebergTimeoutError(timeoutMs)), timeoutMs);
			op.then(resolve, reject);
		});
	} finally {
		clearTimeout(timer);
	}
}
|
|
293
|
+
/**
 * Replace every `{{...}}` placeholder in raw SQL with an Iceberg table
 * reference.
 *
 * The executor's declared contract names the placeholder form
 * `{{TABLE_<name>}}`, so an optional `TABLE_` prefix is stripped before the
 * name is used as the table. The bare form `{{<name>}}` keeps working.
 */
function resolveTablePlaceholders(sql, config) {
	return sql.replace(/\{\{(?:TABLE_)?(\w+)\}\}/g, (_, table) => icebergTableRef(config, table));
}
|
|
296
|
+
/**
 * Create a DuckDB-over-Iceberg executor on top of a `runSQL` service binding.
 *
 * @param config `{ svc, warehouse, namespace, tableRefStyle?, timeoutMs? }`.
 * @returns `{ runSql, runPlan, runArchetype }`.
 */
function createDuckDbIcebergExecutor(config) {
	const timeoutMs = config.timeoutMs ?? DEFAULT_TIMEOUT_MS$1;
	/**
	 * Send fully-bound SQL to the service under the deadline.
	 * @throws DuckDbIcebergTimeoutError on deadline overrun.
	 * @throws DuckDbIcebergError for any other failure, with the original
	 *   error attached as `cause`.
	 */
	async function send(sql) {
		const started = Date.now();
		let result;
		try {
			// Calling runSQL inside the try also wraps a synchronous throw.
			result = await withDeadline(config.svc.runSQL({ sql }), timeoutMs);
		} catch (err) {
			if (err instanceof DuckDbIcebergTimeoutError) throw err;
			// A rejection value need not be an Error; avoid rendering "undefined".
			const detail = typeof err?.message === "string" ? err.message : String(err);
			throw new DuckDbIcebergError(`DUCKDB_SVC.runSQL failed: ${detail}`, { cause: err });
		}
		return {
			rows: result?.rows ?? [],
			sql: result?.sql ?? sql,
			queryMs: Date.now() - started
		};
	}
	/** Run raw SQL: resolve table placeholders, bind params, send. */
	function runSql(sql, params = []) {
		return send(bindLiterals(resolveTablePlaceholders(sql, config), params));
	}
	/** Run a plan: substitute `{{TABLE}}` with the plan's table, bind params, send. */
	function runPlan(plan) {
		return send(bindLiterals(plan.sql.split(TABLE_PLACEHOLDER).join(icebergTableRef(config, plan.table)), plan.params));
	}
	/** Translate and run an archetype query; `arbitrary-sql` runs the caller's SQL. */
	async function runArchetype(query) {
		if (query.archetype === "arbitrary-sql") return runSql(query.sql, query.params ?? []);
		if (query.archetype === "aux-cloud-only") throw new DuckDbIcebergError("aux-cloud-only is not an Iceberg query");
		return runPlan(buildArchetypeSql(query));
	}
	return {
		runSql,
		runPlan,
		runArchetype
	};
}
|
|
327
|
+
/** Render the dotted `<namespace>.<table>` reference used in R2 SQL. */
function r2TableRef(namespace, table) {
	return "".concat(namespace, ".", table);
}
|
|
330
|
+
/**
 * Error raised by the R2 SQL client. `status` carries the HTTP status when
 * the failure came from an HTTP response and is `undefined` otherwise.
 */
var R2SqlError = class extends Error {
	status;
	constructor(message, status) {
		super(message);
		this.name = "R2SqlError";
		this.status = status;
	}
};
|
|
338
|
+
/**
 * Error raised when an R2 SQL query overruns its deadline. The message
 * embeds the deadline in milliseconds.
 */
var R2SqlTimeoutError = class extends Error {
	constructor(timeoutMs) {
		super(`R2 SQL query exceeded ${timeoutMs}ms deadline`);
		this.name = "R2SqlTimeoutError";
	}
};
|
|
344
|
+
const DEFAULT_API_BASE = "https://api.cloudflare.com/client/v4";
|
|
345
|
+
const DEFAULT_TIMEOUT_MS = 25e3;
|
|
346
|
+
/**
 * Render a bound parameter as an inline SQL literal.
 *
 * - `null` / `undefined` -> `NULL`
 * - finite numbers and bigints -> their decimal text
 * - booleans -> `TRUE` / `FALSE`
 * - `Date` -> single-quoted ISO-8601 timestamp (previously `String(date)`,
 *   which yields a locale-style string rather than a parseable timestamp)
 * - anything else -> `String(value)`, single-quoted with `'` doubled
 *
 * @throws R2SqlError for non-finite numbers and invalid dates.
 */
function escapeSqlValue(value) {
	if (value === null || value === void 0) return "NULL";
	if (typeof value === "number") {
		if (!Number.isFinite(value)) throw new R2SqlError(`cannot embed non-finite number in SQL: ${value}`);
		return String(value);
	}
	if (typeof value === "bigint") return value.toString();
	if (typeof value === "boolean") return value ? "TRUE" : "FALSE";
	if (value instanceof Date) {
		// toISOString() throws RangeError on an invalid date; surface our own error type.
		if (Number.isNaN(value.getTime())) throw new R2SqlError("cannot embed invalid Date in SQL");
		return `'${value.toISOString()}'`;
	}
	return `'${String(value).replace(/'/g, "''")}'`;
}
|
|
356
|
+
/**
 * Substitute each `?` placeholder in `sql` with the escaped literal of the
 * matching entry in `params`, in order.
 *
 * Text inside single-quoted string literals and double-quoted identifiers is
 * copied through untouched, including doubled-quote escapes. Previously only
 * single quotes were tracked, so a `?` or `'` inside a double-quoted
 * identifier was misread.
 *
 * @throws R2SqlError when the number of placeholders and params differ.
 */
function inlineParams(sql, params) {
	let out = "";
	let paramIndex = 0;
	// The quote character of the literal/identifier being scanned, or null outside one.
	let quote = null;
	for (let i = 0; i < sql.length; i++) {
		const ch = sql[i];
		if (quote !== null) {
			out += ch;
			if (ch === quote) {
				if (sql[i + 1] === quote) {
					// Doubled quote: an escaped quote character, still inside.
					out += quote;
					i++;
				} else {
					quote = null;
				}
			}
			continue;
		}
		if (ch === "'" || ch === "\"") {
			quote = ch;
			out += ch;
			continue;
		}
		if (ch === "?") {
			if (paramIndex >= params.length) throw new R2SqlError(`SQL has more ? placeholders than params (${params.length})`);
			out += escapeSqlValue(params[paramIndex++]);
			continue;
		}
		out += ch;
	}
	if (paramIndex !== params.length) throw new R2SqlError(`SQL has ${paramIndex} ? placeholders but ${params.length} params supplied`);
	return out;
}
|
|
382
|
+
/**
 * Normalise an R2 SQL `result` payload into an array of row objects.
 *
 * Accepts either `{ rows: [...] }` (returned as-is) or the columnar form
 * `{ columns: [...], data: [[...], ...] }`, which is zipped into objects
 * with missing cells filled with `null`. Anything else yields `[]`.
 */
function normalizeRows(result) {
	if (!result) return [];
	if (Array.isArray(result.rows)) return result.rows;
	const columnar = Array.isArray(result.columns) && Array.isArray(result.data);
	if (!columnar) return [];
	const names = result.columns;
	const rows = [];
	for (const tuple of result.data) {
		const row = {};
		for (let idx = 0; idx < names.length; idx++) {
			row[names[idx]] = tuple[idx] ?? null;
		}
		rows.push(row);
	}
	return rows;
}
|
|
397
|
+
/**
 * Create an R2 SQL client that POSTs queries to the catalog's SQL endpoint.
 *
 * @param config `{ accountId, warehouse, namespace, token, apiBase?, fetchImpl?, timeoutMs? }`.
 * @returns `{ query, runPlan, runArchetype }`.
 */
function createR2SqlClient(config) {
	const fetchImpl = config.fetchImpl ?? globalThis.fetch;
	const apiBase = config.apiBase ?? DEFAULT_API_BASE;
	const timeoutMs = config.timeoutMs ?? DEFAULT_TIMEOUT_MS;
	const endpoint = `${apiBase}/accounts/${config.accountId}/r2-catalog/${config.warehouse}/sql`;
	/**
	 * Run a raw SQL string.
	 * @throws R2SqlTimeoutError when the deadline elapses at any point up to
	 *   and including reading the response body.
	 * @throws R2SqlError for transport failures, non-2xx responses, unreadable
	 *   bodies and envelopes without `success`.
	 */
	async function query(sql) {
		const started = Date.now();
		const controller = new AbortController();
		const timer = setTimeout(() => controller.abort(new R2SqlTimeoutError(timeoutMs)), timeoutMs);
		// Only the deadline timer aborts this controller, so an aborted signal means timeout.
		const timedOut = (err) => err instanceof R2SqlTimeoutError || err?.name === "AbortError" || controller.signal.aborted;
		const describe = (err) => typeof err?.message === "string" ? err.message : String(err);
		try {
			let response;
			try {
				response = await fetchImpl(endpoint, {
					method: "POST",
					headers: {
						"authorization": `Bearer ${config.token}`,
						"content-type": "application/json"
					},
					body: JSON.stringify({ query: sql }),
					signal: controller.signal
				});
			} catch (err) {
				if (timedOut(err)) throw new R2SqlTimeoutError(timeoutMs);
				throw new R2SqlError(`R2 SQL request failed: ${describe(err)}`);
			}
			if (!response.ok) {
				const text = await response.text().catch(() => "");
				throw new R2SqlError(`R2 SQL HTTP ${response.status}: ${text}`, response.status);
			}
			let envelope;
			try {
				envelope = await response.json();
			} catch (err) {
				if (timedOut(err)) throw new R2SqlTimeoutError(timeoutMs);
				throw new R2SqlError(`R2 SQL returned an unreadable response body: ${describe(err)}`, response.status);
			}
			if (!envelope?.success) {
				// `||` rather than `??`: joining an empty errors array gives "", not undefined.
				const detail = envelope?.errors?.map((e) => e.message).filter(Boolean).join("; ");
				throw new R2SqlError(`R2 SQL query rejected: ${detail || "unknown R2 SQL error"}`);
			}
			return {
				rows: normalizeRows(envelope.result),
				sql,
				queryMs: Date.now() - started
			};
		} finally {
			// Cleared only here so the deadline also covers reading the body.
			clearTimeout(timer);
		}
	}
	/** Run a plan: substitute `{{TABLE}}` with `<namespace>.<table>`, inline params, send. */
	function runPlan(plan) {
		const tableRef = r2TableRef(config.namespace, plan.table);
		return query(inlineParams(plan.sql.split(TABLE_PLACEHOLDER).join(tableRef), plan.params));
	}
	/** Translate an archetype query to a plan and run it. */
	function runArchetype(archetypeQuery) {
		return runPlan(buildArchetypeSql(archetypeQuery));
	}
	return {
		query,
		runPlan,
		runArchetype
	};
}
|
|
448
|
+
export { DuckDbIcebergError, DuckDbIcebergTimeoutError, R2SqlError, R2SqlTimeoutError, ServerTailRoutingError, createDuckDbIcebergExecutor, createR2SqlClient, createServerTailDispatcher, resolveServerTailEngine };
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@gscdump/cloudflare",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.23.2",
|
|
4
|
+
"version": "0.24.0",
|
|
5
5
|
"description": "Cloudflare-Workers-flavored helpers for the gscdump analytics stack: AnalyticsEnv binding contract, R2 SigV4 presigner, size-hint HMAC, DuckDB Workers shims, engine factory.",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "Harlan Wilton",
|
|
@@ -25,6 +25,11 @@
|
|
|
25
25
|
"types": "./dist/index.d.mts",
|
|
26
26
|
"import": "./dist/index.mjs",
|
|
27
27
|
"default": "./dist/index.mjs"
|
|
28
|
+
},
|
|
29
|
+
"./server-tail": {
|
|
30
|
+
"types": "./dist/server-tail/index.d.mts",
|
|
31
|
+
"import": "./dist/server-tail/index.mjs",
|
|
32
|
+
"default": "./dist/server-tail/index.mjs"
|
|
28
33
|
}
|
|
29
34
|
},
|
|
30
35
|
"main": "./dist/index.mjs",
|
|
@@ -41,20 +46,23 @@
|
|
|
41
46
|
"dependencies": {
|
|
42
47
|
"@uwdata/flechette": "^2.5.0",
|
|
43
48
|
"aws4fetch": "^1.0.20",
|
|
44
|
-
"@gscdump/
|
|
45
|
-
"@gscdump/
|
|
46
|
-
"@gscdump/
|
|
47
|
-
"@gscdump/
|
|
48
|
-
"gscdump": "0.
|
|
49
|
+
"@gscdump/contracts": "0.24.0",
|
|
50
|
+
"@gscdump/engine-sqlite": "0.24.0",
|
|
51
|
+
"@gscdump/engine": "0.24.0",
|
|
52
|
+
"@gscdump/sdk": "0.24.0",
|
|
53
|
+
"gscdump": "0.24.0"
|
|
49
54
|
},
|
|
50
55
|
"devDependencies": {
|
|
56
|
+
"@cloudflare/vitest-pool-workers": "^0.16.10",
|
|
51
57
|
"@cloudflare/workers-types": "^4.20260527.1",
|
|
52
58
|
"h3": "^1.15.11",
|
|
53
|
-
"typescript": "^6.0.3"
|
|
59
|
+
"typescript": "^6.0.3",
|
|
60
|
+
"wrangler": "^4.95.0"
|
|
54
61
|
},
|
|
55
62
|
"scripts": {
|
|
56
63
|
"build": "obuild",
|
|
57
64
|
"dev": "obuild --stub",
|
|
58
|
-
"typecheck": "tsc --noEmit"
|
|
65
|
+
"typecheck": "tsc --noEmit",
|
|
66
|
+
"test:workers": "GSCDUMP_E2E=1 vitest --run --config vitest.workers.config.ts"
|
|
59
67
|
}
|
|
60
68
|
}
|