@gscdump/cloudflare 0.27.2 → 0.28.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +1 -11
- package/dist/index.mjs +1 -39
- package/dist/server-tail/index.d.mts +20 -2
- package/dist/server-tail/index.mjs +30 -25
- package/package.json +5 -5
package/dist/index.d.mts
CHANGED
|
@@ -79,16 +79,6 @@ interface HostedR2QueryKeyInput {
|
|
|
79
79
|
comparisonFilter?: string;
|
|
80
80
|
}
|
|
81
81
|
declare function getHostedR2QueryKey(input: HostedR2QueryKeyInput): string;
|
|
82
|
-
interface PresignOptions {
|
|
83
|
-
key: string;
|
|
84
|
-
bucket: string;
|
|
85
|
-
expiresIn?: number;
|
|
86
|
-
}
|
|
87
|
-
declare function createR2Presigner(env: AnalyticsEnv): ({
|
|
88
|
-
key,
|
|
89
|
-
bucket,
|
|
90
|
-
expiresIn
|
|
91
|
-
}: PresignOptions) => Promise<string>;
|
|
92
82
|
declare function signSizeHint(env: AnalyticsEnv, key: string, bytes: number): Promise<string>;
|
|
93
83
|
declare function verifySizeHint(env: AnalyticsEnv, key: string, bytes: number, providedHex: string): Promise<boolean>;
|
|
94
84
|
declare function createDucklingsCodec(_env: AnalyticsEnv): ParquetCodec;
|
|
@@ -98,4 +88,4 @@ interface DucklingsExecutorOptions {
|
|
|
98
88
|
ipcTotalBytes?: number;
|
|
99
89
|
}
|
|
100
90
|
declare function createDucklingsExecutor(env: AnalyticsEnv, opts?: DucklingsExecutorOptions): QueryExecutor;
|
|
101
|
-
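export { type AnalyticsEngineHooks, type AnalyticsEnv, type HostedR2QueryKeyInput, type InflightDedupe, type PresignOptions, type Row, createDucklingsCodec, createDucklingsExecutor, createInflightDedupe, createR2Presigner, getAnalyticsEngine, getHostedR2QueryKey, getWasmDuckDBFactory, resetWasmDuckDB, signSizeHint, useAnalyticsEnv, verifySizeHint };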
|
|
91
|
+
export { type AnalyticsEngineHooks, type AnalyticsEnv, type HostedR2QueryKeyInput, type InflightDedupe, type Row, createDucklingsCodec, createDucklingsExecutor, createInflightDedupe, getAnalyticsEngine, getHostedR2QueryKey, getWasmDuckDBFactory, resetWasmDuckDB, signSizeHint, useAnalyticsEnv, verifySizeHint };
|
package/dist/index.mjs
CHANGED
|
@@ -4,7 +4,6 @@ import { createR2DataSource } from "@gscdump/engine/r2";
|
|
|
4
4
|
import { createHyparquetCodec, decodeParquetToRows } from "@gscdump/engine/hyparquet";
|
|
5
5
|
import { float64, int32, int64, tableFromArrays, tableToIPC, utf8 } from "@uwdata/flechette";
|
|
6
6
|
import { createError } from "h3";
|
|
7
|
-
import { AwsClient } from "aws4fetch";
|
|
8
7
|
let handle = null;
|
|
9
8
|
async function initHandle() {
|
|
10
9
|
throw new Error("DuckDB-WASM handle not wired for Cloudflare Workers yet. Complete duckdb-wasm-handle.ts before enabling dual-write (user.migration_phase != 'd1').");
|
|
@@ -407,43 +406,6 @@ function getHostedR2QueryKey(input) {
|
|
|
407
406
|
input.comparisonFilter ?? null
|
|
408
407
|
]);
|
|
409
408
|
}
|
|
410
|
-
const MAX_EXPIRES_IN = 604800;
|
|
411
|
-
function createR2Presigner(env) {
|
|
412
|
-
if (!env.R2_ACCESS_KEY_ID || !env.R2_SECRET_ACCESS_KEY) throw createError({
|
|
413
|
-
statusCode: 500,
|
|
414
|
-
message: "R2 S3 credentials missing (R2_ACCESS_KEY_ID / R2_SECRET_ACCESS_KEY)"
|
|
415
|
-
});
|
|
416
|
-
if (!env.CLOUDFLARE_ACCOUNT_ID) throw createError({
|
|
417
|
-
statusCode: 500,
|
|
418
|
-
message: "CLOUDFLARE_ACCOUNT_ID missing"
|
|
419
|
-
});
|
|
420
|
-
const aws = new AwsClient({
|
|
421
|
-
accessKeyId: env.R2_ACCESS_KEY_ID,
|
|
422
|
-
secretAccessKey: env.R2_SECRET_ACCESS_KEY,
|
|
423
|
-
service: "s3",
|
|
424
|
-
region: "auto"
|
|
425
|
-
});
|
|
426
|
-
const endpoint = `https://${env.CLOUDFLARE_ACCOUNT_ID}.r2.cloudflarestorage.com`;
|
|
427
|
-
return async function presignGet({ key, bucket, expiresIn = 3600 }) {
|
|
428
|
-
if (!Number.isInteger(expiresIn) || expiresIn <= 0) throw createError({
|
|
429
|
-
statusCode: 400,
|
|
430
|
-
message: `expiresIn must be a positive integer (got ${expiresIn})`
|
|
431
|
-
});
|
|
432
|
-
if (expiresIn > MAX_EXPIRES_IN) throw createError({
|
|
433
|
-
statusCode: 400,
|
|
434
|
-
message: `expiresIn exceeds the ${MAX_EXPIRES_IN}s (7 day) S3 SigV4 maximum (got ${expiresIn})`
|
|
435
|
-
});
|
|
436
|
-
const url = new URL(`${endpoint}/${bucket}/${encodeKey(key)}`);
|
|
437
|
-
url.searchParams.set("X-Amz-Expires", String(expiresIn));
|
|
438
|
-
return (await aws.sign(url.toString(), {
|
|
439
|
-
method: "GET",
|
|
440
|
-
aws: { signQuery: true }
|
|
441
|
-
})).url;
|
|
442
|
-
};
|
|
443
|
-
}
|
|
444
|
-
function encodeKey(key) {
|
|
445
|
-
return key.split("/").map(encodeURIComponent).join("/");
|
|
446
|
-
}
|
|
447
409
|
const SIG_HEX_LEN = 64;
|
|
448
410
|
const keyCache = /* @__PURE__ */ new WeakMap();
|
|
449
411
|
const stringKeyCache = /* @__PURE__ */ new Map();
|
|
@@ -484,4 +446,4 @@ async function verifySizeHint(env, key, bytes, providedHex) {
|
|
|
484
446
|
for (let i = 0; i < SIG_HEX_LEN; i++) diff |= expected.charCodeAt(i) ^ providedHex.charCodeAt(i);
|
|
485
447
|
return diff === 0;
|
|
486
448
|
}
|
|
487
|
-
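export { createDucklingsCodec, createDucklingsExecutor, createInflightDedupe, createR2Presigner, getAnalyticsEngine, getHostedR2QueryKey, getWasmDuckDBFactory, resetWasmDuckDB, signSizeHint, useAnalyticsEnv, verifySizeHint };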
|
|
449
|
+
export { createDucklingsCodec, createDucklingsExecutor, createInflightDedupe, getAnalyticsEngine, getHostedR2QueryKey, getWasmDuckDBFactory, resetWasmDuckDB, signSizeHint, useAnalyticsEnv, verifySizeHint };
|
package/dist/server-tail/index.d.mts
CHANGED
|
@@ -8,7 +8,25 @@ interface ArchetypeSqlPlan {
|
|
|
8
8
|
params: unknown[];
|
|
9
9
|
table: ArchetypeFactTable;
|
|
10
10
|
}
|
|
11
|
-
|
|
11
|
+
interface BuildArchetypeSqlOptions {
|
|
12
|
+
/**
|
|
13
|
+
* Set by the DuckDB file-list executor, which reads raw Iceberg parquet
|
|
14
|
+
* directly via `read_parquet([...])`, bypassing the catalog metadata layer
|
|
15
|
+
* that synthesizes the identity-partition columns. `site_id` / `search_type`
|
|
16
|
+
* are partition identities NOT materialized in the data files (see engine
|
|
17
|
+
* `iceberg/schema.ts`: "carried implicitly in the object-key prefix"), so a
|
|
18
|
+
* `WHERE site_id = ?` predicate fails with `Referenced column "site_id" not
|
|
19
|
+
* found in FROM clause`. Those files are already pruned to (site_id,
|
|
20
|
+
* search_type) at resolution time, so the predicate is redundant — emit only
|
|
21
|
+
* the row-level `date` range (a real stored column; `month(date)` is the
|
|
22
|
+
* partition transform, not `date` itself).
|
|
23
|
+
*
|
|
24
|
+
* The R2 SQL catalog path (default `false`) exposes partition columns as
|
|
25
|
+
* virtual columns and needs the full predicate.
|
|
26
|
+
*/
|
|
27
|
+
partitionPruned?: boolean;
|
|
28
|
+
}
|
|
29
|
+
declare function buildArchetypeSql(query: ArchetypeQuery, opts?: BuildArchetypeSqlOptions): ArchetypeSqlPlan;
|
|
12
30
|
/** Row returned by the DuckDB sibling. */
|
|
13
31
|
type DuckDbIcebergRow = Record<string, string | number | null>;
|
|
14
32
|
/**
|
|
@@ -210,4 +228,4 @@ interface ServerTailDispatcher {
|
|
|
210
228
|
* executor and routes every `ArchetypeQuery` to one of them.
|
|
211
229
|
*/
|
|
212
230
|
declare function createServerTailDispatcher(config: ServerTailDispatcherConfig): ServerTailDispatcher;
|
|
213
|
-
export { type ArchetypeSqlPlan, DuckDbIcebergError, type DuckDbIcebergExecutor, type DuckDbIcebergExecutorConfig, type DuckDbIcebergQueryError, type DuckDbIcebergResult, type DuckDbIcebergRow, DuckDbIcebergTimeoutError, type DuckDbSvc, type R2SqlClient, type R2SqlClientConfig, R2SqlError, type R2SqlQueryError, type R2SqlResult, type R2SqlRow, R2SqlTimeoutError, type ServerTailDispatcher, type ServerTailDispatcherConfig, type ServerTailEngine, ServerTailRoutingError, TABLE_PLACEHOLDER, buildArchetypeSql, createDuckDbIcebergExecutor, createR2SqlClient, createServerTailDispatcher, resolveServerTailEngine, resolveServerTailEngineResult };
|
|
231
|
+
export { type ArchetypeSqlPlan, type BuildArchetypeSqlOptions, DuckDbIcebergError, type DuckDbIcebergExecutor, type DuckDbIcebergExecutorConfig, type DuckDbIcebergQueryError, type DuckDbIcebergResult, type DuckDbIcebergRow, DuckDbIcebergTimeoutError, type DuckDbSvc, type R2SqlClient, type R2SqlClientConfig, R2SqlError, type R2SqlQueryError, type R2SqlResult, type R2SqlRow, R2SqlTimeoutError, type ServerTailDispatcher, type ServerTailDispatcherConfig, type ServerTailEngine, ServerTailRoutingError, TABLE_PLACEHOLDER, buildArchetypeSql, createDuckDbIcebergExecutor, createR2SqlClient, createServerTailDispatcher, resolveServerTailEngine, resolveServerTailEngineResult };
|
package/dist/server-tail/index.mjs
CHANGED
|
@@ -84,7 +84,11 @@ function moverClause(movers) {
|
|
|
84
84
|
function sqlStringLiteral(value) {
|
|
85
85
|
return `'${value.replace(/'/g, "''")}'`;
|
|
86
86
|
}
|
|
87
|
-
function partitionWhere(q) {
|
|
87
|
+
function partitionWhere(q, partitionPruned = false) {
|
|
88
|
+
if (partitionPruned) return {
|
|
89
|
+
clause: "date BETWEEN ? AND ?",
|
|
90
|
+
params: [q.range.start, q.range.end]
|
|
91
|
+
};
|
|
88
92
|
return {
|
|
89
93
|
clause: "site_id = ? AND search_type = ? AND date BETWEEN ? AND ?",
|
|
90
94
|
params: [
|
|
@@ -126,8 +130,8 @@ function facetPredicate(query) {
|
|
|
126
130
|
params
|
|
127
131
|
};
|
|
128
132
|
}
|
|
129
|
-
function buildSiteDailyTimeseries(q) {
|
|
130
|
-
const w = partitionWhere(q);
|
|
133
|
+
function buildSiteDailyTimeseries(q, pruned) {
|
|
134
|
+
const w = partitionWhere(q, pruned);
|
|
131
135
|
const metrics = q.metrics.map(metricExpr).join(", ");
|
|
132
136
|
return {
|
|
133
137
|
table: "dates",
|
|
@@ -135,9 +139,9 @@ function buildSiteDailyTimeseries(q) {
|
|
|
135
139
|
sql: `SELECT date, ${metrics} FROM ${TABLE_PLACEHOLDER} WHERE ${w.clause} GROUP BY date ORDER BY date ASC`
|
|
136
140
|
};
|
|
137
141
|
}
|
|
138
|
-
function buildEntityDailyTimeseries(q) {
|
|
142
|
+
function buildEntityDailyTimeseries(q, pruned) {
|
|
139
143
|
const table = tableForDimensions([q.entity.dimension]);
|
|
140
|
-
const w = partitionWhere(q);
|
|
144
|
+
const w = partitionWhere(q, pruned);
|
|
141
145
|
const col = dimColumn(q.entity.dimension);
|
|
142
146
|
const metrics = q.metrics.map(metricExpr).join(", ");
|
|
143
147
|
return {
|
|
@@ -146,9 +150,9 @@ function buildEntityDailyTimeseries(q) {
|
|
|
146
150
|
sql: `SELECT date, ${metrics} FROM ${TABLE_PLACEHOLDER} WHERE ${w.clause} AND ${col} = ? GROUP BY date ORDER BY date ASC`
|
|
147
151
|
};
|
|
148
152
|
}
|
|
149
|
-
function buildEntityDailySparkline(q) {
|
|
153
|
+
function buildEntityDailySparkline(q, pruned) {
|
|
150
154
|
const table = tableForDimensions([q.dimension]);
|
|
151
|
-
const w = partitionWhere(q);
|
|
155
|
+
const w = partitionWhere(q, pruned);
|
|
152
156
|
const col = dimColumn(q.dimension);
|
|
153
157
|
if (q.entities.length === 0) throw new Error("entity-daily-sparkline: empty entities - resolver must pre-resolve the top-N list");
|
|
154
158
|
const inList = q.entities.map(sqlStringLiteral).join(", ");
|
|
@@ -158,9 +162,9 @@ function buildEntityDailySparkline(q) {
|
|
|
158
162
|
sql: `SELECT date, ${col}, ${metricExpr(q.metric)} FROM ${TABLE_PLACEHOLDER} WHERE ${w.clause} AND ${col} IN (${inList}) GROUP BY date, ${col} ORDER BY date ASC`
|
|
159
163
|
};
|
|
160
164
|
}
|
|
161
|
-
function buildTopNBreakdown(q) {
|
|
165
|
+
function buildTopNBreakdown(q, pruned) {
|
|
162
166
|
const table = tableForDimensions([q.dimension]);
|
|
163
|
-
const w = partitionWhere(q);
|
|
167
|
+
const w = partitionWhere(q, pruned);
|
|
164
168
|
const order = `${q.orderBy.metric} ${q.orderBy.dir.toUpperCase()}`;
|
|
165
169
|
const limit = `LIMIT ${Math.max(0, Math.floor(q.limit))}`;
|
|
166
170
|
const offset = q.offset && q.offset > 0 ? ` OFFSET ${Math.floor(q.offset)}` : "";
|
|
@@ -171,7 +175,7 @@ function buildTopNBreakdown(q) {
|
|
|
171
175
|
const wPrev = partitionWhere({
|
|
172
176
|
...q,
|
|
173
177
|
range: q.compareRange
|
|
174
|
-
});
|
|
178
|
+
}, pruned);
|
|
175
179
|
const deviceSelects = (clause, ml) => DEVICE_SUFFIXES.map((suffix) => {
|
|
176
180
|
const source = deviceSource(suffix);
|
|
177
181
|
const metrics = ml.map((m) => metricExprForSource(m, source)).join(", ");
|
|
@@ -204,7 +208,7 @@ function buildTopNBreakdown(q) {
|
|
|
204
208
|
const wPrev = partitionWhere({
|
|
205
209
|
...q,
|
|
206
210
|
range: q.compareRange
|
|
207
|
-
});
|
|
211
|
+
}, pruned);
|
|
208
212
|
const curMetrics = metricList.map(metricExpr).join(", ");
|
|
209
213
|
const prevMetrics = STD_METRICS.map((m) => metricExpr(m)).join(", ");
|
|
210
214
|
const curCols = metricList.map((m) => coalesceMetric(m, "c", m)).join(", ");
|
|
@@ -232,10 +236,10 @@ function buildTopNBreakdown(q) {
|
|
|
232
236
|
sql
|
|
233
237
|
};
|
|
234
238
|
}
|
|
235
|
-
function buildSingleRowLookup(q) {
|
|
239
|
+
function buildSingleRowLookup(q, pruned) {
|
|
236
240
|
const dims = Object.keys(q.match);
|
|
237
241
|
const table = tableForDimensions(dims);
|
|
238
|
-
const w = partitionWhere(q);
|
|
242
|
+
const w = partitionWhere(q, pruned);
|
|
239
243
|
const params = [...w.params];
|
|
240
244
|
let clause = w.clause;
|
|
241
245
|
for (const dim of dims) {
|
|
@@ -250,9 +254,9 @@ function buildSingleRowLookup(q) {
|
|
|
250
254
|
sql: `SELECT ${dims.length > 0 ? `${dims.map(dimColumn).join(", ")}, ${metrics}` : metrics} FROM ${TABLE_PLACEHOLDER} WHERE ${clause}${groupBy}`
|
|
251
255
|
};
|
|
252
256
|
}
|
|
253
|
-
function buildMultiSeriesStackedDaily(q) {
|
|
257
|
+
function buildMultiSeriesStackedDaily(q, pruned) {
|
|
254
258
|
const table = tableForDimensions([q.seriesDimension]);
|
|
255
|
-
const w = partitionWhere(q);
|
|
259
|
+
const w = partitionWhere(q, pruned);
|
|
256
260
|
if (q.seriesDimension === "device") {
|
|
257
261
|
const selects = DEVICE_SUFFIXES.map((suffix) => {
|
|
258
262
|
const source = deviceSource(suffix);
|
|
@@ -271,8 +275,8 @@ function buildMultiSeriesStackedDaily(q) {
|
|
|
271
275
|
sql: `SELECT date, ${col}, ${metricExpr(q.metric)} FROM ${TABLE_PLACEHOLDER} WHERE ${w.clause} GROUP BY date, ${col} ORDER BY date ASC`
|
|
272
276
|
};
|
|
273
277
|
}
|
|
274
|
-
function buildTwoDimensionDetail(q) {
|
|
275
|
-
const w = partitionWhere(q);
|
|
278
|
+
function buildTwoDimensionDetail(q, pruned) {
|
|
279
|
+
const w = partitionWhere(q, pruned);
|
|
276
280
|
const params = [...w.params];
|
|
277
281
|
let clause = w.clause;
|
|
278
282
|
if (q.filter?.page) {
|
|
@@ -295,15 +299,16 @@ function buildTwoDimensionDetail(q) {
|
|
|
295
299
|
sql
|
|
296
300
|
};
|
|
297
301
|
}
|
|
298
|
-
function buildArchetypeSql(query) {
|
|
302
|
+
function buildArchetypeSql(query, opts = {}) {
|
|
303
|
+
const pruned = opts.partitionPruned ?? false;
|
|
299
304
|
switch (query.archetype) {
|
|
300
|
-
case "site-daily-timeseries": return buildSiteDailyTimeseries(query);
|
|
301
|
-
case "entity-daily-timeseries": return buildEntityDailyTimeseries(query);
|
|
302
|
-
case "entity-daily-sparkline": return buildEntityDailySparkline(query);
|
|
303
|
-
case "top-n-breakdown": return buildTopNBreakdown(query);
|
|
304
|
-
case "single-row-lookup": return buildSingleRowLookup(query);
|
|
305
|
-
case "multi-series-stacked-daily": return buildMultiSeriesStackedDaily(query);
|
|
306
|
-
case "two-dimension-detail": return buildTwoDimensionDetail(query);
|
|
305
|
+
case "site-daily-timeseries": return buildSiteDailyTimeseries(query, pruned);
|
|
306
|
+
case "entity-daily-timeseries": return buildEntityDailyTimeseries(query, pruned);
|
|
307
|
+
case "entity-daily-sparkline": return buildEntityDailySparkline(query, pruned);
|
|
308
|
+
case "top-n-breakdown": return buildTopNBreakdown(query, pruned);
|
|
309
|
+
case "single-row-lookup": return buildSingleRowLookup(query, pruned);
|
|
310
|
+
case "multi-series-stacked-daily": return buildMultiSeriesStackedDaily(query, pruned);
|
|
311
|
+
case "two-dimension-detail": return buildTwoDimensionDetail(query, pruned);
|
|
307
312
|
case "arbitrary-sql": throw new Error("buildArchetypeSql: arbitrary-sql carries caller SQL - the DuckDB executor runs it verbatim");
|
|
308
313
|
case "aux-cloud-only": throw new Error("buildArchetypeSql: aux-cloud-only is not an Iceberg query");
|
|
309
314
|
}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@gscdump/cloudflare",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.27.2",
|
|
4
|
+
"version": "0.28.1",
|
|
5
5
|
"description": "Cloudflare-Workers-flavored helpers for the gscdump analytics stack: AnalyticsEnv binding contract, R2 SigV4 presigner, size-hint HMAC, DuckDB Workers shims, engine factory.",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "Harlan Wilton",
|
|
@@ -46,10 +46,10 @@
|
|
|
46
46
|
"dependencies": {
|
|
47
47
|
"@uwdata/flechette": "^2.5.0",
|
|
48
48
|
"aws4fetch": "^1.0.20",
|
|
49
|
-
"@gscdump/
|
|
50
|
-
"@gscdump/engine-sqlite": "0.27.2",
|
|
51
|
-
"
|
|
52
|
-
"gscdump": "0.27.2"
|
|
49
|
+
"@gscdump/engine": "0.28.1",
|
|
50
|
+
"@gscdump/engine-sqlite": "0.28.1",
|
|
51
|
+
"gscdump": "0.28.1",
|
|
52
|
+
"@gscdump/contracts": "0.28.1"
|
|
53
53
|
},
|
|
54
54
|
"devDependencies": {
|
|
55
55
|
"@cloudflare/vitest-pool-workers": "^0.16.16",
|