@gscdump/cloudflare 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Harlan Wilton
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/dist/index.d.mts ADDED
@@ -0,0 +1,73 @@
1
+ import { DuckDBFactory, ParquetCodec, QueryExecutor, Row, createStorageEngine } from "@gscdump/engine";
2
+ import { H3Event } from "h3";
/**
 * Returns a factory whose `getDuckDB()` lazily starts the DuckDB-WASM
 * initialization on first use and reuses the module-level handle on later calls.
 *
 * NOTE: in this version the initializer always rejects with
 * "DuckDB-WASM handle not wired for Cloudflare Workers yet".
 */
declare function getWasmDuckDBFactory(): DuckDBFactory;
/**
 * Clears the module-level DuckDB-WASM handle so the next `getDuckDB()` call
 * runs initialization again.
 */
declare function resetWasmDuckDB(): void;
/**
 * Bindings and variables the analytics helpers read from the Worker environment.
 * Every member is optional; each helper checks for the members it needs.
 */
interface AnalyticsEnv {
  /**
   * R2 bucket holding parquet + rollup + entity data. Required in origin mode:
   * `getAnalyticsEngine` returns `null` when it is not bound.
   */
  R2_DATA?: R2Bucket;
  /**
   * Human name of the R2 bucket. `getAnalyticsEngine` forwards it to the R2
   * data source as `bucketName`.
   * NOTE(review): the presigner in this package takes the bucket from
   * `PresignOptions.bucket`; callers presumably pass this value there — confirm.
   */
  R2_BUCKET_NAME?: string;
  /** Optional: D1 database holding the r2_manifest mirror + sync state. */
  DB?: D1Database;
  /**
   * Optional: DuckDB service binding (Workers RPC) for server-side execution.
   * Structural shape — any binding with `runSQL` + `ping` satisfies it, so
   * hosts can declare their own binding interface without coupling to this one.
   * `createDucklingsExecutor` throws at execute time when it is not bound.
   */
  DUCKDB_SVC?: {
    runSQL: (args: {
      sql: string;
      tables?: Record<string, {
        rows: unknown[];
        ddl?: string;
      }>;
    }) => Promise<{
      rows: unknown[];
      sql: string;
    }>;
    ping: () => Promise<string>;
  };
  /** Route override: force D1 as the manifest source even if R2 is bound. */
  ANALYTICS_FORCE_D1?: string;
  /**
   * Route override: disable R2 reads entirely (consumer / legacy mode).
   * NOTE(review): not read by any helper in this package; the accepted values
   * are defined by the host — confirm there.
   */
  R2_READS_ENABLED?: string;
  /**
   * R2 S3-API credentials for presigning (origin mode when the worker can't proxy).
   * `createR2Presigner` throws a 500 error unless `R2_ACCESS_KEY_ID`,
   * `R2_SECRET_ACCESS_KEY` and `CLOUDFLARE_ACCOUNT_ID` are all set.
   */
  R2_ACCESS_KEY_ID?: string;
  /** Secret half of the R2 S3-API credential pair used by `createR2Presigner`. */
  R2_SECRET_ACCESS_KEY?: string;
  /** Account id used to build the `<account>.r2.cloudflarestorage.com` endpoint. */
  CLOUDFLARE_ACCOUNT_ID?: string;
  /**
   * Secret used by size-hint-sig.ts to HMAC-sign size hints. Rotating it
   * invalidates cached hints. `signSizeHint` / `verifySizeHint` reject when it
   * is not configured.
   */
  TOKEN_ENCRYPTION_SECRET?: string;
}
/**
 * Resolve the AnalyticsEnv for the current request.
 *
 * Looks for, in order:
 *   1. `event.context.analyticsEnv` — host plugin sets this explicitly.
 *   2. `event.context.cloudflare?.env` — Cloudflare adapter convention.
 *   3. Throws. The layer has no way to fabricate an env; the host must wire it.
 *
 * @param event The h3 event for the current request.
 * @returns The first truthy env found in the order above.
 * @throws An h3 error with `statusCode: 500` when neither location holds an env.
 */
declare function useAnalyticsEnv(event: H3Event): AnalyticsEnv;
/**
 * Optional per-request hooks for telemetry / tracing. Hosts wire these in
 * to bridge engine activity into their own metrics pipeline.
 */
interface AnalyticsEngineHooks {
  /**
   * Called once per R2 PUT, with the byte size of the payload.
   * It is invoked before the write is forwarded to the data source, so it
   * fires even if the write later fails, and an exception thrown by the hook
   * rejects the write.
   */
  onR2Write?: (byteLength: number) => void;
}
/**
 * Builds the storage engine from an R2 data source, a D1 manifest store, the
 * hyparquet codec and the DUCKDB_SVC-backed executor.
 *
 * @param env Environment; `R2_DATA` must be bound, `R2_BUCKET_NAME` is forwarded as the bucket name.
 * @param db Handle passed unchanged to `createD1ManifestStore`.
 * @param hooks Optional telemetry hooks; `onR2Write` wraps the data source's `write`.
 * @returns The engine, or `null` when `env.R2_DATA` is not bound.
 */
declare function getAnalyticsEngine(env: AnalyticsEnv, db: any, hooks?: AnalyticsEngineHooks): ReturnType<typeof createStorageEngine> | null;
/** Arguments accepted by the function returned from `createR2Presigner`. */
interface PresignOptions {
  /** Object key; each `/`-separated segment is URI-encoded before signing. */
  key: string;
  /** Bucket name, used verbatim as the first path segment of the signed URL. */
  bucket: string;
  /** Value sent as `X-Amz-Expires` (seconds, per SigV4). Defaults to 3600. */
  expiresIn?: number;
}
/**
 * Creates a function that returns query-signed GET URLs for objects on the
 * account's R2 S3 endpoint (`https://<CLOUDFLARE_ACCOUNT_ID>.r2.cloudflarestorage.com`).
 *
 * @param env Must provide `R2_ACCESS_KEY_ID`, `R2_SECRET_ACCESS_KEY` and `CLOUDFLARE_ACCOUNT_ID`.
 * @returns Async presigner resolving to the signed URL string.
 * @throws An h3 error with `statusCode: 500` when any of the three values is missing.
 */
declare function createR2Presigner(env: AnalyticsEnv): ({
  key,
  bucket,
  expiresIn
}: PresignOptions) => Promise<string>;
/**
 * Signs a size hint: HMAC-SHA-256 over `key`, a NUL byte and `bytes`, keyed by
 * `env.TOKEN_ENCRYPTION_SECRET`, truncated to the first 16 hex characters.
 *
 * @returns The 16-character lowercase hex signature.
 * @throws (rejects) when `TOKEN_ENCRYPTION_SECRET` is not configured.
 */
declare function signSizeHint(env: AnalyticsEnv, key: string, bytes: number): Promise<string>;
/**
 * Checks `providedHex` against the signature `signSizeHint` produces for the
 * same `key` and `bytes`.
 *
 * Returns `false` immediately when `providedHex` is not 16 characters long;
 * otherwise every character is compared without an early exit.
 *
 * @throws (rejects) when `TOKEN_ENCRYPTION_SECRET` is not configured.
 */
declare function verifySizeHint(env: AnalyticsEnv, key: string, bytes: number, providedHex: string): Promise<boolean>;
/** Returns the hyparquet-based parquet codec. `_env` is accepted but not read. */
declare function createDucklingsCodec(_env: AnalyticsEnv): ParquetCodec;
/**
 * Returns an executor that decodes the referenced parquet files to rows,
 * sends them as temp tables to the `DUCKDB_SVC` binding together with the
 * rewritten SQL, and converts bigint cells in the result to numbers.
 * Executing throws when `env.DUCKDB_SVC` is not bound.
 */
declare function createDucklingsExecutor(env: AnalyticsEnv): QueryExecutor;
73
+ export { type AnalyticsEngineHooks, type AnalyticsEnv, type PresignOptions, type Row, createDucklingsCodec, createDucklingsExecutor, createR2Presigner, getAnalyticsEngine, getWasmDuckDBFactory, resetWasmDuckDB, signSizeHint, useAnalyticsEnv, verifySizeHint };
package/dist/index.mjs ADDED
@@ -0,0 +1,208 @@
1
+ import { bindLiterals, canonicalEmptyParquetSchema, createStorageEngine } from "@gscdump/engine";
2
+ import { createD1ManifestStore } from "@gscdump/engine-sqlite";
3
+ import { createR2DataSource } from "@gscdump/engine/r2";
4
+ import { createHyparquetCodec, decodeParquetToRows } from "@gscdump/engine/hyparquet";
5
+ import { createError } from "h3";
6
+ import { AwsClient } from "aws4fetch";
/**
 * Module-level memo of the DuckDB-WASM handle promise.
 * `null` means nothing is pending or cached, so the next request initializes.
 */
let handle = null;

/**
 * Creates the DuckDB-WASM handle.
 *
 * Not implemented for Cloudflare Workers yet, so the returned promise always
 * rejects.
 */
async function initHandle() {
	throw new Error("DuckDB-WASM handle not wired for Cloudflare Workers yet. Complete duckdb-wasm-handle.ts before enabling dual-write (user.migration_phase != 'd1').");
}

/**
 * Returns a factory whose `getDuckDB()` shares one module-level handle promise.
 *
 * A failed initialization is not kept: once the promise rejects the memo is
 * cleared, so a later call retries instead of replaying the old rejection.
 *
 * @returns Object with a `getDuckDB()` method returning the handle promise.
 */
function getWasmDuckDBFactory() {
	return {
		getDuckDB() {
			if (!handle) {
				const pending = initHandle();
				handle = pending;
				// Only clear the memo if it still points at this attempt; a reset or a
				// newer attempt may have replaced it in the meantime.
				pending.catch(() => {
					if (handle === pending) handle = null;
				});
			}
			return handle;
		}
	};
}

/** Drops the memoized handle so the next `getDuckDB()` call initializes again. */
function resetWasmDuckDB() {
	handle = null;
}
/**
 * Looks up the DuckDB service binding on the environment.
 *
 * @param env Environment object that may carry `DUCKDB_SVC`.
 * @returns The bound service.
 * @throws Error when `DUCKDB_SVC` is missing or falsy.
 */
function resolveSvc(env) {
	const { DUCKDB_SVC: binding } = env;
	if (binding) return binding;
	throw new Error("DUCKDB_SVC service binding is not configured");
}
/**
 * Provides the parquet codec used by the engine.
 *
 * @param _env Accepted for signature symmetry with the other factories; unused.
 * @returns The codec produced by `createHyparquetCodec()`.
 */
function createDucklingsCodec(_env) {
	const codec = createHyparquetCodec();
	return codec;
}
/**
 * Converts every bigint cell of a result row to a number.
 *
 * The input row is never modified: a shallow copy is made the first time a
 * bigint is found, and rows without bigints are returned as-is.
 *
 * @param row Result row keyed by column name.
 * @returns The same row object, or a shallow copy with bigints converted.
 */
function coerceRow(row) {
	let copy = null;
	for (const column of Object.keys(row)) {
		const cell = row[column];
		if (typeof cell !== "bigint") continue;
		if (copy === null) copy = { ...row };
		copy[column] = Number(cell);
	}
	return copy === null ? row : copy;
}
36
+ const READ_PARQUET_PLACEHOLDER = /read_parquet\(\{\{(\w+)\}\}(?:\s*,[^)]*)?\)/g;
37
+ function tmpTableName(placeholder) {
38
+ const uuid = crypto.randomUUID().replace(/-/g, "_");
39
+ return `tmp_${placeholder.toLowerCase()}_${uuid}`;
40
+ }
/** Upper bound for the estimated size of all cached rows (16 MiB). */
const ROW_CACHE_MAX_BYTES = 16 * 1024 * 1024;
/** Sum of the `bytes` estimates of every entry currently in `rowCache`. */
let rowCacheBytes = 0;
/**
 * Decoded rows per object key. Map insertion order doubles as recency order:
 * the first key is the least recently used one.
 */
const rowCache = /* @__PURE__ */ new Map();

/**
 * Rough size estimate for a row set: 64 bytes per cell, with the column count
 * taken from the first row.
 *
 * @param rows Decoded rows.
 * @returns Estimated size in bytes; 0 for an empty array.
 */
function estimateRowsBytes(rows) {
	if (rows.length === 0) return 0;
	const cols = Object.keys(rows[0]).length;
	return rows.length * cols * 64;
}

/**
 * Returns the cached rows for `key` and marks the entry as most recently used.
 *
 * @param key Object key.
 * @returns The cached rows, or `undefined` on a miss.
 */
function rowCacheGet(key) {
	const hit = rowCache.get(key);
	if (!hit) return void 0;
	// Re-insert to move the entry to the end of the iteration order.
	rowCache.delete(key);
	rowCache.set(key, hit);
	return hit.rows;
}

/**
 * Stores rows under `key`, evicting least recently used entries until the
 * estimated total fits in `ROW_CACHE_MAX_BYTES`.
 *
 * Row sets whose estimate alone exceeds the limit are not cached. When `key`
 * is already cached, the old entry is removed first, so its size is not
 * counted twice and the key becomes the most recent entry.
 *
 * @param key Object key.
 * @param rows Decoded rows to cache.
 */
function rowCachePut(key, rows) {
	const bytes = estimateRowsBytes(rows);
	if (bytes > ROW_CACHE_MAX_BYTES) return;
	const previous = rowCache.get(key);
	if (previous) {
		rowCache.delete(key);
		rowCacheBytes -= previous.bytes;
	}
	while (rowCacheBytes + bytes > ROW_CACHE_MAX_BYTES) {
		const oldest = rowCache.keys().next().value;
		if (oldest === void 0) break;
		const evicted = rowCache.get(oldest);
		rowCache.delete(oldest);
		rowCacheBytes -= evicted.bytes;
	}
	rowCache.set(key, {
		rows,
		bytes
	});
	rowCacheBytes += bytes;
}
/**
 * Builds a query executor that runs SQL on the `DUCKDB_SVC` service binding.
 *
 * For each `fileKeys` placeholder the referenced parquet objects are decoded
 * to rows (going through the module-level row cache), merged, and sent to the
 * service as a uniquely named temp table. `read_parquet({{PLACEHOLDER}} ...)`
 * calls in the SQL are rewritten to those table names before literals are bound.
 *
 * @param env Environment providing the `DUCKDB_SVC` binding.
 * @returns Object with `execute({ sql, params, fileKeys, dataSource, signal, table })`
 *   resolving to `{ rows, sql }`; bigint cells in `rows` are converted to numbers.
 * @throws (from `execute`) if `DUCKDB_SVC` is unbound, if `signal` is aborted,
 *   or if the SQL references a placeholder that has no `fileKeys` entry.
 */
function createDucklingsExecutor(env) {
	return {
		async execute({ sql, params, fileKeys, dataSource, signal, table }) {
			signal?.throwIfAborted();
			const svc = resolveSvc(env);
			const tempNames = {};
			const tables = {};
			await Promise.all(Object.entries(fileKeys).map(async ([placeholder, keys]) => {
				const perFile = await Promise.all(keys.map(async (key) => {
					const cached = rowCacheGet(key);
					if (cached) return cached;
					const rows = await decodeParquetToRows(await dataSource.read(key));
					rowCachePut(key, rows);
					return rows;
				}));
				// Flatten one level instead of `push(...rows)`: spreading a large file's
				// rows into call arguments can exceed the engine's argument limit.
				const merged = perFile.flat();
				const tmp = tmpTableName(placeholder);
				tempNames[placeholder] = tmp;
				tables[tmp] = {
					rows: merged,
					// Gives the temp table the canonical column types even when `merged` is empty.
					ddl: `AS SELECT * FROM ${canonicalEmptyParquetSchema(table)} WHERE FALSE`
				};
			}));
			// Decoding may have taken a while; re-check before calling the service.
			signal?.throwIfAborted();
			const rewrittenSql = sql.replace(READ_PARQUET_PLACEHOLDER, (_, placeholder) => {
				const tmp = tempNames[placeholder];
				if (!tmp) throw new Error(`createDucklingsExecutor: SQL references {{${placeholder}}} but no fileKeys entry provided`);
				return tmp;
			});
			const finalSql = bindLiterals(rewrittenSql, params);
			const result = await svc.runSQL({
				sql: finalSql,
				tables
			});
			return {
				rows: result.rows.map(coerceRow),
				sql: result.sql
			};
		}
	};
}
/**
 * Assembles the storage engine for the given environment.
 *
 * @param env Environment; `R2_DATA` must be bound for an engine to be built.
 * @param db Handle handed to `createD1ManifestStore`.
 * @param hooks Optional telemetry hooks. When `onR2Write` is set, the data
 *   source's `write` is wrapped so the hook sees each payload's byte length
 *   before the write is forwarded.
 * @returns The storage engine, or `null` when `env.R2_DATA` is not bound.
 */
function getAnalyticsEngine(env, db, hooks = {}) {
	const bucket = env.R2_DATA;
	if (!bucket) return null;
	const r2Source = createR2DataSource({
		bucket,
		bucketName: env.R2_BUCKET_NAME
	});
	let dataSource = r2Source;
	if (hooks.onR2Write) {
		dataSource = {
			...r2Source,
			async write(key, bytes) {
				hooks.onR2Write(bytes.byteLength);
				return r2Source.write(key, bytes);
			}
		};
	}
	return createStorageEngine({
		dataSource,
		manifestStore: createD1ManifestStore(db),
		codec: createDucklingsCodec(env),
		executor: createDucklingsExecutor(env)
	});
}
/**
 * Finds the analytics environment for a request.
 *
 * Prefers `event.context.analyticsEnv`, then falls back to
 * `event.context.cloudflare?.env`.
 *
 * @param event h3 event for the current request.
 * @returns The first truthy environment found.
 * @throws h3 error with `statusCode: 500` when neither location is populated.
 */
function useAnalyticsEnv(event) {
	const { context } = event;
	const env = context.analyticsEnv || context.cloudflare?.env;
	if (!env) {
		throw createError({
			statusCode: 500,
			statusMessage: "AnalyticsEnv not available — host must populate event.context.analyticsEnv or use the Cloudflare adapter"
		});
	}
	return env;
}
/**
 * Creates a presigner for GET requests against the account's R2 S3 endpoint.
 *
 * @param env Must carry `R2_ACCESS_KEY_ID`, `R2_SECRET_ACCESS_KEY` and
 *   `CLOUDFLARE_ACCOUNT_ID`.
 * @returns Async function `({ key, bucket, expiresIn = 3600 })` resolving to
 *   the query-signed URL.
 * @throws h3 error with `statusCode: 500` when credentials or the account id
 *   are missing.
 */
function createR2Presigner(env) {
	const hasCredentials = Boolean(env.R2_ACCESS_KEY_ID && env.R2_SECRET_ACCESS_KEY);
	if (!hasCredentials) {
		throw createError({
			statusCode: 500,
			message: "R2 S3 credentials missing (R2_ACCESS_KEY_ID / R2_SECRET_ACCESS_KEY)"
		});
	}
	if (!env.CLOUDFLARE_ACCOUNT_ID) {
		throw createError({
			statusCode: 500,
			message: "CLOUDFLARE_ACCOUNT_ID missing"
		});
	}
	const signer = new AwsClient({
		accessKeyId: env.R2_ACCESS_KEY_ID,
		secretAccessKey: env.R2_SECRET_ACCESS_KEY,
		service: "s3",
		region: "auto"
	});
	const endpoint = `https://${env.CLOUDFLARE_ACCOUNT_ID}.r2.cloudflarestorage.com`;
	return async function presignGet({ key, bucket, expiresIn = 3600 }) {
		const target = new URL(`${endpoint}/${bucket}/${encodeKey(key)}`);
		// The expiry has to be on the URL before signing so it is covered by the signature.
		target.searchParams.set("X-Amz-Expires", String(expiresIn));
		const signed = await signer.sign(target.toString(), {
			method: "GET",
			aws: { signQuery: true }
		});
		return signed.url;
	};
}
/**
 * URI-encodes an object key segment by segment, leaving the `/` separators
 * intact.
 *
 * @param key Object key, e.g. `a/b c.parquet`.
 * @returns The key with each segment passed through `encodeURIComponent`.
 */
function encodeKey(key) {
	const encodedSegments = [];
	for (const segment of key.split("/")) encodedSegments.push(encodeURIComponent(segment));
	return encodedSegments.join("/");
}
168
+ const SIG_HEX_LEN = 16;
169
+ const keyCache = /* @__PURE__ */ new WeakMap();
170
+ const stringKeyCache = /* @__PURE__ */ new Map();
171
+ async function getKey(env) {
172
+ const secret = env.TOKEN_ENCRYPTION_SECRET;
173
+ if (!secret) throw new Error("size-hint-sig: TOKEN_ENCRYPTION_SECRET not configured");
174
+ let cached = keyCache.get(env);
175
+ if (!cached) {
176
+ cached = stringKeyCache.get(secret);
177
+ if (!cached) {
178
+ cached = crypto.subtle.importKey("raw", new TextEncoder().encode(secret).slice().buffer, {
179
+ name: "HMAC",
180
+ hash: "SHA-256"
181
+ }, false, ["sign", "verify"]);
182
+ stringKeyCache.set(secret, cached);
183
+ }
184
+ keyCache.set(env, cached);
185
+ }
186
+ return cached;
187
+ }
/**
 * Hex-encodes the leading bytes of a buffer.
 *
 * @param buf ArrayBuffer (or anything `Uint8Array` accepts) to encode.
 * @param chars Maximum number of hex characters to return.
 * @returns Lowercase hex of at most `chars` characters. The result is shorter
 *   when the buffer holds fewer than `chars / 2` bytes, and an odd `chars`
 *   cuts the last byte to its high nibble.
 */
function toHex(buf, chars) {
	const bytes = new Uint8Array(buf);
	// Never read past the buffer: `bytes[i]` would be undefined there.
	const byteCount = Math.min(bytes.length, Math.ceil(chars / 2));
	let hex = "";
	for (let i = 0; i < byteCount; i++) hex += bytes[i].toString(16).padStart(2, "0");
	return hex.length > chars ? hex.slice(0, chars) : hex;
}
/**
 * Builds the bytes that get signed for a size hint: the key, a NUL separator,
 * and the byte count rendered as text.
 *
 * @param key Object key.
 * @param bytes Size being attested.
 * @returns A fresh ArrayBuffer holding the UTF-8 encoded message.
 */
function payload(key, bytes) {
	const message = `${key}\0${bytes}`;
	const encoded = new TextEncoder().encode(message);
	// Copy so the returned buffer contains exactly the message bytes.
	return encoded.slice().buffer;
}
/**
 * Produces the truncated HMAC signature for a size hint.
 *
 * @param env Environment carrying `TOKEN_ENCRYPTION_SECRET`.
 * @param key Object key the hint refers to.
 * @param bytes Size being attested.
 * @returns The first `SIG_HEX_LEN` hex characters of the HMAC.
 * @throws (rejects) when the secret is not configured.
 */
async function signSizeHint(env, key, bytes) {
	const hmacKey = await getKey(env);
	const message = payload(key, bytes);
	const mac = await crypto.subtle.sign("HMAC", hmacKey, message);
	return toHex(mac, SIG_HEX_LEN);
}
/**
 * Checks a caller-supplied size-hint signature.
 *
 * @param env Environment carrying `TOKEN_ENCRYPTION_SECRET`.
 * @param key Object key the hint refers to.
 * @param bytes Size the hint claims.
 * @param providedHex Signature received from the caller (untrusted).
 * @returns `true` only when `providedHex` equals the expected signature;
 *   `false` for anything that is not a string of exactly `SIG_HEX_LEN` characters.
 * @throws (rejects) when the secret is not configured.
 */
async function verifySizeHint(env, key, bytes, providedHex) {
	// Untrusted input may be missing or an array (e.g. repeated query params);
	// treat that as a failed verification rather than a TypeError.
	if (typeof providedHex !== "string" || providedHex.length !== SIG_HEX_LEN) return false;
	const expected = await signSizeHint(env, key, bytes);
	// Accumulate differences over the full length so the comparison does not
	// stop at the first mismatching character.
	let diff = 0;
	for (let i = 0; i < SIG_HEX_LEN; i++) diff |= expected.charCodeAt(i) ^ providedHex.charCodeAt(i);
	return diff === 0;
}
208
+ export { createDucklingsCodec, createDucklingsExecutor, createR2Presigner, getAnalyticsEngine, getWasmDuckDBFactory, resetWasmDuckDB, signSizeHint, useAnalyticsEnv, verifySizeHint };
package/package.json ADDED
@@ -0,0 +1,55 @@
1
+ {
2
+ "name": "@gscdump/cloudflare",
3
+ "type": "module",
4
+ "version": "0.7.0",
5
+ "description": "Cloudflare-Workers-flavored helpers for the gscdump analytics stack: AnalyticsEnv binding contract, R2 SigV4 presigner, size-hint HMAC, DuckDB Workers shims, engine factory.",
6
+ "author": {
7
+ "name": "Harlan Wilton",
8
+ "email": "harlan@harlanzw.com",
9
+ "url": "https://harlanzw.com/"
10
+ },
11
+ "license": "MIT",
12
+ "funding": "https://github.com/sponsors/harlan-zw",
13
+ "homepage": "https://github.com/harlan-zw/gscdump/tree/main/packages/cloudflare#readme",
14
+ "repository": {
15
+ "type": "git",
16
+ "url": "git+https://github.com/harlan-zw/gscdump.git",
17
+ "directory": "packages/cloudflare"
18
+ },
19
+ "bugs": {
20
+ "url": "https://github.com/harlan-zw/gscdump/issues"
21
+ },
22
+ "sideEffects": false,
23
+ "exports": {
24
+ ".": {
25
+ "types": "./dist/index.d.mts",
26
+ "import": "./dist/index.mjs"
27
+ }
28
+ },
29
+ "main": "./dist/index.mjs",
30
+ "types": "./dist/index.d.mts",
31
+ "files": [
32
+ "dist"
33
+ ],
34
+ "engines": {
35
+ "node": ">=18"
36
+ },
37
+ "peerDependencies": {
38
+ "h3": "^1.15.0"
39
+ },
40
+ "dependencies": {
41
+ "aws4fetch": "^1.0.20",
42
+ "@gscdump/engine": "0.7.0",
43
+ "@gscdump/engine-sqlite": "0.7.0"
44
+ },
45
+ "devDependencies": {
46
+ "@cloudflare/workers-types": "^4.20260426.1",
47
+ "h3": "^1.15.11",
48
+ "typescript": "^6.0.3"
49
+ },
50
+ "scripts": {
51
+ "build": "obuild",
52
+ "dev": "obuild --stub",
53
+ "typecheck": "tsc --noEmit"
54
+ }
55
+ }