@gscdump/engine-duckdb-wasm 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Harlan Wilton
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,73 @@
1
+ # @gscdump/engine-duckdb-wasm
2
+
3
+ [![npm version](https://img.shields.io/npm/v/@gscdump/engine-duckdb-wasm?color=yellow)](https://npmjs.com/package/@gscdump/engine-duckdb-wasm)
4
+ [![npm downloads](https://img.shields.io/npm/dm/@gscdump/engine-duckdb-wasm?color=yellow)](https://npm.chart.dev/@gscdump/engine-duckdb-wasm)
5
+ [![license](https://img.shields.io/github/license/harlan-zw/gscdump?color=yellow)](https://github.com/harlan-zw/gscdump/blob/main/LICENSE)
6
+
7
+ > DuckDB-WASM engine adapter for `@gscdump/analysis` — typed browser analytics against parquet via R2.
8
+
9
+ In-browser DuckDB-WASM connection wrapped as a `SqlQuerySource`. Ships a vendored, stripped-down drizzle-orm DuckDB-WASM adapter (~240 LoC, adapted from `@proj-airi/drizzle-duckdb-wasm`, MIT). Transactions throw — analytics workload is read-only.
10
+
11
+ Bundle: **10.3 kB / 2.72 kB gzipped**. `@duckdb/duckdb-wasm` is an optional peer dep.
12
+
13
+ ## Install
14
+
15
+ ```bash
16
+ npm install @gscdump/engine-duckdb-wasm @duckdb/duckdb-wasm
17
+ ```
18
+
19
+ ## Usage
20
+
21
+ ```ts
22
+ import {
23
+ attachParquetUrlTables,
24
+ bootDuckDBWasm,
25
+ createInsightRunner,
26
+ resolveWindow,
27
+ scopeFor,
28
+ strikingMomentum,
29
+ } from '@gscdump/engine-duckdb-wasm'
30
+
31
+ const { db, conn } = await bootDuckDBWasm()
32
+ await attachParquetUrlTables({ db, conn, tables: [{ table: 'page_keywords', urls: ['/r2/page_keywords.parquet'] }] })
33
+
34
+ const runner = await createInsightRunner({ db, conn })
35
+ const window = resolveWindow({ preset: 'last-30d', comparison: 'prev-period' })
36
+ const scope = scopeFor('keywords', { siteId, window })
37
+
38
+ const rows = await strikingMomentum(runner, { ...scope, limit: 50 })
39
+ ```
40
+
41
+ ### Engine source for analyzer dispatch
42
+
43
+ ```ts
44
+ import { createEngine } from '@gscdump/engine-duckdb-wasm'
45
+
46
+ const source = createEngine({
47
+ runner: await createClient(db, conn), // import createClient too; it binds params and returns plain rows, which a raw conn.query does not
48
+ })
49
+ ```
50
+
51
+ ## Exports
52
+
53
+ - `createEngine({ runner })` — builds a `SqlQuerySource` over a DuckDB-WASM connection.
54
+ - `createInsightRunner({ db, conn })` — drizzle-orm handle for typed `.select()` / window functions, with `sql\`...\`` raw escape hatch.
55
+ - `bootDuckDBWasm()` / `attachParquetTables()` / `attachParquetUrlTables()` / `attachSingleTable()` / `createBrowserAnalysisRuntime()` / `createDuckDBBundlesFromBase()` / `listAttachedTables()` — browser runtime primitives.
56
+ - `strikingMomentum(runner, options)` — first-class browser insight.
57
+ - `scopeFor(table, { siteId, window })` / `mergeScope()` — tenant scope predicates.
58
+ - `pages` / `keywords` / `page_keywords` / `countries` / `devices` / `schema` — drizzle schema mirroring `gscdump/analytics` `SCHEMAS`. Drift fails loudly at module load.
59
+ - `browserResolverAdapter` — dialect adapter for the resolver kit.
60
+ - `createClient` / `drizzle` / `DuckDBWasmDatabase` — vendored drizzle-orm DuckDB-WASM adapter.
61
+ - `resolveWindow` (re-exported from `@gscdump/engine/period`).
62
+
63
+ ## Related
64
+
65
+ - [`@gscdump/engine`](../engine) — Storage contracts + dialect-neutral resolver.
66
+ - [`@gscdump/analysis`](../analysis) — Analyzer registry + `analyzeContentGap` (browser semantic).
67
+ - [`@gscdump/engine-duckdb-node`](../engine-duckdb-node) — Node DuckDB counterpart.
68
+ - [`@gscdump/engine-sqlite`](../engine-sqlite) — SQLite / D1 counterpart.
69
+ - [`@gscdump/nuxt-analytics`](../nuxt-analytics) — Nuxt layer that ships this runtime client-side.
70
+
71
+ ## License
72
+
73
+ [MIT](../../LICENSE)
@@ -0,0 +1,224 @@
1
+ import { Assume, DrizzleConfig, entityKind } from "drizzle-orm";
2
+ import { PgDatabase, PgDialect, PgPreparedQuery, PgQueryResultHKT, PgSession } from "drizzle-orm/pg-core";
3
+ import { QueryRow, SqlQuerySource, pgResolverAdapter as browserResolverAdapter } from "@gscdump/engine/resolver";
4
+ import { DrizzleSchema as Schema, countries, devices, drizzleSchema as schema, keywords, page_keywords, pages } from "@gscdump/engine/schema";
5
+ import * as _$_gscdump_engine_scope0 from "@gscdump/engine/scope";
6
+ import { ScopedRunnerOptions, TableScope } from "@gscdump/engine/scope";
7
+ import { AnalyzerRegistry } from "@gscdump/engine/analyzer";
8
+ import { ComparisonMode, ResolveWindowOptions, ResolvedWindow, WindowPreset, resolveWindow } from "@gscdump/engine/period";
9
+ import { AsyncDuckDB, AsyncDuckDBConnection, DuckDBBundles } from "@duckdb/duckdb-wasm";
10
+ import { AnalysisParams } from "@gscdump/engine/analysis-types";
11
+ interface DuckDBWasmClient {
12
+ db: AsyncDuckDB;
13
+ conn: AsyncDuckDBConnection;
14
+ query: (sql: string, params?: unknown[]) => Promise<Record<string, unknown>[]>;
15
+ close: () => Promise<void>;
16
+ }
17
+ declare function createClient(db: AsyncDuckDB, conn: AsyncDuckDBConnection): Promise<DuckDBWasmClient>;
18
+ type Row = Record<string, unknown>;
19
+ interface DuckDBWasmQueryResultHKT extends PgQueryResultHKT {
20
+ type: Assume<this['row'], Row>[];
21
+ }
22
+ declare class DuckDBWasmDatabase<TSchema extends Record<string, unknown> = Record<string, never>> extends PgDatabase<DuckDBWasmQueryResultHKT, TSchema> {
23
+ static readonly [entityKind]: string;
24
+ }
25
+ interface DuckDBWasmDrizzleDatabase<TSchema extends Record<string, unknown> = Record<string, never>> extends DuckDBWasmDatabase<TSchema> {
26
+ $client: Promise<DuckDBWasmClient>;
27
+ }
28
+ declare function drizzle<TSchema extends Record<string, unknown> = Record<string, never>>(client: Promise<DuckDBWasmClient> | DuckDBWasmClient, config?: DrizzleConfig<TSchema>): DuckDBWasmDrizzleDatabase<TSchema>;
29
+ interface BrowserQueryRunner {
30
+ query: (sql: string, params?: unknown[]) => Promise<QueryRow[]>;
31
+ }
32
+ interface EngineConfig {
33
+ runner: BrowserQueryRunner;
34
+ }
35
+ declare function createEngine(config: EngineConfig): SqlQuerySource;
36
+ interface InsightRunnerOptions {
37
+ db: AsyncDuckDB;
38
+ conn: AsyncDuckDBConnection;
39
+ logger?: boolean;
40
+ }
41
+ interface InsightRunner {
42
+ db: DuckDBWasmDrizzleDatabase<Schema>;
43
+ client: Promise<DuckDBWasmClient>;
44
+ close: () => Promise<void>;
45
+ }
46
+ declare function createInsightRunner(opts: InsightRunnerOptions): Promise<InsightRunner>;
47
+ /**
48
+ * Build a per-table predicate set from {siteId, window}. The returned
49
+ * `wherePredicates` composes with user-level filters via `mergeScope`.
50
+ *
51
+ * Note: the current SCHEMAS don't include `site_id` on any table (snapshots
52
+ * are already per-site), so `siteId` is a no-op for now — kept in the API
53
+ * so consumers can add the predicate without an interface change when
54
+ * multi-site snapshots land.
55
+ */
56
+ declare const scopeFor: (table: "pages" | "keywords" | "countries" | "devices" | "page_keywords" | "search_appearance", opts: ScopedRunnerOptions) => TableScope, mergeScope: typeof _$_gscdump_engine_scope0.mergeScope;
57
+ interface StrikingMomentumOptions {
58
+ /** Anchor date (YYYY-MM-DD). Defaults to today. */
59
+ anchor?: string;
60
+ /** Width of each window in days. Default 90. */
61
+ windowDays?: number;
62
+ /** Minimum impressions in the prior window for a row to qualify. Default 10. */
63
+ minPriorImpressions?: number;
64
+ /** Minimum impressions in the recent window. Default 50. */
65
+ minRecentImpressions?: number;
66
+ /** Result limit. Default 20. */
67
+ limit?: number;
68
+ }
69
+ interface StrikingMomentumRow {
70
+ query: string;
71
+ url: string;
72
+ recent_impr: number;
73
+ recent_pos: number;
74
+ prior_impr: number;
75
+ prior_pos: number;
76
+ momentum_score: number;
77
+ }
78
+ declare function strikingMomentum(runner: InsightRunner, opts?: StrikingMomentumOptions): Promise<StrikingMomentumRow[]>;
79
+ interface QueryResult {
80
+ rows: Record<string, unknown>[];
81
+ queryMs: number;
82
+ }
83
+ interface AnalyzeResult {
84
+ results: Record<string, unknown>[];
85
+ meta: Record<string, unknown>;
86
+ queryMs: number;
87
+ }
88
+ interface DuckDBWasmBootResult {
89
+ db: AsyncDuckDB;
90
+ conn: AsyncDuckDBConnection;
91
+ }
92
+ interface BootDuckDBWasmOptions {
93
+ logger?: unknown;
94
+ /**
95
+ * Override the jsDelivr-hosted bundle map. Required in environments where
96
+ * the default CDN is unreachable or where hosts must serve the WASM +
97
+ * worker assets themselves (e.g. Cloudflare Workers' 25 MB per-asset cap).
98
+ */
99
+ bundles?: DuckDBBundles;
100
+ }
101
+ interface BrowserParquetFile {
102
+ bytes: Uint8Array;
103
+ name?: string;
104
+ }
105
+ interface BrowserParquetTable {
106
+ table: string;
107
+ files: BrowserParquetFile[];
108
+ }
109
+ interface BrowserParquetUrlTable {
110
+ table: string;
111
+ urls: string[];
112
+ }
113
+ interface AttachParquetTablesOptions {
114
+ db: AsyncDuckDB;
115
+ conn: AsyncDuckDBConnection;
116
+ tables: BrowserParquetTable[];
117
+ schema?: string;
118
+ }
119
+ interface AttachParquetUrlTablesOptions {
120
+ db: AsyncDuckDB;
121
+ conn: AsyncDuckDBConnection;
122
+ tables: BrowserParquetUrlTable[];
123
+ fetch?: typeof fetch;
124
+ schema?: string;
125
+ fetchInit?: RequestInit;
126
+ /**
127
+ * Manifest version the caller associates with this set of URLs. Returned
128
+ * on the resulting handle so callers can compare against a fresh manifest
129
+ * probe without re-attaching. Purely advisory — the runtime never derives
130
+ * behavior from the value itself.
131
+ */
132
+ version?: number | string;
133
+ /**
134
+ * Called once per parquet file after it's been fetched and registered with
135
+ * DuckDB. Fires in non-deterministic order (Promise.all under the hood).
136
+ * Used by UI progress indicators to tick a per-site counter; a no-op
137
+ * default keeps the hot path free.
138
+ */
139
+ onFileAttached?: (info: {
140
+ table: string;
141
+ index: number;
142
+ total: number;
143
+ }) => void;
144
+ }
145
+ interface AttachSingleTableOptions {
146
+ db: AsyncDuckDB;
147
+ conn: AsyncDuckDBConnection;
148
+ table: string;
149
+ urls: string[];
150
+ fetch?: typeof fetch;
151
+ schema?: string;
152
+ fetchInit?: RequestInit;
153
+ }
154
+ /**
155
+ * Handle returned from {@link attachParquetUrlTables}. Lets callers detach
156
+ * the created views (for lazy re-attach on a new manifest version) or cheap-
157
+ * check the embedded version against a fresh probe.
158
+ */
159
+ interface AttachedTablesHandle {
160
+ version: number | string | undefined;
161
+ tables: string[];
162
+ schema: string;
163
+ detach: () => Promise<void>;
164
+ }
165
+ interface BrowserAnalysisRuntime {
166
+ db: AsyncDuckDB;
167
+ conn: AsyncDuckDBConnection;
168
+ query: (sql: string, params?: unknown[], signal?: AbortSignal) => Promise<QueryResult>;
169
+ analyze: (params: AnalysisParams, registry: AnalyzerRegistry, options?: {
170
+ signal?: AbortSignal;
171
+ }) => Promise<AnalyzeResult>;
172
+ /**
173
+ * Returns true when `expected` doesn't match the version the runtime was
174
+ * attached with — cheap check callers can run before each query to decide
175
+ * whether to detach + re-attach against a fresher manifest. Undefined
176
+ * values on either side compare equal so the no-version path is a no-op.
177
+ */
178
+ isStale: (expected: number | string | undefined) => boolean;
179
+ /** Update the runtime's cached manifest version in-place (e.g. after a re-attach). */
180
+ setVersion: (version: number | string | undefined) => void;
181
+ /**
182
+ * Update the list of attached table names. Lets callers fast-fail in
183
+ * `analyze()` when a SQL plan references a table that wasn't in the manifest
184
+ * for this site (e.g. site has only `keywords` parquet, analyzer wants
185
+ * `page_keywords`) — surface a clean `AttachedTableMissingError` so the
186
+ * caller can route to cloud fallback without paying the SQL execution cost.
187
+ */
188
+ setAttachedTables: (tables: readonly string[]) => void;
189
+ close: () => Promise<void>;
190
+ }
191
+ /**
192
+ * Build a `DuckDBBundles` map from a single base URL hosting the standard
193
+ * DuckDB-WASM asset set (matches the names jsDelivr + `@duckdb/duckdb-wasm`
194
+ * ship). Callers pointing at a Worker / R2 / self-hosted origin can pass
195
+ * just the origin instead of duplicating the URL layout across apps.
196
+ *
197
+ * Omits `coi` (pthread) by default; most hosts don't serve the
198
+ * cross-origin-isolation headers needed to use it and requesting a missing
199
+ * asset fails bundle selection on Safari/Firefox.
200
+ */
201
+ declare function createDuckDBBundlesFromBase(baseUrl: string, options?: {
202
+ includeCoi?: boolean;
203
+ }): DuckDBBundles;
204
+ declare function bootDuckDBWasm(options?: BootDuckDBWasmOptions): Promise<DuckDBWasmBootResult>;
205
+ declare function attachParquetTables(options: AttachParquetTablesOptions): Promise<void>;
206
+ declare function attachParquetUrlTables(options: AttachParquetUrlTablesOptions): Promise<AttachedTablesHandle>;
207
+ /**
208
+ * Incremental attach — fetch + register a single table's URLs and create the
209
+ * view, without touching any other table. Use this for lazy attach when a
210
+ * page only needs one of several available tables.
211
+ */
212
+ declare function attachSingleTable(options: AttachSingleTableOptions): Promise<void>;
213
+ /**
214
+ * List the views currently attached under `schema` via DuckDB's
215
+ * `information_schema`. Lets callers decide whether to call
216
+ * `attachSingleTable` before each query without guessing at state.
217
+ */
218
+ declare function listAttachedTables(conn: AsyncDuckDBConnection, schema?: string): Promise<string[]>;
219
+ declare function createBrowserAnalysisRuntime(boot: DuckDBWasmBootResult, options?: {
220
+ schema?: string;
221
+ version?: number | string;
222
+ attachedTables?: readonly string[];
223
+ }): BrowserAnalysisRuntime;
224
+ export { type AnalyzeResult, type AttachParquetTablesOptions, type AttachParquetUrlTablesOptions, type AttachSingleTableOptions, type AttachedTablesHandle, type BootDuckDBWasmOptions, type BrowserAnalysisRuntime, type BrowserParquetFile, type BrowserParquetTable, type BrowserParquetUrlTable, type BrowserQueryRunner, type ComparisonMode, type DuckDBWasmBootResult, type DuckDBWasmClient, DuckDBWasmDatabase, type DuckDBWasmDrizzleDatabase, type EngineConfig, type InsightRunner, type InsightRunnerOptions, type QueryResult, type ResolveWindowOptions, type ResolvedWindow, type Schema, type ScopedRunnerOptions, type StrikingMomentumOptions, type StrikingMomentumRow, type TableScope, type WindowPreset, attachParquetTables, attachParquetUrlTables, attachSingleTable, bootDuckDBWasm, browserResolverAdapter, countries, createBrowserAnalysisRuntime, createClient, createDuckDBBundlesFromBase, createEngine, createInsightRunner, devices, drizzle, keywords, listAttachedTables, mergeScope, page_keywords, pages, resolveWindow, schema, scopeFor, strikingMomentum };
package/dist/index.mjs ADDED
@@ -0,0 +1,370 @@
1
+ import { arrowToRows } from "@gscdump/engine/arrow";
2
+ import { DefaultLogger, NoopLogger, createTableRelationsHelpers, entityKind, extractTablesRelationalConfig, fillPlaceholders, sql } from "drizzle-orm";
3
+ import { PgDatabase, PgDialect, PgPreparedQuery, PgSession } from "drizzle-orm/pg-core";
4
+ import { createSqlQuerySource, pgResolverAdapter as browserResolverAdapter } from "@gscdump/engine/resolver";
5
+ import { toIsoDate } from "gscdump";
6
+ import { countries, devices, drizzleSchema as schema, keywords, page_keywords, pages } from "@gscdump/engine/schema";
7
+ import { createScopedHelpers } from "@gscdump/engine/scope";
8
+ import { runAnalyzerFromSource } from "@gscdump/engine/analyzer";
9
+ import { createAttachedTableSource } from "@gscdump/engine/source";
10
+ import { sqlEscape } from "@gscdump/engine/sql";
11
+ import { resolveWindow } from "@gscdump/engine/period";
12
/**
 * Wrap an already-open DuckDB-WASM database and connection in the small
 * client object the drizzle adapter talks to.
 *
 * @param db   Database instance; exposed unchanged as `client.db`.
 * @param conn Open connection; every query issued through the client uses it.
 * @returns Client exposing `db`, `conn`, `query(sql, params?)` and `close()`.
 */
async function createClient(db, conn) {
	return {
		db,
		conn,
		/**
		 * Execute `sql` and return the result passed through `arrowToRows`.
		 * With no bind values the statement runs directly; otherwise it is
		 * prepared, executed with the spread `params`, and closed again.
		 */
		async query(sql, params = []) {
			// Nothing to bind: skip the prepare/close round trip.
			if (params.length === 0) return arrowToRows(await conn.query(sql));
			const stmt = await conn.prepare(sql);
			try {
				return arrowToRows(await stmt.query(...params));
			} finally {
				// Awaited so a failing close rejects this call instead of
				// escaping as an unhandled rejection.
				await stmt.close();
			}
		},
		/** Close the underlying connection. The database itself is left running. */
		async close() {
			await conn.close();
		}
	};
}
30
/**
 * Prepared query that forwards the compiled SQL and its bound parameters to
 * the (possibly still pending) DuckDB-WASM client.
 */
var DuckDBWasmPreparedQuery = class extends PgPreparedQuery {
	static [entityKind] = "DuckDBWasmPreparedQuery";
	/**
	 * @param client        Client, or a promise of one; awaited on every run.
	 * @param query         Compiled query carrying `sql` and `params`.
	 * @param logger        Receives every statement before it is executed.
	 * @param queryMetadata Forwarded to the base class.
	 * @param cache         Forwarded to the base class.
	 */
	constructor(client, query, logger, queryMetadata, cache) {
		super(query, cache, queryMetadata);
		this.client = client;
		this.logger = logger;
	}
	/** Bind placeholders, log the statement, run it and return the rows. */
	async execute(placeholderValues = {}) {
		const { sql: statement, params: template } = this.query;
		const bound = fillPlaceholders(template, placeholderValues);
		this.logger.logQuery(statement, bound);
		const resolvedClient = await this.client;
		return await resolvedClient.query(statement, bound);
	}
	/** Same contract as `execute`: bind, log, run, return the rows. */
	async all(placeholderValues = {}) {
		const { sql: statement, params: template } = this.query;
		const bound = fillPlaceholders(template, placeholderValues);
		this.logger.logQuery(statement, bound);
		const resolvedClient = await this.client;
		return await resolvedClient.query(statement, bound);
	}
};
48
/**
 * Session that routes drizzle queries to a DuckDB-WASM client. Transactions
 * are deliberately unsupported and throw.
 */
var DuckDBWasmSession = class extends PgSession {
	static [entityKind] = "DuckDBWasmSession";
	logger;
	cache;
	/**
	 * @param client  Client, or a promise of one.
	 * @param dialect Dialect handed to the base session.
	 * @param _schema Accepted for signature compatibility; not used here.
	 * @param options Optional `logger` (defaults to a `NoopLogger`) and `cache`.
	 */
	constructor(client, dialect, _schema, options = {}) {
		super(dialect);
		const { logger, cache } = options;
		this.client = client;
		this.options = options;
		this.logger = logger ?? new NoopLogger();
		this.cache = cache;
	}
	/** Build a prepared query that shares this session's client, logger and cache. */
	prepareQuery(query, _fields, _name, _isResponseInArrayMode, _customResultMapper, queryMetadata) {
		const { client, logger, cache } = this;
		return new DuckDBWasmPreparedQuery(client, query, logger, queryMetadata, cache);
	}
	/** Log and run a raw SQL string with optional bind parameters. */
	async query(sql, params) {
		this.logger.logQuery(sql, params);
		const resolvedClient = await this.client;
		return resolvedClient.query(sql, params);
	}
	/** Always throws: this adapter does not implement transactions. */
	transaction(_transaction, _config) {
		throw new Error("Transactions are not supported by the DuckDB-WASM drizzle adapter. The analytics workload is read-only; if transactions become necessary, implement PgSession.transaction() in @gscdump/engine-duckdb-wasm.");
	}
};
70
/** Drizzle database class for DuckDB-WASM; adds only its own entity kind tag. */
var DuckDBWasmDatabase = class extends PgDatabase {
	static [entityKind] = "DuckDBWasmDatabase";
};
/**
 * Create a drizzle database bound to a DuckDB-WASM client.
 *
 * @param client Client, or a promise of one; normalised to a promise and
 *               exposed on the result as `$client`.
 * @param config Optional drizzle config; `casing`, `logger` and `schema` are read.
 * @returns The database instance with `$client` attached.
 */
function drizzle(client, config = {}) {
	const { casing, logger: loggerOption, schema: fullSchema } = config;
	const dialect = new PgDialect({ casing });
	const clientPromise = Promise.resolve(client);
	// `true` selects the default logger, `false` disables logging, anything
	// else (a logger object or undefined) is passed through untouched.
	const logger = loggerOption === true
		? new DefaultLogger()
		: loggerOption === false ? void 0 : loggerOption;
	let relationalSchema;
	if (fullSchema) {
		const { tables, tableNamesMap } = extractTablesRelationalConfig(fullSchema, createTableRelationsHelpers);
		relationalSchema = {
			fullSchema,
			schema: tables,
			tableNamesMap
		};
	}
	const session = new DuckDBWasmSession(clientPromise, dialect, relationalSchema, { logger });
	const database = new DuckDBWasmDatabase(dialect, session, relationalSchema);
	database.$client = clientPromise;
	return database;
}
92
/**
 * Build the "browser" SQL query source on top of a caller-supplied runner.
 *
 * @param config Object whose `runner.query(sql, params)` executes statements.
 * @returns The source produced by `createSqlQuerySource`, advertising the
 *          `attachedTables` capability.
 */
function createEngine(config) {
	// Captured once so later changes to `config.runner` do not affect the source.
	const queryRunner = config.runner;
	const sourceOptions = {
		name: "browser",
		adapter: browserResolverAdapter,
		execute(sql, params) {
			return queryRunner.query(sql, params);
		},
		extraCapabilities: { attachedTables: true }
	};
	return createSqlQuerySource(sourceOptions);
}
101
+ async function strikingMomentum(runner, opts = {}) {
102
+ const windowDays = opts.windowDays ?? 90;
103
+ const anchor = opts.anchor ?? toIsoDate(/* @__PURE__ */ new Date());
104
+ const minPrior = opts.minPriorImpressions ?? 10;
105
+ const minRecent = opts.minRecentImpressions ?? 50;
106
+ const limit = opts.limit ?? 20;
107
+ const windowExpr = sql`
108
+ WITH pk AS (
109
+ SELECT
110
+ query,
111
+ url,
112
+ ${page_keywords.date} AS date,
113
+ ${page_keywords.impressions} AS impressions,
114
+ ${page_keywords.sum_position} AS sum_position,
115
+ CASE
116
+ WHEN ${page_keywords.date} >= (DATE ${sql.raw(`'${anchor}'`)} - INTERVAL ${sql.raw(`${windowDays}`)} DAY)
117
+ THEN 'recent' ELSE 'prior'
118
+ END AS period
119
+ FROM ${page_keywords}
120
+ WHERE ${page_keywords.date} >= (DATE ${sql.raw(`'${anchor}'`)} - INTERVAL ${sql.raw(`${windowDays * 2}`)} DAY)
121
+ AND ${page_keywords.date} < (DATE ${sql.raw(`'${anchor}'`)} + INTERVAL 1 DAY)
122
+ ),
123
+ agg AS (
124
+ SELECT
125
+ query, url, period,
126
+ SUM(impressions) AS impr,
127
+ SUM(sum_position) / NULLIF(SUM(impressions), 0) + 1 AS weighted_pos
128
+ FROM pk
129
+ GROUP BY query, url, period
130
+ ),
131
+ paired AS (
132
+ SELECT
133
+ query, url,
134
+ MAX(CASE WHEN period = 'recent' THEN impr END) AS recent_impr,
135
+ MAX(CASE WHEN period = 'recent' THEN weighted_pos END) AS recent_pos,
136
+ MAX(CASE WHEN period = 'prior' THEN impr END) AS prior_impr,
137
+ MAX(CASE WHEN period = 'prior' THEN weighted_pos END) AS prior_pos
138
+ FROM agg
139
+ GROUP BY query, url
140
+ ),
141
+ best AS (
142
+ SELECT
143
+ query, url, recent_impr, recent_pos, prior_impr, prior_pos,
144
+ ROW_NUMBER() OVER (PARTITION BY query ORDER BY recent_impr DESC NULLS LAST) AS rn
145
+ FROM paired
146
+ WHERE prior_impr >= ${minPrior} AND recent_impr >= ${minRecent}
147
+ )
148
+ SELECT
149
+ query, url,
150
+ recent_impr, recent_pos, prior_impr, prior_pos,
151
+ (prior_pos - recent_pos)
152
+ * LN(recent_impr + 1)
153
+ * CASE WHEN recent_pos BETWEEN 8 AND 20 THEN 1.5 ELSE 1.0 END
154
+ AS momentum_score
155
+ FROM best
156
+ WHERE rn = 1
157
+ ORDER BY momentum_score DESC NULLS LAST
158
+ LIMIT ${limit}
159
+ `;
160
+ return await runner.db.execute(windowExpr);
161
+ }
162
/**
 * Assemble an insight runner: a drizzle handle over the shared schema plus
 * the client it runs on.
 *
 * @param opts `db` and `conn` from a booted DuckDB-WASM instance, plus an
 *             optional `logger` flag forwarded to drizzle.
 * @returns `{ db, client, close }`, where `client` is an already-resolved
 *          promise and `close` closes that client.
 */
async function createInsightRunner(opts) {
	const client = await createClient(opts.db, opts.conn);
	const clientPromise = Promise.resolve(client);
	const drizzleConfig = {
		schema,
		logger: opts.logger
	};
	const drizzleDb = drizzle(clientPromise, drizzleConfig);
	return {
		db: drizzleDb,
		client: clientPromise,
		close() {
			return client.close();
		}
	};
}
174
// Scope helpers produced by `createScopedHelpers` for the shared drizzle schema.
const scopedHelpers = createScopedHelpers(schema);
const scopeFor = scopedHelpers.scopeFor;
const mergeScope = scopedHelpers.mergeScope;
175
/**
 * Pick the name a parquet buffer is registered under.
 *
 * @param table    Table the file belongs to.
 * @param index    Position of the file within that table.
 * @param provided Caller-chosen name; used as-is unless null or undefined.
 * @returns `provided`, or `<table>_<index>.parquet` when none was given.
 */
function fileName(table, index, provided) {
	if (provided !== null && provided !== void 0) return provided;
	return `${table}_${index}.parquet`;
}
178
/**
 * Build the `CREATE OR REPLACE VIEW` statement that exposes a set of
 * registered parquet files as `<schema>.<table>`.
 *
 * @param schema Schema name, interpolated as-is.
 * @param table  View name, interpolated as-is.
 * @param files  Registered file names; each is passed through `sqlEscape`
 *               and wrapped in single quotes.
 * @returns The SQL text.
 */
function readParquetViewSql(schema, table, files) {
	const quotedNames = [];
	for (const name of files) quotedNames.push(`'${sqlEscape(name)}'`);
	const fileList = quotedNames.join(", ");
	return `CREATE OR REPLACE VIEW ${schema}.${table} AS SELECT * FROM read_parquet([${fileList}], union_by_name = true)`;
}
181
+ function createDuckDBBundlesFromBase(baseUrl, options = {}) {
182
+ const base = baseUrl.replace(/\/+$/, "");
183
+ const bundles = {
184
+ mvp: {
185
+ mainModule: `${base}/duckdb-mvp.wasm`,
186
+ mainWorker: `${base}/duckdb-browser-mvp.worker.js`
187
+ },
188
+ eh: {
189
+ mainModule: `${base}/duckdb-eh.wasm`,
190
+ mainWorker: `${base}/duckdb-browser-eh.worker.js`
191
+ }
192
+ };
193
+ if (options.includeCoi) bundles.coi = {
194
+ mainModule: `${base}/duckdb-coi.wasm`,
195
+ mainWorker: `${base}/duckdb-browser-coi.worker.js`,
196
+ pthreadWorker: `${base}/duckdb-browser-coi.pthread.worker.js`
197
+ };
198
+ return bundles;
199
+ }
200
/**
 * Load `@duckdb/duckdb-wasm` on demand, start its worker, instantiate the
 * database and open one connection.
 *
 * @param options Optional `bundles` (defaults to `getJsDelivrBundles()`) and
 *                `logger` (defaults to a `ConsoleLogger`).
 * @returns `{ db, conn }` for the freshly booted instance.
 * @throws When the selected bundle has no worker script, or when
 *         instantiation/connection fails; in the latter case the worker is
 *         terminated before the error propagates.
 */
async function bootDuckDBWasm(options = {}) {
	const { getJsDelivrBundles, selectBundle, AsyncDuckDB, ConsoleLogger } = await import("@duckdb/duckdb-wasm");
	const bundle = await selectBundle(options.bundles ?? getJsDelivrBundles());
	if (!bundle.mainWorker) throw new Error("bootDuckDBWasm: selected DuckDB-WASM bundle has no mainWorker URL");
	// JSON.stringify yields a correctly escaped JS string literal, so a URL
	// containing quotes or backslashes cannot break out of the bootstrap script.
	const bootstrap = `importScripts(${JSON.stringify(bundle.mainWorker)});`;
	const workerUrl = URL.createObjectURL(new Blob([bootstrap], { type: "text/javascript" }));
	let worker;
	try {
		worker = new Worker(workerUrl);
		const db = new AsyncDuckDB(options.logger ?? new ConsoleLogger(), worker);
		await db.instantiate(bundle.mainModule, bundle.pthreadWorker);
		return {
			db,
			conn: await db.connect()
		};
	} catch (err) {
		// Nothing is handed back to the caller on failure, so stop the worker here.
		worker?.terminate();
		throw err;
	} finally {
		// Release the blob URL on success and on failure alike.
		URL.revokeObjectURL(workerUrl);
	}
}
213
/**
 * Register in-memory parquet buffers with DuckDB and create one view per table.
 *
 * Files are registered one at a time, in order, and a table's view is created
 * before the next table is processed.
 *
 * @param options `db`, `conn`, `tables` (`{ table, files: [{ bytes, name? }] }`)
 *                and an optional `schema` (default `"main"`).
 */
async function attachParquetTables(options) {
	const { db, conn, tables, schema = "main" } = options;
	for (const entry of tables) {
		const registered = [];
		let position = 0;
		while (position < entry.files.length) {
			const { name: providedName, bytes } = entry.files[position];
			const registeredName = fileName(entry.table, position, providedName);
			registered.push(registeredName);
			await db.registerFileBuffer(registeredName, bytes);
			position += 1;
		}
		const viewSql = readParquetViewSql(schema, entry.table, registered);
		await conn.query(viewSql);
	}
}
226
/**
 * Download parquet files by URL, register them with DuckDB and create one
 * view per table. All downloads run concurrently.
 *
 * A table is dropped (no view, a `console.warn` instead) when any of its
 * files fails to download or register; other tables are unaffected. Tables
 * with an empty `urls` list are ignored.
 *
 * @param options `db`, `conn`, `tables` (`{ table, urls }`), plus optional
 *                `fetch`, `fetchInit`, `schema` (default `"main"`), `version`
 *                and `onFileAttached` (called after each file is registered
 *                with `{ table, index, total }`, where `total` counts files
 *                across all tables).
 * @returns Handle with `version`, the attached `tables`, `schema`, and a
 *          `detach()` that drops the created views.
 */
async function attachParquetUrlTables(options) {
	const { db, conn, tables, fetch: fetchImpl = globalThis.fetch.bind(globalThis), schema = "main", fetchInit, version, onFileAttached } = options;
	const flat = [];
	const counts = {};
	for (const [table, urls] of tables.map((t) => [t.table, t.urls])) {
		if (urls.length === 0) continue;
		counts[table] = urls.length;
		for (let i = 0; i < urls.length; i++) flat.push({
			table,
			url: urls[i],
			index: i
		});
	}
	const tableFailures = /* @__PURE__ */ new Map();
	const total = flat.length;
	await Promise.all(flat.map(async ({ table, url, index }) => {
		await fetchImpl(url, fetchInit).then(async (response) => {
			if (!response.ok) throw new Error(`fetch ${url} failed: ${response.status}`);
			const bytes = new Uint8Array(await response.arrayBuffer());
			// Every download starts in the same tick, so a failure can only be
			// observed after awaiting. Checking here skips registering (and
			// reporting progress for) files of a table that is already dropped.
			if (tableFailures.has(table)) return;
			await db.registerFileBuffer(fileName(table, index), bytes);
			onFileAttached?.({
				table,
				index,
				total
			});
		}).catch((err) => {
			tableFailures.set(table, err instanceof Error ? err : new Error(String(err)));
		});
	}));
	const attached = [];
	for (const table of Object.keys(counts)) {
		if (tableFailures.has(table)) continue;
		const names = [];
		for (let i = 0; i < counts[table]; i++) names.push(fileName(table, i));
		await conn.query(readParquetViewSql(schema, table, names));
		attached.push(table);
	}
	if (tableFailures.size > 0) for (const [table, err] of tableFailures) console.warn(`[gscdump/engine-duckdb-wasm] dropped table "${table}" — ${err.message}`);
	return {
		version,
		tables: attached,
		schema,
		/** Drop every view this call created. */
		async detach() {
			for (const table of attached) await conn.query(`DROP VIEW IF EXISTS ${schema}.${table}`);
		}
	};
}
274
/**
 * Download and register the parquet files of one table, then create its view.
 *
 * Does nothing when `urls` is empty. Any failed download rejects the call
 * and no view is created.
 *
 * @param options `db`, `conn`, `table`, `urls`, plus optional `fetch`,
 *                `fetchInit` and `schema` (default `"main"`).
 */
async function attachSingleTable(options) {
	const { db, conn, table, urls, fetch: fetchImpl = globalThis.fetch.bind(globalThis), schema = "main", fetchInit } = options;
	if (urls.length === 0) return;
	const downloadAndRegister = async (url, index) => {
		const response = await fetchImpl(url, fetchInit);
		if (!response.ok) throw new Error(`fetch ${url} failed: ${response.status}`);
		const buffer = await response.arrayBuffer();
		await db.registerFileBuffer(fileName(table, index), new Uint8Array(buffer));
	};
	const pending = urls.map((url, index) => downloadAndRegister(url, index));
	await Promise.all(pending);
	const registeredNames = urls.map((_, position) => fileName(table, position));
	const viewSql = readParquetViewSql(schema, table, registeredNames);
	await conn.query(viewSql);
}
286
/**
 * List the `table_name` of every `information_schema.tables` row in `schema`.
 *
 * @param conn   Connection to query.
 * @param schema Schema to inspect (default `"main"`); passed through `sqlEscape`.
 * @returns The names, each coerced with `String`.
 */
async function listAttachedTables(conn, schema = "main") {
	const lookupSql = `SELECT table_name FROM information_schema.tables WHERE table_schema = '${sqlEscape(schema)}'`;
	const rows = arrowToRows(await conn.query(lookupSql));
	const names = [];
	for (const row of rows) names.push(String(row.table_name));
	return names;
}
289
+ function createBrowserAnalysisRuntime(boot, options = {}) {
290
+ const { db, conn } = boot;
291
+ const schema = options.schema ?? "main";
292
+ let version = options.version;
293
+ let attachedTables = options.attachedTables;
294
+ let chain = Promise.resolve();
295
+ async function cancelOnAbort(signal, work) {
296
+ if (!signal) return work;
297
+ if (signal.aborted) {
298
+ conn.cancelSent().catch(() => {});
299
+ throw signal.reason ?? new DOMException("aborted", "AbortError");
300
+ }
301
+ const onAbort = () => {
302
+ conn.cancelSent().catch(() => {});
303
+ };
304
+ signal.addEventListener("abort", onAbort, { once: true });
305
+ try {
306
+ return await work;
307
+ } finally {
308
+ signal.removeEventListener("abort", onAbort);
309
+ }
310
+ }
311
+ async function runParameterized(sql, params, signal) {
312
+ signal?.throwIfAborted();
313
+ return cancelOnAbort(signal, (async () => {
314
+ if (!params || params.length === 0) return conn.query(sql);
315
+ const stmt = await conn.prepare(sql);
316
+ try {
317
+ return await stmt.query(...params);
318
+ } finally {
319
+ await stmt.close();
320
+ }
321
+ })());
322
+ }
323
+ return {
324
+ db,
325
+ conn,
326
+ async query(sql, params, signal) {
327
+ const t0 = performance.now();
328
+ return {
329
+ rows: arrowToRows(await runParameterized(sql, params, signal)),
330
+ queryMs: performance.now() - t0
331
+ };
332
+ },
333
+ async analyze(params, registry, options = {}) {
334
+ const signal = options.signal;
335
+ const run = async () => {
336
+ signal?.throwIfAborted();
337
+ const t0 = performance.now();
338
+ const result = await runAnalyzerFromSource(createAttachedTableSource({ query: async (sql, bindParams, innerSignal) => {
339
+ return arrowToRows(await runParameterized(sql, bindParams, innerSignal ?? signal));
340
+ } }, {
341
+ schema,
342
+ signal,
343
+ attachedTables
344
+ }), params, registry);
345
+ return {
346
+ results: result.results,
347
+ meta: result.meta,
348
+ queryMs: performance.now() - t0
349
+ };
350
+ };
351
+ const next = chain.then(run, run);
352
+ chain = next.catch(() => {});
353
+ return next;
354
+ },
355
+ isStale(expected) {
356
+ return expected !== version;
357
+ },
358
+ setVersion(next) {
359
+ version = next;
360
+ },
361
+ setAttachedTables(next) {
362
+ attachedTables = next;
363
+ },
364
+ async close() {
365
+ await conn.close();
366
+ await db.terminate();
367
+ }
368
+ };
369
+ }
370
+ export { DuckDBWasmDatabase, attachParquetTables, attachParquetUrlTables, attachSingleTable, bootDuckDBWasm, browserResolverAdapter, countries, createBrowserAnalysisRuntime, createClient, createDuckDBBundlesFromBase, createEngine, createInsightRunner, devices, drizzle, keywords, listAttachedTables, mergeScope, page_keywords, pages, resolveWindow, schema, scopeFor, strikingMomentum };
package/package.json ADDED
@@ -0,0 +1,61 @@
1
+ {
2
+ "name": "@gscdump/engine-duckdb-wasm",
3
+ "type": "module",
4
+ "version": "0.7.2",
5
+ "description": "DuckDB-WASM engine adapter for @gscdump/analysis — typed browser analytics against parquet via R2.",
6
+ "author": {
7
+ "name": "Harlan Wilton",
8
+ "email": "harlan@harlanzw.com",
9
+ "url": "https://harlanzw.com/"
10
+ },
11
+ "license": "MIT",
12
+ "funding": "https://github.com/sponsors/harlan-zw",
13
+ "homepage": "https://github.com/harlan-zw/gscdump/tree/main/packages/engine-duckdb-wasm#readme",
14
+ "repository": {
15
+ "type": "git",
16
+ "url": "git+https://github.com/harlan-zw/gscdump.git",
17
+ "directory": "packages/engine-duckdb-wasm"
18
+ },
19
+ "bugs": {
20
+ "url": "https://github.com/harlan-zw/gscdump/issues"
21
+ },
22
+ "sideEffects": false,
23
+ "exports": {
24
+ ".": {
25
+ "types": "./dist/index.d.mts",
26
+ "import": "./dist/index.mjs",
27
+ "default": "./dist/index.mjs"
28
+ }
29
+ },
30
+ "main": "./dist/index.mjs",
31
+ "types": "./dist/index.d.mts",
32
+ "files": [
33
+ "dist"
34
+ ],
35
+ "engines": {
36
+ "node": ">=18"
37
+ },
38
+ "peerDependencies": {
39
+ "@duckdb/duckdb-wasm": "^1.32.0"
40
+ },
41
+ "peerDependenciesMeta": {
42
+ "@duckdb/duckdb-wasm": {
43
+ "optional": true
44
+ }
45
+ },
46
+ "dependencies": {
47
+ "drizzle-orm": "^0.45.2",
48
+ "@gscdump/engine": "0.7.2",
49
+ "gscdump": "0.7.2"
50
+ },
51
+ "devDependencies": {
52
+ "@duckdb/duckdb-wasm": "^1.32.0",
53
+ "vitest": "^4.1.5"
54
+ },
55
+ "scripts": {
56
+ "build": "obuild",
57
+ "dev": "obuild --stub",
58
+ "typecheck": "tsc --noEmit",
59
+ "test": "vitest"
60
+ }
61
+ }