@gscdump/engine 0.9.1 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,7 +3,6 @@ import { escapeLike } from "../sql-fragments.mjs";
3
3
  import { buildLogicalComparisonPlan, buildLogicalPlan } from "gscdump/query/plan";
4
4
  import { PgDialect } from "drizzle-orm/pg-core";
5
5
  import { sql } from "drizzle-orm";
6
- import { SQLiteAsyncDialect } from "drizzle-orm/sqlite-core";
7
6
  const COMPARISON_FILTER_SQL = {
8
7
  new: sql`AND (p.impressions IS NULL OR p.impressions = 0)`,
9
8
  lost: sql`AND p.impressions > 0 AND c.impressions = 0`,
@@ -132,9 +131,9 @@ function resolveToSQLOptimized(state, options) {
132
131
  }
133
132
  if (hasDate) cteSelect.push(adapter.dateColRef(tableKey));
134
133
  const t = schema[tableKey];
135
- cteSelect.push(sql`SUM(${t.clicks}) as clicks`);
136
- cteSelect.push(sql`SUM(${t.impressions}) as impressions`);
137
- cteSelect.push(sql`SUM(${t.sum_position}) as sum_position`);
134
+ cteSelect.push(sql`CAST(SUM(${t.clicks}) AS DOUBLE) as clicks`);
135
+ cteSelect.push(sql`CAST(SUM(${t.impressions}) AS DOUBLE) as impressions`);
136
+ cteSelect.push(sql`CAST(SUM(${t.sum_position}) AS DOUBLE) as sum_position`);
138
137
  const groupByExprs = groupByDims.map((d) => adapter.dimExprSql(d, tableKey));
139
138
  if (hasDate) groupByExprs.push(adapter.dateColRef(tableKey));
140
139
  const outerSelect = [];
@@ -144,11 +143,11 @@ function resolveToSQLOptimized(state, options) {
144
143
  for (const m of metrics) switch (m) {
145
144
  case "clicks":
146
145
  outerSelect.push(sql.raw("clicks"));
147
- outerTotals.push(sql.raw("SUM(clicks) OVER() as totalClicks"));
146
+ outerTotals.push(sql.raw("CAST(SUM(clicks) OVER() AS DOUBLE) as totalClicks"));
148
147
  break;
149
148
  case "impressions":
150
149
  outerSelect.push(sql.raw("impressions"));
151
- outerTotals.push(sql.raw("SUM(impressions) OVER() as totalImpressions"));
150
+ outerTotals.push(sql.raw("CAST(SUM(impressions) OVER() AS DOUBLE) as totalImpressions"));
152
151
  break;
153
152
  case "ctr":
154
153
  outerSelect.push(sql.raw("CAST(clicks AS REAL) / NULLIF(impressions, 0) as ctr"));
@@ -626,7 +625,6 @@ function createResolverAdapter(config) {
626
625
  };
627
626
  }
628
627
  const pgDialect = new PgDialect();
629
- const sqliteDialect = new SQLiteAsyncDialect();
630
628
  function compilePg(query) {
631
629
  const compiled = pgDialect.sqlToQuery(query);
632
630
  return {
@@ -634,13 +632,6 @@ function compilePg(query) {
634
632
  params: compiled.params
635
633
  };
636
634
  }
637
- function compileSqlite(query) {
638
- const compiled = sqliteDialect.sqlToQuery(query);
639
- return {
640
- sql: compiled.sql,
641
- params: compiled.params
642
- };
643
- }
644
635
  const PG_BASE_CONFIG = {
645
636
  schema: drizzleSchema,
646
637
  datasetToTableKey: {
@@ -673,4 +664,4 @@ function createParquetResolverAdapter() {
673
664
  tableRef: (tk) => sql.raw(`read_parquet({{FILES}}, union_by_name = true) AS "${tk}"`)
674
665
  });
675
666
  }
676
- export { resolveToSQL as _, createResolverAdapter as a, LOGICAL_DATASETS as c, inferLogicalDataset as d, supportsDimensionOnSurface as f, resolveComparisonSQL as g, mergeExtras as h, compileSqlite as i, assertDimensionsSupported as l, buildTotalsSql as m, pgResolverAdapter as n, createSqlFragments as o, buildExtrasQueries as p, compilePg as r, DIMENSION_SURFACES as s, createParquetResolverAdapter as t, dimensionColumn as u, resolveToSQLOptimized as v };
667
+ export { DIMENSION_SURFACES as a, dimensionColumn as c, buildExtrasQueries as d, buildTotalsSql as f, resolveToSQLOptimized as g, resolveToSQL as h, createSqlFragments as i, inferLogicalDataset as l, resolveComparisonSQL as m, pgResolverAdapter as n, LOGICAL_DATASETS as o, mergeExtras as p, createResolverAdapter as r, assertDimensionsSupported as s, createParquetResolverAdapter as t, supportsDimensionOnSurface as u };
@@ -1,12 +1,64 @@
1
- import { T as Row } from "./storage.mjs";
1
+ import { T as Row$1 } from "./storage.mjs";
2
+ import { o as ResolverAdapter } from "./types.mjs";
2
3
  import { t as AnalysisParams } from "./analysis-types.mjs";
3
- import { r as FileSet } from "./source-types.mjs";
4
+ import { PlannerCapabilities } from "gscdump/query/plan";
5
+ import { TableName } from "gscdump/contracts";
4
6
  import { BuilderState } from "gscdump/query";
7
+ type QueryRow = Record<string, unknown>;
8
+ interface FileSet {
9
+ table: TableName;
10
+ partitions: string[];
11
+ }
12
+ interface ExecuteSqlOptions {
13
+ fileSets?: Record<string, FileSet>;
14
+ }
5
15
  /**
6
- * Capabilities a Plan may require of its host. A dispatcher matches these
7
- * against a source's declared capabilities and rejects mismatches.
16
+ * Flat capability bag: planner-side flags (`regex`, `comparisonJoin`, ...)
17
+ * mixed with storage-side flags. `executeSql: true` means the source provides
18
+ * the `executeSql` method; analyzer dispatch reads this single flag instead
19
+ * of probing the function shape.
8
20
  */
9
- type Capability = 'executeSql' | 'partitionedParquet' | 'attachedTables' | 'regex' | 'windowTotals' | 'comparisonJoin';
21
+ interface SourceCapabilities extends PlannerCapabilities {
22
+ executeSql?: boolean;
23
+ attachedTables?: boolean;
24
+ fileSets?: boolean;
25
+ /**
26
+ * true iff the source provides a `ResolverAdapter` for analyzers that
27
+ * compose SQL from a typed `BuilderState` at plan-build time.
28
+ */
29
+ adapter?: boolean;
30
+ }
31
+ type AnalysisSourceKind = 'local' | 'browser' | 'live' | 'in-memory' | 'composite' | 'attached-table';
32
+ interface AnalysisQuerySource {
33
+ name?: string;
34
+ /** Telemetry tag stamped onto analyzer result meta; not used for routing. */
35
+ kind?: AnalysisSourceKind;
36
+ capabilities: SourceCapabilities;
37
+ /**
38
+ * Dialect adapter surfaced for analyzers that compose SQL from a
39
+ * `BuilderState` at plan-build time. Optional for pure row sources.
40
+ */
41
+ adapter?: ResolverAdapter<any>;
42
+ /** Tenant scope; multi-tenant dialects (sqlite/D1) require it, parquet omits it. */
43
+ siteId?: string | number;
44
+ queryRows: (state: BuilderState) => Promise<QueryRow[]>;
45
+ /**
46
+ * Present iff `capabilities.executeSql === true`. Receives the compiled
47
+ * SQL plan with `{{FILES}}` placeholders; sources that advertise
48
+ * `capabilities.fileSets` consume `opts.fileSets`, others ignore them.
49
+ */
50
+ executeSql?: (sql: string, params?: unknown[], opts?: ExecuteSqlOptions) => Promise<QueryRow[]>;
51
+ }
52
+ /**
53
+ * Capabilities a Plan may require of its host. Dispatch matches `requires`
54
+ * against the source's declared `capabilities` (and the presence of
55
+ * `executeSql`) and rejects mismatches.
56
+ *
57
+ * `'executeSql'` checks for the method on the source; the rest are flag keys
58
+ * on `SourceCapabilities`. Single source of truth — adding a new capability
59
+ * is one line in `SourceCapabilities`.
60
+ */
61
+ type RequiredCapability = 'executeSql' | keyof SourceCapabilities;
10
62
  interface SqlExtraQuery {
11
63
  name: string;
12
64
  sql: string;
@@ -24,13 +76,11 @@ interface SqlPlan {
24
76
  previous?: FileSet;
25
77
  extraFiles?: Record<string, FileSet>;
26
78
  extraQueries?: SqlExtraQuery[];
27
- /** Emits direct table refs (browser-only). Dispatcher rejects for manifest path. */
28
- requiresAttachedTables?: boolean;
29
79
  }
30
- interface TypedRowQuery<T extends Row = Row> {
80
+ interface TypedRowQuery<T extends Row$1 = Row$1> {
31
81
  state: BuilderState;
32
82
  /** Optional type tag for downstream narrowing. */
33
- rowType?: (row: Row) => T;
83
+ rowType?: (row: Row$1) => T;
34
84
  }
35
85
  /**
36
86
  * Row-queries plan: a named set of typed `BuilderState` queries. A portable
@@ -42,7 +92,29 @@ interface RowQueriesPlan {
42
92
  queries: Record<string, TypedRowQuery>;
43
93
  }
44
94
  type Plan = SqlPlan | RowQueriesPlan;
45
- interface ReduceContext<TRow extends Row = Row> {
95
+ /**
96
+ * Plan-build context. Surfaced from the source at dispatch time so analyzers
97
+ * that compose SQL from a typed `BuilderState` can pick up the right dialect
98
+ * adapter without importing one directly. Most SQL analyzers emit static SQL
99
+ * and ignore this; only the BuilderState-driven `data-query` / `data-detail`
100
+ * analyzers consume it today.
101
+ *
102
+ * `adapter` is optional on the type; analyzers that need it should call
103
+ * `requireAdapter(ctx, id)` rather than non-null-asserting. Capability
104
+ * declaration (`'adapter'` in `requires`) is the runtime guarantee; the
105
+ * helper makes the failure mode loud if the contract is broken.
106
+ */
107
+ interface BuildContext {
108
+ adapter?: ResolverAdapter<any>;
109
+ siteId?: string | number;
110
+ }
111
+ /**
112
+ * Throw a uniform error if a SQL analyzer declared the `'adapter'` capability
113
+ * but the dispatcher handed it a context without one. Centralizes the assert
114
+ * so analyzers don't repeat `ctx.adapter!` with explanatory comments.
115
+ */
116
+ declare function requireAdapter(ctx: BuildContext, analyzerId: string): ResolverAdapter<any>;
117
+ interface ReduceContext<TRow extends Row$1 = Row$1> {
46
118
  params: AnalysisParams;
47
119
  /** Extra SQL-query results keyed by `SqlExtraQuery.name`. */
48
120
  extras?: Record<string, TRow[]>;
@@ -53,25 +125,75 @@ interface ReduceContext<TRow extends Row = Row> {
53
125
  * when their reducer assumes specific columns exist — catches drift between
54
126
  * `build` (SELECT list) and `reduce` (column access) at compile time.
55
127
  */
56
- interface Analyzer<P extends AnalysisParams = AnalysisParams, R = unknown, TRow extends Row = Row> {
128
+ interface Analyzer<P extends AnalysisParams = AnalysisParams, R = unknown, TRow extends Row$1 = Row$1> {
57
129
  /** Stable tool id (e.g. `striking-distance`, `opportunity`). */
58
130
  id: string;
59
131
  /** Capabilities a host source must provide. */
60
- requires: readonly Capability[];
61
- /** Pure: params → plan. Snapshot-testable. */
62
- build: (params: P) => Plan;
132
+ requires: readonly RequiredCapability[];
133
+ /** Pure: params → plan. Snapshot-testable. `ctx` carries the source's dialect adapter when one is available. */
134
+ build: (params: P, ctx?: BuildContext) => Plan;
63
135
  /** Pure: rows + context → typed result + meta. */
64
136
  reduce: (rows: TRow[] | Record<string, TRow[]>, ctx: ReduceContext<TRow>) => {
65
137
  results: R;
66
138
  meta?: Record<string, unknown>;
67
139
  };
68
140
  }
141
+ interface SqlPlanSpec {
142
+ sql: string;
143
+ params: unknown[];
144
+ current: FileSet;
145
+ previous?: FileSet;
146
+ extraFiles?: Record<string, FileSet>;
147
+ extraQueries?: SqlExtraQuery[];
148
+ }
149
+ interface ReduceCtx<InputRow> {
150
+ /** Extra SQL-query results keyed by `SqlExtraQuery.name` (SQL path only). */
151
+ extras?: Record<string, InputRow[]>;
152
+ }
153
+ type Reducer<Params, InputRow, Result> = (rows: InputRow[] | Record<string, InputRow[]>, params: Params, ctx: ReduceCtx<InputRow>) => {
154
+ results: Result;
155
+ meta?: Record<string, unknown>;
156
+ };
157
+ interface DefineAnalyzerOptions<Params extends AnalysisParams, InputRow, Result> {
158
+ id: string;
159
+ /**
160
+ * Shared reducer used by both SQL and row paths. Use this when the
161
+ * post-aggregation row count is small and filter/sort/derive can live in
162
+ * one place. Mutually exclusive with `reduceSql` / `reduceRows`.
163
+ */
164
+ reduce?: Reducer<Params, InputRow, Result>;
165
+ /** SQL-only reducer. Required when `buildSql` is set without `reduce`. */
166
+ reduceSql?: Reducer<Params, InputRow, Result>;
167
+ /** Row-only reducer. Required when `buildRows` is set without `reduce`. */
168
+ reduceRows?: Reducer<Params, InputRow, Result>;
169
+ /** SQL plan builder. Omit if the analyzer has no SQL path. */
170
+ buildSql?: (params: Params, ctx: BuildContext) => SqlPlanSpec;
171
+ /** Row plan builder. Omit if the analyzer has no row path. */
172
+ buildRows?: (params: Params, ctx: BuildContext) => Record<string, BuilderState>;
173
+ /** Capabilities required by the SQL plan. Defaults to `['executeSql', 'fileSets']`. */
174
+ sqlRequires?: readonly RequiredCapability[];
175
+ /** Capabilities required by the row plan. Defaults to `[]`. */
176
+ rowsRequires?: readonly RequiredCapability[];
177
+ }
178
+ interface DefinedAnalyzer {
179
+ id: string;
180
+ sql?: Analyzer;
181
+ rows?: Analyzer;
182
+ }
183
+ declare function defineAnalyzer<Params extends AnalysisParams, InputRow, Result>(opts: DefineAnalyzerOptions<Params, InputRow, Result>): DefinedAnalyzer;
69
184
  interface AnalyzerVariants {
70
185
  sql?: Analyzer;
71
186
  rows?: Analyzer;
72
187
  }
73
188
  interface AnalyzerRegistryInit {
189
+ /**
190
+ * Preferred for in-tree composition: pass `DefinedAnalyzer[]` directly so
191
+ * SQL/row variants can never drift apart from their `defineAnalyzer` site.
192
+ */
193
+ defined?: readonly DefinedAnalyzer[];
194
+ /** Flat-array path retained for narrow tree-shaken registry composition. */
74
195
  rows?: readonly Analyzer[];
196
+ /** Flat-array path retained for narrow tree-shaken registry composition. */
75
197
  sql?: readonly Analyzer[];
76
198
  }
77
199
  interface AnalyzerRegistry {
@@ -89,4 +211,4 @@ interface AnalyzerRegistry {
89
211
  * or per-request in a worker).
90
212
  */
91
213
  declare function createAnalyzerRegistry(init?: AnalyzerRegistryInit): AnalyzerRegistry;
92
- export { Analyzer as a, ReduceContext as c, SqlPlan as d, TypedRowQuery as f, createAnalyzerRegistry as i, RowQueriesPlan as l, AnalyzerRegistryInit as n, Capability as o, AnalyzerVariants as r, Plan as s, AnalyzerRegistry as t, SqlExtraQuery as u };
214
+ export { ExecuteSqlOptions as C, SourceCapabilities as E, AnalysisSourceKind as S, QueryRow as T, SqlExtraQuery as _, DefineAnalyzerOptions as a, requireAdapter as b, Reducer as c, Analyzer as d, BuildContext as f, RowQueriesPlan as g, RequiredCapability as h, createAnalyzerRegistry as i, SqlPlanSpec as l, ReduceContext as m, AnalyzerRegistryInit as n, DefinedAnalyzer as o, Plan as p, AnalyzerVariants as r, ReduceCtx as s, AnalyzerRegistry as t, defineAnalyzer as u, SqlPlan as v, FileSet as w, AnalysisQuerySource as x, TypedRowQuery as y };
@@ -1,26 +1,6 @@
1
1
  import { t as SCHEMAS } from "./schema.mjs";
2
- import { _ as resolveToSQL } from "./pg-adapter.mjs";
2
+ import "./pg-adapter.mjs";
3
3
  import { normalizeUrl } from "gscdump/normalize";
4
- function createSqlQuerySource(options) {
5
- const { name, adapter, execute, siteId, extraCapabilities } = options;
6
- return {
7
- name,
8
- capabilities: {
9
- ...adapter.capabilities,
10
- ...extraCapabilities
11
- },
12
- async queryRows(state) {
13
- const resolved = resolveToSQL(state, {
14
- adapter,
15
- siteId
16
- });
17
- return execute(resolved.sql, resolved.params);
18
- },
19
- executeSql(sql, params) {
20
- return execute(sql, params ?? []);
21
- }
22
- };
23
- }
24
4
  function collectInternalFilters(filter) {
25
5
  if (!filter || !("_filters" in filter)) return [];
26
6
  const flat = filter._filters;
@@ -85,7 +65,4 @@ function assertSchemaInSync(options) {
85
65
  if (missing.length > 0 || extra.length > 0) throw new Error(`${label} drizzle schema for '${key}' drifted from SCHEMAS. Missing: [${missing.join(", ")}]. Extra: [${extra.join(", ")}].`);
86
66
  }
87
67
  }
88
- function isSqlQuerySource(s) {
89
- return typeof s.executeSql === "function";
90
- }
91
- export { getFilterDimensions as a, matchesMetricFilter as c, createSqlQuerySource as d, getDimensionFilters as i, matchesTopLevelPage as l, assertSchemaInSync as n, getInternalFilters as o, dimensionValue as r, matchesDimensionFilter as s, isSqlQuerySource as t, metricValue as u };
68
+ export { getInternalFilters as a, matchesTopLevelPage as c, getFilterDimensions as i, metricValue as l, dimensionValue as n, matchesDimensionFilter as o, getDimensionFilters as r, matchesMetricFilter as s, assertSchemaInSync as t };
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Describes a hot/cold snapshot set. Produced by the snapshot builder,
3
+ * consumed by `attachSnapshotIndex`. Filenames are derived from `cold`
4
+ * via `cold-${yearMonth}.duckdb`; hot is always `hot.duckdb` when
5
+ * `hot: true`.
6
+ */
7
+ interface SnapshotIndex {
8
+ version: 1;
9
+ builtAt: string;
10
+ cold: string[];
11
+ hot: boolean;
12
+ hotDays: number;
13
+ }
14
+ export { SnapshotIndex as t };
@@ -480,24 +480,5 @@ interface EngineOptions {
480
480
  now?: () => number;
481
481
  }
482
482
  declare function dayPartition(date: string): string;
483
- declare function monthPartition(month: string): string;
484
- /**
485
- * Weekly partition keyed by the Monday-of-week ISO date (e.g. `weekly/2026-04-20`
486
- * for the ISO week containing 2026-04-22). Names are stable + sortable; the
487
- * dashboard never parses them, only reads via the manifest.
488
- */
489
- declare function weekPartition(mondayIsoDate: string): string;
490
- /**
491
- * Quarterly partition (e.g. `quarterly/2026-Q2` for Apr-Jun 2026). Used as the
492
- * cold-tier shape for `d90` compaction outputs.
493
- */
494
- declare function quarterPartition(quarter: string): string;
495
- /**
496
- * Monday-of-week as a YYYY-MM-DD string for the ISO week containing `isoDate`.
497
- * Used by tiered compaction to bucket raw daily files into weekly groups.
498
- */
499
- declare function mondayOfWeek(isoDate: string): string;
500
- /** YYYY-Qq for the quarter containing the given YYYY-MM month string. */
501
- declare function quarterOfMonth(month: string): string;
502
483
  declare function objectKey(ctx: TenantCtx, table: TableName, partition: string, version: number, searchType?: SearchType): string;
503
- export { SyncStateDetail as A, WriteResult as B, QueryExecutor as C, SearchType$1 as D, RunSQLOptions as E, TenantCtx$1 as F, monthPartition as G, inferLegacyTier as H, Watermark as I, quarterPartition as J, objectKey as K, WatermarkFilter as L, SyncStateKind as M, SyncStateScope as N, StorageEngine as O, TableName$1 as P, WatermarkScope as R, QueryExecuteResult as S, Row$1 as T, inferSearchType as U, dayPartition as V, mondayOfWeek as W, CompactionThresholds as X, weekPartition as Y, enumeratePartitions as Z, PurgeFilter as _, DataSource as a, QueryCtx as b, FileSetRef as c, LockScope as d, ManifestEntry as f, ParquetCodec as g, OptimizedQueryResult as h, DEFAULT_SEARCH_TYPE as i, SyncStateFilter as j, SyncState as k, GcCtx as l, ManifestStore as m, CompactionTier as n, EngineOptions as o, ManifestPurgeResult as p, quarterOfMonth as q, ComparisonResult as r, ExtraResult as s, CodecCtx as t, ListLiveFilter as u, PurgeResult as v, QueryResult as w, QueryExecuteOptions as x, PurgeUrlsResult as y, WriteCtx as z };
484
+ export { SyncStateDetail as A, WriteResult as B, QueryExecutor as C, SearchType$1 as D, RunSQLOptions as E, TenantCtx$1 as F, CompactionThresholds as G, inferLegacyTier as H, Watermark as I, enumeratePartitions as K, WatermarkFilter as L, SyncStateKind as M, SyncStateScope as N, StorageEngine as O, TableName$1 as P, WatermarkScope as R, QueryExecuteResult as S, Row$1 as T, inferSearchType as U, dayPartition as V, objectKey as W, PurgeFilter as _, DataSource as a, QueryCtx as b, FileSetRef as c, LockScope as d, ManifestEntry as f, ParquetCodec as g, OptimizedQueryResult as h, DEFAULT_SEARCH_TYPE as i, SyncStateFilter as j, SyncState as k, GcCtx as l, ManifestStore as m, CompactionTier as n, EngineOptions as o, ManifestPurgeResult as p, ComparisonResult as r, ExtraResult as s, CodecCtx as t, ListLiveFilter as u, PurgeResult as v, QueryResult as w, QueryExecuteOptions as x, PurgeUrlsResult as y, WriteCtx as z };
@@ -0,0 +1,91 @@
1
+ import { O as StorageEngine, a as DataSource } from "../_chunks/storage.mjs";
2
+ import { NodeDuckDBOptions, createNodeDuckDBHandle, resetNodeDuckDB } from "./duckdb-node.mjs";
3
+ import { t as SnapshotIndex } from "../_chunks/snapshot.mjs";
4
+ import { Row, TableName } from "gscdump/contracts";
5
+ interface NodeHarnessOptions {
6
+ dataDir: string;
7
+ /** Tenant user id. Defaults to `'local'` for single-user CLI installs. */
8
+ userId?: string;
9
+ /** Name of the manifest file under `dataDir`. Defaults to `manifest.json`. */
10
+ manifestFilename?: string;
11
+ }
12
+ interface NodeHarness {
13
+ engine: StorageEngine;
14
+ /**
15
+ * Underlying filesystem-backed DataSource. Exposed so commands that write
16
+ * derivative artifacts (rollups, exports) don't have to re-instantiate it.
17
+ */
18
+ dataSource: DataSource;
19
+ dataDir: string;
20
+ userId: string;
21
+ siteIdFor: (siteUrl: string) => string;
22
+ runRawSql: (opts: {
23
+ sql: string;
24
+ siteUrl: string;
25
+ table: TableName;
26
+ params?: unknown[];
27
+ }) => Promise<{
28
+ rows: Row[];
29
+ sql: string;
30
+ keys: string[];
31
+ }>;
32
+ }
33
+ declare function createNodeHarness(opts: NodeHarnessOptions): NodeHarness;
34
+ /**
35
+ * Runs arbitrary SQL and returns rows as plain objects. Caller supplies
36
+ * this so the function works with AsyncDuckDB (browser DuckDB-WASM) or
37
+ * @duckdb/node-api (Node) without coupling to either.
38
+ */
39
+ type SnapshotQueryRunner = (sql: string) => Promise<Array<Record<string, unknown>>>;
40
+ interface AttachSnapshotOptions {
41
+ /** Index produced by the builder. */
42
+ index: SnapshotIndex;
43
+ /**
44
+ * Map from filename (`cold-YYYY-MM.duckdb`, `hot.duckdb`) to an HTTPS
45
+ * URL (typically a pre-signed R2 URL). Must contain an entry for every
46
+ * cold month in `index.cold` and — if `index.hot` — for `hot.duckdb`.
47
+ */
48
+ attachUrls: Record<string, string>;
49
+ /** Schema the unified views land under. Default `main`. */
50
+ schema?: string;
51
+ /**
52
+ * DuckDB httpfs can error with "Server sent back more data than expected"
53
+ * against some proxies; `force_download=true` sidesteps it. Default true.
54
+ */
55
+ forceDownload?: boolean;
56
+ }
57
+ interface AttachSnapshotResult {
58
+ schema: string;
59
+ /** Aliases we ATTACH'd — e.g. ['cold_2024_09', 'cold_2024_10', 'hot']. */
60
+ aliases: string[];
61
+ /** Table names with a UNION view created under `schema`. */
62
+ tables: string[];
63
+ }
64
+ /**
65
+ * Turns a filename like `cold-2024-09.duckdb` into a valid SQL identifier
66
+ * `cold_2024_09`. `hot.duckdb` → `hot`.
67
+ */
68
+ declare function snapshotAlias(fileName: string): string;
69
+ declare function attachSnapshotIndex(runner: SnapshotQueryRunner, opts: AttachSnapshotOptions): Promise<AttachSnapshotResult>;
70
+ interface AttachParquetIndexOptions {
71
+ /**
72
+ * Map of table name → list of Parquet URLs. The URL list may mix monthly
73
+ * compacted files and per-day files — DuckDB will scan all of them with
74
+ * `union_by_name = true`. Empty lists are skipped (no view created).
75
+ */
76
+ tables: Record<string, string[]>;
77
+ /** Schema the views land under. Default `main`. */
78
+ schema?: string;
79
+ /**
80
+ * DuckDB httpfs can error with "Server sent back more data than expected"
81
+ * against some proxies; `force_download=true` sidesteps it. Default true.
82
+ */
83
+ forceDownload?: boolean;
84
+ }
85
+ interface AttachParquetIndexResult {
86
+ schema: string;
87
+ /** Tables for which a view was created. */
88
+ tables: string[];
89
+ }
90
+ declare function attachParquetIndex(runner: SnapshotQueryRunner, opts: AttachParquetIndexOptions): Promise<AttachParquetIndexResult>;
91
+ export { type AttachParquetIndexOptions, type AttachParquetIndexResult, type AttachSnapshotOptions, type AttachSnapshotResult, type NodeDuckDBOptions, type NodeHarness, type NodeHarnessOptions, type SnapshotQueryRunner, attachParquetIndex, attachSnapshotIndex, createNodeDuckDBHandle, createNodeHarness, resetNodeDuckDB, snapshotAlias };
@@ -0,0 +1,133 @@
1
+ import { a as createDuckDBExecutor, i as createDuckDBCodec, n as createStorageEngine } from "../_chunks/engine.mjs";
2
+ import { createNodeDuckDBHandle, resetNodeDuckDB } from "./duckdb-node.mjs";
3
+ import { createFilesystemDataSource, createFilesystemManifestStore } from "./filesystem.mjs";
4
+ import path from "node:path";
5
+ import { encodeSiteId } from "gscdump/tenant";
6
+ function createNodeHarness(opts) {
7
+ const dataDir = opts.dataDir;
8
+ const userId = opts.userId ?? "local";
9
+ const manifestFilename = opts.manifestFilename ?? "manifest.json";
10
+ const handle = createNodeDuckDBHandle();
11
+ const factory = { getDuckDB: async () => handle };
12
+ const dataSource = createFilesystemDataSource({ rootDir: dataDir });
13
+ const engine = createStorageEngine({
14
+ dataSource,
15
+ manifestStore: createFilesystemManifestStore({ path: path.join(dataDir, manifestFilename) }),
16
+ codec: createDuckDBCodec(factory),
17
+ executor: createDuckDBExecutor(factory)
18
+ });
19
+ async function runRawSql(runOpts) {
20
+ const result = await engine.runSQL({
21
+ ctx: {
22
+ userId,
23
+ siteId: encodeSiteId(runOpts.siteUrl)
24
+ },
25
+ table: runOpts.table,
26
+ fileSets: { FILES: { table: runOpts.table } },
27
+ sql: runOpts.sql,
28
+ params: runOpts.params ?? []
29
+ });
30
+ return {
31
+ rows: result.rows,
32
+ sql: result.sql,
33
+ keys: result.objectKeys
34
+ };
35
+ }
36
+ return {
37
+ engine,
38
+ dataSource,
39
+ dataDir,
40
+ userId,
41
+ siteIdFor: encodeSiteId,
42
+ runRawSql
43
+ };
44
+ }
45
+ const IDENT_RE = /^[A-Z_][\w$]*$/i;
46
+ async function attachParquetIndex(runner, opts) {
47
+ const schema = opts.schema ?? "main";
48
+ const forceDownload = opts.forceDownload !== false;
49
+ if (!IDENT_RE.test(schema)) throw new TypeError(`attachParquetIndex: invalid schema identifier ${JSON.stringify(schema)}`);
50
+ for (const table of Object.keys(opts.tables)) if (!IDENT_RE.test(table)) throw new TypeError(`attachParquetIndex: invalid table identifier ${JSON.stringify(table)}`);
51
+ await runner("LOAD httpfs").catch(() => void 0);
52
+ if (forceDownload) await runner("SET force_download=true");
53
+ await runner(`CREATE SCHEMA IF NOT EXISTS ${schema}`);
54
+ const created = [];
55
+ for (const [table, urls] of Object.entries(opts.tables)) {
56
+ if (urls.length === 0) continue;
57
+ await runner(`CREATE OR REPLACE VIEW ${schema}.${table} AS SELECT * FROM read_parquet([${urls.map((u) => `'${u.replace(/'/g, "''")}'`).join(", ")}], union_by_name = true)`);
58
+ created.push(table);
59
+ }
60
+ return {
61
+ schema,
62
+ tables: created
63
+ };
64
+ }
65
+ const YEAR_MONTH_RE = /^\d{4}-\d{2}$/;
66
+ const SCHEMA_IDENT_RE = /^[A-Z_][\w$]*$/i;
67
+ const COLD_FILENAME_RE = /^cold-(\d{4}-\d{2})\.duckdb$/;
68
+ function snapshotAlias(fileName) {
69
+ if (fileName === "hot.duckdb") return "hot";
70
+ const m = fileName.match(COLD_FILENAME_RE);
71
+ if (!m?.[1]) throw new TypeError(`snapshotAlias: unrecognised filename ${JSON.stringify(fileName)}`);
72
+ return `cold_${m[1].replace("-", "_")}`;
73
+ }
74
+ async function attachSnapshotIndex(runner, opts) {
75
+ const { index, attachUrls } = opts;
76
+ const schema = opts.schema ?? "main";
77
+ const forceDownload = opts.forceDownload !== false;
78
+ if (index?.version !== 1) throw new TypeError(`attachSnapshotIndex: unsupported snapshot index version ${String(index?.version)}; expected 1`);
79
+ if (!SCHEMA_IDENT_RE.test(schema)) throw new TypeError(`attachSnapshotIndex: invalid schema identifier ${JSON.stringify(schema)}`);
80
+ for (const ym of index.cold) if (!YEAR_MONTH_RE.test(ym)) throw new TypeError(`attachSnapshotIndex: invalid YYYY-MM entry ${JSON.stringify(ym)} in index.cold`);
81
+ await runner("LOAD httpfs").catch(() => void 0);
82
+ if (forceDownload) await runner("SET force_download=true");
83
+ const plan = [];
84
+ for (const ym of index.cold) {
85
+ const fileName = `cold-${ym}.duckdb`;
86
+ const url = attachUrls[fileName];
87
+ if (!url) throw new Error(`attachSnapshotIndex: attachUrls missing entry for ${fileName}`);
88
+ plan.push({
89
+ fileName,
90
+ alias: snapshotAlias(fileName),
91
+ url
92
+ });
93
+ }
94
+ if (index.hot) {
95
+ const fileName = "hot.duckdb";
96
+ const url = attachUrls[fileName];
97
+ if (!url) throw new Error(`attachSnapshotIndex: attachUrls missing entry for ${fileName}`);
98
+ plan.push({
99
+ fileName,
100
+ alias: snapshotAlias(fileName),
101
+ url
102
+ });
103
+ }
104
+ const aliases = [];
105
+ for (const { alias, url } of plan) {
106
+ await runner(`ATTACH '${url.replace(/'/g, "''")}' AS ${alias} (READ_ONLY)`);
107
+ aliases.push(alias);
108
+ }
109
+ const aliasSet = new Set(aliases);
110
+ const tableRows = await runner("SELECT database_name, table_name FROM duckdb_tables()");
111
+ const present = /* @__PURE__ */ new Map();
112
+ for (const row of tableRows) {
113
+ const db = String(row.database_name ?? "");
114
+ const table = String(row.table_name ?? "");
115
+ if (!aliasSet.has(db) || !table) continue;
116
+ const list = present.get(table);
117
+ if (list) list.push(db);
118
+ else present.set(table, [db]);
119
+ }
120
+ const tables = [];
121
+ for (const [table, dbs] of present) {
122
+ if (!SCHEMA_IDENT_RE.test(table)) continue;
123
+ const dbsSet = new Set(dbs);
124
+ await runner(`CREATE OR REPLACE VIEW ${schema}.${table} AS ${aliases.filter((a) => dbsSet.has(a)).map((db) => `SELECT * FROM ${db}.${table}`).join(" UNION ALL BY NAME ")}`);
125
+ tables.push(table);
126
+ }
127
+ return {
128
+ schema,
129
+ aliases,
130
+ tables
131
+ };
132
+ }
133
+ export { attachParquetIndex, attachSnapshotIndex, createNodeDuckDBHandle, createNodeHarness, resetNodeDuckDB, snapshotAlias };
@@ -1,59 +1,13 @@
1
1
  import { n as AnalysisResult, t as AnalysisParams } from "../_chunks/analysis-types.mjs";
2
- import { r as FileSet, t as AnalysisQuerySource } from "../_chunks/source-types.mjs";
3
- import { a as Analyzer, c as ReduceContext, d as SqlPlan, f as TypedRowQuery, i as createAnalyzerRegistry, l as RowQueriesPlan, n as AnalyzerRegistryInit, o as Capability, r as AnalyzerVariants, s as Plan, t as AnalyzerRegistry, u as SqlExtraQuery } from "../_chunks/registry.mjs";
4
- import { BuilderState } from "gscdump/query";
5
- interface SqlPlanSpec {
6
- sql: string;
7
- params: unknown[];
8
- current: FileSet;
9
- previous?: FileSet;
10
- extraFiles?: Record<string, FileSet>;
11
- extraQueries?: SqlExtraQuery[];
12
- requiresAttachedTables?: boolean;
13
- }
14
- interface ReduceCtx<InputRow> {
15
- /** Extra SQL-query results keyed by `SqlExtraQuery.name` (SQL path only). */
16
- extras?: Record<string, InputRow[]>;
17
- }
18
- type Reducer<Params, InputRow, Result> = (rows: InputRow[] | Record<string, InputRow[]>, params: Params, ctx: ReduceCtx<InputRow>) => {
19
- results: Result;
20
- meta?: Record<string, unknown>;
21
- };
22
- interface DefineAnalyzerOptions<Params extends AnalysisParams, InputRow, Result> {
23
- id: string;
24
- /**
25
- * Shared reducer used by both SQL and row paths. Use this when the
26
- * post-aggregation row count is small and filter/sort/derive can live in
27
- * one place. Mutually exclusive with `reduceSql` / `reduceRows`.
28
- */
29
- reduce?: Reducer<Params, InputRow, Result>;
30
- /** SQL-only reducer. Required when `buildSql` is set without `reduce`. */
31
- reduceSql?: Reducer<Params, InputRow, Result>;
32
- /** Row-only reducer. Required when `buildRows` is set without `reduce`. */
33
- reduceRows?: Reducer<Params, InputRow, Result>;
34
- /** SQL plan builder. Omit if the analyzer has no SQL path. */
35
- buildSql?: (params: Params) => SqlPlanSpec;
36
- /** Row plan builder. Omit if the analyzer has no row path. */
37
- buildRows?: (params: Params) => Record<string, BuilderState>;
38
- /** Capabilities required by the SQL plan. Defaults to `['executeSql', 'partitionedParquet']`. */
39
- sqlRequires?: readonly Capability[];
40
- /** Capabilities required by the row plan. Defaults to `[]`. */
41
- rowsRequires?: readonly Capability[];
42
- }
43
- interface DefinedAnalyzer {
44
- id: string;
45
- sql?: Analyzer;
46
- rows?: Analyzer;
47
- }
48
- declare function defineAnalyzer<Params extends AnalysisParams, InputRow, Result>(opts: DefineAnalyzerOptions<Params, InputRow, Result>): DefinedAnalyzer;
2
+ import { _ as SqlExtraQuery, a as DefineAnalyzerOptions, b as requireAdapter, c as Reducer, d as Analyzer, f as BuildContext, g as RowQueriesPlan, h as RequiredCapability, i as createAnalyzerRegistry, l as SqlPlanSpec, m as ReduceContext, n as AnalyzerRegistryInit, o as DefinedAnalyzer, p as Plan, r as AnalyzerVariants, s as ReduceCtx, t as AnalyzerRegistry, u as defineAnalyzer, v as SqlPlan, x as AnalysisQuerySource, y as TypedRowQuery } from "../_chunks/registry.mjs";
49
3
  declare class AnalyzerCapabilityError extends Error {
50
4
  readonly tool: string;
51
- readonly missing: readonly Capability[];
52
- constructor(tool: string, missing: readonly Capability[]);
5
+ readonly missing: readonly RequiredCapability[];
6
+ constructor(tool: string, missing: readonly RequiredCapability[]);
53
7
  }
54
8
  /**
55
9
  * Run an analyzer against a generic `AnalysisQuerySource`. The registry is
56
10
  * an explicit parameter — callers build one via `createAnalyzerRegistry`.
57
11
  */
58
12
  declare function runAnalyzerFromSource(source: AnalysisQuerySource, params: AnalysisParams, registry: AnalyzerRegistry): Promise<AnalysisResult>;
59
- export { type Analyzer, AnalyzerCapabilityError, type AnalyzerRegistry, type AnalyzerRegistryInit, type AnalyzerVariants, type Capability, type DefineAnalyzerOptions, type DefinedAnalyzer, type Plan, type ReduceContext, type ReduceCtx, type Reducer, type RowQueriesPlan, type SqlExtraQuery, type SqlPlan, type SqlPlanSpec, type TypedRowQuery, createAnalyzerRegistry, defineAnalyzer, runAnalyzerFromSource };
13
+ export { type Analyzer, AnalyzerCapabilityError, type AnalyzerRegistry, type AnalyzerRegistryInit, type AnalyzerVariants, type BuildContext, type DefineAnalyzerOptions, type DefinedAnalyzer, type Plan, type ReduceContext, type ReduceCtx, type Reducer, type RequiredCapability, type RowQueriesPlan, type SqlExtraQuery, type SqlPlan, type SqlPlanSpec, type TypedRowQuery, createAnalyzerRegistry, defineAnalyzer, requireAdapter, runAnalyzerFromSource };