npm - @gscdump/engine - Versions diffs - 0.9.1 → 0.10.0 - Mend

@gscdump/engine 0.9.1 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

package/README.md +1 -1
package/dist/_chunks/dispatch.mjs +11 -17
package/dist/_chunks/engine.mjs +622 -0
package/dist/_chunks/pg-adapter.mjs +6 -15
package/dist/_chunks/registry.d.mts +137 -15
package/dist/_chunks/resolver.mjs +2 -25
package/dist/_chunks/snapshot.d.mts +14 -0
package/dist/_chunks/storage.d.mts +1 -20
package/dist/adapters/node.d.mts +91 -0
package/dist/adapters/node.mjs +133 -0
package/dist/analyzer/index.d.mts +4 -50
package/dist/analyzer/index.mjs +17 -8
package/dist/index.d.mts +2 -2
package/dist/index.mjs +6 -621
package/dist/planner.d.mts +1 -1
package/dist/planner.mjs +1 -1
package/dist/resolver/index.d.mts +1 -23
package/dist/resolver/index.mjs +3 -3
package/dist/rollups.d.mts +163 -0
package/dist/rollups.mjs +346 -0
package/dist/snapshot.d.mts +1 -13
package/dist/source/index.d.mts +30 -8
package/dist/source/index.mjs +42 -7
package/package.json +10 -5
package/dist/_chunks/source-types.d.mts +0 -31
package/dist/_chunks/{planner.mjs → compiler.mjs} +0 -0

package/dist/_chunks/pg-adapter.mjs CHANGED Viewed

@@ -3,7 +3,6 @@ import { escapeLike } from "../sql-fragments.mjs";
 import { buildLogicalComparisonPlan, buildLogicalPlan } from "gscdump/query/plan";
 import { PgDialect } from "drizzle-orm/pg-core";
 import { sql } from "drizzle-orm";
-import { SQLiteAsyncDialect } from "drizzle-orm/sqlite-core";
 const COMPARISON_FILTER_SQL = {
 	new: sql`AND (p.impressions IS NULL OR p.impressions = 0)`,
 	lost: sql`AND p.impressions > 0 AND c.impressions = 0`,
@@ -132,9 +131,9 @@ function resolveToSQLOptimized(state, options) {
 	}
 	if (hasDate) cteSelect.push(adapter.dateColRef(tableKey));
 	const t = schema[tableKey];
-	cteSelect.push(sql`SUM(${t.clicks}) as clicks`);
-	cteSelect.push(sql`SUM(${t.impressions}) as impressions`);
-	cteSelect.push(sql`SUM(${t.sum_position}) as sum_position`);
+	cteSelect.push(sql`CAST(SUM(${t.clicks}) AS DOUBLE) as clicks`);
+	cteSelect.push(sql`CAST(SUM(${t.impressions}) AS DOUBLE) as impressions`);
+	cteSelect.push(sql`CAST(SUM(${t.sum_position}) AS DOUBLE) as sum_position`);
 	const groupByExprs = groupByDims.map((d) => adapter.dimExprSql(d, tableKey));
 	if (hasDate) groupByExprs.push(adapter.dateColRef(tableKey));
 	const outerSelect = [];
@@ -144,11 +143,11 @@ function resolveToSQLOptimized(state, options) {
 	for (const m of metrics) switch (m) {
 		case "clicks":
 			outerSelect.push(sql.raw("clicks"));
-			outerTotals.push(sql.raw("SUM(clicks) OVER() as totalClicks"));
+			outerTotals.push(sql.raw("CAST(SUM(clicks) OVER() AS DOUBLE) as totalClicks"));
 			break;
 		case "impressions":
 			outerSelect.push(sql.raw("impressions"));
-			outerTotals.push(sql.raw("SUM(impressions) OVER() as totalImpressions"));
+			outerTotals.push(sql.raw("CAST(SUM(impressions) OVER() AS DOUBLE) as totalImpressions"));
 			break;
 		case "ctr":
 			outerSelect.push(sql.raw("CAST(clicks AS REAL) / NULLIF(impressions, 0) as ctr"));
@@ -626,7 +625,6 @@ function createResolverAdapter(config) {
 	};
 }
 const pgDialect = new PgDialect();
-const sqliteDialect = new SQLiteAsyncDialect();
 function compilePg(query) {
 	const compiled = pgDialect.sqlToQuery(query);
 	return {
@@ -634,13 +632,6 @@ function compilePg(query) {
 		params: compiled.params
 	};
 }
-function compileSqlite(query) {
-	const compiled = sqliteDialect.sqlToQuery(query);
-	return {
-		sql: compiled.sql,
-		params: compiled.params
-	};
-}
 const PG_BASE_CONFIG = {
 	schema: drizzleSchema,
 	datasetToTableKey: {
@@ -673,4 +664,4 @@ function createParquetResolverAdapter() {
 		tableRef: (tk) => sql.raw(`read_parquet({{FILES}}, union_by_name = true) AS "${tk}"`)
 	});
 }
-export { resolveToSQL as _, createResolverAdapter as a, LOGICAL_DATASETS as c, inferLogicalDataset as d, supportsDimensionOnSurface as f, resolveComparisonSQL as g, mergeExtras as h, compileSqlite as i, assertDimensionsSupported as l, buildTotalsSql as m, pgResolverAdapter as n, createSqlFragments as o, buildExtrasQueries as p, compilePg as r, DIMENSION_SURFACES as s, createParquetResolverAdapter as t, dimensionColumn as u, resolveToSQLOptimized as v };
+export { DIMENSION_SURFACES as a, dimensionColumn as c, buildExtrasQueries as d, buildTotalsSql as f, resolveToSQLOptimized as g, resolveToSQL as h, createSqlFragments as i, inferLogicalDataset as l, resolveComparisonSQL as m, pgResolverAdapter as n, LOGICAL_DATASETS as o, mergeExtras as p, createResolverAdapter as r, assertDimensionsSupported as s, createParquetResolverAdapter as t, supportsDimensionOnSurface as u };

package/dist/_chunks/registry.d.mts CHANGED Viewed

@@ -1,12 +1,64 @@
-import { T as Row } from "./storage.mjs";
+import { T as Row$1 } from "./storage.mjs";
+import { o as ResolverAdapter } from "./types.mjs";
 import { t as AnalysisParams } from "./analysis-types.mjs";
-import { r as FileSet } from "./source-types.mjs";
+import { PlannerCapabilities } from "gscdump/query/plan";
+import { TableName } from "gscdump/contracts";
 import { BuilderState } from "gscdump/query";
+type QueryRow = Record<string, unknown>;
+interface FileSet {
+  table: TableName;
+  partitions: string[];
+}
+interface ExecuteSqlOptions {
+  fileSets?: Record<string, FileSet>;
+}
 /**
- * Capabilities a Plan may require of its host. A dispatcher matches these
- * against a source's declared capabilities and rejects mismatches.
+ * Flat capability bag: planner-side flags (`regex`, `comparisonJoin`, ...)
+ * mixed with storage-side flags. `executeSql: true` means the source provides
+ * the `executeSql` method; analyzer dispatch reads this single flag instead
+ * of probing the function shape.
  */
-type Capability = 'executeSql' | 'partitionedParquet' | 'attachedTables' | 'regex' | 'windowTotals' | 'comparisonJoin';
+interface SourceCapabilities extends PlannerCapabilities {
+  executeSql?: boolean;
+  attachedTables?: boolean;
+  fileSets?: boolean;
+  /**
+   * true iff the source provides a `ResolverAdapter` for analyzers that
+   * compose SQL from a typed `BuilderState` at plan-build time.
+   */
+  adapter?: boolean;
+}
+type AnalysisSourceKind = 'local' | 'browser' | 'live' | 'in-memory' | 'composite' | 'attached-table';
+interface AnalysisQuerySource {
+  name?: string;
+  /** Telemetry tag stamped onto analyzer result meta; not used for routing. */
+  kind?: AnalysisSourceKind;
+  capabilities: SourceCapabilities;
+  /**
+   * Dialect adapter surfaced for analyzers that compose SQL from a
+   * `BuilderState` at plan-build time. Optional for pure row sources.
+   */
+  adapter?: ResolverAdapter<any>;
+  /** Tenant scope; multi-tenant dialects (sqlite/D1) require it, parquet omits it. */
+  siteId?: string | number;
+  queryRows: (state: BuilderState) => Promise<QueryRow[]>;
+  /**
+   * Present iff `capabilities.executeSql === true`. Receives the compiled
+   * SQL plan with `{{FILES}}` placeholders; sources that advertise
+   * `capabilities.fileSets` consume `opts.fileSets`, others ignore them.
+   */
+  executeSql?: (sql: string, params?: unknown[], opts?: ExecuteSqlOptions) => Promise<QueryRow[]>;
+}
+/**
+ * Capabilities a Plan may require of its host. Dispatch matches `requires`
+ * against the source's declared `capabilities` (and the presence of
+ * `executeSql`) and rejects mismatches.
+ *
+ * `'executeSql'` checks for the method on the source; the rest are flag keys
+ * on `SourceCapabilities`. Single source of truth — adding a new capability
+ * is one line in `SourceCapabilities`.
+ */
+type RequiredCapability = 'executeSql' | keyof SourceCapabilities;
 interface SqlExtraQuery {
   name: string;
   sql: string;
@@ -24,13 +76,11 @@ interface SqlPlan {
   previous?: FileSet;
   extraFiles?: Record<string, FileSet>;
   extraQueries?: SqlExtraQuery[];
-  /** Emits direct table refs (browser-only). Dispatcher rejects for manifest path. */
-  requiresAttachedTables?: boolean;
 }
-interface TypedRowQuery<T extends Row = Row> {
+interface TypedRowQuery<T extends Row$1 = Row$1> {
   state: BuilderState;
   /** Optional type tag for downstream narrowing. */
-  rowType?: (row: Row) => T;
+  rowType?: (row: Row$1) => T;
 }
 /**
  * Row-queries plan: a named set of typed `BuilderState` queries. A portable
@@ -42,7 +92,29 @@ interface RowQueriesPlan {
   queries: Record<string, TypedRowQuery>;
 }
 type Plan = SqlPlan | RowQueriesPlan;
-interface ReduceContext<TRow extends Row = Row> {
+/**
+ * Plan-build context. Surfaced from the source at dispatch time so analyzers
+ * that compose SQL from a typed `BuilderState` can pick up the right dialect
+ * adapter without importing one directly. Most SQL analyzers emit static SQL
+ * and ignore this; only the BuilderState-driven `data-query` / `data-detail`
+ * analyzers consume it today.
+ *
+ * `adapter` is optional on the type; analyzers that need it should call
+ * `requireAdapter(ctx, id)` rather than non-null-asserting. Capability
+ * declaration (`'adapter'` in `requires`) is the runtime guarantee; the
+ * helper makes the failure mode loud if the contract is broken.
+ */
+interface BuildContext {
+  adapter?: ResolverAdapter<any>;
+  siteId?: string | number;
+}
+/**
+ * Throw a uniform error if a SQL analyzer declared the `'adapter'` capability
+ * but the dispatcher handed it a context without one. Centralizes the assert
+ * so analyzers don't repeat `ctx.adapter!` with explanatory comments.
+ */
+declare function requireAdapter(ctx: BuildContext, analyzerId: string): ResolverAdapter<any>;
+interface ReduceContext<TRow extends Row$1 = Row$1> {
   params: AnalysisParams;
   /** Extra SQL-query results keyed by `SqlExtraQuery.name`. */
   extras?: Record<string, TRow[]>;
@@ -53,25 +125,75 @@ interface ReduceContext<TRow extends Row = Row> {
  * when their reducer assumes specific columns exist — catches drift between
  * `build` (SELECT list) and `reduce` (column access) at compile time.
  */
-interface Analyzer<P extends AnalysisParams = AnalysisParams, R = unknown, TRow extends Row = Row> {
+interface Analyzer<P extends AnalysisParams = AnalysisParams, R = unknown, TRow extends Row$1 = Row$1> {
   /** Stable tool id (e.g. `striking-distance`, `opportunity`). */
   id: string;
   /** Capabilities a host source must provide. */
-  requires: readonly Capability[];
-  /** Pure: params → plan. Snapshot-testable. */
-  build: (params: P) => Plan;
+  requires: readonly RequiredCapability[];
+  /** Pure: params → plan. Snapshot-testable. `ctx` carries the source's dialect adapter when one is available. */
+  build: (params: P, ctx?: BuildContext) => Plan;
   /** Pure: rows + context → typed result + meta. */
   reduce: (rows: TRow[] | Record<string, TRow[]>, ctx: ReduceContext<TRow>) => {
     results: R;
     meta?: Record<string, unknown>;
   };
 }
+interface SqlPlanSpec {
+  sql: string;
+  params: unknown[];
+  current: FileSet;
+  previous?: FileSet;
+  extraFiles?: Record<string, FileSet>;
+  extraQueries?: SqlExtraQuery[];
+}
+interface ReduceCtx<InputRow> {
+  /** Extra SQL-query results keyed by `SqlExtraQuery.name` (SQL path only). */
+  extras?: Record<string, InputRow[]>;
+}
+type Reducer<Params, InputRow, Result> = (rows: InputRow[] | Record<string, InputRow[]>, params: Params, ctx: ReduceCtx<InputRow>) => {
+  results: Result;
+  meta?: Record<string, unknown>;
+};
+interface DefineAnalyzerOptions<Params extends AnalysisParams, InputRow, Result> {
+  id: string;
+  /**
+   * Shared reducer used by both SQL and row paths. Use this when the
+   * post-aggregation row count is small and filter/sort/derive can live in
+   * one place. Mutually exclusive with `reduceSql` / `reduceRows`.
+   */
+  reduce?: Reducer<Params, InputRow, Result>;
+  /** SQL-only reducer. Required when `buildSql` is set without `reduce`. */
+  reduceSql?: Reducer<Params, InputRow, Result>;
+  /** Row-only reducer. Required when `buildRows` is set without `reduce`. */
+  reduceRows?: Reducer<Params, InputRow, Result>;
+  /** SQL plan builder. Omit if the analyzer has no SQL path. */
+  buildSql?: (params: Params, ctx: BuildContext) => SqlPlanSpec;
+  /** Row plan builder. Omit if the analyzer has no row path. */
+  buildRows?: (params: Params, ctx: BuildContext) => Record<string, BuilderState>;
+  /** Capabilities required by the SQL plan. Defaults to `['executeSql', 'fileSets']`. */
+  sqlRequires?: readonly RequiredCapability[];
+  /** Capabilities required by the row plan. Defaults to `[]`. */
+  rowsRequires?: readonly RequiredCapability[];
+}
+interface DefinedAnalyzer {
+  id: string;
+  sql?: Analyzer;
+  rows?: Analyzer;
+}
+declare function defineAnalyzer<Params extends AnalysisParams, InputRow, Result>(opts: DefineAnalyzerOptions<Params, InputRow, Result>): DefinedAnalyzer;
 interface AnalyzerVariants {
   sql?: Analyzer;
   rows?: Analyzer;
 }
 interface AnalyzerRegistryInit {
+  /**
+   * Preferred for in-tree composition: pass `DefinedAnalyzer[]` directly so
+   * SQL/row variants can never drift apart from their `defineAnalyzer` site.
+   */
+  defined?: readonly DefinedAnalyzer[];
+  /** Flat-array path retained for narrow tree-shaken registry composition. */
   rows?: readonly Analyzer[];
+  /** Flat-array path retained for narrow tree-shaken registry composition. */
   sql?: readonly Analyzer[];
 }
 interface AnalyzerRegistry {
@@ -89,4 +211,4 @@ interface AnalyzerRegistry {
  * or per-request in a worker).
  */
 declare function createAnalyzerRegistry(init?: AnalyzerRegistryInit): AnalyzerRegistry;
-export { Analyzer as a, ReduceContext as c, SqlPlan as d, TypedRowQuery as f, createAnalyzerRegistry as i, RowQueriesPlan as l, AnalyzerRegistryInit as n, Capability as o, AnalyzerVariants as r, Plan as s, AnalyzerRegistry as t, SqlExtraQuery as u };
+export { ExecuteSqlOptions as C, SourceCapabilities as E, AnalysisSourceKind as S, QueryRow as T, SqlExtraQuery as _, DefineAnalyzerOptions as a, requireAdapter as b, Reducer as c, Analyzer as d, BuildContext as f, RowQueriesPlan as g, RequiredCapability as h, createAnalyzerRegistry as i, SqlPlanSpec as l, ReduceContext as m, AnalyzerRegistryInit as n, DefinedAnalyzer as o, Plan as p, AnalyzerVariants as r, ReduceCtx as s, AnalyzerRegistry as t, defineAnalyzer as u, SqlPlan as v, FileSet as w, AnalysisQuerySource as x, TypedRowQuery as y };

package/dist/_chunks/resolver.mjs CHANGED Viewed

@@ -1,26 +1,6 @@
 import { t as SCHEMAS } from "./schema.mjs";
-import { _ as resolveToSQL } from "./pg-adapter.mjs";
+import "./pg-adapter.mjs";
 import { normalizeUrl } from "gscdump/normalize";
-function createSqlQuerySource(options) {
-	const { name, adapter, execute, siteId, extraCapabilities } = options;
-	return {
-		name,
-		capabilities: {
-			...adapter.capabilities,
-			...extraCapabilities
-		},
-		async queryRows(state) {
-			const resolved = resolveToSQL(state, {
-				adapter,
-				siteId
-			});
-			return execute(resolved.sql, resolved.params);
-		},
-		executeSql(sql, params) {
-			return execute(sql, params ?? []);
-		}
-	};
-}
 function collectInternalFilters(filter) {
 	if (!filter || !("_filters" in filter)) return [];
 	const flat = filter._filters;
@@ -85,7 +65,4 @@ function assertSchemaInSync(options) {
 		if (missing.length > 0 || extra.length > 0) throw new Error(`${label} drizzle schema for '${key}' drifted from SCHEMAS. Missing: [${missing.join(", ")}]. Extra: [${extra.join(", ")}].`);
 	}
 }
-function isSqlQuerySource(s) {
-	return typeof s.executeSql === "function";
-}
-export { getFilterDimensions as a, matchesMetricFilter as c, createSqlQuerySource as d, getDimensionFilters as i, matchesTopLevelPage as l, assertSchemaInSync as n, getInternalFilters as o, dimensionValue as r, matchesDimensionFilter as s, isSqlQuerySource as t, metricValue as u };
+export { getInternalFilters as a, matchesTopLevelPage as c, getFilterDimensions as i, metricValue as l, dimensionValue as n, matchesDimensionFilter as o, getDimensionFilters as r, matchesMetricFilter as s, assertSchemaInSync as t };

package/dist/_chunks/snapshot.d.mts ADDED Viewed

@@ -0,0 +1,14 @@
+/**
+ * Describes a hot/cold snapshot set. Produced by the snapshot builder,
+ * consumed by `attachSnapshotIndex`. Filenames are derived from `cold`
+ * via `cold-${yearMonth}.duckdb`; hot is always `hot.duckdb` when
+ * `hot: true`.
+ */
+interface SnapshotIndex {
+  version: 1;
+  builtAt: string;
+  cold: string[];
+  hot: boolean;
+  hotDays: number;
+}
+export { SnapshotIndex as t };

package/dist/_chunks/storage.d.mts CHANGED Viewed

@@ -480,24 +480,5 @@ interface EngineOptions {
   now?: () => number;
 }
 declare function dayPartition(date: string): string;
-declare function monthPartition(month: string): string;
-/**
- * Weekly partition keyed by the Monday-of-week ISO date (e.g. `weekly/2026-04-20`
- * for the ISO week containing 2026-04-22). Names are stable + sortable; the
- * dashboard never parses them, only reads via the manifest.
- */
-declare function weekPartition(mondayIsoDate: string): string;
-/**
- * Quarterly partition (e.g. `quarterly/2026-Q2` for Apr-Jun 2026). Used as the
- * cold-tier shape for `d90` compaction outputs.
- */
-declare function quarterPartition(quarter: string): string;
-/**
- * Monday-of-week as a YYYY-MM-DD string for the ISO week containing `isoDate`.
- * Used by tiered compaction to bucket raw daily files into weekly groups.
- */
-declare function mondayOfWeek(isoDate: string): string;
-/** YYYY-Qq for the quarter containing the given YYYY-MM month string. */
-declare function quarterOfMonth(month: string): string;
 declare function objectKey(ctx: TenantCtx, table: TableName, partition: string, version: number, searchType?: SearchType): string;
-export { SyncStateDetail as A, WriteResult as B, QueryExecutor as C, SearchType$1 as D, RunSQLOptions as E, TenantCtx$1 as F, monthPartition as G, inferLegacyTier as H, Watermark as I, quarterPartition as J, objectKey as K, WatermarkFilter as L, SyncStateKind as M, SyncStateScope as N, StorageEngine as O, TableName$1 as P, WatermarkScope as R, QueryExecuteResult as S, Row$1 as T, inferSearchType as U, dayPartition as V, mondayOfWeek as W, CompactionThresholds as X, weekPartition as Y, enumeratePartitions as Z, PurgeFilter as _, DataSource as a, QueryCtx as b, FileSetRef as c, LockScope as d, ManifestEntry as f, ParquetCodec as g, OptimizedQueryResult as h, DEFAULT_SEARCH_TYPE as i, SyncStateFilter as j, SyncState as k, GcCtx as l, ManifestStore as m, CompactionTier as n, EngineOptions as o, ManifestPurgeResult as p, quarterOfMonth as q, ComparisonResult as r, ExtraResult as s, CodecCtx as t, ListLiveFilter as u, PurgeResult as v, QueryResult as w, QueryExecuteOptions as x, PurgeUrlsResult as y, WriteCtx as z };
+export { SyncStateDetail as A, WriteResult as B, QueryExecutor as C, SearchType$1 as D, RunSQLOptions as E, TenantCtx$1 as F, CompactionThresholds as G, inferLegacyTier as H, Watermark as I, enumeratePartitions as K, WatermarkFilter as L, SyncStateKind as M, SyncStateScope as N, StorageEngine as O, TableName$1 as P, WatermarkScope as R, QueryExecuteResult as S, Row$1 as T, inferSearchType as U, dayPartition as V, objectKey as W, PurgeFilter as _, DataSource as a, QueryCtx as b, FileSetRef as c, LockScope as d, ManifestEntry as f, ParquetCodec as g, OptimizedQueryResult as h, DEFAULT_SEARCH_TYPE as i, SyncStateFilter as j, SyncState as k, GcCtx as l, ManifestStore as m, CompactionTier as n, EngineOptions as o, ManifestPurgeResult as p, ComparisonResult as r, ExtraResult as s, CodecCtx as t, ListLiveFilter as u, PurgeResult as v, QueryResult as w, QueryExecuteOptions as x, PurgeUrlsResult as y, WriteCtx as z };

package/dist/adapters/node.d.mts ADDED Viewed

@@ -0,0 +1,91 @@
+import { O as StorageEngine, a as DataSource } from "../_chunks/storage.mjs";
+import { NodeDuckDBOptions, createNodeDuckDBHandle, resetNodeDuckDB } from "./duckdb-node.mjs";
+import { t as SnapshotIndex } from "../_chunks/snapshot.mjs";
+import { Row, TableName } from "gscdump/contracts";
+interface NodeHarnessOptions {
+  dataDir: string;
+  /** Tenant user id. Defaults to `'local'` for single-user CLI installs. */
+  userId?: string;
+  /** Name of the manifest file under `dataDir`. Defaults to `manifest.json`. */
+  manifestFilename?: string;
+}
+interface NodeHarness {
+  engine: StorageEngine;
+  /**
+   * Underlying filesystem-backed DataSource. Exposed so commands that write
+   *  derivative artifacts (rollups, exports) don't have to re-instantiate it.
+   */
+  dataSource: DataSource;
+  dataDir: string;
+  userId: string;
+  siteIdFor: (siteUrl: string) => string;
+  runRawSql: (opts: {
+    sql: string;
+    siteUrl: string;
+    table: TableName;
+    params?: unknown[];
+  }) => Promise<{
+    rows: Row[];
+    sql: string;
+    keys: string[];
+  }>;
+}
+declare function createNodeHarness(opts: NodeHarnessOptions): NodeHarness;
+/**
+ * Runs arbitrary SQL and returns rows as plain objects. Caller supplies
+ * this so the function works with AsyncDuckDB (browser DuckDB-WASM) or
+ * @duckdb/node-api (Node) without coupling to either.
+ */
+type SnapshotQueryRunner = (sql: string) => Promise<Array<Record<string, unknown>>>;
+interface AttachSnapshotOptions {
+  /** Index produced by the builder. */
+  index: SnapshotIndex;
+  /**
+   * Map from filename (`cold-YYYY-MM.duckdb`, `hot.duckdb`) to an HTTPS
+   * URL (typically a pre-signed R2 URL). Must contain an entry for every
+   * cold month in `index.cold` and — if `index.hot` — for `hot.duckdb`.
+   */
+  attachUrls: Record<string, string>;
+  /** Schema the unified views land under. Default `main`. */
+  schema?: string;
+  /**
+   * DuckDB httpfs can error with "Server sent back more data than expected"
+   * against some proxies; `force_download=true` sidesteps it. Default true.
+   */
+  forceDownload?: boolean;
+}
+interface AttachSnapshotResult {
+  schema: string;
+  /** Aliases we ATTACH'd — e.g. ['cold_2024_09', 'cold_2024_10', 'hot']. */
+  aliases: string[];
+  /** Table names with a UNION view created under `schema`. */
+  tables: string[];
+}
+/**
+ * Turns a filename like `cold-2024-09.duckdb` into a valid SQL identifier
+ * `cold_2024_09`. `hot.duckdb` → `hot`.
+ */
+declare function snapshotAlias(fileName: string): string;
+declare function attachSnapshotIndex(runner: SnapshotQueryRunner, opts: AttachSnapshotOptions): Promise<AttachSnapshotResult>;
+interface AttachParquetIndexOptions {
+  /**
+   * Map of table name → list of Parquet URLs. The URL list may mix monthly
+   * compacted files and per-day files — DuckDB will scan all of them with
+   * `union_by_name = true`. Empty lists are skipped (no view created).
+   */
+  tables: Record<string, string[]>;
+  /** Schema the views land under. Default `main`. */
+  schema?: string;
+  /**
+   * DuckDB httpfs can error with "Server sent back more data than expected"
+   * against some proxies; `force_download=true` sidesteps it. Default true.
+   */
+  forceDownload?: boolean;
+}
+interface AttachParquetIndexResult {
+  schema: string;
+  /** Tables for which a view was created. */
+  tables: string[];
+}
+declare function attachParquetIndex(runner: SnapshotQueryRunner, opts: AttachParquetIndexOptions): Promise<AttachParquetIndexResult>;
+export { type AttachParquetIndexOptions, type AttachParquetIndexResult, type AttachSnapshotOptions, type AttachSnapshotResult, type NodeDuckDBOptions, type NodeHarness, type NodeHarnessOptions, type SnapshotQueryRunner, attachParquetIndex, attachSnapshotIndex, createNodeDuckDBHandle, createNodeHarness, resetNodeDuckDB, snapshotAlias };

package/dist/adapters/node.mjs ADDED Viewed

@@ -0,0 +1,133 @@
+import { a as createDuckDBExecutor, i as createDuckDBCodec, n as createStorageEngine } from "../_chunks/engine.mjs";
+import { createNodeDuckDBHandle, resetNodeDuckDB } from "./duckdb-node.mjs";
+import { createFilesystemDataSource, createFilesystemManifestStore } from "./filesystem.mjs";
+import path from "node:path";
+import { encodeSiteId } from "gscdump/tenant";
+function createNodeHarness(opts) {
+	const dataDir = opts.dataDir;
+	const userId = opts.userId ?? "local";
+	const manifestFilename = opts.manifestFilename ?? "manifest.json";
+	const handle = createNodeDuckDBHandle();
+	const factory = { getDuckDB: async () => handle };
+	const dataSource = createFilesystemDataSource({ rootDir: dataDir });
+	const engine = createStorageEngine({
+		dataSource,
+		manifestStore: createFilesystemManifestStore({ path: path.join(dataDir, manifestFilename) }),
+		codec: createDuckDBCodec(factory),
+		executor: createDuckDBExecutor(factory)
+	});
+	async function runRawSql(runOpts) {
+		const result = await engine.runSQL({
+			ctx: {
+				userId,
+				siteId: encodeSiteId(runOpts.siteUrl)
+			},
+			table: runOpts.table,
+			fileSets: { FILES: { table: runOpts.table } },
+			sql: runOpts.sql,
+			params: runOpts.params ?? []
+		});
+		return {
+			rows: result.rows,
+			sql: result.sql,
+			keys: result.objectKeys
+		};
+	}
+	return {
+		engine,
+		dataSource,
+		dataDir,
+		userId,
+		siteIdFor: encodeSiteId,
+		runRawSql
+	};
+}
+const IDENT_RE = /^[A-Z_][\w$]*$/i;
+async function attachParquetIndex(runner, opts) {
+	const schema = opts.schema ?? "main";
+	const forceDownload = opts.forceDownload !== false;
+	if (!IDENT_RE.test(schema)) throw new TypeError(`attachParquetIndex: invalid schema identifier ${JSON.stringify(schema)}`);
+	for (const table of Object.keys(opts.tables)) if (!IDENT_RE.test(table)) throw new TypeError(`attachParquetIndex: invalid table identifier ${JSON.stringify(table)}`);
+	await runner("LOAD httpfs").catch(() => void 0);
+	if (forceDownload) await runner("SET force_download=true");
+	await runner(`CREATE SCHEMA IF NOT EXISTS ${schema}`);
+	const created = [];
+	for (const [table, urls] of Object.entries(opts.tables)) {
+		if (urls.length === 0) continue;
+		await runner(`CREATE OR REPLACE VIEW ${schema}.${table} AS SELECT * FROM read_parquet([${urls.map((u) => `'${u.replace(/'/g, "''")}'`).join(", ")}], union_by_name = true)`);
+		created.push(table);
+	}
+	return {
+		schema,
+		tables: created
+	};
+}
+const YEAR_MONTH_RE = /^\d{4}-\d{2}$/;
+const SCHEMA_IDENT_RE = /^[A-Z_][\w$]*$/i;
+const COLD_FILENAME_RE = /^cold-(\d{4}-\d{2})\.duckdb$/;
+function snapshotAlias(fileName) {
+	if (fileName === "hot.duckdb") return "hot";
+	const m = fileName.match(COLD_FILENAME_RE);
+	if (!m?.[1]) throw new TypeError(`snapshotAlias: unrecognised filename ${JSON.stringify(fileName)}`);
+	return `cold_${m[1].replace("-", "_")}`;
+}
+async function attachSnapshotIndex(runner, opts) {
+	const { index, attachUrls } = opts;
+	const schema = opts.schema ?? "main";
+	const forceDownload = opts.forceDownload !== false;
+	if (index?.version !== 1) throw new TypeError(`attachSnapshotIndex: unsupported snapshot index version ${String(index?.version)}; expected 1`);
+	if (!SCHEMA_IDENT_RE.test(schema)) throw new TypeError(`attachSnapshotIndex: invalid schema identifier ${JSON.stringify(schema)}`);
+	for (const ym of index.cold) if (!YEAR_MONTH_RE.test(ym)) throw new TypeError(`attachSnapshotIndex: invalid YYYY-MM entry ${JSON.stringify(ym)} in index.cold`);
+	await runner("LOAD httpfs").catch(() => void 0);
+	if (forceDownload) await runner("SET force_download=true");
+	const plan = [];
+	for (const ym of index.cold) {
+		const fileName = `cold-${ym}.duckdb`;
+		const url = attachUrls[fileName];
+		if (!url) throw new Error(`attachSnapshotIndex: attachUrls missing entry for ${fileName}`);
+		plan.push({
+			fileName,
+			alias: snapshotAlias(fileName),
+			url
+		});
+	}
+	if (index.hot) {
+		const fileName = "hot.duckdb";
+		const url = attachUrls[fileName];
+		if (!url) throw new Error(`attachSnapshotIndex: attachUrls missing entry for ${fileName}`);
+		plan.push({
+			fileName,
+			alias: snapshotAlias(fileName),
+			url
+		});
+	}
+	const aliases = [];
+	for (const { alias, url } of plan) {
+		await runner(`ATTACH '${url.replace(/'/g, "''")}' AS ${alias} (READ_ONLY)`);
+		aliases.push(alias);
+	}
+	const aliasSet = new Set(aliases);
+	const tableRows = await runner("SELECT database_name, table_name FROM duckdb_tables()");
+	const present = /* @__PURE__ */ new Map();
+	for (const row of tableRows) {
+		const db = String(row.database_name ?? "");
+		const table = String(row.table_name ?? "");
+		if (!aliasSet.has(db) || !table) continue;
+		const list = present.get(table);
+		if (list) list.push(db);
+		else present.set(table, [db]);
+	}
+	const tables = [];
+	for (const [table, dbs] of present) {
+		if (!SCHEMA_IDENT_RE.test(table)) continue;
+		const dbsSet = new Set(dbs);
+		await runner(`CREATE OR REPLACE VIEW ${schema}.${table} AS ${aliases.filter((a) => dbsSet.has(a)).map((db) => `SELECT * FROM ${db}.${table}`).join(" UNION ALL BY NAME ")}`);
+		tables.push(table);
+	}
+	return {
+		schema,
+		aliases,
+		tables
+	};
+}
+export { attachParquetIndex, attachSnapshotIndex, createNodeDuckDBHandle, createNodeHarness, resetNodeDuckDB, snapshotAlias };

package/dist/analyzer/index.d.mts CHANGED Viewed

@@ -1,59 +1,13 @@
 import { n as AnalysisResult, t as AnalysisParams } from "../_chunks/analysis-types.mjs";
-import { r as FileSet, t as AnalysisQuerySource } from "../_chunks/source-types.mjs";
-import { a as Analyzer, c as ReduceContext, d as SqlPlan, f as TypedRowQuery, i as createAnalyzerRegistry, l as RowQueriesPlan, n as AnalyzerRegistryInit, o as Capability, r as AnalyzerVariants, s as Plan, t as AnalyzerRegistry, u as SqlExtraQuery } from "../_chunks/registry.mjs";
-import { BuilderState } from "gscdump/query";
-interface SqlPlanSpec {
-  sql: string;
-  params: unknown[];
-  current: FileSet;
-  previous?: FileSet;
-  extraFiles?: Record<string, FileSet>;
-  extraQueries?: SqlExtraQuery[];
-  requiresAttachedTables?: boolean;
-}
-interface ReduceCtx<InputRow> {
-  /** Extra SQL-query results keyed by `SqlExtraQuery.name` (SQL path only). */
-  extras?: Record<string, InputRow[]>;
-}
-type Reducer<Params, InputRow, Result> = (rows: InputRow[] | Record<string, InputRow[]>, params: Params, ctx: ReduceCtx<InputRow>) => {
-  results: Result;
-  meta?: Record<string, unknown>;
-};
-interface DefineAnalyzerOptions<Params extends AnalysisParams, InputRow, Result> {
-  id: string;
-  /**
-   * Shared reducer used by both SQL and row paths. Use this when the
-   * post-aggregation row count is small and filter/sort/derive can live in
-   * one place. Mutually exclusive with `reduceSql` / `reduceRows`.
-   */
-  reduce?: Reducer<Params, InputRow, Result>;
-  /** SQL-only reducer. Required when `buildSql` is set without `reduce`. */
-  reduceSql?: Reducer<Params, InputRow, Result>;
-  /** Row-only reducer. Required when `buildRows` is set without `reduce`. */
-  reduceRows?: Reducer<Params, InputRow, Result>;
-  /** SQL plan builder. Omit if the analyzer has no SQL path. */
-  buildSql?: (params: Params) => SqlPlanSpec;
-  /** Row plan builder. Omit if the analyzer has no row path. */
-  buildRows?: (params: Params) => Record<string, BuilderState>;
-  /** Capabilities required by the SQL plan. Defaults to `['executeSql', 'partitionedParquet']`. */
-  sqlRequires?: readonly Capability[];
-  /** Capabilities required by the row plan. Defaults to `[]`. */
-  rowsRequires?: readonly Capability[];
-}
-interface DefinedAnalyzer {
-  id: string;
-  sql?: Analyzer;
-  rows?: Analyzer;
-}
-declare function defineAnalyzer<Params extends AnalysisParams, InputRow, Result>(opts: DefineAnalyzerOptions<Params, InputRow, Result>): DefinedAnalyzer;
+import { _ as SqlExtraQuery, a as DefineAnalyzerOptions, b as requireAdapter, c as Reducer, d as Analyzer, f as BuildContext, g as RowQueriesPlan, h as RequiredCapability, i as createAnalyzerRegistry, l as SqlPlanSpec, m as ReduceContext, n as AnalyzerRegistryInit, o as DefinedAnalyzer, p as Plan, r as AnalyzerVariants, s as ReduceCtx, t as AnalyzerRegistry, u as defineAnalyzer, v as SqlPlan, x as AnalysisQuerySource, y as TypedRowQuery } from "../_chunks/registry.mjs";
 declare class AnalyzerCapabilityError extends Error {
   readonly tool: string;
-  readonly missing: readonly Capability[];
-  constructor(tool: string, missing: readonly Capability[]);
+  readonly missing: readonly RequiredCapability[];
+  constructor(tool: string, missing: readonly RequiredCapability[]);
 }
 /**
  * Run an analyzer against a generic `AnalysisQuerySource`. The registry is
  * an explicit parameter — callers build one via `createAnalyzerRegistry`.
  */
 declare function runAnalyzerFromSource(source: AnalysisQuerySource, params: AnalysisParams, registry: AnalyzerRegistry): Promise<AnalysisResult>;
-export { type Analyzer, AnalyzerCapabilityError, type AnalyzerRegistry, type AnalyzerRegistryInit, type AnalyzerVariants, type Capability, type DefineAnalyzerOptions, type DefinedAnalyzer, type Plan, type ReduceContext, type ReduceCtx, type Reducer, type RowQueriesPlan, type SqlExtraQuery, type SqlPlan, type SqlPlanSpec, type TypedRowQuery, createAnalyzerRegistry, defineAnalyzer, runAnalyzerFromSource };
+export { type Analyzer, AnalyzerCapabilityError, type AnalyzerRegistry, type AnalyzerRegistryInit, type AnalyzerVariants, type BuildContext, type DefineAnalyzerOptions, type DefinedAnalyzer, type Plan, type ReduceContext, type ReduceCtx, type Reducer, type RequiredCapability, type RowQueriesPlan, type SqlExtraQuery, type SqlPlan, type SqlPlanSpec, type TypedRowQuery, createAnalyzerRegistry, defineAnalyzer, requireAdapter, runAnalyzerFromSource };