@gscdump/engine 0.19.1 → 0.19.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import { r as currentSchemaVersion, t as SCHEMAS } from "./schema.mjs";
1
+ import { i as dedupeByNaturalKey, r as currentSchemaVersion, t as SCHEMAS } from "./schema.mjs";
2
2
  import { a as inferSearchType, c as objectKey, d as tenantPrefix, n as dayPartition, r as hourPartition } from "./storage.mjs";
3
3
  import { c as dedupeOverlappingTiers, i as substituteNamedFiles, n as compileLogicalQueryPlan, o as compactTieredImpl } from "./parquet-plan.mjs";
4
4
  import { sqlEscape } from "../sql-bind.mjs";
@@ -57,7 +57,7 @@ function createDuckDBCodec(factory) {
57
57
  const outName = db.makeTempPath("parquet");
58
58
  const fileList = inputUris.map((u) => `'${sqlEscape(u)}'`).join(", ");
59
59
  try {
60
- await db.query(`COPY (SELECT * FROM read_parquet([${fileList}], union_by_name=true)) TO '${sqlEscape(outName)}' (FORMAT PARQUET)`);
60
+ await db.query(`COPY (${dedupedMergeSql(ctx.table, fileList)}) TO '${sqlEscape(outName)}' (FORMAT PARQUET)`);
61
61
  const bytes = await db.copyFileToBuffer(outName);
62
62
  const countRows = await db.query(`SELECT count(*)::BIGINT AS n FROM read_parquet('${sqlEscape(outName)}')`);
63
63
  const rowCount = Number(countRows[0]?.n ?? 0);
@@ -82,7 +82,7 @@ function createDuckDBCodec(factory) {
82
82
  }
83
83
  try {
84
84
  const fileList = inNames.map((n) => `'${sqlEscape(n)}'`).join(", ");
85
- await db.query(`COPY (SELECT * FROM read_parquet([${fileList}], union_by_name = true)) TO '${sqlEscape(outName)}' (FORMAT PARQUET)`);
85
+ await db.query(`COPY (${dedupedMergeSql(ctx.table, fileList)}) TO '${sqlEscape(outName)}' (FORMAT PARQUET)`);
86
86
  registered.push(outName);
87
87
  const bytes = await db.copyFileToBuffer(outName);
88
88
  const countRows = await db.query(`SELECT count(*)::BIGINT AS n FROM read_parquet('${sqlEscape(outName)}')`);
@@ -98,6 +98,12 @@ function createDuckDBCodec(factory) {
98
98
  }
99
99
  };
100
100
  }
101
+ function dedupedMergeSql(table, fileListSql) {
102
+ const base = `SELECT * FROM read_parquet([${fileListSql}], union_by_name = true)`;
103
+ const key = SCHEMAS[table].sortKey;
104
+ if (key.length === 0) return base;
105
+ return `${base} QUALIFY row_number() OVER (PARTITION BY ${key.map((c) => `"${c.replace(/"/g, "\"\"")}"`).join(", ")}) = 1`;
106
+ }
101
107
  function rewriteEmptyFileSets(sql, placeholders, defaultTable, placeholderTables) {
102
108
  let out = sql;
103
109
  for (const [name, keys] of Object.entries(placeholders)) {
@@ -312,7 +318,7 @@ function createStorageEngine(opts) {
312
318
  partitions: [partition],
313
319
  searchType: inferSearchType({ searchType })
314
320
  });
315
- const normalizedRows = rows.map((r) => normalizeRow(ctx.table, r));
321
+ const normalizedRows = dedupeByNaturalKey(ctx.table, rows.map((r) => normalizeRow(ctx.table, r)));
316
322
  const key = objectKey(ctx, ctx.table, partition, now, searchType);
317
323
  const { bytes: writtenBytes, rowCount } = await codec.writeRows({ table: ctx.table }, normalizedRows, key, dataSource);
318
324
  let bytes = writtenBytes;
@@ -1,4 +1,4 @@
1
- import { i as dimensionToColumn, r as currentSchemaVersion } from "./schema.mjs";
1
+ import { a as dimensionToColumn, r as currentSchemaVersion } from "./schema.mjs";
2
2
  import { a as inferSearchType, c as objectKey, f as weekPartition, l as quarterOfMonth, n as dayPartition, o as mondayOfWeek, s as monthPartition, u as quarterPartition } from "./storage.mjs";
3
3
  import { METRIC_EXPR, escapeLike, topLevelPagePredicateSql } from "../sql-fragments.mjs";
4
4
  import { MS_PER_DAY } from "gscdump";
@@ -1,9 +1,9 @@
1
- import { t as SCHEMAS, u as drizzleSchema } from "./schema.mjs";
1
+ import { f as drizzleSchema, t as SCHEMAS } from "./schema.mjs";
2
2
  import { l as enumeratePartitions } from "./parquet-plan.mjs";
3
3
  import { escapeLike } from "../sql-fragments.mjs";
4
4
  import "../planner.mjs";
5
5
  import { PgDialect } from "drizzle-orm/pg-core";
6
- import { buildLogicalComparisonPlan, buildLogicalPlan } from "gscdump/query/plan";
6
+ import { UnresolvableDatasetError, buildLogicalComparisonPlan, buildLogicalPlan, inferDataset as inferLogicalDataset, isDatasetResolvable } from "gscdump/query/plan";
7
7
  import { normalizeUrl } from "gscdump/normalize";
8
8
  import { sql } from "drizzle-orm";
9
9
  const DIMENSION_SURFACES = {
@@ -88,19 +88,22 @@ const LOGICAL_DATASETS = {
88
88
  column: "date",
89
89
  surfaces: ["api", "stored"]
90
90
  }
91
+ } },
92
+ hourly_pages: { dimensions: {
93
+ page: {
94
+ column: "url",
95
+ surfaces: ["api", "stored"]
96
+ },
97
+ date: {
98
+ column: "date",
99
+ surfaces: ["api", "stored"]
100
+ },
101
+ hour: {
102
+ column: "hour",
103
+ surfaces: ["api", "stored"]
104
+ }
91
105
  } }
92
106
  };
93
- function inferLogicalDataset(dimensions, filterDims = []) {
94
- const allDims = new Set([...dimensions, ...filterDims]);
95
- const has = (d) => allDims.has(d);
96
- if (has("searchAppearance")) return "search_appearance";
97
- if (has("page") && (has("query") || has("queryCanonical"))) return "page_keywords";
98
- if (has("query") || has("queryCanonical")) return "keywords";
99
- if (has("page")) return "pages";
100
- if (has("country")) return "countries";
101
- if (has("device")) return "devices";
102
- return "devices";
103
- }
104
107
  function dimensionColumn(dim, dataset) {
105
108
  return LOGICAL_DATASETS[dataset].dimensions[dim]?.column ?? dim;
106
109
  }
@@ -141,6 +144,7 @@ function createSqlFragments(config) {
141
144
  return datasetToTableKey[dataset];
142
145
  }
143
146
  function inferTable(dimensions, filterDims = []) {
147
+ if (!isDatasetResolvable(dimensions, filterDims)) throw new UnresolvableDatasetError(dimensions, filterDims);
144
148
  return tableKeyForDataset(inferLogicalDataset(dimensions, filterDims));
145
149
  }
146
150
  const urlToPathExpr = urlToPathExprOverride ?? defaultSqliteUrlToPathExpr;
@@ -735,7 +739,8 @@ const PG_BASE_CONFIG = {
735
739
  page_keywords: "page_keywords",
736
740
  countries: "countries",
737
741
  devices: "devices",
738
- search_appearance: "search_appearance"
742
+ search_appearance: "search_appearance",
743
+ hourly_pages: "hourly_pages"
739
744
  },
740
745
  metricCast: "DOUBLE",
741
746
  regexPredicate: (expr, pattern, negate) => negate ? sql`NOT regexp_matches(${expr}, ${pattern})` : sql`regexp_matches(${expr}, ${pattern})`,
@@ -856,4 +861,4 @@ function assertSchemaInSync(options) {
856
861
  if (missing.length > 0 || extra.length > 0) throw new Error(`${label} drizzle schema for '${key}' drifted from SCHEMAS. Missing: [${missing.join(", ")}]. Extra: [${extra.join(", ")}].`);
857
862
  }
858
863
  }
859
- export { LOGICAL_DATASETS as C, supportsDimensionOnSurface as D, inferLogicalDataset as E, DIMENSION_SURFACES as S, dimensionColumn as T, resolveComparisonSQL as _, pgResolverAdapter as a, createResolverAdapter as b, getFilterDimensions as c, matchesMetricFilter as d, matchesTopLevelPage as f, mergeExtras as g, buildTotalsSql as h, createParquetResolverAdapter as i, getInternalFilters as l, buildExtrasQueries as m, runComparisonQuery as n, dimensionValue as o, metricValue as p, runOptimizedQuery as r, getDimensionFilters as s, assertSchemaInSync as t, matchesDimensionFilter as u, resolveToSQL as v, assertDimensionsSupported as w, createSqlFragments as x, resolveToSQLOptimized as y };
864
+ export { LOGICAL_DATASETS as C, inferLogicalDataset as D, dimensionColumn as E, isDatasetResolvable as O, DIMENSION_SURFACES as S, assertDimensionsSupported as T, resolveComparisonSQL as _, pgResolverAdapter as a, createResolverAdapter as b, getFilterDimensions as c, matchesMetricFilter as d, matchesTopLevelPage as f, mergeExtras as g, buildTotalsSql as h, createParquetResolverAdapter as i, supportsDimensionOnSurface as k, getInternalFilters as l, buildExtrasQueries as m, runComparisonQuery as n, dimensionValue as o, metricValue as p, runOptimizedQuery as r, getDimensionFilters as s, assertSchemaInSync as t, matchesDimensionFilter as u, resolveToSQL as v, UnresolvableDatasetError as w, createSqlFragments as x, resolveToSQLOptimized as y };
@@ -1,4 +1,4 @@
1
- import { ColumnDef, ColumnType, TableName, TableSchema, TableSchema as TableSchema$1 } from "@gscdump/contracts";
1
+ import { ColumnDef, ColumnType, Row, TableName, TableSchema, TableSchema as TableSchema$1 } from "@gscdump/contracts";
2
2
  declare const pages: import("drizzle-orm/pg-core").PgTableWithColumns<{
3
3
  name: "pages";
4
4
  schema: undefined;
@@ -1479,5 +1479,25 @@ declare function currentSchemaVersion(table: TableName): number;
1479
1479
  declare function schemaFor(table: TableName): TableSchema;
1480
1480
  declare function allTables(): readonly TableName[];
1481
1481
  declare function inferTable(dimensions: readonly string[]): TableName;
1482
+ /**
1483
+ * Natural-key columns for a table: the `sortKey` dimensions that uniquely
1484
+ * identify a metric row (e.g. `['date', 'url']` for `pages`).
1485
+ */
1486
+ declare function naturalKeyColumns(table: TableName): readonly string[];
1487
+ /**
1488
+ * Collapse rows that share a natural key (see {@link naturalKeyColumns}) to a
1489
+ * single survivor.
1490
+ *
1491
+ * Correct write and compaction inputs never collide on the natural key — each
1492
+ * (date, dimension) tuple is produced exactly once per slice — so on healthy
1493
+ * data this is a no-op. It exists as a recurrence guard: the 2026-04 monthly
1494
+ * compaction corruption folded a complete month back on top of its own daily
1495
+ * inputs, doubling every row. Absorbing duplicates at every write/compaction
1496
+ * boundary keeps that class of bug from inflating impressions downstream.
1497
+ *
1498
+ * Last occurrence wins. Observed duplicates are byte-identical, so the choice
1499
+ * of survivor is immaterial.
1500
+ */
1501
+ declare function dedupeByNaturalKey(table: TableName, rows: readonly Row[]): Row[];
1482
1502
  declare function dimensionToColumn(dim: string, _table: TableName): string;
1483
- export { page_keywords as _, allTables as a, inferTable as c, TABLE_METADATA as d, countries as f, keywords as g, hourly_pages as h, TableSchema$1 as i, schemaFor as l, drizzleSchema as m, ColumnType as n, currentSchemaVersion as o, devices as p, SCHEMAS as r, dimensionToColumn as s, ColumnDef as t, DrizzleSchema as u, pages as v, search_appearance as y };
1503
+ export { hourly_pages as _, allTables as a, pages as b, dimensionToColumn as c, schemaFor as d, DrizzleSchema as f, drizzleSchema as g, devices as h, TableSchema$1 as i, inferTable as l, countries as m, ColumnType as n, currentSchemaVersion as o, TABLE_METADATA as p, SCHEMAS as r, dedupeByNaturalKey as s, ColumnDef as t, naturalKeyColumns as u, keywords as v, search_appearance as x, page_keywords as y };
@@ -147,9 +147,23 @@ function inferTable(dimensions) {
147
147
  if (dims.has("searchAppearance")) return "search_appearance";
148
148
  return "devices";
149
149
  }
150
+ function naturalKeyColumns(table) {
151
+ return TABLE_METADATA[table].sortKey;
152
+ }
153
+ function dedupeByNaturalKey(table, rows) {
154
+ const key = TABLE_METADATA[table].sortKey;
155
+ if (key.length === 0) return rows.slice();
156
+ const seen = /* @__PURE__ */ new Map();
157
+ for (const row of rows) {
158
+ const r = row;
159
+ const k = key.map((col) => `${r[col] ?? ""}`).join("\0");
160
+ seen.set(k, row);
161
+ }
162
+ return [...seen.values()];
163
+ }
150
164
  function dimensionToColumn(dim, _table) {
151
165
  if (dim === "page") return "url";
152
166
  if (dim === "queryCanonical") return "query_canonical";
153
167
  return dim;
154
168
  }
155
- export { inferTable as a, countries as c, hourly_pages as d, keywords as f, search_appearance as h, dimensionToColumn as i, devices as l, pages as m, allTables as n, schemaFor as o, page_keywords as p, currentSchemaVersion as r, TABLE_METADATA as s, SCHEMAS as t, drizzleSchema as u };
169
+ export { search_appearance as _, dimensionToColumn as a, schemaFor as c, devices as d, drizzleSchema as f, pages as g, page_keywords as h, dedupeByNaturalKey as i, TABLE_METADATA as l, keywords as m, allTables as n, inferTable as o, hourly_pages as p, currentSchemaVersion as r, naturalKeyColumns as s, SCHEMAS as t, countries as u };
@@ -1,4 +1,4 @@
1
- import { s as TABLE_METADATA, t as SCHEMAS } from "../_chunks/schema.mjs";
1
+ import { i as dedupeByNaturalKey, l as TABLE_METADATA, t as SCHEMAS } from "../_chunks/schema.mjs";
2
2
  import { parquetReadObjects } from "hyparquet";
3
3
  import { parquetWriteBuffer } from "hyparquet-writer";
4
4
  const ROW_GROUP_SIZE = 25e3;
@@ -134,11 +134,12 @@ function createHyparquetCodec(options = {}) {
134
134
  const rows = await decodeParquetToRows(await dataSource.read(key));
135
135
  allRows.push(...rows);
136
136
  }
137
- const bytes = encodeRowsToParquet(ctx.table, allRows);
137
+ const rows = dedupeByNaturalKey(ctx.table, allRows);
138
+ const bytes = encodeRowsToParquet(ctx.table, rows);
138
139
  await dataSource.write(outputKey, bytes);
139
140
  return {
140
141
  bytes: bytes.byteLength,
141
- rowCount: allRows.length
142
+ rowCount: rows.length
142
143
  };
143
144
  }
144
145
  };
package/dist/index.d.mts CHANGED
@@ -1,6 +1,6 @@
1
1
  import { A as SyncStateKind, B as hourPartition, C as Row, D as SyncState, E as StorageEngine, F as WatermarkFilter, G as RAW_DAILY_COMPACT_THRESHOLD, H as inferSearchType, I as WatermarkScope, J as enumeratePartitions, K as countRawDailies, L as WriteCtx, M as TableName, N as TenantCtx, O as SyncStateDetail, P as Watermark, R as WriteResult, S as QueryResult, T as SearchType, U as objectKey, V as inferLegacyTier, W as CompactionThresholds, Y as splitOverlappingTiers, _ as PurgeUrlsResult, a as EngineOptions, b as QueryExecuteResult, c as Grain, d as ManifestEntry, f as ManifestPurgeResult, g as PurgeResult, h as PurgeFilter, i as DataSource, j as SyncStateScope, k as SyncStateFilter, l as ListLiveFilter, m as ParquetCodec, n as CompactionTier, o as FileSetRef, p as ManifestStore, q as dedupeOverlappingTiers, r as DEFAULT_SEARCH_TYPE, s as GcCtx, t as CodecCtx, u as LockScope, v as QueryCtx, w as RunSQLOptions, x as QueryExecutor, y as QueryExecuteOptions, z as dayPartition } from "./_chunks/storage.mjs";
2
2
  import { a as createDuckDBExecutor, i as createDuckDBCodec, n as DuckDBHandle, r as canonicalEmptyParquetSchema, t as DuckDBFactory } from "./_chunks/duckdb.mjs";
3
- import { _ as page_keywords, a as allTables, c as inferTable, d as TABLE_METADATA, f as countries, g as keywords, h as hourly_pages, i as TableSchema, m as drizzleSchema, n as ColumnType, o as currentSchemaVersion, p as devices, r as SCHEMAS, s as dimensionToColumn, t as ColumnDef, u as DrizzleSchema, v as pages } from "./_chunks/schema.mjs";
3
+ import { _ as hourly_pages, a as allTables, b as pages, c as dimensionToColumn, f as DrizzleSchema, g as drizzleSchema, h as devices, i as TableSchema, l as inferTable, m as countries, n as ColumnType, o as currentSchemaVersion, p as TABLE_METADATA, r as SCHEMAS, t as ColumnDef, v as keywords, y as page_keywords } from "./_chunks/schema.mjs";
4
4
  import { InspectionVerdict, SchedulePolicy, ScheduleState, fixedPolicy, inspectionPolicy, sitemapPolicy } from "./schedule.mjs";
5
5
  import { GscApiRow, IngestOptions, RowAccumulator, RowAccumulatorOptions, createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
6
6
  import { a as substituteNamedFiles, i as resolveParquetSQL, n as ResolvedQuery, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
package/dist/index.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  import { n as coerceRows, t as coerceRow } from "./_chunks/coerce.mjs";
2
- import { a as inferTable, c as countries, d as hourly_pages, f as keywords, i as dimensionToColumn, l as devices, m as pages, n as allTables, p as page_keywords, r as currentSchemaVersion, s as TABLE_METADATA, t as SCHEMAS, u as drizzleSchema } from "./_chunks/schema.mjs";
2
+ import { a as dimensionToColumn, d as devices, f as drizzleSchema, g as pages, h as page_keywords, l as TABLE_METADATA, m as keywords, n as allTables, o as inferTable, p as hourly_pages, r as currentSchemaVersion, t as SCHEMAS, u as countries } from "./_chunks/schema.mjs";
3
3
  import { a as inferSearchType, c as objectKey, i as inferLegacyTier, n as dayPartition, r as hourPartition, t as DEFAULT_SEARCH_TYPE } from "./_chunks/storage.mjs";
4
4
  import { a as RAW_DAILY_COMPACT_THRESHOLD, c as dedupeOverlappingTiers, i as substituteNamedFiles, l as enumeratePartitions, r as resolveParquetSQL, s as countRawDailies, t as FILES_PLACEHOLDER, u as splitOverlappingTiers } from "./_chunks/parquet-plan.mjs";
5
5
  import { bindLiterals, formatLiteral } from "./sql-bind.mjs";
@@ -1,6 +1,6 @@
1
1
  import { M as TableName$1, T as SearchType$1 } from "../_chunks/storage.mjs";
2
2
  import { a as ResolvedSQLOptimized, i as ResolvedSQL, n as ExtraQuery, o as ResolverAdapter, r as ResolvedComparisonSQL, s as ResolverOptions, t as ComparisonFilter } from "../_chunks/types.mjs";
3
- import { LogicalDataset, LogicalDataset as LogicalDataset$1, PlannerCapabilities } from "gscdump/query/plan";
3
+ import { LogicalDataset, LogicalDataset as LogicalDataset$1, PlannerCapabilities, UnresolvableDatasetError, inferDataset as inferLogicalDataset, isDatasetResolvable } from "gscdump/query/plan";
4
4
  import { SQL } from "drizzle-orm";
5
5
  import { Grain, TableName } from "@gscdump/contracts";
6
6
  import { BuilderState, Dimension, FilterInput, InternalFilter, Metric } from "gscdump/query";
@@ -14,7 +14,6 @@ interface LogicalDatasetDefinition {
14
14
  }
15
15
  declare const DIMENSION_SURFACES: Record<Dimension, readonly DimensionSurface[]>;
16
16
  declare const LOGICAL_DATASETS: Record<LogicalDataset, LogicalDatasetDefinition>;
17
- declare function inferLogicalDataset(dimensions: readonly Dimension[], filterDims?: readonly Dimension[]): LogicalDataset;
18
17
  declare function dimensionColumn(dim: Dimension, dataset: LogicalDataset): string;
19
18
  declare function supportsDimensionOnSurface(dim: Dimension, surface: DimensionSurface): boolean;
20
19
  declare function assertDimensionsSupported(dimensions: readonly Dimension[], surface: DimensionSurface, context: string): void;
@@ -175,4 +174,4 @@ interface AssertSchemaInSyncOptions {
175
174
  mode: 'exact' | 'superset';
176
175
  }
177
176
  declare function assertSchemaInSync(options: AssertSchemaInSyncOptions): void;
178
- export { type AssertSchemaInSyncOptions, type ComparisonFilter, type ComparisonQueryResult, type CreateResolverAdapterConfig, DIMENSION_SURFACES, type DimensionBinding, type DimensionSurface, type ExtraQuery, LOGICAL_DATASETS, type LogicalDataset, type LogicalDatasetDefinition, type OptimizedQueryResult, type PgTableKey, type ResolvedComparisonSQL, type ResolvedSQL, type ResolvedSQLOptimized, type ResolverAdapter, type ResolverOptions, type RunQueryCtx, type RunSQLFn, type SqlFragments, type SqlFragmentsConfig, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, runComparisonQuery, runOptimizedQuery, supportsDimensionOnSurface };
177
+ export { type AssertSchemaInSyncOptions, type ComparisonFilter, type ComparisonQueryResult, type CreateResolverAdapterConfig, DIMENSION_SURFACES, type DimensionBinding, type DimensionSurface, type ExtraQuery, LOGICAL_DATASETS, type LogicalDataset, type LogicalDatasetDefinition, type OptimizedQueryResult, type PgTableKey, type ResolvedComparisonSQL, type ResolvedSQL, type ResolvedSQLOptimized, type ResolverAdapter, type ResolverOptions, type RunQueryCtx, type RunSQLFn, type SqlFragments, type SqlFragmentsConfig, UnresolvableDatasetError, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, isDatasetResolvable, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, runComparisonQuery, runOptimizedQuery, supportsDimensionOnSurface };
@@ -1,2 +1,2 @@
1
- import { C as LOGICAL_DATASETS, D as supportsDimensionOnSurface, E as inferLogicalDataset, S as DIMENSION_SURFACES, T as dimensionColumn, _ as resolveComparisonSQL, a as pgResolverAdapter, b as createResolverAdapter, c as getFilterDimensions, d as matchesMetricFilter, f as matchesTopLevelPage, g as mergeExtras, h as buildTotalsSql, i as createParquetResolverAdapter, l as getInternalFilters, m as buildExtrasQueries, n as runComparisonQuery, o as dimensionValue, p as metricValue, r as runOptimizedQuery, s as getDimensionFilters, t as assertSchemaInSync, u as matchesDimensionFilter, v as resolveToSQL, w as assertDimensionsSupported, x as createSqlFragments, y as resolveToSQLOptimized } from "../_chunks/resolver.mjs";
2
- export { DIMENSION_SURFACES, LOGICAL_DATASETS, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, runComparisonQuery, runOptimizedQuery, supportsDimensionOnSurface };
1
+ import { C as LOGICAL_DATASETS, D as inferLogicalDataset, E as dimensionColumn, O as isDatasetResolvable, S as DIMENSION_SURFACES, T as assertDimensionsSupported, _ as resolveComparisonSQL, a as pgResolverAdapter, b as createResolverAdapter, c as getFilterDimensions, d as matchesMetricFilter, f as matchesTopLevelPage, g as mergeExtras, h as buildTotalsSql, i as createParquetResolverAdapter, k as supportsDimensionOnSurface, l as getInternalFilters, m as buildExtrasQueries, n as runComparisonQuery, o as dimensionValue, p as metricValue, r as runOptimizedQuery, s as getDimensionFilters, t as assertSchemaInSync, u as matchesDimensionFilter, v as resolveToSQL, w as UnresolvableDatasetError, x as createSqlFragments, y as resolveToSQLOptimized } from "../_chunks/resolver.mjs";
2
+ export { DIMENSION_SURFACES, LOGICAL_DATASETS, UnresolvableDatasetError, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, isDatasetResolvable, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, runComparisonQuery, runOptimizedQuery, supportsDimensionOnSurface };
package/dist/schema.d.mts CHANGED
@@ -1,2 +1,2 @@
1
- import { _ as page_keywords, a as allTables, c as inferTable, d as TABLE_METADATA, f as countries, g as keywords, h as hourly_pages, i as TableSchema, l as schemaFor, m as drizzleSchema, n as ColumnType, o as currentSchemaVersion, p as devices, r as SCHEMAS, s as dimensionToColumn, t as ColumnDef, u as DrizzleSchema, v as pages, y as search_appearance } from "./_chunks/schema.mjs";
2
- export { type ColumnDef, type ColumnType, type DrizzleSchema, SCHEMAS, TABLE_METADATA, type TableSchema, allTables, countries, currentSchemaVersion, devices, dimensionToColumn, drizzleSchema, hourly_pages, inferTable, keywords, page_keywords, pages, schemaFor, search_appearance };
1
+ import { _ as hourly_pages, a as allTables, b as pages, c as dimensionToColumn, d as schemaFor, f as DrizzleSchema, g as drizzleSchema, h as devices, i as TableSchema, l as inferTable, m as countries, n as ColumnType, o as currentSchemaVersion, p as TABLE_METADATA, r as SCHEMAS, s as dedupeByNaturalKey, t as ColumnDef, u as naturalKeyColumns, v as keywords, x as search_appearance, y as page_keywords } from "./_chunks/schema.mjs";
2
+ export { type ColumnDef, type ColumnType, type DrizzleSchema, SCHEMAS, TABLE_METADATA, type TableSchema, allTables, countries, currentSchemaVersion, dedupeByNaturalKey, devices, dimensionToColumn, drizzleSchema, hourly_pages, inferTable, keywords, naturalKeyColumns, page_keywords, pages, schemaFor, search_appearance };
package/dist/schema.mjs CHANGED
@@ -1,2 +1,2 @@
1
- import { a as inferTable, c as countries, d as hourly_pages, f as keywords, h as search_appearance, i as dimensionToColumn, l as devices, m as pages, n as allTables, o as schemaFor, p as page_keywords, r as currentSchemaVersion, s as TABLE_METADATA, t as SCHEMAS, u as drizzleSchema } from "./_chunks/schema.mjs";
2
- export { SCHEMAS, TABLE_METADATA, allTables, countries, currentSchemaVersion, devices, dimensionToColumn, drizzleSchema, hourly_pages, inferTable, keywords, page_keywords, pages, schemaFor, search_appearance };
1
+ import { _ as search_appearance, a as dimensionToColumn, c as schemaFor, d as devices, f as drizzleSchema, g as pages, h as page_keywords, i as dedupeByNaturalKey, l as TABLE_METADATA, m as keywords, n as allTables, o as inferTable, p as hourly_pages, r as currentSchemaVersion, s as naturalKeyColumns, t as SCHEMAS, u as countries } from "./_chunks/schema.mjs";
2
+ export { SCHEMAS, TABLE_METADATA, allTables, countries, currentSchemaVersion, dedupeByNaturalKey, devices, dimensionToColumn, drizzleSchema, hourly_pages, inferTable, keywords, naturalKeyColumns, page_keywords, pages, schemaFor, search_appearance };
@@ -1,5 +1,5 @@
1
1
  import { n as coerceRows } from "../_chunks/coerce.mjs";
2
- import { a as pgResolverAdapter, c as getFilterDimensions, v as resolveToSQL, w as assertDimensionsSupported } from "../_chunks/resolver.mjs";
2
+ import { T as assertDimensionsSupported, a as pgResolverAdapter, c as getFilterDimensions, v as resolveToSQL } from "../_chunks/resolver.mjs";
3
3
  import { n as runAnalyzerFromSource } from "../_chunks/dispatch.mjs";
4
4
  var AttachedTableMissingError = class extends Error {
5
5
  missing;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@gscdump/engine",
3
3
  "type": "module",
4
- "version": "0.19.1",
4
+ "version": "0.19.3",
5
5
  "description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
6
6
  "author": {
7
7
  "name": "Harlan Wilton",
@@ -169,8 +169,8 @@
169
169
  "dependencies": {
170
170
  "drizzle-orm": "^0.45.2",
171
171
  "proper-lockfile": "^4.1.2",
172
- "gscdump": "0.19.1",
173
- "@gscdump/contracts": "0.19.1"
172
+ "@gscdump/contracts": "0.19.3",
173
+ "gscdump": "0.19.3"
174
174
  },
175
175
  "devDependencies": {
176
176
  "@duckdb/duckdb-wasm": "^1.32.0",