npm - @gscdump/engine - Versions diffs - 0.18.6 → 0.19.1 - Mend

@gscdump/engine 0.18.6 → 0.19.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/dist/_chunks/engine.mjs +20 -3
package/dist/_chunks/{compiler.mjs → parquet-plan.mjs} +93 -2
package/dist/_chunks/planner.d.mts +2 -2
package/dist/_chunks/resolver.mjs +2 -2
package/dist/_chunks/schema.mjs +1 -1
package/dist/_chunks/storage.d.mts +43 -1
package/dist/entities.d.mts +1 -0
package/dist/index.d.mts +3 -3
package/dist/index.mjs +2 -2
package/dist/planner.d.mts +3 -3
package/dist/planner.mjs +2 -2
package/package.json +3 -3

package/dist/_chunks/engine.mjs CHANGED Viewed

@@ -1,6 +1,6 @@
 import { r as currentSchemaVersion, t as SCHEMAS } from "./schema.mjs";
 import { a as inferSearchType, c as objectKey, d as tenantPrefix, n as dayPartition, r as hourPartition } from "./storage.mjs";
-import { i as substituteNamedFiles, n as compileLogicalQueryPlan, o as compactTieredImpl } from "./compiler.mjs";
+import { c as dedupeOverlappingTiers, i as substituteNamedFiles, n as compileLogicalQueryPlan, o as compactTieredImpl } from "./parquet-plan.mjs";
 import { sqlEscape } from "../sql-bind.mjs";
 import { buildLogicalPlan } from "gscdump/query/plan";
 import { normalizeUrl } from "gscdump/normalize";
@@ -273,6 +273,23 @@ function normalizeRow(table, row) {
 		url: normalized
 	};
 }
+const DAILY_PARTITION_RE = /^daily\/(\d{4}-\d{2}-\d{2})$/; // Matches `daily/YYYY-MM-DD`; group 1 captures the date string.
+function queryRangeOf(partitions) { // Returns { start, end } = smallest/largest date among the `daily/` partitions, or undefined if none.
+	if (!partitions) return void 0; // No partition list supplied, so no range can be derived.
+	let min;
+	let max;
+	for (const p of partitions) {
+		const m = DAILY_PARTITION_RE.exec(p);
+		if (!m) continue; // Partitions of any other shape do not contribute to the range.
+		const d = m[1];
+		if (min === void 0 || d < min) min = d; // String comparison; the regex guarantees zero-padded YYYY-MM-DD, which sorts chronologically.
+		if (max === void 0 || d > max) max = d;
+	}
+	return min !== void 0 ? { // `min` stays undefined only when no `daily/` partition was seen.
+		start: min,
+		end: max
+	} : void 0;
+}
 function createStorageEngine(opts) {
 	const { dataSource, manifestStore, codec, executor } = opts;
 	const defaultNow = opts.now ?? (() => Date.now());
@@ -400,13 +417,13 @@ function createStorageEngine(opts) {
 		const entries = Object.entries(opts.fileSets);
 		const perSet = await Promise.all(entries.map(async ([name, ref]) => {
 			if (ref.keys !== void 0) return [name, ref.keys];
-			return [name, (await manifestStore.listLive({
+			return [name, dedupeOverlappingTiers(await manifestStore.listLive({
 				userId: opts.ctx.userId,
 				siteId: opts.ctx.siteId,
 				table: ref.table,
 				partitions: ref.partitions,
 				...opts.searchType !== void 0 ? { searchType: opts.searchType } : {}
-			})).map((e) => e.objectKey)];
+			}), queryRangeOf(ref.partitions)).map((e) => e.objectKey)];
 		}));
 		opts.signal?.throwIfAborted();
 		const fileKeys = {};

package/dist/_chunks/{compiler.mjs → parquet-plan.mjs} RENAMED Viewed

@@ -6,6 +6,7 @@ import { buildLogicalPlan } from "gscdump/query/plan";
 const DAILY_PARTITION_RE = /^daily\/(\d{4}-\d{2}-\d{2})$/;
 const WEEKLY_PARTITION_RE = /^weekly\/(\d{4}-\d{2}-\d{2})$/;
 const MONTHLY_PARTITION_RE = /^monthly\/(\d{4}-\d{2})$/;
+const QUARTERLY_PARTITION_RE = /^quarterly\/(\d{4})-Q([1-4])$/;
 const DEFAULT_THRESHOLDS = {
 	raw: 7,
 	d7: 30,
@@ -146,6 +147,96 @@ function enumeratePartitions(startDate, endDate) {
 	}
 	return out;
 }
+function partitionSpan(partition) { // Maps a partition key to { rank, startMs, endMs }; falls through (returns undefined) for unrecognised shapes.
+	let m = partition.match(DAILY_PARTITION_RE);
+	if (m) {
+		const ms = Date.parse(`${m[1]}T00:00:00Z`); // Midnight UTC of the partition's date.
+		return {
+			rank: 0, // daily
+			startMs: ms,
+			endMs: ms // A daily partition spans a single day, so start and end coincide.
+		};
+	}
+	m = partition.match(WEEKLY_PARTITION_RE);
+	if (m) {
+		const ms = Date.parse(`${m[1]}T00:00:00Z`);
+		return {
+			rank: 1, // weekly
+			startMs: ms,
+			endMs: ms + 6 * MS_PER_DAY // Start plus six steps of MS_PER_DAY (defined outside this hunk; presumably one day in ms).
+		};
+	}
+	m = partition.match(MONTHLY_PARTITION_RE);
+	if (m) {
+		const [y, mo] = m[1].split("-").map(Number); // `mo` is the 1-based month taken from `YYYY-MM`.
+		return {
+			rank: 2, // monthly
+			startMs: Date.UTC(y, mo - 1, 1), // Date.UTC takes a 0-based month index.
+			endMs: Date.UTC(y, mo, 0) // Day 0 of the following month resolves to the last day of this month (midnight UTC).
+		};
+	}
+	m = partition.match(QUARTERLY_PARTITION_RE);
+	if (m) {
+		const y = Number(m[1]);
+		const q = Number(m[2]); // Quarter number 1-4, as constrained by the regex.
+		return {
+			rank: 3, // quarterly
+			startMs: Date.UTC(y, (q - 1) * 3, 1), // First day of the quarter's first month.
+			endMs: Date.UTC(y, q * 3, 0) // Day 0 of the month after the quarter = last day of the quarter (midnight UTC).
+		};
+	}
+}
+function splitOverlappingTiers(entries, queryRange) { // Partitions `entries` into { kept, subsumed }; `queryRange` ({ start, end } date strings) is optional.
+	const rangeStartMs = queryRange ? Date.parse(`${queryRange.start}T00:00:00Z`) : void 0; // Range bounds as midnight-UTC timestamps.
+	const rangeEndMs = queryRange ? Date.parse(`${queryRange.end}T00:00:00Z`) : void 0;
+	const spanned = [];
+	const kept = [];
+	const subsumed = [];
+	for (const entry of entries) {
+		const span = partitionSpan(entry.partition);
+		if (!span) { // Unrecognised partition shape: always kept, never compared against other entries.
+			kept.push(entry);
+			continue;
+		}
+		const days = [];
+		for (let t = span.startMs; t <= span.endMs; t += MS_PER_DAY) { // One timestamp per step from startMs to endMs inclusive.
+			if (rangeStartMs !== void 0 && (t < rangeStartMs || t > rangeEndMs)) continue; // Drop days outside the query range.
+			days.push(t);
+		}
+		if (queryRange && days.length === 0) { // Nothing of this entry falls inside the range: reported as subsumed.
+			subsumed.push(entry);
+			continue;
+		}
+		spanned.push({
+			entry,
+			rank: span.rank,
+			days
+		});
+	}
+	spanned.sort((a, b) => a.rank - b.rank || b.entry.createdAt - a.entry.createdAt); // Lowest rank first; within a rank, larger createdAt first.
+	const coveredBySearchType = /* @__PURE__ */ new Map(); // inferSearchType(entry) -> Set of day timestamps already claimed.
+	for (const { entry, days } of spanned) {
+		const slice = inferSearchType(entry);
+		let covered = coveredBySearchType.get(slice);
+		if (!covered) {
+			covered = /* @__PURE__ */ new Set();
+			coveredBySearchType.set(slice, covered);
+		}
+		if (days.every((d) => covered.has(d))) { // Every retained day is already claimed by an earlier entry of the same search type.
+			subsumed.push(entry);
+			continue;
+		}
+		kept.push(entry);
+		for (const d of days) covered.add(d); // Claim this entry's days so later (coarser or older) entries can be judged against them.
+	}
+	return {
+		kept,
+		subsumed
+	};
+}
+function dedupeOverlappingTiers(entries, queryRange) { // Convenience wrapper: same arguments as splitOverlappingTiers, returns only its `kept` list.
+	return splitOverlappingTiers(entries, queryRange).kept;
+}
 function monthEndMs(month) {
 	const [y, m] = month.split("-").map(Number);
 	return Date.UTC(y, m, 0, 23, 59, 59, 999);
@@ -278,7 +369,7 @@ function compileLogicalQueryPlan(plan, table = plan.dataset) {
 		filesPlaceholder: FILES_PLACEHOLDER
 	};
 }
-function resolveToSQL(state, table) {
+function resolveParquetSQL(state, table) { // Builds a logical plan from `state` (with `regex: true`) and compiles it; `table` defaults to the plan's dataset.
 	const plan = buildLogicalPlan(state, { regex: true });
 	return compileLogicalQueryPlan(plan, table ?? plan.dataset);
 }
@@ -290,4 +381,4 @@ function substituteNamedFiles(sql, sets) {
 	for (const [name, keys] of Object.entries(sets)) out = out.replace(new RegExp(`\\{\\{${name}\\}\\}`, "g"), fileList(keys));
 	return out;
 }
-export { RAW_DAILY_COMPACT_THRESHOLD as a, enumeratePartitions as c, substituteNamedFiles as i, compileLogicalQueryPlan as n, compactTieredImpl as o, resolveToSQL as r, countRawDailies as s, FILES_PLACEHOLDER as t };
+export { RAW_DAILY_COMPACT_THRESHOLD as a, dedupeOverlappingTiers as c, substituteNamedFiles as i, enumeratePartitions as l, compileLogicalQueryPlan as n, compactTieredImpl as o, resolveParquetSQL as r, countRawDailies as s, FILES_PLACEHOLDER as t, splitOverlappingTiers as u };

package/dist/_chunks/planner.d.mts CHANGED Viewed

@@ -10,6 +10,6 @@ interface ResolvedQuery {
 }
 declare const FILES_PLACEHOLDER = "{{FILES}}";
 declare function compileLogicalQueryPlan(plan: LogicalQueryPlan, table?: TableName): ResolvedQuery;
-declare function resolveToSQL(state: BuilderState, table?: TableName): ResolvedQuery;
+declare function resolveParquetSQL(state: BuilderState, table?: TableName): ResolvedQuery;
 declare function substituteNamedFiles(sql: string, sets: Record<string, string[]>): string;
-export { substituteNamedFiles as a, resolveToSQL as i, ResolvedQuery as n, compileLogicalQueryPlan as r, FILES_PLACEHOLDER as t };
+export { substituteNamedFiles as a, resolveParquetSQL as i, ResolvedQuery as n, compileLogicalQueryPlan as r, FILES_PLACEHOLDER as t };

package/dist/_chunks/resolver.mjs CHANGED Viewed

@@ -1,5 +1,5 @@
 import { t as SCHEMAS, u as drizzleSchema } from "./schema.mjs";
-import { c as enumeratePartitions } from "./compiler.mjs";
+import { l as enumeratePartitions } from "./parquet-plan.mjs";
 import { escapeLike } from "../sql-fragments.mjs";
 import "../planner.mjs";
 import { PgDialect } from "drizzle-orm/pg-core";
@@ -99,7 +99,7 @@ function inferLogicalDataset(dimensions, filterDims = []) {
 	if (has("page")) return "pages";
 	if (has("country")) return "countries";
 	if (has("device")) return "devices";
-	return "keywords";
+	return "devices";
 }
 function dimensionColumn(dim, dataset) {
 	return LOGICAL_DATASETS[dataset].dimensions[dim]?.column ?? dim;

package/dist/_chunks/schema.mjs CHANGED Viewed

@@ -145,7 +145,7 @@ function inferTable(dimensions) {
 	if (dims.has("country")) return "countries";
 	if (dims.has("device")) return "devices";
 	if (dims.has("searchAppearance")) return "search_appearance";
-	return "keywords";
+	return "devices";
 }
 function dimensionToColumn(dim, _table) {
 	if (dim === "page") return "url";

package/dist/_chunks/storage.d.mts CHANGED Viewed

@@ -17,6 +17,48 @@ declare function countRawDailies(entries: ReadonlyArray<{
   partition: string;
 }>): number;
 declare function enumeratePartitions(startDate: string, endDate: string): string[];
+/**
+ * Split manifest entries into the set worth reading (`kept`) and the set whose
+ * every covered day is already served by a finer-or-newer live entry
+ * (`subsumed`).
+ *
+ * Tiered compaction (daily→weekly→monthly→quarterly) is meant to retire its
+ * inputs, but coarse files can outlive their finer counterparts: a D1→R2
+ * backfill writes daily files that compact to monthly while a later re-sync
+ * writes fresh daily/weekly for the same dates, and same-partition re-writes
+ * leave a stale prior version live. All stay live, the resolver unions every
+ * live tier whose partition intersects the range, and `union_by_name` sums the
+ * overlap — impressions/clicks double-count.
+ *
+ * Entries are walked finest-tier-first, newest-first within a tier, so a
+ * coarse or stale file is dropped only when every day it covers is already
+ * claimed. Subsumption is evaluated per searchType — a `web` monthly never
+ * cancels a `discover` weekly, they cover disjoint data. Partial
+ * month-boundary overlap (a weekly straddling two months alongside a kept
+ * monthly) still double-counts those boundary days — eliminating that needs
+ * per-file date predicates in the SQL, tracked separately. Unrecognised
+ * partition shapes (`hourly/`, sidecar keys) are always kept.
+ *
+ * `queryRange` clamps every entry's day-span to the window the caller will
+ * actually read. This is required when `entries` came from a partition-
+ * filtered `listLive` (`runSQL` enumerates only the partitions intersecting
+ * the query): a `monthly/2026-04` whose Apr 27-30 falls past the query end
+ * must not be judged "unsubsumed" just because `weekly/2026-04-27` wasn't
+ * enumerated — those out-of-window days are SQL-filtered to nothing anyway.
+ * Omit `queryRange` when `entries` is the full manifest (e.g. analysis-sources).
+ */
+declare function splitOverlappingTiers(entries: ManifestEntry[], queryRange?: {
+  start: string;
+  end: string;
+}): {
+  kept: ManifestEntry[];
+  subsumed: ManifestEntry[];
+};
+/** Entries worth reading — see {@link splitOverlappingTiers}. */
+declare function dedupeOverlappingTiers(entries: ManifestEntry[], queryRange?: {
+  start: string;
+  end: string;
+}): ManifestEntry[];
 /**
  * Default `searchType` for entries written before the field landed and for
  * sync paths that don't request a specific type. GSC's own default; the
@@ -508,4 +550,4 @@ declare function dayPartition(date: string): string;
  */
 declare function hourPartition(date: string): string;
 declare function objectKey(ctx: TenantCtx, table: TableName, partition: string, version: number, searchType?: SearchType): string;
-export { SyncStateKind as A, hourPartition as B, Row$1 as C, SyncState as D, StorageEngine as E, WatermarkFilter as F, RAW_DAILY_COMPACT_THRESHOLD as G, inferSearchType as H, WatermarkScope as I, countRawDailies as K, WriteCtx as L, TableName$1 as M, TenantCtx$1 as N, SyncStateDetail as O, Watermark as P, WriteResult as R, QueryResult as S, SearchType$1 as T, objectKey as U, inferLegacyTier as V, CompactionThresholds as W, PurgeUrlsResult as _, EngineOptions as a, QueryExecuteResult as b, Grain$1 as c, ManifestEntry as d, ManifestPurgeResult as f, PurgeResult as g, PurgeFilter as h, DataSource as i, SyncStateScope as j, SyncStateFilter as k, ListLiveFilter as l, ParquetCodec as m, CompactionTier as n, FileSetRef as o, ManifestStore as p, enumeratePartitions as q, DEFAULT_SEARCH_TYPE as r, GcCtx as s, CodecCtx as t, LockScope as u, QueryCtx as v, RunSQLOptions as w, QueryExecutor as x, QueryExecuteOptions as y, dayPartition as z };
+export { SyncStateKind as A, hourPartition as B, Row$1 as C, SyncState as D, StorageEngine as E, WatermarkFilter as F, RAW_DAILY_COMPACT_THRESHOLD as G, inferSearchType as H, WatermarkScope as I, enumeratePartitions as J, countRawDailies as K, WriteCtx as L, TableName$1 as M, TenantCtx$1 as N, SyncStateDetail as O, Watermark as P, WriteResult as R, QueryResult as S, SearchType$1 as T, objectKey as U, inferLegacyTier as V, CompactionThresholds as W, splitOverlappingTiers as Y, PurgeUrlsResult as _, EngineOptions as a, QueryExecuteResult as b, Grain$1 as c, ManifestEntry as d, ManifestPurgeResult as f, PurgeResult as g, PurgeFilter as h, DataSource as i, SyncStateScope as j, SyncStateFilter as k, ListLiveFilter as l, ParquetCodec as m, CompactionTier as n, FileSetRef as o, ManifestStore as p, dedupeOverlappingTiers as q, DEFAULT_SEARCH_TYPE as r, GcCtx as s, CodecCtx as t, LockScope as u, QueryCtx as v, RunSQLOptions as w, QueryExecutor as x, QueryExecuteOptions as y, dayPartition as z };

package/dist/entities.d.mts CHANGED Viewed

@@ -81,6 +81,7 @@ declare function hashUrl(url: string): string;
  * `parquetUri`.
  */
 interface InspectionParquetRow {
+  [column: string]: string | number | null;
   urlHash: string;
   url: string;
   inspectedAt: string;

package/dist/index.d.mts CHANGED Viewed

@@ -1,9 +1,9 @@
-import { A as SyncStateKind, B as hourPartition, C as Row, D as SyncState, E as StorageEngine, F as WatermarkFilter, G as RAW_DAILY_COMPACT_THRESHOLD, H as inferSearchType, I as WatermarkScope, K as countRawDailies, L as WriteCtx, M as TableName, N as TenantCtx, O as SyncStateDetail, P as Watermark, R as WriteResult, S as QueryResult, T as SearchType, U as objectKey, V as inferLegacyTier, W as CompactionThresholds, _ as PurgeUrlsResult, a as EngineOptions, b as QueryExecuteResult, c as Grain, d as ManifestEntry, f as ManifestPurgeResult, g as PurgeResult, h as PurgeFilter, i as DataSource, j as SyncStateScope, k as SyncStateFilter, l as ListLiveFilter, m as ParquetCodec, n as CompactionTier, o as FileSetRef, p as ManifestStore, q as enumeratePartitions, r as DEFAULT_SEARCH_TYPE, s as GcCtx, t as CodecCtx, u as LockScope, v as QueryCtx, w as RunSQLOptions, x as QueryExecutor, y as QueryExecuteOptions, z as dayPartition } from "./_chunks/storage.mjs";
+import { A as SyncStateKind, B as hourPartition, C as Row, D as SyncState, E as StorageEngine, F as WatermarkFilter, G as RAW_DAILY_COMPACT_THRESHOLD, H as inferSearchType, I as WatermarkScope, J as enumeratePartitions, K as countRawDailies, L as WriteCtx, M as TableName, N as TenantCtx, O as SyncStateDetail, P as Watermark, R as WriteResult, S as QueryResult, T as SearchType, U as objectKey, V as inferLegacyTier, W as CompactionThresholds, Y as splitOverlappingTiers, _ as PurgeUrlsResult, a as EngineOptions, b as QueryExecuteResult, c as Grain, d as ManifestEntry, f as ManifestPurgeResult, g as PurgeResult, h as PurgeFilter, i as DataSource, j as SyncStateScope, k as SyncStateFilter, l as ListLiveFilter, m as ParquetCodec, n as CompactionTier, o as FileSetRef, p as ManifestStore, q as dedupeOverlappingTiers, r as DEFAULT_SEARCH_TYPE, s as GcCtx, t as CodecCtx, u as LockScope, v as QueryCtx, w as RunSQLOptions, x as QueryExecutor, y as QueryExecuteOptions, z as dayPartition } from "./_chunks/storage.mjs";
 import { a as createDuckDBExecutor, i as createDuckDBCodec, n as DuckDBHandle, r as canonicalEmptyParquetSchema, t as DuckDBFactory } from "./_chunks/duckdb.mjs";
 import { _ as page_keywords, a as allTables, c as inferTable, d as TABLE_METADATA, f as countries, g as keywords, h as hourly_pages, i as TableSchema, m as drizzleSchema, n as ColumnType, o as currentSchemaVersion, p as devices, r as SCHEMAS, s as dimensionToColumn, t as ColumnDef, u as DrizzleSchema, v as pages } from "./_chunks/schema.mjs";
 import { InspectionVerdict, SchedulePolicy, ScheduleState, fixedPolicy, inspectionPolicy, sitemapPolicy } from "./schedule.mjs";
 import { GscApiRow, IngestOptions, RowAccumulator, RowAccumulatorOptions, createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
-import { a as substituteNamedFiles, i as resolveToSQL, n as ResolvedQuery, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
+import { a as substituteNamedFiles, i as resolveParquetSQL, n as ResolvedQuery, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
 import { rebuildDailyFromHourly } from "./rollups.mjs";
 import { bindLiterals, formatLiteral } from "./sql-bind.mjs";
 import { Grain as Grain$1, Row as Row$1, TableName as TableName$1 } from "@gscdump/contracts";
@@ -147,4 +147,4 @@ declare const MIN_SYNC_IMPRESSIONS = 1;
 declare const MIN_COUNTRY_IMPRESSIONS = 10;
 declare const MAX_SITEMAP_URLS_PER_SITE = 50000;
 declare const MAX_TRACKED_URLS_PER_SITE = 200000;
-export { type CodecCtx, type ColumnDef, type ColumnType, type CompactionThresholds, type CompactionTier, type CreateIngestAccumulatorOptions, DEFAULT_SEARCH_TYPE, type DataSource, type DateWeight, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, type EngineOptions, FILES_PLACEHOLDER, type FileSetRef, type FinalizeOptions, type FinalizeResult, type GcCtx, type Grain, type GscApiRow, type IngestAccumulator, type IngestAccumulatorCtx, type IngestAccumulatorEngine, type IngestAccumulatorHooks, type IngestOptions, type InspectionVerdict, type ListLiveFilter, type LockScope, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SchedulePolicy, type ScheduleState, type SearchType, type StorageEngine, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, type SyncTableName, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, type TableName, type TableSchema, type TableTier, type TenantCtx, type TieredTableName, WEIGHT_PRIORITY, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, keywords, objectKey, page_keywords, pages, parseEnabledSearchTypes, rebuildDailyFromHourly, resolveToSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
+export { type CodecCtx, type ColumnDef, type ColumnType, type CompactionThresholds, type CompactionTier, type CreateIngestAccumulatorOptions, DEFAULT_SEARCH_TYPE, type DataSource, type DateWeight, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, type EngineOptions, FILES_PLACEHOLDER, type FileSetRef, type FinalizeOptions, type FinalizeResult, type GcCtx, type Grain, type GscApiRow, type IngestAccumulator, type IngestAccumulatorCtx, type IngestAccumulatorEngine, type IngestAccumulatorHooks, type IngestOptions, type InspectionVerdict, type ListLiveFilter, type LockScope, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SchedulePolicy, type ScheduleState, type SearchType, type StorageEngine, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, type SyncTableName, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, type TableName, type TableSchema, type TableTier, type TenantCtx, type TieredTableName, WEIGHT_PRIORITY, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, dedupeOverlappingTiers, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, keywords, objectKey, page_keywords, pages, parseEnabledSearchTypes, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, splitOverlappingTiers, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };

package/dist/index.mjs CHANGED Viewed

@@ -1,7 +1,7 @@
 import { n as coerceRows, t as coerceRow } from "./_chunks/coerce.mjs";
 import { a as inferTable, c as countries, d as hourly_pages, f as keywords, i as dimensionToColumn, l as devices, m as pages, n as allTables, p as page_keywords, r as currentSchemaVersion, s as TABLE_METADATA, t as SCHEMAS, u as drizzleSchema } from "./_chunks/schema.mjs";
 import { a as inferSearchType, c as objectKey, i as inferLegacyTier, n as dayPartition, r as hourPartition, t as DEFAULT_SEARCH_TYPE } from "./_chunks/storage.mjs";
-import { a as RAW_DAILY_COMPACT_THRESHOLD, c as enumeratePartitions, i as substituteNamedFiles, r as resolveToSQL, s as countRawDailies, t as FILES_PLACEHOLDER } from "./_chunks/compiler.mjs";
+import { a as RAW_DAILY_COMPACT_THRESHOLD, c as dedupeOverlappingTiers, i as substituteNamedFiles, l as enumeratePartitions, r as resolveParquetSQL, s as countRawDailies, t as FILES_PLACEHOLDER, u as splitOverlappingTiers } from "./_chunks/parquet-plan.mjs";
 import { bindLiterals, formatLiteral } from "./sql-bind.mjs";
 import { a as createDuckDBCodec, i as canonicalEmptyParquetSchema, n as createStorageEngine, o as createDuckDBExecutor, r as gcOrphansImpl, t as MAX_DAY_BYTES } from "./_chunks/engine.mjs";
 import { createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
@@ -215,4 +215,4 @@ const MIN_SYNC_IMPRESSIONS = 1;
 const MIN_COUNTRY_IMPRESSIONS = 10;
 const MAX_SITEMAP_URLS_PER_SITE = 5e4;
 const MAX_TRACKED_URLS_PER_SITE = 2e5;
-export { DEFAULT_SEARCH_TYPE, FILES_PLACEHOLDER, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, SCHEMAS, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, WEIGHT_PRIORITY, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, keywords, objectKey, page_keywords, pages, parseEnabledSearchTypes, rebuildDailyFromHourly, resolveToSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
+export { DEFAULT_SEARCH_TYPE, FILES_PLACEHOLDER, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, SCHEMAS, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, WEIGHT_PRIORITY, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, dedupeOverlappingTiers, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, keywords, objectKey, page_keywords, pages, parseEnabledSearchTypes, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, splitOverlappingTiers, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };

package/dist/planner.d.mts CHANGED Viewed

@@ -1,3 +1,3 @@
-import { q as enumeratePartitions } from "./_chunks/storage.mjs";
-import { a as substituteNamedFiles, i as resolveToSQL, n as ResolvedQuery, r as compileLogicalQueryPlan, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
-export { FILES_PLACEHOLDER, type ResolvedQuery, compileLogicalQueryPlan, enumeratePartitions, resolveToSQL, substituteNamedFiles };
+import { J as enumeratePartitions } from "./_chunks/storage.mjs";
+import { a as substituteNamedFiles, i as resolveParquetSQL, n as ResolvedQuery, r as compileLogicalQueryPlan, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
+export { FILES_PLACEHOLDER, type ResolvedQuery, compileLogicalQueryPlan, enumeratePartitions, resolveParquetSQL, substituteNamedFiles };

package/dist/planner.mjs CHANGED Viewed

@@ -1,2 +1,2 @@
-import { c as enumeratePartitions, i as substituteNamedFiles, n as compileLogicalQueryPlan, r as resolveToSQL, t as FILES_PLACEHOLDER } from "./_chunks/compiler.mjs";
-export { FILES_PLACEHOLDER, compileLogicalQueryPlan, enumeratePartitions, resolveToSQL, substituteNamedFiles };
+import { i as substituteNamedFiles, l as enumeratePartitions, n as compileLogicalQueryPlan, r as resolveParquetSQL, t as FILES_PLACEHOLDER } from "./_chunks/parquet-plan.mjs";
+export { FILES_PLACEHOLDER, compileLogicalQueryPlan, enumeratePartitions, resolveParquetSQL, substituteNamedFiles };

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@gscdump/engine",
   "type": "module",
-  "version": "0.18.6",
+  "version": "0.19.1",
   "description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
   "author": {
     "name": "Harlan Wilton",
@@ -169,8 +169,8 @@
   "dependencies": {
     "drizzle-orm": "^0.45.2",
     "proper-lockfile": "^4.1.2",
-    "@gscdump/contracts": "0.18.6",
-    "gscdump": "0.18.6"
+    "gscdump": "0.19.1",
+    "@gscdump/contracts": "0.19.1"
   },
   "devDependencies": {
     "@duckdb/duckdb-wasm": "^1.32.0",