@gscdump/engine 0.18.5 → 0.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  import { r as currentSchemaVersion, t as SCHEMAS } from "./schema.mjs";
2
2
  import { a as inferSearchType, c as objectKey, d as tenantPrefix, n as dayPartition, r as hourPartition } from "./storage.mjs";
3
- import { i as substituteNamedFiles, n as compileLogicalQueryPlan, o as compactTieredImpl } from "./compiler.mjs";
3
+ import { c as dedupeOverlappingTiers, i as substituteNamedFiles, n as compileLogicalQueryPlan, o as compactTieredImpl } from "./parquet-plan.mjs";
4
4
  import { sqlEscape } from "../sql-bind.mjs";
5
5
  import { buildLogicalPlan } from "gscdump/query/plan";
6
6
  import { normalizeUrl } from "gscdump/normalize";
@@ -400,7 +400,7 @@ function createStorageEngine(opts) {
400
400
  const entries = Object.entries(opts.fileSets);
401
401
  const perSet = await Promise.all(entries.map(async ([name, ref]) => {
402
402
  if (ref.keys !== void 0) return [name, ref.keys];
403
- return [name, (await manifestStore.listLive({
403
+ return [name, dedupeOverlappingTiers(await manifestStore.listLive({
404
404
  userId: opts.ctx.userId,
405
405
  siteId: opts.ctx.siteId,
406
406
  table: ref.table,
@@ -6,6 +6,7 @@ import { buildLogicalPlan } from "gscdump/query/plan";
6
6
  const DAILY_PARTITION_RE = /^daily\/(\d{4}-\d{2}-\d{2})$/;
7
7
  const WEEKLY_PARTITION_RE = /^weekly\/(\d{4}-\d{2}-\d{2})$/;
8
8
  const MONTHLY_PARTITION_RE = /^monthly\/(\d{4}-\d{2})$/;
9
+ const QUARTERLY_PARTITION_RE = /^quarterly\/(\d{4})-Q([1-4])$/;
9
10
  const DEFAULT_THRESHOLDS = {
10
11
  raw: 7,
11
12
  d7: 30,
@@ -146,6 +147,87 @@ function enumeratePartitions(startDate, endDate) {
146
147
  }
147
148
  return out;
148
149
  }
150
+ function partitionSpan(partition) {
151
+ let m = partition.match(DAILY_PARTITION_RE);
152
+ if (m) {
153
+ const ms = Date.parse(`${m[1]}T00:00:00Z`);
154
+ return {
155
+ rank: 0,
156
+ startMs: ms,
157
+ endMs: ms
158
+ };
159
+ }
160
+ m = partition.match(WEEKLY_PARTITION_RE);
161
+ if (m) {
162
+ const ms = Date.parse(`${m[1]}T00:00:00Z`);
163
+ return {
164
+ rank: 1,
165
+ startMs: ms,
166
+ endMs: ms + 6 * MS_PER_DAY
167
+ };
168
+ }
169
+ m = partition.match(MONTHLY_PARTITION_RE);
170
+ if (m) {
171
+ const [y, mo] = m[1].split("-").map(Number);
172
+ return {
173
+ rank: 2,
174
+ startMs: Date.UTC(y, mo - 1, 1),
175
+ endMs: Date.UTC(y, mo, 0)
176
+ };
177
+ }
178
+ m = partition.match(QUARTERLY_PARTITION_RE);
179
+ if (m) {
180
+ const y = Number(m[1]);
181
+ const q = Number(m[2]);
182
+ return {
183
+ rank: 3,
184
+ startMs: Date.UTC(y, (q - 1) * 3, 1),
185
+ endMs: Date.UTC(y, q * 3, 0)
186
+ };
187
+ }
188
+ }
189
+ function splitOverlappingTiers(entries) {
190
+ const spanned = [];
191
+ const kept = [];
192
+ for (const entry of entries) {
193
+ const span = partitionSpan(entry.partition);
194
+ if (!span) {
195
+ kept.push(entry);
196
+ continue;
197
+ }
198
+ const days = [];
199
+ for (let t = span.startMs; t <= span.endMs; t += MS_PER_DAY) days.push(t);
200
+ spanned.push({
201
+ entry,
202
+ rank: span.rank,
203
+ days
204
+ });
205
+ }
206
+ spanned.sort((a, b) => a.rank - b.rank || b.entry.createdAt - a.entry.createdAt);
207
+ const coveredBySearchType = /* @__PURE__ */ new Map();
208
+ const subsumed = [];
209
+ for (const { entry, days } of spanned) {
210
+ const slice = inferSearchType(entry);
211
+ let covered = coveredBySearchType.get(slice);
212
+ if (!covered) {
213
+ covered = /* @__PURE__ */ new Set();
214
+ coveredBySearchType.set(slice, covered);
215
+ }
216
+ if (days.every((d) => covered.has(d))) {
217
+ subsumed.push(entry);
218
+ continue;
219
+ }
220
+ kept.push(entry);
221
+ for (const d of days) covered.add(d);
222
+ }
223
+ return {
224
+ kept,
225
+ subsumed
226
+ };
227
+ }
228
/**
 * Return only the entries worth reading.
 *
 * Runs `splitOverlappingTiers` on the entries and discards its `subsumed`
 * half.
 *
 * @param entries Manifest entries to filter.
 * @returns The `kept` list produced by `splitOverlappingTiers`.
 */
function dedupeOverlappingTiers(entries) {
  const { kept } = splitOverlappingTiers(entries);
  return kept;
}
149
231
  function monthEndMs(month) {
150
232
  const [y, m] = month.split("-").map(Number);
151
233
  return Date.UTC(y, m, 0, 23, 59, 59, 999);
@@ -278,7 +360,7 @@ function compileLogicalQueryPlan(plan, table = plan.dataset) {
278
360
  filesPlaceholder: FILES_PLACEHOLDER
279
361
  };
280
362
  }
281
- function resolveToSQL(state, table) {
363
/**
 * Build a logical plan from builder state and compile it.
 *
 * The plan is built with the `regex` option enabled. It is compiled against
 * `table` when one is supplied, otherwise against the plan's own `dataset`.
 *
 * @param state Builder state handed to `buildLogicalPlan`.
 * @param table Optional table override; `null`/`undefined` selects `plan.dataset`.
 * @returns Whatever `compileLogicalQueryPlan` returns for the plan.
 */
function resolveParquetSQL(state, table) {
  const logicalPlan = buildLogicalPlan(state, { regex: true });
  const target = table ?? logicalPlan.dataset;
  return compileLogicalQueryPlan(logicalPlan, target);
}
@@ -290,4 +372,4 @@ function substituteNamedFiles(sql, sets) {
290
372
  for (const [name, keys] of Object.entries(sets)) out = out.replace(new RegExp(`\\{\\{${name}\\}\\}`, "g"), fileList(keys));
291
373
  return out;
292
374
  }
293
- export { RAW_DAILY_COMPACT_THRESHOLD as a, enumeratePartitions as c, substituteNamedFiles as i, compileLogicalQueryPlan as n, compactTieredImpl as o, resolveToSQL as r, countRawDailies as s, FILES_PLACEHOLDER as t };
375
+ export { RAW_DAILY_COMPACT_THRESHOLD as a, dedupeOverlappingTiers as c, substituteNamedFiles as i, enumeratePartitions as l, compileLogicalQueryPlan as n, compactTieredImpl as o, resolveParquetSQL as r, countRawDailies as s, FILES_PLACEHOLDER as t, splitOverlappingTiers as u };
@@ -10,6 +10,6 @@ interface ResolvedQuery {
10
10
  }
11
11
  declare const FILES_PLACEHOLDER = "{{FILES}}";
12
12
  declare function compileLogicalQueryPlan(plan: LogicalQueryPlan, table?: TableName): ResolvedQuery;
13
- declare function resolveToSQL(state: BuilderState, table?: TableName): ResolvedQuery;
13
+ declare function resolveParquetSQL(state: BuilderState, table?: TableName): ResolvedQuery;
14
14
  declare function substituteNamedFiles(sql: string, sets: Record<string, string[]>): string;
15
- export { substituteNamedFiles as a, resolveToSQL as i, ResolvedQuery as n, compileLogicalQueryPlan as r, FILES_PLACEHOLDER as t };
15
+ export { substituteNamedFiles as a, resolveParquetSQL as i, ResolvedQuery as n, compileLogicalQueryPlan as r, FILES_PLACEHOLDER as t };
@@ -1,5 +1,5 @@
1
1
  import { t as SCHEMAS, u as drizzleSchema } from "./schema.mjs";
2
- import { c as enumeratePartitions } from "./compiler.mjs";
2
+ import { l as enumeratePartitions } from "./parquet-plan.mjs";
3
3
  import { escapeLike } from "../sql-fragments.mjs";
4
4
  import "../planner.mjs";
5
5
  import { PgDialect } from "drizzle-orm/pg-core";
@@ -99,7 +99,7 @@ function inferLogicalDataset(dimensions, filterDims = []) {
99
99
  if (has("page")) return "pages";
100
100
  if (has("country")) return "countries";
101
101
  if (has("device")) return "devices";
102
- return "keywords";
102
+ return "devices";
103
103
  }
104
104
  function dimensionColumn(dim, dataset) {
105
105
  return LOGICAL_DATASETS[dataset].dimensions[dim]?.column ?? dim;
@@ -145,7 +145,7 @@ function inferTable(dimensions) {
145
145
  if (dims.has("country")) return "countries";
146
146
  if (dims.has("device")) return "devices";
147
147
  if (dims.has("searchAppearance")) return "search_appearance";
148
- return "keywords";
148
+ return "devices";
149
149
  }
150
150
  function dimensionToColumn(dim, _table) {
151
151
  if (dim === "page") return "url";
@@ -17,6 +17,34 @@ declare function countRawDailies(entries: ReadonlyArray<{
17
17
  partition: string;
18
18
  }>): number;
19
19
  declare function enumeratePartitions(startDate: string, endDate: string): string[];
20
+ /**
21
+ * Split manifest entries into the set worth reading (`kept`) and the set whose
22
+ * every covered day is already served by a finer-or-newer live entry
23
+ * (`subsumed`).
24
+ *
25
+ * Tiered compaction (daily→weekly→monthly→quarterly) is meant to retire its
26
+ * inputs, but coarse files can outlive their finer counterparts: a D1→R2
27
+ * backfill writes daily files that compact to monthly while a later re-sync
28
+ * writes fresh daily/weekly for the same dates, and same-partition re-writes
29
+ * leave a stale prior version live. All stay live, the resolver unions every
30
+ * live tier whose partition intersects the range, and `union_by_name` sums the
31
+ * overlap — impressions/clicks double-count.
32
+ *
33
+ * Entries are walked finest-tier-first, newest-first within a tier, so a
34
+ * coarse or stale file is dropped only when every day it covers is already
35
+ * claimed. Subsumption is evaluated per searchType — a `web` monthly never
36
+ * cancels a `discover` weekly, they cover disjoint data. Partial
37
+ * month-boundary overlap (a weekly straddling two months alongside a kept
38
+ * monthly) still double-counts those boundary days — eliminating that needs
39
+ * per-file date predicates in the SQL, tracked separately. Unrecognised
40
+ * partition shapes (`hourly/`, sidecar keys) are always kept.
41
+ */
42
+ declare function splitOverlappingTiers(entries: ManifestEntry[]): {
43
+ kept: ManifestEntry[];
44
+ subsumed: ManifestEntry[];
45
+ };
46
+ /** Entries worth reading — see {@link splitOverlappingTiers}. */
47
+ declare function dedupeOverlappingTiers(entries: ManifestEntry[]): ManifestEntry[];
20
48
  /**
21
49
  * Default `searchType` for entries written before the field landed and for
22
50
  * sync paths that don't request a specific type. GSC's own default; the
@@ -508,4 +536,4 @@ declare function dayPartition(date: string): string;
508
536
  */
509
537
  declare function hourPartition(date: string): string;
510
538
  declare function objectKey(ctx: TenantCtx, table: TableName, partition: string, version: number, searchType?: SearchType): string;
511
- export { SyncStateKind as A, hourPartition as B, Row$1 as C, SyncState as D, StorageEngine as E, WatermarkFilter as F, RAW_DAILY_COMPACT_THRESHOLD as G, inferSearchType as H, WatermarkScope as I, countRawDailies as K, WriteCtx as L, TableName$1 as M, TenantCtx$1 as N, SyncStateDetail as O, Watermark as P, WriteResult as R, QueryResult as S, SearchType$1 as T, objectKey as U, inferLegacyTier as V, CompactionThresholds as W, PurgeUrlsResult as _, EngineOptions as a, QueryExecuteResult as b, Grain$1 as c, ManifestEntry as d, ManifestPurgeResult as f, PurgeResult as g, PurgeFilter as h, DataSource as i, SyncStateScope as j, SyncStateFilter as k, ListLiveFilter as l, ParquetCodec as m, CompactionTier as n, FileSetRef as o, ManifestStore as p, enumeratePartitions as q, DEFAULT_SEARCH_TYPE as r, GcCtx as s, CodecCtx as t, LockScope as u, QueryCtx as v, RunSQLOptions as w, QueryExecutor as x, QueryExecuteOptions as y, dayPartition as z };
539
+ export { SyncStateKind as A, hourPartition as B, Row$1 as C, SyncState as D, StorageEngine as E, WatermarkFilter as F, RAW_DAILY_COMPACT_THRESHOLD as G, inferSearchType as H, WatermarkScope as I, enumeratePartitions as J, countRawDailies as K, WriteCtx as L, TableName$1 as M, TenantCtx$1 as N, SyncStateDetail as O, Watermark as P, WriteResult as R, QueryResult as S, SearchType$1 as T, objectKey as U, inferLegacyTier as V, CompactionThresholds as W, splitOverlappingTiers as Y, PurgeUrlsResult as _, EngineOptions as a, QueryExecuteResult as b, Grain$1 as c, ManifestEntry as d, ManifestPurgeResult as f, PurgeResult as g, PurgeFilter as h, DataSource as i, SyncStateScope as j, SyncStateFilter as k, ListLiveFilter as l, ParquetCodec as m, CompactionTier as n, FileSetRef as o, ManifestStore as p, dedupeOverlappingTiers as q, DEFAULT_SEARCH_TYPE as r, GcCtx as s, CodecCtx as t, LockScope as u, QueryCtx as v, RunSQLOptions as w, QueryExecutor as x, QueryExecuteOptions as y, dayPartition as z };
@@ -81,6 +81,7 @@ declare function hashUrl(url: string): string;
81
81
  * `parquetUri`.
82
82
  */
83
83
  interface InspectionParquetRow {
84
+ [column: string]: string | number | null;
84
85
  urlHash: string;
85
86
  url: string;
86
87
  inspectedAt: string;
package/dist/index.d.mts CHANGED
@@ -1,9 +1,9 @@
1
- import { A as SyncStateKind, B as hourPartition, C as Row, D as SyncState, E as StorageEngine, F as WatermarkFilter, G as RAW_DAILY_COMPACT_THRESHOLD, H as inferSearchType, I as WatermarkScope, K as countRawDailies, L as WriteCtx, M as TableName, N as TenantCtx, O as SyncStateDetail, P as Watermark, R as WriteResult, S as QueryResult, T as SearchType, U as objectKey, V as inferLegacyTier, W as CompactionThresholds, _ as PurgeUrlsResult, a as EngineOptions, b as QueryExecuteResult, c as Grain, d as ManifestEntry, f as ManifestPurgeResult, g as PurgeResult, h as PurgeFilter, i as DataSource, j as SyncStateScope, k as SyncStateFilter, l as ListLiveFilter, m as ParquetCodec, n as CompactionTier, o as FileSetRef, p as ManifestStore, q as enumeratePartitions, r as DEFAULT_SEARCH_TYPE, s as GcCtx, t as CodecCtx, u as LockScope, v as QueryCtx, w as RunSQLOptions, x as QueryExecutor, y as QueryExecuteOptions, z as dayPartition } from "./_chunks/storage.mjs";
1
+ import { A as SyncStateKind, B as hourPartition, C as Row, D as SyncState, E as StorageEngine, F as WatermarkFilter, G as RAW_DAILY_COMPACT_THRESHOLD, H as inferSearchType, I as WatermarkScope, J as enumeratePartitions, K as countRawDailies, L as WriteCtx, M as TableName, N as TenantCtx, O as SyncStateDetail, P as Watermark, R as WriteResult, S as QueryResult, T as SearchType, U as objectKey, V as inferLegacyTier, W as CompactionThresholds, Y as splitOverlappingTiers, _ as PurgeUrlsResult, a as EngineOptions, b as QueryExecuteResult, c as Grain, d as ManifestEntry, f as ManifestPurgeResult, g as PurgeResult, h as PurgeFilter, i as DataSource, j as SyncStateScope, k as SyncStateFilter, l as ListLiveFilter, m as ParquetCodec, n as CompactionTier, o as FileSetRef, p as ManifestStore, q as dedupeOverlappingTiers, r as DEFAULT_SEARCH_TYPE, s as GcCtx, t as CodecCtx, u as LockScope, v as QueryCtx, w as RunSQLOptions, x as QueryExecutor, y as QueryExecuteOptions, z as dayPartition } from "./_chunks/storage.mjs";
2
2
  import { a as createDuckDBExecutor, i as createDuckDBCodec, n as DuckDBHandle, r as canonicalEmptyParquetSchema, t as DuckDBFactory } from "./_chunks/duckdb.mjs";
3
3
  import { _ as page_keywords, a as allTables, c as inferTable, d as TABLE_METADATA, f as countries, g as keywords, h as hourly_pages, i as TableSchema, m as drizzleSchema, n as ColumnType, o as currentSchemaVersion, p as devices, r as SCHEMAS, s as dimensionToColumn, t as ColumnDef, u as DrizzleSchema, v as pages } from "./_chunks/schema.mjs";
4
4
  import { InspectionVerdict, SchedulePolicy, ScheduleState, fixedPolicy, inspectionPolicy, sitemapPolicy } from "./schedule.mjs";
5
5
  import { GscApiRow, IngestOptions, RowAccumulator, RowAccumulatorOptions, createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
6
- import { a as substituteNamedFiles, i as resolveToSQL, n as ResolvedQuery, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
6
+ import { a as substituteNamedFiles, i as resolveParquetSQL, n as ResolvedQuery, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
7
7
  import { rebuildDailyFromHourly } from "./rollups.mjs";
8
8
  import { bindLiterals, formatLiteral } from "./sql-bind.mjs";
9
9
  import { Grain as Grain$1, Row as Row$1, TableName as TableName$1 } from "@gscdump/contracts";
@@ -147,4 +147,4 @@ declare const MIN_SYNC_IMPRESSIONS = 1;
147
147
  declare const MIN_COUNTRY_IMPRESSIONS = 10;
148
148
  declare const MAX_SITEMAP_URLS_PER_SITE = 50000;
149
149
  declare const MAX_TRACKED_URLS_PER_SITE = 200000;
150
- export { type CodecCtx, type ColumnDef, type ColumnType, type CompactionThresholds, type CompactionTier, type CreateIngestAccumulatorOptions, DEFAULT_SEARCH_TYPE, type DataSource, type DateWeight, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, type EngineOptions, FILES_PLACEHOLDER, type FileSetRef, type FinalizeOptions, type FinalizeResult, type GcCtx, type Grain, type GscApiRow, type IngestAccumulator, type IngestAccumulatorCtx, type IngestAccumulatorEngine, type IngestAccumulatorHooks, type IngestOptions, type InspectionVerdict, type ListLiveFilter, type LockScope, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SchedulePolicy, type ScheduleState, type SearchType, type StorageEngine, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, type SyncTableName, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, type TableName, type TableSchema, type TableTier, type TenantCtx, type TieredTableName, WEIGHT_PRIORITY, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, 
getTablesForTier, hourPartition, hourly_pages, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, keywords, objectKey, page_keywords, pages, parseEnabledSearchTypes, rebuildDailyFromHourly, resolveToSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
150
+ export { type CodecCtx, type ColumnDef, type ColumnType, type CompactionThresholds, type CompactionTier, type CreateIngestAccumulatorOptions, DEFAULT_SEARCH_TYPE, type DataSource, type DateWeight, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, type EngineOptions, FILES_PLACEHOLDER, type FileSetRef, type FinalizeOptions, type FinalizeResult, type GcCtx, type Grain, type GscApiRow, type IngestAccumulator, type IngestAccumulatorCtx, type IngestAccumulatorEngine, type IngestAccumulatorHooks, type IngestOptions, type InspectionVerdict, type ListLiveFilter, type LockScope, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SchedulePolicy, type ScheduleState, type SearchType, type StorageEngine, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, type SyncTableName, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, type TableName, type TableSchema, type TableTier, type TenantCtx, type TieredTableName, WEIGHT_PRIORITY, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, dedupeOverlappingTiers, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, 
getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, keywords, objectKey, page_keywords, pages, parseEnabledSearchTypes, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, splitOverlappingTiers, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
package/dist/index.mjs CHANGED
@@ -1,7 +1,7 @@
1
1
  import { n as coerceRows, t as coerceRow } from "./_chunks/coerce.mjs";
2
2
  import { a as inferTable, c as countries, d as hourly_pages, f as keywords, i as dimensionToColumn, l as devices, m as pages, n as allTables, p as page_keywords, r as currentSchemaVersion, s as TABLE_METADATA, t as SCHEMAS, u as drizzleSchema } from "./_chunks/schema.mjs";
3
3
  import { a as inferSearchType, c as objectKey, i as inferLegacyTier, n as dayPartition, r as hourPartition, t as DEFAULT_SEARCH_TYPE } from "./_chunks/storage.mjs";
4
- import { a as RAW_DAILY_COMPACT_THRESHOLD, c as enumeratePartitions, i as substituteNamedFiles, r as resolveToSQL, s as countRawDailies, t as FILES_PLACEHOLDER } from "./_chunks/compiler.mjs";
4
+ import { a as RAW_DAILY_COMPACT_THRESHOLD, c as dedupeOverlappingTiers, i as substituteNamedFiles, l as enumeratePartitions, r as resolveParquetSQL, s as countRawDailies, t as FILES_PLACEHOLDER, u as splitOverlappingTiers } from "./_chunks/parquet-plan.mjs";
5
5
  import { bindLiterals, formatLiteral } from "./sql-bind.mjs";
6
6
  import { a as createDuckDBCodec, i as canonicalEmptyParquetSchema, n as createStorageEngine, o as createDuckDBExecutor, r as gcOrphansImpl, t as MAX_DAY_BYTES } from "./_chunks/engine.mjs";
7
7
  import { createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
@@ -215,4 +215,4 @@ const MIN_SYNC_IMPRESSIONS = 1;
215
215
  const MIN_COUNTRY_IMPRESSIONS = 10;
216
216
  const MAX_SITEMAP_URLS_PER_SITE = 5e4;
217
217
  const MAX_TRACKED_URLS_PER_SITE = 2e5;
218
- export { DEFAULT_SEARCH_TYPE, FILES_PLACEHOLDER, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, SCHEMAS, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, WEIGHT_PRIORITY, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, keywords, objectKey, page_keywords, pages, parseEnabledSearchTypes, rebuildDailyFromHourly, resolveToSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
218
+ export { DEFAULT_SEARCH_TYPE, FILES_PLACEHOLDER, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, SCHEMAS, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, WEIGHT_PRIORITY, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, dedupeOverlappingTiers, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, keywords, objectKey, page_keywords, pages, parseEnabledSearchTypes, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, splitOverlappingTiers, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
@@ -1,3 +1,3 @@
1
- import { q as enumeratePartitions } from "./_chunks/storage.mjs";
2
- import { a as substituteNamedFiles, i as resolveToSQL, n as ResolvedQuery, r as compileLogicalQueryPlan, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
3
- export { FILES_PLACEHOLDER, type ResolvedQuery, compileLogicalQueryPlan, enumeratePartitions, resolveToSQL, substituteNamedFiles };
1
+ import { J as enumeratePartitions } from "./_chunks/storage.mjs";
2
+ import { a as substituteNamedFiles, i as resolveParquetSQL, n as ResolvedQuery, r as compileLogicalQueryPlan, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
3
+ export { FILES_PLACEHOLDER, type ResolvedQuery, compileLogicalQueryPlan, enumeratePartitions, resolveParquetSQL, substituteNamedFiles };
package/dist/planner.mjs CHANGED
@@ -1,2 +1,2 @@
1
- import { c as enumeratePartitions, i as substituteNamedFiles, n as compileLogicalQueryPlan, r as resolveToSQL, t as FILES_PLACEHOLDER } from "./_chunks/compiler.mjs";
2
- export { FILES_PLACEHOLDER, compileLogicalQueryPlan, enumeratePartitions, resolveToSQL, substituteNamedFiles };
1
+ import { i as substituteNamedFiles, l as enumeratePartitions, n as compileLogicalQueryPlan, r as resolveParquetSQL, t as FILES_PLACEHOLDER } from "./_chunks/parquet-plan.mjs";
2
+ export { FILES_PLACEHOLDER, compileLogicalQueryPlan, enumeratePartitions, resolveParquetSQL, substituteNamedFiles };
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@gscdump/engine",
3
3
  "type": "module",
4
- "version": "0.18.5",
4
+ "version": "0.19.0",
5
5
  "description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
6
6
  "author": {
7
7
  "name": "Harlan Wilton",
@@ -169,8 +169,8 @@
169
169
  "dependencies": {
170
170
  "drizzle-orm": "^0.45.2",
171
171
  "proper-lockfile": "^4.1.2",
172
- "@gscdump/contracts": "0.18.5",
173
- "gscdump": "0.18.5"
172
+ "gscdump": "0.19.0",
173
+ "@gscdump/contracts": "0.19.0"
174
174
  },
175
175
  "devDependencies": {
176
176
  "@duckdb/duckdb-wasm": "^1.32.0",