@gscdump/engine 0.24.1 → 0.25.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,247 @@
1
+ import { dayPartition, inferSearchType, mondayOfWeek, monthPartition, objectKey, quarterOfMonth, quarterPartition, weekPartition } from "./layout.mjs";
2
+ import { currentSchemaVersion } from "./schema.mjs";
3
+ import { MS_PER_DAY } from "gscdump";
4
+ const DAILY_PARTITION_RE = /^daily\/(\d{4}-\d{2}-\d{2})$/;
5
+ const WEEKLY_PARTITION_RE = /^weekly\/(\d{4}-\d{2}-\d{2})$/;
6
+ const MONTHLY_PARTITION_RE = /^monthly\/(\d{4}-\d{2})$/;
7
+ const QUARTERLY_PARTITION_RE = /^quarterly\/(\d{4})-Q([1-4])$/;
8
+ const DEFAULT_THRESHOLDS = {
9
+ raw: 7,
10
+ d7: 30,
11
+ d30: 90
12
+ };
13
+ function countRawDailies(entries) {
14
+ return entries.filter((e) => e.tier === "raw" || e.tier == null && e.partition.startsWith("daily/")).length;
15
+ }
16
+ const PENDING_WINDOW_DAYS = 4;
17
+ const STAGES = [
18
+ {
19
+ inputTier: "raw",
20
+ outputTier: "d7",
21
+ cutoffDays: DEFAULT_THRESHOLDS.raw,
22
+ bucketKey: (e) => {
23
+ const m = e.partition.match(DAILY_PARTITION_RE);
24
+ if (!m) return void 0;
25
+ return mondayOfWeek(m[1]);
26
+ },
27
+ bucketLatestMs: (monday) => Date.parse(`${monday}T00:00:00Z`) + 6 * MS_PER_DAY,
28
+ outputPartition: weekPartition
29
+ },
30
+ {
31
+ inputTier: "d7",
32
+ outputTier: "d30",
33
+ cutoffDays: DEFAULT_THRESHOLDS.d7,
34
+ bucketKey: (e) => {
35
+ const m = e.partition.match(WEEKLY_PARTITION_RE);
36
+ if (!m) return void 0;
37
+ return m[1].slice(0, 7);
38
+ },
39
+ bucketLatestMs: monthEndMs,
40
+ outputPartition: monthPartition
41
+ },
42
+ {
43
+ inputTier: "d30",
44
+ outputTier: "d90",
45
+ cutoffDays: DEFAULT_THRESHOLDS.d30,
46
+ bucketKey: (e) => {
47
+ const m = e.partition.match(MONTHLY_PARTITION_RE);
48
+ if (!m) return void 0;
49
+ return quarterOfMonth(m[1]);
50
+ },
51
+ bucketLatestMs: quarterEndMs,
52
+ outputPartition: quarterPartition
53
+ }
54
+ ];
55
+ async function compactTieredImpl(deps, ctx, now, overrides = {}) {
56
+ const thresholds = {
57
+ ...DEFAULT_THRESHOLDS,
58
+ ...overrides
59
+ };
60
+ const stagesWithThresholds = STAGES.map((s) => ({
61
+ ...s,
62
+ cutoffDays: s.outputTier === "d7" ? thresholds.raw : s.outputTier === "d30" ? thresholds.d7 : thresholds.d30
63
+ }));
64
+ for (const stage of stagesWithThresholds) await runStage(deps, ctx, stage, now);
65
+ }
66
+ async function runStage(deps, ctx, stage, now) {
67
+ const cutoff = now - Math.max(stage.cutoffDays, PENDING_WINDOW_DAYS) * MS_PER_DAY;
68
+ const candidates = await deps.manifestStore.listLive({
69
+ userId: ctx.userId,
70
+ siteId: ctx.siteId,
71
+ table: ctx.table,
72
+ tier: stage.inputTier
73
+ });
74
+ const buckets = /* @__PURE__ */ new Map();
75
+ for (const entry of candidates) {
76
+ if (entry.partition.startsWith("hourly/")) continue;
77
+ const key = stage.bucketKey(entry);
78
+ if (!key) continue;
79
+ if (stage.bucketLatestMs(key) >= cutoff) continue;
80
+ const compositeKey = `${inferSearchType(entry)}\0${key}`;
81
+ if (!buckets.has(compositeKey)) buckets.set(compositeKey, []);
82
+ buckets.get(compositeKey).push(entry);
83
+ }
84
+ for (const [compositeKey, entries] of buckets) {
85
+ const [searchType, bucket] = compositeKey.split("\0");
86
+ const targetPartition = stage.outputPartition(bucket);
87
+ if (entries.length === 1 && entries[0].partition === targetPartition) continue;
88
+ await deps.manifestStore.withLock({
89
+ userId: ctx.userId,
90
+ siteId: ctx.siteId,
91
+ table: ctx.table,
92
+ partition: targetPartition
93
+ }, async () => {
94
+ const key = objectKey(ctx, ctx.table, targetPartition, now, searchType);
95
+ const { bytes, rowCount } = await deps.codec.compactRows({ table: ctx.table }, entries.map((e) => e.objectKey), key, deps.dataSource);
96
+ const newEntry = {
97
+ userId: ctx.userId,
98
+ siteId: ctx.siteId,
99
+ table: ctx.table,
100
+ partition: targetPartition,
101
+ objectKey: key,
102
+ rowCount,
103
+ bytes,
104
+ createdAt: now,
105
+ schemaVersion: currentSchemaVersion(ctx.table),
106
+ tier: stage.outputTier,
107
+ ...searchType !== "web" ? { searchType } : {}
108
+ };
109
+ await deps.manifestStore.registerVersion(newEntry, entries);
110
+ });
111
+ }
112
+ }
113
+ function enumeratePartitions(startDate, endDate) {
114
+ const out = [];
115
+ const [sy, sm, sd] = startDate.split("-").map(Number);
116
+ const [ey, em, ed] = endDate.split("-").map(Number);
117
+ const start = Date.UTC(sy, sm - 1, sd);
118
+ const end = Date.UTC(ey, em - 1, ed);
119
+ if (end < start) return out;
120
+ const seenWeeks = /* @__PURE__ */ new Set();
121
+ const seenMonths = /* @__PURE__ */ new Set();
122
+ const seenQuarters = /* @__PURE__ */ new Set();
123
+ for (let t = start; t <= end; t += 864e5) {
124
+ const d = new Date(t);
125
+ const y = d.getUTCFullYear();
126
+ const m = String(d.getUTCMonth() + 1).padStart(2, "0");
127
+ const isoDay = `${y}-${m}-${String(d.getUTCDate()).padStart(2, "0")}`;
128
+ const isoMonth = `${y}-${m}`;
129
+ out.push(dayPartition(isoDay));
130
+ const monday = mondayOfWeek(isoDay);
131
+ if (!seenWeeks.has(monday)) {
132
+ seenWeeks.add(monday);
133
+ out.push(weekPartition(monday));
134
+ }
135
+ if (!seenMonths.has(isoMonth)) {
136
+ seenMonths.add(isoMonth);
137
+ out.push(monthPartition(isoMonth));
138
+ }
139
+ const quarter = quarterOfMonth(isoMonth);
140
+ if (!seenQuarters.has(quarter)) {
141
+ seenQuarters.add(quarter);
142
+ out.push(quarterPartition(quarter));
143
+ }
144
+ }
145
+ return out;
146
+ }
147
+ function partitionSpan(partition) {
148
+ let m = partition.match(DAILY_PARTITION_RE);
149
+ if (m) {
150
+ const ms = Date.parse(`${m[1]}T00:00:00Z`);
151
+ return {
152
+ rank: 0,
153
+ startMs: ms,
154
+ endMs: ms
155
+ };
156
+ }
157
+ m = partition.match(WEEKLY_PARTITION_RE);
158
+ if (m) {
159
+ const ms = Date.parse(`${m[1]}T00:00:00Z`);
160
+ return {
161
+ rank: 1,
162
+ startMs: ms,
163
+ endMs: ms + 6 * MS_PER_DAY
164
+ };
165
+ }
166
+ m = partition.match(MONTHLY_PARTITION_RE);
167
+ if (m) {
168
+ const [y, mo] = m[1].split("-").map(Number);
169
+ return {
170
+ rank: 2,
171
+ startMs: Date.UTC(y, mo - 1, 1),
172
+ endMs: Date.UTC(y, mo, 0)
173
+ };
174
+ }
175
+ m = partition.match(QUARTERLY_PARTITION_RE);
176
+ if (m) {
177
+ const y = Number(m[1]);
178
+ const q = Number(m[2]);
179
+ return {
180
+ rank: 3,
181
+ startMs: Date.UTC(y, (q - 1) * 3, 1),
182
+ endMs: Date.UTC(y, q * 3, 0)
183
+ };
184
+ }
185
+ }
186
+ function splitOverlappingTiers(entries, queryRange) {
187
+ const rangeStartMs = queryRange ? Date.parse(`${queryRange.start}T00:00:00Z`) : void 0;
188
+ const rangeEndMs = queryRange ? Date.parse(`${queryRange.end}T00:00:00Z`) : void 0;
189
+ const spanned = [];
190
+ const kept = [];
191
+ const subsumed = [];
192
+ for (const entry of entries) {
193
+ const span = partitionSpan(entry.partition);
194
+ if (!span) {
195
+ kept.push(entry);
196
+ continue;
197
+ }
198
+ const days = [];
199
+ for (let t = span.startMs; t <= span.endMs; t += MS_PER_DAY) {
200
+ if (rangeStartMs !== void 0 && (t < rangeStartMs || t > rangeEndMs)) continue;
201
+ days.push(t);
202
+ }
203
+ if (queryRange && days.length === 0) {
204
+ subsumed.push(entry);
205
+ continue;
206
+ }
207
+ spanned.push({
208
+ entry,
209
+ rank: span.rank,
210
+ days
211
+ });
212
+ }
213
+ spanned.sort((a, b) => a.rank - b.rank || b.entry.createdAt - a.entry.createdAt);
214
+ const coveredBySearchType = /* @__PURE__ */ new Map();
215
+ for (const { entry, days } of spanned) {
216
+ const slice = inferSearchType(entry);
217
+ let covered = coveredBySearchType.get(slice);
218
+ if (!covered) {
219
+ covered = /* @__PURE__ */ new Set();
220
+ coveredBySearchType.set(slice, covered);
221
+ }
222
+ if (days.every((d) => covered.has(d))) {
223
+ subsumed.push(entry);
224
+ continue;
225
+ }
226
+ kept.push(entry);
227
+ for (const d of days) covered.add(d);
228
+ }
229
+ return {
230
+ kept,
231
+ subsumed
232
+ };
233
+ }
234
+ function dedupeOverlappingTiers(entries, queryRange) {
235
+ return splitOverlappingTiers(entries, queryRange).kept;
236
+ }
237
+ function monthEndMs(month) {
238
+ const [y, m] = month.split("-").map(Number);
239
+ return Date.UTC(y, m, 0, 23, 59, 59, 999);
240
+ }
241
+ function quarterEndMs(quarter) {
242
+ const [yStr, qStr] = quarter.split("-Q");
243
+ const y = Number(yStr);
244
+ const q = Number(qStr);
245
+ return Date.UTC(y, q * 3, 0, 23, 59, 59, 999);
246
+ }
247
+ export { compactTieredImpl, countRawDailies, dedupeOverlappingTiers, enumeratePartitions, splitOverlappingTiers };
@@ -1,9 +1,10 @@
1
+ import { dayPartition, hourPartition, inferSearchType, objectKey, tenantPrefix } from "./layout.mjs";
1
2
  import { SCHEMAS, currentSchemaVersion, dedupeByNaturalKey } from "./schema.mjs";
2
- import { dayPartition, hourPartition, inferSearchType, objectKey, tenantPrefix } from "./storage.mjs";
3
- import { compactTieredImpl, compileLogicalQueryPlan, dedupeOverlappingTiers, substituteNamedFiles } from "./parquet-plan.mjs";
3
+ import { compactTieredImpl, dedupeOverlappingTiers, splitOverlappingTiers } from "./compaction.mjs";
4
+ import { compileLogicalQueryPlan, substituteNamedFiles } from "./parquet-plan.mjs";
4
5
  import { sqlEscape } from "../sql-bind.mjs";
5
6
  import { buildLogicalPlan } from "gscdump/query/plan";
6
- import { normalizeUrl } from "gscdump/normalize";
7
+ import { normalizeUrl } from "gscdump";
7
8
  async function encodeBytes(db, table, rows) {
8
9
  const inName = db.makeTempPath("json");
9
10
  const outName = db.makeTempPath("parquet");
@@ -485,6 +486,22 @@ function createStorageEngine(opts) {
485
486
  codec
486
487
  }, ctx, (ctx.now ?? defaultNow)(), thresholds);
487
488
  }
489
+ async function reconcileSubsumed(ctx) {
490
+ const { subsumed } = splitOverlappingTiers(await manifestStore.listLive({
491
+ userId: ctx.userId,
492
+ siteId: ctx.siteId,
493
+ table: ctx.table
494
+ }));
495
+ if (subsumed.length === 0) return {
496
+ retired: 0,
497
+ partitions: []
498
+ };
499
+ await manifestStore.registerVersions([], subsumed);
500
+ return {
501
+ retired: subsumed.length,
502
+ partitions: subsumed.map((e) => e.partition)
503
+ };
504
+ }
488
505
  async function gcOrphans(ctx, graceMs) {
489
506
  return gcOrphansImpl({
490
507
  dataSource,
@@ -590,6 +607,7 @@ function createStorageEngine(opts) {
590
607
  query,
591
608
  runSQL,
592
609
  compactTiered,
610
+ reconcileSubsumed,
593
611
  gcOrphans,
594
612
  purgeTenant,
595
613
  purgeUrls,
@@ -601,4 +619,4 @@ function createStorageEngine(opts) {
601
619
  readObject: (key) => dataSource.read(key)
602
620
  };
603
621
  }
604
- export { MAX_DAY_BYTES, canonicalEmptyParquetSchema, createDuckDBCodec, createDuckDBExecutor, createStorageEngine, gcOrphansImpl };
622
+ export { MAX_DAY_BYTES, canonicalEmptyParquetSchema, createDuckDBCodec, createDuckDBExecutor, createStorageEngine };
@@ -1,252 +1,7 @@
1
- import { currentSchemaVersion, dimensionToColumn } from "./schema.mjs";
2
- import { dayPartition, inferSearchType, mondayOfWeek, monthPartition, objectKey, quarterOfMonth, quarterPartition, weekPartition } from "./storage.mjs";
1
+ import { dimensionToColumn } from "./schema.mjs";
2
+ import { enumeratePartitions } from "./compaction.mjs";
3
3
  import { METRIC_EXPR, escapeLike, topLevelPagePredicateSql } from "../sql-fragments.mjs";
4
- import { MS_PER_DAY } from "gscdump";
5
4
  import { buildLogicalPlan } from "gscdump/query/plan";
6
- const DAILY_PARTITION_RE = /^daily\/(\d{4}-\d{2}-\d{2})$/;
7
- const WEEKLY_PARTITION_RE = /^weekly\/(\d{4}-\d{2}-\d{2})$/;
8
- const MONTHLY_PARTITION_RE = /^monthly\/(\d{4}-\d{2})$/;
9
- const QUARTERLY_PARTITION_RE = /^quarterly\/(\d{4})-Q([1-4])$/;
10
- const DEFAULT_THRESHOLDS = {
11
- raw: 7,
12
- d7: 30,
13
- d30: 90
14
- };
15
- const RAW_DAILY_COMPACT_THRESHOLD = 7;
16
- function countRawDailies(entries) {
17
- return entries.filter((e) => e.tier === "raw" || e.tier == null && e.partition.startsWith("daily/")).length;
18
- }
19
- const PENDING_WINDOW_DAYS = 4;
20
- const STAGES = [
21
- {
22
- inputTier: "raw",
23
- outputTier: "d7",
24
- cutoffDays: DEFAULT_THRESHOLDS.raw,
25
- bucketKey: (e) => {
26
- const m = e.partition.match(DAILY_PARTITION_RE);
27
- if (!m) return void 0;
28
- return mondayOfWeek(m[1]);
29
- },
30
- bucketLatestMs: (monday) => Date.parse(`${monday}T00:00:00Z`) + 6 * MS_PER_DAY,
31
- outputPartition: weekPartition
32
- },
33
- {
34
- inputTier: "d7",
35
- outputTier: "d30",
36
- cutoffDays: DEFAULT_THRESHOLDS.d7,
37
- bucketKey: (e) => {
38
- const m = e.partition.match(WEEKLY_PARTITION_RE);
39
- if (!m) return void 0;
40
- return m[1].slice(0, 7);
41
- },
42
- bucketLatestMs: monthEndMs,
43
- outputPartition: monthPartition
44
- },
45
- {
46
- inputTier: "d30",
47
- outputTier: "d90",
48
- cutoffDays: DEFAULT_THRESHOLDS.d30,
49
- bucketKey: (e) => {
50
- const m = e.partition.match(MONTHLY_PARTITION_RE);
51
- if (!m) return void 0;
52
- return quarterOfMonth(m[1]);
53
- },
54
- bucketLatestMs: quarterEndMs,
55
- outputPartition: quarterPartition
56
- }
57
- ];
58
- async function compactTieredImpl(deps, ctx, now, overrides = {}) {
59
- const thresholds = {
60
- ...DEFAULT_THRESHOLDS,
61
- ...overrides
62
- };
63
- const stagesWithThresholds = STAGES.map((s) => ({
64
- ...s,
65
- cutoffDays: s.outputTier === "d7" ? thresholds.raw : s.outputTier === "d30" ? thresholds.d7 : thresholds.d30
66
- }));
67
- for (const stage of stagesWithThresholds) await runStage(deps, ctx, stage, now);
68
- }
69
- async function runStage(deps, ctx, stage, now) {
70
- const cutoff = now - Math.max(stage.cutoffDays, PENDING_WINDOW_DAYS) * MS_PER_DAY;
71
- const candidates = await deps.manifestStore.listLive({
72
- userId: ctx.userId,
73
- siteId: ctx.siteId,
74
- table: ctx.table,
75
- tier: stage.inputTier
76
- });
77
- const buckets = /* @__PURE__ */ new Map();
78
- for (const entry of candidates) {
79
- if (entry.partition.startsWith("hourly/")) continue;
80
- const key = stage.bucketKey(entry);
81
- if (!key) continue;
82
- if (stage.bucketLatestMs(key) >= cutoff) continue;
83
- const compositeKey = `${inferSearchType(entry)}\0${key}`;
84
- if (!buckets.has(compositeKey)) buckets.set(compositeKey, []);
85
- buckets.get(compositeKey).push(entry);
86
- }
87
- for (const [compositeKey, entries] of buckets) {
88
- const [searchType, bucket] = compositeKey.split("\0");
89
- const targetPartition = stage.outputPartition(bucket);
90
- if (entries.length === 1 && entries[0].partition === targetPartition) continue;
91
- await deps.manifestStore.withLock({
92
- userId: ctx.userId,
93
- siteId: ctx.siteId,
94
- table: ctx.table,
95
- partition: targetPartition
96
- }, async () => {
97
- const key = objectKey(ctx, ctx.table, targetPartition, now, searchType);
98
- const { bytes, rowCount } = await deps.codec.compactRows({ table: ctx.table }, entries.map((e) => e.objectKey), key, deps.dataSource);
99
- const newEntry = {
100
- userId: ctx.userId,
101
- siteId: ctx.siteId,
102
- table: ctx.table,
103
- partition: targetPartition,
104
- objectKey: key,
105
- rowCount,
106
- bytes,
107
- createdAt: now,
108
- schemaVersion: currentSchemaVersion(ctx.table),
109
- tier: stage.outputTier,
110
- ...searchType !== "web" ? { searchType } : {}
111
- };
112
- await deps.manifestStore.registerVersion(newEntry, entries);
113
- });
114
- }
115
- }
116
- function enumeratePartitions(startDate, endDate) {
117
- const out = [];
118
- const [sy, sm, sd] = startDate.split("-").map(Number);
119
- const [ey, em, ed] = endDate.split("-").map(Number);
120
- const start = Date.UTC(sy, sm - 1, sd);
121
- const end = Date.UTC(ey, em - 1, ed);
122
- if (end < start) return out;
123
- const seenWeeks = /* @__PURE__ */ new Set();
124
- const seenMonths = /* @__PURE__ */ new Set();
125
- const seenQuarters = /* @__PURE__ */ new Set();
126
- for (let t = start; t <= end; t += 864e5) {
127
- const d = new Date(t);
128
- const y = d.getUTCFullYear();
129
- const m = String(d.getUTCMonth() + 1).padStart(2, "0");
130
- const isoDay = `${y}-${m}-${String(d.getUTCDate()).padStart(2, "0")}`;
131
- const isoMonth = `${y}-${m}`;
132
- out.push(dayPartition(isoDay));
133
- const monday = mondayOfWeek(isoDay);
134
- if (!seenWeeks.has(monday)) {
135
- seenWeeks.add(monday);
136
- out.push(weekPartition(monday));
137
- }
138
- if (!seenMonths.has(isoMonth)) {
139
- seenMonths.add(isoMonth);
140
- out.push(monthPartition(isoMonth));
141
- }
142
- const quarter = quarterOfMonth(isoMonth);
143
- if (!seenQuarters.has(quarter)) {
144
- seenQuarters.add(quarter);
145
- out.push(quarterPartition(quarter));
146
- }
147
- }
148
- return out;
149
- }
150
- function partitionSpan(partition) {
151
- let m = partition.match(DAILY_PARTITION_RE);
152
- if (m) {
153
- const ms = Date.parse(`${m[1]}T00:00:00Z`);
154
- return {
155
- rank: 0,
156
- startMs: ms,
157
- endMs: ms
158
- };
159
- }
160
- m = partition.match(WEEKLY_PARTITION_RE);
161
- if (m) {
162
- const ms = Date.parse(`${m[1]}T00:00:00Z`);
163
- return {
164
- rank: 1,
165
- startMs: ms,
166
- endMs: ms + 6 * MS_PER_DAY
167
- };
168
- }
169
- m = partition.match(MONTHLY_PARTITION_RE);
170
- if (m) {
171
- const [y, mo] = m[1].split("-").map(Number);
172
- return {
173
- rank: 2,
174
- startMs: Date.UTC(y, mo - 1, 1),
175
- endMs: Date.UTC(y, mo, 0)
176
- };
177
- }
178
- m = partition.match(QUARTERLY_PARTITION_RE);
179
- if (m) {
180
- const y = Number(m[1]);
181
- const q = Number(m[2]);
182
- return {
183
- rank: 3,
184
- startMs: Date.UTC(y, (q - 1) * 3, 1),
185
- endMs: Date.UTC(y, q * 3, 0)
186
- };
187
- }
188
- }
189
- function splitOverlappingTiers(entries, queryRange) {
190
- const rangeStartMs = queryRange ? Date.parse(`${queryRange.start}T00:00:00Z`) : void 0;
191
- const rangeEndMs = queryRange ? Date.parse(`${queryRange.end}T00:00:00Z`) : void 0;
192
- const spanned = [];
193
- const kept = [];
194
- const subsumed = [];
195
- for (const entry of entries) {
196
- const span = partitionSpan(entry.partition);
197
- if (!span) {
198
- kept.push(entry);
199
- continue;
200
- }
201
- const days = [];
202
- for (let t = span.startMs; t <= span.endMs; t += MS_PER_DAY) {
203
- if (rangeStartMs !== void 0 && (t < rangeStartMs || t > rangeEndMs)) continue;
204
- days.push(t);
205
- }
206
- if (queryRange && days.length === 0) {
207
- subsumed.push(entry);
208
- continue;
209
- }
210
- spanned.push({
211
- entry,
212
- rank: span.rank,
213
- days
214
- });
215
- }
216
- spanned.sort((a, b) => a.rank - b.rank || b.entry.createdAt - a.entry.createdAt);
217
- const coveredBySearchType = /* @__PURE__ */ new Map();
218
- for (const { entry, days } of spanned) {
219
- const slice = inferSearchType(entry);
220
- let covered = coveredBySearchType.get(slice);
221
- if (!covered) {
222
- covered = /* @__PURE__ */ new Set();
223
- coveredBySearchType.set(slice, covered);
224
- }
225
- if (days.every((d) => covered.has(d))) {
226
- subsumed.push(entry);
227
- continue;
228
- }
229
- kept.push(entry);
230
- for (const d of days) covered.add(d);
231
- }
232
- return {
233
- kept,
234
- subsumed
235
- };
236
- }
237
- function dedupeOverlappingTiers(entries, queryRange) {
238
- return splitOverlappingTiers(entries, queryRange).kept;
239
- }
240
- function monthEndMs(month) {
241
- const [y, m] = month.split("-").map(Number);
242
- return Date.UTC(y, m, 0, 23, 59, 59, 999);
243
- }
244
- function quarterEndMs(quarter) {
245
- const [yStr, qStr] = quarter.split("-Q");
246
- const y = Number(yStr);
247
- const q = Number(qStr);
248
- return Date.UTC(y, q * 3, 0, 23, 59, 59, 999);
249
- }
250
5
  const FILES_PLACEHOLDER = "{{FILES}}";
251
6
  function buildDimensionWhere(filters, table) {
252
7
  const clauses = [];
@@ -381,4 +136,4 @@ function substituteNamedFiles(sql, sets) {
381
136
  for (const [name, keys] of Object.entries(sets)) out = out.replace(new RegExp(`\\{\\{${name}\\}\\}`, "g"), fileList(keys));
382
137
  return out;
383
138
  }
384
- export { FILES_PLACEHOLDER, RAW_DAILY_COMPACT_THRESHOLD, compactTieredImpl, compileLogicalQueryPlan, countRawDailies, dedupeOverlappingTiers, enumeratePartitions, resolveParquetSQL, splitOverlappingTiers, substituteNamedFiles };
139
+ export { FILES_PLACEHOLDER, compileLogicalQueryPlan, resolveParquetSQL, substituteNamedFiles };
@@ -1,10 +1,10 @@
1
1
  import { SCHEMAS, drizzleSchema } from "./schema.mjs";
2
- import { enumeratePartitions } from "./parquet-plan.mjs";
2
+ import { enumeratePartitions } from "./compaction.mjs";
3
3
  import { escapeLike } from "../sql-fragments.mjs";
4
4
  import "../planner.mjs";
5
- import { PgDialect, pgTable, varchar } from "drizzle-orm/pg-core";
6
5
  import { UnresolvableDatasetError, buildLogicalComparisonPlan, buildLogicalPlan, inferDataset as inferLogicalDataset, isDatasetResolvable } from "gscdump/query/plan";
7
- import { normalizeUrl } from "gscdump/normalize";
6
+ import { normalizeUrl } from "gscdump";
7
+ import { PgDialect, pgTable, varchar } from "drizzle-orm/pg-core";
8
8
  import { sql } from "drizzle-orm";
9
9
  const DIMENSION_SURFACES = {
10
10
  page: ["api", "stored"],
@@ -68,5 +68,12 @@ function icebergTableSpec(table) {
68
68
  };
69
69
  }
70
70
  const ICEBERG_SCHEMAS = Object.fromEntries(ICEBERG_TABLES.map((t) => [t, icebergTableSpec(t)]));
71
- new Set(ICEBERG_TABLES);
72
- export { ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, icebergTableSpec };
71
+ const ICEBERG_TABLE_SET = new Set(ICEBERG_TABLES);
72
+ function isIcebergTable(table) {
73
+ return ICEBERG_TABLE_SET.has(table);
74
+ }
75
+ function assertIcebergTable(table) {
76
+ if (!isIcebergTable(table)) throw new Error(`Unknown Iceberg table '${table}'. Expected one of: ${ICEBERG_TABLES.join(", ")}`);
77
+ return table;
78
+ }
79
+ export { ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, assertIcebergTable, icebergTableSpec, isIcebergTable };
@@ -94,6 +94,16 @@ declare const ICEBERG_PARTITION_SPEC: readonly IcebergPartitionField[];
94
94
  declare function icebergTableSpec(table: IcebergTableName): IcebergTableSpec;
95
95
  /** All Iceberg table specs, keyed by table name. */
96
96
  declare const ICEBERG_SCHEMAS: Record<IcebergTableName, IcebergTableSpec>;
97
+ /** True when `table` is one of the canonical {@link ICEBERG_TABLES}. */
98
+ declare function isIcebergTable(table: string): table is IcebergTableName;
99
+ /**
100
+ * Narrow an arbitrary table name to a canonical {@link IcebergTableName},
101
+ * throwing a clear error otherwise. Guards write paths that index
102
+ * `ICEBERG_SCHEMAS` (a `Record<IcebergTableName, …>`) — a non-canonical name
103
+ * silently yields `undefined` there, propagating a corrupt/empty spec into the
104
+ * Iceberg job instead of failing loudly.
105
+ */
106
+ declare function assertIcebergTable(table: string): IcebergTableName;
97
107
  /** icebird's lowercase Iceberg primitive types (subset we use). */
98
108
  type IcebergPrimitiveType = 'string' | 'int' | 'long' | 'double' | 'date';
99
109
  /** A field in an icebird table `Schema`. */
@@ -370,4 +380,4 @@ interface LocalIcebergSinkOptions extends SinkOptions {
370
380
  /** S3-compatible warehouse location (POC: MinIO). */
371
381
  warehouse: string;
372
382
  }
373
- export { CommitRetryOptions, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, IcebergAppendSinkOptions, IcebergCatalogConfig, IcebergColumn, IcebergColumnType, IcebergConnection, IcebergListedDataFile, IcebergPartitionField, IcebergPartitionSpec, IcebergPartitionSpecField, IcebergPartitionTransform, IcebergPrimitiveType, IcebergS3Config, IcebergSchema, IcebergSchemaField, IcebergTableName, IcebergTableOpResult, IcebergTableSpec, ListIcebergDataFilesOptions, LocalIcebergSinkOptions, Sink, SinkCapabilities, SinkCloseResult, SinkOptions, SinkSlice, SinkWriteResult, connectIcebergCatalog, createIcebergTables, dropIcebergTables, ensureIcebergNamespace, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, isCommitRateLimited, listIcebergDataFiles, listIcebergTables };
383
+ export { CommitRetryOptions, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, IcebergAppendSinkOptions, IcebergCatalogConfig, IcebergColumn, IcebergColumnType, IcebergConnection, IcebergListedDataFile, IcebergPartitionField, IcebergPartitionSpec, IcebergPartitionSpecField, IcebergPartitionTransform, IcebergPrimitiveType, IcebergS3Config, IcebergSchema, IcebergSchemaField, IcebergTableName, IcebergTableOpResult, IcebergTableSpec, ListIcebergDataFilesOptions, LocalIcebergSinkOptions, Sink, SinkCapabilities, SinkCloseResult, SinkOptions, SinkSlice, SinkWriteResult, assertIcebergTable, connectIcebergCatalog, createIcebergTables, dropIcebergTables, ensureIcebergNamespace, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, isCommitRateLimited, isIcebergTable, listIcebergDataFiles, listIcebergTables };
@@ -1,4 +1,4 @@
1
- import "./storage.mjs";
1
+ import "./layout.mjs";
2
2
  import { assertDimensionsSupported, getFilterDimensions, pgResolverAdapter, resolveToSQL } from "./resolver.mjs";
3
3
  import { runAnalyzerFromSource } from "./dispatch.mjs";
4
4
  function coerceRow(row) {