@gscdump/engine 0.26.2 → 0.26.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -26,6 +26,16 @@ function emptyTypesKey(ctx) {
26
26
  function inspectionParquetKey(ctx) {
27
27
  return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/entities/inspections/index.parquet` : `u_${ctx.userId}/entities/inspections/index.parquet`;
28
28
  }
29
+ function inspectionEventsPrefix(ctx) {
30
+ return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/entities/inspections/events` : `u_${ctx.userId}/entities/inspections/events`;
31
+ }
32
+ function inspectionEventKey(ctx, yearMonth, batchId) {
33
+ return `${inspectionEventsPrefix(ctx)}/${yearMonth}/${batchId}.parquet`;
34
+ }
35
+ function inspectionBaseKey(ctx) {
36
+ return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/entities/inspections/base.parquet` : `u_${ctx.userId}/entities/inspections/base.parquet`;
37
+ }
38
+ const INSPECTION_EVENT_KEY_RE = /\/inspections\/events\/\d{4}-\d{2}\/[^/]+\.parquet$/;
29
39
  function inspectionHistoryPrefix(ctx, yearMonth) {
30
40
  return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/entities/inspections/history/${yearMonth}` : `u_${ctx.userId}/entities/inspections/history/${yearMonth}`;
31
41
  }
@@ -129,6 +139,59 @@ const INSPECTION_PARQUET_COLUMNS = [
129
139
  nullable: true
130
140
  }
131
141
  ];
142
+ const INSPECTION_EVENT_COLUMNS = [
143
+ ...INSPECTION_PARQUET_COLUMNS,
144
+ {
145
+ name: "crawlingUserAgent",
146
+ type: "VARCHAR",
147
+ nullable: true
148
+ },
149
+ {
150
+ name: "richResultsItems",
151
+ type: "VARCHAR",
152
+ nullable: true
153
+ },
154
+ {
155
+ name: "sitemaps",
156
+ type: "VARCHAR",
157
+ nullable: true
158
+ },
159
+ {
160
+ name: "referringUrls",
161
+ type: "VARCHAR",
162
+ nullable: true
163
+ },
164
+ {
165
+ name: "mobileIssues",
166
+ type: "VARCHAR",
167
+ nullable: true
168
+ },
169
+ {
170
+ name: "inspectionResultLink",
171
+ type: "VARCHAR",
172
+ nullable: true
173
+ },
174
+ {
175
+ name: "firstCheckedAt",
176
+ type: "VARCHAR",
177
+ nullable: true
178
+ },
179
+ {
180
+ name: "checkCount",
181
+ type: "INTEGER",
182
+ nullable: true
183
+ },
184
+ {
185
+ name: "nextCheckAfter",
186
+ type: "BIGINT",
187
+ nullable: true
188
+ },
189
+ {
190
+ name: "nextCheckPriority",
191
+ type: "VARCHAR",
192
+ nullable: true
193
+ }
194
+ ];
132
195
  function createInspectionStore(opts) {
133
196
  const ds = opts.dataSource;
134
197
  function shardFor(record) {
@@ -193,6 +256,89 @@ function createInspectionStore(opts) {
193
256
  bytes: bytes.byteLength
194
257
  };
195
258
  },
259
+ async appendInspectionEvents(ctx, rows, options) {
260
+ if (rows.length === 0) return {
261
+ keys: [],
262
+ rowCount: 0
263
+ };
264
+ const batchId = options?.batchId ?? randomBatchId();
265
+ const byMonth = /* @__PURE__ */ new Map();
266
+ for (const r of rows) {
267
+ const m = YEAR_MONTH_RE.exec(r.inspectedAt);
268
+ const month = m ? `${m[1]}-${m[2]}` : "unknown";
269
+ const bucket = byMonth.get(month) ?? [];
270
+ bucket.push(r);
271
+ byMonth.set(month, bucket);
272
+ }
273
+ const keys = [];
274
+ for (const [month, batch] of byMonth) {
275
+ const bytes = encodeRowsToParquetFlex(batch, {
276
+ columns: INSPECTION_EVENT_COLUMNS,
277
+ sortKey: ["urlHash"]
278
+ });
279
+ const key = inspectionEventKey(ctx, month, batchId);
280
+ await ds.write(key, bytes);
281
+ keys.push(key);
282
+ }
283
+ return {
284
+ keys,
285
+ rowCount: rows.length
286
+ };
287
+ },
288
+ async compactInspections(ctx) {
289
+ const eventKeys = (await ds.list(`${inspectionEventsPrefix(ctx)}/`)).filter((k) => INSPECTION_EVENT_KEY_RE.test(k));
290
+ if (eventKeys.length === 0) return {
291
+ baseRowCount: 0,
292
+ eventsFolded: 0,
293
+ eventFilesDeleted: 0
294
+ };
295
+ const baseKey = inspectionBaseKey(ctx);
296
+ const baseBytes = await readOptional(ds, baseKey);
297
+ const baseRows = baseBytes ? await decodeParquetToRows(baseBytes) : [];
298
+ const latest = /* @__PURE__ */ new Map();
299
+ const earliestChecked = /* @__PURE__ */ new Map();
300
+ const consider = (row) => {
301
+ const h = String(row.urlHash);
302
+ const prev = latest.get(h);
303
+ if (!prev || String(row.inspectedAt ?? "") > String(prev.inspectedAt ?? "")) latest.set(h, row);
304
+ const fc = row.firstCheckedAt;
305
+ if (fc != null) {
306
+ const fcStr = String(fc);
307
+ const cur = earliestChecked.get(h);
308
+ if (cur === void 0 || fcStr < cur) earliestChecked.set(h, fcStr);
309
+ }
310
+ };
311
+ for (const row of baseRows) consider(row);
312
+ let eventsFolded = 0;
313
+ const consumed = [];
314
+ for (const key of eventKeys.sort()) {
315
+ const bytes = await readOptional(ds, key);
316
+ if (!bytes) continue;
317
+ consumed.push(key);
318
+ const rows = await decodeParquetToRows(bytes);
319
+ for (const row of rows) {
320
+ consider(row);
321
+ eventsFolded++;
322
+ }
323
+ }
324
+ const merged = [];
325
+ for (const [h, row] of latest) {
326
+ const fc = earliestChecked.get(h);
327
+ if (fc !== void 0) row.firstCheckedAt = fc;
328
+ merged.push(row);
329
+ }
330
+ const bytes = encodeRowsToParquetFlex(merged, {
331
+ columns: INSPECTION_EVENT_COLUMNS,
332
+ sortKey: ["urlHash"]
333
+ });
334
+ await ds.write(baseKey, bytes);
335
+ if (consumed.length > 0) await ds.delete(consumed);
336
+ return {
337
+ baseRowCount: merged.length,
338
+ eventsFolded,
339
+ eventFilesDeleted: consumed.length
340
+ };
341
+ },
196
342
  parquetUri(ctx) {
197
343
  return ds.uri?.(inspectionParquetKey(ctx));
198
344
  }
@@ -574,6 +720,92 @@ function createSitemapStore(opts) {
574
720
  await ds.write(indexKey, bytes);
575
721
  if (consumed.length > 0) await ds.delete(consumed);
576
722
  }
723
+ },
724
+ async reconcile(ctx, { liveFeedpaths, at: atOpt }) {
725
+ const at = atOpt ?? now();
726
+ const liveHashes = new Set(liveFeedpaths.map((fp) => hash(fp)));
727
+ const present = /* @__PURE__ */ new Set();
728
+ for (const key of await ds.list(`${sitemapUrlsIndexPrefix(ctx)}/`)) {
729
+ const m = /\/by-feed\/([0-9a-f]+)\/index\.parquet$/.exec(key);
730
+ if (m) present.add(m[1]);
731
+ }
732
+ const deltasByFeed = /* @__PURE__ */ new Map();
733
+ for (const key of await ds.list(`${sitemapUrlsPrefix(ctx)}/deltas/`)) {
734
+ const m = SITEMAP_URLS_DELTA_PREFIX_RE.exec(key);
735
+ if (!m) continue;
736
+ present.add(m[2]);
737
+ const list = deltasByFeed.get(m[2]) ?? [];
738
+ list.push(key);
739
+ deltasByFeed.set(m[2], list);
740
+ }
741
+ let feedpathsPruned = 0;
742
+ let urlsRemoved = 0;
743
+ for (const fpHash of present) {
744
+ if (liveHashes.has(fpHash)) continue;
745
+ const indexKey = sitemapUrlsIndexKey(ctx, fpHash);
746
+ const indexBytes = await readOptional(ds, indexKey);
747
+ const indexRows = indexBytes ? await decodeParquetToRows(indexBytes) : [];
748
+ const live = /* @__PURE__ */ new Map();
749
+ const removed = /* @__PURE__ */ new Map();
750
+ for (const row of indexRows) {
751
+ const r = rowToUrlRecord(row);
752
+ if (r.removedAt != null) removed.set(r.urlHash, r);
753
+ else live.set(r.urlHash, r);
754
+ }
755
+ const consumed = [];
756
+ for (const key of (deltasByFeed.get(fpHash) ?? []).sort()) {
757
+ const bytes = await readOptional(ds, key);
758
+ if (!bytes) continue;
759
+ consumed.push(key);
760
+ const rows = await decodeParquetToRows(bytes);
761
+ for (const r of rows) {
762
+ const urlHash = String(r.url_hash);
763
+ const dat = Number(r.at);
764
+ if (String(r.op) === "added") {
765
+ const prev = live.get(urlHash) ?? removed.get(urlHash);
766
+ removed.delete(urlHash);
767
+ live.set(urlHash, {
768
+ feedpath: String(r.feedpath),
769
+ feedpathHash: fpHash,
770
+ urlHash,
771
+ loc: String(r.loc),
772
+ lastmod: r.lastmod == null ? void 0 : String(r.lastmod),
773
+ firstSeenAt: prev?.firstSeenAt ?? dat,
774
+ lastSeenAt: dat
775
+ });
776
+ } else if (String(r.op) === "removed") {
777
+ const prev = live.get(urlHash);
778
+ live.delete(urlHash);
779
+ if (prev) removed.set(urlHash, {
780
+ ...prev,
781
+ removedAt: dat
782
+ });
783
+ }
784
+ }
785
+ }
786
+ const hadLive = live.size > 0;
787
+ if (!hadLive && consumed.length === 0) continue;
788
+ for (const [urlHash, r] of live) {
789
+ removed.set(urlHash, {
790
+ ...r,
791
+ removedAt: at
792
+ });
793
+ urlsRemoved++;
794
+ }
795
+ const merged = [...removed.values()];
796
+ merged.sort((a, b) => a.urlHash < b.urlHash ? -1 : a.urlHash > b.urlHash ? 1 : 0);
797
+ const bytes = encodeRowsToParquetFlex(merged.map(urlRecordToRow), {
798
+ columns: URLS_INDEX_COLUMNS,
799
+ sortKey: ["feedpath_hash", "url_hash"]
800
+ });
801
+ await ds.write(indexKey, bytes);
802
+ if (consumed.length > 0) await ds.delete(consumed);
803
+ if (hadLive) feedpathsPruned++;
804
+ }
805
+ return {
806
+ feedpathsPruned,
807
+ urlsRemoved
808
+ };
577
809
  }
578
810
  };
579
811
  }
@@ -661,4 +893,4 @@ function createEmptyTypesStore(opts) {
661
893
  }
662
894
  };
663
895
  }
664
- export { INSPECTION_HISTORY_MAX_BYTES, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix };
896
+ export { INSPECTION_EVENT_COLUMNS, INSPECTION_HISTORY_MAX_BYTES, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionBaseKey, inspectionEventKey, inspectionEventsPrefix, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix };
@@ -1,4 +1,4 @@
1
- import { ColumnDef, ColumnType, Row, TableName, TableSchema, TableSchema as TableSchema$1 } from "@gscdump/contracts";
1
+ import { ColumnDef as ColumnDef$1, ColumnType, Row, TableName, TableSchema, TableSchema as TableSchema$1 } from "@gscdump/contracts";
2
2
  declare const pages: import("drizzle-orm/pg-core").PgTableWithColumns<{
3
3
  name: "pages";
4
4
  schema: undefined;
@@ -2259,4 +2259,4 @@ declare function naturalKeyColumns(table: TableName): readonly string[];
2259
2259
  */
2260
2260
  declare function dedupeByNaturalKey(table: TableName, rows: readonly Row[]): Row[];
2261
2261
  declare function dimensionToColumn(dim: string, _table: TableName): string;
2262
- export { type ColumnDef, type ColumnType, DrizzleSchema, SCHEMAS, TABLE_METADATA, type TableSchema$1 as TableSchema, allTables, countries, currentSchemaVersion, dates, dedupeByNaturalKey, dimensionToColumn, drizzleSchema, hourly_pages, inferTable, naturalKeyColumns, page_queries, pages, queries, schemaFor, search_appearance, search_appearance_page_queries, search_appearance_pages, search_appearance_queries };
2262
+ export { type ColumnDef$1 as ColumnDef, type ColumnType, DrizzleSchema, SCHEMAS, TABLE_METADATA, type TableSchema$1 as TableSchema, allTables, countries, currentSchemaVersion, dates, dedupeByNaturalKey, dimensionToColumn, drizzleSchema, hourly_pages, inferTable, naturalKeyColumns, page_queries, pages, queries, schemaFor, search_appearance, search_appearance_page_queries, search_appearance_pages, search_appearance_queries };
@@ -1,6 +1,6 @@
1
1
  import { DataSource } from "./_chunks/storage.mjs";
2
2
  import { ScheduleState } from "./schedule.mjs";
3
- import { TenantCtx } from "@gscdump/contracts";
3
+ import { ColumnDef, TenantCtx } from "@gscdump/contracts";
4
4
  /**
5
5
  * GSC URL inspection result fields we persist. Mirrors the
6
6
  * `searchconsole_v1.Schema$UrlInspectionResult` shape but as plain JSON
@@ -60,6 +60,16 @@ interface InspectionHistoryShard {
60
60
  declare function inspectionIndexKey(ctx: TenantCtx): string;
61
61
  declare function emptyTypesKey(ctx: TenantCtx): string;
62
62
  declare function inspectionParquetKey(ctx: TenantCtx): string;
63
+ /** Directory prefix holding a tenant's immutable inspection-event parquets. */
64
+ declare function inspectionEventsPrefix(ctx: TenantCtx): string;
65
+ /**
66
+ * Object key for one immutable inspection-event batch, partitioned by the
67
+ * `YYYY-MM` of the records' `inspectedAt`. The `batchId` is caller-supplied so
68
+ * a job retry re-writes the SAME key (idempotent whole-file overwrite).
69
+ */
70
+ declare function inspectionEventKey(ctx: TenantCtx, yearMonth: string, batchId: string): string;
71
+ /** Compacted latest-per-url base produced by `compactInspections`. */
72
+ declare function inspectionBaseKey(ctx: TenantCtx): string;
63
73
  /**
64
74
  * Directory prefix for a month's history shards. Each shard is a UUID-keyed
65
75
  * blob under this prefix; `appendHistory` writes one per call, `loadHistory`
@@ -99,6 +109,44 @@ interface InspectionParquetRow {
99
109
  scheduleConsecutiveUnchanged: number | null;
100
110
  schedulePolicyVersion: number | null;
101
111
  }
112
+ /**
113
+ * Row shape for the append-only inspection-event store. Superset of
114
+ * {@link InspectionParquetRow}: carries the full-fidelity columns the lossy
115
+ * `materialize` parquet dropped (`crawlingUserAgent`, `richResultsItems`,
116
+ * `sitemaps`, `referringUrls`, `mobileIssues`, `inspectionResultLink`,
117
+ * `firstCheckedAt`, `checkCount`). Object/array fields are persisted as JSON
118
+ * strings — read paths unpack them with DuckDB's JSON functions.
119
+ *
120
+ * `firstCheckedAt` / `checkCount` are caller-managed: the writer carries the
121
+ * earliest-seen timestamp + running observation count forward. Compaction
122
+ * preserves the EARLIEST `firstCheckedAt` per url (mirrors the sitemap store's
123
+ * `firstSeenAt` preservation); every other column is taken from the
124
+ * newest-by-`inspectedAt` event.
125
+ */
126
+ interface InspectionEventRow extends InspectionParquetRow {
127
+ crawlingUserAgent: string | null;
128
+ /** JSON-encoded `RichResultsItem[]`. */
129
+ richResultsItems: string | null;
130
+ /** JSON-encoded list of sitemap URLs referencing this page. */
131
+ sitemaps: string | null;
132
+ /** JSON-encoded list of referring URLs. */
133
+ referringUrls: string | null;
134
+ /** JSON-encoded mobile-usability issues. */
135
+ mobileIssues: string | null;
136
+ inspectionResultLink: string | null;
137
+ /** ISO-8601 timestamp of the first inspection we ever recorded for this url. */
138
+ firstCheckedAt: string | null;
139
+ /** Total number of inspections recorded for this url. */
140
+ checkCount: number | null;
141
+ /**
142
+ * Stored next-recheck unix-seconds + priority as computed AT INSPECT TIME.
143
+ * Carried verbatim (NOT recomputed at read) because the scheduling policy can
144
+ * change over time — `__gsc/inspections` must replay the historical value to
145
+ * keep its frozen wire shape byte-stable.
146
+ */
147
+ nextCheckAfter: number | null;
148
+ nextCheckPriority: string | null;
149
+ }
102
150
  /**
103
151
  * Hard cap on a single `appendHistory` shard payload. Encoded bytes >
104
152
  * this threshold throws — the caller logs and moves on (D1 is
@@ -141,6 +189,40 @@ interface InspectionStore {
141
189
  rowCount: number;
142
190
  bytes: number;
143
191
  }>;
192
+ /**
193
+ * Append a batch of inspection results as an immutable per-batch parquet
194
+ * under `events/<YYYY-MM>/<batchId>.parquet`, partitioned by the `YYYY-MM`
195
+ * of each row's `inspectedAt` (a batch spanning a month boundary writes one
196
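+ * file per month; rows whose `inspectedAt` does not match the year-month pattern are written under an `unknown` partition, which `compactInspections` does not pick up). No read-before-write; idempotent under job retry (same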
197
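+ * `batchId` → same key → whole-file overwrite; when `opts.batchId` is omitted a random id is generated per call, so a retry writes new files instead). Rows carry the FULL column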
198
+ * set ({@link INSPECTION_EVENT_COLUMNS}); this is the append-only
199
+ * source-of-truth that supersedes {@link InspectionStore.materialize}.
200
+ *
201
+ * Returns the keys written + total row count. Empty input is a no-op.
202
+ */
203
+ appendInspectionEvents: (ctx: TenantCtx, rows: readonly InspectionEventRow[], opts?: {
204
+ batchId?: string;
205
+ }) => Promise<{
206
+ keys: string[];
207
+ rowCount: number;
208
+ }>;
209
+ /**
210
+ * Fold every outstanding event file into the `base.parquet`: latest-per-url
211
+ * by max `inspectedAt` (newest-wins), preserving the earliest non-null
212
+ * `firstCheckedAt` per url. Writes the new base then deletes the consumed
213
+ * event files — file-level only, never row-level (ADR-0002). Idempotent +
214
+ * re-runnable: a crash after the base write but before the delete just
215
+ * re-folds the same events (newest-wins makes that a no-op). A real read
216
+ * failure on the existing base propagates rather than rebuilding from events
217
+ * alone (which would drop URLs only the base held).
218
+ *
219
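+ * No-op (no base rewrite) when there are zero outstanding events; in that case the existing base is not read and all three counters, including `baseRowCount`, are returned as 0.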
220
+ */
221
+ compactInspections: (ctx: TenantCtx) => Promise<{
222
+ baseRowCount: number;
223
+ eventsFolded: number;
224
+ eventFilesDeleted: number;
225
+ }>;
144
226
  /**
145
227
  * DuckDB-resolvable URI for the materialised parquet sidecar, or
146
228
  * `undefined` if the underlying `DataSource` has no native URI shape
@@ -156,6 +238,14 @@ interface InspectionStore {
156
238
  interface CreateInspectionStoreOptions {
157
239
  dataSource: DataSource;
158
240
  }
241
+ /**
242
+ * Column schema for the append-only inspection-event store + its compacted
243
+ * base. Superset of {@link INSPECTION_PARQUET_COLUMNS}: the 16 promoted columns
244
+ * plus the 8 full-fidelity ones the lossy `materialize` parquet dropped. The
245
+ * event files and `base.parquet` share this schema so DuckDB
246
+ * `read_parquet([...], union_by_name = true)` merges base + events cleanly.
247
+ */
248
+ declare const INSPECTION_EVENT_COLUMNS: readonly ColumnDef[];
159
249
  declare function createInspectionStore(opts: CreateInspectionStoreOptions): InspectionStore;
160
250
  /** GSC sitemap record we persist. Matches `Schema$WmxSitemap` but as plain JSON. */
161
251
  interface SitemapRecord {
@@ -229,6 +319,12 @@ interface SnapshotUrlsResult {
229
319
  /** True when contentHash matched prior; the call performed zero writes. */
230
320
  unchanged: boolean;
231
321
  }
322
+ interface ReconcileResult {
323
+ /** Feedpaths that were absent from the live set and had their live URLs pruned. */
324
+ feedpathsPruned: number;
325
+ /** Total URL rows transitioned live → removed across pruned feedpaths. */
326
+ urlsRemoved: number;
327
+ }
232
328
  interface DeltaEntry {
233
329
  feedpath: string;
234
330
  feedpathHash: string;
@@ -280,6 +376,21 @@ interface SitemapStore {
280
376
  * regardless of total site URL count.
281
377
  */
282
378
  compactUrls: (ctx: TenantCtx) => Promise<void>;
379
+ /**
380
+ * Site-wide convergence: mark every still-live URL whose owning feedpath is
381
+ * absent from `liveFeedpaths` as removed. `compactUrls`/`snapshotUrls` only
382
+ * prune URLs *inside* a feedpath that was re-observed; a whole feed dropped
383
+ * from the sitemap list (no `snapshotUrls` call) leaves its URLs frozen-live
384
+ * forever. This is the sidecar mirror of the D1 generation sweep: it rewrites
385
+ * each dropped feedpath's `by-feed/<hash>/index.parquet` with `removedAt` set
386
+ * and deletes its outstanding deltas (write-new-base + delete-deltas,
387
+ * ADR-0002). Bounded per feedpath, so memory stays flat regardless of site
388
+ * size. Live feedpaths are never touched.
389
+ */
390
+ reconcile: (ctx: TenantCtx, opts: {
391
+ liveFeedpaths: readonly string[];
392
+ at?: number;
393
+ }) => Promise<ReconcileResult>;
283
394
  }
284
395
  interface CreateSitemapStoreOptions {
285
396
  dataSource: DataSource;
@@ -331,4 +442,4 @@ interface CreateEmptyTypesStoreOptions {
331
442
  now?: () => number;
332
443
  }
333
444
  declare function createEmptyTypesStore(opts: CreateEmptyTypesStoreOptions): EmptyTypesStore;
334
- export { CreateEmptyTypesStoreOptions, CreateIndexingMetadataStoreOptions, CreateInspectionStoreOptions, CreateSitemapStoreOptions, DateRange, DeltaEntry, EmptyTypesDoc, EmptyTypesStore, INSPECTION_HISTORY_MAX_BYTES, IndexingMetadataIndex, IndexingMetadataRecord, IndexingMetadataStore, InspectionHistoryShard, InspectionIndex, InspectionParquetRow, InspectionRecord, InspectionStore, LoadUrlsOptions, ParsedUrl, SitemapHistoryDoc, SitemapIndex, SitemapRecord, SitemapStore, SitemapUrlRecord, SnapshotUrlsResult, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix };
445
+ export { CreateEmptyTypesStoreOptions, CreateIndexingMetadataStoreOptions, CreateInspectionStoreOptions, CreateSitemapStoreOptions, DateRange, DeltaEntry, EmptyTypesDoc, EmptyTypesStore, INSPECTION_EVENT_COLUMNS, INSPECTION_HISTORY_MAX_BYTES, IndexingMetadataIndex, IndexingMetadataRecord, IndexingMetadataStore, InspectionEventRow, InspectionHistoryShard, InspectionIndex, InspectionParquetRow, InspectionRecord, InspectionStore, LoadUrlsOptions, ParsedUrl, ReconcileResult, SitemapHistoryDoc, SitemapIndex, SitemapRecord, SitemapStore, SitemapUrlRecord, SnapshotUrlsResult, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionBaseKey, inspectionEventKey, inspectionEventsPrefix, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix };
package/dist/entities.mjs CHANGED
@@ -1,2 +1,2 @@
1
- import { INSPECTION_HISTORY_MAX_BYTES, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix } from "./_chunks/entities.mjs";
2
- export { INSPECTION_HISTORY_MAX_BYTES, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix };
1
+ import { INSPECTION_EVENT_COLUMNS, INSPECTION_HISTORY_MAX_BYTES, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionBaseKey, inspectionEventKey, inspectionEventsPrefix, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix } from "./_chunks/entities.mjs";
2
+ export { INSPECTION_EVENT_COLUMNS, INSPECTION_HISTORY_MAX_BYTES, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionBaseKey, inspectionEventKey, inspectionEventsPrefix, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix };
@@ -1,5 +1,5 @@
1
1
  import { DataSource, FileSetRef, Row as Row$1 } from "./_chunks/storage.mjs";
2
- import { ColumnDef } from "./_chunks/schema.mjs";
2
+ import { ColumnDef as ColumnDef$1 } from "./_chunks/schema.mjs";
3
3
  import { EngineError } from "./_chunks/errors.mjs";
4
4
  import { SearchType } from "gscdump/query";
5
5
  import { TenantCtx } from "@gscdump/contracts";
@@ -70,7 +70,7 @@ interface RollupDef {
70
70
  * Types map the same way as the fact-table encoder: VARCHAR / DATE go
71
71
  * through BYTE_ARRAY/UTF8; BIGINT → INT64; INTEGER → INT32; DOUBLE → DOUBLE.
72
72
  */
73
- parquetColumns?: readonly ColumnDef[];
73
+ parquetColumns?: readonly ColumnDef$1[];
74
74
  /** Sort-key column names for parquet row-group stats. Optional. */
75
75
  parquetSortKey?: readonly string[];
76
76
  /**
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@gscdump/engine",
3
3
  "type": "module",
4
- "version": "0.26.2",
4
+ "version": "0.26.4",
5
5
  "description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
6
6
  "author": {
7
7
  "name": "Harlan Wilton",
@@ -190,8 +190,8 @@
190
190
  "drizzle-orm": "1.0.0-rc.3",
191
191
  "icebird": "^0.8.8",
192
192
  "proper-lockfile": "^4.1.2",
193
- "gscdump": "0.26.2",
194
- "@gscdump/contracts": "0.26.2"
193
+ "@gscdump/contracts": "0.26.4",
194
+ "gscdump": "0.26.4"
195
195
  },
196
196
  "devDependencies": {
197
197
  "@duckdb/duckdb-wasm": "^1.32.0",