@gscdump/engine 0.26.1 → 0.26.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_chunks/entities.mjs +137 -1
- package/dist/_chunks/schema.d.mts +2 -2
- package/dist/entities.d.mts +84 -2
- package/dist/entities.mjs +2 -2
- package/dist/rollups.d.mts +2 -2
- package/package.json +3 -3
package/dist/_chunks/entities.mjs
CHANGED
|
@@ -26,6 +26,16 @@ function emptyTypesKey(ctx) {
|
|
|
26
26
|
function inspectionParquetKey(ctx) {
|
|
27
27
|
return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/entities/inspections/index.parquet` : `u_${ctx.userId}/entities/inspections/index.parquet`;
|
|
28
28
|
}
|
|
29
|
+
function inspectionEventsPrefix(ctx) {
|
|
30
|
+
return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/entities/inspections/events` : `u_${ctx.userId}/entities/inspections/events`;
|
|
31
|
+
}
|
|
32
|
+
function inspectionEventKey(ctx, yearMonth, batchId) {
|
|
33
|
+
return `${inspectionEventsPrefix(ctx)}/${yearMonth}/${batchId}.parquet`;
|
|
34
|
+
}
|
|
35
|
+
function inspectionBaseKey(ctx) {
|
|
36
|
+
return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/entities/inspections/base.parquet` : `u_${ctx.userId}/entities/inspections/base.parquet`;
|
|
37
|
+
}
|
|
38
|
+
const INSPECTION_EVENT_KEY_RE = /\/inspections\/events\/\d{4}-\d{2}\/[^/]+\.parquet$/;
|
|
29
39
|
function inspectionHistoryPrefix(ctx, yearMonth) {
|
|
30
40
|
return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/entities/inspections/history/${yearMonth}` : `u_${ctx.userId}/entities/inspections/history/${yearMonth}`;
|
|
31
41
|
}
|
|
@@ -129,6 +139,49 @@ const INSPECTION_PARQUET_COLUMNS = [
|
|
|
129
139
|
nullable: true
|
|
130
140
|
}
|
|
131
141
|
];
|
|
142
|
+
const INSPECTION_EVENT_COLUMNS = [
|
|
143
|
+
...INSPECTION_PARQUET_COLUMNS,
|
|
144
|
+
{
|
|
145
|
+
name: "crawlingUserAgent",
|
|
146
|
+
type: "VARCHAR",
|
|
147
|
+
nullable: true
|
|
148
|
+
},
|
|
149
|
+
{
|
|
150
|
+
name: "richResultsItems",
|
|
151
|
+
type: "VARCHAR",
|
|
152
|
+
nullable: true
|
|
153
|
+
},
|
|
154
|
+
{
|
|
155
|
+
name: "sitemaps",
|
|
156
|
+
type: "VARCHAR",
|
|
157
|
+
nullable: true
|
|
158
|
+
},
|
|
159
|
+
{
|
|
160
|
+
name: "referringUrls",
|
|
161
|
+
type: "VARCHAR",
|
|
162
|
+
nullable: true
|
|
163
|
+
},
|
|
164
|
+
{
|
|
165
|
+
name: "mobileIssues",
|
|
166
|
+
type: "VARCHAR",
|
|
167
|
+
nullable: true
|
|
168
|
+
},
|
|
169
|
+
{
|
|
170
|
+
name: "inspectionResultLink",
|
|
171
|
+
type: "VARCHAR",
|
|
172
|
+
nullable: true
|
|
173
|
+
},
|
|
174
|
+
{
|
|
175
|
+
name: "firstCheckedAt",
|
|
176
|
+
type: "VARCHAR",
|
|
177
|
+
nullable: true
|
|
178
|
+
},
|
|
179
|
+
{
|
|
180
|
+
name: "checkCount",
|
|
181
|
+
type: "INTEGER",
|
|
182
|
+
nullable: true
|
|
183
|
+
}
|
|
184
|
+
];
|
|
132
185
|
function createInspectionStore(opts) {
|
|
133
186
|
const ds = opts.dataSource;
|
|
134
187
|
function shardFor(record) {
|
|
@@ -193,6 +246,89 @@ function createInspectionStore(opts) {
|
|
|
193
246
|
bytes: bytes.byteLength
|
|
194
247
|
};
|
|
195
248
|
},
|
|
249
|
+
async appendInspectionEvents(ctx, rows, options) {
|
|
250
|
+
if (rows.length === 0) return {
|
|
251
|
+
keys: [],
|
|
252
|
+
rowCount: 0
|
|
253
|
+
};
|
|
254
|
+
const batchId = options?.batchId ?? randomBatchId();
|
|
255
|
+
const byMonth = /* @__PURE__ */ new Map();
|
|
256
|
+
for (const r of rows) {
|
|
257
|
+
const m = YEAR_MONTH_RE.exec(r.inspectedAt);
|
|
258
|
+
const month = m ? `${m[1]}-${m[2]}` : "unknown";
|
|
259
|
+
const bucket = byMonth.get(month) ?? [];
|
|
260
|
+
bucket.push(r);
|
|
261
|
+
byMonth.set(month, bucket);
|
|
262
|
+
}
|
|
263
|
+
const keys = [];
|
|
264
|
+
for (const [month, batch] of byMonth) {
|
|
265
|
+
const bytes = encodeRowsToParquetFlex(batch, {
|
|
266
|
+
columns: INSPECTION_EVENT_COLUMNS,
|
|
267
|
+
sortKey: ["urlHash"]
|
|
268
|
+
});
|
|
269
|
+
const key = inspectionEventKey(ctx, month, batchId);
|
|
270
|
+
await ds.write(key, bytes);
|
|
271
|
+
keys.push(key);
|
|
272
|
+
}
|
|
273
|
+
return {
|
|
274
|
+
keys,
|
|
275
|
+
rowCount: rows.length
|
|
276
|
+
};
|
|
277
|
+
},
|
|
278
|
+
async compactInspections(ctx) {
|
|
279
|
+
const eventKeys = (await ds.list(`${inspectionEventsPrefix(ctx)}/`)).filter((k) => INSPECTION_EVENT_KEY_RE.test(k));
|
|
280
|
+
if (eventKeys.length === 0) return {
|
|
281
|
+
baseRowCount: 0,
|
|
282
|
+
eventsFolded: 0,
|
|
283
|
+
eventFilesDeleted: 0
|
|
284
|
+
};
|
|
285
|
+
const baseKey = inspectionBaseKey(ctx);
|
|
286
|
+
const baseBytes = await readOptional(ds, baseKey);
|
|
287
|
+
const baseRows = baseBytes ? await decodeParquetToRows(baseBytes) : [];
|
|
288
|
+
const latest = /* @__PURE__ */ new Map();
|
|
289
|
+
const earliestChecked = /* @__PURE__ */ new Map();
|
|
290
|
+
const consider = (row) => {
|
|
291
|
+
const h = String(row.urlHash);
|
|
292
|
+
const prev = latest.get(h);
|
|
293
|
+
if (!prev || String(row.inspectedAt ?? "") > String(prev.inspectedAt ?? "")) latest.set(h, row);
|
|
294
|
+
const fc = row.firstCheckedAt;
|
|
295
|
+
if (fc != null) {
|
|
296
|
+
const fcStr = String(fc);
|
|
297
|
+
const cur = earliestChecked.get(h);
|
|
298
|
+
if (cur === void 0 || fcStr < cur) earliestChecked.set(h, fcStr);
|
|
299
|
+
}
|
|
300
|
+
};
|
|
301
|
+
for (const row of baseRows) consider(row);
|
|
302
|
+
let eventsFolded = 0;
|
|
303
|
+
const consumed = [];
|
|
304
|
+
for (const key of eventKeys.sort()) {
|
|
305
|
+
const bytes = await readOptional(ds, key);
|
|
306
|
+
if (!bytes) continue;
|
|
307
|
+
consumed.push(key);
|
|
308
|
+
const rows = await decodeParquetToRows(bytes);
|
|
309
|
+
for (const row of rows) {
|
|
310
|
+
consider(row);
|
|
311
|
+
eventsFolded++;
|
|
312
|
+
}
|
|
313
|
+
}
|
|
314
|
+
const merged = [];
|
|
315
|
+
for (const [h, row] of latest) {
|
|
316
|
+
const fc = earliestChecked.get(h);
|
|
317
|
+
if (fc !== void 0) row.firstCheckedAt = fc;
|
|
318
|
+
merged.push(row);
|
|
319
|
+
}
|
|
320
|
+
const bytes = encodeRowsToParquetFlex(merged, {
|
|
321
|
+
columns: INSPECTION_EVENT_COLUMNS,
|
|
322
|
+
sortKey: ["urlHash"]
|
|
323
|
+
});
|
|
324
|
+
await ds.write(baseKey, bytes);
|
|
325
|
+
if (consumed.length > 0) await ds.delete(consumed);
|
|
326
|
+
return {
|
|
327
|
+
baseRowCount: merged.length,
|
|
328
|
+
eventsFolded,
|
|
329
|
+
eventFilesDeleted: consumed.length
|
|
330
|
+
};
|
|
331
|
+
},
|
|
196
332
|
parquetUri(ctx) {
|
|
197
333
|
return ds.uri?.(inspectionParquetKey(ctx));
|
|
198
334
|
}
|
|
@@ -661,4 +797,4 @@ function createEmptyTypesStore(opts) {
|
|
|
661
797
|
}
|
|
662
798
|
};
|
|
663
799
|
}
|
|
664
|
-
export { INSPECTION_HISTORY_MAX_BYTES, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix };
|
|
800
|
+
export { INSPECTION_EVENT_COLUMNS, INSPECTION_HISTORY_MAX_BYTES, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionBaseKey, inspectionEventKey, inspectionEventsPrefix, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix };
|
|
package/dist/_chunks/schema.d.mts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { ColumnDef, ColumnType, Row, TableName, TableSchema, TableSchema as TableSchema$1 } from "@gscdump/contracts";
|
|
1
|
+
import { ColumnDef as ColumnDef$1, ColumnType, Row, TableName, TableSchema, TableSchema as TableSchema$1 } from "@gscdump/contracts";
|
|
2
2
|
declare const pages: import("drizzle-orm/pg-core").PgTableWithColumns<{
|
|
3
3
|
name: "pages";
|
|
4
4
|
schema: undefined;
|
|
@@ -2259,4 +2259,4 @@ declare function naturalKeyColumns(table: TableName): readonly string[];
|
|
|
2259
2259
|
*/
|
|
2260
2260
|
declare function dedupeByNaturalKey(table: TableName, rows: readonly Row[]): Row[];
|
|
2261
2261
|
declare function dimensionToColumn(dim: string, _table: TableName): string;
|
|
2262
|
-
export { type ColumnDef, type ColumnType, DrizzleSchema, SCHEMAS, TABLE_METADATA, type TableSchema$1 as TableSchema, allTables, countries, currentSchemaVersion, dates, dedupeByNaturalKey, dimensionToColumn, drizzleSchema, hourly_pages, inferTable, naturalKeyColumns, page_queries, pages, queries, schemaFor, search_appearance, search_appearance_page_queries, search_appearance_pages, search_appearance_queries };
|
|
2262
|
+
export { type ColumnDef$1 as ColumnDef, type ColumnType, DrizzleSchema, SCHEMAS, TABLE_METADATA, type TableSchema$1 as TableSchema, allTables, countries, currentSchemaVersion, dates, dedupeByNaturalKey, dimensionToColumn, drizzleSchema, hourly_pages, inferTable, naturalKeyColumns, page_queries, pages, queries, schemaFor, search_appearance, search_appearance_page_queries, search_appearance_pages, search_appearance_queries };
|
package/dist/entities.d.mts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { DataSource } from "./_chunks/storage.mjs";
|
|
2
2
|
import { ScheduleState } from "./schedule.mjs";
|
|
3
|
-
import { TenantCtx } from "@gscdump/contracts";
|
|
3
|
+
import { ColumnDef, TenantCtx } from "@gscdump/contracts";
|
|
4
4
|
/**
|
|
5
5
|
* GSC URL inspection result fields we persist. Mirrors the
|
|
6
6
|
* `searchconsole_v1.Schema$UrlInspectionResult` shape but as plain JSON
|
|
@@ -60,6 +60,16 @@ interface InspectionHistoryShard {
|
|
|
60
60
|
declare function inspectionIndexKey(ctx: TenantCtx): string;
|
|
61
61
|
declare function emptyTypesKey(ctx: TenantCtx): string;
|
|
62
62
|
declare function inspectionParquetKey(ctx: TenantCtx): string;
|
|
63
|
+
/** Directory prefix holding a tenant's immutable inspection-event parquets. */
|
|
64
|
+
declare function inspectionEventsPrefix(ctx: TenantCtx): string;
|
|
65
|
+
/**
|
|
66
|
+
* Object key for one immutable inspection-event batch, partitioned by the
|
|
67
|
+
* `YYYY-MM` of the records' `inspectedAt`. The `batchId` is caller-supplied so
|
|
68
|
+
* a job retry re-writes the SAME key (idempotent whole-file overwrite).
|
|
69
|
+
*/
|
|
70
|
+
declare function inspectionEventKey(ctx: TenantCtx, yearMonth: string, batchId: string): string;
|
|
71
|
+
/** Compacted latest-per-url base produced by `compactInspections`. */
|
|
72
|
+
declare function inspectionBaseKey(ctx: TenantCtx): string;
|
|
63
73
|
/**
|
|
64
74
|
* Directory prefix for a month's history shards. Each shard is a UUID-keyed
|
|
65
75
|
* blob under this prefix; `appendHistory` writes one per call, `loadHistory`
|
|
@@ -99,6 +109,36 @@ interface InspectionParquetRow {
|
|
|
99
109
|
scheduleConsecutiveUnchanged: number | null;
|
|
100
110
|
schedulePolicyVersion: number | null;
|
|
101
111
|
}
|
|
112
|
+
/**
|
|
113
|
+
* Row shape for the append-only inspection-event store. Superset of
|
|
114
|
+
* {@link InspectionParquetRow}: carries the full-fidelity columns the lossy
|
|
115
|
+
* `materialize` parquet dropped (`crawlingUserAgent`, `richResultsItems`,
|
|
116
|
+
* `sitemaps`, `referringUrls`, `mobileIssues`, `inspectionResultLink`,
|
|
117
|
+
* `firstCheckedAt`, `checkCount`). Object/array fields are persisted as JSON
|
|
118
|
+
* strings — read paths unpack them with DuckDB's JSON functions.
|
|
119
|
+
*
|
|
120
|
+
* `firstCheckedAt` / `checkCount` are caller-managed: the writer carries the
|
|
121
|
+
* earliest-seen timestamp + running observation count forward. Compaction
|
|
122
|
+
* preserves the EARLIEST `firstCheckedAt` per url (mirrors the sitemap store's
|
|
123
|
+
* `firstSeenAt` preservation); every other column is taken from the
|
|
124
|
+
* newest-by-`inspectedAt` event.
|
|
125
|
+
*/
|
|
126
|
+
interface InspectionEventRow extends InspectionParquetRow {
|
|
127
|
+
crawlingUserAgent: string | null;
|
|
128
|
+
/** JSON-encoded `RichResultsItem[]`. */
|
|
129
|
+
richResultsItems: string | null;
|
|
130
|
+
/** JSON-encoded list of sitemap URLs referencing this page. */
|
|
131
|
+
sitemaps: string | null;
|
|
132
|
+
/** JSON-encoded list of referring URLs. */
|
|
133
|
+
referringUrls: string | null;
|
|
134
|
+
/** JSON-encoded mobile-usability issues. */
|
|
135
|
+
mobileIssues: string | null;
|
|
136
|
+
inspectionResultLink: string | null;
|
|
137
|
+
/** ISO-8601 timestamp of the first inspection we ever recorded for this url. */
|
|
138
|
+
firstCheckedAt: string | null;
|
|
139
|
+
/** Total number of inspections recorded for this url. */
|
|
140
|
+
checkCount: number | null;
|
|
141
|
+
}
|
|
102
142
|
/**
|
|
103
143
|
* Hard cap on a single `appendHistory` shard payload. Encoded bytes >
|
|
104
144
|
* this threshold throws — the caller logs and moves on (D1 is
|
|
@@ -141,6 +181,40 @@ interface InspectionStore {
|
|
|
141
181
|
rowCount: number;
|
|
142
182
|
bytes: number;
|
|
143
183
|
}>;
|
|
184
|
+
/**
|
|
185
|
+
* Append a batch of inspection results as an immutable per-batch parquet
|
|
186
|
+
* under `events/<YYYY-MM>/<batchId>.parquet`, partitioned by the `YYYY-MM`
|
|
187
|
+
* of each row's `inspectedAt` (a batch spanning a month boundary writes one
|
|
188
|
+
* file per month). No read-before-write; idempotent under job retry (same
|
|
189
|
+
* `batchId` → same key → whole-file overwrite). Rows carry the FULL column
|
|
190
|
+
* set ({@link INSPECTION_EVENT_COLUMNS}); this is the append-only
|
|
191
|
+
* source-of-truth that supersedes {@link InspectionStore.materialize}.
|
|
192
|
+
*
|
|
193
|
+
* Returns the keys written + total row count. Empty input is a no-op.
|
|
194
|
+
*/
|
|
195
|
+
appendInspectionEvents: (ctx: TenantCtx, rows: readonly InspectionEventRow[], opts?: {
|
|
196
|
+
batchId?: string;
|
|
197
|
+
}) => Promise<{
|
|
198
|
+
keys: string[];
|
|
199
|
+
rowCount: number;
|
|
200
|
+
}>;
|
|
201
|
+
/**
|
|
202
|
+
* Fold every outstanding event file into the `base.parquet`: latest-per-url
|
|
203
|
+
* by max `inspectedAt` (newest-wins), preserving the earliest non-null
|
|
204
|
+
* `firstCheckedAt` per url. Writes the new base then deletes the consumed
|
|
205
|
+
* event files — file-level only, never row-level (ADR-0002). Idempotent +
|
|
206
|
+
* re-runnable: a crash after the base write but before the delete just
|
|
207
|
+
* re-folds the same events (newest-wins makes that a no-op). A real read
|
|
208
|
+
* failure on the existing base propagates rather than rebuilding from events
|
|
209
|
+
* alone (which would drop URLs only the base held).
|
|
210
|
+
*
|
|
211
|
+
* No-op (no base rewrite) when there are zero outstanding events.
|
|
212
|
+
*/
|
|
213
|
+
compactInspections: (ctx: TenantCtx) => Promise<{
|
|
214
|
+
baseRowCount: number;
|
|
215
|
+
eventsFolded: number;
|
|
216
|
+
eventFilesDeleted: number;
|
|
217
|
+
}>;
|
|
144
218
|
/**
|
|
145
219
|
* DuckDB-resolvable URI for the materialised parquet sidecar, or
|
|
146
220
|
* `undefined` if the underlying `DataSource` has no native URI shape
|
|
@@ -156,6 +230,14 @@ interface InspectionStore {
|
|
|
156
230
|
interface CreateInspectionStoreOptions {
|
|
157
231
|
dataSource: DataSource;
|
|
158
232
|
}
|
|
233
|
+
/**
|
|
234
|
+
* Column schema for the append-only inspection-event store + its compacted
|
|
235
|
+
* base. Superset of {@link INSPECTION_PARQUET_COLUMNS}: the 16 promoted columns
|
|
236
|
+
* plus the 8 full-fidelity ones the lossy `materialize` parquet dropped. The
|
|
237
|
+
* event files and `base.parquet` share this schema so DuckDB
|
|
238
|
+
* `read_parquet([...], union_by_name = true)` merges base + events cleanly.
|
|
239
|
+
*/
|
|
240
|
+
declare const INSPECTION_EVENT_COLUMNS: readonly ColumnDef[];
|
|
159
241
|
declare function createInspectionStore(opts: CreateInspectionStoreOptions): InspectionStore;
|
|
160
242
|
/** GSC sitemap record we persist. Matches `Schema$WmxSitemap` but as plain JSON. */
|
|
161
243
|
interface SitemapRecord {
|
|
@@ -331,4 +413,4 @@ interface CreateEmptyTypesStoreOptions {
|
|
|
331
413
|
now?: () => number;
|
|
332
414
|
}
|
|
333
415
|
declare function createEmptyTypesStore(opts: CreateEmptyTypesStoreOptions): EmptyTypesStore;
|
|
334
|
-
export { CreateEmptyTypesStoreOptions, CreateIndexingMetadataStoreOptions, CreateInspectionStoreOptions, CreateSitemapStoreOptions, DateRange, DeltaEntry, EmptyTypesDoc, EmptyTypesStore, INSPECTION_HISTORY_MAX_BYTES, IndexingMetadataIndex, IndexingMetadataRecord, IndexingMetadataStore, InspectionHistoryShard, InspectionIndex, InspectionParquetRow, InspectionRecord, InspectionStore, LoadUrlsOptions, ParsedUrl, SitemapHistoryDoc, SitemapIndex, SitemapRecord, SitemapStore, SitemapUrlRecord, SnapshotUrlsResult, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix };
|
|
416
|
+
export { CreateEmptyTypesStoreOptions, CreateIndexingMetadataStoreOptions, CreateInspectionStoreOptions, CreateSitemapStoreOptions, DateRange, DeltaEntry, EmptyTypesDoc, EmptyTypesStore, INSPECTION_EVENT_COLUMNS, INSPECTION_HISTORY_MAX_BYTES, IndexingMetadataIndex, IndexingMetadataRecord, IndexingMetadataStore, InspectionEventRow, InspectionHistoryShard, InspectionIndex, InspectionParquetRow, InspectionRecord, InspectionStore, LoadUrlsOptions, ParsedUrl, SitemapHistoryDoc, SitemapIndex, SitemapRecord, SitemapStore, SitemapUrlRecord, SnapshotUrlsResult, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionBaseKey, inspectionEventKey, inspectionEventsPrefix, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix };
|
package/dist/entities.mjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { INSPECTION_HISTORY_MAX_BYTES, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix } from "./_chunks/entities.mjs";
|
|
2
|
-
export { INSPECTION_HISTORY_MAX_BYTES, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix };
|
|
1
|
+
import { INSPECTION_EVENT_COLUMNS, INSPECTION_HISTORY_MAX_BYTES, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionBaseKey, inspectionEventKey, inspectionEventsPrefix, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix } from "./_chunks/entities.mjs";
|
|
2
|
+
export { INSPECTION_EVENT_COLUMNS, INSPECTION_HISTORY_MAX_BYTES, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionBaseKey, inspectionEventKey, inspectionEventsPrefix, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix };
|
package/dist/rollups.d.mts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { DataSource, FileSetRef, Row as Row$1 } from "./_chunks/storage.mjs";
|
|
2
|
-
import { ColumnDef } from "./_chunks/schema.mjs";
|
|
2
|
+
import { ColumnDef as ColumnDef$1 } from "./_chunks/schema.mjs";
|
|
3
3
|
import { EngineError } from "./_chunks/errors.mjs";
|
|
4
4
|
import { SearchType } from "gscdump/query";
|
|
5
5
|
import { TenantCtx } from "@gscdump/contracts";
|
|
@@ -70,7 +70,7 @@ interface RollupDef {
|
|
|
70
70
|
* Types map the same way as the fact-table encoder: VARCHAR / DATE go
|
|
71
71
|
* through BYTE_ARRAY/UTF8; BIGINT → INT64; INTEGER → INT32; DOUBLE → DOUBLE.
|
|
72
72
|
*/
|
|
73
|
-
parquetColumns?: readonly ColumnDef[];
|
|
73
|
+
parquetColumns?: readonly ColumnDef$1[];
|
|
74
74
|
/** Sort-key column names for parquet row-group stats. Optional. */
|
|
75
75
|
parquetSortKey?: readonly string[];
|
|
76
76
|
/**
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@gscdump/engine",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.26.1",
|
|
4
|
+
"version": "0.26.3",
|
|
5
5
|
"description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "Harlan Wilton",
|
|
@@ -190,8 +190,8 @@
|
|
|
190
190
|
"drizzle-orm": "1.0.0-rc.3",
|
|
191
191
|
"icebird": "^0.8.8",
|
|
192
192
|
"proper-lockfile": "^4.1.2",
|
|
193
|
-
"@gscdump/contracts": "0.26.1",
|
|
194
|
-
"gscdump": "0.26.1"
|
|
193
|
+
"gscdump": "0.26.3",
|
|
194
|
+
"@gscdump/contracts": "0.26.3"
|
|
195
195
|
},
|
|
196
196
|
"devDependencies": {
|
|
197
197
|
"@duckdb/duckdb-wasm": "^1.32.0",
|