@gscdump/engine 0.25.14 → 0.26.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/entities.mjs CHANGED
@@ -1,641 +1,2 @@
1
- import { decodeParquetToRows, encodeRowsToParquetFlex } from "./adapters/hyparquet.mjs";
2
- const YEAR_MONTH_RE = /^(\d{4})-(\d{2})-/;
3
- function inspectionIndexKey(ctx) {
4
- return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/entities/inspections/index.json` : `u_${ctx.userId}/entities/inspections/index.json`;
5
- }
6
- function emptyTypesKey(ctx) {
7
- return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/entities/empty-types.json` : `u_${ctx.userId}/entities/empty-types.json`;
8
- }
9
- function inspectionParquetKey(ctx) {
10
- return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/entities/inspections/index.parquet` : `u_${ctx.userId}/entities/inspections/index.parquet`;
11
- }
12
- function inspectionHistoryPrefix(ctx, yearMonth) {
13
- return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/entities/inspections/history/${yearMonth}` : `u_${ctx.userId}/entities/inspections/history/${yearMonth}`;
14
- }
15
- function inspectionHistoryShardKey(ctx, yearMonth, batchId) {
16
- return `${inspectionHistoryPrefix(ctx, yearMonth)}/${batchId}.json`;
17
- }
18
- function hashUrl(url) {
19
- let hi = 2166136261;
20
- let lo = 3421674724;
21
- for (let i = 0; i < url.length; i++) {
22
- const c = url.charCodeAt(i);
23
- lo ^= c;
24
- const loMul = Math.imul(lo, 435) >>> 0;
25
- const carry = Math.floor(lo * 435 / 4294967296);
26
- const hiMul = Math.imul(hi, 435) + Math.imul(lo, 1) + carry >>> 0;
27
- lo = loMul;
28
- hi = hiMul;
29
- }
30
- return (hi >>> 0).toString(16).padStart(8, "0") + (lo >>> 0).toString(16).padStart(8, "0");
31
- }
32
- const INSPECTION_HISTORY_MAX_BYTES = 5 * 1024 * 1024;
33
- const INSPECTION_PARQUET_COLUMNS = [
34
- {
35
- name: "urlHash",
36
- type: "VARCHAR",
37
- nullable: false
38
- },
39
- {
40
- name: "url",
41
- type: "VARCHAR",
42
- nullable: false
43
- },
44
- {
45
- name: "inspectedAt",
46
- type: "VARCHAR",
47
- nullable: false
48
- },
49
- {
50
- name: "indexStatus",
51
- type: "VARCHAR",
52
- nullable: true
53
- },
54
- {
55
- name: "lastCrawlTime",
56
- type: "VARCHAR",
57
- nullable: true
58
- },
59
- {
60
- name: "googleCanonical",
61
- type: "VARCHAR",
62
- nullable: true
63
- },
64
- {
65
- name: "userCanonical",
66
- type: "VARCHAR",
67
- nullable: true
68
- },
69
- {
70
- name: "coverageState",
71
- type: "VARCHAR",
72
- nullable: true
73
- },
74
- {
75
- name: "robotsTxtState",
76
- type: "VARCHAR",
77
- nullable: true
78
- },
79
- {
80
- name: "indexingState",
81
- type: "VARCHAR",
82
- nullable: true
83
- },
84
- {
85
- name: "pageFetchState",
86
- type: "VARCHAR",
87
- nullable: true
88
- },
89
- {
90
- name: "mobileUsabilityVerdict",
91
- type: "VARCHAR",
92
- nullable: true
93
- },
94
- {
95
- name: "richResultsVerdict",
96
- type: "VARCHAR",
97
- nullable: true
98
- },
99
- {
100
- name: "scheduleNextAt",
101
- type: "BIGINT",
102
- nullable: true
103
- },
104
- {
105
- name: "scheduleConsecutiveUnchanged",
106
- type: "INTEGER",
107
- nullable: true
108
- },
109
- {
110
- name: "schedulePolicyVersion",
111
- type: "INTEGER",
112
- nullable: true
113
- }
114
- ];
115
- function createInspectionStore(opts) {
116
- const ds = opts.dataSource;
117
- function shardFor(record) {
118
- const m = YEAR_MONTH_RE.exec(record.inspectedAt);
119
- return m ? `${m[1]}-${m[2]}` : "unknown";
120
- }
121
- function randomBatchId() {
122
- return typeof crypto !== "undefined" && "randomUUID" in crypto ? crypto.randomUUID() : `${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 10)}`;
123
- }
124
- return {
125
- async appendHistory(ctx, records, options) {
126
- if (records.length === 0) return;
127
- const batchId = options?.batchId ?? randomBatchId();
128
- const byMonth = /* @__PURE__ */ new Map();
129
- for (const r of records) {
130
- const month = shardFor(r);
131
- if (!byMonth.has(month)) byMonth.set(month, []);
132
- byMonth.get(month).push(r);
133
- }
134
- for (const [yearMonth, batch] of byMonth) {
135
- const shard = {
136
- version: 1,
137
- records: batch
138
- };
139
- const bytes = new TextEncoder().encode(JSON.stringify(shard));
140
- if (bytes.byteLength > 5242880) throw new Error(`inspection history shard exceeds ${INSPECTION_HISTORY_MAX_BYTES} bytes (got ${bytes.byteLength}); split the batch`);
141
- await ds.write(inspectionHistoryShardKey(ctx, yearMonth, batchId), bytes);
142
- }
143
- },
144
- async loadHistory(ctx, yearMonth) {
145
- const keys = await ds.list(inspectionHistoryPrefix(ctx, yearMonth));
146
- if (keys.length === 0) return void 0;
147
- const out = [];
148
- for (const key of keys) {
149
- const bytes = await ds.read(key).catch(() => void 0);
150
- if (!bytes) continue;
151
- const shard = await Promise.resolve().then(() => JSON.parse(new TextDecoder().decode(bytes))).catch((err) => {
152
- console.warn("[inspection.loadHistory] failed to decode shard", {
153
- key,
154
- error: err.message
155
- });
156
- });
157
- if (shard?.records) out.push(...shard.records);
158
- }
159
- return {
160
- version: 1,
161
- records: out
162
- };
163
- },
164
- async materialize(ctx, rowIter) {
165
- const rows = Array.from(rowIter);
166
- rows.sort((a, b) => a.urlHash < b.urlHash ? -1 : a.urlHash > b.urlHash ? 1 : 0);
167
- const bytes = encodeRowsToParquetFlex(rows, {
168
- columns: INSPECTION_PARQUET_COLUMNS,
169
- sortKey: ["urlHash"]
170
- });
171
- const key = inspectionParquetKey(ctx);
172
- await ds.write(key, bytes);
173
- return {
174
- key,
175
- rowCount: rows.length,
176
- bytes: bytes.byteLength
177
- };
178
- },
179
- parquetUri(ctx) {
180
- return ds.uri?.(inspectionParquetKey(ctx));
181
- }
182
- };
183
- }
184
- function sitemapIndexKey(ctx) {
185
- return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/entities/sitemaps/index.json` : `u_${ctx.userId}/entities/sitemaps/index.json`;
186
- }
187
- function sitemapHistoryKey(ctx, feedpathHash, capturedAtMs) {
188
- return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/entities/sitemaps/history/${feedpathHash}__${capturedAtMs}.json` : `u_${ctx.userId}/entities/sitemaps/history/${feedpathHash}__${capturedAtMs}.json`;
189
- }
190
- function sitemapUrlsPrefix(ctx) {
191
- return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/entities/sitemaps/urls` : `u_${ctx.userId}/entities/sitemaps/urls`;
192
- }
193
- function sitemapUrlsIndexPrefix(ctx) {
194
- return `${sitemapUrlsPrefix(ctx)}/by-feed`;
195
- }
196
- function sitemapUrlsIndexKey(ctx, feedpathHash) {
197
- return `${sitemapUrlsIndexPrefix(ctx)}/${feedpathHash}/index.parquet`;
198
- }
199
- function sitemapUrlsDeltaKey(ctx, feedpathHash, date) {
200
- return `${sitemapUrlsPrefix(ctx)}/deltas/${date}__${feedpathHash}.parquet`;
201
- }
202
- const SITEMAP_URLS_DELTA_PREFIX_RE = /\/urls\/deltas\/(\d{4}-\d{2}-\d{2})__([0-9a-f]+)\.parquet$/;
203
- const URLS_INDEX_COLUMNS = [
204
- {
205
- name: "feedpath",
206
- type: "VARCHAR",
207
- nullable: false
208
- },
209
- {
210
- name: "feedpath_hash",
211
- type: "VARCHAR",
212
- nullable: false
213
- },
214
- {
215
- name: "url_hash",
216
- type: "VARCHAR",
217
- nullable: false
218
- },
219
- {
220
- name: "loc",
221
- type: "VARCHAR",
222
- nullable: false
223
- },
224
- {
225
- name: "lastmod",
226
- type: "VARCHAR",
227
- nullable: true
228
- },
229
- {
230
- name: "first_seen_at",
231
- type: "BIGINT",
232
- nullable: false
233
- },
234
- {
235
- name: "last_seen_at",
236
- type: "BIGINT",
237
- nullable: false
238
- },
239
- {
240
- name: "removed_at",
241
- type: "BIGINT",
242
- nullable: true
243
- }
244
- ];
245
- const URLS_DELTA_COLUMNS = [
246
- {
247
- name: "feedpath",
248
- type: "VARCHAR",
249
- nullable: false
250
- },
251
- {
252
- name: "feedpath_hash",
253
- type: "VARCHAR",
254
- nullable: false
255
- },
256
- {
257
- name: "url_hash",
258
- type: "VARCHAR",
259
- nullable: false
260
- },
261
- {
262
- name: "op",
263
- type: "VARCHAR",
264
- nullable: false
265
- },
266
- {
267
- name: "loc",
268
- type: "VARCHAR",
269
- nullable: false
270
- },
271
- {
272
- name: "lastmod",
273
- type: "VARCHAR",
274
- nullable: true
275
- },
276
- {
277
- name: "at",
278
- type: "BIGINT",
279
- nullable: false
280
- }
281
- ];
282
- function rowToUrlRecord(row) {
283
- return {
284
- feedpath: String(row.feedpath),
285
- feedpathHash: String(row.feedpath_hash),
286
- urlHash: String(row.url_hash),
287
- loc: String(row.loc),
288
- lastmod: row.lastmod == null ? void 0 : String(row.lastmod),
289
- firstSeenAt: Number(row.first_seen_at),
290
- lastSeenAt: Number(row.last_seen_at),
291
- removedAt: row.removed_at == null ? void 0 : Number(row.removed_at)
292
- };
293
- }
294
- function urlRecordToRow(r) {
295
- return {
296
- feedpath: r.feedpath,
297
- feedpath_hash: r.feedpathHash,
298
- url_hash: r.urlHash,
299
- loc: r.loc,
300
- lastmod: r.lastmod ?? null,
301
- first_seen_at: r.firstSeenAt,
302
- last_seen_at: r.lastSeenAt,
303
- removed_at: r.removedAt ?? null
304
- };
305
- }
306
- function isoDate(ms) {
307
- return new Date(ms).toISOString().slice(0, 10);
308
- }
309
- function hashUrlList(urls) {
310
- return hashUrl(urls.map((u) => u.loc).sort().join("\n"));
311
- }
312
- function createSitemapStore(opts) {
313
- const ds = opts.dataSource;
314
- const hash = opts.hash ?? hashUrl;
315
- const now = opts.now ?? (() => Date.now());
316
- async function readJson(key) {
317
- return await ds.read(key).then((bytes) => JSON.parse(new TextDecoder().decode(bytes)), () => void 0);
318
- }
319
- async function writeJson(key, value) {
320
- await ds.write(key, new TextEncoder().encode(JSON.stringify(value)));
321
- }
322
- return {
323
- async writeSnapshot(ctx, records) {
324
- if (records.length === 0) return;
325
- const indexKey = sitemapIndexKey(ctx);
326
- const index = await readJson(indexKey) ?? {
327
- version: 1,
328
- records: {}
329
- };
330
- const stamp = now();
331
- for (const r of records) {
332
- const h = hash(r.path);
333
- index.records[h] = r;
334
- await writeJson(sitemapHistoryKey(ctx, h, stamp), {
335
- version: 1,
336
- path: r.path,
337
- capturedAt: r.capturedAt,
338
- record: r
339
- });
340
- }
341
- await writeJson(indexKey, index);
342
- },
343
- async loadIndex(ctx) {
344
- return await readJson(sitemapIndexKey(ctx)) ?? {
345
- version: 1,
346
- records: {}
347
- };
348
- },
349
- async getLatest(ctx, path) {
350
- return (await readJson(sitemapIndexKey(ctx)))?.records[hash(path)];
351
- },
352
- async snapshotUrls(ctx, feedpath, urls) {
353
- const fpHash = hash(feedpath);
354
- const contentHash = hashUrlList(urls);
355
- const at = now();
356
- const priorByHash = /* @__PURE__ */ new Map();
357
- for await (const rec of this.loadUrls(ctx, feedpath, { includeRemoved: true })) priorByHash.set(rec.urlHash, rec);
358
- const livePrior = Array.from(priorByHash.values()).filter((r) => r.removedAt == null);
359
- if (livePrior.length > 0) {
360
- if (hashUrl(livePrior.map((r) => String(r.loc)).sort().join("\n")) === contentHash) return {
361
- added: 0,
362
- removed: 0,
363
- kept: livePrior.length,
364
- contentHash,
365
- unchanged: true
366
- };
367
- }
368
- const incomingByHash = /* @__PURE__ */ new Map();
369
- for (const u of urls) incomingByHash.set(hash(u.loc), u);
370
- const deltaRows = [];
371
- let added = 0;
372
- let removed = 0;
373
- let kept = 0;
374
- const date = isoDate(at);
375
- for (const [urlHash, u] of incomingByHash) {
376
- const prev = priorByHash.get(urlHash);
377
- if (!prev || prev.removedAt != null) {
378
- added++;
379
- deltaRows.push({
380
- feedpath,
381
- feedpath_hash: fpHash,
382
- url_hash: urlHash,
383
- op: "added",
384
- loc: u.loc,
385
- lastmod: u.lastmod ?? null,
386
- at
387
- });
388
- } else kept++;
389
- }
390
- for (const [urlHash, prev] of priorByHash) {
391
- if (prev.removedAt != null) continue;
392
- if (!incomingByHash.has(urlHash)) {
393
- removed++;
394
- deltaRows.push({
395
- feedpath,
396
- feedpath_hash: fpHash,
397
- url_hash: urlHash,
398
- op: "removed",
399
- loc: prev.loc,
400
- lastmod: prev.lastmod ?? null,
401
- at
402
- });
403
- }
404
- }
405
- if (deltaRows.length > 0) {
406
- const bytes = encodeRowsToParquetFlex(deltaRows, {
407
- columns: URLS_DELTA_COLUMNS,
408
- sortKey: ["url_hash"]
409
- });
410
- await ds.write(sitemapUrlsDeltaKey(ctx, fpHash, date), bytes);
411
- }
412
- return {
413
- added,
414
- removed,
415
- kept,
416
- contentHash,
417
- unchanged: false
418
- };
419
- },
420
- async *loadUrls(ctx, feedpath, opts) {
421
- const fpHash = hash(feedpath);
422
- const includeRemoved = opts?.includeRemoved ?? false;
423
- const indexBytes = await ds.read(sitemapUrlsIndexKey(ctx, fpHash)).catch(() => void 0);
424
- const indexRows = indexBytes ? await decodeParquetToRows(indexBytes) : [];
425
- const deltaKeys = (await ds.list(`${sitemapUrlsPrefix(ctx)}/deltas/`)).sort();
426
- const live = /* @__PURE__ */ new Map();
427
- const removedMap = /* @__PURE__ */ new Map();
428
- for (const row of indexRows) {
429
- const rec = rowToUrlRecord(row);
430
- if (rec.removedAt != null) removedMap.set(rec.urlHash, rec);
431
- else live.set(rec.urlHash, rec);
432
- }
433
- for (const key of deltaKeys) {
434
- const m = SITEMAP_URLS_DELTA_PREFIX_RE.exec(key);
435
- if (!m || m[2] !== fpHash) continue;
436
- const dBytes = await ds.read(key).catch(() => void 0);
437
- if (!dBytes) continue;
438
- const dRows = await decodeParquetToRows(dBytes);
439
- for (const r of dRows) {
440
- const op = String(r.op);
441
- const urlHash = String(r.url_hash);
442
- const at = Number(r.at);
443
- if (op === "added") {
444
- const prev = live.get(urlHash) ?? removedMap.get(urlHash);
445
- removedMap.delete(urlHash);
446
- live.set(urlHash, {
447
- feedpath,
448
- feedpathHash: fpHash,
449
- urlHash,
450
- loc: String(r.loc),
451
- lastmod: r.lastmod == null ? void 0 : String(r.lastmod),
452
- firstSeenAt: prev?.firstSeenAt ?? at,
453
- lastSeenAt: at
454
- });
455
- } else if (op === "removed") {
456
- const prev = live.get(urlHash);
457
- live.delete(urlHash);
458
- if (prev) removedMap.set(urlHash, {
459
- ...prev,
460
- removedAt: at
461
- });
462
- }
463
- }
464
- }
465
- for (const rec of live.values()) yield rec;
466
- if (includeRemoved) for (const rec of removedMap.values()) yield rec;
467
- },
468
- async *loadDeltas(ctx, dateRange) {
469
- const from = dateRange?.from;
470
- const to = dateRange?.to;
471
- const keys = (await ds.list(`${sitemapUrlsPrefix(ctx)}/deltas/`)).sort();
472
- for (const key of keys) {
473
- const m = SITEMAP_URLS_DELTA_PREFIX_RE.exec(key);
474
- if (!m) continue;
475
- const date = m[1];
476
- if (from && date < from) continue;
477
- if (to && date > to) continue;
478
- const bytes = await ds.read(key).catch(() => void 0);
479
- if (!bytes) continue;
480
- const rows = await decodeParquetToRows(bytes);
481
- for (const r of rows) {
482
- const op = String(r.op);
483
- if (op !== "added" && op !== "removed") continue;
484
- yield {
485
- feedpath: String(r.feedpath),
486
- feedpathHash: String(r.feedpath_hash),
487
- urlHash: String(r.url_hash),
488
- op,
489
- loc: String(r.loc),
490
- lastmod: r.lastmod == null ? void 0 : String(r.lastmod),
491
- at: Number(r.at)
492
- };
493
- }
494
- }
495
- },
496
- async compactUrls(ctx) {
497
- const deltaKeys = await ds.list(`${sitemapUrlsPrefix(ctx)}/deltas/`);
498
- const deltasByFeed = /* @__PURE__ */ new Map();
499
- for (const key of deltaKeys) {
500
- const m = SITEMAP_URLS_DELTA_PREFIX_RE.exec(key);
501
- if (!m) continue;
502
- const list = deltasByFeed.get(m[2]) ?? [];
503
- list.push(key);
504
- deltasByFeed.set(m[2], list);
505
- }
506
- for (const [fpHash, feedDeltaKeys] of deltasByFeed) {
507
- const indexKey = sitemapUrlsIndexKey(ctx, fpHash);
508
- const indexBytes = await ds.read(indexKey).catch(() => void 0);
509
- const indexRows = indexBytes ? await decodeParquetToRows(indexBytes) : [];
510
- const live = /* @__PURE__ */ new Map();
511
- const removed = /* @__PURE__ */ new Map();
512
- for (const row of indexRows) {
513
- const rec = rowToUrlRecord(row);
514
- if (rec.removedAt != null) removed.set(rec.urlHash, rec);
515
- else live.set(rec.urlHash, rec);
516
- }
517
- const consumed = [];
518
- for (const key of feedDeltaKeys.sort()) {
519
- const bytes = await ds.read(key).catch(() => void 0);
520
- if (!bytes) continue;
521
- consumed.push(key);
522
- const rows = await decodeParquetToRows(bytes);
523
- for (const r of rows) {
524
- const urlHash = String(r.url_hash);
525
- const at = Number(r.at);
526
- const op = String(r.op);
527
- if (op === "added") {
528
- const prev = live.get(urlHash) ?? removed.get(urlHash);
529
- removed.delete(urlHash);
530
- live.set(urlHash, {
531
- feedpath: String(r.feedpath),
532
- feedpathHash: fpHash,
533
- urlHash,
534
- loc: String(r.loc),
535
- lastmod: r.lastmod == null ? void 0 : String(r.lastmod),
536
- firstSeenAt: prev?.firstSeenAt ?? at,
537
- lastSeenAt: at
538
- });
539
- } else if (op === "removed") {
540
- const prev = live.get(urlHash);
541
- live.delete(urlHash);
542
- if (prev) removed.set(urlHash, {
543
- ...prev,
544
- removedAt: at
545
- });
546
- }
547
- }
548
- }
549
- const merged = [...live.values(), ...removed.values()];
550
- merged.sort((a, b) => a.urlHash < b.urlHash ? -1 : a.urlHash > b.urlHash ? 1 : 0);
551
- const bytes = encodeRowsToParquetFlex(merged.map(urlRecordToRow), {
552
- columns: URLS_INDEX_COLUMNS,
553
- sortKey: ["feedpath_hash", "url_hash"]
554
- });
555
- await ds.write(indexKey, bytes);
556
- if (consumed.length > 0) await ds.delete(consumed);
557
- }
558
- }
559
- };
560
- }
561
- function indexingMetadataIndexKey(ctx) {
562
- return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/entities/indexing/index.json` : `u_${ctx.userId}/entities/indexing/index.json`;
563
- }
564
- function createIndexingMetadataStore(opts) {
565
- const ds = opts.dataSource;
566
- const hash = opts.hash ?? hashUrl;
567
- async function readIndex(key) {
568
- return await ds.read(key).then((bytes) => JSON.parse(new TextDecoder().decode(bytes)), () => ({
569
- version: 1,
570
- records: {}
571
- }));
572
- }
573
- return {
574
- async writeBatch(ctx, records) {
575
- if (records.length === 0) return;
576
- const key = indexingMetadataIndexKey(ctx);
577
- const index = await readIndex(key);
578
- for (const r of records) index.records[hash(r.url)] = r;
579
- await ds.write(key, new TextEncoder().encode(JSON.stringify(index)));
580
- },
581
- async loadIndex(ctx) {
582
- return readIndex(indexingMetadataIndexKey(ctx));
583
- },
584
- async getLatest(ctx, url) {
585
- return (await readIndex(indexingMetadataIndexKey(ctx))).records[hash(url)];
586
- }
587
- };
588
- }
589
- function createEmptyTypesStore(opts) {
590
- const ds = opts.dataSource;
591
- const now = opts.now ?? (() => Date.now());
592
- async function readDoc(key) {
593
- return await ds.read(key).then((bytes) => JSON.parse(new TextDecoder().decode(bytes)), () => ({
594
- version: 1,
595
- emptyTypes: [],
596
- markedAt: {}
597
- }));
598
- }
599
- async function writeDoc(key, doc) {
600
- await ds.write(key, new TextEncoder().encode(JSON.stringify(doc)));
601
- }
602
- return {
603
- async load(ctx) {
604
- return readDoc(emptyTypesKey(ctx));
605
- },
606
- async mark(ctx, types, at) {
607
- if (types.length === 0) return readDoc(emptyTypesKey(ctx));
608
- const key = emptyTypesKey(ctx);
609
- const doc = await readDoc(key);
610
- const stamp = at ?? now();
611
- let changed = false;
612
- for (const t of types) {
613
- if (!doc.emptyTypes.includes(t)) {
614
- doc.emptyTypes.push(t);
615
- changed = true;
616
- }
617
- if (doc.markedAt[t] === void 0) {
618
- doc.markedAt[t] = stamp;
619
- changed = true;
620
- }
621
- }
622
- if (changed) {
623
- doc.emptyTypes.sort();
624
- await writeDoc(key, doc);
625
- }
626
- return doc;
627
- },
628
- async clear(ctx, types) {
629
- if (types.length === 0) return readDoc(emptyTypesKey(ctx));
630
- const key = emptyTypesKey(ctx);
631
- const doc = await readDoc(key);
632
- const drop = new Set(types);
633
- const before = doc.emptyTypes.length;
634
- doc.emptyTypes = doc.emptyTypes.filter((t) => !drop.has(t));
635
- for (const t of drop) delete doc.markedAt[t];
636
- if (doc.emptyTypes.length !== before) await writeDoc(key, doc);
637
- return doc;
638
- }
639
- };
640
- }
1
+ import { INSPECTION_HISTORY_MAX_BYTES, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix } from "./_chunks/entities.mjs";
641
2
  export { INSPECTION_HISTORY_MAX_BYTES, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix };
@@ -0,0 +1,2 @@
1
+ import { EngineError, EngineErrorKind, engineErrorToException, engineErrors, formatEngineError, isEngineError } from "./_chunks/errors.mjs";
2
+ export { EngineError, EngineErrorKind, engineErrorToException, engineErrors, formatEngineError, isEngineError };