@gscdump/engine 0.19.0 → 0.19.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/_chunks/engine.mjs
CHANGED
|
@@ -273,6 +273,23 @@ function normalizeRow(table, row) {
|
|
|
273
273
|
url: normalized
|
|
274
274
|
};
|
|
275
275
|
}
|
|
276
|
+
const DAILY_PARTITION_RE = /^daily\/(\d{4}-\d{2}-\d{2})$/;
|
|
277
|
+
function queryRangeOf(partitions) {
|
|
278
|
+
if (!partitions) return void 0;
|
|
279
|
+
let min;
|
|
280
|
+
let max;
|
|
281
|
+
for (const p of partitions) {
|
|
282
|
+
const m = DAILY_PARTITION_RE.exec(p);
|
|
283
|
+
if (!m) continue;
|
|
284
|
+
const d = m[1];
|
|
285
|
+
if (min === void 0 || d < min) min = d;
|
|
286
|
+
if (max === void 0 || d > max) max = d;
|
|
287
|
+
}
|
|
288
|
+
return min !== void 0 ? {
|
|
289
|
+
start: min,
|
|
290
|
+
end: max
|
|
291
|
+
} : void 0;
|
|
292
|
+
}
|
|
276
293
|
function createStorageEngine(opts) {
|
|
277
294
|
const { dataSource, manifestStore, codec, executor } = opts;
|
|
278
295
|
const defaultNow = opts.now ?? (() => Date.now());
|
|
@@ -406,7 +423,7 @@ function createStorageEngine(opts) {
|
|
|
406
423
|
table: ref.table,
|
|
407
424
|
partitions: ref.partitions,
|
|
408
425
|
...opts.searchType !== void 0 ? { searchType: opts.searchType } : {}
|
|
409
|
-
})).map((e) => e.objectKey)];
|
|
426
|
+
}), queryRangeOf(ref.partitions)).map((e) => e.objectKey)];
|
|
410
427
|
}));
|
|
411
428
|
opts.signal?.throwIfAborted();
|
|
412
429
|
const fileKeys = {};
|
|
@@ -186,9 +186,12 @@ function partitionSpan(partition) {
|
|
|
186
186
|
};
|
|
187
187
|
}
|
|
188
188
|
}
|
|
189
|
-
function splitOverlappingTiers(entries) {
|
|
189
|
+
function splitOverlappingTiers(entries, queryRange) {
|
|
190
|
+
const rangeStartMs = queryRange ? Date.parse(`${queryRange.start}T00:00:00Z`) : void 0;
|
|
191
|
+
const rangeEndMs = queryRange ? Date.parse(`${queryRange.end}T00:00:00Z`) : void 0;
|
|
190
192
|
const spanned = [];
|
|
191
193
|
const kept = [];
|
|
194
|
+
const subsumed = [];
|
|
192
195
|
for (const entry of entries) {
|
|
193
196
|
const span = partitionSpan(entry.partition);
|
|
194
197
|
if (!span) {
|
|
@@ -196,7 +199,14 @@ function splitOverlappingTiers(entries) {
|
|
|
196
199
|
continue;
|
|
197
200
|
}
|
|
198
201
|
const days = [];
|
|
199
|
-
for (let t = span.startMs; t <= span.endMs; t += MS_PER_DAY)
|
|
202
|
+
for (let t = span.startMs; t <= span.endMs; t += MS_PER_DAY) {
|
|
203
|
+
if (rangeStartMs !== void 0 && (t < rangeStartMs || t > rangeEndMs)) continue;
|
|
204
|
+
days.push(t);
|
|
205
|
+
}
|
|
206
|
+
if (queryRange && days.length === 0) {
|
|
207
|
+
subsumed.push(entry);
|
|
208
|
+
continue;
|
|
209
|
+
}
|
|
200
210
|
spanned.push({
|
|
201
211
|
entry,
|
|
202
212
|
rank: span.rank,
|
|
@@ -205,7 +215,6 @@ function splitOverlappingTiers(entries) {
|
|
|
205
215
|
}
|
|
206
216
|
spanned.sort((a, b) => a.rank - b.rank || b.entry.createdAt - a.entry.createdAt);
|
|
207
217
|
const coveredBySearchType = /* @__PURE__ */ new Map();
|
|
208
|
-
const subsumed = [];
|
|
209
218
|
for (const { entry, days } of spanned) {
|
|
210
219
|
const slice = inferSearchType(entry);
|
|
211
220
|
let covered = coveredBySearchType.get(slice);
|
|
@@ -225,8 +234,8 @@ function splitOverlappingTiers(entries) {
|
|
|
225
234
|
subsumed
|
|
226
235
|
};
|
|
227
236
|
}
|
|
228
|
-
function dedupeOverlappingTiers(entries) {
|
|
229
|
-
return splitOverlappingTiers(entries).kept;
|
|
237
|
+
function dedupeOverlappingTiers(entries, queryRange) {
|
|
238
|
+
return splitOverlappingTiers(entries, queryRange).kept;
|
|
230
239
|
}
|
|
231
240
|
function monthEndMs(month) {
|
|
232
241
|
const [y, m] = month.split("-").map(Number);
|
|
@@ -38,13 +38,27 @@ declare function enumeratePartitions(startDate: string, endDate: string): string
|
|
|
38
38
|
* monthly) still double-counts those boundary days — eliminating that needs
|
|
39
39
|
* per-file date predicates in the SQL, tracked separately. Unrecognised
|
|
40
40
|
* partition shapes (`hourly/`, sidecar keys) are always kept.
|
|
41
|
+
*
|
|
42
|
+
* `queryRange` clamps every entry's day-span to the window the caller will
|
|
43
|
+
* actually read. This is required when `entries` came from a partition-
|
|
44
|
+
* filtered `listLive` (`runSQL` enumerates only the partitions intersecting
|
|
45
|
+
* the query): a `monthly/2026-04` whose Apr 27-30 falls past the query end
|
|
46
|
+
* must not be judged "unsubsumed" just because `weekly/2026-04-27` wasn't
|
|
47
|
+
* enumerated — those out-of-window days are SQL-filtered to nothing anyway.
|
|
48
|
+
* Omit `queryRange` when `entries` is the full manifest (e.g. analysis-sources).
|
|
41
49
|
*/
|
|
42
|
-
declare function splitOverlappingTiers(entries: ManifestEntry[]): {
|
|
50
|
+
declare function splitOverlappingTiers(entries: ManifestEntry[], queryRange?: {
|
|
51
|
+
start: string;
|
|
52
|
+
end: string;
|
|
53
|
+
}): {
|
|
43
54
|
kept: ManifestEntry[];
|
|
44
55
|
subsumed: ManifestEntry[];
|
|
45
56
|
};
|
|
46
57
|
/** Entries worth reading — see {@link splitOverlappingTiers}. */
|
|
47
|
-
declare function dedupeOverlappingTiers(entries: ManifestEntry[]): ManifestEntry[];
|
|
58
|
+
declare function dedupeOverlappingTiers(entries: ManifestEntry[], queryRange?: {
|
|
59
|
+
start: string;
|
|
60
|
+
end: string;
|
|
61
|
+
}): ManifestEntry[];
|
|
48
62
|
/**
|
|
49
63
|
* Default `searchType` for entries written before the field landed and for
|
|
50
64
|
* sync paths that don't request a specific type. GSC's own default; the
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@gscdump/engine",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.19.0",
|
|
4
|
+
"version": "0.19.1",
|
|
5
5
|
"description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "Harlan Wilton",
|
|
@@ -169,8 +169,8 @@
|
|
|
169
169
|
"dependencies": {
|
|
170
170
|
"drizzle-orm": "^0.45.2",
|
|
171
171
|
"proper-lockfile": "^4.1.2",
|
|
172
|
-
"gscdump": "0.19.0",
|
|
173
|
-
"@gscdump/contracts": "0.19.0"
|
|
172
|
+
"gscdump": "0.19.1",
|
|
173
|
+
"@gscdump/contracts": "0.19.1"
|
|
174
174
|
},
|
|
175
175
|
"devDependencies": {
|
|
176
176
|
"@duckdb/duckdb-wasm": "^1.32.0",
|