@gscdump/engine 0.19.0 → 0.19.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -273,6 +273,23 @@ function normalizeRow(table, row) {
273
273
  url: normalized
274
274
  };
275
275
  }
276
+ const DAILY_PARTITION_RE = /^daily\/(\d{4}-\d{2}-\d{2})$/;
277
+ function queryRangeOf(partitions) {
278
+ if (!partitions) return void 0;
279
+ let min;
280
+ let max;
281
+ for (const p of partitions) {
282
+ const m = DAILY_PARTITION_RE.exec(p);
283
+ if (!m) continue;
284
+ const d = m[1];
285
+ if (min === void 0 || d < min) min = d;
286
+ if (max === void 0 || d > max) max = d;
287
+ }
288
+ return min !== void 0 ? {
289
+ start: min,
290
+ end: max
291
+ } : void 0;
292
+ }
276
293
  function createStorageEngine(opts) {
277
294
  const { dataSource, manifestStore, codec, executor } = opts;
278
295
  const defaultNow = opts.now ?? (() => Date.now());
@@ -406,7 +423,7 @@ function createStorageEngine(opts) {
406
423
  table: ref.table,
407
424
  partitions: ref.partitions,
408
425
  ...opts.searchType !== void 0 ? { searchType: opts.searchType } : {}
409
- })).map((e) => e.objectKey)];
426
+ }), queryRangeOf(ref.partitions)).map((e) => e.objectKey)];
410
427
  }));
411
428
  opts.signal?.throwIfAborted();
412
429
  const fileKeys = {};
@@ -186,9 +186,12 @@ function partitionSpan(partition) {
186
186
  };
187
187
  }
188
188
  }
189
- function splitOverlappingTiers(entries) {
189
+ function splitOverlappingTiers(entries, queryRange) {
190
+ const rangeStartMs = queryRange ? Date.parse(`${queryRange.start}T00:00:00Z`) : void 0;
191
+ const rangeEndMs = queryRange ? Date.parse(`${queryRange.end}T00:00:00Z`) : void 0;
190
192
  const spanned = [];
191
193
  const kept = [];
194
+ const subsumed = [];
192
195
  for (const entry of entries) {
193
196
  const span = partitionSpan(entry.partition);
194
197
  if (!span) {
@@ -196,7 +199,14 @@ function splitOverlappingTiers(entries) {
196
199
  continue;
197
200
  }
198
201
  const days = [];
199
- for (let t = span.startMs; t <= span.endMs; t += MS_PER_DAY) days.push(t);
202
+ for (let t = span.startMs; t <= span.endMs; t += MS_PER_DAY) {
203
+ if (rangeStartMs !== void 0 && (t < rangeStartMs || t > rangeEndMs)) continue;
204
+ days.push(t);
205
+ }
206
+ if (queryRange && days.length === 0) {
207
+ subsumed.push(entry);
208
+ continue;
209
+ }
200
210
  spanned.push({
201
211
  entry,
202
212
  rank: span.rank,
@@ -205,7 +215,6 @@ function splitOverlappingTiers(entries) {
205
215
  }
206
216
  spanned.sort((a, b) => a.rank - b.rank || b.entry.createdAt - a.entry.createdAt);
207
217
  const coveredBySearchType = /* @__PURE__ */ new Map();
208
- const subsumed = [];
209
218
  for (const { entry, days } of spanned) {
210
219
  const slice = inferSearchType(entry);
211
220
  let covered = coveredBySearchType.get(slice);
@@ -225,8 +234,8 @@ function splitOverlappingTiers(entries) {
225
234
  subsumed
226
235
  };
227
236
  }
228
- function dedupeOverlappingTiers(entries) {
229
- return splitOverlappingTiers(entries).kept;
237
+ function dedupeOverlappingTiers(entries, queryRange) {
238
+ return splitOverlappingTiers(entries, queryRange).kept;
230
239
  }
231
240
  function monthEndMs(month) {
232
241
  const [y, m] = month.split("-").map(Number);
@@ -38,13 +38,27 @@ declare function enumeratePartitions(startDate: string, endDate: string): string
38
38
  * monthly) still double-counts those boundary days — eliminating that needs
39
39
  * per-file date predicates in the SQL, tracked separately. Unrecognised
40
40
  * partition shapes (`hourly/`, sidecar keys) are always kept.
41
+ *
42
+ * `queryRange` clamps every entry's day-span to the window the caller will
43
+ * actually read. This is required when `entries` came from a partition-
44
+ * filtered `listLive` (`runSQL` enumerates only the partitions intersecting
45
+ * the query): a `monthly/2026-04` whose Apr 27-30 falls past the query end
46
+ * must not be judged "unsubsumed" just because `weekly/2026-04-27` wasn't
47
+ * enumerated — those out-of-window days are SQL-filtered to nothing anyway.
48
+ * Omit `queryRange` when `entries` is the full manifest (e.g. analysis-sources).
41
49
  */
42
- declare function splitOverlappingTiers(entries: ManifestEntry[]): {
50
+ declare function splitOverlappingTiers(entries: ManifestEntry[], queryRange?: {
51
+ start: string;
52
+ end: string;
53
+ }): {
43
54
  kept: ManifestEntry[];
44
55
  subsumed: ManifestEntry[];
45
56
  };
46
57
  /** Entries worth reading — see {@link splitOverlappingTiers}. */
47
- declare function dedupeOverlappingTiers(entries: ManifestEntry[]): ManifestEntry[];
58
+ declare function dedupeOverlappingTiers(entries: ManifestEntry[], queryRange?: {
59
+ start: string;
60
+ end: string;
61
+ }): ManifestEntry[];
48
62
  /**
49
63
  * Default `searchType` for entries written before the field landed and for
50
64
  * sync paths that don't request a specific type. GSC's own default; the
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@gscdump/engine",
3
3
  "type": "module",
4
- "version": "0.19.0",
4
+ "version": "0.19.1",
5
5
  "description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
6
6
  "author": {
7
7
  "name": "Harlan Wilton",
@@ -169,8 +169,8 @@
169
169
  "dependencies": {
170
170
  "drizzle-orm": "^0.45.2",
171
171
  "proper-lockfile": "^4.1.2",
172
- "gscdump": "0.19.0",
173
- "@gscdump/contracts": "0.19.0"
172
+ "gscdump": "0.19.1",
173
+ "@gscdump/contracts": "0.19.1"
174
174
  },
175
175
  "devDependencies": {
176
176
  "@duckdb/duckdb-wasm": "^1.32.0",