@gscdump/engine 0.27.2 → 0.28.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ export { };
@@ -0,0 +1 @@
1
+ export { };
@@ -0,0 +1,120 @@
1
+ type StorageValue = null | string | number | boolean | object;
2
+ type WatchEvent = "update" | "remove";
3
+ type WatchCallback = (event: WatchEvent, key: string) => any;
4
+ type MaybePromise<T> = T | Promise<T>;
5
+ type MaybeDefined<T> = T extends any ? T : any;
6
+ type Unwatch = () => MaybePromise<void>;
7
+ interface StorageMeta {
8
+ atime?: Date;
9
+ mtime?: Date;
10
+ ttl?: number;
11
+ [key: string]: StorageValue | Date | undefined;
12
+ }
13
+ type TransactionOptions = Record<string, any>;
14
+ type GetKeysOptions = TransactionOptions & {
15
+ maxDepth?: number;
16
+ };
17
+ interface DriverFlags {
18
+ maxDepth?: boolean;
19
+ ttl?: boolean;
20
+ }
21
+ interface Driver<OptionsT = any, InstanceT = any> {
22
+ name?: string;
23
+ flags?: DriverFlags;
24
+ options?: OptionsT;
25
+ getInstance?: () => InstanceT;
26
+ hasItem: (key: string, opts: TransactionOptions) => MaybePromise<boolean>;
27
+ getItem: (key: string, opts?: TransactionOptions) => MaybePromise<StorageValue>;
28
+ /** @experimental */
29
+ getItems?: (items: {
30
+ key: string;
31
+ options?: TransactionOptions;
32
+ }[], commonOptions?: TransactionOptions) => MaybePromise<{
33
+ key: string;
34
+ value: StorageValue;
35
+ }[]>;
36
+ /** @experimental */
37
+ getItemRaw?: (key: string, opts: TransactionOptions) => MaybePromise<unknown>;
38
+ setItem?: (key: string, value: string, opts: TransactionOptions) => MaybePromise<void>;
39
+ /** @experimental */
40
+ setItems?: (items: {
41
+ key: string;
42
+ value: string;
43
+ options?: TransactionOptions;
44
+ }[], commonOptions?: TransactionOptions) => MaybePromise<void>;
45
+ /** @experimental */
46
+ setItemRaw?: (key: string, value: any, opts: TransactionOptions) => MaybePromise<void>;
47
+ removeItem?: (key: string, opts: TransactionOptions) => MaybePromise<void>;
48
+ getMeta?: (key: string, opts: TransactionOptions) => MaybePromise<StorageMeta | null>;
49
+ getKeys: (base: string, opts: GetKeysOptions) => MaybePromise<string[]>;
50
+ clear?: (base: string, opts: TransactionOptions) => MaybePromise<void>;
51
+ dispose?: () => MaybePromise<void>;
52
+ watch?: (callback: WatchCallback) => MaybePromise<Unwatch>;
53
+ }
54
+ type StorageDefinition = {
55
+ items: unknown;
56
+ [key: string]: unknown;
57
+ };
58
+ type StorageItemMap<T> = T extends StorageDefinition ? T["items"] : T;
59
+ type StorageItemType<T, K> = K extends keyof StorageItemMap<T> ? StorageItemMap<T>[K] : T extends StorageDefinition ? StorageValue : T;
60
+ interface Storage$1<T extends StorageValue = StorageValue> {
61
+ hasItem<U extends Extract<T, StorageDefinition>, K extends keyof StorageItemMap<U>>(key: K, opts?: TransactionOptions): Promise<boolean>;
62
+ hasItem(key: string, opts?: TransactionOptions): Promise<boolean>;
63
+ getItem<U extends Extract<T, StorageDefinition>, K extends string & keyof StorageItemMap<U>>(key: K, ops?: TransactionOptions): Promise<StorageItemType<T, K> | null>;
64
+ getItem<R = StorageItemType<T, string>>(key: string, opts?: TransactionOptions): Promise<R | null>;
65
+ /** @experimental */
66
+ getItems: <U extends T>(items: (string | {
67
+ key: string;
68
+ options?: TransactionOptions;
69
+ })[], commonOptions?: TransactionOptions) => Promise<{
70
+ key: string;
71
+ value: U;
72
+ }[]>;
73
+ /** @experimental See https://github.com/unjs/unstorage/issues/142 */
74
+ getItemRaw: <T = any>(key: string, opts?: TransactionOptions) => Promise<MaybeDefined<T> | null>;
75
+ setItem<U extends Extract<T, StorageDefinition>, K extends keyof StorageItemMap<U>>(key: K, value: StorageItemType<T, K>, opts?: TransactionOptions): Promise<void>;
76
+ setItem<U extends T>(key: string, value: U, opts?: TransactionOptions): Promise<void>;
77
+ /** @experimental */
78
+ setItems: <U extends T>(items: {
79
+ key: string;
80
+ value: U;
81
+ options?: TransactionOptions;
82
+ }[], commonOptions?: TransactionOptions) => Promise<void>;
83
+ /** @experimental See https://github.com/unjs/unstorage/issues/142 */
84
+ setItemRaw: <T = any>(key: string, value: MaybeDefined<T>, opts?: TransactionOptions) => Promise<void>;
85
+ removeItem<U extends Extract<T, StorageDefinition>, K extends keyof StorageItemMap<U>>(key: K, opts?: (TransactionOptions & {
86
+ removeMeta?: boolean;
87
+ }) | boolean): Promise<void>;
88
+ removeItem(key: string, opts?: (TransactionOptions & {
89
+ removeMeta?: boolean;
90
+ }) | boolean): Promise<void>;
91
+ getMeta: (key: string, opts?: (TransactionOptions & {
92
+ nativeOnly?: boolean;
93
+ }) | boolean) => MaybePromise<StorageMeta>;
94
+ setMeta: (key: string, value: StorageMeta, opts?: TransactionOptions) => Promise<void>;
95
+ removeMeta: (key: string, opts?: TransactionOptions) => Promise<void>;
96
+ getKeys: (base?: string, opts?: GetKeysOptions) => Promise<string[]>;
97
+ clear: (base?: string, opts?: TransactionOptions) => Promise<void>;
98
+ dispose: () => Promise<void>;
99
+ watch: (callback: WatchCallback) => Promise<Unwatch>;
100
+ unwatch: () => Promise<void>;
101
+ mount: (base: string, driver: Driver) => Storage$1;
102
+ unmount: (base: string, dispose?: boolean) => Promise<void>;
103
+ getMount: (key?: string) => {
104
+ base: string;
105
+ driver: Driver;
106
+ };
107
+ getMounts: (base?: string, options?: {
108
+ parents?: boolean;
109
+ }) => {
110
+ base: string;
111
+ driver: Driver;
112
+ }[];
113
+ keys: Storage$1["getKeys"];
114
+ get: Storage$1<T>["getItem"];
115
+ set: Storage$1<T>["setItem"];
116
+ has: Storage$1<T>["hasItem"];
117
+ del: Storage$1<T>["removeItem"];
118
+ remove: Storage$1<T>["removeItem"];
119
+ }
120
+ export { Storage$1 as Storage };
@@ -1,8 +1,20 @@
1
- import { Row as Row$1, SearchType, TenantCtx as TenantCtx$1 } from "./storage.mjs";
1
+ import { QueryProfiler, Row as Row$1, SearchType, TenantCtx as TenantCtx$1 } from "./storage.mjs";
2
2
  import { EngineError } from "./errors.mjs";
3
+ import { Storage } from "./libs/unstorage.mjs";
4
+ import { cachingResolver, icebergAppend, restCatalogConnect } from "./libs/icebird.mjs";
3
5
  import { Result } from "gscdump/result";
4
- import { icebergAppend, restCatalogConnect, s3SignedResolver } from "icebird";
5
6
  import { TableName } from "@gscdump/contracts";
7
+ /** Injected catalog cache: an unstorage `Storage` plus an optional defer hook. */
8
+ interface CatalogCache {
9
+ /** unstorage storage instance — the driver is the caller's choice. */
10
+ storage: Storage;
11
+ /**
12
+ * Optional hook to run a cache WRITE off the response critical path, e.g.
13
+ * Cloudflare's `ctx.waitUntil`. When omitted the writer awaits the put
14
+ * inline so it is never cut off when the response returns.
15
+ */
16
+ defer?: (write: Promise<unknown>) => void;
17
+ }
6
18
  /**
7
19
  * S3-compatible credentials for the Iceberg warehouse object store (R2 in prod,
8
20
  * MinIO in the POC). The single definition shared by every catalog/writer/sink
@@ -150,8 +162,8 @@ interface IcebergCatalogConfig {
150
162
  interface IcebergConnection {
151
163
  /** icebird REST catalog context, passed as `{ catalog }` to icebird write fns. */
152
164
  catalog: Awaited<ReturnType<typeof restCatalogConnect>>;
153
- /** icebird S3 resolver, passed as `{ resolver }` to icebird write fns. */
154
- resolver: ReturnType<typeof s3SignedResolver>;
165
+ /** icebird S3 resolver (caching-wrapped), passed as `{ resolver }` to icebird fns. */
166
+ resolver: ReturnType<typeof cachingResolver>;
155
167
  /** The namespace the fact tables live under. */
156
168
  namespace: string;
157
169
  }
@@ -168,12 +180,30 @@ declare function icebergSchemaFor(table: IcebergTableName): IcebergSchema;
168
180
  * {@link icebergSchemaFor}.
169
181
  */
170
182
  declare function icebergPartitionSpecFor(table: IcebergTableName): IcebergPartitionSpec;
183
+ /** Options for {@link connectIcebergCatalog}. */
184
+ interface ConnectIcebergOptions {
185
+ /**
186
+ * Optional cross-isolate cache (any unstorage driver). When supplied, the
187
+ * `/v1/config` REST probe is served from cache on a warm catalog, removing
188
+ * one serial network hop from cold-isolate connects. The bearer token is
189
+ * NEVER cached — only the warehouse-static routing config (`url`, `prefix`,
190
+ * `defaults`, `overrides`) is; `requestInit` is rebuilt from `config`.
191
+ */
192
+ cache?: CatalogCache;
193
+ /** Injectable clock for the cache TTL. Defaults to `Date.now`. */
194
+ clock?: () => number;
195
+ }
171
196
  /**
172
197
  * Connect to the R2 Data Catalog: a REST catalog context + a signed S3
173
198
  * resolver. Runs in Node and in `workerd` — SigV4 is Web Crypto, I/O is
174
199
  * `fetch`, no node builtins.
200
+ *
201
+ * With a `cache`, the `/v1/config` probe is skipped on a warm catalog and the
202
+ * context is rebuilt from the cached routing config plus the freshly-derived
203
+ * bearer `requestInit`. icebird reads only `url`/`prefix`/`requestInit` from
204
+ * the context downstream, so this is a faithful, secret-free reconstruction.
175
205
  */
176
- declare function connectIcebergCatalog(config: IcebergCatalogConfig): Promise<IcebergConnection>;
206
+ declare function connectIcebergCatalog(config: IcebergCatalogConfig, opts?: ConnectIcebergOptions): Promise<IcebergConnection>;
177
207
  /** Tunable retry policy for {@link icebergAppendRetrying}. */
178
208
  interface CommitRetryOptions {
179
209
  /** Total attempts, including the first. Default 6. */
@@ -266,14 +296,35 @@ interface ListIcebergDataFilesOptions {
266
296
  start: string;
267
297
  end: string;
268
298
  };
299
+ /**
300
+ * Optional cross-isolate cache (any unstorage driver). When supplied, the
301
+ * snapshot pointer is cached short (so a warm catalog skips `loadTable`) and
302
+ * the resolved file list is cached long, content-addressed by snapshot id
303
+ * (so it skips the manifest walk). Omit it to read straight from the catalog.
304
+ */
305
+ cache?: CatalogCache;
306
+ /** Injectable clock for the cache TTLs. Defaults to `Date.now`. */
307
+ clock?: () => number;
308
+ /**
309
+ * Optional read-path profiler. Emits `iceberg.snapshot` (snapshot-pointer
310
+ * load), `iceberg.cache` (resolved-files lookup + hit/miss), and
311
+ * `iceberg.walk` (manifest fetch + entry scan, with manifest/file counts) —
312
+ * the catalog cold-start breakdown a hosted reader wants in `Server-Timing`.
313
+ */
314
+ profiler?: QueryProfiler;
269
315
  }
270
316
  /**
271
- * List the parquet data files in the current snapshot of `table`, filtered
272
- * to a single partition slice `(siteId, searchType, month(date) ∈ range)`.
317
+ * List the parquet data files in the current snapshot of `table`, filtered to a
318
+ * single partition slice `(siteId, searchType, month(date) ∈ range)`.
273
319
  *
274
- * Cost: 1 REST `loadTable` + N manifest fetches (typically 1–10 small Avro
275
- * files). Iceberg returns the manifest list embedded in `metadata`, so a
276
- * cached `metadata` would let callers skip the REST call entirely.
320
+ * The shared `gsc.<table>` tables are multi-tenant, so a naive walk is O(all
321
+ * tenants). This prunes the manifest LIST by partition summaries before
322
+ * fetching any manifest's entries (see {@link buildPartitionFilter}), making
323
+ * the fetch count independent of tenant count, and — when an unstorage `cache`
324
+ * is supplied — skips the `loadTable` round-trip on a warm snapshot pointer and
325
+ * the manifest walk entirely on a resolved-files hit. The final entry-level
326
+ * partition filter is the authoritative correctness check; pruning only avoids
327
+ * reading manifests that cannot match.
277
328
  *
278
329
  * Skips deleted entries (status=2) and non-data file types (delete files).
279
330
  * Returns object keys + bytes + rowCount so the caller can build presigned
@@ -396,4 +447,4 @@ interface LocalIcebergSinkOptions extends SinkOptions {
396
447
  /** S3-compatible warehouse location (POC: MinIO). */
397
448
  warehouse: string;
398
449
  }
399
- export { CommitRetryOptions, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, IcebergAppendSinkOptions, IcebergCatalogConfig, IcebergColumn, IcebergColumnType, IcebergConnection, IcebergListedDataFile, IcebergPartitionField, IcebergPartitionSpec, IcebergPartitionSpecField, IcebergPartitionTransform, IcebergPrimitiveType, IcebergS3Config, IcebergSchema, IcebergSchemaField, IcebergTableName, IcebergTableOpResult, IcebergTableSpec, ListIcebergDataFilesOptions, LocalIcebergSinkOptions, Sink, SinkCapabilities, SinkCloseResult, SinkOptions, SinkSlice, SinkWriteResult, assertIcebergTable, connectIcebergCatalog, createIcebergTables, dropIcebergTables, ensureIcebergNamespace, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, isCommitRateLimited, isIcebergTable, listIcebergDataFiles, listIcebergTables };
450
+ export { CatalogCache, CommitRetryOptions, ConnectIcebergOptions, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, IcebergAppendSinkOptions, IcebergCatalogConfig, IcebergColumn, IcebergColumnType, IcebergConnection, IcebergListedDataFile, IcebergPartitionField, IcebergPartitionSpec, IcebergPartitionSpecField, IcebergPartitionTransform, IcebergPrimitiveType, IcebergS3Config, IcebergSchema, IcebergSchemaField, IcebergTableName, IcebergTableOpResult, IcebergTableSpec, ListIcebergDataFilesOptions, LocalIcebergSinkOptions, Sink, SinkCapabilities, SinkCloseResult, SinkOptions, SinkSlice, SinkWriteResult, assertIcebergTable, connectIcebergCatalog, createIcebergTables, dropIcebergTables, ensureIcebergNamespace, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, isCommitRateLimited, isIcebergTable, listIcebergDataFiles, listIcebergTables };
@@ -72,9 +72,30 @@ interface WriteCtx extends TenantCtx {
72
72
  */
73
73
  grain?: Grain;
74
74
  }
75
+ /**
76
+ * A closed profiling span: a named slice of read-path work with its
77
+ * wall-clock cost and optional dimensional meta (file counts, row counts).
78
+ * Emitted by an injected {@link QueryProfiler}; see `./profile.ts`.
79
+ */
80
+ interface QuerySpan {
81
+ readonly name: string;
82
+ readonly ms: number;
83
+ readonly meta?: Readonly<Record<string, string | number | boolean>>;
84
+ }
85
+ /**
86
+ * Injected read-path profiler. `start(name, meta)` opens a span and returns an
87
+ * `end` thunk to call when that work finishes (merging completion-only meta).
88
+ * Absent by default — every emit site optional-chains it, so an unprofiled
89
+ * query pays nothing. Build one with `createQueryProfiler` / `collectSpans`.
90
+ */
91
+ interface QueryProfiler {
92
+ readonly start: (name: string, meta?: Record<string, string | number | boolean>) => (extra?: Record<string, string | number | boolean>) => void;
93
+ }
75
94
  interface QueryCtx extends TenantCtx {
76
95
  table?: TableName;
77
96
  signal?: AbortSignal;
97
+ /** Optional read-path profiler; forwarded into `runSQL` and the executor. */
98
+ profiler?: QueryProfiler;
78
99
  /**
79
100
  * Restrict the query to a single GSC search-type partition (`web`,
80
101
  * `discover`, etc.). Undefined preserves the cross-type union for
@@ -390,6 +411,11 @@ interface QueryExecuteOptions {
390
411
  bytes?: number;
391
412
  reason: string;
392
413
  }) => void;
414
+ /**
415
+ * Optional profiler. An instrumented executor emits `files.register` and
416
+ * `query.run` spans through it; an absent profiler is a no-op skip.
417
+ */
418
+ profiler?: QueryProfiler;
393
419
  }
394
420
  interface QueryExecuteResult {
395
421
  rows: Row[];
@@ -449,6 +475,12 @@ interface RunSQLOptions {
449
475
  * Undefined keeps the legacy cross-type union.
450
476
  */
451
477
  searchType?: SearchType;
478
+ /**
479
+ * Optional read-path profiler. `runSQL` emits `manifest.list` +
480
+ * `executor.execute` spans and forwards it into the executor for the
481
+ * finer `files.register` / `query.run` breakdown.
482
+ */
483
+ profiler?: QueryProfiler;
452
484
  }
453
485
  interface StorageEngine {
454
486
  writeDay: (ctx: WriteCtx, rows: Row[]) => Promise<void>;
@@ -541,4 +573,4 @@ interface EngineOptions {
541
573
  executor: QueryExecutor;
542
574
  now?: () => number;
543
575
  }
544
- export { CodecCtx, CompactionThresholds, CompactionTier, DataSource, EngineOptions, FileSetRef, GcCtx, type Grain$1 as Grain, ListLiveFilter, LockScope, ManifestEntry, ManifestPurgeResult, ManifestStore, ParquetCodec, PurgeFilter, PurgeResult, PurgeUrlsResult, QueryCtx, QueryExecuteOptions, QueryExecuteResult, QueryExecutor, QueryResult, type Row$1 as Row, RunSQLOptions, type SearchType$1 as SearchType, StorageEngine, SyncState, SyncStateDetail, SyncStateFilter, SyncStateKind, SyncStateScope, type TableName$1 as TableName, type TenantCtx$1 as TenantCtx, Watermark, WatermarkFilter, WatermarkScope, WriteCtx, WriteResult, dedupeOverlappingTiers, enumeratePartitions, splitOverlappingTiers };
576
+ export { CodecCtx, CompactionThresholds, CompactionTier, DataSource, EngineOptions, FileSetRef, GcCtx, type Grain$1 as Grain, ListLiveFilter, LockScope, ManifestEntry, ManifestPurgeResult, ManifestStore, ParquetCodec, PurgeFilter, PurgeResult, PurgeUrlsResult, QueryCtx, QueryExecuteOptions, QueryExecuteResult, QueryExecutor, QueryProfiler, QueryResult, QuerySpan, type Row$1 as Row, RunSQLOptions, type SearchType$1 as SearchType, StorageEngine, SyncState, SyncStateDetail, SyncStateFilter, SyncStateKind, SyncStateScope, type TableName$1 as TableName, type TenantCtx$1 as TenantCtx, Watermark, WatermarkFilter, WatermarkScope, WriteCtx, WriteResult, dedupeOverlappingTiers, enumeratePartitions, splitOverlappingTiers };
@@ -1,4 +1,5 @@
1
- import { CommitRetryOptions, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, IcebergAppendSinkOptions, IcebergCatalogConfig, IcebergColumn, IcebergColumnType, IcebergConnection, IcebergListedDataFile, IcebergPartitionField, IcebergPartitionSpec, IcebergPartitionSpecField, IcebergPartitionTransform, IcebergPrimitiveType, IcebergS3Config, IcebergSchema, IcebergSchemaField, IcebergTableName, IcebergTableOpResult, IcebergTableSpec, ListIcebergDataFilesOptions, Sink, assertIcebergTable, connectIcebergCatalog, createIcebergTables, dropIcebergTables, ensureIcebergNamespace, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, isCommitRateLimited, isIcebergTable, listIcebergDataFiles, listIcebergTables } from "../_chunks/sink.mjs";
1
+ import { CatalogCache, CommitRetryOptions, ConnectIcebergOptions, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, IcebergAppendSinkOptions, IcebergCatalogConfig, IcebergColumn, IcebergColumnType, IcebergConnection, IcebergListedDataFile, IcebergPartitionField, IcebergPartitionSpec, IcebergPartitionSpecField, IcebergPartitionTransform, IcebergPrimitiveType, IcebergS3Config, IcebergSchema, IcebergSchemaField, IcebergTableName, IcebergTableOpResult, IcebergTableSpec, ListIcebergDataFilesOptions, Sink, assertIcebergTable, connectIcebergCatalog, createIcebergTables, dropIcebergTables, ensureIcebergNamespace, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, isCommitRateLimited, isIcebergTable, listIcebergDataFiles, listIcebergTables } from "../_chunks/sink.mjs";
2
+ import { icebergCreateTable, icebergManifests, restCatalogLoadTable } from "../_chunks/libs/icebird.mjs";
2
3
  type IcebergAppendSink = Sink;
3
4
  /**
4
5
  * Create an `IcebergAppendSink` over the R2 Data Catalog.
@@ -9,4 +10,4 @@ type IcebergAppendSink = Sink;
9
10
  * with no rows never touches the network.
10
11
  */
11
12
  declare function createIcebergAppendSink(options: IcebergAppendSinkOptions): IcebergAppendSink;
12
- export { type CommitRetryOptions, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, type IcebergAppendSink, type IcebergAppendSinkOptions, type IcebergCatalogConfig, type IcebergColumn, type IcebergColumnType, type IcebergConnection, type IcebergListedDataFile, type IcebergPartitionField, type IcebergPartitionSpec, type IcebergPartitionSpecField, type IcebergPartitionTransform, type IcebergPrimitiveType, type IcebergS3Config, type IcebergSchema, type IcebergSchemaField, type IcebergTableName, type IcebergTableOpResult, type IcebergTableSpec, type ListIcebergDataFilesOptions, assertIcebergTable, connectIcebergCatalog, createIcebergAppendSink, createIcebergTables, dropIcebergTables, ensureIcebergNamespace, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, isCommitRateLimited, isIcebergTable, listIcebergDataFiles, listIcebergTables };
13
+ export { type CatalogCache, type CommitRetryOptions, type ConnectIcebergOptions, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, type IcebergAppendSink, type IcebergAppendSinkOptions, type IcebergCatalogConfig, type IcebergColumn, type IcebergColumnType, type IcebergConnection, type IcebergListedDataFile, type IcebergPartitionField, type IcebergPartitionSpec, type IcebergPartitionSpecField, type IcebergPartitionTransform, type IcebergPrimitiveType, type IcebergS3Config, type IcebergSchema, type IcebergSchemaField, type IcebergTableName, type IcebergTableOpResult, type IcebergTableSpec, type ListIcebergDataFilesOptions, assertIcebergTable, connectIcebergCatalog, createIcebergAppendSink, createIcebergTables, dropIcebergTables, ensureIcebergNamespace, icebergAppendRetrying, icebergCreateTable, icebergManifests, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, isCommitRateLimited, isIcebergTable, listIcebergDataFiles, listIcebergTables, restCatalogLoadTable };
@@ -1,7 +1,72 @@
1
1
  import { engineErrors } from "../errors.mjs";
2
2
  import { ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, assertIcebergTable, icebergTableSpec, isIcebergTable } from "../_chunks/schema2.mjs";
3
+ import { cachingResolver, icebergAppend, icebergCreateTable, icebergDropTable, icebergManifests, restCatalogConnect, restCatalogCreateNamespace, restCatalogListTables, restCatalogLoadTable, s3SignedResolver } from "../_chunks/libs/icebird.mjs";
3
4
  import { err, ok } from "gscdump/result";
4
- import { icebergAppend, icebergCreateTable, icebergDropTable, icebergManifests, restCatalogConnect, restCatalogCreateNamespace, restCatalogListTables, restCatalogLoadTable, s3SignedResolver } from "icebird";
5
/**
 * Best-effort read of a TTL-boxed value from the injected catalog cache.
 *
 * Entries are stored as `{ v, exp }` boxes (the shape `cachePut` writes). A
 * box is honoured only while `exp` is a number strictly greater than `now`.
 *
 * @param cache Injected cache; only `cache.storage.getItem` is used here.
 * @param key   Storage key to look up.
 * @param now   Current time in epoch milliseconds, compared against `exp`.
 * @returns The boxed value `v` (which may itself be `null`), or `undefined`
 *          on a miss, an expired/malformed box, or any storage failure.
 */
async function cacheGet(cache, key, now) {
  let boxed;
  try {
    // `await` inside `try` so a driver that throws synchronously (or returns a
    // plain value instead of a promise) is handled exactly like a rejection:
    // the cache is an optimisation and must never fail the read path.
    boxed = await cache.storage.getItem(key);
  } catch {
    return void 0;
  }
  if (!boxed || typeof boxed.exp !== "number" || boxed.exp <= now) return void 0;
  return boxed.v;
}
10
/**
 * Best-effort write of a TTL-boxed value into the injected catalog cache.
 *
 * The value is wrapped as `{ v, exp }` with `exp = now + ttlMs`, and the same
 * TTL is passed to the driver as whole seconds (rounded up) via the `ttl`
 * option. Write failures are swallowed: the cache is an optimisation only.
 *
 * @param cache Injected cache: `storage.setItem` plus an optional `defer` hook.
 * @param key   Storage key to write.
 * @param value Payload to cache.
 * @param ttlMs Time-to-live in milliseconds.
 * @param now   Current time in epoch milliseconds.
 * @returns A promise that never rejects. With a `defer` hook the write is
 *          handed to the hook and an already-resolved promise is returned;
 *          without one the returned promise settles when the write does.
 */
function cachePut(cache, key, value, ttlMs, now) {
  const boxed = {
    v: value,
    exp: now + ttlMs
  };
  let write;
  try {
    // `Promise.resolve` tolerates a driver that returns a non-promise; the
    // surrounding `try` tolerates one that throws synchronously. Either way
    // the caller sees a promise that cannot reject.
    write = Promise.resolve(cache.storage.setItem(key, boxed, { ttl: Math.ceil(ttlMs / 1e3) })).catch(() => {});
  } catch {
    write = Promise.resolve();
  }
  if (cache.defer) {
    // Hand the write to the caller's hook so it runs off the critical path.
    cache.defer(write);
    return Promise.resolve();
  }
  return write;
}
22
// Positions of the three partition fields inside ICEBERG_PARTITION_SPEC.
// `findIndex` yields -1 when a field is absent from the spec.
const SITE_ID_FIELD_INDEX = ICEBERG_PARTITION_SPEC.findIndex(
  (field) => field.sourceColumn === "site_id" && field.transform === "identity"
);
const SEARCH_TYPE_FIELD_INDEX = ICEBERG_PARTITION_SPEC.findIndex(
  (field) => field.sourceColumn === "search_type" && field.transform === "identity"
);
const DATE_MONTH_FIELD_INDEX = ICEBERG_PARTITION_SPEC.findIndex(
  (field) => field.transform === "month"
);
25
/**
 * Normalise a byte-ish value to a `Uint8Array`.
 *
 * @param bytes A `Uint8Array` (returned as-is), anything the `Uint8Array`
 *              constructor accepts, or `null`/`undefined`.
 * @returns The bytes, or `null` when the input is `null`/`undefined`.
 */
function toUint8(bytes) {
  if (bytes == null) return null;
  return bytes instanceof Uint8Array ? bytes : new Uint8Array(bytes);
}

/**
 * Decode a byte-ish bound to a string with `TextDecoder` (UTF-8 by default).
 *
 * @returns The decoded string, or `null` when the input is `null`/`undefined`.
 */
function decodeString(bytes) {
  const u = toUint8(bytes);
  return u == null ? null : new TextDecoder().decode(u);
}

/**
 * Decode a byte-ish bound as a little-endian signed 32-bit integer.
 *
 * @returns The integer, or `null` when the input is `null`/`undefined` or
 *          holds fewer than 4 bytes.
 */
function decodeInt(bytes) {
  const u = toUint8(bytes);
  // A bound shorter than 4 bytes cannot hold an int32 and `getInt32` would
  // throw a RangeError. Report it as "unknown" so callers treat the bound as
  // absent instead of failing.
  if (u == null || u.byteLength < 4) return null;
  // Honour byteOffset/byteLength: `u` may be a view into a larger buffer.
  return new DataView(u.buffer, u.byteOffset, u.byteLength).getInt32(0, true);
}
38
/**
 * Build a predicate over a manifest's partition field summaries that says
 * whether the manifest may contain data for the wanted slice.
 *
 * The predicate is conservative: it returns `false` only when a summary has
 * BOTH bounds present and the wanted value falls outside them. Missing
 * summaries, missing bounds, or an empty/absent summary list all yield `true`.
 *
 * @param siteId       Wanted site id, compared against the site_id bounds.
 * @param searchType   Wanted search type, compared against the search_type bounds.
 * @param wantedMonths Iterable of wanted month ordinals, compared against the
 *                     month-transform bounds.
 * @returns `(partitions) => boolean` — `true` to keep the manifest.
 */
function buildPartitionFilter(siteId, searchType, wantedMonths) {
  // True only when both string bounds decode and `wanted` lies outside them.
  const outsideStringBounds = (summary, wanted) => {
    if (!summary) return false;
    const lower = decodeString(summary.lower_bound);
    const upper = decodeString(summary.upper_bound);
    if (lower == null || upper == null) return false;
    return wanted < lower || wanted > upper;
  };
  // True only when both int bounds decode and no wanted month lies within them.
  const noWantedMonthWithin = (summary) => {
    if (!summary) return false;
    const first = decodeInt(summary.lower_bound);
    const last = decodeInt(summary.upper_bound);
    if (first == null || last == null) return false;
    for (const month of wantedMonths) {
      if (month >= first && month <= last) return false;
    }
    return true;
  };
  return (partitions) => {
    // No summaries at all: nothing to prune on, so keep the manifest.
    if (!partitions || partitions.length === 0) return true;
    if (outsideStringBounds(partitions[SITE_ID_FIELD_INDEX], siteId)) return false;
    if (outsideStringBounds(partitions[SEARCH_TYPE_FIELD_INDEX], searchType)) return false;
    return !noWantedMonthWithin(partitions[DATE_MONTH_FIELD_INDEX]);
  };
}
5
70
  const ICEBERG_TYPE_MAP = {
6
71
  STRING: "string",
7
72
  INT: "int",
@@ -38,20 +103,51 @@ function icebergPartitionSpecFor(table) {
38
103
  }))
39
104
  };
40
105
  }
41
- async function connectIcebergCatalog(config) {
42
- return {
43
- catalog: await restCatalogConnect({
106
/** TTL for the cached catalog routing config: one hour, in milliseconds. */
const CATALOG_CONFIG_TTL_MS = 60 * 60 * 1e3;

/**
 * Cache key for a catalog's routing config, derived from the catalog URI and
 * the warehouse, joined with NUL separators. The token is not part of the key.
 */
function catalogConfigKey(config) {
  const { catalogUri, warehouse } = config;
  return `gsc-catalog-cfg\0${catalogUri}\0${warehouse}`;
}
110
/**
 * Connect to the Iceberg REST catalog and build the S3 resolver.
 *
 * With `opts.cache`, the routing config (`url`, `prefix`, `defaults`,
 * `overrides`) is read from the cache and the catalog context is rebuilt
 * locally, skipping `restCatalogConnect`. The bearer `requestInit` is always
 * derived from `config` and is never written to the cache.
 *
 * @param config Catalog URI, warehouse, token, namespace and S3 credentials.
 * @param opts   Optional `cache` (storage + optional `defer`) and `clock`
 *               (defaults to `Date.now`) used for cache expiry.
 * @returns `{ catalog, resolver, namespace }`.
 */
async function connectIcebergCatalog(config, opts = {}) {
  const now = (opts.clock ?? Date.now)();
  const requestInit = { headers: { Authorization: `Bearer ${config.catalogToken}` } };
  let catalog;
  if (opts.cache) {
    const cached = await cacheGet(opts.cache, catalogConfigKey(config), now);
    // Only trust an entry that carries a usable URL. A malformed entry would
    // otherwise yield a context with no `url` and break every catalog call
    // until the TTL expired; falling through re-probes and overwrites it.
    if (cached && typeof cached.url === "string" && cached.url !== "") {
      catalog = Object.freeze({
        type: "rest",
        url: cached.url,
        prefix: cached.prefix,
        defaults: cached.defaults,
        overrides: cached.overrides,
        requestInit
      });
    }
  }
  if (!catalog) {
    catalog = await restCatalogConnect({
      url: config.catalogUri,
      warehouse: config.warehouse,
      requestInit
    });
    if (opts.cache) {
      // Cache routing fields only — `requestInit` (the token) is left out.
      const toCache = {
        url: catalog.url,
        prefix: catalog.prefix,
        defaults: catalog.defaults,
        overrides: catalog.overrides
      };
      await cachePut(opts.cache, catalogConfigKey(config), toCache, CATALOG_CONFIG_TTL_MS, now);
    }
  }
  const resolver = cachingResolver(s3SignedResolver({
    accessKeyId: config.s3.accessKeyId,
    secretAccessKey: config.s3.secretAccessKey,
    region: config.s3.region ?? "auto",
    endpoint: config.s3.endpoint,
    pathStyle: true
  }));
  return {
    catalog,
    resolver,
    namespace: config.namespace
  };
}
@@ -100,6 +196,14 @@ async function createIcebergTables(conn, tables = ICEBERG_TABLES) {
100
196
  async function listIcebergTables(conn) {
101
197
  return (await restCatalogListTables(conn.catalog, { namespace: conn.namespace })).map((t) => t.name).sort();
102
198
  }
199
/** TTL for the cached snapshot pointer: 30 seconds, in milliseconds. */
const SNAPSHOT_REF_TTL_MS = 30 * 1e3;
/** TTL for a cached resolved file list: 24 hours, in milliseconds. */
const RESOLVED_FILES_TTL_MS = 24 * 60 * 60 * 1e3;

/** Cache key for a table's current-snapshot pointer (NUL-separated). */
function snapshotRefKey(namespace, table) {
  const key = `gsc-snapref\0${namespace}\0${table}`;
  return key;
}
204
/**
 * Cache key for a resolved data-file list. It combines namespace, table,
 * snapshot id, site id, search type and the wanted months (sorted ascending
 * and comma-joined), all NUL-separated. The input collection is not mutated.
 */
function resolvedFilesKey(namespace, table, snapshotId, siteId, searchType, wantedMonths) {
  const months = Array.from(wantedMonths);
  // Numeric sort so the key does not depend on the input's iteration order.
  months.sort((left, right) => left - right);
  const monthList = months.join(",");
  return `gsc-files\0${namespace}\0${table}\0${snapshotId}\0${siteId}\0${searchType}\0${monthList}`;
}
103
207
  function monthsInRange(range) {
104
208
  const [sy, sm] = range.start.split("-").map(Number);
105
209
  const [ey, em] = range.end.split("-").map(Number);
@@ -126,16 +230,56 @@ function stripBucket(filePath) {
126
230
  const slash = rest.indexOf("/");
127
231
  return slash >= 0 ? rest.slice(slash + 1) : rest;
128
232
  }
129
- async function listIcebergDataFiles(conn, opts) {
233
/**
 * Resolve the current snapshot id of `opts.table`.
 *
 * With `opts.cache`, a cached pointer is returned without loading the table;
 * in that case `metadata` is `null`. Otherwise the table is loaded from the
 * catalog, the id is normalised to a string (or `null` when the table has no
 * current snapshot), written to the cache if one is supplied, and returned
 * together with the loaded metadata.
 *
 * @param conn Connection providing `catalog` and `namespace`.
 * @param opts Options providing `table` and an optional `cache`.
 * @param now  Current time in epoch milliseconds, used for cache expiry.
 * @returns `{ snapshotId, metadata }`.
 */
async function loadSnapshotId(conn, opts, now) {
  const { cache, table } = opts;
  if (cache) {
    const hit = await cacheGet(cache, snapshotRefKey(conn.namespace, table), now);
    // A cached `null` (table without a snapshot) is still a hit; only
    // `undefined` means miss.
    if (hit !== void 0) {
      return { snapshotId: hit, metadata: null };
    }
  }
  const loaded = await restCatalogLoadTable(conn.catalog, {
    namespace: conn.namespace,
    table
  });
  const metadata = loaded.metadata;
  const current = metadata["current-snapshot-id"];
  const snapshotId = current == null ? null : String(current);
  if (cache) {
    await cachePut(cache, snapshotRefKey(conn.namespace, table), snapshotId, SNAPSHOT_REF_TTL_MS, now);
  }
  return { snapshotId, metadata };
}
253
+ async function listIcebergDataFiles(conn, opts) {
254
+ const profiler = opts.profiler;
255
+ const now = (opts.clock ?? Date.now)();
135
256
  const wantedMonths = new Set(monthsInRange(opts.range).map(monthsSinceEpoch));
257
+ const endSnapshot = profiler?.start("iceberg.snapshot");
258
+ let { snapshotId, metadata } = await loadSnapshotId(conn, opts, now);
259
+ endSnapshot?.({ cached: metadata == null && snapshotId != null });
260
+ if (snapshotId == null) return [];
261
+ const filesKey = resolvedFilesKey(conn.namespace, opts.table, snapshotId, opts.siteId, opts.searchType, wantedMonths);
262
+ if (opts.cache) {
263
+ const endCache = profiler?.start("iceberg.cache");
264
+ const cached = await cacheGet(opts.cache, filesKey, now);
265
+ endCache?.({ hit: cached !== void 0 });
266
+ if (cached !== void 0) return cached;
267
+ }
268
+ if (!metadata) {
269
+ const reloaded = await loadSnapshotId(conn, {
270
+ ...opts,
271
+ cache: void 0
272
+ }, now);
273
+ snapshotId = reloaded.snapshotId;
274
+ metadata = reloaded.metadata;
275
+ if (snapshotId == null || !metadata) return [];
276
+ }
277
+ const endWalk = profiler?.start("iceberg.walk");
278
+ const partitionFilter = buildPartitionFilter(opts.siteId, opts.searchType, wantedMonths);
136
279
  const manifests = await icebergManifests({
137
280
  metadata,
138
- resolver: conn.resolver
281
+ resolver: conn.resolver,
282
+ partitionFilter
139
283
  });
140
284
  const out = [];
141
285
  for (const m of manifests) for (const entry of m.entries) {
@@ -154,6 +298,14 @@ async function listIcebergDataFiles(conn, opts) {
154
298
  rowCount: Number(df.record_count)
155
299
  });
156
300
  }
301
+ endWalk?.({
302
+ manifests: manifests.length,
303
+ files: out.length
304
+ });
305
+ if (opts.cache) {
306
+ const freshKey = resolvedFilesKey(conn.namespace, opts.table, snapshotId, opts.siteId, opts.searchType, wantedMonths);
307
+ await cachePut(opts.cache, freshKey, out, RESOLVED_FILES_TTL_MS, now);
308
+ }
157
309
  return out;
158
310
  }
159
311
  async function dropIcebergTables(conn, tables) {
@@ -266,4 +418,4 @@ function createIcebergAppendSink(options) {
266
418
  }
267
419
  };
268
420
  }
269
- export { ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, assertIcebergTable, connectIcebergCatalog, createIcebergAppendSink, createIcebergTables, dropIcebergTables, ensureIcebergNamespace, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, isCommitRateLimited, isIcebergTable, listIcebergDataFiles, listIcebergTables };
421
+ export { ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, assertIcebergTable, connectIcebergCatalog, createIcebergAppendSink, createIcebergTables, dropIcebergTables, ensureIcebergNamespace, icebergAppendRetrying, icebergCreateTable, icebergManifests, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, isCommitRateLimited, isIcebergTable, listIcebergDataFiles, listIcebergTables, restCatalogLoadTable };
package/dist/index.d.mts CHANGED
@@ -1,4 +1,4 @@
1
- import { CodecCtx, CompactionThresholds, CompactionTier, DataSource, EngineOptions, FileSetRef, GcCtx, Grain, ListLiveFilter, LockScope, ManifestEntry, ManifestPurgeResult, ManifestStore, ParquetCodec, PurgeFilter, PurgeResult, PurgeUrlsResult, QueryCtx, QueryExecuteOptions, QueryExecuteResult, QueryExecutor, QueryResult, Row, RunSQLOptions, SearchType, StorageEngine, SyncState, SyncStateDetail, SyncStateFilter, SyncStateKind, SyncStateScope, TableName, TenantCtx, Watermark, WatermarkFilter, WatermarkScope, WriteCtx, WriteResult, enumeratePartitions } from "./_chunks/storage.mjs";
1
+ import { CodecCtx, CompactionThresholds, CompactionTier, DataSource, EngineOptions, FileSetRef, GcCtx, Grain, ListLiveFilter, LockScope, ManifestEntry, ManifestPurgeResult, ManifestStore, ParquetCodec, PurgeFilter, PurgeResult, PurgeUrlsResult, QueryCtx, QueryExecuteOptions, QueryExecuteResult, QueryExecutor, QueryProfiler, QueryResult, QuerySpan, Row, RunSQLOptions, SearchType, StorageEngine, SyncState, SyncStateDetail, SyncStateFilter, SyncStateKind, SyncStateScope, TableName, TenantCtx, Watermark, WatermarkFilter, WatermarkScope, WriteCtx, WriteResult, enumeratePartitions } from "./_chunks/storage.mjs";
2
2
  import { DuckDBFactory, DuckDBHandle, canonicalEmptyParquetSchema, createDuckDBCodec, createDuckDBExecutor } from "./_chunks/duckdb.mjs";
3
3
  import { ColumnDef, ColumnType, DrizzleSchema, SCHEMAS, TABLE_METADATA, TableSchema, allTables, countries, currentSchemaVersion, dates, dimensionToColumn, drizzleSchema, hourly_pages, inferTable, page_queries, pages, queries } from "./_chunks/schema.mjs";
4
4
  import { EngineError, EngineErrorKind, engineErrorToException, engineErrors, formatEngineError, isEngineError } from "./_chunks/errors.mjs";
@@ -138,6 +138,25 @@ declare function inferSearchType(entry: Pick<ManifestEntry, 'searchType'>): Sear
138
138
  * decide how to handle it.
139
139
  */
140
140
  declare function inferLegacyTier(entry: Pick<ManifestEntry, 'partition' | 'tier'>): CompactionTier | undefined;
141
+ /**
142
+ * Build a {@link QueryProfiler} that records each closed span to `sink`.
143
+ *
144
+ * `start(name, meta)` stamps the open time and returns an `end` thunk; calling
145
+ * `end(extra)` records `{ name, ms, meta }` with `extra` merged over the
146
+ * open-time `meta` (so completion-only facts — row counts, buffered-file
147
+ * counts — land on the same span). `now` is injectable for deterministic
148
+ * tests; it defaults to `Date.now`.
149
+ */
150
+ declare function createQueryProfiler(sink: (span: QuerySpan) => void, now?: () => number): QueryProfiler;
151
+ /**
152
+ * A profiler that accumulates closed spans into an array — for tests, the CLI
153
+ * `query` command, or any ad-hoc "where did the time go" probe. The returned
154
+ * `spans` array is filled as spans close, in completion order.
155
+ */
156
+ declare function collectSpans(now?: () => number): {
157
+ profiler: QueryProfiler;
158
+ spans: QuerySpan[];
159
+ };
141
160
  /** A row as stored by the fake — data columns plus the injected identity columns. */
142
161
  type StoredRow = Row & {
143
162
  site_id: string;
@@ -190,4 +209,4 @@ declare const MIN_SYNC_IMPRESSIONS = 1;
190
209
  declare const MIN_COUNTRY_IMPRESSIONS = 10;
191
210
  declare const MAX_SITEMAP_URLS_PER_SITE = 50000;
192
211
  declare const MAX_TRACKED_URLS_PER_SITE = 200000;
193
- export { type CodecCtx, type ColumnDef, type ColumnType, type CompactionThresholds, type CompactionTier, type CreateIngestAccumulatorOptions, DEFAULT_SEARCH_TYPE, type DataSource, type DateWeight, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, ENGINE_QUERY_CAPABILITIES, EngineError, EngineErrorKind, type EngineOptions, FILES_PLACEHOLDER, type FileSetRef, type FinalizeOptions, type FinalizeResult, type GcCtx, type Grain, type GscApiRow, type InMemorySink, type IngestAccumulator, type IngestAccumulatorCtx, type IngestAccumulatorEngine, type IngestAccumulatorHooks, type IngestOptions, type InspectionVerdict, type ListLiveFilter, type LockScope, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, ROW_LIMIT_R2, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SchedulePolicy, type ScheduleState, type SearchType, type Sink, type SinkCapabilities, type SinkCloseResult, type SinkOptions, type SinkSlice, type SinkWriteResult, type StorageEngine, type StoredRow, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, type SyncTableName, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, type TableName, type TableSchema, type TableTier, type TenantCtx, type TieredTableName, WEIGHT_PRIORITY, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, assembleDatesRow, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countries, createDuckDBCodec, createDuckDBExecutor, createIcebergResolverAdapter, createInMemorySink, createIngestAccumulator, 
createNoopIngestAccumulator, createParquetResolverAdapter, createRowAccumulator, createSqlQuerySource, createStorageEngine, currentSchemaVersion, dates, dayPartition, dimensionToColumn, drizzleSchema, engineErrorToException, engineErrors, enumeratePartitions, fixedPolicy, formatEngineError, formatLiteral, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, isEngineError, objectKey, page_queries, pages, parseEnabledSearchTypes, pgResolverAdapter, queries, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes, validateEnabledSearchTypesResult };
212
+ export { type CodecCtx, type ColumnDef, type ColumnType, type CompactionThresholds, type CompactionTier, type CreateIngestAccumulatorOptions, DEFAULT_SEARCH_TYPE, type DataSource, type DateWeight, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, ENGINE_QUERY_CAPABILITIES, EngineError, EngineErrorKind, type EngineOptions, FILES_PLACEHOLDER, type FileSetRef, type FinalizeOptions, type FinalizeResult, type GcCtx, type Grain, type GscApiRow, type InMemorySink, type IngestAccumulator, type IngestAccumulatorCtx, type IngestAccumulatorEngine, type IngestAccumulatorHooks, type IngestOptions, type InspectionVerdict, type ListLiveFilter, type LockScope, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryProfiler, type QueryResult, type QuerySpan, ROW_LIMIT_R2, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SchedulePolicy, type ScheduleState, type SearchType, type Sink, type SinkCapabilities, type SinkCloseResult, type SinkOptions, type SinkSlice, type SinkWriteResult, type StorageEngine, type StoredRow, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, type SyncTableName, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, type TableName, type TableSchema, type TableTier, type TenantCtx, type TieredTableName, WEIGHT_PRIORITY, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, assembleDatesRow, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, collectSpans, countries, createDuckDBCodec, createDuckDBExecutor, createIcebergResolverAdapter, 
createInMemorySink, createIngestAccumulator, createNoopIngestAccumulator, createParquetResolverAdapter, createQueryProfiler, createRowAccumulator, createSqlQuerySource, createStorageEngine, currentSchemaVersion, dates, dayPartition, dimensionToColumn, drizzleSchema, engineErrorToException, engineErrors, enumeratePartitions, fixedPolicy, formatEngineError, formatLiteral, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, isEngineError, objectKey, page_queries, pages, parseEnabledSearchTypes, pgResolverAdapter, queries, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes, validateEnabledSearchTypesResult };
package/dist/index.mjs CHANGED
@@ -127,6 +127,32 @@ function createIngestAccumulator(opts) {
127
127
  }
128
128
  };
129
129
  }
130
+ function createQueryProfiler(sink, now = () => Date.now()) {
131
+ return { start(name, meta) {
132
+ const t0 = now();
133
+ return (extra) => {
134
+ const merged = meta || extra ? {
135
+ ...meta,
136
+ ...extra
137
+ } : void 0;
138
+ sink(merged ? {
139
+ name,
140
+ ms: now() - t0,
141
+ meta: merged
142
+ } : {
143
+ name,
144
+ ms: now() - t0
145
+ });
146
+ };
147
+ } };
148
+ }
149
+ function collectSpans(now) {
150
+ const spans = [];
151
+ return {
152
+ profiler: createQueryProfiler((s) => spans.push(s), now),
153
+ spans
154
+ };
155
+ }
130
156
  const KEY_SEP = "\0";
131
157
  function partitionKey(slice) {
132
158
  return [
@@ -282,4 +308,4 @@ const MIN_SYNC_IMPRESSIONS = 1;
282
308
  const MIN_COUNTRY_IMPRESSIONS = 10;
283
309
  const MAX_SITEMAP_URLS_PER_SITE = 5e4;
284
310
  const MAX_TRACKED_URLS_PER_SITE = 2e5;
285
- export { DEFAULT_SEARCH_TYPE, ENGINE_QUERY_CAPABILITIES, FILES_PLACEHOLDER, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, ROW_LIMIT_R2, SCHEMAS, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, WEIGHT_PRIORITY, allTables, assembleDatesRow, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countries, createDuckDBCodec, createDuckDBExecutor, createIcebergResolverAdapter, createInMemorySink, createIngestAccumulator, createNoopIngestAccumulator, createParquetResolverAdapter, createRowAccumulator, createSqlQuerySource, createStorageEngine, currentSchemaVersion, dates, dayPartition, dimensionToColumn, drizzleSchema, engineErrorToException, engineErrors, enumeratePartitions, fixedPolicy, formatEngineError, formatLiteral, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, isEngineError, objectKey, page_queries, pages, parseEnabledSearchTypes, pgResolverAdapter, queries, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes, validateEnabledSearchTypesResult };
311
+ export { DEFAULT_SEARCH_TYPE, ENGINE_QUERY_CAPABILITIES, FILES_PLACEHOLDER, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, ROW_LIMIT_R2, SCHEMAS, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, WEIGHT_PRIORITY, allTables, assembleDatesRow, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, collectSpans, countries, createDuckDBCodec, createDuckDBExecutor, createIcebergResolverAdapter, createInMemorySink, createIngestAccumulator, createNoopIngestAccumulator, createParquetResolverAdapter, createQueryProfiler, createRowAccumulator, createSqlQuerySource, createStorageEngine, currentSchemaVersion, dates, dayPartition, dimensionToColumn, drizzleSchema, engineErrorToException, engineErrors, enumeratePartitions, fixedPolicy, formatEngineError, formatLiteral, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, isEngineError, objectKey, page_queries, pages, parseEnabledSearchTypes, pgResolverAdapter, queries, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes, validateEnabledSearchTypesResult };