@gscdump/engine 0.21.3 → 0.22.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/dist/_chunks/analysis-types.d.mts +1 -1
  2. package/dist/_chunks/coerce.mjs +1 -1
  3. package/dist/_chunks/dispatch.mjs +1 -1
  4. package/dist/_chunks/duckdb.d.mts +2 -2
  5. package/dist/_chunks/engine.mjs +4 -4
  6. package/dist/_chunks/iceberg-schema.mjs +7 -3
  7. package/dist/_chunks/index.d.mts +2 -2
  8. package/dist/_chunks/parquet-plan.mjs +3 -3
  9. package/dist/_chunks/planner.d.mts +2 -2
  10. package/dist/_chunks/registry.d.mts +4 -4
  11. package/dist/_chunks/resolver.mjs +60 -3
  12. package/dist/_chunks/schema.d.mts +1067 -275
  13. package/dist/_chunks/schema.mjs +70 -2
  14. package/dist/_chunks/sink.d.mts +49 -10
  15. package/dist/_chunks/snapshot.d.mts +1 -1
  16. package/dist/_chunks/storage.d.mts +1 -1
  17. package/dist/_chunks/storage.mjs +1 -1
  18. package/dist/_chunks/types.d.mts +1 -1
  19. package/dist/adapters/duckdb-node.d.mts +1 -1
  20. package/dist/adapters/filesystem.d.mts +1 -1
  21. package/dist/adapters/filesystem.mjs +1 -1
  22. package/dist/adapters/hyparquet.d.mts +2 -2
  23. package/dist/adapters/hyparquet.mjs +1 -1
  24. package/dist/adapters/node.d.mts +2 -2
  25. package/dist/adapters/node.mjs +1 -1
  26. package/dist/adapters/r2-manifest.d.mts +1 -1
  27. package/dist/adapters/r2-manifest.mjs +1 -1
  28. package/dist/adapters/r2.d.mts +1 -1
  29. package/dist/analysis-types.d.mts +1 -1
  30. package/dist/analyzer/index.d.mts +2 -2
  31. package/dist/analyzer/index.mjs +1 -1
  32. package/dist/contracts.d.mts +1 -1
  33. package/dist/entities.d.mts +1 -1
  34. package/dist/index.d.mts +6 -6
  35. package/dist/index.mjs +64 -8
  36. package/dist/ingest.d.mts +5 -1
  37. package/dist/ingest.mjs +57 -1
  38. package/dist/period/index.d.mts +1 -1
  39. package/dist/planner.d.mts +2 -2
  40. package/dist/planner.mjs +1 -1
  41. package/dist/report/index.d.mts +2 -2
  42. package/dist/resolver/index.d.mts +2 -2
  43. package/dist/resolver/index.mjs +1 -1
  44. package/dist/rollups.d.mts +2 -2
  45. package/dist/schema.d.mts +2 -2
  46. package/dist/schema.mjs +2 -2
  47. package/dist/sink-node.d.mts +1 -1
  48. package/dist/sink-node.mjs +1 -1
  49. package/dist/snapshot.d.mts +1 -1
  50. package/dist/source/index.d.mts +4 -4
  51. package/dist/source/index.mjs +3 -3
  52. package/package.json +8 -8
@@ -51,6 +51,27 @@ const search_appearance = pgTable("search_appearance", {
51
51
  date: dateCol(),
52
52
  ...metricCols()
53
53
  });
54
+ const search_appearance_pages = pgTable("search_appearance_pages", {
55
+ searchAppearance: varchar("searchAppearance").notNull(),
56
+ url: varchar("url").notNull(),
57
+ date: dateCol(),
58
+ ...metricCols()
59
+ });
60
+ const search_appearance_queries = pgTable("search_appearance_queries", {
61
+ searchAppearance: varchar("searchAppearance").notNull(),
62
+ query: varchar("query").notNull(),
63
+ query_canonical: varchar("query_canonical"),
64
+ date: dateCol(),
65
+ ...metricCols()
66
+ });
67
+ const search_appearance_page_queries = pgTable("search_appearance_page_queries", {
68
+ searchAppearance: varchar("searchAppearance").notNull(),
69
+ url: varchar("url").notNull(),
70
+ query: varchar("query").notNull(),
71
+ query_canonical: varchar("query_canonical"),
72
+ date: dateCol(),
73
+ ...metricCols()
74
+ });
54
75
  const hourly_pages = pgTable("hourly_pages", {
55
76
  url: varchar("url").notNull(),
56
77
  hour: varchar("hour").notNull(),
@@ -64,6 +85,9 @@ const drizzleSchema = {
64
85
  page_queries,
65
86
  dates,
66
87
  search_appearance,
88
+ search_appearance_pages,
89
+ search_appearance_queries,
90
+ search_appearance_page_queries,
67
91
  hourly_pages
68
92
  };
69
93
  const TABLE_METADATA = {
@@ -105,6 +129,47 @@ const TABLE_METADATA = {
105
129
  clusterKey: ["searchAppearance", "date"],
106
130
  version: 1
107
131
  },
132
+ search_appearance_pages: {
133
+ sortKey: [
134
+ "date",
135
+ "searchAppearance",
136
+ "url"
137
+ ],
138
+ clusterKey: [
139
+ "searchAppearance",
140
+ "url",
141
+ "date"
142
+ ],
143
+ version: 1
144
+ },
145
+ search_appearance_queries: {
146
+ sortKey: [
147
+ "date",
148
+ "searchAppearance",
149
+ "query"
150
+ ],
151
+ clusterKey: [
152
+ "searchAppearance",
153
+ "query",
154
+ "date"
155
+ ],
156
+ version: 1
157
+ },
158
+ search_appearance_page_queries: {
159
+ sortKey: [
160
+ "date",
161
+ "searchAppearance",
162
+ "url",
163
+ "query"
164
+ ],
165
+ clusterKey: [
166
+ "searchAppearance",
167
+ "url",
168
+ "query",
169
+ "date"
170
+ ],
171
+ version: 1
172
+ },
108
173
  hourly_pages: {
109
174
  sortKey: [
110
175
  "date",
@@ -149,6 +214,9 @@ const METRIC_TABLES = [
149
214
  "page_queries",
150
215
  "dates",
151
216
  "search_appearance",
217
+ "search_appearance_pages",
218
+ "search_appearance_queries",
219
+ "search_appearance_page_queries",
152
220
  "hourly_pages"
153
221
  ];
154
222
  const SCHEMAS = Object.fromEntries(METRIC_TABLES.map((t) => [t, tableSchemaFrom(t)]));
@@ -165,12 +233,12 @@ function inferTable(dimensions) {
165
233
  const dims = new Set(dimensions);
166
234
  const hasPage = dims.has("page");
167
235
  const hasQuery = dims.has("query");
236
+ if (dims.has("searchAppearance")) return hasPage && hasQuery ? "search_appearance_page_queries" : hasPage ? "search_appearance_pages" : hasQuery ? "search_appearance_queries" : "search_appearance";
168
237
  if (hasPage && hasQuery) return "page_queries";
169
238
  if (hasQuery) return "queries";
170
239
  if (hasPage) return "pages";
171
240
  if (dims.has("country")) return "countries";
172
241
  if (dims.has("device")) return "dates";
173
- if (dims.has("searchAppearance")) return "search_appearance";
174
242
  return "dates";
175
243
  }
176
244
  function naturalKeyColumns(table) {
@@ -192,4 +260,4 @@ function dimensionToColumn(dim, _table) {
192
260
  if (dim === "queryCanonical") return "query_canonical";
193
261
  return dim;
194
262
  }
195
- export { search_appearance as _, dimensionToColumn as a, schemaFor as c, dates as d, drizzleSchema as f, queries as g, pages as h, dedupeByNaturalKey as i, TABLE_METADATA as l, page_queries as m, allTables as n, inferTable as o, hourly_pages as p, currentSchemaVersion as r, naturalKeyColumns as s, SCHEMAS as t, countries as u };
263
+ export { SCHEMAS, TABLE_METADATA, allTables, countries, currentSchemaVersion, dates, dedupeByNaturalKey, dimensionToColumn, drizzleSchema, hourly_pages, inferTable, naturalKeyColumns, page_queries, pages, queries, schemaFor, search_appearance, search_appearance_page_queries, search_appearance_pages, search_appearance_queries };
@@ -1,9 +1,9 @@
1
- import { C as Row$1, N as TenantCtx$1, T as SearchType } from "./storage.mjs";
1
+ import { Row as Row$1, SearchType, TenantCtx as TenantCtx$1 } from "./storage.mjs";
2
2
  import { icebergAppend, restCatalogConnect, s3SignedResolver } from "icebird";
3
3
  import { TableName } from "@gscdump/contracts";
4
- /** The 5 fact tables that exist as global Iceberg tables. */
5
- type IcebergTableName = Extract<TableName, 'pages' | 'queries' | 'countries' | 'page_queries' | 'dates'>;
6
- /** The 5 Iceberg table names, in canonical order. */
4
+ /** The 9 fact tables that exist as global Iceberg tables. */
5
+ type IcebergTableName = Extract<TableName, 'pages' | 'queries' | 'countries' | 'page_queries' | 'dates' | 'search_appearance' | 'search_appearance_pages' | 'search_appearance_queries' | 'search_appearance_page_queries'>;
6
+ /** The Iceberg table names, in canonical order. */
7
7
  declare const ICEBERG_TABLES: readonly IcebergTableName[];
8
8
  /**
9
9
  * Iceberg-native column type. Superset-mapped from the engine `ColumnType`;
@@ -216,6 +216,43 @@ declare function ensureIcebergNamespace(conn: IcebergConnection): Promise<void>;
216
216
  declare function createIcebergTables(conn: IcebergConnection, tables?: readonly IcebergTableName[]): Promise<IcebergTableOpResult[]>;
217
217
  /** List the table names currently in the catalog namespace. */
218
218
  declare function listIcebergTables(conn: IcebergConnection): Promise<string[]>;
219
+ /** A data file in the current snapshot's manifest, scoped to one partition. */
220
+ interface IcebergListedDataFile {
221
+ /** Raw Iceberg `data_file.file_path` (e.g. `s3://gscdump-analytics/.../x.parquet`). */
222
+ filePath: string;
223
+ /** Object key relative to the warehouse bucket (the part after `s3://<bucket>/`). */
224
+ objectKey: string;
225
+ bytes: number;
226
+ rowCount: number;
227
+ }
228
+ interface ListIcebergDataFilesOptions {
229
+ table: IcebergTableName;
230
+ /** Partition identity column. */
231
+ siteId: string;
232
+ /** Partition identity column. */
233
+ searchType: string;
234
+ /**
235
+ * Inclusive date range. Every month touched by `[start, end]` is scanned;
236
+ * `month(date)` is the third partition transform.
237
+ */
238
+ range: {
239
+ start: string;
240
+ end: string;
241
+ };
242
+ }
243
+ /**
244
+ * List the parquet data files in the current snapshot of `table`, filtered
245
+ * to a single partition slice `(siteId, searchType, month(date) ∈ range)`.
246
+ *
247
+ * Cost: 1 REST `loadTable` + N manifest fetches (typically 1–10 small Avro
248
+ * files). Iceberg returns the manifest list embedded in `metadata`, so a
249
+ * cached `metadata` would let callers skip the REST call entirely.
250
+ *
251
+ * Skips deleted entries (status=2) and non-data file types (delete files).
252
+ * Returns object keys + bytes + rowCount so the caller can build presigned
253
+ * URLs without re-walking the catalog.
254
+ */
255
+ declare function listIcebergDataFiles(conn: IcebergConnection, opts: ListIcebergDataFilesOptions): Promise<IcebergListedDataFile[]>;
219
256
  /**
220
257
  * Drop tables from the catalog namespace, purging their data objects.
221
258
  * Defaults to every table currently in the namespace — used to clear the
@@ -245,13 +282,15 @@ interface SinkWriteResult {
245
282
  bytes?: number;
246
283
  }
247
284
  /**
248
- * Static description of a sink. All sinks are append-only under the v5
249
- * stability-cutoff model; `appendOnly` is therefore always `true` and kept
250
- * only as an explicit, self-documenting marker.
285
+ * Static description of a sink. Production ingestion is append-only under the
286
+ * v5 stability-cutoff model. Test and revision-path adapters may expose an
287
+ * overwrite capability explicitly.
251
288
  */
252
289
  interface SinkCapabilities {
253
- /** Always `true` re-emitting a slice accumulates duplicate rows. */
254
- appendOnly: true;
290
+ /** When true, re-emitting a slice accumulates duplicate rows. */
291
+ appendOnly: boolean;
292
+ /** Whether the sink exposes partition overwrite semantics. */
293
+ canOverwrite?: boolean;
255
294
  }
256
295
  /**
257
296
  * Outcome of `Sink.close()` — which tables' buffered rows reached durable
@@ -326,4 +365,4 @@ interface LocalIcebergSinkOptions extends SinkOptions {
326
365
  /** S3-compatible warehouse location (POC: MinIO). */
327
366
  warehouse: string;
328
367
  }
329
- export { ICEBERG_PARTITION_SPEC as A, icebergAppendRetrying as C, listIcebergTables as D, isCommitRateLimited as E, IcebergPartitionField as F, IcebergPartitionTransform as I, IcebergTableName as L, ICEBERG_TABLES as M, IcebergColumn as N, ICEBERG_FIELD_ID_BASE as O, IcebergColumnType as P, IcebergTableSpec as R, ensureIcebergNamespace as S, icebergSchemaFor as T, IcebergSchemaField as _, SinkCloseResult as a, createIcebergTables as b, SinkWriteResult as c, IcebergConnection as d, IcebergPartitionSpec as f, IcebergSchema as g, IcebergS3Config as h, SinkCapabilities as i, ICEBERG_SCHEMAS as j, ICEBERG_PARTITION_COLUMNS as k, CommitRetryOptions as l, IcebergPrimitiveType as m, LocalIcebergSinkOptions as n, SinkOptions as o, IcebergPartitionSpecField as p, Sink as r, SinkSlice as s, IcebergAppendSinkOptions as t, IcebergCatalogConfig as u, IcebergTableOpResult as v, icebergPartitionSpecFor as w, dropIcebergTables as x, connectIcebergCatalog as y, icebergTableSpec as z };
368
+ export { CommitRetryOptions, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, IcebergAppendSinkOptions, IcebergCatalogConfig, IcebergColumn, IcebergColumnType, IcebergConnection, IcebergListedDataFile, IcebergPartitionField, IcebergPartitionSpec, IcebergPartitionSpecField, IcebergPartitionTransform, IcebergPrimitiveType, IcebergS3Config, IcebergSchema, IcebergSchemaField, IcebergTableName, IcebergTableOpResult, IcebergTableSpec, ListIcebergDataFilesOptions, LocalIcebergSinkOptions, Sink, SinkCapabilities, SinkCloseResult, SinkOptions, SinkSlice, SinkWriteResult, connectIcebergCatalog, createIcebergTables, dropIcebergTables, ensureIcebergNamespace, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, isCommitRateLimited, listIcebergDataFiles, listIcebergTables };
@@ -11,4 +11,4 @@ interface SnapshotIndex {
11
11
  hot: boolean;
12
12
  hotDays: number;
13
13
  }
14
- export { SnapshotIndex as t };
14
+ export { SnapshotIndex };
@@ -550,4 +550,4 @@ declare function dayPartition(date: string): string;
550
550
  */
551
551
  declare function hourPartition(date: string): string;
552
552
  declare function objectKey(ctx: TenantCtx, table: TableName, partition: string, version: number, searchType?: SearchType): string;
553
- export { SyncStateKind as A, hourPartition as B, Row$1 as C, SyncState as D, StorageEngine as E, WatermarkFilter as F, RAW_DAILY_COMPACT_THRESHOLD as G, inferSearchType as H, WatermarkScope as I, enumeratePartitions as J, countRawDailies as K, WriteCtx as L, TableName$1 as M, TenantCtx$1 as N, SyncStateDetail as O, Watermark as P, WriteResult as R, QueryResult as S, SearchType$1 as T, objectKey as U, inferLegacyTier as V, CompactionThresholds as W, splitOverlappingTiers as Y, PurgeUrlsResult as _, EngineOptions as a, QueryExecuteResult as b, Grain$1 as c, ManifestEntry as d, ManifestPurgeResult as f, PurgeResult as g, PurgeFilter as h, DataSource as i, SyncStateScope as j, SyncStateFilter as k, ListLiveFilter as l, ParquetCodec as m, CompactionTier as n, FileSetRef as o, ManifestStore as p, dedupeOverlappingTiers as q, DEFAULT_SEARCH_TYPE as r, GcCtx as s, CodecCtx as t, LockScope as u, QueryCtx as v, RunSQLOptions as w, QueryExecutor as x, QueryExecuteOptions as y, dayPartition as z };
553
+ export { CodecCtx, CompactionThresholds, CompactionTier, DEFAULT_SEARCH_TYPE, DataSource, EngineOptions, FileSetRef, GcCtx, type Grain$1 as Grain, ListLiveFilter, LockScope, ManifestEntry, ManifestPurgeResult, ManifestStore, ParquetCodec, PurgeFilter, PurgeResult, PurgeUrlsResult, QueryCtx, QueryExecuteOptions, QueryExecuteResult, QueryExecutor, QueryResult, RAW_DAILY_COMPACT_THRESHOLD, type Row$1 as Row, RunSQLOptions, type SearchType$1 as SearchType, StorageEngine, SyncState, SyncStateDetail, SyncStateFilter, SyncStateKind, SyncStateScope, type TableName$1 as TableName, type TenantCtx$1 as TenantCtx, Watermark, WatermarkFilter, WatermarkScope, WriteCtx, WriteResult, countRawDailies, dayPartition, dedupeOverlappingTiers, enumeratePartitions, hourPartition, inferLegacyTier, inferSearchType, objectKey, splitOverlappingTiers };
@@ -39,4 +39,4 @@ function objectKey(ctx, table, partition, version, searchType) {
39
39
  function tenantPrefix(ctx) {
40
40
  return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/` : `u_${ctx.userId}/`;
41
41
  }
42
- export { inferSearchType as a, objectKey as c, tenantPrefix as d, weekPartition as f, inferLegacyTier as i, quarterOfMonth as l, dayPartition as n, mondayOfWeek as o, hourPartition as r, monthPartition as s, DEFAULT_SEARCH_TYPE as t, quarterPartition as u };
42
+ export { DEFAULT_SEARCH_TYPE, dayPartition, hourPartition, inferLegacyTier, inferSearchType, mondayOfWeek, monthPartition, objectKey, quarterOfMonth, quarterPartition, tenantPrefix, weekPartition };
@@ -51,4 +51,4 @@ interface ExtraQuery {
51
51
  sql: string;
52
52
  params: unknown[];
53
53
  }
54
- export { ResolvedSQLOptimized as a, ResolvedSQL as i, ExtraQuery as n, ResolverAdapter as o, ResolvedComparisonSQL as r, ResolverOptions as s, ComparisonFilter as t };
54
+ export { ComparisonFilter, ExtraQuery, ResolvedComparisonSQL, ResolvedSQL, ResolvedSQLOptimized, ResolverAdapter, ResolverOptions };
@@ -1,4 +1,4 @@
1
- import { n as DuckDBHandle } from "../_chunks/duckdb.mjs";
1
+ import { DuckDBHandle } from "../_chunks/duckdb.mjs";
2
2
  interface NodeDuckDBOptions {
3
3
  verbose?: boolean;
4
4
  }
@@ -1,4 +1,4 @@
1
- import { i as DataSource, p as ManifestStore } from "../_chunks/storage.mjs";
1
+ import { DataSource, ManifestStore } from "../_chunks/storage.mjs";
2
2
  interface FilesystemDataSourceOptions {
3
3
  rootDir: string;
4
4
  }
@@ -1,4 +1,4 @@
1
- import { a as inferSearchType, i as inferLegacyTier } from "../_chunks/storage.mjs";
1
+ import { inferLegacyTier, inferSearchType } from "../_chunks/storage.mjs";
2
2
  import { dirname, join, resolve } from "node:path";
3
3
  import { Buffer } from "node:buffer";
4
4
  import { randomBytes } from "node:crypto";
@@ -1,5 +1,5 @@
1
- import { C as Row, M as TableName, i as DataSource, m as ParquetCodec, t as CodecCtx } from "../_chunks/storage.mjs";
2
- import { t as ColumnDef } from "../_chunks/schema.mjs";
1
+ import { CodecCtx, DataSource, ParquetCodec, Row, TableName } from "../_chunks/storage.mjs";
2
+ import { ColumnDef } from "../_chunks/schema.mjs";
3
3
  import { ParquetQueryFilter } from "hyparquet";
4
4
  declare function encodeRowsToParquet(table: TableName, rows: readonly Row[]): Uint8Array;
5
5
  interface EncodeFlexOptions {
@@ -1,4 +1,4 @@
1
- import { i as dedupeByNaturalKey, l as TABLE_METADATA, t as SCHEMAS } from "../_chunks/schema.mjs";
1
+ import { SCHEMAS, TABLE_METADATA, dedupeByNaturalKey } from "../_chunks/schema.mjs";
2
2
  import { parquetReadObjects } from "hyparquet";
3
3
  import { parquetWriteBuffer } from "hyparquet-writer";
4
4
  const ROW_GROUP_SIZE = 25e3;
@@ -1,6 +1,6 @@
1
- import { E as StorageEngine, i as DataSource } from "../_chunks/storage.mjs";
1
+ import { DataSource, StorageEngine } from "../_chunks/storage.mjs";
2
2
  import { NodeDuckDBOptions, createNodeDuckDBHandle, resetNodeDuckDB } from "./duckdb-node.mjs";
3
- import { t as SnapshotIndex } from "../_chunks/snapshot.mjs";
3
+ import { SnapshotIndex } from "../_chunks/snapshot.mjs";
4
4
  import { Row, TableName } from "@gscdump/contracts";
5
5
  import { SearchType } from "gscdump/query";
6
6
  interface NodeHarnessOptions {
@@ -1,4 +1,4 @@
1
- import { a as createDuckDBCodec, n as createStorageEngine, o as createDuckDBExecutor } from "../_chunks/engine.mjs";
1
+ import { createDuckDBCodec, createDuckDBExecutor, createStorageEngine } from "../_chunks/engine.mjs";
2
2
  import { createNodeDuckDBHandle, resetNodeDuckDB } from "./duckdb-node.mjs";
3
3
  import { createFilesystemDataSource, createFilesystemManifestStore } from "./filesystem.mjs";
4
4
  import path from "node:path";
@@ -1,4 +1,4 @@
1
- import { M as TableName, p as ManifestStore } from "../_chunks/storage.mjs";
1
+ import { ManifestStore, TableName } from "../_chunks/storage.mjs";
2
2
  interface R2ObjectMetadata {
3
3
  etag: string;
4
4
  }
@@ -1,4 +1,4 @@
1
- import { a as inferSearchType, i as inferLegacyTier } from "../_chunks/storage.mjs";
1
+ import { inferLegacyTier, inferSearchType } from "../_chunks/storage.mjs";
2
2
  const SHARD_RE = /^u_[^/]+\/manifest\/(?<siteId>[^/]+)\/(?<table>[^/]+)\/HEAD$/;
3
3
  function defaultSnapshotId() {
4
4
  return `${Date.now()}-${Math.random().toString(36).slice(2, 10)}`;
@@ -1,4 +1,4 @@
1
- import { i as DataSource } from "../_chunks/storage.mjs";
1
+ import { DataSource } from "../_chunks/storage.mjs";
2
2
  interface R2GetOptions {
3
3
  range?: {
4
4
  offset: number;
@@ -1,2 +1,2 @@
1
- import { i as num, n as AnalysisResult, r as AnalysisTool, t as AnalysisParams } from "./_chunks/analysis-types.mjs";
1
+ import { AnalysisParams, AnalysisResult, AnalysisTool, num } from "./_chunks/analysis-types.mjs";
2
2
  export { AnalysisParams, AnalysisResult, AnalysisTool, num };
@@ -1,5 +1,5 @@
1
- import { n as AnalysisResult, t as AnalysisParams } from "../_chunks/analysis-types.mjs";
2
- import { _ as SqlExtraQuery, a as DefineAnalyzerOptions, b as requireAdapter, c as Reducer, d as Analyzer, f as BuildContext, g as RowQueriesPlan, h as RequiredCapability, i as createAnalyzerRegistry, l as SqlPlanSpec, m as ReduceContext, n as AnalyzerRegistryInit, o as DefinedAnalyzer, p as Plan, r as AnalyzerVariants, s as ReduceCtx, t as AnalyzerRegistry, u as defineAnalyzer, v as SqlPlan, x as AnalysisQuerySource, y as TypedRowQuery } from "../_chunks/registry.mjs";
1
+ import { AnalysisParams, AnalysisResult } from "../_chunks/analysis-types.mjs";
2
+ import { AnalysisQuerySource, Analyzer, AnalyzerRegistry, AnalyzerRegistryInit, AnalyzerVariants, BuildContext, DefineAnalyzerOptions, DefinedAnalyzer, Plan, ReduceContext, ReduceCtx, Reducer, RequiredCapability, RowQueriesPlan, SqlExtraQuery, SqlPlan, SqlPlanSpec, TypedRowQuery, createAnalyzerRegistry, defineAnalyzer, requireAdapter } from "../_chunks/registry.mjs";
3
3
  declare class AnalyzerCapabilityError extends Error {
4
4
  readonly tool: string;
5
5
  readonly missing: readonly RequiredCapability[];
@@ -1,4 +1,4 @@
1
- import { n as runAnalyzerFromSource, t as AnalyzerCapabilityError } from "../_chunks/dispatch.mjs";
1
+ import { AnalyzerCapabilityError, runAnalyzerFromSource } from "../_chunks/dispatch.mjs";
2
2
  const DEFAULT_SQL_REQUIRES = ["executeSql", "fileSets"];
3
3
  function defineAnalyzer(opts) {
4
4
  const { id, reduce, reduceSql, reduceRows, buildSql, buildRows, sqlRequires = DEFAULT_SQL_REQUIRES, rowsRequires = [] } = opts;
@@ -1,2 +1,2 @@
1
- import { A as SyncStateKind, C as Row, D as SyncState, E as StorageEngine, F as WatermarkFilter, I as WatermarkScope, L as WriteCtx, M as TableName, N as TenantCtx, O as SyncStateDetail, P as Watermark, R as WriteResult, S as QueryResult, T as SearchType, a as EngineOptions, b as QueryExecuteResult, d as ManifestEntry, i as DataSource, j as SyncStateScope, k as SyncStateFilter, l as ListLiveFilter, m as ParquetCodec, n as CompactionTier, o as FileSetRef, p as ManifestStore, s as GcCtx, t as CodecCtx, u as LockScope, v as QueryCtx, w as RunSQLOptions, x as QueryExecutor, y as QueryExecuteOptions } from "./_chunks/storage.mjs";
1
+ import { CodecCtx, CompactionTier, DataSource, EngineOptions, FileSetRef, GcCtx, ListLiveFilter, LockScope, ManifestEntry, ManifestStore, ParquetCodec, QueryCtx, QueryExecuteOptions, QueryExecuteResult, QueryExecutor, QueryResult, Row, RunSQLOptions, SearchType, StorageEngine, SyncState, SyncStateDetail, SyncStateFilter, SyncStateKind, SyncStateScope, TableName, TenantCtx, Watermark, WatermarkFilter, WatermarkScope, WriteCtx, WriteResult } from "./_chunks/storage.mjs";
2
2
  export type { CodecCtx, CompactionTier, DataSource, EngineOptions, FileSetRef, GcCtx, ListLiveFilter, LockScope, ManifestEntry, ManifestStore, ParquetCodec, QueryCtx, QueryExecuteOptions, QueryExecuteResult, QueryExecutor, QueryResult, Row, RunSQLOptions, SearchType, StorageEngine, SyncState, SyncStateDetail, SyncStateFilter, SyncStateKind, SyncStateScope, TableName, TenantCtx, Watermark, WatermarkFilter, WatermarkScope, WriteCtx, WriteResult };
@@ -1,4 +1,4 @@
1
- import { i as DataSource } from "./_chunks/storage.mjs";
1
+ import { DataSource } from "./_chunks/storage.mjs";
2
2
  import { ScheduleState } from "./schedule.mjs";
3
3
  import { TenantCtx } from "@gscdump/contracts";
4
4
  /**
package/dist/index.d.mts CHANGED
@@ -1,10 +1,10 @@
1
- import { A as SyncStateKind, B as hourPartition, C as Row, D as SyncState, E as StorageEngine, F as WatermarkFilter, G as RAW_DAILY_COMPACT_THRESHOLD, H as inferSearchType, I as WatermarkScope, J as enumeratePartitions, K as countRawDailies, L as WriteCtx, M as TableName, N as TenantCtx, O as SyncStateDetail, P as Watermark, R as WriteResult, S as QueryResult, T as SearchType, U as objectKey, V as inferLegacyTier, W as CompactionThresholds, Y as splitOverlappingTiers, _ as PurgeUrlsResult, a as EngineOptions, b as QueryExecuteResult, c as Grain, d as ManifestEntry, f as ManifestPurgeResult, g as PurgeResult, h as PurgeFilter, i as DataSource, j as SyncStateScope, k as SyncStateFilter, l as ListLiveFilter, m as ParquetCodec, n as CompactionTier, o as FileSetRef, p as ManifestStore, q as dedupeOverlappingTiers, r as DEFAULT_SEARCH_TYPE, s as GcCtx, t as CodecCtx, u as LockScope, v as QueryCtx, w as RunSQLOptions, x as QueryExecutor, y as QueryExecuteOptions, z as dayPartition } from "./_chunks/storage.mjs";
2
- import { a as createDuckDBExecutor, i as createDuckDBCodec, n as DuckDBHandle, r as canonicalEmptyParquetSchema, t as DuckDBFactory } from "./_chunks/duckdb.mjs";
3
- import { _ as hourly_pages, a as allTables, b as queries, c as dimensionToColumn, f as DrizzleSchema, g as drizzleSchema, h as dates, i as TableSchema, l as inferTable, m as countries, n as ColumnType, o as currentSchemaVersion, p as TABLE_METADATA, r as SCHEMAS, t as ColumnDef, v as page_queries, y as pages } from "./_chunks/schema.mjs";
1
+ import { CodecCtx, CompactionThresholds, CompactionTier, DEFAULT_SEARCH_TYPE, DataSource, EngineOptions, FileSetRef, GcCtx, Grain, ListLiveFilter, LockScope, ManifestEntry, ManifestPurgeResult, ManifestStore, ParquetCodec, PurgeFilter, PurgeResult, PurgeUrlsResult, QueryCtx, QueryExecuteOptions, QueryExecuteResult, QueryExecutor, QueryResult, RAW_DAILY_COMPACT_THRESHOLD, Row, RunSQLOptions, SearchType, StorageEngine, SyncState, SyncStateDetail, SyncStateFilter, SyncStateKind, SyncStateScope, TableName, TenantCtx, Watermark, WatermarkFilter, WatermarkScope, WriteCtx, WriteResult, countRawDailies, dayPartition, dedupeOverlappingTiers, enumeratePartitions, hourPartition, inferLegacyTier, inferSearchType, objectKey, splitOverlappingTiers } from "./_chunks/storage.mjs";
2
+ import { DuckDBFactory, DuckDBHandle, canonicalEmptyParquetSchema, createDuckDBCodec, createDuckDBExecutor } from "./_chunks/duckdb.mjs";
3
+ import { ColumnDef, ColumnType, DrizzleSchema, SCHEMAS, TABLE_METADATA, TableSchema, allTables, countries, currentSchemaVersion, dates, dimensionToColumn, drizzleSchema, hourly_pages, inferTable, page_queries, pages, queries } from "./_chunks/schema.mjs";
4
4
  import { InspectionVerdict, SchedulePolicy, ScheduleState, fixedPolicy, inspectionPolicy, sitemapPolicy } from "./schedule.mjs";
5
- import { A as ICEBERG_PARTITION_SPEC, C as icebergAppendRetrying, D as listIcebergTables, E as isCommitRateLimited, F as IcebergPartitionField, I as IcebergPartitionTransform, L as IcebergTableName, M as ICEBERG_TABLES, N as IcebergColumn, O as ICEBERG_FIELD_ID_BASE, P as IcebergColumnType, R as IcebergTableSpec, S as ensureIcebergNamespace, T as icebergSchemaFor, _ as IcebergSchemaField, a as SinkCloseResult, b as createIcebergTables, c as SinkWriteResult, d as IcebergConnection, f as IcebergPartitionSpec, g as IcebergSchema, h as IcebergS3Config, i as SinkCapabilities, j as ICEBERG_SCHEMAS, k as ICEBERG_PARTITION_COLUMNS, l as CommitRetryOptions, m as IcebergPrimitiveType, n as LocalIcebergSinkOptions, o as SinkOptions, p as IcebergPartitionSpecField, r as Sink, s as SinkSlice, t as IcebergAppendSinkOptions, u as IcebergCatalogConfig, v as IcebergTableOpResult, w as icebergPartitionSpecFor, x as dropIcebergTables, y as connectIcebergCatalog, z as icebergTableSpec } from "./_chunks/sink.mjs";
5
+ import { CommitRetryOptions, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, IcebergAppendSinkOptions, IcebergCatalogConfig, IcebergColumn, IcebergColumnType, IcebergConnection, IcebergListedDataFile, IcebergPartitionField, IcebergPartitionSpec, IcebergPartitionSpecField, IcebergPartitionTransform, IcebergPrimitiveType, IcebergS3Config, IcebergSchema, IcebergSchemaField, IcebergTableName, IcebergTableOpResult, IcebergTableSpec, ListIcebergDataFilesOptions, LocalIcebergSinkOptions, Sink, SinkCapabilities, SinkCloseResult, SinkOptions, SinkSlice, SinkWriteResult, connectIcebergCatalog, createIcebergTables, dropIcebergTables, ensureIcebergNamespace, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, isCommitRateLimited, listIcebergDataFiles, listIcebergTables } from "./_chunks/sink.mjs";
6
6
  import { GscApiRow, IngestOptions, RowAccumulator, RowAccumulatorOptions, assembleDatesRow, createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
7
- import { a as substituteNamedFiles, i as resolveParquetSQL, n as ResolvedQuery, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
7
+ import { FILES_PLACEHOLDER, ResolvedQuery, resolveParquetSQL, substituteNamedFiles } from "./_chunks/planner.mjs";
8
8
  import { rebuildDailyFromHourly } from "./rollups.mjs";
9
9
  import { bindLiterals, formatLiteral } from "./sql-bind.mjs";
10
10
  import { Grain as Grain$1, Row as Row$1, TableName as TableName$1 } from "@gscdump/contracts";
@@ -179,4 +179,4 @@ declare const MIN_SYNC_IMPRESSIONS = 1;
179
179
  declare const MIN_COUNTRY_IMPRESSIONS = 10;
180
180
  declare const MAX_SITEMAP_URLS_PER_SITE = 50000;
181
181
  declare const MAX_TRACKED_URLS_PER_SITE = 200000;
182
- export { type CodecCtx, type ColumnDef, type ColumnType, type CommitRetryOptions, type CompactionThresholds, type CompactionTier, type CreateIngestAccumulatorOptions, DEFAULT_SEARCH_TYPE, type DataSource, type DateWeight, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, type EngineOptions, FILES_PLACEHOLDER, type FileSetRef, type FinalizeOptions, type FinalizeResult, type GcCtx, type Grain, type GscApiRow, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, type IcebergAppendSink, type IcebergAppendSinkOptions, type IcebergCatalogConfig, type IcebergColumn, type IcebergColumnType, type IcebergConnection, type IcebergPartitionField, type IcebergPartitionSpec, type IcebergPartitionSpecField, type IcebergPartitionTransform, type IcebergPrimitiveType, type IcebergS3Config, type IcebergSchema, type IcebergSchemaField, type IcebergTableName, type IcebergTableOpResult, type IcebergTableSpec, type InMemorySink, type IngestAccumulator, type IngestAccumulatorCtx, type IngestAccumulatorEngine, type IngestAccumulatorHooks, type IngestOptions, type InspectionVerdict, type ListLiveFilter, type LocalIcebergSinkOptions, type LockScope, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SchedulePolicy, type ScheduleState, type SearchType, type Sink, type SinkCapabilities, type SinkCloseResult, type SinkOptions, type SinkSlice, type SinkWriteResult, type StorageEngine, type StoredRow, type SyncState, type SyncStateDetail, type 
SyncStateFilter, type SyncStateKind, type SyncStateScope, type SyncTableName, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, type TableName, type TableSchema, type TableTier, type TenantCtx, type TieredTableName, WEIGHT_PRIORITY, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, assembleDatesRow, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, connectIcebergCatalog, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIcebergAppendSink, createIcebergTables, createInMemorySink, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dates, dayPartition, dedupeOverlappingTiers, dimensionToColumn, drizzleSchema, dropIcebergTables, ensureIcebergNamespace, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, isCommitRateLimited, listIcebergTables, objectKey, page_queries, pages, parseEnabledSearchTypes, queries, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, splitOverlappingTiers, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
182
+ export { type CodecCtx, type ColumnDef, type ColumnType, type CommitRetryOptions, type CompactionThresholds, type CompactionTier, type CreateIngestAccumulatorOptions, DEFAULT_SEARCH_TYPE, type DataSource, type DateWeight, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, type EngineOptions, FILES_PLACEHOLDER, type FileSetRef, type FinalizeOptions, type FinalizeResult, type GcCtx, type Grain, type GscApiRow, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, type IcebergAppendSink, type IcebergAppendSinkOptions, type IcebergCatalogConfig, type IcebergColumn, type IcebergColumnType, type IcebergConnection, type IcebergListedDataFile, type IcebergPartitionField, type IcebergPartitionSpec, type IcebergPartitionSpecField, type IcebergPartitionTransform, type IcebergPrimitiveType, type IcebergS3Config, type IcebergSchema, type IcebergSchemaField, type IcebergTableName, type IcebergTableOpResult, type IcebergTableSpec, type InMemorySink, type IngestAccumulator, type IngestAccumulatorCtx, type IngestAccumulatorEngine, type IngestAccumulatorHooks, type IngestOptions, type InspectionVerdict, type ListIcebergDataFilesOptions, type ListLiveFilter, type LocalIcebergSinkOptions, type LockScope, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SchedulePolicy, type ScheduleState, type SearchType, type Sink, type SinkCapabilities, type SinkCloseResult, type SinkOptions, type SinkSlice, type SinkWriteResult, type StorageEngine, type 
StoredRow, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, type SyncTableName, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, type TableName, type TableSchema, type TableTier, type TenantCtx, type TieredTableName, WEIGHT_PRIORITY, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, assembleDatesRow, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, connectIcebergCatalog, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIcebergAppendSink, createIcebergTables, createInMemorySink, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dates, dayPartition, dedupeOverlappingTiers, dimensionToColumn, drizzleSchema, dropIcebergTables, ensureIcebergNamespace, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, isCommitRateLimited, listIcebergDataFiles, listIcebergTables, objectKey, page_queries, pages, parseEnabledSearchTypes, queries, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, splitOverlappingTiers, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
package/dist/index.mjs CHANGED
@@ -1,15 +1,15 @@
1
- import { n as coerceRows, t as coerceRow } from "./_chunks/coerce.mjs";
2
- import { a as dimensionToColumn, d as dates, f as drizzleSchema, g as queries, h as pages, l as TABLE_METADATA, m as page_queries, n as allTables, o as inferTable, p as hourly_pages, r as currentSchemaVersion, t as SCHEMAS, u as countries } from "./_chunks/schema.mjs";
3
- import { a as inferSearchType, c as objectKey, i as inferLegacyTier, n as dayPartition, r as hourPartition, t as DEFAULT_SEARCH_TYPE } from "./_chunks/storage.mjs";
4
- import { a as RAW_DAILY_COMPACT_THRESHOLD, c as dedupeOverlappingTiers, i as substituteNamedFiles, l as enumeratePartitions, r as resolveParquetSQL, s as countRawDailies, t as FILES_PLACEHOLDER, u as splitOverlappingTiers } from "./_chunks/parquet-plan.mjs";
1
+ import { coerceRow, coerceRows } from "./_chunks/coerce.mjs";
2
+ import { SCHEMAS, TABLE_METADATA, allTables, countries, currentSchemaVersion, dates, dimensionToColumn, drizzleSchema, hourly_pages, inferTable, page_queries, pages, queries } from "./_chunks/schema.mjs";
3
+ import { DEFAULT_SEARCH_TYPE, dayPartition, hourPartition, inferLegacyTier, inferSearchType, objectKey } from "./_chunks/storage.mjs";
4
+ import { FILES_PLACEHOLDER, RAW_DAILY_COMPACT_THRESHOLD, countRawDailies, dedupeOverlappingTiers, enumeratePartitions, resolveParquetSQL, splitOverlappingTiers, substituteNamedFiles } from "./_chunks/parquet-plan.mjs";
5
5
  import { bindLiterals, formatLiteral } from "./sql-bind.mjs";
6
- import { a as createDuckDBCodec, i as canonicalEmptyParquetSchema, n as createStorageEngine, o as createDuckDBExecutor, r as gcOrphansImpl, t as MAX_DAY_BYTES } from "./_chunks/engine.mjs";
7
- import { a as ICEBERG_TABLES, i as ICEBERG_SCHEMAS, n as ICEBERG_PARTITION_COLUMNS, o as icebergTableSpec, r as ICEBERG_PARTITION_SPEC, t as ICEBERG_FIELD_ID_BASE } from "./_chunks/iceberg-schema.mjs";
6
+ import { MAX_DAY_BYTES, canonicalEmptyParquetSchema, createDuckDBCodec, createDuckDBExecutor, createStorageEngine, gcOrphansImpl } from "./_chunks/engine.mjs";
7
+ import { ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, icebergTableSpec } from "./_chunks/iceberg-schema.mjs";
8
8
  import { assembleDatesRow, createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
9
9
  import "./planner.mjs";
10
10
  import { rebuildDailyFromHourly } from "./rollups.mjs";
11
11
  import { fixedPolicy, inspectionPolicy, sitemapPolicy } from "./schedule.mjs";
12
- import { icebergAppend, icebergCreateTable, icebergDropTable, restCatalogConnect, restCatalogCreateNamespace, restCatalogListTables, s3SignedResolver } from "icebird";
12
+ import { icebergAppend, icebergCreateTable, icebergDropTable, icebergManifests, restCatalogConnect, restCatalogCreateNamespace, restCatalogListTables, restCatalogLoadTable, s3SignedResolver } from "icebird";
13
13
  const ICEBERG_TYPE_MAP = {
14
14
  STRING: "string",
15
15
  INT: "int",
@@ -109,6 +109,62 @@ async function createIcebergTables(conn, tables = ICEBERG_TABLES) {
109
109
  async function listIcebergTables(conn) {
110
110
  return restCatalogListTables(conn.catalog, { namespace: conn.namespace }).then((list) => list.map((t) => t.name).sort(), () => []);
111
111
  }
112
/**
 * Enumerate every calendar month from `range.start` through `range.end`,
 * inclusive.
 *
 * Only the first two `-`-separated components of each bound are read, so both
 * `YYYY-MM` and `YYYY-MM-DD` strings are accepted.
 *
 * @param {{ start: string, end: string }} range - Inclusive lower/upper bounds.
 * @returns {string[]} Labels of the form `<year>-<MM>` (month zero-padded to two
 *   digits) in ascending order. Empty when the range is reversed or when any
 *   year/month component is not an integer.
 */
function monthsInRange(range) {
  const [sy, sm] = range.start.split("-").map(Number);
  const [ey, em] = range.end.split("-").map(Number);
  // Reject unparseable bounds up front. A NaN/undefined start month would
  // otherwise never roll the year over, and a non-finite end year would never be
  // reached; both cases loop forever while growing the output array.
  const components = [sy, sm, ey, em];
  if (!components.every((value) => Number.isInteger(value))) return [];
  const out = [];
  let y = sy;
  let m = sm;
  while (y < ey || (y === ey && m <= em)) {
    out.push(`${y}-${String(m).padStart(2, "0")}`);
    m++;
    if (m > 12) {
      m = 1;
      y++;
    }
  }
  return out;
}
128
/**
 * Convert a `YYYY-MM` label into the number of whole months elapsed since
 * January 1970, so `"1970-01"` maps to 0 and `"1969-12"` to -1.
 *
 * @param {string} ym - Month label; only the first two `-`-separated parts are read.
 * @returns {number} Month ordinal, or NaN when a part is not numeric.
 */
function monthsSinceEpoch(ym) {
  const parts = ym.split("-");
  const year = Number(parts[0]);
  const month = Number(parts[1]);
  const yearsElapsed = year - 1970;
  const monthOffset = month - 1;
  return yearsElapsed * 12 + monthOffset;
}
132
/**
 * Reduce an `s3://<bucket>/<key>` URI to just `<key>`.
 *
 * Paths that do not start with `s3://` are returned unchanged. An `s3://` URI
 * with no `/` after the bucket yields the text that follows the scheme.
 *
 * @param {string} filePath - File location, typically an `s3://` URI.
 * @returns {string} The path with scheme and bucket removed where applicable.
 */
function stripBucket(filePath) {
  const scheme = "s3://";
  if (filePath.startsWith(scheme)) {
    // First "/" after the scheme marks the end of the bucket name.
    const bucketEnd = filePath.indexOf("/", scheme.length);
    if (bucketEnd === -1) {
      return filePath.slice(scheme.length);
    }
    return filePath.slice(bucketEnd + 1);
  }
  return filePath;
}
138
/**
 * List the live data files of one Iceberg table that fall in a given site,
 * search type and month range, by walking the manifests of the table metadata
 * returned by the catalog.
 *
 * @param conn - Connection object; `catalog`, `namespace` and `resolver` are read.
 * @param opts - `table` (table name), `siteId`, `searchType`, and `range`
 *   (`{ start, end }` bounds handed to `monthsInRange`).
 * @returns {Promise<Array<{ filePath: string, objectKey: string, bytes: number, rowCount: number }>>}
 *   One record per matching data file; empty when the table has no current snapshot.
 */
async function listIcebergDataFiles(conn, opts) {
  const { metadata } = await restCatalogLoadTable(conn.catalog, {
    namespace: conn.namespace,
    table: opts.table
  });
  // No snapshot yet means nothing has been committed to the table.
  if (metadata["current-snapshot-id"] == null) return [];
  // Partition values are compared as month ordinals, so convert the requested
  // range once into a set for O(1) membership checks.
  const wantedMonths = new Set(monthsInRange(opts.range).map(monthsSinceEpoch));
  const manifests = await icebergManifests({
    metadata,
    resolver: conn.resolver
  });
  const out = [];
  for (const manifest of manifests) {
    for (const entry of manifest.entries) {
      // Manifest-entry status 2 is DELETED in the Iceberg spec: no longer live.
      if (entry.status === 2) continue;
      const df = entry.data_file;
      // `content` is not written in v1 manifests and defaults to 0 (data) per
      // the Iceberg spec; treating "absent" as "not data" would drop those files.
      if ((df.content ?? 0) !== 0) continue;
      // Tolerate a missing partition tuple: such a file simply cannot match.
      const part = df.partition ?? {};
      if (part.site_id !== opts.siteId) continue;
      if (part.search_type !== opts.searchType) continue;
      const month = part.date_month;
      if (typeof month !== "number" || !wantedMonths.has(month)) continue;
      out.push({
        filePath: df.file_path,
        objectKey: stripBucket(df.file_path),
        // NOTE(review): Number() presumably normalises bigint-decoded Avro longs;
        // confirm against the manifest reader's output types.
        bytes: Number(df.file_size_in_bytes),
        rowCount: Number(df.record_count)
      });
    }
  }
  return out;
}
112
168
  async function dropIcebergTables(conn, tables) {
113
169
  const targets = tables ?? await restCatalogListTables(conn.catalog, { namespace: conn.namespace }).then((list) => list.map((t) => t.name), () => []);
114
170
  const results = [];
@@ -473,4 +529,4 @@ const MIN_SYNC_IMPRESSIONS = 1;
473
529
  const MIN_COUNTRY_IMPRESSIONS = 10;
474
530
  const MAX_SITEMAP_URLS_PER_SITE = 5e4;
475
531
  const MAX_TRACKED_URLS_PER_SITE = 2e5;
476
- export { DEFAULT_SEARCH_TYPE, FILES_PLACEHOLDER, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, SCHEMAS, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, WEIGHT_PRIORITY, allTables, assembleDatesRow, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, connectIcebergCatalog, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIcebergAppendSink, createIcebergTables, createInMemorySink, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dates, dayPartition, dedupeOverlappingTiers, dimensionToColumn, drizzleSchema, dropIcebergTables, ensureIcebergNamespace, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, isCommitRateLimited, listIcebergTables, objectKey, page_queries, pages, parseEnabledSearchTypes, queries, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, splitOverlappingTiers, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
532
+ export { DEFAULT_SEARCH_TYPE, FILES_PLACEHOLDER, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, SCHEMAS, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, WEIGHT_PRIORITY, allTables, assembleDatesRow, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, connectIcebergCatalog, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIcebergAppendSink, createIcebergTables, createInMemorySink, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dates, dayPartition, dedupeOverlappingTiers, dimensionToColumn, drizzleSchema, dropIcebergTables, ensureIcebergNamespace, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, isCommitRateLimited, listIcebergDataFiles, listIcebergTables, objectKey, page_queries, pages, parseEnabledSearchTypes, queries, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, splitOverlappingTiers, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
package/dist/ingest.d.mts CHANGED
@@ -1,4 +1,4 @@
1
- import { C as Row, M as TableName } from "./_chunks/storage.mjs";
1
+ import { Row, TableName } from "./_chunks/storage.mjs";
2
2
  /**
3
3
  * Canonical GSC API dimension order per table. Consumers hitting the raw
4
4
  * `searchanalytics.query` endpoint must request dimensions in this order so
@@ -23,6 +23,10 @@ interface IngestOptions {
23
23
  * `page_queries` tables only.
24
24
  */
25
25
  normalizeQuery?: (query: string) => string | null | undefined;
26
+ /** Date for one-day `searchAppearance` total queries, whose keys omit date. */
27
+ date?: string;
28
+ /** Search appearance filter used for contextual second-step rows. */
29
+ searchAppearance?: string;
26
30
  }
27
31
  /**
28
32
  * Strip a GSC URL to its pathname. Core analytics stores pages by path so
package/dist/ingest.mjs CHANGED
@@ -9,6 +9,13 @@ const TABLE_DIMS = {
9
9
  "date"
10
10
  ],
11
11
  search_appearance: ["searchAppearance", "date"],
12
+ search_appearance_pages: ["page", "date"],
13
+ search_appearance_queries: ["query", "date"],
14
+ search_appearance_page_queries: [
15
+ "page",
16
+ "query",
17
+ "date"
18
+ ],
12
19
  hourly_pages: ["hour", "page"]
13
20
  };
14
21
  function toPath(gscUrl) {
@@ -84,7 +91,7 @@ function transformGscRow(table, apiRow, options = {}) {
84
91
  };
85
92
  }
86
93
  if (table === "search_appearance") {
87
- const date = String(keys[1] ?? "");
94
+ const date = String(keys[1] ?? options.date ?? "");
88
95
  return {
89
96
  date,
90
97
  row: {
@@ -96,6 +103,55 @@ function transformGscRow(table, apiRow, options = {}) {
96
103
  }
97
104
  };
98
105
  }
106
+ if (table === "search_appearance_pages") {
107
+ const date = String(keys[1] ?? "");
108
+ return {
109
+ date,
110
+ row: {
111
+ searchAppearance: String(options.searchAppearance ?? ""),
112
+ url: toPath(String(keys[0] ?? "")),
113
+ date,
114
+ clicks,
115
+ impressions,
116
+ sum_position
117
+ }
118
+ };
119
+ }
120
+ if (table === "search_appearance_queries") {
121
+ const query = String(keys[0] ?? "");
122
+ const date = String(keys[1] ?? "");
123
+ const query_canonical = options.normalizeQuery?.(query) ?? null;
124
+ return {
125
+ date,
126
+ row: {
127
+ searchAppearance: String(options.searchAppearance ?? ""),
128
+ query,
129
+ query_canonical,
130
+ date,
131
+ clicks,
132
+ impressions,
133
+ sum_position
134
+ }
135
+ };
136
+ }
137
+ if (table === "search_appearance_page_queries") {
138
+ const query = String(keys[1] ?? "");
139
+ const date = String(keys[2] ?? "");
140
+ const query_canonical = options.normalizeQuery?.(query) ?? null;
141
+ return {
142
+ date,
143
+ row: {
144
+ searchAppearance: String(options.searchAppearance ?? ""),
145
+ url: toPath(String(keys[0] ?? "")),
146
+ query,
147
+ query_canonical,
148
+ date,
149
+ clicks,
150
+ impressions,
151
+ sum_position
152
+ }
153
+ };
154
+ }
99
155
  if (table === "dates") throw new Error("`dates` rows must be built via assembleDatesRow, not transformGscRow");
100
156
  const query = String(keys[1] ?? "");
101
157
  const date = String(keys[2] ?? "");
@@ -1,2 +1,2 @@
1
- import { a as ResolveWindowOptions, c as comparisonOf, d as padTimeseries, f as periodOf, h as windowToPeriod, i as PadTimeseriesOptions, l as defaultEndDate, m as windowToComparisonPeriod, n as ComparisonMode, o as ResolvedWindow, p as resolveWindow, r as ComparisonPeriod, s as WindowPreset, t as AnalysisPeriod, u as defaultStartDate } from "../_chunks/index.mjs";
1
+ import { AnalysisPeriod, ComparisonMode, ComparisonPeriod, PadTimeseriesOptions, ResolveWindowOptions, ResolvedWindow, WindowPreset, comparisonOf, defaultEndDate, defaultStartDate, padTimeseries, periodOf, resolveWindow, windowToComparisonPeriod, windowToPeriod } from "../_chunks/index.mjs";
2
2
  export { AnalysisPeriod, ComparisonMode, ComparisonPeriod, PadTimeseriesOptions, ResolveWindowOptions, ResolvedWindow, WindowPreset, comparisonOf, defaultEndDate, defaultStartDate, padTimeseries, periodOf, resolveWindow, windowToComparisonPeriod, windowToPeriod };
@@ -1,3 +1,3 @@
1
- import { J as enumeratePartitions } from "./_chunks/storage.mjs";
2
- import { a as substituteNamedFiles, i as resolveParquetSQL, n as ResolvedQuery, r as compileLogicalQueryPlan, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
1
+ import { enumeratePartitions } from "./_chunks/storage.mjs";
2
+ import { FILES_PLACEHOLDER, ResolvedQuery, compileLogicalQueryPlan, resolveParquetSQL, substituteNamedFiles } from "./_chunks/planner.mjs";
3
3
  export { FILES_PLACEHOLDER, type ResolvedQuery, compileLogicalQueryPlan, enumeratePartitions, resolveParquetSQL, substituteNamedFiles };
package/dist/planner.mjs CHANGED
@@ -1,2 +1,2 @@
1
- import { i as substituteNamedFiles, l as enumeratePartitions, n as compileLogicalQueryPlan, r as resolveParquetSQL, t as FILES_PLACEHOLDER } from "./_chunks/parquet-plan.mjs";
1
+ import { FILES_PLACEHOLDER, compileLogicalQueryPlan, enumeratePartitions, resolveParquetSQL, substituteNamedFiles } from "./_chunks/parquet-plan.mjs";
2
2
  export { FILES_PLACEHOLDER, compileLogicalQueryPlan, enumeratePartitions, resolveParquetSQL, substituteNamedFiles };