@gscdump/engine 0.17.3 → 0.17.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import { BuilderState } from "gscdump/query";
1
+ import { BuilderState, SearchType } from "gscdump/query";
2
2
  type AnalysisTool = 'striking-distance' | 'opportunity' | 'movers' | 'decay' | 'zero-click' | 'brand' | 'cannibalization' | 'clustering' | 'concentration' | 'seasonality' | 'trends' | 'ctr-anomaly' | 'position-volatility' | 'long-tail' | 'intent-atlas' | 'query-migration' | 'bayesian-ctr' | 'stl-decompose' | 'change-point' | 'bipartite-pagerank' | 'survival' | 'position-distribution' | 'ctr-curve' | 'dark-traffic' | 'content-velocity' | 'keyword-breadth' | 'device-gap' | 'data-query' | 'data-detail';
3
3
  interface AnalysisParams {
4
4
  type: AnalysisTool;
@@ -37,6 +37,8 @@ interface AnalysisParams {
37
37
  qc?: BuilderState;
38
38
  /** data-query comparison filter applied to joined current/previous rows. */
39
39
  comparisonFilter?: 'new' | 'lost' | 'improving' | 'declining';
40
+ /** GSC slice the analysis is scoped to. Undefined = analyzer runs cross-type (today's behaviour for web-only sites). */
41
+ searchType?: SearchType;
40
42
  }
41
43
  interface AnalysisResult {
42
44
  results: Record<string, unknown>[];
@@ -1,8 +1,8 @@
1
1
  import { i as dimensionToColumn, r as currentSchemaVersion } from "./schema.mjs";
2
2
  import { a as mondayOfWeek, c as quarterOfMonth, d as weekPartition, i as inferSearchType, l as quarterPartition, n as dayPartition, o as monthPartition, s as objectKey } from "./storage.mjs";
3
3
  import { METRIC_EXPR, escapeLike, topLevelPagePredicateSql } from "../sql-fragments.mjs";
4
- import { buildLogicalPlan } from "gscdump/query/plan";
5
4
  import { MS_PER_DAY } from "gscdump";
5
+ import { buildLogicalPlan } from "gscdump/query/plan";
6
6
  const DAILY_PARTITION_RE = /^daily\/(\d{4}-\d{2}-\d{2})$/;
7
7
  const WEEKLY_PARTITION_RE = /^weekly\/(\d{4}-\d{2}-\d{2})$/;
8
8
  const MONTHLY_PARTITION_RE = /^monthly\/(\d{4}-\d{2})$/;
@@ -11,6 +11,10 @@ const DEFAULT_THRESHOLDS = {
11
11
  d7: 30,
12
12
  d30: 90
13
13
  };
14
/**
 * Raw-daily threshold shared with callers.
 * NOTE(review): presumably the number of raw daily files at which compaction
 * is scheduled — the consumer is not visible in this chunk, confirm there.
 */
const RAW_DAILY_COMPACT_THRESHOLD = 7;

/**
 * Counts the manifest entries that are raw daily files.
 *
 * An entry qualifies when it is explicitly tagged `tier === "raw"`, or when it
 * carries no tier at all (null/undefined) and its partition name starts with
 * `daily/`.
 *
 * @param entries Entries exposing `tier` and `partition`.
 * @returns How many entries qualify.
 */
function countRawDailies(entries) {
  const isRawDaily = (entry) => {
    if (entry.tier === "raw") return true;
    // Untiered entries are classified by their partition prefix instead.
    return entry.tier == null && entry.partition.startsWith("daily/");
  };
  return entries.reduce((count, entry) => (isRawDaily(entry) ? count + 1 : count), 0);
}
14
18
  const PENDING_WINDOW_DAYS = 4;
15
19
  const STAGES = [
16
20
  {
@@ -285,4 +289,4 @@ function substituteNamedFiles(sql, sets) {
285
289
  for (const [name, keys] of Object.entries(sets)) out = out.replace(new RegExp(`\\{\\{${name}\\}\\}`, "g"), fileList(keys));
286
290
  return out;
287
291
  }
288
- export { compactTieredImpl as a, substituteNamedFiles as i, compileLogicalQueryPlan as n, enumeratePartitions as o, resolveToSQL as r, FILES_PLACEHOLDER as t };
292
+ export { RAW_DAILY_COMPACT_THRESHOLD as a, enumeratePartitions as c, substituteNamedFiles as i, compileLogicalQueryPlan as n, compactTieredImpl as o, resolveToSQL as r, countRawDailies as s, FILES_PLACEHOLDER as t };
@@ -1,6 +1,6 @@
1
1
  import { r as currentSchemaVersion, t as SCHEMAS } from "./schema.mjs";
2
2
  import { i as inferSearchType, n as dayPartition, s as objectKey, u as tenantPrefix } from "./storage.mjs";
3
- import { a as compactTieredImpl, i as substituteNamedFiles, n as compileLogicalQueryPlan } from "./compiler.mjs";
3
+ import { i as substituteNamedFiles, n as compileLogicalQueryPlan, o as compactTieredImpl } from "./compiler.mjs";
4
4
  import { sqlEscape } from "../sql-bind.mjs";
5
5
  import { buildLogicalPlan } from "gscdump/query/plan";
6
6
  import { normalizeUrl } from "gscdump/normalize";
@@ -1,7 +1,9 @@
1
1
  import { t as SCHEMAS, u as drizzleSchema } from "./schema.mjs";
2
+ import { c as enumeratePartitions } from "./compiler.mjs";
2
3
  import { escapeLike } from "../sql-fragments.mjs";
3
- import { buildLogicalComparisonPlan, buildLogicalPlan } from "gscdump/query/plan";
4
+ import "../planner.mjs";
4
5
  import { PgDialect } from "drizzle-orm/pg-core";
6
+ import { buildLogicalComparisonPlan, buildLogicalPlan } from "gscdump/query/plan";
5
7
  import { normalizeUrl } from "gscdump/normalize";
6
8
  import { sql } from "drizzle-orm";
7
9
  const DIMENSION_SURFACES = {
@@ -756,6 +758,93 @@ function createParquetResolverAdapter() {
756
758
  tableRef: (tk) => sql.raw(`read_parquet({{FILES}}, union_by_name = true) AS "${tk}"`)
757
759
  });
758
760
  }
761
/**
 * Builds the argument object shared by every `runSQL` call of one query run.
 *
 * The result holds the tenant pair (`userId`, `siteId`), the target table and
 * a single file set named `FILES` covering `partitions`. `searchType` is
 * copied over only when the context defines it, so the key is absent rather
 * than present-but-undefined.
 *
 * @param ctx Query context providing `userId`, `siteId`, `table` and optionally `searchType`.
 * @param partitions Partition names to expose through the `FILES` set.
 * @returns The base options; callers add `sql` and `params`.
 */
function runArgs(ctx, partitions) {
  const { userId, siteId, table, searchType } = ctx;
  const scoped = searchType === undefined ? {} : { searchType };
  return {
    ctx: { userId, siteId },
    table,
    fileSets: { FILES: { table, partitions } },
    ...scoped
  };
}
775
/**
 * Runs the optimized main query plus every extras query for a builder state.
 *
 * All statements are issued concurrently against the partitions enumerated
 * for `dateRange`. The main query is expected to repeat the aggregate columns
 * `totalCount`, `totalClicks`, `totalImpressions`, `totalCtr` and
 * `totalPosition` on its rows: they are read from the first row only (falling
 * back to 0 when there is no row or the value is null/undefined) and stripped
 * from every returned row.
 *
 * @param runSQL Executor invoked once per statement.
 * @param ctx Query context (tenant, table, optional search type).
 * @param state Builder state to resolve into SQL.
 * @param dateRange Object with `startDate` and `endDate`.
 * @returns `{ rows, totalCount, totals, extras }`.
 */
async function runOptimizedQuery(runSQL, ctx, state, dateRange) {
  const adapter = createParquetResolverAdapter();
  const partitions = enumeratePartitions(dateRange.startDate, dateRange.endDate);
  const base = runArgs(ctx, partitions);
  // NOTE(review): siteId is deliberately left undefined here — presumably
  // tenant scoping comes from the file set rather than a SQL predicate; confirm.
  const optimized = resolveToSQLOptimized(state, { adapter, siteId: undefined });
  const extras = buildExtrasQueries(state, { adapter, siteId: undefined });

  // Main statement first, then the extras in their declared order.
  const execute = (query) => runSQL({ ...base, sql: query.sql, params: query.params });
  const [optRes, ...extrasRows] = await Promise.all([
    execute(optimized),
    ...extras.map((extra) => execute(extra))
  ]);

  const head = optRes.rows[0];
  const aggregate = (value) => Number(value ?? 0);
  const totalCount = aggregate(head?.totalCount);
  const totals = {
    clicks: aggregate(head?.totalClicks),
    impressions: aggregate(head?.totalImpressions),
    ctr: aggregate(head?.totalCtr),
    position: aggregate(head?.totalPosition)
  };

  const rows = optRes.rows.map((row) => {
    const {
      totalCount: _count,
      totalClicks: _clicks,
      totalImpressions: _impressions,
      totalCtr: _ctr,
      totalPosition: _position,
      ...dataColumns
    } = row;
    return dataColumns;
  });

  return {
    rows,
    totalCount,
    totals,
    extras: extras.map((extra, index) => ({
      key: extra.key,
      rows: extrasRows[index].rows
    }))
  };
}
816
/**
 * Runs a current-vs-previous comparison: the joined row query, its row count
 * and the totals of the current window.
 *
 * The three statements do not depend on each other, so they are issued
 * concurrently (as `runOptimizedQuery` does for its own statements) instead
 * of one after another. All of them read the partitions spanning the earliest
 * start to the latest end of the two windows.
 *
 * @param runSQL Executor invoked once per statement.
 * @param ctx Query context (tenant, table, optional search type).
 * @param current Builder state of the current window.
 * @param previous Builder state of the previous window.
 * @param windows `{ current, previous }`, each with `startDate` and `endDate`.
 * @param filter Optional comparison filter forwarded to the resolver.
 * @returns `{ rows, totalCount, totals }`; `totalCount` is 0 and `totals` is
 *   `{}` when the respective query returns no row.
 */
async function runComparisonQuery(runSQL, ctx, current, previous, windows, filter) {
  const adapter = createParquetResolverAdapter();
  const comparison = resolveComparisonSQL(current, previous, {
    adapter,
    siteId: void 0
  }, filter);
  const totals = buildTotalsSql(current, {
    adapter,
    siteId: void 0
  });

  // Dates are compared as strings — assumes ISO `YYYY-MM-DD`, which orders
  // lexicographically; TODO confirm against callers.
  const { current: cur, previous: prev } = windows;
  const startDate = cur.startDate < prev.startDate ? cur.startDate : prev.startDate;
  const endDate = cur.endDate > prev.endDate ? cur.endDate : prev.endDate;
  const base = runArgs(ctx, enumeratePartitions(startDate, endDate));

  const [main, count, totalsRow] = await Promise.all([
    runSQL({
      ...base,
      sql: comparison.sql,
      params: comparison.params
    }),
    runSQL({
      ...base,
      sql: comparison.countSql,
      params: comparison.countParams
    }),
    runSQL({
      ...base,
      sql: totals.sql,
      params: totals.params
    })
  ]);

  return {
    rows: main.rows,
    totalCount: Number(count.rows[0]?.total ?? 0),
    totals: totalsRow.rows[0] ?? {}
  };
}
759
848
  function assertSchemaInSync(options) {
760
849
  const { label, schema, tableKeyToName, mode } = options;
761
850
  for (const [key, table] of Object.entries(schema)) {
@@ -766,4 +855,4 @@ function assertSchemaInSync(options) {
766
855
  if (missing.length > 0 || extra.length > 0) throw new Error(`${label} drizzle schema for '${key}' drifted from SCHEMAS. Missing: [${missing.join(", ")}]. Extra: [${extra.join(", ")}].`);
767
856
  }
768
857
  }
769
- export { dimensionColumn as C, assertDimensionsSupported as S, supportsDimensionOnSurface as T, resolveToSQLOptimized as _, getDimensionFilters as a, DIMENSION_SURFACES as b, matchesDimensionFilter as c, metricValue as d, buildExtrasQueries as f, resolveToSQL as g, resolveComparisonSQL as h, dimensionValue as i, matchesMetricFilter as l, mergeExtras as m, createParquetResolverAdapter as n, getFilterDimensions as o, buildTotalsSql as p, pgResolverAdapter as r, getInternalFilters as s, assertSchemaInSync as t, matchesTopLevelPage as u, createResolverAdapter as v, inferLogicalDataset as w, LOGICAL_DATASETS as x, createSqlFragments as y };
858
+ export { LOGICAL_DATASETS as C, supportsDimensionOnSurface as D, inferLogicalDataset as E, DIMENSION_SURFACES as S, dimensionColumn as T, resolveComparisonSQL as _, pgResolverAdapter as a, createResolverAdapter as b, getFilterDimensions as c, matchesMetricFilter as d, matchesTopLevelPage as f, mergeExtras as g, buildTotalsSql as h, createParquetResolverAdapter as i, getInternalFilters as l, buildExtrasQueries as m, runComparisonQuery as n, dimensionValue as o, metricValue as p, runOptimizedQuery as r, getDimensionFilters as s, assertSchemaInSync as t, matchesDimensionFilter as u, resolveToSQL as v, assertDimensionsSupported as w, createSqlFragments as x, resolveToSQLOptimized as y };
@@ -11,6 +11,11 @@ interface CompactionThresholds {
11
11
  d7?: number;
12
12
  d30?: number;
13
13
  }
14
+ declare const RAW_DAILY_COMPACT_THRESHOLD = 7;
15
+ declare function countRawDailies(entries: ReadonlyArray<{
16
+ tier?: string | null;
17
+ partition: string;
18
+ }>): number;
14
19
  declare function enumeratePartitions(startDate: string, endDate: string): string[];
15
20
  /**
16
21
  * Default `searchType` for entries written before the field landed and for
@@ -468,4 +473,4 @@ interface EngineOptions {
468
473
  }
469
474
  declare function dayPartition(date: string): string;
470
475
  declare function objectKey(ctx: TenantCtx, table: TableName, partition: string, version: number, searchType?: SearchType): string;
471
- export { SyncStateScope as A, inferSearchType as B, RunSQLOptions as C, SyncStateDetail as D, SyncState as E, WatermarkScope as F, CompactionThresholds as H, WriteCtx as I, WriteResult as L, TenantCtx$1 as M, Watermark as N, SyncStateFilter as O, WatermarkFilter as P, dayPartition as R, Row$1 as S, StorageEngine as T, enumeratePartitions as U, objectKey as V, QueryCtx as _, EngineOptions as a, QueryExecutor as b, ListLiveFilter as c, ManifestPurgeResult as d, ManifestStore as f, PurgeUrlsResult as g, PurgeResult as h, DataSource as i, TableName$1 as j, SyncStateKind as k, LockScope as l, PurgeFilter as m, CompactionTier as n, FileSetRef as o, ParquetCodec as p, DEFAULT_SEARCH_TYPE as r, GcCtx as s, CodecCtx as t, ManifestEntry as u, QueryExecuteOptions as v, SearchType$1 as w, QueryResult as x, QueryExecuteResult as y, inferLegacyTier as z };
476
+ export { SyncStateScope as A, inferSearchType as B, RunSQLOptions as C, SyncStateDetail as D, SyncState as E, WatermarkScope as F, enumeratePartitions as G, CompactionThresholds as H, WriteCtx as I, WriteResult as L, TenantCtx$1 as M, Watermark as N, SyncStateFilter as O, WatermarkFilter as P, dayPartition as R, Row$1 as S, StorageEngine as T, RAW_DAILY_COMPACT_THRESHOLD as U, objectKey as V, countRawDailies as W, QueryCtx as _, EngineOptions as a, QueryExecutor as b, ListLiveFilter as c, ManifestPurgeResult as d, ManifestStore as f, PurgeUrlsResult as g, PurgeResult as h, DataSource as i, TableName$1 as j, SyncStateKind as k, LockScope as l, PurgeFilter as m, CompactionTier as n, FileSetRef as o, ParquetCodec as p, DEFAULT_SEARCH_TYPE as r, GcCtx as s, CodecCtx as t, ManifestEntry as u, QueryExecuteOptions as v, SearchType$1 as w, QueryResult as x, QueryExecuteResult as y, inferLegacyTier as z };
package/dist/index.d.mts CHANGED
@@ -1,13 +1,116 @@
1
- import { A as SyncStateScope, B as inferSearchType, C as RunSQLOptions, D as SyncStateDetail, E as SyncState, F as WatermarkScope, H as CompactionThresholds, I as WriteCtx, L as WriteResult, M as TenantCtx, N as Watermark, O as SyncStateFilter, P as WatermarkFilter, R as dayPartition, S as Row, T as StorageEngine, U as enumeratePartitions, V as objectKey, _ as QueryCtx, a as EngineOptions, b as QueryExecutor, c as ListLiveFilter, d as ManifestPurgeResult, f as ManifestStore, g as PurgeUrlsResult, h as PurgeResult, i as DataSource, j as TableName, k as SyncStateKind, l as LockScope, m as PurgeFilter, n as CompactionTier, o as FileSetRef, p as ParquetCodec, r as DEFAULT_SEARCH_TYPE, s as GcCtx, t as CodecCtx, u as ManifestEntry, v as QueryExecuteOptions, w as SearchType, x as QueryResult, y as QueryExecuteResult, z as inferLegacyTier } from "./_chunks/storage.mjs";
1
+ import { A as SyncStateScope, B as inferSearchType, C as RunSQLOptions, D as SyncStateDetail, E as SyncState, F as WatermarkScope, G as enumeratePartitions, H as CompactionThresholds, I as WriteCtx, L as WriteResult, M as TenantCtx, N as Watermark, O as SyncStateFilter, P as WatermarkFilter, R as dayPartition, S as Row, T as StorageEngine, U as RAW_DAILY_COMPACT_THRESHOLD, V as objectKey, W as countRawDailies, _ as QueryCtx, a as EngineOptions, b as QueryExecutor, c as ListLiveFilter, d as ManifestPurgeResult, f as ManifestStore, g as PurgeUrlsResult, h as PurgeResult, i as DataSource, j as TableName, k as SyncStateKind, l as LockScope, m as PurgeFilter, n as CompactionTier, o as FileSetRef, p as ParquetCodec, r as DEFAULT_SEARCH_TYPE, s as GcCtx, t as CodecCtx, u as ManifestEntry, v as QueryExecuteOptions, w as SearchType, x as QueryResult, y as QueryExecuteResult, z as inferLegacyTier } from "./_chunks/storage.mjs";
2
2
  import { a as createDuckDBExecutor, i as createDuckDBCodec, n as DuckDBHandle, r as canonicalEmptyParquetSchema, t as DuckDBFactory } from "./_chunks/duckdb.mjs";
3
3
  import { _ as pages, a as allTables, c as inferTable, d as TABLE_METADATA, f as countries, g as page_keywords, h as keywords, i as TableSchema, m as drizzleSchema, n as ColumnType, o as currentSchemaVersion, p as devices, r as SCHEMAS, s as dimensionToColumn, t as ColumnDef, u as DrizzleSchema } from "./_chunks/schema.mjs";
4
4
  import { InspectionVerdict, SchedulePolicy, ScheduleState, fixedPolicy, inspectionPolicy, sitemapPolicy } from "./schedule.mjs";
5
5
  import { GscApiRow, IngestOptions, RowAccumulator, RowAccumulatorOptions, createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
6
6
  import { a as substituteNamedFiles, i as resolveToSQL, n as ResolvedQuery, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
7
7
  import { bindLiterals, formatLiteral } from "./sql-bind.mjs";
8
- import { Row as Row$1 } from "@gscdump/contracts";
8
+ import { Row as Row$1, TableName as TableName$1 } from "@gscdump/contracts";
9
9
  declare function coerceRow(row: Row$1): Row$1;
10
10
  declare function coerceRows(rows: readonly Row$1[]): Row$1[];
11
11
  declare const MAX_DAY_BYTES: number;
12
12
  declare function createStorageEngine(opts: EngineOptions): StorageEngine;
13
- export { type CodecCtx, type ColumnDef, type ColumnType, type CompactionThresholds, type CompactionTier, DEFAULT_SEARCH_TYPE, type DataSource, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, type EngineOptions, FILES_PLACEHOLDER, type FileSetRef, type GcCtx, type GscApiRow, type IngestOptions, type InspectionVerdict, type ListLiveFilter, type LockScope, MAX_DAY_BYTES, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SchedulePolicy, type ScheduleState, type SearchType, type StorageEngine, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, TABLE_METADATA, type TableName, type TableSchema, type TenantCtx, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countries, createDuckDBCodec, createDuckDBExecutor, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, keywords, objectKey, page_keywords, pages, resolveToSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow };
13
+ interface IngestAccumulatorEngine {
14
+ writeDay: (scope: TenantCtx & {
15
+ table: TableName$1;
16
+ date: string;
17
+ searchType?: SearchType;
18
+ }, rows: Row$1[]) => Promise<void>;
19
+ setSyncState: (scope: TenantCtx & {
20
+ table: TableName$1;
21
+ date: string;
22
+ searchType?: SearchType;
23
+ }, state: 'done' | 'failed', info?: {
24
+ error?: string;
25
+ }) => Promise<void>;
26
+ }
27
+ interface IngestAccumulatorCtx {
28
+ userId: string | number;
29
+ siteId: string;
30
+ searchType?: SearchType;
31
+ }
32
+ interface IngestAccumulatorHooks {
33
+ /**
34
+ * Called once per (table, date) when the job must abandon in-memory rows
35
+ * (overflow or `hasMore` continuation). Host queues a forced re-sync from
36
+ * the source. Return true iff a recovery job was actually queued.
37
+ */
38
+ onRecover: (table: TableName$1, date: string) => Promise<boolean>;
39
+ /**
40
+ * Called when an engine.writeDay fails or recovery itself errors. Host
41
+ * logs to its error sink (e.g. `r2_write_errors` D1 table).
42
+ */
43
+ onWriteError: (info: {
44
+ table: TableName$1 | null;
45
+ date: string | null;
46
+ error: unknown;
47
+ }) => Promise<void>;
48
+ /**
49
+ * Called after a successful writeDay for a (table, date). Host typically
50
+ * busts the manifest cache here so the next read sees the new parquet.
51
+ */
52
+ onWritten?: (info: {
53
+ table: TableName$1;
54
+ date: string;
55
+ rowCount: number;
56
+ }) => void | Promise<void>;
57
+ /**
58
+ * Called once at end of `finalize`, only when at least one (table, date)
59
+ * actually landed. Host queues rollup rebuild + compaction.
60
+ */
61
+ onJobComplete?: (info: {
62
+ flushed: number;
63
+ rowsWritten: number;
64
+ }) => Promise<void>;
65
+ }
66
+ interface FinalizeOptions {
67
+ /**
68
+ * The GSC `hasMore` flag for the whole job. When true, in-memory buckets
69
+ * only reflect this job's slice; we re-queue forced single-day re-syncs
70
+ * via `onRecover` so R2 stays authoritative.
71
+ */
72
+ hasMore: boolean;
73
+ }
74
+ interface FinalizeResult {
75
+ flushed: number;
76
+ recovered: number;
77
+ failed: number;
78
+ rowsWritten: number;
79
+ }
80
+ interface IngestAccumulator {
81
+ push: (table: TableName$1, rows: readonly GscApiRow[]) => boolean;
82
+ finalize: (opts: FinalizeOptions) => Promise<FinalizeResult>;
83
+ }
84
+ interface CreateIngestAccumulatorOptions extends RowAccumulatorOptions {
85
+ engine: IngestAccumulatorEngine;
86
+ ctx: IngestAccumulatorCtx;
87
+ hooks: IngestAccumulatorHooks;
88
+ }
89
+ declare function createNoopIngestAccumulator(): IngestAccumulator;
90
+ declare function createIngestAccumulator(opts: CreateIngestAccumulatorOptions): IngestAccumulator;
91
+ type SyncTableName = Extract<TableName$1, 'pages' | 'keywords' | 'countries' | 'devices' | 'page_keywords'>;
92
+ declare const TABLES_BY_SEARCH_TYPE: Record<SearchType, readonly SyncTableName[]>;
93
+ declare function parseEnabledSearchTypes(raw: string | null | undefined): SearchType[];
94
+ declare function validateEnabledSearchTypes(value: unknown): SearchType[];
95
+ declare const TABLE_TIERS: {
96
+ readonly pages: "critical";
97
+ readonly keywords: "critical";
98
+ readonly countries: "standard";
99
+ readonly devices: "standard";
100
+ readonly page_keywords: "extended";
101
+ };
102
+ type TieredTableName = keyof typeof TABLE_TIERS;
103
+ type TableTier = 'critical' | 'standard' | 'extended';
104
+ type DateWeight = 'fresh' | 'recent' | 'historical';
105
+ declare function getTableTier(table: string): TableTier;
106
+ declare function getTablesForTier(tier: TableTier): TieredTableName[];
107
+ declare function getDateWeight(date: string, now?: Date): DateWeight;
108
+ declare const TIER_PRIORITY: Record<TableTier, number>;
109
+ declare const WEIGHT_PRIORITY: Record<DateWeight, number>;
110
+ declare const MAX_GSC_PAGES_R2 = 40;
111
+ declare const ROW_LIMIT_R2 = 10000;
112
+ declare const MIN_SYNC_IMPRESSIONS = 1;
113
+ declare const MIN_COUNTRY_IMPRESSIONS = 10;
114
+ declare const MAX_SITEMAP_URLS_PER_SITE = 50000;
115
+ declare const MAX_TRACKED_URLS_PER_SITE = 200000;
116
+ export { type CodecCtx, type ColumnDef, type ColumnType, type CompactionThresholds, type CompactionTier, type CreateIngestAccumulatorOptions, DEFAULT_SEARCH_TYPE, type DataSource, type DateWeight, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, type EngineOptions, FILES_PLACEHOLDER, type FileSetRef, type FinalizeOptions, type FinalizeResult, type GcCtx, type GscApiRow, type IngestAccumulator, type IngestAccumulatorCtx, type IngestAccumulatorEngine, type IngestAccumulatorHooks, type IngestOptions, type InspectionVerdict, type ListLiveFilter, type LockScope, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SchedulePolicy, type ScheduleState, type SearchType, type StorageEngine, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, type SyncTableName, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, type TableName, type TableSchema, type TableTier, type TenantCtx, type TieredTableName, WEIGHT_PRIORITY, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, getDateWeight, getTableTier, getTablesForTier, 
inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, keywords, objectKey, page_keywords, pages, parseEnabledSearchTypes, resolveToSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
package/dist/index.mjs CHANGED
@@ -1,10 +1,217 @@
1
1
  import { n as coerceRows, t as coerceRow } from "./_chunks/coerce.mjs";
2
2
  import { a as inferTable, c as countries, d as keywords, f as page_keywords, i as dimensionToColumn, l as devices, n as allTables, p as pages, r as currentSchemaVersion, s as TABLE_METADATA, t as SCHEMAS, u as drizzleSchema } from "./_chunks/schema.mjs";
3
3
  import { i as inferSearchType, n as dayPartition, r as inferLegacyTier, s as objectKey, t as DEFAULT_SEARCH_TYPE } from "./_chunks/storage.mjs";
4
- import { i as substituteNamedFiles, o as enumeratePartitions, r as resolveToSQL, t as FILES_PLACEHOLDER } from "./_chunks/compiler.mjs";
4
+ import { a as RAW_DAILY_COMPACT_THRESHOLD, c as enumeratePartitions, i as substituteNamedFiles, r as resolveToSQL, s as countRawDailies, t as FILES_PLACEHOLDER } from "./_chunks/compiler.mjs";
5
5
  import { bindLiterals, formatLiteral } from "./sql-bind.mjs";
6
6
  import { a as createDuckDBExecutor, i as createDuckDBCodec, n as createStorageEngine, r as canonicalEmptyParquetSchema, t as MAX_DAY_BYTES } from "./_chunks/engine.mjs";
7
7
  import { createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
8
8
  import "./planner.mjs";
9
9
  import { fixedPolicy, inspectionPolicy, sitemapPolicy } from "./schedule.mjs";
10
- export { DEFAULT_SEARCH_TYPE, FILES_PLACEHOLDER, MAX_DAY_BYTES, SCHEMAS, TABLE_METADATA, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countries, createDuckDBCodec, createDuckDBExecutor, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, keywords, objectKey, page_keywords, pages, resolveToSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow };
10
+ const NOOP_RESULT = {
11
+ flushed: 0,
12
+ recovered: 0,
13
+ failed: 0,
14
+ rowsWritten: 0
15
+ };
16
+ function scopeOf(ctx, table, date) {
17
+ return {
18
+ userId: String(ctx.userId),
19
+ siteId: ctx.siteId,
20
+ table,
21
+ date,
22
+ ...ctx.searchType !== void 0 ? { searchType: ctx.searchType } : {}
23
+ };
24
+ }
25
+ function createNoopIngestAccumulator() {
26
+ return {
27
+ push() {
28
+ return false;
29
+ },
30
+ async finalize() {
31
+ return NOOP_RESULT;
32
+ }
33
+ };
34
+ }
35
/**
 * Creates an ingest accumulator that buffers rows per (table, date) and, on
 * `finalize`, either writes every bucket through the engine or abandons the
 * buffered rows and asks the host to re-sync them.
 *
 * Engine calls and host hooks are awaited inside `try`/`catch`, so a hook
 * that throws synchronously or returns a non-promise is handled exactly like
 * one that rejects; `finalize` does not reject because of a failing hook or
 * engine write.
 *
 * @param opts `engine`, `ctx` and `hooks`, plus the remaining options which
 *   are forwarded to `createRowAccumulator`.
 * @returns An object with `push` and `finalize`.
 */
function createIngestAccumulator(opts) {
  const { engine, ctx, hooks, ...accOpts } = opts;
  const acc = createRowAccumulator(accOpts);

  /** Reports to the host error sink; reporting itself is best-effort. */
  async function reportError(info) {
    try {
      await hooks.onWriteError(info);
    } catch {
      // A failing error sink must not abort the job.
    }
  }

  /**
   * Writes one bucket, marks it "done" and notifies `onWritten`. A failure
   * in any of those steps is reported and yields `{ ok: false }`.
   */
  async function writeOne(table, date, rows) {
    try {
      const scope = scopeOf(ctx, table, date);
      await engine.writeDay(scope, rows);
      await engine.setSyncState(scope, "done");
      await hooks.onWritten?.({
        table,
        date,
        rowCount: rows.length
      });
      return {
        ok: true,
        rows: rows.length
      };
    } catch (err) {
      await reportError({
        table,
        date,
        error: err
      });
      return { ok: false };
    }
  }

  /**
   * Marks one (table, date) as failed (best-effort) and asks the host to
   * queue a re-sync. Resolves to the hook's answer, or `false` when the hook
   * fails.
   */
  async function recover(table, date) {
    try {
      await engine.setSyncState(scopeOf(ctx, table, date), "failed", { error: "mid-continuation-skip" });
    } catch {
      // The state marker is advisory; recovery proceeds without it.
    }
    try {
      return await hooks.onRecover(table, date);
    } catch (err) {
      await reportError({
        table,
        date,
        error: err
      });
      return false;
    }
  }

  return {
    push(table, rows) {
      return acc.push(table, rows);
    },
    /**
     * Flushes or abandons the buffered rows.
     *
     * When the accumulator overflowed or `hasMore` is set, nothing is
     * written: every buffered (table, date) goes through `recover`, and an
     * overflow is additionally reported once. Otherwise each bucket is
     * written, and `onJobComplete` runs when at least one write landed.
     *
     * @returns Counts of flushed, recovered and failed buckets plus rows written.
     */
    async finalize({ hasMore }) {
      // Read the counters before draining — presumably `drain` resets them; confirm.
      const overflowed = acc.overflowed;
      const totalRows = acc.totalRows;
      const buckets = acc.drain();

      if (overflowed || hasMore) {
        const tasks = [];
        for (const [table, byDate] of buckets) {
          for (const date of byDate.keys()) tasks.push(recover(table, date));
        }
        // `recover` never rejects, so every outcome is counted.
        const results = await Promise.all(tasks);
        if (overflowed) {
          await reportError({
            table: null,
            date: null,
            error: /* @__PURE__ */ new Error(`ingest accumulator overflow at ${totalRows} rows; recovering via forced re-sync`)
          });
        }
        return {
          flushed: 0,
          recovered: results.filter(Boolean).length,
          failed: 0,
          rowsWritten: 0
        };
      }

      const writes = [];
      for (const [table, byDate] of buckets) {
        for (const [date, rows] of byDate) writes.push(writeOne(table, date, rows));
      }
      const outcomes = await Promise.all(writes);

      let flushed = 0;
      let failed = 0;
      let rowsWritten = 0;
      for (const outcome of outcomes) {
        if (outcome.ok) {
          flushed++;
          rowsWritten += outcome.rows;
        } else {
          failed++;
        }
      }

      if (flushed > 0) {
        try {
          await hooks.onJobComplete?.({
            flushed,
            rowsWritten
          });
        } catch {
          // Completion notification is best-effort; the data already landed.
        }
      }

      return {
        flushed,
        recovered: 0,
        failed,
        rowsWritten
      };
    }
  };
}
125
/** Tables listed for each search type; only `web` includes the keyword tables. */
const TABLES_BY_SEARCH_TYPE = {
  web: [
    "pages",
    "keywords",
    "countries",
    "devices",
    "page_keywords"
  ],
  discover: [
    "pages",
    "countries",
    "devices"
  ],
  news: [
    "pages",
    "countries",
    "devices"
  ],
  googleNews: [
    "pages",
    "countries",
    "devices"
  ],
  image: [
    "pages",
    "countries",
    "devices"
  ],
  video: [
    "pages",
    "countries",
    "devices"
  ]
};

/**
 * True when `value` is a string naming an own key of `TABLES_BY_SEARCH_TYPE`.
 * An own-property check is used because `in` also matches inherited members
 * such as "toString" or "constructor".
 */
function isKnownSearchType(value) {
  return typeof value === "string" && Object.prototype.hasOwnProperty.call(TABLES_BY_SEARCH_TYPE, value);
}

/**
 * Leniently parses a stored JSON list of enabled search types.
 *
 * Falls back to `["web"]` for empty input, malformed JSON, non-array values
 * and lists without any known search type. Unknown entries and duplicates are
 * dropped (first occurrence wins), and "web" is prepended when missing.
 *
 * @param raw JSON text, or null/undefined.
 * @returns A non-empty, duplicate-free list that always contains "web".
 */
function parseEnabledSearchTypes(raw) {
  if (!raw) return ["web"];
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // Malformed stored value: fall back like every other invalid input.
    return ["web"];
  }
  if (!Array.isArray(parsed)) return ["web"];
  const valid = [...new Set(parsed.filter((v) => isKnownSearchType(v)))];
  if (valid.length === 0) return ["web"];
  if (!valid.includes("web")) valid.unshift("web");
  return valid;
}

/**
 * Strictly validates a list of enabled search types.
 *
 * @param value Candidate value.
 * @returns The list without duplicates, in first-occurrence order.
 * @throws Error when `value` is not a non-empty array, contains an entry
 *   that is not a known search type, or lacks "web".
 */
function validateEnabledSearchTypes(value) {
  if (!Array.isArray(value) || value.length === 0) throw new Error("enabledSearchTypes must be a non-empty array");
  const seen = /* @__PURE__ */ new Set();
  for (const v of value) {
    if (!isKnownSearchType(v)) throw new Error(`enabledSearchTypes: unknown searchType ${String(v)}`);
    // A Set keeps insertion order, so re-adding a duplicate is a no-op.
    seen.add(v);
  }
  if (!seen.has("web")) throw new Error("enabledSearchTypes must include \"web\"");
  return [...seen];
}
181
/** Tier assigned to each known table. */
const TABLE_TIERS = {
  pages: "critical",
  keywords: "critical",
  countries: "standard",
  devices: "standard",
  page_keywords: "extended"
};

/**
 * Looks up the tier of a table.
 *
 * Only own keys of `TABLE_TIERS` count, so a name that happens to match an
 * inherited member ("toString", "constructor", ...) gets the default instead
 * of a function from `Object.prototype`.
 *
 * @param table Table name.
 * @returns The table's tier, or "extended" for unknown names.
 */
function getTableTier(table) {
  return Object.prototype.hasOwnProperty.call(TABLE_TIERS, table) ? TABLE_TIERS[table] : "extended";
}

/**
 * Lists the tables assigned to a tier, in `TABLE_TIERS` declaration order.
 *
 * @param tier Tier to filter by.
 * @returns Table names; empty when no table has that tier.
 */
function getTablesForTier(tier) {
  return Object.entries(TABLE_TIERS).filter(([, assigned]) => assigned === tier).map(([name]) => name);
}
194
/**
 * Classifies a date by its age relative to `now`.
 *
 * The age is the number of whole days elapsed (rounded down): up to 3 days is
 * "fresh", up to 60 days is "recent", anything else is "historical". Dates in
 * the future have a negative age and therefore count as "fresh"; an
 * unparseable date produces a NaN age and falls through to "historical".
 *
 * @param date Date string accepted by the `Date` constructor.
 * @param now Reference instant; defaults to the current time.
 * @returns "fresh", "recent" or "historical".
 */
function getDateWeight(date, now = /* @__PURE__ */ new Date()) {
  const millisPerDay = 24 * 60 * 60 * 1e3;
  const elapsedMillis = now.getTime() - new Date(date).getTime();
  const ageInDays = Math.floor(elapsedMillis / millisPerDay);
  if (ageInDays <= 3) return "fresh";
  return ageInDays <= 60 ? "recent" : "historical";
}
201
/**
 * Numeric rank per table tier.
 * NOTE(review): presumably a lower number is scheduled first — confirm against the consumer.
 */
const TIER_PRIORITY = { critical: 0, standard: 1, extended: 2 };

/**
 * Numeric rank per date weight.
 * NOTE(review): presumably a lower number is scheduled first — confirm against the consumer.
 */
const WEIGHT_PRIORITY = { fresh: 0, recent: 1, historical: 2 };

// Sync limits exported for hosts. Names suggest paging, row and URL caps;
// the exact units are defined by the consumers, which are not visible here.
const MAX_GSC_PAGES_R2 = 40;
const ROW_LIMIT_R2 = 10000;
const MIN_SYNC_IMPRESSIONS = 1;
const MIN_COUNTRY_IMPRESSIONS = 10;
const MAX_SITEMAP_URLS_PER_SITE = 50000;
const MAX_TRACKED_URLS_PER_SITE = 200000;
217
+ export { DEFAULT_SEARCH_TYPE, FILES_PLACEHOLDER, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, SCHEMAS, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, WEIGHT_PRIORITY, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, getDateWeight, getTableTier, getTablesForTier, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, keywords, objectKey, page_keywords, pages, parseEnabledSearchTypes, resolveToSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
@@ -1,3 +1,3 @@
1
- import { U as enumeratePartitions } from "./_chunks/storage.mjs";
1
+ import { G as enumeratePartitions } from "./_chunks/storage.mjs";
2
2
  import { a as substituteNamedFiles, i as resolveToSQL, n as ResolvedQuery, r as compileLogicalQueryPlan, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
3
3
  export { FILES_PLACEHOLDER, ResolvedQuery, compileLogicalQueryPlan, enumeratePartitions, resolveToSQL, substituteNamedFiles };
package/dist/planner.mjs CHANGED
@@ -1,2 +1,2 @@
1
- import { i as substituteNamedFiles, n as compileLogicalQueryPlan, o as enumeratePartitions, r as resolveToSQL, t as FILES_PLACEHOLDER } from "./_chunks/compiler.mjs";
1
+ import { c as enumeratePartitions, i as substituteNamedFiles, n as compileLogicalQueryPlan, r as resolveToSQL, t as FILES_PLACEHOLDER } from "./_chunks/compiler.mjs";
2
2
  export { FILES_PLACEHOLDER, compileLogicalQueryPlan, enumeratePartitions, resolveToSQL, substituteNamedFiles };
@@ -1,3 +1,4 @@
1
+ import { j as TableName$1, w as SearchType$1 } from "../_chunks/storage.mjs";
1
2
  import { a as ResolvedSQLOptimized, i as ResolvedSQL, n as ExtraQuery, o as ResolverAdapter, r as ResolvedComparisonSQL, s as ResolverOptions, t as ComparisonFilter } from "../_chunks/types.mjs";
2
3
  import { LogicalDataset, LogicalDataset as LogicalDataset$1, PlannerCapabilities } from "gscdump/query/plan";
3
4
  import { SQL } from "drizzle-orm";
@@ -96,6 +97,63 @@ declare const pgResolverAdapter: ResolverAdapter<PgTableKey>;
96
97
  * accidental adapter caching that would lock in a stale `{{FILES}}` set.
97
98
  */
98
99
  declare function createParquetResolverAdapter(): ResolverAdapter<PgTableKey>;
100
+ interface RunQueryCtx {
101
+ userId: string;
102
+ siteId: string;
103
+ table: TableName$1;
104
+ searchType?: SearchType$1;
105
+ }
106
+ interface RunSQLFn {
107
+ (opts: {
108
+ ctx: {
109
+ userId: string;
110
+ siteId: string;
111
+ };
112
+ table: TableName$1;
113
+ fileSets: Record<string, {
114
+ table: TableName$1;
115
+ partitions: string[];
116
+ }>;
117
+ sql: string;
118
+ params: unknown[];
119
+ searchType?: SearchType$1;
120
+ }): Promise<{
121
+ rows: Array<Record<string, unknown>>;
122
+ }>;
123
+ }
124
+ interface OptimizedQueryResult {
125
+ rows: Array<Record<string, unknown>>;
126
+ totalCount: number;
127
+ totals: {
128
+ clicks: number;
129
+ impressions: number;
130
+ ctr: number;
131
+ position: number;
132
+ };
133
+ extras: Array<{
134
+ key: string;
135
+ rows: Array<Record<string, unknown>>;
136
+ }>;
137
+ }
138
+ interface ComparisonQueryResult {
139
+ rows: Array<Record<string, unknown>>;
140
+ totalCount: number;
141
+ totals: Record<string, unknown>;
142
+ }
143
+ declare function runOptimizedQuery(runSQL: RunSQLFn, ctx: RunQueryCtx, state: BuilderState, dateRange: {
144
+ startDate: string;
145
+ endDate: string;
146
+ }): Promise<OptimizedQueryResult>;
147
+ declare function runComparisonQuery(runSQL: RunSQLFn, ctx: RunQueryCtx, current: BuilderState, previous: BuilderState, windows: {
148
+ current: {
149
+ startDate: string;
150
+ endDate: string;
151
+ };
152
+ previous: {
153
+ startDate: string;
154
+ endDate: string;
155
+ };
156
+ }, filter?: ComparisonFilter): Promise<ComparisonQueryResult>;
99
157
  interface AssertSchemaInSyncOptions {
100
158
  /** Label used in the thrown error (e.g. 'browser', 'sqlite'). */
101
159
  label: string;
@@ -109,4 +167,4 @@ interface AssertSchemaInSyncOptions {
109
167
  mode: 'exact' | 'superset';
110
168
  }
111
169
  declare function assertSchemaInSync(options: AssertSchemaInSyncOptions): void;
112
- export { type AssertSchemaInSyncOptions, type ComparisonFilter, type CreateResolverAdapterConfig, DIMENSION_SURFACES, type DimensionBinding, type DimensionSurface, type ExtraQuery, LOGICAL_DATASETS, type LogicalDataset, type LogicalDatasetDefinition, type PgTableKey, type ResolvedComparisonSQL, type ResolvedSQL, type ResolvedSQLOptimized, type ResolverAdapter, type ResolverOptions, type SqlFragments, type SqlFragmentsConfig, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, supportsDimensionOnSurface };
170
+ export { type AssertSchemaInSyncOptions, type ComparisonFilter, type ComparisonQueryResult, type CreateResolverAdapterConfig, DIMENSION_SURFACES, type DimensionBinding, type DimensionSurface, type ExtraQuery, LOGICAL_DATASETS, type LogicalDataset, type LogicalDatasetDefinition, type OptimizedQueryResult, type PgTableKey, type ResolvedComparisonSQL, type ResolvedSQL, type ResolvedSQLOptimized, type ResolverAdapter, type ResolverOptions, type RunQueryCtx, type RunSQLFn, type SqlFragments, type SqlFragmentsConfig, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, runComparisonQuery, runOptimizedQuery, supportsDimensionOnSurface };
@@ -1,2 +1,2 @@
1
- import { C as dimensionColumn, S as assertDimensionsSupported, T as supportsDimensionOnSurface, _ as resolveToSQLOptimized, a as getDimensionFilters, b as DIMENSION_SURFACES, c as matchesDimensionFilter, d as metricValue, f as buildExtrasQueries, g as resolveToSQL, h as resolveComparisonSQL, i as dimensionValue, l as matchesMetricFilter, m as mergeExtras, n as createParquetResolverAdapter, o as getFilterDimensions, p as buildTotalsSql, r as pgResolverAdapter, s as getInternalFilters, t as assertSchemaInSync, u as matchesTopLevelPage, v as createResolverAdapter, w as inferLogicalDataset, x as LOGICAL_DATASETS, y as createSqlFragments } from "../_chunks/resolver.mjs";
2
- export { DIMENSION_SURFACES, LOGICAL_DATASETS, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, supportsDimensionOnSurface };
1
+ import { C as LOGICAL_DATASETS, D as supportsDimensionOnSurface, E as inferLogicalDataset, S as DIMENSION_SURFACES, T as dimensionColumn, _ as resolveComparisonSQL, a as pgResolverAdapter, b as createResolverAdapter, c as getFilterDimensions, d as matchesMetricFilter, f as matchesTopLevelPage, g as mergeExtras, h as buildTotalsSql, i as createParquetResolverAdapter, l as getInternalFilters, m as buildExtrasQueries, n as runComparisonQuery, o as dimensionValue, p as metricValue, r as runOptimizedQuery, s as getDimensionFilters, t as assertSchemaInSync, u as matchesDimensionFilter, v as resolveToSQL, w as assertDimensionsSupported, x as createSqlFragments, y as resolveToSQLOptimized } from "../_chunks/resolver.mjs";
2
+ export { DIMENSION_SURFACES, LOGICAL_DATASETS, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, runComparisonQuery, runOptimizedQuery, supportsDimensionOnSurface };
@@ -58,6 +58,14 @@ interface RollupDef {
58
58
  parquetColumns?: readonly ColumnDef[];
59
59
  /** Sort-key column names for parquet row-group stats. Optional. */
60
60
  parquetSortKey?: readonly string[];
61
+ /**
62
+ * When true, this rollup's payload is independent of GSC slice (e.g. entity
63
+ * rollups sourced from sitemap / indexing snapshots, not slice-partitioned
64
+ * fact tables). The runner rejects calls that pass `searchType` alongside
65
+ * a slice-orthogonal def so the output never lands under a per-slice prefix
66
+ * that the read path won't look at.
67
+ */
68
+ sliceOrthogonal?: boolean;
61
69
  build: (deps: {
62
70
  engine: RollupEngine;
63
71
  ctx: TenantCtx;
@@ -102,6 +110,19 @@ interface ParquetRollupPointer {
102
110
  }
103
111
  declare function rollupKey(ctx: TenantCtx, id: string, builtAt: number, searchType?: SearchType): string;
104
112
  declare function rollupParquetKey(ctx: TenantCtx, id: string, builtAt: number, searchType?: SearchType): string;
113
+ interface RollupBucket {
114
+ list: (opts: {
115
+ prefix: string;
116
+ }) => Promise<{
117
+ objects: Array<{
118
+ key: string;
119
+ }>;
120
+ }>;
121
+ get: (key: string) => Promise<{
122
+ text: () => Promise<string>;
123
+ } | null>;
124
+ }
125
+ declare function readLatestRollup<T = unknown>(bucket: RollupBucket, ctx: TenantCtx, id: string, searchType?: SearchType): Promise<RollupEnvelope<T> | null>;
105
126
  interface RebuildRollupsOptions {
106
127
  engine: RollupEngine;
107
128
  dataSource: DataSource;
@@ -215,4 +236,4 @@ declare const sitemapHealthRollup: RollupDef;
215
236
  */
216
237
  declare const sitemapChanges28dRollup: RollupDef;
217
238
  declare const DEFAULT_ROLLUPS: readonly RollupDef[];
218
- export { DEFAULT_ROLLUPS, ParquetRollupPointer, RebuildRollupResult, RebuildRollupsOptions, RollupCtx, RollupDef, RollupEngine, RollupEnvelope, dailyTotalsRollup, indexPercentRollup, indexingHealthRollup, indexingMetadataRollup, rebuildRollups, rollupKey, rollupParquetKey, sitemapChanges28dRollup, sitemapHealthRollup, topCountries28dRollup, topKeywords28dParquetRollup, topKeywords28dRollup, topPages28dRollup, weeklyTotalsRollup };
239
+ export { DEFAULT_ROLLUPS, ParquetRollupPointer, RebuildRollupResult, RebuildRollupsOptions, RollupBucket, RollupCtx, RollupDef, RollupEngine, RollupEnvelope, dailyTotalsRollup, indexPercentRollup, indexingHealthRollup, indexingMetadataRollup, readLatestRollup, rebuildRollups, rollupKey, rollupParquetKey, sitemapChanges28dRollup, sitemapHealthRollup, topCountries28dRollup, topKeywords28dParquetRollup, topKeywords28dRollup, topPages28dRollup, weeklyTotalsRollup };
package/dist/rollups.mjs CHANGED
@@ -11,10 +11,33 @@ function rollupKey(ctx, id, builtAt, searchType) {
11
11
  function rollupParquetKey(ctx, id, builtAt, searchType) {
12
12
  return `${rollupPrefix(ctx, searchType)}/${id}__v${builtAt}.parquet`;
13
13
  }
14
+ const ROLLUP_FILE_RE = /^(?<id>[a-z0-9_]+)__v(?<ts>\d+)\.json$/;
15
+ async function readLatestRollup(bucket, ctx, id, searchType) {
16
+ const prefix = `${rollupPrefix(ctx, searchType)}/`;
17
+ const listing = await bucket.list({ prefix }).catch(() => null);
18
+ if (!listing) return null;
19
+ let newest = null;
20
+ for (const obj of listing.objects) {
21
+ const m = ROLLUP_FILE_RE.exec(obj.key.slice(prefix.length));
22
+ if (!m?.groups || m.groups.id !== id) continue;
23
+ const ts = Number(m.groups.ts);
24
+ if (!newest || ts > newest.ts) newest = {
25
+ ts,
26
+ key: obj.key
27
+ };
28
+ }
29
+ if (!newest) return null;
30
+ const obj = await bucket.get(newest.key).catch(() => null);
31
+ if (!obj) return null;
32
+ return JSON.parse(await obj.text());
33
+ }
14
34
  async function rebuildRollups(opts) {
15
35
  const now = opts.now ?? (() => Date.now());
16
36
  const results = [];
17
37
  const searchType = opts.searchType;
38
+ if (searchType !== void 0) {
39
+ for (const def of opts.defs) if (def.sliceOrthogonal === true) throw new Error(`rollup def '${def.id}' is slice-orthogonal; do not pass searchType`);
40
+ }
18
41
  for (const def of opts.defs) {
19
42
  const builtAt = now();
20
43
  const payload = await def.build({
@@ -348,6 +371,7 @@ const indexingMetadataRollup = {
348
371
  const indexingHealthRollup = {
349
372
  id: "indexing_health",
350
373
  windowDays: 90,
374
+ sliceOrthogonal: true,
351
375
  async build({ engine, ctx, dataSource, builtAt }) {
352
376
  const key = inspectionParquetKey(ctx);
353
377
  if (!await dataSource.head?.(key)) return { days: [] };
@@ -391,6 +415,7 @@ const indexingHealthRollup = {
391
415
  const indexPercentRollup = {
392
416
  id: "index_percent",
393
417
  windowDays: 90,
418
+ sliceOrthogonal: true,
394
419
  async build({ engine, ctx, dataSource, builtAt, searchType }) {
395
420
  const urlsKey = sitemapUrlsIndexKey(ctx);
396
421
  if (!await dataSource.head?.(urlsKey)) return {
@@ -452,6 +477,7 @@ const indexPercentRollup = {
452
477
  const sitemapHealthRollup = {
453
478
  id: "sitemap_health",
454
479
  windowDays: 90,
480
+ sliceOrthogonal: true,
455
481
  async build({ dataSource, ctx, builtAt }) {
456
482
  const index = await createSitemapStore({ dataSource }).loadIndex(ctx);
457
483
  const records = Object.values(index.records);
@@ -495,6 +521,7 @@ const sitemapHealthRollup = {
495
521
  const sitemapChanges28dRollup = {
496
522
  id: "sitemap_changes_28d",
497
523
  windowDays: 28,
524
+ sliceOrthogonal: true,
498
525
  async build({ dataSource, ctx, builtAt }) {
499
526
  const store = createSitemapStore({ dataSource });
500
527
  const from = utcDateMinusDays(builtAt, 28);
@@ -562,4 +589,4 @@ const DEFAULT_ROLLUPS = [
562
589
  sitemapHealthRollup,
563
590
  sitemapChanges28dRollup
564
591
  ];
565
- export { DEFAULT_ROLLUPS, dailyTotalsRollup, indexPercentRollup, indexingHealthRollup, indexingMetadataRollup, rebuildRollups, rollupKey, rollupParquetKey, sitemapChanges28dRollup, sitemapHealthRollup, topCountries28dRollup, topKeywords28dParquetRollup, topKeywords28dRollup, topPages28dRollup, weeklyTotalsRollup };
592
+ export { DEFAULT_ROLLUPS, dailyTotalsRollup, indexPercentRollup, indexingHealthRollup, indexingMetadataRollup, readLatestRollup, rebuildRollups, rollupKey, rollupParquetKey, sitemapChanges28dRollup, sitemapHealthRollup, topCountries28dRollup, topKeywords28dParquetRollup, topKeywords28dRollup, topPages28dRollup, weeklyTotalsRollup };
@@ -1,5 +1,5 @@
1
1
  import { n as coerceRows } from "../_chunks/coerce.mjs";
2
- import { S as assertDimensionsSupported, g as resolveToSQL, o as getFilterDimensions, r as pgResolverAdapter } from "../_chunks/resolver.mjs";
2
+ import { a as pgResolverAdapter, c as getFilterDimensions, v as resolveToSQL, w as assertDimensionsSupported } from "../_chunks/resolver.mjs";
3
3
  import { n as runAnalyzerFromSource } from "../_chunks/dispatch.mjs";
4
4
  var AttachedTableMissingError = class extends Error {
5
5
  missing;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@gscdump/engine",
3
3
  "type": "module",
4
- "version": "0.17.3",
4
+ "version": "0.17.5",
5
5
  "description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
6
6
  "author": {
7
7
  "name": "Harlan Wilton",
@@ -169,8 +169,8 @@
169
169
  "dependencies": {
170
170
  "drizzle-orm": "^0.45.2",
171
171
  "proper-lockfile": "^4.1.2",
172
- "@gscdump/contracts": "0.17.3",
173
- "gscdump": "0.17.3"
172
+ "@gscdump/contracts": "0.17.5",
173
+ "gscdump": "0.17.5"
174
174
  },
175
175
  "devDependencies": {
176
176
  "@duckdb/duckdb-wasm": "^1.32.0",