@gscdump/engine 0.8.2 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import { N as TableName, S as QueryExecutor, h as ParquetCodec, w as Row } from "./storage.mjs";
1
+ import { C as QueryExecutor, P as TableName, T as Row, g as ParquetCodec } from "./storage.mjs";
2
2
  interface DuckDBHandle {
3
3
  query: (sql: string, params?: unknown[]) => Promise<Row[]>;
4
4
  registerFileBuffer: (name: string, bytes: Uint8Array) => Promise<void>;
@@ -0,0 +1,57 @@
1
+ import { t as AnalysisParams } from "./analysis-types.mjs";
2
+ type WindowPreset = 'last-7d' | 'last-28d' | 'last-30d' | 'last-90d' | 'last-180d' | 'last-365d' | 'mtd' | 'ytd' | 'custom';
3
+ type ComparisonMode = 'none' | 'prev-period' | 'yoy';
4
+ interface ResolveWindowOptions {
5
+ preset: WindowPreset;
6
+ comparison?: ComparisonMode;
7
+ anchor?: string;
8
+ start?: string;
9
+ end?: string;
10
+ }
11
+ interface ResolvedWindow {
12
+ start: string;
13
+ end: string;
14
+ days: number;
15
+ comparison?: {
16
+ start: string;
17
+ end: string;
18
+ };
19
+ }
20
+ interface AnalysisPeriod {
21
+ startDate: string;
22
+ endDate: string;
23
+ }
24
+ interface ComparisonPeriod {
25
+ current: AnalysisPeriod;
26
+ previous: AnalysisPeriod;
27
+ }
28
+ declare function defaultEndDate(): string;
29
+ declare function defaultStartDate(): string;
30
+ declare function periodOf(params: AnalysisParams): AnalysisPeriod;
31
+ declare function comparisonOf(params: AnalysisParams): ComparisonPeriod;
32
+ declare function resolveWindow(opts: ResolveWindowOptions): ResolvedWindow;
33
+ /** Convert a ResolvedWindow into the AnalysisPeriod / ComparisonPeriod shape. */
34
+ declare function windowToPeriod(w: ResolvedWindow): AnalysisPeriod;
35
+ declare function windowToComparisonPeriod(w: ResolvedWindow): ComparisonPeriod | undefined;
36
+ interface PadTimeseriesOptions<T> {
37
+ /** ISO date (YYYY-MM-DD), inclusive lower bound. */
38
+ startDate: string;
39
+ /** ISO date (YYYY-MM-DD), inclusive upper bound. */
40
+ endDate: string;
41
+ /**
42
+ * Row to insert for missing dates. Defaults to `{ clicks: 0, impressions: 0, ctr: 0, position: 0 }`.
43
+ * The `date` field is set automatically.
44
+ */
45
+ fill?: Omit<T, 'date'>;
46
+ /** Row-field that carries the ISO date. Defaults to `date`. */
47
+ dateKey?: string;
48
+ }
49
+ type DateRowShape = Record<string, unknown> & {
50
+ date?: unknown;
51
+ };
52
+ /**
53
+ * Pad rows so every calendar day in `[startDate, endDate]` appears at least
54
+ * once. Existing dates keep all their rows (grouped timeseries safe).
55
+ */
56
+ declare function padTimeseries<T extends DateRowShape = DateRowShape>(rows: readonly T[], options: PadTimeseriesOptions<T>): T[];
57
+ export { ResolveWindowOptions as a, comparisonOf as c, padTimeseries as d, periodOf as f, windowToPeriod as h, PadTimeseriesOptions as i, defaultEndDate as l, windowToComparisonPeriod as m, ComparisonMode as n, ResolvedWindow as o, resolveWindow as p, ComparisonPeriod as r, WindowPreset as s, AnalysisPeriod as t, defaultStartDate as u };
@@ -1,4 +1,4 @@
1
- import { N as TableName } from "./storage.mjs";
1
+ import { P as TableName } from "./storage.mjs";
2
2
  import { LogicalQueryPlan } from "gscdump/query/plan";
3
3
  import { BuilderState } from "gscdump/query";
4
4
  interface ResolvedQuery {
@@ -1,4 +1,4 @@
1
- import { w as Row } from "./storage.mjs";
1
+ import { T as Row } from "./storage.mjs";
2
2
  import { t as AnalysisParams } from "./analysis-types.mjs";
3
3
  import { r as FileSet } from "./source-types.mjs";
4
4
  import { BuilderState } from "gscdump/query";
@@ -307,6 +307,16 @@ interface ExtraResult {
307
307
  key: string;
308
308
  rows: Row[];
309
309
  }
310
+ interface OptimizedQueryResult { // Return shape of `StorageEngine.queryOptimized`.
311
+ rows: Row[]; // Result rows with the `totalCount`, `totalClicks`, `totalImpressions`, `totalCtr` and `totalPosition` columns removed.
312
+ totalCount: number; // Taken from the first row's `totalCount` column; 0 when the query returns no rows.
313
+ totals: { // Taken from the first row's `total*` columns; each value falls back to 0 when the column is absent or there are no rows.
314
+ clicks: number; // From `totalClicks`.
315
+ impressions: number; // From `totalImpressions`.
316
+ ctr: number; // From `totalCtr`.
317
+ position: number; // From `totalPosition`.
318
+ };
319
+ }
310
320
  interface QueryExecuteOptions {
311
321
  sql: string;
312
322
  params: unknown[];
@@ -317,6 +327,13 @@ interface QueryExecuteOptions {
317
327
  * `dataSource.uri` is available.
318
328
  */
319
329
  fileKeys: Record<string, string[]>;
330
+ /**
331
+ * Per-placeholder table identity. Used by the executor to emit a
332
+ * schema-correct empty fallback when a named file set is empty: an
333
+ * `extraFiles` placeholder against `page_keywords` should fall back to
334
+ * the page_keywords schema, not the analyzer's primary `table`.
335
+ */
336
+ placeholderTables?: Record<string, TableName>;
320
337
  dataSource: DataSource;
321
338
  table: TableName;
322
339
  signal?: AbortSignal;
@@ -396,6 +413,16 @@ interface StorageEngine {
396
413
  * when the state has no extras-eligible dimensions.
397
414
  */
398
415
  queryExtras: (ctx: QueryCtx, state: BuilderState) => Promise<ExtraResult[]>;
416
+ /**
417
+ * Single-scan variant of {@link query} that piggy-backs `totalCount` and
418
+ * unfiltered metric totals onto the dimensioned result via window functions.
419
+ * Replaces the host-side rows + totals + count fan-out with one DuckDB
420
+ * execution. Window-function output columns (`totalCount`, `totalClicks`,
421
+ * `totalImpressions`, `totalCtr`, `totalPosition`) are stripped from `rows`
422
+ * before return; missing per-metric totals (when the metric was not
423
+ * requested in `state.metrics`) default to 0.
424
+ */
425
+ queryOptimized: (ctx: QueryCtx, state: BuilderState) => Promise<OptimizedQueryResult>;
399
426
  /**
400
427
  * Run arbitrary SQL resolved against named partition sets. Composes
401
428
  * manifest lookup + object reads + placeholder substitution + execution
@@ -473,4 +500,4 @@ declare function mondayOfWeek(isoDate: string): string;
473
500
  /** YYYY-Qq for the quarter containing the given YYYY-MM month string. */
474
501
  declare function quarterOfMonth(month: string): string;
475
502
  declare function objectKey(ctx: TenantCtx, table: TableName, partition: string, version: number, searchType?: SearchType): string;
476
- export { SyncStateFilter as A, dayPartition as B, QueryResult as C, StorageEngine as D, SearchType$1 as E, Watermark as F, objectKey as G, inferSearchType as H, WatermarkFilter as I, weekPartition as J, quarterOfMonth as K, WatermarkScope as L, SyncStateScope as M, TableName$1 as N, SyncState as O, TenantCtx$1 as P, WriteCtx as R, QueryExecutor as S, RunSQLOptions as T, mondayOfWeek as U, inferLegacyTier as V, monthPartition as W, enumeratePartitions as X, CompactionThresholds as Y, PurgeResult as _, DataSource as a, QueryExecuteOptions as b, FileSetRef as c, LockScope as d, ManifestEntry as f, PurgeFilter as g, ParquetCodec as h, DEFAULT_SEARCH_TYPE as i, SyncStateKind as j, SyncStateDetail as k, GcCtx as l, ManifestStore as m, CompactionTier as n, EngineOptions as o, ManifestPurgeResult as p, quarterPartition as q, ComparisonResult as r, ExtraResult as s, CodecCtx as t, ListLiveFilter as u, PurgeUrlsResult as v, Row$1 as w, QueryExecuteResult as x, QueryCtx as y, WriteResult as z };
503
+ export { SyncStateDetail as A, WriteResult as B, QueryExecutor as C, SearchType$1 as D, RunSQLOptions as E, TenantCtx$1 as F, monthPartition as G, inferLegacyTier as H, Watermark as I, quarterPartition as J, objectKey as K, WatermarkFilter as L, SyncStateKind as M, SyncStateScope as N, StorageEngine as O, TableName$1 as P, WatermarkScope as R, QueryExecuteResult as S, Row$1 as T, inferSearchType as U, dayPartition as V, mondayOfWeek as W, CompactionThresholds as X, weekPartition as Y, enumeratePartitions as Z, PurgeFilter as _, DataSource as a, QueryCtx as b, FileSetRef as c, LockScope as d, ManifestEntry as f, ParquetCodec as g, OptimizedQueryResult as h, DEFAULT_SEARCH_TYPE as i, SyncStateFilter as j, SyncState as k, GcCtx as l, ManifestStore as m, CompactionTier as n, EngineOptions as o, ManifestPurgeResult as p, quarterOfMonth as q, ComparisonResult as r, ExtraResult as s, CodecCtx as t, ListLiveFilter as u, PurgeResult as v, QueryResult as w, QueryExecuteOptions as x, PurgeUrlsResult as y, WriteCtx as z };
@@ -1,4 +1,4 @@
1
- import { N as TableName, a as DataSource, h as ParquetCodec, t as CodecCtx, w as Row } from "../_chunks/storage.mjs";
1
+ import { P as TableName, T as Row, a as DataSource, g as ParquetCodec, t as CodecCtx } from "../_chunks/storage.mjs";
2
2
  import { t as ColumnDef } from "../_chunks/schema.mjs";
3
3
  declare function encodeRowsToParquet(table: TableName, rows: readonly Row[]): Uint8Array;
4
4
  interface EncodeFlexOptions {
@@ -1,4 +1,4 @@
1
- import { N as TableName, m as ManifestStore } from "../_chunks/storage.mjs";
1
+ import { P as TableName, m as ManifestStore } from "../_chunks/storage.mjs";
2
2
  interface R2ObjectMetadata {
3
3
  etag: string;
4
4
  }
@@ -1,2 +1,2 @@
1
- import { A as SyncStateFilter, C as QueryResult, D as StorageEngine, E as SearchType, F as Watermark, I as WatermarkFilter, L as WatermarkScope, M as SyncStateScope, N as TableName, O as SyncState, P as TenantCtx, R as WriteCtx, S as QueryExecutor, T as RunSQLOptions, a as DataSource, b as QueryExecuteOptions, c as FileSetRef, d as LockScope, f as ManifestEntry, h as ParquetCodec, j as SyncStateKind, k as SyncStateDetail, l as GcCtx, m as ManifestStore, n as CompactionTier, o as EngineOptions, t as CodecCtx, u as ListLiveFilter, w as Row, x as QueryExecuteResult, y as QueryCtx, z as WriteResult } from "./_chunks/storage.mjs";
1
+ import { A as SyncStateDetail, B as WriteResult, C as QueryExecutor, D as SearchType, E as RunSQLOptions, F as TenantCtx, I as Watermark, L as WatermarkFilter, M as SyncStateKind, N as SyncStateScope, O as StorageEngine, P as TableName, R as WatermarkScope, S as QueryExecuteResult, T as Row, a as DataSource, b as QueryCtx, c as FileSetRef, d as LockScope, f as ManifestEntry, g as ParquetCodec, j as SyncStateFilter, k as SyncState, l as GcCtx, m as ManifestStore, n as CompactionTier, o as EngineOptions, t as CodecCtx, u as ListLiveFilter, w as QueryResult, x as QueryExecuteOptions, z as WriteCtx } from "./_chunks/storage.mjs";
2
2
  export { CodecCtx, CompactionTier, DataSource, EngineOptions, FileSetRef, GcCtx, ListLiveFilter, LockScope, ManifestEntry, ManifestStore, ParquetCodec, QueryCtx, QueryExecuteOptions, QueryExecuteResult, QueryExecutor, QueryResult, Row, RunSQLOptions, SearchType, StorageEngine, SyncState, SyncStateDetail, SyncStateFilter, SyncStateKind, SyncStateScope, TableName, TenantCtx, Watermark, WatermarkFilter, WatermarkScope, WriteCtx, WriteResult };
package/dist/index.d.mts CHANGED
@@ -1,4 +1,4 @@
1
- import { A as SyncStateFilter, B as dayPartition, C as QueryResult, D as StorageEngine, E as SearchType, F as Watermark, G as objectKey, H as inferSearchType, I as WatermarkFilter, J as weekPartition, K as quarterOfMonth, L as WatermarkScope, M as SyncStateScope, N as TableName, O as SyncState, P as TenantCtx, R as WriteCtx, S as QueryExecutor, T as RunSQLOptions, U as mondayOfWeek, V as inferLegacyTier, W as monthPartition, X as enumeratePartitions, Y as CompactionThresholds, _ as PurgeResult, a as DataSource, b as QueryExecuteOptions, c as FileSetRef, d as LockScope, f as ManifestEntry, g as PurgeFilter, h as ParquetCodec, i as DEFAULT_SEARCH_TYPE, j as SyncStateKind, k as SyncStateDetail, l as GcCtx, m as ManifestStore, n as CompactionTier, o as EngineOptions, p as ManifestPurgeResult, q as quarterPartition, r as ComparisonResult, s as ExtraResult, t as CodecCtx, u as ListLiveFilter, v as PurgeUrlsResult, w as Row, x as QueryExecuteResult, y as QueryCtx, z as WriteResult } from "./_chunks/storage.mjs";
1
+ import { A as SyncStateDetail, B as WriteResult, C as QueryExecutor, D as SearchType, E as RunSQLOptions, F as TenantCtx, G as monthPartition, H as inferLegacyTier, I as Watermark, J as quarterPartition, K as objectKey, L as WatermarkFilter, M as SyncStateKind, N as SyncStateScope, O as StorageEngine, P as TableName, R as WatermarkScope, S as QueryExecuteResult, T as Row, U as inferSearchType, V as dayPartition, W as mondayOfWeek, X as CompactionThresholds, Y as weekPartition, Z as enumeratePartitions, _ as PurgeFilter, a as DataSource, b as QueryCtx, c as FileSetRef, d as LockScope, f as ManifestEntry, g as ParquetCodec, h as OptimizedQueryResult, i as DEFAULT_SEARCH_TYPE, j as SyncStateFilter, k as SyncState, l as GcCtx, m as ManifestStore, n as CompactionTier, o as EngineOptions, p as ManifestPurgeResult, q as quarterOfMonth, r as ComparisonResult, s as ExtraResult, t as CodecCtx, u as ListLiveFilter, v as PurgeResult, w as QueryResult, x as QueryExecuteOptions, y as PurgeUrlsResult, z as WriteCtx } from "./_chunks/storage.mjs";
2
2
  import { a as createDuckDBExecutor, i as createDuckDBCodec, n as DuckDBHandle, r as canonicalEmptyParquetSchema, t as DuckDBFactory } from "./_chunks/duckdb.mjs";
3
3
  import { _ as pages, a as allTables, c as inferTable, d as TABLE_METADATA, f as countries, g as page_keywords, h as keywords, i as TableSchema, m as drizzleSchema, n as ColumnType, o as currentSchemaVersion, p as devices, r as SCHEMAS, s as dimensionToColumn, t as ColumnDef, u as DrizzleSchema } from "./_chunks/schema.mjs";
4
4
  import { GscApiRow, IngestOptions, RowAccumulator, RowAccumulatorOptions, createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
@@ -9,4 +9,4 @@ declare function coerceRow(row: Row$1): Row$1;
9
9
  declare function coerceRows(rows: readonly Row$1[]): Row$1[];
10
10
  declare const MAX_DAY_BYTES: number;
11
11
  declare function createStorageEngine(opts: EngineOptions): StorageEngine;
12
- export { type CodecCtx, type ColumnDef, type ColumnType, type CompactionThresholds, type CompactionTier, type ComparisonResult, DEFAULT_SEARCH_TYPE, type DataSource, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, type EngineOptions, type ExtraResult, FILES_PLACEHOLDER, type FileSetRef, type GcCtx, type GscApiRow, type IngestOptions, type ListLiveFilter, type LockScope, MAX_DAY_BYTES, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SearchType, type StorageEngine, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, TABLE_METADATA, type TableName, type TableSchema, type TenantCtx, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countries, createDuckDBCodec, createDuckDBExecutor, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, formatLiteral, inferLegacyTier, inferSearchType, inferTable, keywords, mondayOfWeek, monthPartition, objectKey, page_keywords, pages, quarterOfMonth, quarterPartition, resolveToSQL, substituteNamedFiles, toPath, toSumPosition, transformGscRow, weekPartition };
12
+ export { type CodecCtx, type ColumnDef, type ColumnType, type CompactionThresholds, type CompactionTier, type ComparisonResult, DEFAULT_SEARCH_TYPE, type DataSource, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, type EngineOptions, type ExtraResult, FILES_PLACEHOLDER, type FileSetRef, type GcCtx, type GscApiRow, type IngestOptions, type ListLiveFilter, type LockScope, MAX_DAY_BYTES, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type OptimizedQueryResult, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SearchType, type StorageEngine, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, TABLE_METADATA, type TableName, type TableSchema, type TenantCtx, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countries, createDuckDBCodec, createDuckDBExecutor, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, formatLiteral, inferLegacyTier, inferSearchType, inferTable, keywords, mondayOfWeek, monthPartition, objectKey, page_keywords, pages, quarterOfMonth, quarterPartition, resolveToSQL, substituteNamedFiles, toPath, toSumPosition, transformGscRow, weekPartition };
package/dist/index.mjs CHANGED
@@ -2,7 +2,7 @@ import { a as inferTable, c as countries, d as keywords, f as page_keywords, i a
2
2
  import { a as mondayOfWeek, c as quarterOfMonth, d as weekPartition, i as inferSearchType, l as quarterPartition, n as dayPartition, o as monthPartition, r as inferLegacyTier, s as objectKey, t as DEFAULT_SEARCH_TYPE, u as tenantPrefix } from "./_chunks/storage.mjs";
3
3
  import { a as compactTieredImpl, i as substituteNamedFiles, n as compileLogicalQueryPlan, o as enumeratePartitions, r as resolveToSQL, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
4
4
  import { bindLiterals, formatLiteral, sqlEscape } from "./sql-bind.mjs";
5
- import { g as resolveComparisonSQL, m as buildTotalsSql, p as buildExtrasQueries, t as createParquetResolverAdapter } from "./_chunks/pg-adapter.mjs";
5
+ import { g as resolveComparisonSQL, m as buildTotalsSql, p as buildExtrasQueries, t as createParquetResolverAdapter, v as resolveToSQLOptimized } from "./_chunks/pg-adapter.mjs";
6
6
  import { createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
7
7
  import { buildLogicalPlan } from "gscdump/query/plan";
8
8
  import { normalizeUrl } from "gscdump/normalize";
@@ -113,18 +113,18 @@ function createDuckDBCodec(factory) {
113
113
  }
114
114
  };
115
115
  }
116
- function rewriteEmptyFileSets(sql, placeholders, table) {
117
- const emptyFallback = `(SELECT * FROM ${emptyTableSchema(table)} WHERE FALSE)`;
116
+ function rewriteEmptyFileSets(sql, placeholders, defaultTable, placeholderTables) {
118
117
  let out = sql;
119
118
  for (const [name, keys] of Object.entries(placeholders)) {
120
119
  if (keys.length > 0) continue;
120
+ const emptyFallback = `(SELECT * FROM ${emptyTableSchema(placeholderTables?.[name] ?? defaultTable)} WHERE FALSE)`;
121
121
  const pattern = new RegExp(`read_parquet\\(\\s*\\{\\{${name}\\}\\}\\s*(?:,\\s*union_by_name\\s*=\\s*true\\s*)?\\)`, "g");
122
122
  out = out.replace(pattern, emptyFallback);
123
123
  }
124
124
  return out;
125
125
  }
126
126
  function createDuckDBExecutor(factory) {
127
- return { async execute({ sql, params, fileKeys, dataSource, table, signal }) {
127
+ return { async execute({ sql, params, fileKeys, placeholderTables, dataSource, table, signal }) {
128
128
  signal?.throwIfAborted();
129
129
  const db = await factory.getDuckDB();
130
130
  const placeholders = {};
@@ -145,7 +145,7 @@ function createDuckDBExecutor(factory) {
145
145
  }
146
146
  try {
147
147
  signal?.throwIfAborted();
148
- const finalSql = substituteNamedFiles(rewriteEmptyFileSets(sql, placeholders, table), placeholders);
148
+ const finalSql = substituteNamedFiles(rewriteEmptyFileSets(sql, placeholders, table, placeholderTables), placeholders);
149
149
  return {
150
150
  rows: await db.query(finalSql, params),
151
151
  sql: finalSql
@@ -349,10 +349,13 @@ function createStorageEngine(opts) {
349
349
  table = entries[0]?.[1].table;
350
350
  }
351
351
  if (!table) throw new Error("runSQL requires at least one fileSet or an explicit table");
352
+ const placeholderTables = {};
353
+ for (const [name, ref] of entries) placeholderTables[name] = ref.table;
352
354
  const result = await executor.execute({
353
355
  sql: opts.sql,
354
356
  params: opts.params ?? [],
355
357
  fileKeys,
358
+ placeholderTables,
356
359
  dataSource,
357
360
  table,
358
361
  signal: opts.signal
@@ -436,6 +439,46 @@ function createStorageEngine(opts) {
436
439
  totals: totalsRow.rows[0] ?? {}
437
440
  };
438
441
  }
442
/**
 * Run the builder state as one SQL execution and split the answer into
 * rows, a total row count and metric totals.
 *
 * The SQL comes from `resolveToSQLOptimized` and is executed through
 * `runSQL` against a single `FILES` file set covering every partition in
 * the plan's date range. The total columns are read from the first result
 * row only and are removed from every row before returning.
 *
 * @param ctx Query context; `userId`, `siteId`, `signal` and the optional
 *   `table` override are read from it.
 * @param state Builder state describing the query.
 * @returns `{ rows, totalCount, totals }`; all totals are 0 when the query
 *   returns no rows or the corresponding column is missing.
 */
async function queryOptimized(ctx, state) {
  const adapter = createParquetResolverAdapter();
  const plan = buildLogicalPlan(state, adapter.capabilities);
  // An explicit table on the context wins over the dataset chosen by the plan.
  const table = ctx.table ?? plan.dataset;
  const { startDate, endDate } = plan.dateRange;
  const partitions = enumeratePartitions(startDate, endDate);
  const resolved = resolveToSQLOptimized(state, {
    adapter,
    siteId: void 0
  });
  const { rows: rawRows } = await runSQL({
    ctx: {
      userId: ctx.userId,
      siteId: ctx.siteId
    },
    table,
    fileSets: { FILES: {
      table,
      partitions
    } },
    sql: resolved.sql,
    params: resolved.params,
    signal: ctx.signal
  });
  // Totals are read from the first row only.
  const head = rawRows[0];
  const readTotal = (column) => Number(head?.[column] ?? 0);
  // Drop the five total columns so callers only see the dimensioned data.
  const rows = rawRows.map(({ totalCount, totalClicks, totalImpressions, totalCtr, totalPosition, ...dimensioned }) => dimensioned);
  return {
    rows,
    totalCount: readTotal("totalCount"),
    totals: {
      clicks: readTotal("totalClicks"),
      impressions: readTotal("totalImpressions"),
      ctr: readTotal("totalCtr"),
      position: readTotal("totalPosition")
    }
  };
}
439
482
  async function queryExtras(ctx, state) {
440
483
  const adapter = createParquetResolverAdapter();
441
484
  const extras = buildExtrasQueries(state, {
@@ -576,6 +619,7 @@ function createStorageEngine(opts) {
576
619
  query,
577
620
  queryComparison,
578
621
  queryExtras,
622
+ queryOptimized,
579
623
  runSQL,
580
624
  compactTiered,
581
625
  gcOrphans,
package/dist/ingest.d.mts CHANGED
@@ -1,4 +1,4 @@
1
- import { N as TableName, w as Row } from "./_chunks/storage.mjs";
1
+ import { P as TableName, T as Row } from "./_chunks/storage.mjs";
2
2
  /**
3
3
  * Canonical GSC API dimension order per table. Consumers hitting the raw
4
4
  * `searchanalytics.query` endpoint must request dimensions in this order so
@@ -1,57 +1,2 @@
1
- import { t as AnalysisParams } from "../_chunks/analysis-types.mjs";
2
- type WindowPreset = 'last-7d' | 'last-28d' | 'last-30d' | 'last-90d' | 'last-180d' | 'last-365d' | 'mtd' | 'ytd' | 'custom';
3
- type ComparisonMode = 'none' | 'prev-period' | 'yoy';
4
- interface ResolveWindowOptions {
5
- preset: WindowPreset;
6
- comparison?: ComparisonMode;
7
- anchor?: string;
8
- start?: string;
9
- end?: string;
10
- }
11
- interface ResolvedWindow {
12
- start: string;
13
- end: string;
14
- days: number;
15
- comparison?: {
16
- start: string;
17
- end: string;
18
- };
19
- }
20
- interface AnalysisPeriod {
21
- startDate: string;
22
- endDate: string;
23
- }
24
- interface ComparisonPeriod {
25
- current: AnalysisPeriod;
26
- previous: AnalysisPeriod;
27
- }
28
- declare function defaultEndDate(): string;
29
- declare function defaultStartDate(): string;
30
- declare function periodOf(params: AnalysisParams): AnalysisPeriod;
31
- declare function comparisonOf(params: AnalysisParams): ComparisonPeriod;
32
- declare function resolveWindow(opts: ResolveWindowOptions): ResolvedWindow;
33
- /** Convert a ResolvedWindow into the AnalysisPeriod / ComparisonPeriod shape. */
34
- declare function windowToPeriod(w: ResolvedWindow): AnalysisPeriod;
35
- declare function windowToComparisonPeriod(w: ResolvedWindow): ComparisonPeriod | undefined;
36
- interface PadTimeseriesOptions<T> {
37
- /** ISO date (YYYY-MM-DD), inclusive lower bound. */
38
- startDate: string;
39
- /** ISO date (YYYY-MM-DD), inclusive upper bound. */
40
- endDate: string;
41
- /**
42
- * Row to insert for missing dates. Defaults to `{ clicks: 0, impressions: 0, ctr: 0, position: 0 }`.
43
- * The `date` field is set automatically.
44
- */
45
- fill?: Omit<T, 'date'>;
46
- /** Row-field that carries the ISO date. Defaults to `date`. */
47
- dateKey?: string;
48
- }
49
- type DateRowShape = Record<string, unknown> & {
50
- date?: unknown;
51
- };
52
- /**
53
- * Pad rows so every calendar day in `[startDate, endDate]` appears at least
54
- * once. Existing dates keep all their rows (grouped timeseries safe).
55
- */
56
- declare function padTimeseries<T extends DateRowShape = DateRowShape>(rows: readonly T[], options: PadTimeseriesOptions<T>): T[];
1
+ import { a as ResolveWindowOptions, c as comparisonOf, d as padTimeseries, f as periodOf, h as windowToPeriod, i as PadTimeseriesOptions, l as defaultEndDate, m as windowToComparisonPeriod, n as ComparisonMode, o as ResolvedWindow, p as resolveWindow, r as ComparisonPeriod, s as WindowPreset, t as AnalysisPeriod, u as defaultStartDate } from "../_chunks/index.mjs";
57
2
  export { AnalysisPeriod, ComparisonMode, ComparisonPeriod, PadTimeseriesOptions, ResolveWindowOptions, ResolvedWindow, WindowPreset, comparisonOf, defaultEndDate, defaultStartDate, padTimeseries, periodOf, resolveWindow, windowToComparisonPeriod, windowToPeriod };
@@ -1,3 +1,3 @@
1
- import { X as enumeratePartitions } from "./_chunks/storage.mjs";
1
+ import { Z as enumeratePartitions } from "./_chunks/storage.mjs";
2
2
  import { a as substituteNamedFiles, i as resolveToSQL, n as ResolvedQuery, r as compileLogicalQueryPlan, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
3
3
  export { FILES_PLACEHOLDER, ResolvedQuery, compileLogicalQueryPlan, enumeratePartitions, resolveToSQL, substituteNamedFiles };
@@ -0,0 +1,171 @@
1
+ import { n as AnalysisResult, t as AnalysisParams } from "../_chunks/analysis-types.mjs";
2
+ import { n as ComparisonMode, o as ResolvedWindow, s as WindowPreset } from "../_chunks/index.mjs";
3
+ /** Status vocabulary mirrors `ActionPrioritySourceStatus`. */
4
+ type ReportStepStatus = 'pending' | 'running' | 'done' | 'skipped' | 'error';
5
+ type ReportSeverity = 'info' | 'low' | 'medium' | 'high';
6
+ type ReportEntityKind = 'page' | 'query';
7
+ type ReportActionKind = 'analyzer' | 'cli' | 'indexing' | 'fix';
8
+ type ReportCoverage = 'full' | 'partial';
9
+ /** Citty-shaped arg spec, kept structural so engine doesn't pull citty in. */
10
+ interface ReportArgDef {
11
+ type: 'string' | 'boolean' | 'number';
12
+ description?: string;
13
+ default?: string | boolean | number;
14
+ required?: boolean;
15
+ alias?: string;
16
+ }
17
+ type ReportArgsSpec = Record<string, ReportArgDef>;
18
+ interface ReportEntity {
19
+ kind: ReportEntityKind;
20
+ value: string;
21
+ }
22
+ interface ReportFindingDelta {
23
+ metric: string;
24
+ prior: number;
25
+ current: number;
26
+ pct: number;
27
+ }
28
+ interface ReportFinding {
29
+ entity: ReportEntity;
30
+ metrics: Record<string, number>;
31
+ delta?: ReportFindingDelta;
32
+ why?: string;
33
+ }
34
+ interface ReportSectionSummary {
35
+ delta?: number;
36
+ direction?: 'up' | 'down' | 'flat';
37
+ magnitudeLabel?: string;
38
+ }
39
+ interface ReportAction {
40
+ kind: ReportActionKind;
41
+ target?: ReportEntity;
42
+ params?: Record<string, unknown>;
43
+ rationale: string;
44
+ /** Human hint, generated; never authoritative. */
45
+ cliHint?: string;
46
+ }
47
+ interface ReportSectionArtifact {
48
+ analyzer: string;
49
+ params: AnalysisParams;
50
+ }
51
+ interface ReportSection {
52
+ id: string;
53
+ title: string;
54
+ severity: ReportSeverity;
55
+ summary: ReportSectionSummary;
56
+ /** Bounded; sorted by stable composite key. */
57
+ findings: ReportFinding[];
58
+ truncated?: {
59
+ kept: number;
60
+ total: number;
61
+ };
62
+ coverage: ReportCoverage;
63
+ actions: ReportAction[];
64
+ artifact?: ReportSectionArtifact;
65
+ }
66
+ interface ReportPlanStep {
67
+ /** Stable identifier within the report (e.g. `movers`, `decay-current`). */
68
+ key: string;
69
+ /** Analyzer id (or future: nested report id). Open string by design. */
70
+ type: string;
71
+ /** Analyzer params; report-runtime applies `type` from the step. */
72
+ params: Omit<AnalysisParams, 'type'>;
73
+ /** Required steps fail the report; optional steps degrade `coverage`. */
74
+ required?: boolean;
75
+ }
76
+ interface ReportStepStateMeta {
77
+ key: string;
78
+ type: string;
79
+ status: ReportStepStatus;
80
+ error?: string;
81
+ }
82
+ interface ReportResultMeta {
83
+ durationMs: number;
84
+ rowsScanned: number;
85
+ degraded: boolean;
86
+ steps: ReportStepStateMeta[];
87
+ }
88
+ interface ReportResult {
89
+ id: string;
90
+ site: string;
91
+ /** sha256(id|site|window|paramsCanonical|registryVersion). Stable. */
92
+ inputHash: string;
93
+ /** ISO 8601. NOT included in inputHash. */
94
+ generatedAt: string;
95
+ window: ResolvedWindow;
96
+ sections: ReportSection[];
97
+ meta: ReportResultMeta;
98
+ }
99
+ /**
100
+ * Loose params bag. Concrete reports refine this with their own interface.
101
+ * Constraint is `object` so report authors can use plain interfaces without
102
+ * needing an index signature.
103
+ */
104
+ type ReportParams = object;
105
+ interface ReportContext<P extends ReportParams = ReportParams> {
106
+ /** Resolved site URL (e.g. `https://example.com/`). */
107
+ site: string;
108
+ /** Already-resolved window — runtime calls `resolveWindow` once before plan(). */
109
+ window: ResolvedWindow;
110
+ params: P;
111
+ /** Hash of registry/code version. Bumped via package version. */
112
+ registryVersion: string;
113
+ }
114
+ /**
115
+ * Reduce step results → sections. Runtime injects `meta` post-reduce.
116
+ */
117
+ type ReportReducer<P extends ReportParams = ReportParams> = (results: Record<string, AnalysisResult>, ctx: ReportContext<P>) => Omit<ReportResult, 'meta' | 'inputHash' | 'generatedAt' | 'site' | 'window' | 'id'> & {
118
+ sections: ReportSection[];
119
+ };
120
+ interface DefinedReport<P extends ReportParams = ReportParams> {
121
+ id: string;
122
+ description: string;
123
+ defaultPeriod: WindowPreset;
124
+ defaultComparison: ComparisonMode;
125
+ /** Single source of truth for CLI flags + MCP input schema. */
126
+ argsSpec: ReportArgsSpec;
127
+ plan: (params: P, window: ResolvedWindow) => readonly ReportPlanStep[];
128
+ reduce: ReportReducer<P>;
129
+ }
130
+ interface DefineReportOptions<P extends ReportParams = ReportParams> {
131
+ id: string;
132
+ description: string;
133
+ defaultPeriod: WindowPreset;
134
+ defaultComparison: ComparisonMode;
135
+ argsSpec?: ReportArgsSpec;
136
+ plan: (params: P, window: ResolvedWindow) => readonly ReportPlanStep[];
137
+ reduce: ReportReducer<P>;
138
+ }
139
+ /**
140
+ * Mirror of `defineAnalyzer`. Pure factory: validates required fields,
141
+ * fills default `argsSpec`. No runtime behaviour — `runReport` consumes
142
+ * the returned object.
143
+ */
144
+ declare function defineReport<P extends ReportParams = ReportParams>(opts: DefineReportOptions<P>): DefinedReport<P>;
145
+ interface InputHashSeeds {
146
+ id: string;
147
+ site: string;
148
+ window: ResolvedWindow;
149
+ params: ReportParams;
150
+ registryVersion: string;
151
+ }
152
+ /** Stable JSON: sorts object keys at every level. Arrays preserve order. */
153
+ declare function canonicalize(value: unknown): unknown;
154
+ declare function computeInputHash(seeds: InputHashSeeds): Promise<string>;
155
+ interface ReportRegistryInit {
156
+ reports?: readonly DefinedReport<ReportParams>[];
157
+ /**
158
+ * Opaque version string. Used as the `registryVersion` input to
159
+ * `inputHash` so cached results invalidate when report code ships.
160
+ * Caller is expected to feed in their package version.
161
+ */
162
+ version?: string;
163
+ }
164
+ interface ReportRegistry { // Lookup surface returned by `createReportRegistry`.
165
+ version: string; // `ReportRegistryInit.version` as given; `createReportRegistry` uses "0" when it is omitted.
166
+ listReportIds: () => readonly string[]; // Registered ids as a new array, sorted with the default `Array.prototype.sort` ordering.
167
+ getReport: (id: string) => DefinedReport<ReportParams> | undefined; // Exact-id lookup; `undefined` when no report with that id was registered.
168
+ listReports: () => readonly DefinedReport<ReportParams>[]; // All registered reports as a new array, in registration order (not sorted like `listReportIds`).
169
+ }
170
+ declare function createReportRegistry(init?: ReportRegistryInit): ReportRegistry;
171
+ export { type DefineReportOptions, type DefinedReport, type InputHashSeeds, type ReportAction, type ReportActionKind, type ReportArgDef, type ReportArgsSpec, type ReportContext, type ReportCoverage, type ReportEntity, type ReportEntityKind, type ReportFinding, type ReportFindingDelta, type ReportParams, type ReportPlanStep, type ReportReducer, type ReportRegistry, type ReportRegistryInit, type ReportResult, type ReportResultMeta, type ReportSection, type ReportSectionArtifact, type ReportSectionSummary, type ReportSeverity, type ReportStepStateMeta, type ReportStepStatus, canonicalize, computeInputHash, createReportRegistry, defineReport };
@@ -0,0 +1,56 @@
1
/**
 * Build a report definition from user-supplied options.
 *
 * Checks that `id`, `plan` and `reduce` are present (truthy), then returns a
 * new object holding only the recognised fields. `argsSpec` falls back to an
 * empty object when it is `null` or `undefined`.
 *
 * @param opts Report options; `id`, `plan` and `reduce` are required.
 * @returns A fresh definition object with a fixed set of keys.
 * @throws {Error} When `id`, `plan` or `reduce` is missing.
 */
function defineReport(opts) {
  const { id, plan, reduce } = opts;

  // Check `id` first: the other messages embed it.
  if (!id) {
    throw new Error("defineReport: id is required");
  }
  if (!plan) {
    throw new Error(`defineReport(${id}): plan is required`);
  }
  if (!reduce) {
    throw new Error(`defineReport(${id}): reduce is required`);
  }

  const argsSpec = opts.argsSpec ?? {};
  return {
    id,
    description: opts.description,
    defaultPeriod: opts.defaultPeriod,
    defaultComparison: opts.defaultComparison,
    argsSpec,
    plan,
    reduce
  };
}
15
/**
 * Produce a canonical, JSON-ready copy of `value` so that
 * `JSON.stringify(canonicalize(x))` is stable regardless of key order.
 *
 * - Primitives, `null` and `undefined` are returned unchanged.
 * - Arrays are copied element by element, preserving order.
 * - Objects exposing `toJSON` (e.g. `Date`) are replaced by their `toJSON()`
 *   result, as `JSON.stringify` would do. Without this they have no own
 *   enumerable keys and would all collapse to `{}`.
 * - Every other object is copied with its own enumerable keys sorted.
 *
 * @param value Any JSON-like value.
 * @returns The canonical copy (or `value` itself for primitives).
 */
function canonicalize(value) {
  if (value == null || typeof value !== "object") return value;
  if (Array.isArray(value)) return value.map((item) => canonicalize(item));

  let source = value;
  if (typeof value.toJSON === "function") {
    // Apply toJSON once per value (as JSON.stringify does), then canonicalize
    // whatever it produced without calling toJSON on that result again.
    source = value.toJSON();
    if (source == null || typeof source !== "object") return source;
    if (Array.isArray(source)) return source.map((item) => canonicalize(item));
  }

  const out = {};
  for (const k of Object.keys(source).sort()) {
    // defineProperty instead of `out[k] = …`: plain assignment to "__proto__"
    // would hit the prototype setter and drop the key from the output.
    Object.defineProperty(out, k, {
      value: canonicalize(source[k]),
      enumerable: true,
      writable: true,
      configurable: true
    });
  }
  return out;
}
22
/**
 * Derive a hash for a report invocation.
 *
 * The report id, site, window bounds (`start`, `end`, and `comparison` or
 * `null` when absent), params and registry version are canonicalized,
 * serialised to JSON, UTF-8 encoded and digested with SHA-256 via Web Crypto.
 *
 * @param seeds Values that identify the invocation.
 * @returns Promise resolving to the digest as a lowercase hex string.
 */
async function computeInputHash(seeds) {
  const { window: win } = seeds;
  const hashedFields = {
    id: seeds.id,
    site: seeds.site,
    window: {
      start: win.start,
      end: win.end,
      // Hash an absent comparison as an explicit null.
      comparison: win.comparison ?? null
    },
    params: seeds.params,
    registryVersion: seeds.registryVersion
  };
  const serialized = JSON.stringify(canonicalize(hashedFields));
  const encoded = new TextEncoder().encode(serialized);
  const digest = await globalThis.crypto.subtle.digest("SHA-256", encoded);
  return bufferToHex(digest);
}
37
/**
 * Render a binary buffer as a lowercase hexadecimal string.
 *
 * @param buffer Buffer to view as bytes (e.g. an `ArrayBuffer` from a digest).
 * @returns Two hex characters per byte, in order; `""` for an empty buffer.
 */
function bufferToHex(buffer) {
  const hexPairs = Array.from(
    new Uint8Array(buffer),
    // Zero-pad so every byte contributes exactly two characters.
    (byte) => byte.toString(16).padStart(2, "0")
  );
  return hexPairs.join("");
}
43
/**
 * Create a lookup registry over a fixed set of report definitions.
 *
 * @param init Optional settings: `reports` to register (none by default) and
 *   a `version` string (falls back to "0").
 * @returns An object exposing `version`, `listReportIds` (sorted ids),
 *   `getReport` (lookup by id, `undefined` when unknown) and `listReports`
 *   (definitions in registration order).
 * @throws {Error} When two reports share the same `id`.
 */
function createReportRegistry(init = {}) {
  const reportsById = /* @__PURE__ */ new Map();

  for (const report of init.reports ?? []) {
    // Reject duplicates rather than silently keeping one of them.
    if (reportsById.has(report.id)) {
      throw new Error(`createReportRegistry: duplicate report id ${report.id}`);
    }
    reportsById.set(report.id, report);
  }

  const version = init.version ?? "0";
  // Each accessor returns a fresh array, so callers cannot mutate the registry.
  const listReportIds = () => Array.from(reportsById.keys()).sort();
  const getReport = (id) => reportsById.get(id);
  const listReports = () => Array.from(reportsById.values());

  return { version, listReportIds, getReport, listReports };
}
56
+ export { canonicalize, computeInputHash, createReportRegistry, defineReport };
@@ -1,4 +1,4 @@
1
- import { D as StorageEngine, P as TenantCtx, w as Row } from "../_chunks/storage.mjs";
1
+ import { F as TenantCtx, O as StorageEngine, T as Row } from "../_chunks/storage.mjs";
2
2
  import { n as AnalysisResult, t as AnalysisParams } from "../_chunks/analysis-types.mjs";
3
3
  import { i as QueryRow, r as FileSet, s as SqlQuerySource, t as AnalysisQuerySource } from "../_chunks/source-types.mjs";
4
4
  import { t as AnalyzerRegistry } from "../_chunks/registry.mjs";
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@gscdump/engine",
3
3
  "type": "module",
4
- "version": "0.8.2",
4
+ "version": "0.9.1",
5
5
  "description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
6
6
  "author": {
7
7
  "name": "Harlan Wilton",
@@ -101,6 +101,11 @@
101
101
  "import": "./dist/analyzer/index.mjs",
102
102
  "default": "./dist/analyzer/index.mjs"
103
103
  },
104
+ "./report": {
105
+ "types": "./dist/report/index.d.mts",
106
+ "import": "./dist/report/index.mjs",
107
+ "default": "./dist/report/index.mjs"
108
+ },
104
109
  "./analysis-types": {
105
110
  "types": "./dist/analysis-types.d.mts",
106
111
  "import": "./dist/analysis-types.mjs",
@@ -154,7 +159,7 @@
154
159
  "dependencies": {
155
160
  "drizzle-orm": "^0.45.2",
156
161
  "proper-lockfile": "^4.1.2",
157
- "gscdump": "0.8.2"
162
+ "gscdump": "0.9.1"
158
163
  },
159
164
  "devDependencies": {
160
165
  "@duckdb/duckdb-wasm": "^1.32.0",