@gscdump/engine 0.9.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import { N as TableName, S as QueryExecutor, h as ParquetCodec, w as Row } from "./storage.mjs";
1
+ import { C as QueryExecutor, P as TableName, T as Row, g as ParquetCodec } from "./storage.mjs";
2
2
  interface DuckDBHandle {
3
3
  query: (sql: string, params?: unknown[]) => Promise<Row[]>;
4
4
  registerFileBuffer: (name: string, bytes: Uint8Array) => Promise<void>;
@@ -1,4 +1,4 @@
1
- import { N as TableName } from "./storage.mjs";
1
+ import { P as TableName } from "./storage.mjs";
2
2
  import { LogicalQueryPlan } from "gscdump/query/plan";
3
3
  import { BuilderState } from "gscdump/query";
4
4
  interface ResolvedQuery {
@@ -1,4 +1,4 @@
1
- import { w as Row } from "./storage.mjs";
1
+ import { T as Row } from "./storage.mjs";
2
2
  import { t as AnalysisParams } from "./analysis-types.mjs";
3
3
  import { r as FileSet } from "./source-types.mjs";
4
4
  import { BuilderState } from "gscdump/query";
@@ -307,6 +307,16 @@ interface ExtraResult {
307
307
  key: string;
308
308
  rows: Row[];
309
309
  }
310
+ interface OptimizedQueryResult {
311
+ rows: Row[];
312
+ totalCount: number;
313
+ totals: {
314
+ clicks: number;
315
+ impressions: number;
316
+ ctr: number;
317
+ position: number;
318
+ };
319
+ }
310
320
  interface QueryExecuteOptions {
311
321
  sql: string;
312
322
  params: unknown[];
@@ -403,6 +413,16 @@ interface StorageEngine {
403
413
  * when the state has no extras-eligible dimensions.
404
414
  */
405
415
  queryExtras: (ctx: QueryCtx, state: BuilderState) => Promise<ExtraResult[]>;
416
+ /**
417
+ * Single-scan variant of {@link query} that piggy-backs `totalCount` and
418
+ * unfiltered metric totals onto the dimensioned result via window functions.
419
+ * Replaces the host-side rows + totals + count fan-out with one DuckDB
420
+ * execution. Window-function output columns (`totalCount`, `totalClicks`,
421
+ * `totalImpressions`, `totalCtr`, `totalPosition`) are stripped from `rows`
422
+ * before return; missing per-metric totals (when the metric was not
423
+ * requested in `state.metrics`) default to 0.
424
+ */
425
+ queryOptimized: (ctx: QueryCtx, state: BuilderState) => Promise<OptimizedQueryResult>;
406
426
  /**
407
427
  * Run arbitrary SQL resolved against named partition sets. Composes
408
428
  * manifest lookup + object reads + placeholder substitution + execution
@@ -480,4 +500,4 @@ declare function mondayOfWeek(isoDate: string): string;
480
500
  /** YYYY-Qq for the quarter containing the given YYYY-MM month string. */
481
501
  declare function quarterOfMonth(month: string): string;
482
502
  declare function objectKey(ctx: TenantCtx, table: TableName, partition: string, version: number, searchType?: SearchType): string;
483
- export { SyncStateFilter as A, dayPartition as B, QueryResult as C, StorageEngine as D, SearchType$1 as E, Watermark as F, objectKey as G, inferSearchType as H, WatermarkFilter as I, weekPartition as J, quarterOfMonth as K, WatermarkScope as L, SyncStateScope as M, TableName$1 as N, SyncState as O, TenantCtx$1 as P, WriteCtx as R, QueryExecutor as S, RunSQLOptions as T, mondayOfWeek as U, inferLegacyTier as V, monthPartition as W, enumeratePartitions as X, CompactionThresholds as Y, PurgeResult as _, DataSource as a, QueryExecuteOptions as b, FileSetRef as c, LockScope as d, ManifestEntry as f, PurgeFilter as g, ParquetCodec as h, DEFAULT_SEARCH_TYPE as i, SyncStateKind as j, SyncStateDetail as k, GcCtx as l, ManifestStore as m, CompactionTier as n, EngineOptions as o, ManifestPurgeResult as p, quarterPartition as q, ComparisonResult as r, ExtraResult as s, CodecCtx as t, ListLiveFilter as u, PurgeUrlsResult as v, Row$1 as w, QueryExecuteResult as x, QueryCtx as y, WriteResult as z };
503
+ export { SyncStateDetail as A, WriteResult as B, QueryExecutor as C, SearchType$1 as D, RunSQLOptions as E, TenantCtx$1 as F, monthPartition as G, inferLegacyTier as H, Watermark as I, quarterPartition as J, objectKey as K, WatermarkFilter as L, SyncStateKind as M, SyncStateScope as N, StorageEngine as O, TableName$1 as P, WatermarkScope as R, QueryExecuteResult as S, Row$1 as T, inferSearchType as U, dayPartition as V, mondayOfWeek as W, CompactionThresholds as X, weekPartition as Y, enumeratePartitions as Z, PurgeFilter as _, DataSource as a, QueryCtx as b, FileSetRef as c, LockScope as d, ManifestEntry as f, ParquetCodec as g, OptimizedQueryResult as h, DEFAULT_SEARCH_TYPE as i, SyncStateFilter as j, SyncState as k, GcCtx as l, ManifestStore as m, CompactionTier as n, EngineOptions as o, ManifestPurgeResult as p, quarterOfMonth as q, ComparisonResult as r, ExtraResult as s, CodecCtx as t, ListLiveFilter as u, PurgeResult as v, QueryResult as w, QueryExecuteOptions as x, PurgeUrlsResult as y, WriteCtx as z };
@@ -1,4 +1,4 @@
1
- import { N as TableName, a as DataSource, h as ParquetCodec, t as CodecCtx, w as Row } from "../_chunks/storage.mjs";
1
+ import { P as TableName, T as Row, a as DataSource, g as ParquetCodec, t as CodecCtx } from "../_chunks/storage.mjs";
2
2
  import { t as ColumnDef } from "../_chunks/schema.mjs";
3
3
  declare function encodeRowsToParquet(table: TableName, rows: readonly Row[]): Uint8Array;
4
4
  interface EncodeFlexOptions {
@@ -1,4 +1,4 @@
1
- import { N as TableName, m as ManifestStore } from "../_chunks/storage.mjs";
1
+ import { P as TableName, m as ManifestStore } from "../_chunks/storage.mjs";
2
2
  interface R2ObjectMetadata {
3
3
  etag: string;
4
4
  }
@@ -1,2 +1,2 @@
1
- import { A as SyncStateFilter, C as QueryResult, D as StorageEngine, E as SearchType, F as Watermark, I as WatermarkFilter, L as WatermarkScope, M as SyncStateScope, N as TableName, O as SyncState, P as TenantCtx, R as WriteCtx, S as QueryExecutor, T as RunSQLOptions, a as DataSource, b as QueryExecuteOptions, c as FileSetRef, d as LockScope, f as ManifestEntry, h as ParquetCodec, j as SyncStateKind, k as SyncStateDetail, l as GcCtx, m as ManifestStore, n as CompactionTier, o as EngineOptions, t as CodecCtx, u as ListLiveFilter, w as Row, x as QueryExecuteResult, y as QueryCtx, z as WriteResult } from "./_chunks/storage.mjs";
1
+ import { A as SyncStateDetail, B as WriteResult, C as QueryExecutor, D as SearchType, E as RunSQLOptions, F as TenantCtx, I as Watermark, L as WatermarkFilter, M as SyncStateKind, N as SyncStateScope, O as StorageEngine, P as TableName, R as WatermarkScope, S as QueryExecuteResult, T as Row, a as DataSource, b as QueryCtx, c as FileSetRef, d as LockScope, f as ManifestEntry, g as ParquetCodec, j as SyncStateFilter, k as SyncState, l as GcCtx, m as ManifestStore, n as CompactionTier, o as EngineOptions, t as CodecCtx, u as ListLiveFilter, w as QueryResult, x as QueryExecuteOptions, z as WriteCtx } from "./_chunks/storage.mjs";
2
2
  export { CodecCtx, CompactionTier, DataSource, EngineOptions, FileSetRef, GcCtx, ListLiveFilter, LockScope, ManifestEntry, ManifestStore, ParquetCodec, QueryCtx, QueryExecuteOptions, QueryExecuteResult, QueryExecutor, QueryResult, Row, RunSQLOptions, SearchType, StorageEngine, SyncState, SyncStateDetail, SyncStateFilter, SyncStateKind, SyncStateScope, TableName, TenantCtx, Watermark, WatermarkFilter, WatermarkScope, WriteCtx, WriteResult };
package/dist/index.d.mts CHANGED
@@ -1,4 +1,4 @@
1
- import { A as SyncStateFilter, B as dayPartition, C as QueryResult, D as StorageEngine, E as SearchType, F as Watermark, G as objectKey, H as inferSearchType, I as WatermarkFilter, J as weekPartition, K as quarterOfMonth, L as WatermarkScope, M as SyncStateScope, N as TableName, O as SyncState, P as TenantCtx, R as WriteCtx, S as QueryExecutor, T as RunSQLOptions, U as mondayOfWeek, V as inferLegacyTier, W as monthPartition, X as enumeratePartitions, Y as CompactionThresholds, _ as PurgeResult, a as DataSource, b as QueryExecuteOptions, c as FileSetRef, d as LockScope, f as ManifestEntry, g as PurgeFilter, h as ParquetCodec, i as DEFAULT_SEARCH_TYPE, j as SyncStateKind, k as SyncStateDetail, l as GcCtx, m as ManifestStore, n as CompactionTier, o as EngineOptions, p as ManifestPurgeResult, q as quarterPartition, r as ComparisonResult, s as ExtraResult, t as CodecCtx, u as ListLiveFilter, v as PurgeUrlsResult, w as Row, x as QueryExecuteResult, y as QueryCtx, z as WriteResult } from "./_chunks/storage.mjs";
1
+ import { A as SyncStateDetail, B as WriteResult, C as QueryExecutor, D as SearchType, E as RunSQLOptions, F as TenantCtx, G as monthPartition, H as inferLegacyTier, I as Watermark, J as quarterPartition, K as objectKey, L as WatermarkFilter, M as SyncStateKind, N as SyncStateScope, O as StorageEngine, P as TableName, R as WatermarkScope, S as QueryExecuteResult, T as Row, U as inferSearchType, V as dayPartition, W as mondayOfWeek, X as CompactionThresholds, Y as weekPartition, Z as enumeratePartitions, _ as PurgeFilter, a as DataSource, b as QueryCtx, c as FileSetRef, d as LockScope, f as ManifestEntry, g as ParquetCodec, h as OptimizedQueryResult, i as DEFAULT_SEARCH_TYPE, j as SyncStateFilter, k as SyncState, l as GcCtx, m as ManifestStore, n as CompactionTier, o as EngineOptions, p as ManifestPurgeResult, q as quarterOfMonth, r as ComparisonResult, s as ExtraResult, t as CodecCtx, u as ListLiveFilter, v as PurgeResult, w as QueryResult, x as QueryExecuteOptions, y as PurgeUrlsResult, z as WriteCtx } from "./_chunks/storage.mjs";
2
2
  import { a as createDuckDBExecutor, i as createDuckDBCodec, n as DuckDBHandle, r as canonicalEmptyParquetSchema, t as DuckDBFactory } from "./_chunks/duckdb.mjs";
3
3
  import { _ as pages, a as allTables, c as inferTable, d as TABLE_METADATA, f as countries, g as page_keywords, h as keywords, i as TableSchema, m as drizzleSchema, n as ColumnType, o as currentSchemaVersion, p as devices, r as SCHEMAS, s as dimensionToColumn, t as ColumnDef, u as DrizzleSchema } from "./_chunks/schema.mjs";
4
4
  import { GscApiRow, IngestOptions, RowAccumulator, RowAccumulatorOptions, createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
@@ -9,4 +9,4 @@ declare function coerceRow(row: Row$1): Row$1;
9
9
  declare function coerceRows(rows: readonly Row$1[]): Row$1[];
10
10
  declare const MAX_DAY_BYTES: number;
11
11
  declare function createStorageEngine(opts: EngineOptions): StorageEngine;
12
- export { type CodecCtx, type ColumnDef, type ColumnType, type CompactionThresholds, type CompactionTier, type ComparisonResult, DEFAULT_SEARCH_TYPE, type DataSource, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, type EngineOptions, type ExtraResult, FILES_PLACEHOLDER, type FileSetRef, type GcCtx, type GscApiRow, type IngestOptions, type ListLiveFilter, type LockScope, MAX_DAY_BYTES, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SearchType, type StorageEngine, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, TABLE_METADATA, type TableName, type TableSchema, type TenantCtx, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countries, createDuckDBCodec, createDuckDBExecutor, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, formatLiteral, inferLegacyTier, inferSearchType, inferTable, keywords, mondayOfWeek, monthPartition, objectKey, page_keywords, pages, quarterOfMonth, quarterPartition, resolveToSQL, substituteNamedFiles, toPath, toSumPosition, transformGscRow, weekPartition };
12
+ export { type CodecCtx, type ColumnDef, type ColumnType, type CompactionThresholds, type CompactionTier, type ComparisonResult, DEFAULT_SEARCH_TYPE, type DataSource, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, type EngineOptions, type ExtraResult, FILES_PLACEHOLDER, type FileSetRef, type GcCtx, type GscApiRow, type IngestOptions, type ListLiveFilter, type LockScope, MAX_DAY_BYTES, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type OptimizedQueryResult, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SearchType, type StorageEngine, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, TABLE_METADATA, type TableName, type TableSchema, type TenantCtx, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countries, createDuckDBCodec, createDuckDBExecutor, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, formatLiteral, inferLegacyTier, inferSearchType, inferTable, keywords, mondayOfWeek, monthPartition, objectKey, page_keywords, pages, quarterOfMonth, quarterPartition, resolveToSQL, substituteNamedFiles, toPath, toSumPosition, transformGscRow, weekPartition };
package/dist/index.mjs CHANGED
@@ -2,7 +2,7 @@ import { a as inferTable, c as countries, d as keywords, f as page_keywords, i a
2
2
  import { a as mondayOfWeek, c as quarterOfMonth, d as weekPartition, i as inferSearchType, l as quarterPartition, n as dayPartition, o as monthPartition, r as inferLegacyTier, s as objectKey, t as DEFAULT_SEARCH_TYPE, u as tenantPrefix } from "./_chunks/storage.mjs";
3
3
  import { a as compactTieredImpl, i as substituteNamedFiles, n as compileLogicalQueryPlan, o as enumeratePartitions, r as resolveToSQL, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
4
4
  import { bindLiterals, formatLiteral, sqlEscape } from "./sql-bind.mjs";
5
- import { g as resolveComparisonSQL, m as buildTotalsSql, p as buildExtrasQueries, t as createParquetResolverAdapter } from "./_chunks/pg-adapter.mjs";
5
+ import { g as resolveComparisonSQL, m as buildTotalsSql, p as buildExtrasQueries, t as createParquetResolverAdapter, v as resolveToSQLOptimized } from "./_chunks/pg-adapter.mjs";
6
6
  import { createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
7
7
  import { buildLogicalPlan } from "gscdump/query/plan";
8
8
  import { normalizeUrl } from "gscdump/normalize";
@@ -439,6 +439,46 @@ function createStorageEngine(opts) {
439
439
  totals: totalsRow.rows[0] ?? {}
440
440
  };
441
441
  }
442
+ async function queryOptimized(ctx, state) {
443
+ const adapter = createParquetResolverAdapter();
444
+ const plan = buildLogicalPlan(state, adapter.capabilities);
445
+ const table = ctx.table ?? plan.dataset;
446
+ const partitions = enumeratePartitions(plan.dateRange.startDate, plan.dateRange.endDate);
447
+ const { sql, params } = resolveToSQLOptimized(state, {
448
+ adapter,
449
+ siteId: void 0
450
+ });
451
+ const result = await runSQL({
452
+ ctx: {
453
+ userId: ctx.userId,
454
+ siteId: ctx.siteId
455
+ },
456
+ table,
457
+ fileSets: { FILES: {
458
+ table,
459
+ partitions
460
+ } },
461
+ sql,
462
+ params,
463
+ signal: ctx.signal
464
+ });
465
+ const firstRow = result.rows[0];
466
+ const totalCount = Number(firstRow?.totalCount ?? 0);
467
+ const totals = {
468
+ clicks: Number(firstRow?.totalClicks ?? 0),
469
+ impressions: Number(firstRow?.totalImpressions ?? 0),
470
+ ctr: Number(firstRow?.totalCtr ?? 0),
471
+ position: Number(firstRow?.totalPosition ?? 0)
472
+ };
473
+ return {
474
+ rows: result.rows.map((r) => {
475
+ const { totalCount: _tc, totalClicks: _tcl, totalImpressions: _ti, totalCtr: _tr, totalPosition: _tp, ...rest } = r;
476
+ return rest;
477
+ }),
478
+ totalCount,
479
+ totals
480
+ };
481
+ }
442
482
  async function queryExtras(ctx, state) {
443
483
  const adapter = createParquetResolverAdapter();
444
484
  const extras = buildExtrasQueries(state, {
@@ -579,6 +619,7 @@ function createStorageEngine(opts) {
579
619
  query,
580
620
  queryComparison,
581
621
  queryExtras,
622
+ queryOptimized,
582
623
  runSQL,
583
624
  compactTiered,
584
625
  gcOrphans,
package/dist/ingest.d.mts CHANGED
@@ -1,4 +1,4 @@
1
- import { N as TableName, w as Row } from "./_chunks/storage.mjs";
1
+ import { P as TableName, T as Row } from "./_chunks/storage.mjs";
2
2
  /**
3
3
  * Canonical GSC API dimension order per table. Consumers hitting the raw
4
4
  * `searchanalytics.query` endpoint must request dimensions in this order so
@@ -1,3 +1,3 @@
1
- import { X as enumeratePartitions } from "./_chunks/storage.mjs";
1
+ import { Z as enumeratePartitions } from "./_chunks/storage.mjs";
2
2
  import { a as substituteNamedFiles, i as resolveToSQL, n as ResolvedQuery, r as compileLogicalQueryPlan, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
3
3
  export { FILES_PLACEHOLDER, ResolvedQuery, compileLogicalQueryPlan, enumeratePartitions, resolveToSQL, substituteNamedFiles };
@@ -1,4 +1,4 @@
1
- import { D as StorageEngine, P as TenantCtx, w as Row } from "../_chunks/storage.mjs";
1
+ import { F as TenantCtx, O as StorageEngine, T as Row } from "../_chunks/storage.mjs";
2
2
  import { n as AnalysisResult, t as AnalysisParams } from "../_chunks/analysis-types.mjs";
3
3
  import { i as QueryRow, r as FileSet, s as SqlQuerySource, t as AnalysisQuerySource } from "../_chunks/source-types.mjs";
4
4
  import { t as AnalyzerRegistry } from "../_chunks/registry.mjs";
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@gscdump/engine",
3
3
  "type": "module",
4
- "version": "0.9.0",
4
+ "version": "0.9.1",
5
5
  "description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
6
6
  "author": {
7
7
  "name": "Harlan Wilton",
@@ -159,7 +159,7 @@
159
159
  "dependencies": {
160
160
  "drizzle-orm": "^0.45.2",
161
161
  "proper-lockfile": "^4.1.2",
162
- "gscdump": "0.9.0"
162
+ "gscdump": "0.9.1"
163
163
  },
164
164
  "devDependencies": {
165
165
  "@duckdb/duckdb-wasm": "^1.32.0",