@gscdump/engine 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_chunks/duckdb.d.mts +1 -1
- package/dist/_chunks/planner.d.mts +1 -1
- package/dist/_chunks/registry.d.mts +1 -1
- package/dist/_chunks/storage.d.mts +21 -1
- package/dist/adapters/hyparquet.d.mts +1 -1
- package/dist/adapters/r2-manifest.d.mts +1 -1
- package/dist/contracts.d.mts +1 -1
- package/dist/index.d.mts +2 -2
- package/dist/index.mjs +42 -1
- package/dist/ingest.d.mts +1 -1
- package/dist/planner.d.mts +1 -1
- package/dist/source/index.d.mts +1 -1
- package/package.json +2 -2
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { C as QueryExecutor, P as TableName, T as Row, g as ParquetCodec } from "./storage.mjs";
|
|
2
2
|
interface DuckDBHandle {
|
|
3
3
|
query: (sql: string, params?: unknown[]) => Promise<Row[]>;
|
|
4
4
|
registerFileBuffer: (name: string, bytes: Uint8Array) => Promise<void>;
|
|
@@ -307,6 +307,16 @@ interface ExtraResult {
|
|
|
307
307
|
key: string;
|
|
308
308
|
rows: Row[];
|
|
309
309
|
}
|
|
310
|
+
interface OptimizedQueryResult {
|
|
311
|
+
rows: Row[];
|
|
312
|
+
totalCount: number;
|
|
313
|
+
totals: {
|
|
314
|
+
clicks: number;
|
|
315
|
+
impressions: number;
|
|
316
|
+
ctr: number;
|
|
317
|
+
position: number;
|
|
318
|
+
};
|
|
319
|
+
}
|
|
310
320
|
interface QueryExecuteOptions {
|
|
311
321
|
sql: string;
|
|
312
322
|
params: unknown[];
|
|
@@ -403,6 +413,16 @@ interface StorageEngine {
|
|
|
403
413
|
* when the state has no extras-eligible dimensions.
|
|
404
414
|
*/
|
|
405
415
|
queryExtras: (ctx: QueryCtx, state: BuilderState) => Promise<ExtraResult[]>;
|
|
416
|
+
/**
|
|
417
|
+
* Single-scan variant of {@link query} that piggy-backs `totalCount` and
|
|
418
|
+
* unfiltered metric totals onto the dimensioned result via window functions.
|
|
419
|
+
* Replaces the host-side rows + totals + count fan-out with one DuckDB
|
|
420
|
+
* execution. Window-function output columns (`totalCount`, `totalClicks`,
|
|
421
|
+
* `totalImpressions`, `totalCtr`, `totalPosition`) are stripped from `rows`
|
|
422
|
+
* before return; missing per-metric totals (when the metric was not
|
|
423
|
+
* requested in `state.metrics`) default to 0.
|
|
424
|
+
*/
|
|
425
|
+
queryOptimized: (ctx: QueryCtx, state: BuilderState) => Promise<OptimizedQueryResult>;
|
|
406
426
|
/**
|
|
407
427
|
* Run arbitrary SQL resolved against named partition sets. Composes
|
|
408
428
|
* manifest lookup + object reads + placeholder substitution + execution
|
|
@@ -480,4 +500,4 @@ declare function mondayOfWeek(isoDate: string): string;
|
|
|
480
500
|
/** YYYY-Qq for the quarter containing the given YYYY-MM month string. */
|
|
481
501
|
declare function quarterOfMonth(month: string): string;
|
|
482
502
|
declare function objectKey(ctx: TenantCtx, table: TableName, partition: string, version: number, searchType?: SearchType): string;
|
|
483
|
-
export {
|
|
503
|
+
export { SyncStateDetail as A, WriteResult as B, QueryExecutor as C, SearchType$1 as D, RunSQLOptions as E, TenantCtx$1 as F, monthPartition as G, inferLegacyTier as H, Watermark as I, quarterPartition as J, objectKey as K, WatermarkFilter as L, SyncStateKind as M, SyncStateScope as N, StorageEngine as O, TableName$1 as P, WatermarkScope as R, QueryExecuteResult as S, Row$1 as T, inferSearchType as U, dayPartition as V, mondayOfWeek as W, CompactionThresholds as X, weekPartition as Y, enumeratePartitions as Z, PurgeFilter as _, DataSource as a, QueryCtx as b, FileSetRef as c, LockScope as d, ManifestEntry as f, ParquetCodec as g, OptimizedQueryResult as h, DEFAULT_SEARCH_TYPE as i, SyncStateFilter as j, SyncState as k, GcCtx as l, ManifestStore as m, CompactionTier as n, EngineOptions as o, ManifestPurgeResult as p, quarterOfMonth as q, ComparisonResult as r, ExtraResult as s, CodecCtx as t, ListLiveFilter as u, PurgeResult as v, QueryResult as w, QueryExecuteOptions as x, PurgeUrlsResult as y, WriteCtx as z };
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { P as TableName, T as Row, a as DataSource, g as ParquetCodec, t as CodecCtx } from "../_chunks/storage.mjs";
|
|
2
2
|
import { t as ColumnDef } from "../_chunks/schema.mjs";
|
|
3
3
|
declare function encodeRowsToParquet(table: TableName, rows: readonly Row[]): Uint8Array;
|
|
4
4
|
interface EncodeFlexOptions {
|
package/dist/contracts.d.mts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { A as
|
|
1
|
+
import { A as SyncStateDetail, B as WriteResult, C as QueryExecutor, D as SearchType, E as RunSQLOptions, F as TenantCtx, I as Watermark, L as WatermarkFilter, M as SyncStateKind, N as SyncStateScope, O as StorageEngine, P as TableName, R as WatermarkScope, S as QueryExecuteResult, T as Row, a as DataSource, b as QueryCtx, c as FileSetRef, d as LockScope, f as ManifestEntry, g as ParquetCodec, j as SyncStateFilter, k as SyncState, l as GcCtx, m as ManifestStore, n as CompactionTier, o as EngineOptions, t as CodecCtx, u as ListLiveFilter, w as QueryResult, x as QueryExecuteOptions, z as WriteCtx } from "./_chunks/storage.mjs";
|
|
2
2
|
export { CodecCtx, CompactionTier, DataSource, EngineOptions, FileSetRef, GcCtx, ListLiveFilter, LockScope, ManifestEntry, ManifestStore, ParquetCodec, QueryCtx, QueryExecuteOptions, QueryExecuteResult, QueryExecutor, QueryResult, Row, RunSQLOptions, SearchType, StorageEngine, SyncState, SyncStateDetail, SyncStateFilter, SyncStateKind, SyncStateScope, TableName, TenantCtx, Watermark, WatermarkFilter, WatermarkScope, WriteCtx, WriteResult };
|
package/dist/index.d.mts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { A as
|
|
1
|
+
import { A as SyncStateDetail, B as WriteResult, C as QueryExecutor, D as SearchType, E as RunSQLOptions, F as TenantCtx, G as monthPartition, H as inferLegacyTier, I as Watermark, J as quarterPartition, K as objectKey, L as WatermarkFilter, M as SyncStateKind, N as SyncStateScope, O as StorageEngine, P as TableName, R as WatermarkScope, S as QueryExecuteResult, T as Row, U as inferSearchType, V as dayPartition, W as mondayOfWeek, X as CompactionThresholds, Y as weekPartition, Z as enumeratePartitions, _ as PurgeFilter, a as DataSource, b as QueryCtx, c as FileSetRef, d as LockScope, f as ManifestEntry, g as ParquetCodec, h as OptimizedQueryResult, i as DEFAULT_SEARCH_TYPE, j as SyncStateFilter, k as SyncState, l as GcCtx, m as ManifestStore, n as CompactionTier, o as EngineOptions, p as ManifestPurgeResult, q as quarterOfMonth, r as ComparisonResult, s as ExtraResult, t as CodecCtx, u as ListLiveFilter, v as PurgeResult, w as QueryResult, x as QueryExecuteOptions, y as PurgeUrlsResult, z as WriteCtx } from "./_chunks/storage.mjs";
|
|
2
2
|
import { a as createDuckDBExecutor, i as createDuckDBCodec, n as DuckDBHandle, r as canonicalEmptyParquetSchema, t as DuckDBFactory } from "./_chunks/duckdb.mjs";
|
|
3
3
|
import { _ as pages, a as allTables, c as inferTable, d as TABLE_METADATA, f as countries, g as page_keywords, h as keywords, i as TableSchema, m as drizzleSchema, n as ColumnType, o as currentSchemaVersion, p as devices, r as SCHEMAS, s as dimensionToColumn, t as ColumnDef, u as DrizzleSchema } from "./_chunks/schema.mjs";
|
|
4
4
|
import { GscApiRow, IngestOptions, RowAccumulator, RowAccumulatorOptions, createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
|
|
@@ -9,4 +9,4 @@ declare function coerceRow(row: Row$1): Row$1;
|
|
|
9
9
|
declare function coerceRows(rows: readonly Row$1[]): Row$1[];
|
|
10
10
|
declare const MAX_DAY_BYTES: number;
|
|
11
11
|
declare function createStorageEngine(opts: EngineOptions): StorageEngine;
|
|
12
|
-
export { type CodecCtx, type ColumnDef, type ColumnType, type CompactionThresholds, type CompactionTier, type ComparisonResult, DEFAULT_SEARCH_TYPE, type DataSource, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, type EngineOptions, type ExtraResult, FILES_PLACEHOLDER, type FileSetRef, type GcCtx, type GscApiRow, type IngestOptions, type ListLiveFilter, type LockScope, MAX_DAY_BYTES, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SearchType, type StorageEngine, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, TABLE_METADATA, type TableName, type TableSchema, type TenantCtx, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countries, createDuckDBCodec, createDuckDBExecutor, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, formatLiteral, inferLegacyTier, inferSearchType, inferTable, keywords, mondayOfWeek, monthPartition, objectKey, page_keywords, pages, quarterOfMonth, quarterPartition, resolveToSQL, substituteNamedFiles, toPath, toSumPosition, transformGscRow, weekPartition };
|
|
12
|
+
export { type CodecCtx, type ColumnDef, type ColumnType, type CompactionThresholds, type CompactionTier, type ComparisonResult, DEFAULT_SEARCH_TYPE, type DataSource, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, type EngineOptions, type ExtraResult, FILES_PLACEHOLDER, type FileSetRef, type GcCtx, type GscApiRow, type IngestOptions, type ListLiveFilter, type LockScope, MAX_DAY_BYTES, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type OptimizedQueryResult, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SearchType, type StorageEngine, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, TABLE_METADATA, type TableName, type TableSchema, type TenantCtx, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countries, createDuckDBCodec, createDuckDBExecutor, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, formatLiteral, inferLegacyTier, inferSearchType, inferTable, keywords, mondayOfWeek, monthPartition, objectKey, page_keywords, pages, quarterOfMonth, quarterPartition, resolveToSQL, substituteNamedFiles, toPath, toSumPosition, transformGscRow, weekPartition };
|
package/dist/index.mjs
CHANGED
|
@@ -2,7 +2,7 @@ import { a as inferTable, c as countries, d as keywords, f as page_keywords, i a
|
|
|
2
2
|
import { a as mondayOfWeek, c as quarterOfMonth, d as weekPartition, i as inferSearchType, l as quarterPartition, n as dayPartition, o as monthPartition, r as inferLegacyTier, s as objectKey, t as DEFAULT_SEARCH_TYPE, u as tenantPrefix } from "./_chunks/storage.mjs";
|
|
3
3
|
import { a as compactTieredImpl, i as substituteNamedFiles, n as compileLogicalQueryPlan, o as enumeratePartitions, r as resolveToSQL, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
|
|
4
4
|
import { bindLiterals, formatLiteral, sqlEscape } from "./sql-bind.mjs";
|
|
5
|
-
import { g as resolveComparisonSQL, m as buildTotalsSql, p as buildExtrasQueries, t as createParquetResolverAdapter } from "./_chunks/pg-adapter.mjs";
|
|
5
|
+
import { g as resolveComparisonSQL, m as buildTotalsSql, p as buildExtrasQueries, t as createParquetResolverAdapter, v as resolveToSQLOptimized } from "./_chunks/pg-adapter.mjs";
|
|
6
6
|
import { createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
|
|
7
7
|
import { buildLogicalPlan } from "gscdump/query/plan";
|
|
8
8
|
import { normalizeUrl } from "gscdump/normalize";
|
|
@@ -439,6 +439,46 @@ function createStorageEngine(opts) {
|
|
|
439
439
|
totals: totalsRow.rows[0] ?? {}
|
|
440
440
|
};
|
|
441
441
|
}
|
|
442
|
+
async function queryOptimized(ctx, state) {
|
|
443
|
+
const adapter = createParquetResolverAdapter();
|
|
444
|
+
const plan = buildLogicalPlan(state, adapter.capabilities);
|
|
445
|
+
const table = ctx.table ?? plan.dataset;
|
|
446
|
+
const partitions = enumeratePartitions(plan.dateRange.startDate, plan.dateRange.endDate);
|
|
447
|
+
const { sql, params } = resolveToSQLOptimized(state, {
|
|
448
|
+
adapter,
|
|
449
|
+
siteId: void 0
|
|
450
|
+
});
|
|
451
|
+
const result = await runSQL({
|
|
452
|
+
ctx: {
|
|
453
|
+
userId: ctx.userId,
|
|
454
|
+
siteId: ctx.siteId
|
|
455
|
+
},
|
|
456
|
+
table,
|
|
457
|
+
fileSets: { FILES: {
|
|
458
|
+
table,
|
|
459
|
+
partitions
|
|
460
|
+
} },
|
|
461
|
+
sql,
|
|
462
|
+
params,
|
|
463
|
+
signal: ctx.signal
|
|
464
|
+
});
|
|
465
|
+
const firstRow = result.rows[0];
|
|
466
|
+
const totalCount = Number(firstRow?.totalCount ?? 0);
|
|
467
|
+
const totals = {
|
|
468
|
+
clicks: Number(firstRow?.totalClicks ?? 0),
|
|
469
|
+
impressions: Number(firstRow?.totalImpressions ?? 0),
|
|
470
|
+
ctr: Number(firstRow?.totalCtr ?? 0),
|
|
471
|
+
position: Number(firstRow?.totalPosition ?? 0)
|
|
472
|
+
};
|
|
473
|
+
return {
|
|
474
|
+
rows: result.rows.map((r) => {
|
|
475
|
+
const { totalCount: _tc, totalClicks: _tcl, totalImpressions: _ti, totalCtr: _tr, totalPosition: _tp, ...rest } = r;
|
|
476
|
+
return rest;
|
|
477
|
+
}),
|
|
478
|
+
totalCount,
|
|
479
|
+
totals
|
|
480
|
+
};
|
|
481
|
+
}
|
|
442
482
|
async function queryExtras(ctx, state) {
|
|
443
483
|
const adapter = createParquetResolverAdapter();
|
|
444
484
|
const extras = buildExtrasQueries(state, {
|
|
@@ -579,6 +619,7 @@ function createStorageEngine(opts) {
|
|
|
579
619
|
query,
|
|
580
620
|
queryComparison,
|
|
581
621
|
queryExtras,
|
|
622
|
+
queryOptimized,
|
|
582
623
|
runSQL,
|
|
583
624
|
compactTiered,
|
|
584
625
|
gcOrphans,
|
package/dist/ingest.d.mts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { P as TableName, T as Row } from "./_chunks/storage.mjs";
|
|
2
2
|
/**
|
|
3
3
|
* Canonical GSC API dimension order per table. Consumers hitting the raw
|
|
4
4
|
* `searchanalytics.query` endpoint must request dimensions in this order so
|
package/dist/planner.d.mts
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { Z as enumeratePartitions } from "./_chunks/storage.mjs";
|
|
2
2
|
import { a as substituteNamedFiles, i as resolveToSQL, n as ResolvedQuery, r as compileLogicalQueryPlan, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
|
|
3
3
|
export { FILES_PLACEHOLDER, ResolvedQuery, compileLogicalQueryPlan, enumeratePartitions, resolveToSQL, substituteNamedFiles };
|
package/dist/source/index.d.mts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { F as TenantCtx, O as StorageEngine, T as Row } from "../_chunks/storage.mjs";
|
|
2
2
|
import { n as AnalysisResult, t as AnalysisParams } from "../_chunks/analysis-types.mjs";
|
|
3
3
|
import { i as QueryRow, r as FileSet, s as SqlQuerySource, t as AnalysisQuerySource } from "../_chunks/source-types.mjs";
|
|
4
4
|
import { t as AnalyzerRegistry } from "../_chunks/registry.mjs";
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@gscdump/engine",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.9.0",
|
|
4
|
+
"version": "0.9.1",
|
|
5
5
|
"description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "Harlan Wilton",
|
|
@@ -159,7 +159,7 @@
|
|
|
159
159
|
"dependencies": {
|
|
160
160
|
"drizzle-orm": "^0.45.2",
|
|
161
161
|
"proper-lockfile": "^4.1.2",
|
|
162
|
-
"gscdump": "0.9.0"
|
|
162
|
+
"gscdump": "0.9.1"
|
|
163
163
|
},
|
|
164
164
|
"devDependencies": {
|
|
165
165
|
"@duckdb/duckdb-wasm": "^1.32.0",
|