@gscdump/engine 0.23.1 → 0.23.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_chunks/index.d.mts +102 -48
- package/dist/_chunks/index2.d.mts +57 -0
- package/dist/_chunks/pg-adapter.d.mts +28 -0
- package/dist/_chunks/resolver.mjs +91 -50
- package/dist/_chunks/source.mjs +165 -0
- package/dist/_chunks/types.d.mts +3 -0
- package/dist/index.d.mts +3 -1
- package/dist/index.mjs +3 -2
- package/dist/period/index.d.mts +1 -1
- package/dist/report/index.d.mts +1 -1
- package/dist/resolver/index.d.mts +4 -15
- package/dist/resolver/index.mjs +2 -2
- package/dist/source/index.d.mts +2 -108
- package/dist/source/index.mjs +1 -151
- package/package.json +3 -3
- package/dist/_chunks/coerce.mjs +0 -14
package/dist/_chunks/index.d.mts
CHANGED
|
@@ -1,57 +1,111 @@
|
|
|
1
|
-
import {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
1
|
+
import { Row, SearchType as SearchType$1, StorageEngine, TenantCtx } from "./storage.mjs";
|
|
2
|
+
import { AnalysisParams, AnalysisResult } from "./analysis-types.mjs";
|
|
3
|
+
import { ResolverAdapter } from "./types.mjs";
|
|
4
|
+
import { AnalysisQuerySource, AnalysisSourceKind, AnalyzerRegistry, FileSet, QueryRow, SourceCapabilities } from "./registry.mjs";
|
|
5
|
+
import { PlannerCapabilities } from "gscdump/query/plan";
|
|
6
|
+
import { BuilderState } from "gscdump/query";
|
|
7
|
+
interface AttachedTableRunner {
|
|
8
|
+
/**
|
|
9
|
+
* Run a query with positional (`?`) bound parameters. Return objects keyed
|
|
10
|
+
* by column name. BIGINT → number coercion is applied by the source factory
|
|
11
|
+
* (see `coerceRows`); runners only need to handle DATE → ISO string (or
|
|
12
|
+
* let the analyzer reducer normalize via `num(v)`/`str(v)`).
|
|
13
|
+
*/
|
|
14
|
+
query: (sql: string, params?: unknown[], signal?: AbortSignal) => Promise<Row[]>;
|
|
10
15
|
}
|
|
11
|
-
interface
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
16
|
+
interface AttachedTableSourceOptions {
|
|
17
|
+
/** Schema name the exported DuckDB file was attached under — e.g. `gsc`. */
|
|
18
|
+
schema: string;
|
|
19
|
+
/**
|
|
20
|
+
* Abort in-flight queries when the caller no longer cares about the
|
|
21
|
+
* result. Every `runner.query` call receives the same signal.
|
|
22
|
+
*/
|
|
23
|
+
signal?: AbortSignal;
|
|
24
|
+
/**
|
|
25
|
+
* List of table names actually attached to this connection. When provided,
|
|
26
|
+
* `executeSql` short-circuits with a specific "table not attached" error
|
|
27
|
+
* if the SQL plan references a table that isn't in this list — letting
|
|
28
|
+
* callers (e.g. the analytics layer) route to cloud fallback without
|
|
29
|
+
* paying the SQL execution cost. Omit to disable the check.
|
|
30
|
+
*/
|
|
31
|
+
attachedTables?: readonly string[];
|
|
32
|
+
/**
|
|
33
|
+
* Dialect adapter surfaced on the source for analyzers that compose SQL
|
|
34
|
+
* from a `BuilderState` at plan-build time (e.g. `data-query`,
|
|
35
|
+
* `data-detail`). Attached-table sources execute pg-flavored DuckDB SQL,
|
|
36
|
+
* so callers should pass `pgResolverAdapter` here.
|
|
37
|
+
*/
|
|
38
|
+
adapter?: ResolverAdapter<any>;
|
|
19
39
|
}
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
40
|
+
declare class AttachedTableMissingError extends Error {
|
|
41
|
+
readonly missing: readonly string[];
|
|
42
|
+
constructor(missing: readonly string[]);
|
|
23
43
|
}
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
44
|
+
/**
|
|
45
|
+
* Swap `read_parquet({{KEY}}, union_by_name = true)` for `<schema>.<table>`.
|
|
46
|
+
* Tolerates whitespace variation. Preserves the rest of the SQL verbatim.
|
|
47
|
+
*/
|
|
48
|
+
declare function rewriteForTableSource(sql: string, schema: string, fileSets: Record<string, FileSet>): string;
|
|
49
|
+
declare function createAttachedTableSource(runner: AttachedTableRunner, options: AttachedTableSourceOptions): AnalysisQuerySource;
|
|
50
|
+
interface CreateSqlQuerySourceOptions<TKey extends string> {
|
|
51
|
+
/** Debug-only identifier surfaced on the source for error messages. */
|
|
52
|
+
name: string;
|
|
53
|
+
/** Telemetry tag stamped onto analyzer result meta. */
|
|
54
|
+
kind?: AnalysisSourceKind;
|
|
55
|
+
/** Dialect-specific adapter; compiles `BuilderState` → `{ sql, params }`. */
|
|
56
|
+
adapter: ResolverAdapter<TKey>;
|
|
57
|
+
/** Drives the underlying DB. Called for both typed queries and raw SQL. */
|
|
58
|
+
execute: (sql: string, params: unknown[]) => Promise<QueryRow[]>;
|
|
59
|
+
/** Tenant id for multi-tenant dialects; forwarded to `resolveToSQL`. */
|
|
60
|
+
siteId?: string | number;
|
|
61
|
+
/** Search-type scope for multi-tenant dialects; forwarded to `resolveToSQL`. */
|
|
62
|
+
searchType?: string;
|
|
63
|
+
/** Additional capability flags merged on top of `adapter.capabilities`. */
|
|
64
|
+
extraCapabilities?: Partial<SourceCapabilities>;
|
|
27
65
|
}
|
|
28
|
-
declare function
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
declare
|
|
36
|
-
interface
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
/** ISO date (YYYY-MM-DD), inclusive upper bound. */
|
|
40
|
-
endDate: string;
|
|
66
|
+
declare function createSqlQuerySource<TKey extends string>(options: CreateSqlQuerySourceOptions<TKey>): AnalysisQuerySource;
|
|
67
|
+
/**
|
|
68
|
+
* Capabilities the engine query path honors. Matches what the DuckDB compiler
|
|
69
|
+
* passes to `buildLogicalPlan`: regex pushes down; comparison joins and
|
|
70
|
+
* multi-dataset queries belong to the analyzer dispatcher, not the engine's
|
|
71
|
+
* builder-state query path.
|
|
72
|
+
*/
|
|
73
|
+
declare const ENGINE_QUERY_CAPABILITIES: PlannerCapabilities;
|
|
74
|
+
interface EngineQuerySourceOptions {
|
|
75
|
+
engine: StorageEngine;
|
|
76
|
+
ctx: TenantCtx;
|
|
41
77
|
/**
|
|
42
|
-
*
|
|
43
|
-
*
|
|
78
|
+
* Restrict every manifest lookup the source performs to a single search-type
|
|
79
|
+
* slice. Threads into `engine.query` and `engine.runSQL` so the wrapped
|
|
80
|
+
* source returns rows from one cohort instead of unioning web + non-web
|
|
81
|
+
* parquet. Undefined preserves legacy cross-type behaviour for web-only
|
|
82
|
+
* tenants and admin paths.
|
|
44
83
|
*/
|
|
45
|
-
|
|
46
|
-
/** Row-field that carries the ISO date. Defaults to `date`. */
|
|
47
|
-
dateKey?: string;
|
|
84
|
+
searchType?: SearchType$1;
|
|
48
85
|
}
|
|
49
|
-
type DateRowShape = Record<string, unknown> & {
|
|
50
|
-
date?: unknown;
|
|
51
|
-
};
|
|
52
86
|
/**
|
|
53
|
-
*
|
|
54
|
-
*
|
|
87
|
+
* Wraps a storage engine as an `AnalysisQuerySource` with SQL execution.
|
|
88
|
+
* `queryRows` runs typed builder-state queries; `executeSql` delegates to
|
|
89
|
+
* `engine.runSQL` and requires `opts.fileSets` (with a `FILES` entry so the
|
|
90
|
+
* target table can be resolved for partition lookup).
|
|
55
91
|
*/
|
|
56
|
-
declare function
|
|
57
|
-
|
|
92
|
+
declare function createEngineQuerySource(options: EngineQuerySourceOptions): AnalysisQuerySource;
|
|
93
|
+
/**
|
|
94
|
+
* Convenience: wrap a storage engine + tenant ctx in a source and dispatch.
|
|
95
|
+
* Equivalent to wrapping `createEngineQuerySource`, with omitted searchType
|
|
96
|
+
* defaulted to web at this public helper boundary.
|
|
97
|
+
*/
|
|
98
|
+
declare function runAnalyzerWithEngine(deps: {
|
|
99
|
+
engine: StorageEngine;
|
|
100
|
+
}, ctx: TenantCtx, params: AnalysisParams, registry: AnalyzerRegistry): Promise<AnalysisResult>;
|
|
101
|
+
interface TypedQuery<TRow> {
|
|
102
|
+
state: BuilderState;
|
|
103
|
+
readonly __row?: TRow;
|
|
104
|
+
}
|
|
105
|
+
declare function typedQuery<TRow>(state: BuilderState): TypedQuery<TRow>;
|
|
106
|
+
declare function queryRows<TRow = QueryRow>(source: AnalysisQuerySource, query: BuilderState | TypedQuery<TRow>): Promise<TRow[]>;
|
|
107
|
+
declare function queryComparisonRows<TRow = QueryRow>(source: AnalysisQuerySource, current: BuilderState | TypedQuery<TRow>, previous: BuilderState | TypedQuery<TRow>): Promise<{
|
|
108
|
+
current: TRow[];
|
|
109
|
+
previous: TRow[];
|
|
110
|
+
}>;
|
|
111
|
+
export { AttachedTableMissingError, AttachedTableRunner, AttachedTableSourceOptions, CreateSqlQuerySourceOptions, ENGINE_QUERY_CAPABILITIES, EngineQuerySourceOptions, TypedQuery, createAttachedTableSource, createEngineQuerySource, createSqlQuerySource, queryComparisonRows, queryRows, rewriteForTableSource, runAnalyzerWithEngine, typedQuery };
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import { AnalysisParams } from "./analysis-types.mjs";
|
|
2
|
+
type WindowPreset = 'last-7d' | 'last-28d' | 'last-30d' | 'last-90d' | 'last-180d' | 'last-365d' | 'mtd' | 'ytd' | 'custom';
|
|
3
|
+
type ComparisonMode = 'none' | 'prev-period' | 'yoy';
|
|
4
|
+
interface ResolveWindowOptions {
|
|
5
|
+
preset: WindowPreset;
|
|
6
|
+
comparison?: ComparisonMode;
|
|
7
|
+
anchor?: string;
|
|
8
|
+
start?: string;
|
|
9
|
+
end?: string;
|
|
10
|
+
}
|
|
11
|
+
interface ResolvedWindow {
|
|
12
|
+
start: string;
|
|
13
|
+
end: string;
|
|
14
|
+
days: number;
|
|
15
|
+
comparison?: {
|
|
16
|
+
start: string;
|
|
17
|
+
end: string;
|
|
18
|
+
};
|
|
19
|
+
}
|
|
20
|
+
interface AnalysisPeriod {
|
|
21
|
+
startDate: string;
|
|
22
|
+
endDate: string;
|
|
23
|
+
}
|
|
24
|
+
interface ComparisonPeriod {
|
|
25
|
+
current: AnalysisPeriod;
|
|
26
|
+
previous: AnalysisPeriod;
|
|
27
|
+
}
|
|
28
|
+
declare function defaultEndDate(): string;
|
|
29
|
+
declare function defaultStartDate(): string;
|
|
30
|
+
declare function periodOf(params: AnalysisParams): AnalysisPeriod;
|
|
31
|
+
declare function comparisonOf(params: AnalysisParams): ComparisonPeriod;
|
|
32
|
+
declare function resolveWindow(opts: ResolveWindowOptions): ResolvedWindow;
|
|
33
|
+
/** Convert a ResolvedWindow into the AnalysisPeriod / ComparisonPeriod shape. */
|
|
34
|
+
declare function windowToPeriod(w: ResolvedWindow): AnalysisPeriod;
|
|
35
|
+
declare function windowToComparisonPeriod(w: ResolvedWindow): ComparisonPeriod | undefined;
|
|
36
|
+
interface PadTimeseriesOptions<T> {
|
|
37
|
+
/** ISO date (YYYY-MM-DD), inclusive lower bound. */
|
|
38
|
+
startDate: string;
|
|
39
|
+
/** ISO date (YYYY-MM-DD), inclusive upper bound. */
|
|
40
|
+
endDate: string;
|
|
41
|
+
/**
|
|
42
|
+
* Row to insert for missing dates. Defaults to `{ clicks: 0, impressions: 0, ctr: 0, position: 0 }`.
|
|
43
|
+
* The `date` field is set automatically.
|
|
44
|
+
*/
|
|
45
|
+
fill?: Omit<T, 'date'>;
|
|
46
|
+
/** Row-field that carries the ISO date. Defaults to `date`. */
|
|
47
|
+
dateKey?: string;
|
|
48
|
+
}
|
|
49
|
+
type DateRowShape = Record<string, unknown> & {
|
|
50
|
+
date?: unknown;
|
|
51
|
+
};
|
|
52
|
+
/**
|
|
53
|
+
* Pad rows so every calendar day in `[startDate, endDate]` appears at least
|
|
54
|
+
* once. Existing dates keep all their rows (grouped timeseries safe).
|
|
55
|
+
*/
|
|
56
|
+
declare function padTimeseries<T extends DateRowShape = DateRowShape>(rows: readonly T[], options: PadTimeseriesOptions<T>): T[];
|
|
57
|
+
export { AnalysisPeriod, ComparisonMode, ComparisonPeriod, PadTimeseriesOptions, ResolveWindowOptions, ResolvedWindow, WindowPreset, comparisonOf, defaultEndDate, defaultStartDate, padTimeseries, periodOf, resolveWindow, windowToComparisonPeriod, windowToPeriod };
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { ResolverAdapter } from "./types.mjs";
|
|
2
|
+
import { TableName } from "@gscdump/contracts";
|
|
3
|
+
type PgTableKey = TableName;
|
|
4
|
+
declare const pgResolverAdapter: ResolverAdapter<PgTableKey>;
|
|
5
|
+
/**
|
|
6
|
+
* Parquet-aware variant of {@link pgResolverAdapter}. Identical SQL output
|
|
7
|
+
* except FROM clauses emit `read_parquet({{FILES}}, union_by_name = true) AS
|
|
8
|
+
* "${tk}"`. The runSQL pipeline substitutes `{{FILES}}` with R2 object keys
|
|
9
|
+
* resolved from the manifest. The `AS "${tk}"` alias is mandatory — drizzle
|
|
10
|
+
* compiles `colRef` to table-qualified `"pages"."url"`, which would not
|
|
11
|
+
* resolve against an unaliased FROM.
|
|
12
|
+
*
|
|
13
|
+
* Single-use: build a fresh adapter per query. Cheap (no I/O) and avoids
|
|
14
|
+
* accidental adapter caching that would lock in a stale `{{FILES}}` set.
|
|
15
|
+
*/
|
|
16
|
+
declare function createParquetResolverAdapter(): ResolverAdapter<PgTableKey>;
|
|
17
|
+
/**
|
|
18
|
+
* Multi-tenant pg-flavored adapter for the Iceberg / R2 SQL read path.
|
|
19
|
+
* Identical SQL output to `pgResolverAdapter` except WHERE clauses inject
|
|
20
|
+
* `site_id = ?` AND `search_type = ?` automatically when those scopes are
|
|
21
|
+
* passed to `resolveToSQL`. Required for the Iceberg fact tables which are
|
|
22
|
+
* shared across tenants — querying without these predicates would leak
|
|
23
|
+
* cross-tenant data. Single-use: the adapter has no `tableRef` override,
|
|
24
|
+
* so callers must rewrite bare table names to their qualified form (e.g.
|
|
25
|
+
* `${namespace}.pages`) before sending to R2 SQL.
|
|
26
|
+
*/
|
|
27
|
+
declare function createIcebergResolverAdapter(): ResolverAdapter<PgTableKey>;
|
|
28
|
+
export { PgTableKey, createIcebergResolverAdapter, createParquetResolverAdapter, pgResolverAdapter };
|
|
@@ -2,7 +2,7 @@ import { SCHEMAS, drizzleSchema } from "./schema.mjs";
|
|
|
2
2
|
import { enumeratePartitions } from "./parquet-plan.mjs";
|
|
3
3
|
import { escapeLike } from "../sql-fragments.mjs";
|
|
4
4
|
import "../planner.mjs";
|
|
5
|
-
import { PgDialect } from "drizzle-orm/pg-core";
|
|
5
|
+
import { PgDialect, pgTable, varchar } from "drizzle-orm/pg-core";
|
|
6
6
|
import { UnresolvableDatasetError, buildLogicalComparisonPlan, buildLogicalPlan, inferDataset as inferLogicalDataset, isDatasetResolvable } from "gscdump/query/plan";
|
|
7
7
|
import { normalizeUrl } from "gscdump/normalize";
|
|
8
8
|
import { sql } from "drizzle-orm";
|
|
@@ -180,7 +180,7 @@ function buildDimensionColumnMap(datasetToTableKey) {
|
|
|
180
180
|
return Object.fromEntries(entries);
|
|
181
181
|
}
|
|
182
182
|
function createSqlFragments(config) {
|
|
183
|
-
const { schema, datasetToTableKey, metricCast, regexPredicate, tableLabel, includeSiteId, urlToPathExpr: urlToPathExprOverride, tableRef: tableRefOverride } = config;
|
|
183
|
+
const { schema, datasetToTableKey, metricCast, regexPredicate, tableLabel, includeSiteId, includeSearchType, urlToPathExpr: urlToPathExprOverride, tableRef: tableRefOverride } = config;
|
|
184
184
|
const DIM_COLUMN_MAP = buildDimensionColumnMap(datasetToTableKey);
|
|
185
185
|
function isMetricDimension(dim) {
|
|
186
186
|
return METRIC_NAMES.includes(dim);
|
|
@@ -211,6 +211,9 @@ function createSqlFragments(config) {
|
|
|
211
211
|
function siteIdColRef(tableKey) {
|
|
212
212
|
return colRef(tableKey, "site_id");
|
|
213
213
|
}
|
|
214
|
+
function searchTypeColRef(tableKey) {
|
|
215
|
+
return colRef(tableKey, "search_type");
|
|
216
|
+
}
|
|
214
217
|
function dimExprSql(dim, tableKey) {
|
|
215
218
|
const colName = dimColumn(dim, tableKey);
|
|
216
219
|
if (dim === "page") return sql.raw(urlToPathExpr(colName));
|
|
@@ -335,6 +338,7 @@ function createSqlFragments(config) {
|
|
|
335
338
|
tableRef,
|
|
336
339
|
dateColRef,
|
|
337
340
|
siteIdColRef: includeSiteId ? siteIdColRef : void 0,
|
|
341
|
+
searchTypeColRef: includeSearchType ? searchTypeColRef : void 0,
|
|
338
342
|
dimExprSql,
|
|
339
343
|
metricSql,
|
|
340
344
|
havingPredicates,
|
|
@@ -357,6 +361,7 @@ function createResolverAdapter(config) {
|
|
|
357
361
|
dateColRef: runtime.dateColRef,
|
|
358
362
|
urlToPathExpr: runtime.urlToPathExpr,
|
|
359
363
|
siteIdColRef: runtime.siteIdColRef,
|
|
364
|
+
searchTypeColRef: runtime.searchTypeColRef,
|
|
360
365
|
dimExprSql: runtime.dimExprSql,
|
|
361
366
|
metricSql: runtime.metricSql,
|
|
362
367
|
dimensionPredicates: runtime.dimensionPredicates,
|
|
@@ -366,6 +371,83 @@ function createResolverAdapter(config) {
|
|
|
366
371
|
compile: config.compile
|
|
367
372
|
};
|
|
368
373
|
}
|
|
374
|
+
const pgDialect = new PgDialect();
|
|
375
|
+
function withTenantCols(tableName, baseTable) {
|
|
376
|
+
const t = pgTable(tableName, {
|
|
377
|
+
site_id: varchar("site_id").notNull(),
|
|
378
|
+
search_type: varchar("search_type").notNull()
|
|
379
|
+
});
|
|
380
|
+
return {
|
|
381
|
+
...baseTable,
|
|
382
|
+
site_id: t.site_id,
|
|
383
|
+
search_type: t.search_type
|
|
384
|
+
};
|
|
385
|
+
}
|
|
386
|
+
const icebergSchema = {
|
|
387
|
+
pages: withTenantCols("pages", drizzleSchema.pages),
|
|
388
|
+
queries: withTenantCols("queries", drizzleSchema.queries),
|
|
389
|
+
countries: withTenantCols("countries", drizzleSchema.countries),
|
|
390
|
+
page_queries: withTenantCols("page_queries", drizzleSchema.page_queries),
|
|
391
|
+
dates: withTenantCols("dates", drizzleSchema.dates),
|
|
392
|
+
search_appearance: withTenantCols("search_appearance", drizzleSchema.search_appearance),
|
|
393
|
+
search_appearance_pages: withTenantCols("search_appearance_pages", drizzleSchema.search_appearance_pages),
|
|
394
|
+
search_appearance_queries: withTenantCols("search_appearance_queries", drizzleSchema.search_appearance_queries),
|
|
395
|
+
search_appearance_page_queries: withTenantCols("search_appearance_page_queries", drizzleSchema.search_appearance_page_queries),
|
|
396
|
+
hourly_pages: withTenantCols("hourly_pages", drizzleSchema.hourly_pages)
|
|
397
|
+
};
|
|
398
|
+
function compilePg(query) {
|
|
399
|
+
const compiled = pgDialect.sqlToQuery(query);
|
|
400
|
+
return {
|
|
401
|
+
sql: compiled.sql,
|
|
402
|
+
params: compiled.params
|
|
403
|
+
};
|
|
404
|
+
}
|
|
405
|
+
const PG_BASE_CONFIG = {
|
|
406
|
+
schema: drizzleSchema,
|
|
407
|
+
datasetToTableKey: {
|
|
408
|
+
pages: "pages",
|
|
409
|
+
queries: "queries",
|
|
410
|
+
page_queries: "page_queries",
|
|
411
|
+
countries: "countries",
|
|
412
|
+
dates: "dates",
|
|
413
|
+
search_appearance: "search_appearance",
|
|
414
|
+
search_appearance_pages: "search_appearance_pages",
|
|
415
|
+
search_appearance_queries: "search_appearance_queries",
|
|
416
|
+
search_appearance_page_queries: "search_appearance_page_queries",
|
|
417
|
+
hourly_pages: "hourly_pages"
|
|
418
|
+
},
|
|
419
|
+
metricCast: "DOUBLE",
|
|
420
|
+
regexPredicate: (expr, pattern, negate) => negate ? sql`NOT regexp_matches(${expr}, ${pattern})` : sql`regexp_matches(${expr}, ${pattern})`,
|
|
421
|
+
urlToPathExpr: (col) => `CASE WHEN ${col} LIKE 'http%' THEN COALESCE(NULLIF(regexp_replace(${col}, '^https?://[^/]+', ''), ''), '/') ELSE ${col} END`,
|
|
422
|
+
includeSiteId: false,
|
|
423
|
+
compile: compilePg,
|
|
424
|
+
capabilities: {
|
|
425
|
+
regex: true,
|
|
426
|
+
comparisonJoin: true,
|
|
427
|
+
windowTotals: true
|
|
428
|
+
}
|
|
429
|
+
};
|
|
430
|
+
const pgResolverAdapter = createResolverAdapter({
|
|
431
|
+
...PG_BASE_CONFIG,
|
|
432
|
+
tableLabel: "pg-resolver-adapter"
|
|
433
|
+
});
|
|
434
|
+
function createParquetResolverAdapter() {
|
|
435
|
+
return createResolverAdapter({
|
|
436
|
+
...PG_BASE_CONFIG,
|
|
437
|
+
tableLabel: "parquet-resolver-adapter",
|
|
438
|
+
tableRef: (tk) => sql.raw(`read_parquet({{FILES}}, union_by_name = true) AS "${tk}"`)
|
|
439
|
+
});
|
|
440
|
+
}
|
|
441
|
+
function createIcebergResolverAdapter() {
|
|
442
|
+
return createResolverAdapter({
|
|
443
|
+
...PG_BASE_CONFIG,
|
|
444
|
+
schema: icebergSchema,
|
|
445
|
+
includeSiteId: true,
|
|
446
|
+
includeSearchType: true,
|
|
447
|
+
tableLabel: "iceberg-resolver-adapter",
|
|
448
|
+
tableRef: (tk) => sql.raw(`"${tk}"`)
|
|
449
|
+
});
|
|
450
|
+
}
|
|
369
451
|
const COMPARISON_FILTER_SQL = {
|
|
370
452
|
new: sql`AND (p.impressions IS NULL OR p.impressions = 0)`,
|
|
371
453
|
lost: sql`AND p.impressions > 0 AND c.impressions = 0`,
|
|
@@ -440,7 +522,7 @@ function compileFilterTree(node, adapter, tableKey) {
|
|
|
440
522
|
return sql`(${sql.join(childSqls, sep)})`;
|
|
441
523
|
}
|
|
442
524
|
function buildScope(state, options) {
|
|
443
|
-
const { adapter, siteId } = options;
|
|
525
|
+
const { adapter, siteId, searchType } = options;
|
|
444
526
|
const plan = buildLogicalPlan(state, adapter.capabilities);
|
|
445
527
|
const tableKey = adapter.tableKeyForDataset(plan.dataset);
|
|
446
528
|
const dimFilters = toInternalDimensionFilters(plan.dimensionFilters);
|
|
@@ -451,6 +533,7 @@ function buildScope(state, options) {
|
|
|
451
533
|
const metrics = plan.metrics;
|
|
452
534
|
const wherePredicates = [];
|
|
453
535
|
if (adapter.siteIdColRef && siteId != null) wherePredicates.push(sql`${adapter.siteIdColRef(tableKey)} = ${siteId}`);
|
|
536
|
+
if (adapter.searchTypeColRef && searchType != null) wherePredicates.push(sql`${adapter.searchTypeColRef(tableKey)} = ${searchType}`);
|
|
454
537
|
wherePredicates.push(sql`${adapter.dateColRef(tableKey)} >= ${plan.dateRange.startDate}`);
|
|
455
538
|
wherePredicates.push(sql`${adapter.dateColRef(tableKey)} <= ${plan.dateRange.endDate}`);
|
|
456
539
|
wherePredicates.push(...adapter.prefilterPredicates(prefilters, tableKey));
|
|
@@ -572,7 +655,7 @@ function buildTotalsSql(state, options) {
|
|
|
572
655
|
return compileCollapsed(adapter, wherePredicates.length > 0 ? sql`SELECT ${joinComma(selectExprs)} FROM ${table} WHERE ${joinAnd(wherePredicates)}` : sql`SELECT ${joinComma(selectExprs)} FROM ${table}`);
|
|
573
656
|
}
|
|
574
657
|
function resolveComparisonSQL(current, previous, options, comparisonFilter) {
|
|
575
|
-
const { adapter, siteId } = options;
|
|
658
|
+
const { adapter, siteId, searchType } = options;
|
|
576
659
|
const comparisonPlan = buildComparisonPlan(current, previous, adapter.capabilities);
|
|
577
660
|
const currentScope = buildScope(current, options);
|
|
578
661
|
const previousScope = buildScope(previous, options);
|
|
@@ -590,6 +673,7 @@ function resolveComparisonSQL(current, previous, options, comparisonFilter) {
|
|
|
590
673
|
const groupByExprs = groupByDims.map((d) => adapter.dimExprSql(d, tableKey));
|
|
591
674
|
const prevWhere = [];
|
|
592
675
|
if (adapter.siteIdColRef && siteId != null) prevWhere.push(sql`${adapter.siteIdColRef(tableKey)} = ${siteId}`);
|
|
676
|
+
if (adapter.searchTypeColRef && searchType != null) prevWhere.push(sql`${adapter.searchTypeColRef(tableKey)} = ${searchType}`);
|
|
593
677
|
if (previousScope.startDate) prevWhere.push(sql`${adapter.dateColRef(tableKey)} >= ${previousScope.startDate}`);
|
|
594
678
|
if (previousScope.endDate) prevWhere.push(sql`${adapter.dateColRef(tableKey)} <= ${previousScope.endDate}`);
|
|
595
679
|
const prevDimSql = comparisonPlan.current.dimensionFilterTree ? compileFilterTree(comparisonPlan.current.dimensionFilterTree, adapter, tableKey) : void 0;
|
|
@@ -627,7 +711,7 @@ function resolveComparisonSQL(current, previous, options, comparisonFilter) {
|
|
|
627
711
|
};
|
|
628
712
|
}
|
|
629
713
|
function buildExtrasQueries(state, options) {
|
|
630
|
-
const { adapter, siteId } = options;
|
|
714
|
+
const { adapter, siteId, searchType } = options;
|
|
631
715
|
const plan = buildLogicalPlan(state, adapter.capabilities);
|
|
632
716
|
const dims = plan.groupByDimensions;
|
|
633
717
|
const extras = [];
|
|
@@ -637,6 +721,7 @@ function buildExtrasQueries(state, options) {
|
|
|
637
721
|
const table = adapter.tableRef(queriesKey);
|
|
638
722
|
const whereParts = [];
|
|
639
723
|
if (adapter.siteIdColRef && siteId != null) whereParts.push(sql`${adapter.siteIdColRef(queriesKey)} = ${siteId}`);
|
|
724
|
+
if (adapter.searchTypeColRef && searchType != null) whereParts.push(sql`${adapter.searchTypeColRef(queriesKey)} = ${searchType}`);
|
|
640
725
|
whereParts.push(sql`${adapter.dateColRef(queriesKey)} >= ${plan.dateRange.startDate}`);
|
|
641
726
|
whereParts.push(sql`${adapter.dateColRef(queriesKey)} <= ${plan.dateRange.endDate}`);
|
|
642
727
|
const whereExpr = whereParts.length > 0 ? sql`WHERE ${joinAnd(whereParts)}` : sql``;
|
|
@@ -771,50 +856,6 @@ function matchesMetricFilter(row, filter) {
|
|
|
771
856
|
function matchesTopLevelPage(row) {
|
|
772
857
|
return (normalizeUrl(dimensionValue(row, "page")).match(/\//g)?.length ?? 0) <= 1;
|
|
773
858
|
}
|
|
774
|
-
const pgDialect = new PgDialect();
|
|
775
|
-
function compilePg(query) {
|
|
776
|
-
const compiled = pgDialect.sqlToQuery(query);
|
|
777
|
-
return {
|
|
778
|
-
sql: compiled.sql,
|
|
779
|
-
params: compiled.params
|
|
780
|
-
};
|
|
781
|
-
}
|
|
782
|
-
const PG_BASE_CONFIG = {
|
|
783
|
-
schema: drizzleSchema,
|
|
784
|
-
datasetToTableKey: {
|
|
785
|
-
pages: "pages",
|
|
786
|
-
queries: "queries",
|
|
787
|
-
page_queries: "page_queries",
|
|
788
|
-
countries: "countries",
|
|
789
|
-
dates: "dates",
|
|
790
|
-
search_appearance: "search_appearance",
|
|
791
|
-
search_appearance_pages: "search_appearance_pages",
|
|
792
|
-
search_appearance_queries: "search_appearance_queries",
|
|
793
|
-
search_appearance_page_queries: "search_appearance_page_queries",
|
|
794
|
-
hourly_pages: "hourly_pages"
|
|
795
|
-
},
|
|
796
|
-
metricCast: "DOUBLE",
|
|
797
|
-
regexPredicate: (expr, pattern, negate) => negate ? sql`NOT regexp_matches(${expr}, ${pattern})` : sql`regexp_matches(${expr}, ${pattern})`,
|
|
798
|
-
urlToPathExpr: (col) => `CASE WHEN ${col} LIKE 'http%' THEN COALESCE(NULLIF(regexp_replace(${col}, '^https?://[^/]+', ''), ''), '/') ELSE ${col} END`,
|
|
799
|
-
includeSiteId: false,
|
|
800
|
-
compile: compilePg,
|
|
801
|
-
capabilities: {
|
|
802
|
-
regex: true,
|
|
803
|
-
comparisonJoin: true,
|
|
804
|
-
windowTotals: true
|
|
805
|
-
}
|
|
806
|
-
};
|
|
807
|
-
const pgResolverAdapter = createResolverAdapter({
|
|
808
|
-
...PG_BASE_CONFIG,
|
|
809
|
-
tableLabel: "pg-resolver-adapter"
|
|
810
|
-
});
|
|
811
|
-
function createParquetResolverAdapter() {
|
|
812
|
-
return createResolverAdapter({
|
|
813
|
-
...PG_BASE_CONFIG,
|
|
814
|
-
tableLabel: "parquet-resolver-adapter",
|
|
815
|
-
tableRef: (tk) => sql.raw(`read_parquet({{FILES}}, union_by_name = true) AS "${tk}"`)
|
|
816
|
-
});
|
|
817
|
-
}
|
|
818
859
|
function runArgs(ctx, partitions) {
|
|
819
860
|
return {
|
|
820
861
|
ctx: {
|
|
@@ -912,4 +953,4 @@ function assertSchemaInSync(options) {
|
|
|
912
953
|
if (missing.length > 0 || extra.length > 0) throw new Error(`${label} drizzle schema for '${key}' drifted from SCHEMAS. Missing: [${missing.join(", ")}]. Extra: [${extra.join(", ")}].`);
|
|
913
954
|
}
|
|
914
955
|
}
|
|
915
|
-
export { DIMENSION_SURFACES, LOGICAL_DATASETS, UnresolvableDatasetError, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, isDatasetResolvable, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, runComparisonQuery, runOptimizedQuery, supportsDimensionOnSurface };
|
|
956
|
+
export { DIMENSION_SURFACES, LOGICAL_DATASETS, UnresolvableDatasetError, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createIcebergResolverAdapter, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, isDatasetResolvable, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, runComparisonQuery, runOptimizedQuery, supportsDimensionOnSurface };
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
import "./storage.mjs";
|
|
2
|
+
import { assertDimensionsSupported, getFilterDimensions, pgResolverAdapter, resolveToSQL } from "./resolver.mjs";
|
|
3
|
+
import { runAnalyzerFromSource } from "./dispatch.mjs";
|
|
4
|
+
function coerceRow(row) {
|
|
5
|
+
let mutated = null;
|
|
6
|
+
for (const [k, v] of Object.entries(row)) if (typeof v === "bigint") {
|
|
7
|
+
if (!mutated) mutated = { ...row };
|
|
8
|
+
mutated[k] = Number(v);
|
|
9
|
+
}
|
|
10
|
+
return mutated ?? row;
|
|
11
|
+
}
|
|
12
|
+
function coerceRows(rows) {
|
|
13
|
+
const out = Array.from({ length: rows.length });
|
|
14
|
+
for (let i = 0; i < rows.length; i++) out[i] = coerceRow(rows[i]);
|
|
15
|
+
return out;
|
|
16
|
+
}
|
|
17
|
+
var AttachedTableMissingError = class extends Error {
|
|
18
|
+
missing;
|
|
19
|
+
constructor(missing) {
|
|
20
|
+
super(`attached-table source: required table(s) not attached: ${missing.join(", ")}`);
|
|
21
|
+
this.missing = missing;
|
|
22
|
+
this.name = "AttachedTableMissingError";
|
|
23
|
+
}
|
|
24
|
+
};
|
|
25
|
+
const ATTACHED_TABLE_CAPABILITIES = {
|
|
26
|
+
fileSets: true,
|
|
27
|
+
attachedTables: true,
|
|
28
|
+
regex: true
|
|
29
|
+
};
|
|
30
|
+
const ATTACHED_TABLE_CAPABILITIES_WITH_ADAPTER = {
|
|
31
|
+
...ATTACHED_TABLE_CAPABILITIES,
|
|
32
|
+
adapter: true
|
|
33
|
+
};
|
|
34
|
+
function rewriteForTableSource(sql, schema, fileSets) {
|
|
35
|
+
let out = sql;
|
|
36
|
+
for (const [key, fs] of Object.entries(fileSets)) {
|
|
37
|
+
const pattern = new RegExp(`read_parquet\\(\\s*\\{\\{${key}\\}\\}\\s*,\\s*union_by_name\\s*=\\s*true\\s*\\)`, "g");
|
|
38
|
+
out = out.replace(pattern, `${schema}.${fs.table}`);
|
|
39
|
+
}
|
|
40
|
+
return out;
|
|
41
|
+
}
|
|
42
|
+
function createAttachedTableSource(runner, options) {
|
|
43
|
+
const { schema, signal, attachedTables, adapter } = options;
|
|
44
|
+
const attachedSet = attachedTables ? new Set(attachedTables) : null;
|
|
45
|
+
return {
|
|
46
|
+
name: "attached-table",
|
|
47
|
+
kind: "browser",
|
|
48
|
+
capabilities: adapter ? ATTACHED_TABLE_CAPABILITIES_WITH_ADAPTER : ATTACHED_TABLE_CAPABILITIES,
|
|
49
|
+
adapter,
|
|
50
|
+
async queryRows() {
|
|
51
|
+
throw new Error("attached-table source: queryRows is not supported; use SQL analyzers");
|
|
52
|
+
},
|
|
53
|
+
async executeSql(sql, params, opts) {
|
|
54
|
+
signal?.throwIfAborted();
|
|
55
|
+
const fileSets = opts?.fileSets ?? {};
|
|
56
|
+
if (attachedSet) {
|
|
57
|
+
const missing = [];
|
|
58
|
+
for (const fs of Object.values(fileSets)) if (!attachedSet.has(fs.table)) missing.push(fs.table);
|
|
59
|
+
if (missing.length > 0) throw new AttachedTableMissingError(missing);
|
|
60
|
+
}
|
|
61
|
+
const rewritten = rewriteForTableSource(sql, schema, fileSets);
|
|
62
|
+
return coerceRows(await runner.query(rewritten, params ?? [], signal));
|
|
63
|
+
}
|
|
64
|
+
};
|
|
65
|
+
}
|
|
66
|
+
function createSqlQuerySource(options) {
|
|
67
|
+
const { name, kind, adapter, execute, siteId, searchType, extraCapabilities } = options;
|
|
68
|
+
return {
|
|
69
|
+
name,
|
|
70
|
+
kind,
|
|
71
|
+
capabilities: {
|
|
72
|
+
...adapter.capabilities,
|
|
73
|
+
...extraCapabilities,
|
|
74
|
+
adapter: true
|
|
75
|
+
},
|
|
76
|
+
adapter,
|
|
77
|
+
siteId,
|
|
78
|
+
async queryRows(state) {
|
|
79
|
+
const resolved = resolveToSQL(state, {
|
|
80
|
+
adapter,
|
|
81
|
+
siteId,
|
|
82
|
+
searchType
|
|
83
|
+
});
|
|
84
|
+
return coerceRows(await execute(resolved.sql, resolved.params));
|
|
85
|
+
},
|
|
86
|
+
async executeSql(sql, params) {
|
|
87
|
+
return coerceRows(await execute(sql, params ?? []));
|
|
88
|
+
}
|
|
89
|
+
};
|
|
90
|
+
}
|
|
91
|
+
function isMetricDimension(dim) {
|
|
92
|
+
return [
|
|
93
|
+
"clicks",
|
|
94
|
+
"impressions",
|
|
95
|
+
"ctr",
|
|
96
|
+
"position"
|
|
97
|
+
].includes(dim);
|
|
98
|
+
}
|
|
99
|
+
const ENGINE_QUERY_CAPABILITIES = {
|
|
100
|
+
regex: true,
|
|
101
|
+
multiDataset: false,
|
|
102
|
+
comparisonJoin: false,
|
|
103
|
+
windowTotals: false
|
|
104
|
+
};
|
|
105
|
+
const ENGINE_SOURCE_CAPABILITIES = {
|
|
106
|
+
...ENGINE_QUERY_CAPABILITIES,
|
|
107
|
+
fileSets: true,
|
|
108
|
+
adapter: true
|
|
109
|
+
};
|
|
110
|
+
/**
 * Expose a storage engine as a query source named "engine" of kind "local".
 *
 * @param options.engine     Engine providing `query` and `runSQL`.
 * @param options.ctx        Tenant context forwarded to the engine.
 * @param options.searchType Optional search-type scope; only forwarded when
 *                           it is not `undefined`.
 * @returns A source whose `queryRows` runs typed builder-state queries and
 *          whose `executeSql` delegates to `engine.runSQL`.
 */
function createEngineQuerySource(options) {
  const { engine, ctx, searchType } = options;
  const hasSearchType = searchType !== undefined;
  return {
    name: "engine",
    kind: "local",
    capabilities: ENGINE_SOURCE_CAPABILITIES,
    adapter: pgResolverAdapter,
    /**
     * Validate the requested and filtered dimensions, then run the typed query.
     * Throws when `queryCanonical` appears in either list.
     */
    async queryRows(state) {
      const filterDims = getFilterDimensions(state.filter, isMetricDimension);
      const requested = [...state.dimensions, ...filterDims];
      assertDimensionsSupported(requested, "stored", "engine query source");
      const usesCanonical = [state.dimensions, filterDims].some((dims) => dims.includes("queryCanonical"));
      if (usesCanonical) {
        throw new Error("engine query source does not support queryCanonical; use browser/sqlite query sources for derived dimensions");
      }
      const scope = { ...ctx };
      if (hasSearchType) scope.searchType = searchType;
      const result = await engine.query(scope, state);
      return coerceRows(result.rows);
    },
    /**
     * Run raw SQL through `engine.runSQL`. Requires `opts.fileSets.FILES`,
     * whose `table` is passed as the target table.
     */
    async executeSql(sql, params, opts) {
      const fileSets = opts?.fileSets;
      const primary = fileSets?.FILES;
      if (!primary) {
        throw new Error("engine query source: executeSql requires opts.fileSets with a FILES entry");
      }
      const request = {
        ctx,
        table: primary.table,
        fileSets,
        sql,
        params: params ?? []
      };
      if (hasSearchType) request.searchType = searchType;
      const outcome = await engine.runSQL(request);
      return coerceRows(outcome.rows);
    }
  };
}
|
|
141
|
+
/**
 * Wrap `deps.engine` and `ctx` in an engine query source and dispatch the
 * analyzer through `runAnalyzerFromSource`.
 *
 * @param deps     Object carrying the storage `engine`.
 * @param ctx      Tenant context handed to the source.
 * @param params   Analysis parameters; a null/undefined `searchType` falls back to "web".
 * @param registry Analyzer registry forwarded unchanged.
 * @returns Whatever `runAnalyzerFromSource` resolves to.
 */
async function runAnalyzerWithEngine(deps, ctx, params, registry) {
  const { engine } = deps;
  const searchType = params.searchType ?? "web";
  const source = createEngineQuerySource({ engine, ctx, searchType });
  return runAnalyzerFromSource(source, params, registry);
}
|
|
148
|
+
function typedQuery(state) {
|
|
149
|
+
return { state };
|
|
150
|
+
}
|
|
151
|
+
function isTypedQuery(value) {
|
|
152
|
+
return "state" in value;
|
|
153
|
+
}
|
|
154
|
+
/**
 * Run a query against `source`, accepting either a bare builder state or a
 * typed-query envelope.
 *
 * @param source Query source exposing `queryRows(state)`.
 * @param query  Builder state, or an envelope whose `state` is unwrapped first.
 * @returns The rows resolved by `source.queryRows`.
 */
async function queryRows(source, query) {
  let state = query;
  if (isTypedQuery(query)) state = query.state;
  const rows = await source.queryRows(state);
  return rows;
}
|
|
158
|
+
/**
 * Fetch rows for two queries against the same source and return them together.
 *
 * @param source   Query source passed to `queryRows`.
 * @param current  Query for the current period.
 * @param previous Query for the previous period.
 * @returns `{ current, previous }` holding each query's rows.
 */
async function queryComparisonRows(source, current, previous) {
  // Start both queries before awaiting so they are in flight together.
  const pendingCurrent = queryRows(source, current);
  const pendingPrevious = queryRows(source, previous);
  const settled = await Promise.all([pendingCurrent, pendingPrevious]);
  return {
    current: settled[0],
    previous: settled[1]
  };
}
|
|
165
|
+
export { AttachedTableMissingError, ENGINE_QUERY_CAPABILITIES, coerceRow, coerceRows, createAttachedTableSource, createEngineQuerySource, createSqlQuerySource, queryComparisonRows, queryRows, rewriteForTableSource, runAnalyzerWithEngine, typedQuery };
|
package/dist/_chunks/types.d.mts
CHANGED
|
@@ -13,6 +13,7 @@ interface ResolverAdapter<TableKey extends string = string> {
|
|
|
13
13
|
dateColRef: (tableKey: TableKey) => SQL;
|
|
14
14
|
urlToPathExpr: (col: string) => string;
|
|
15
15
|
siteIdColRef?: (tableKey: TableKey) => SQL;
|
|
16
|
+
searchTypeColRef?: (tableKey: TableKey) => SQL;
|
|
16
17
|
dimExprSql: (dim: Dimension, tableKey: TableKey) => SQL;
|
|
17
18
|
metricSql: (metric: Metric, tableKey: TableKey) => SQL;
|
|
18
19
|
dimensionPredicates: (filters: InternalFilter[], tableKey: TableKey) => SQL[];
|
|
@@ -29,6 +30,8 @@ interface ResolverOptions<TableKey extends string = string> {
|
|
|
29
30
|
adapter: ResolverAdapter<TableKey>;
|
|
30
31
|
/** Optional site scope. Required for multi-tenant D1; omitted for parquet. */
|
|
31
32
|
siteId?: string | number;
|
|
33
|
+
/** Optional searchType scope. Required for multi-tenant Iceberg; omitted for parquet. */
|
|
34
|
+
searchType?: string;
|
|
32
35
|
}
|
|
33
36
|
interface ResolvedSQL {
|
|
34
37
|
sql: string;
|
package/dist/index.d.mts
CHANGED
|
@@ -5,7 +5,9 @@ import { InspectionVerdict, SchedulePolicy, ScheduleState, fixedPolicy, inspecti
|
|
|
5
5
|
import { CommitRetryOptions, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, IcebergAppendSinkOptions, IcebergCatalogConfig, IcebergColumn, IcebergColumnType, IcebergConnection, IcebergListedDataFile, IcebergPartitionField, IcebergPartitionSpec, IcebergPartitionSpecField, IcebergPartitionTransform, IcebergPrimitiveType, IcebergS3Config, IcebergSchema, IcebergSchemaField, IcebergTableName, IcebergTableOpResult, IcebergTableSpec, ListIcebergDataFilesOptions, LocalIcebergSinkOptions, Sink, SinkCapabilities, SinkCloseResult, SinkOptions, SinkSlice, SinkWriteResult, connectIcebergCatalog, createIcebergTables, dropIcebergTables, ensureIcebergNamespace, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, isCommitRateLimited, listIcebergDataFiles, listIcebergTables } from "./_chunks/sink.mjs";
|
|
6
6
|
import { GscApiRow, IngestOptions, RowAccumulator, RowAccumulatorOptions, assembleDatesRow, createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
|
|
7
7
|
import { FILES_PLACEHOLDER, ResolvedQuery, resolveParquetSQL, substituteNamedFiles } from "./_chunks/planner.mjs";
|
|
8
|
+
import { createIcebergResolverAdapter, createParquetResolverAdapter, pgResolverAdapter } from "./_chunks/pg-adapter.mjs";
|
|
8
9
|
import { rebuildDailyFromHourly } from "./rollups.mjs";
|
|
10
|
+
import { ENGINE_QUERY_CAPABILITIES, createSqlQuerySource } from "./_chunks/index.mjs";
|
|
9
11
|
import { bindLiterals, formatLiteral } from "./sql-bind.mjs";
|
|
10
12
|
import { Grain as Grain$1, Row as Row$1, TableName as TableName$1 } from "@gscdump/contracts";
|
|
11
13
|
declare function coerceRow(row: Row$1): Row$1;
|
|
@@ -179,4 +181,4 @@ declare const MIN_SYNC_IMPRESSIONS = 1;
|
|
|
179
181
|
declare const MIN_COUNTRY_IMPRESSIONS = 10;
|
|
180
182
|
declare const MAX_SITEMAP_URLS_PER_SITE = 50000;
|
|
181
183
|
declare const MAX_TRACKED_URLS_PER_SITE = 200000;
|
|
182
|
-
export { type CodecCtx, type ColumnDef, type ColumnType, type CommitRetryOptions, type CompactionThresholds, type CompactionTier, type CreateIngestAccumulatorOptions, DEFAULT_SEARCH_TYPE, type DataSource, type DateWeight, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, type EngineOptions, FILES_PLACEHOLDER, type FileSetRef, type FinalizeOptions, type FinalizeResult, type GcCtx, type Grain, type GscApiRow, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, type IcebergAppendSink, type IcebergAppendSinkOptions, type IcebergCatalogConfig, type IcebergColumn, type IcebergColumnType, type IcebergConnection, type IcebergListedDataFile, type IcebergPartitionField, type IcebergPartitionSpec, type IcebergPartitionSpecField, type IcebergPartitionTransform, type IcebergPrimitiveType, type IcebergS3Config, type IcebergSchema, type IcebergSchemaField, type IcebergTableName, type IcebergTableOpResult, type IcebergTableSpec, type InMemorySink, type IngestAccumulator, type IngestAccumulatorCtx, type IngestAccumulatorEngine, type IngestAccumulatorHooks, type IngestOptions, type InspectionVerdict, type ListIcebergDataFilesOptions, type ListLiveFilter, type LocalIcebergSinkOptions, type LockScope, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SchedulePolicy, type ScheduleState, type SearchType, type Sink, type SinkCapabilities, type SinkCloseResult, type SinkOptions, type SinkSlice, type SinkWriteResult, type StorageEngine, type 
StoredRow, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, type SyncTableName, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, type TableName, type TableSchema, type TableTier, type TenantCtx, type TieredTableName, WEIGHT_PRIORITY, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, assembleDatesRow, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, connectIcebergCatalog, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIcebergAppendSink, createIcebergTables, createInMemorySink, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dates, dayPartition, dedupeOverlappingTiers, dimensionToColumn, drizzleSchema, dropIcebergTables, ensureIcebergNamespace, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, isCommitRateLimited, listIcebergDataFiles, listIcebergTables, objectKey, page_queries, pages, parseEnabledSearchTypes, queries, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, splitOverlappingTiers, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
|
|
184
|
+
export { type CodecCtx, type ColumnDef, type ColumnType, type CommitRetryOptions, type CompactionThresholds, type CompactionTier, type CreateIngestAccumulatorOptions, DEFAULT_SEARCH_TYPE, type DataSource, type DateWeight, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, ENGINE_QUERY_CAPABILITIES, type EngineOptions, FILES_PLACEHOLDER, type FileSetRef, type FinalizeOptions, type FinalizeResult, type GcCtx, type Grain, type GscApiRow, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, type IcebergAppendSink, type IcebergAppendSinkOptions, type IcebergCatalogConfig, type IcebergColumn, type IcebergColumnType, type IcebergConnection, type IcebergListedDataFile, type IcebergPartitionField, type IcebergPartitionSpec, type IcebergPartitionSpecField, type IcebergPartitionTransform, type IcebergPrimitiveType, type IcebergS3Config, type IcebergSchema, type IcebergSchemaField, type IcebergTableName, type IcebergTableOpResult, type IcebergTableSpec, type InMemorySink, type IngestAccumulator, type IngestAccumulatorCtx, type IngestAccumulatorEngine, type IngestAccumulatorHooks, type IngestOptions, type InspectionVerdict, type ListIcebergDataFilesOptions, type ListLiveFilter, type LocalIcebergSinkOptions, type LockScope, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SchedulePolicy, type ScheduleState, type SearchType, type Sink, type SinkCapabilities, type SinkCloseResult, type SinkOptions, type SinkSlice, type SinkWriteResult, type 
StorageEngine, type StoredRow, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, type SyncTableName, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, type TableName, type TableSchema, type TableTier, type TenantCtx, type TieredTableName, WEIGHT_PRIORITY, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, assembleDatesRow, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, connectIcebergCatalog, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIcebergAppendSink, createIcebergResolverAdapter, createIcebergTables, createInMemorySink, createIngestAccumulator, createNoopIngestAccumulator, createParquetResolverAdapter, createRowAccumulator, createSqlQuerySource, createStorageEngine, currentSchemaVersion, dates, dayPartition, dedupeOverlappingTiers, dimensionToColumn, drizzleSchema, dropIcebergTables, ensureIcebergNamespace, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, isCommitRateLimited, listIcebergDataFiles, listIcebergTables, objectKey, page_queries, pages, parseEnabledSearchTypes, pgResolverAdapter, queries, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, splitOverlappingTiers, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
|
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { coerceRow, coerceRows } from "./_chunks/
|
|
1
|
+
import { ENGINE_QUERY_CAPABILITIES, coerceRow, coerceRows, createSqlQuerySource } from "./_chunks/source.mjs";
|
|
2
2
|
import { SCHEMAS, TABLE_METADATA, allTables, countries, currentSchemaVersion, dates, dimensionToColumn, drizzleSchema, hourly_pages, inferTable, page_queries, pages, queries } from "./_chunks/schema.mjs";
|
|
3
3
|
import { DEFAULT_SEARCH_TYPE, dayPartition, hourPartition, inferLegacyTier, inferSearchType, objectKey } from "./_chunks/storage.mjs";
|
|
4
4
|
import { FILES_PLACEHOLDER, RAW_DAILY_COMPACT_THRESHOLD, countRawDailies, dedupeOverlappingTiers, enumeratePartitions, resolveParquetSQL, splitOverlappingTiers, substituteNamedFiles } from "./_chunks/parquet-plan.mjs";
|
|
@@ -7,6 +7,7 @@ import { MAX_DAY_BYTES, canonicalEmptyParquetSchema, createDuckDBCodec, createDu
|
|
|
7
7
|
import { ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, icebergTableSpec } from "./_chunks/iceberg-schema.mjs";
|
|
8
8
|
import { assembleDatesRow, createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
|
|
9
9
|
import "./planner.mjs";
|
|
10
|
+
import { createIcebergResolverAdapter, createParquetResolverAdapter, pgResolverAdapter } from "./_chunks/resolver.mjs";
|
|
10
11
|
import { rebuildDailyFromHourly } from "./rollups.mjs";
|
|
11
12
|
import { fixedPolicy, inspectionPolicy, sitemapPolicy } from "./schedule.mjs";
|
|
12
13
|
import { icebergAppend, icebergCreateTable, icebergDropTable, icebergManifests, restCatalogConnect, restCatalogCreateNamespace, restCatalogListTables, restCatalogLoadTable, s3SignedResolver } from "icebird";
|
|
@@ -529,4 +530,4 @@ const MIN_SYNC_IMPRESSIONS = 1;
|
|
|
529
530
|
const MIN_COUNTRY_IMPRESSIONS = 10;
|
|
530
531
|
const MAX_SITEMAP_URLS_PER_SITE = 5e4;
|
|
531
532
|
const MAX_TRACKED_URLS_PER_SITE = 2e5;
|
|
532
|
-
export { DEFAULT_SEARCH_TYPE, FILES_PLACEHOLDER, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, SCHEMAS, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, WEIGHT_PRIORITY, allTables, assembleDatesRow, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, connectIcebergCatalog, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIcebergAppendSink, createIcebergTables, createInMemorySink, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dates, dayPartition, dedupeOverlappingTiers, dimensionToColumn, drizzleSchema, dropIcebergTables, ensureIcebergNamespace, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, isCommitRateLimited, listIcebergDataFiles, listIcebergTables, objectKey, page_queries, pages, parseEnabledSearchTypes, queries, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, splitOverlappingTiers, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
|
|
533
|
+
export { DEFAULT_SEARCH_TYPE, ENGINE_QUERY_CAPABILITIES, FILES_PLACEHOLDER, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, SCHEMAS, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, WEIGHT_PRIORITY, allTables, assembleDatesRow, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, connectIcebergCatalog, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIcebergAppendSink, createIcebergResolverAdapter, createIcebergTables, createInMemorySink, createIngestAccumulator, createNoopIngestAccumulator, createParquetResolverAdapter, createRowAccumulator, createSqlQuerySource, createStorageEngine, currentSchemaVersion, dates, dayPartition, dedupeOverlappingTiers, dimensionToColumn, drizzleSchema, dropIcebergTables, ensureIcebergNamespace, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, isCommitRateLimited, listIcebergDataFiles, listIcebergTables, objectKey, page_queries, pages, parseEnabledSearchTypes, pgResolverAdapter, queries, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, splitOverlappingTiers, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
|
package/dist/period/index.d.mts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { AnalysisPeriod, ComparisonMode, ComparisonPeriod, PadTimeseriesOptions, ResolveWindowOptions, ResolvedWindow, WindowPreset, comparisonOf, defaultEndDate, defaultStartDate, padTimeseries, periodOf, resolveWindow, windowToComparisonPeriod, windowToPeriod } from "../_chunks/
|
|
1
|
+
import { AnalysisPeriod, ComparisonMode, ComparisonPeriod, PadTimeseriesOptions, ResolveWindowOptions, ResolvedWindow, WindowPreset, comparisonOf, defaultEndDate, defaultStartDate, padTimeseries, periodOf, resolveWindow, windowToComparisonPeriod, windowToPeriod } from "../_chunks/index2.mjs";
|
|
2
2
|
export { AnalysisPeriod, ComparisonMode, ComparisonPeriod, PadTimeseriesOptions, ResolveWindowOptions, ResolvedWindow, WindowPreset, comparisonOf, defaultEndDate, defaultStartDate, padTimeseries, periodOf, resolveWindow, windowToComparisonPeriod, windowToPeriod };
|
package/dist/report/index.d.mts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { AnalysisParams, AnalysisResult } from "../_chunks/analysis-types.mjs";
|
|
2
|
-
import { ComparisonMode, ResolvedWindow, WindowPreset } from "../_chunks/
|
|
2
|
+
import { ComparisonMode, ResolvedWindow, WindowPreset } from "../_chunks/index2.mjs";
|
|
3
3
|
/** Status vocabulary mirrors `ActionPrioritySourceStatus`. */
|
|
4
4
|
type ReportStepStatus = 'pending' | 'running' | 'done' | 'skipped' | 'error';
|
|
5
5
|
type ReportSeverity = 'info' | 'low' | 'medium' | 'high';
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { SearchType as SearchType$1, TableName as TableName$1 } from "../_chunks/storage.mjs";
|
|
2
2
|
import { ComparisonFilter, ExtraQuery, ResolvedComparisonSQL, ResolvedSQL, ResolvedSQLOptimized, ResolverAdapter, ResolverOptions } from "../_chunks/types.mjs";
|
|
3
|
+
import { PgTableKey, createIcebergResolverAdapter, createParquetResolverAdapter, pgResolverAdapter } from "../_chunks/pg-adapter.mjs";
|
|
3
4
|
import { LogicalDataset, LogicalDataset as LogicalDataset$1, PlannerCapabilities, UnresolvableDatasetError, inferDataset as inferLogicalDataset, isDatasetResolvable } from "gscdump/query/plan";
|
|
4
5
|
import { SQL } from "drizzle-orm";
|
|
5
6
|
import { Grain, TableName } from "@gscdump/contracts";
|
|
@@ -24,6 +25,7 @@ interface SqlFragmentsConfig<TableKey extends string> {
|
|
|
24
25
|
regexPredicate: (expr: SQL, pattern: string, negate: boolean) => SQL;
|
|
25
26
|
tableLabel: string;
|
|
26
27
|
includeSiteId: boolean;
|
|
28
|
+
includeSearchType?: boolean;
|
|
27
29
|
urlToPathExpr?: (col: string) => string;
|
|
28
30
|
/**
|
|
29
31
|
* Override the FROM-clause table reference. Default emits the bound drizzle
|
|
@@ -46,6 +48,7 @@ interface SqlFragments<TableKey extends string> {
|
|
|
46
48
|
tableRef: (tableKey: TableKey) => SQL;
|
|
47
49
|
dateColRef: (tableKey: TableKey) => SQL;
|
|
48
50
|
siteIdColRef?: (tableKey: TableKey) => SQL;
|
|
51
|
+
searchTypeColRef?: (tableKey: TableKey) => SQL;
|
|
49
52
|
dimExprSql: (dim: Dimension, tableKey: TableKey) => SQL;
|
|
50
53
|
metricSql: (metric: Metric, tableKey: TableKey) => SQL;
|
|
51
54
|
havingPredicates: (filters: InternalFilter[], tableKey: TableKey) => SQL[];
|
|
@@ -82,20 +85,6 @@ declare function dimensionValue(row: Record<string, unknown>, dimension: string)
|
|
|
82
85
|
declare function matchesDimensionFilter(row: Record<string, unknown>, filter: InternalFilter): boolean;
|
|
83
86
|
declare function matchesMetricFilter(row: Record<string, unknown>, filter: InternalFilter): boolean;
|
|
84
87
|
declare function matchesTopLevelPage(row: Record<string, unknown>): boolean;
|
|
85
|
-
type PgTableKey = TableName;
|
|
86
|
-
declare const pgResolverAdapter: ResolverAdapter<PgTableKey>;
|
|
87
|
-
/**
|
|
88
|
-
* Parquet-aware variant of {@link pgResolverAdapter}. Identical SQL output
|
|
89
|
-
* except FROM clauses emit `read_parquet({{FILES}}, union_by_name = true) AS
|
|
90
|
-
* "${tk}"`. The runSQL pipeline substitutes `{{FILES}}` with R2 object keys
|
|
91
|
-
* resolved from the manifest. The `AS "${tk}"` alias is mandatory — drizzle
|
|
92
|
-
* compiles `colRef` to table-qualified `"pages"."url"`, which would not
|
|
93
|
-
* resolve against an unaliased FROM.
|
|
94
|
-
*
|
|
95
|
-
* Single-use: build a fresh adapter per query. Cheap (no I/O) and avoids
|
|
96
|
-
* accidental adapter caching that would lock in a stale `{{FILES}}` set.
|
|
97
|
-
*/
|
|
98
|
-
declare function createParquetResolverAdapter(): ResolverAdapter<PgTableKey>;
|
|
99
88
|
interface RunQueryCtx {
|
|
100
89
|
userId: string;
|
|
101
90
|
siteId: string;
|
|
@@ -175,4 +164,4 @@ interface AssertSchemaInSyncOptions {
|
|
|
175
164
|
mode: 'exact' | 'superset';
|
|
176
165
|
}
|
|
177
166
|
declare function assertSchemaInSync(options: AssertSchemaInSyncOptions): void;
|
|
178
|
-
export { type AssertSchemaInSyncOptions, type ComparisonFilter, type ComparisonQueryResult, type CreateResolverAdapterConfig, DIMENSION_SURFACES, type DimensionBinding, type DimensionSurface, type ExtraQuery, LOGICAL_DATASETS, type LogicalDataset, type LogicalDatasetDefinition, type OptimizedQueryResult, type PgTableKey, type ResolvedComparisonSQL, type ResolvedSQL, type ResolvedSQLOptimized, type ResolverAdapter, type ResolverOptions, type RunQueryCtx, type RunSQLFn, type SqlFragments, type SqlFragmentsConfig, UnresolvableDatasetError, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, isDatasetResolvable, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, runComparisonQuery, runOptimizedQuery, supportsDimensionOnSurface };
|
|
167
|
+
export { type AssertSchemaInSyncOptions, type ComparisonFilter, type ComparisonQueryResult, type CreateResolverAdapterConfig, DIMENSION_SURFACES, type DimensionBinding, type DimensionSurface, type ExtraQuery, LOGICAL_DATASETS, type LogicalDataset, type LogicalDatasetDefinition, type OptimizedQueryResult, type PgTableKey, type ResolvedComparisonSQL, type ResolvedSQL, type ResolvedSQLOptimized, type ResolverAdapter, type ResolverOptions, type RunQueryCtx, type RunSQLFn, type SqlFragments, type SqlFragmentsConfig, UnresolvableDatasetError, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createIcebergResolverAdapter, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, isDatasetResolvable, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, runComparisonQuery, runOptimizedQuery, supportsDimensionOnSurface };
|
package/dist/resolver/index.mjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { DIMENSION_SURFACES, LOGICAL_DATASETS, UnresolvableDatasetError, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, isDatasetResolvable, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, runComparisonQuery, runOptimizedQuery, supportsDimensionOnSurface } from "../_chunks/resolver.mjs";
|
|
2
|
-
export { DIMENSION_SURFACES, LOGICAL_DATASETS, UnresolvableDatasetError, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, isDatasetResolvable, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, runComparisonQuery, runOptimizedQuery, supportsDimensionOnSurface };
|
|
1
|
+
import { DIMENSION_SURFACES, LOGICAL_DATASETS, UnresolvableDatasetError, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createIcebergResolverAdapter, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, isDatasetResolvable, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, runComparisonQuery, runOptimizedQuery, supportsDimensionOnSurface } from "../_chunks/resolver.mjs";
|
|
2
|
+
export { DIMENSION_SURFACES, LOGICAL_DATASETS, UnresolvableDatasetError, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createIcebergResolverAdapter, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, isDatasetResolvable, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, runComparisonQuery, runOptimizedQuery, supportsDimensionOnSurface };
|
package/dist/source/index.d.mts
CHANGED
|
@@ -1,109 +1,3 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
3
|
-
import { ResolverAdapter } from "../_chunks/types.mjs";
|
|
4
|
-
import { AnalysisQuerySource, AnalysisSourceKind, AnalyzerRegistry, ExecuteSqlOptions, FileSet, QueryRow, SourceCapabilities } from "../_chunks/registry.mjs";
|
|
5
|
-
import { PlannerCapabilities } from "gscdump/query/plan";
|
|
6
|
-
import { BuilderState } from "gscdump/query";
|
|
7
|
-
interface AttachedTableRunner {
|
|
8
|
-
/**
|
|
9
|
-
* Run a query with positional (`?`) bound parameters. Return objects keyed
|
|
10
|
-
* by column name. BIGINT → number coercion is applied by the source factory
|
|
11
|
-
* (see `coerceRows`); runners only need to handle DATE → ISO string (or
|
|
12
|
-
* let the analyzer reducer normalize via `num(v)`/`str(v)`).
|
|
13
|
-
*/
|
|
14
|
-
query: (sql: string, params?: unknown[], signal?: AbortSignal) => Promise<Row[]>;
|
|
15
|
-
}
|
|
16
|
-
interface AttachedTableSourceOptions {
|
|
17
|
-
/** Schema name the exported DuckDB file was attached under — e.g. `gsc`. */
|
|
18
|
-
schema: string;
|
|
19
|
-
/**
|
|
20
|
-
* Abort in-flight queries when the caller no longer cares about the
|
|
21
|
-
* result. Every `runner.query` call receives the same signal.
|
|
22
|
-
*/
|
|
23
|
-
signal?: AbortSignal;
|
|
24
|
-
/**
|
|
25
|
-
* List of table names actually attached to this connection. When provided,
|
|
26
|
-
* `executeSql` short-circuits with a specific "table not attached" error
|
|
27
|
-
* if the SQL plan references a table that isn't in this list — letting
|
|
28
|
-
* callers (e.g. the analytics layer) route to cloud fallback without
|
|
29
|
-
* paying the SQL execution cost. Omit to disable the check.
|
|
30
|
-
*/
|
|
31
|
-
attachedTables?: readonly string[];
|
|
32
|
-
/**
|
|
33
|
-
* Dialect adapter surfaced on the source for analyzers that compose SQL
|
|
34
|
-
* from a `BuilderState` at plan-build time (e.g. `data-query`,
|
|
35
|
-
* `data-detail`). Attached-table sources execute pg-flavored DuckDB SQL,
|
|
36
|
-
* so callers should pass `pgResolverAdapter` here.
|
|
37
|
-
*/
|
|
38
|
-
adapter?: ResolverAdapter<any>;
|
|
39
|
-
}
|
|
40
|
-
declare class AttachedTableMissingError extends Error {
|
|
41
|
-
readonly missing: readonly string[];
|
|
42
|
-
constructor(missing: readonly string[]);
|
|
43
|
-
}
|
|
44
|
-
/**
|
|
45
|
-
* Swap `read_parquet({{KEY}}, union_by_name = true)` for `<schema>.<table>`.
|
|
46
|
-
* Tolerates whitespace variation. Preserves the rest of the SQL verbatim.
|
|
47
|
-
*/
|
|
48
|
-
declare function rewriteForTableSource(sql: string, schema: string, fileSets: Record<string, FileSet>): string;
|
|
49
|
-
declare function createAttachedTableSource(runner: AttachedTableRunner, options: AttachedTableSourceOptions): AnalysisQuerySource;
|
|
50
|
-
interface CreateSqlQuerySourceOptions<TKey extends string> {
|
|
51
|
-
/** Debug-only identifier surfaced on the source for error messages. */
|
|
52
|
-
name: string;
|
|
53
|
-
/** Telemetry tag stamped onto analyzer result meta. */
|
|
54
|
-
kind?: AnalysisSourceKind;
|
|
55
|
-
/** Dialect-specific adapter; compiles `BuilderState` → `{ sql, params }`. */
|
|
56
|
-
adapter: ResolverAdapter<TKey>;
|
|
57
|
-
/** Drives the underlying DB. Called for both typed queries and raw SQL. */
|
|
58
|
-
execute: (sql: string, params: unknown[]) => Promise<QueryRow[]>;
|
|
59
|
-
/** Tenant id for multi-tenant dialects; forwarded to `resolveToSQL`. */
|
|
60
|
-
siteId?: string | number;
|
|
61
|
-
/** Additional capability flags merged on top of `adapter.capabilities`. */
|
|
62
|
-
extraCapabilities?: Partial<SourceCapabilities>;
|
|
63
|
-
}
|
|
64
|
-
declare function createSqlQuerySource<TKey extends string>(options: CreateSqlQuerySourceOptions<TKey>): AnalysisQuerySource;
|
|
65
|
-
/**
|
|
66
|
-
* Capabilities the engine query path honors. Matches what the DuckDB compiler
|
|
67
|
-
* passes to `buildLogicalPlan`: regex pushes down; comparison joins and
|
|
68
|
-
* multi-dataset queries belong to the analyzer dispatcher, not the engine's
|
|
69
|
-
* builder-state query path.
|
|
70
|
-
*/
|
|
71
|
-
declare const ENGINE_QUERY_CAPABILITIES: PlannerCapabilities;
|
|
72
|
-
interface EngineQuerySourceOptions {
|
|
73
|
-
engine: StorageEngine;
|
|
74
|
-
ctx: TenantCtx;
|
|
75
|
-
/**
|
|
76
|
-
* Restrict every manifest lookup the source performs to a single search-type
|
|
77
|
-
* slice. Threads into `engine.query` and `engine.runSQL` so the wrapped
|
|
78
|
-
* source returns rows from one cohort instead of unioning web + non-web
|
|
79
|
-
* parquet. Undefined preserves legacy cross-type behaviour for web-only
|
|
80
|
-
* tenants and admin paths.
|
|
81
|
-
*/
|
|
82
|
-
searchType?: SearchType$1;
|
|
83
|
-
}
|
|
84
|
-
/**
|
|
85
|
-
* Wraps a storage engine as an `AnalysisQuerySource` with SQL execution.
|
|
86
|
-
* `queryRows` runs typed builder-state queries; `executeSql` delegates to
|
|
87
|
-
* `engine.runSQL` and requires `opts.fileSets` (with a `FILES` entry so the
|
|
88
|
-
* target table can be resolved for partition lookup).
|
|
89
|
-
*/
|
|
90
|
-
declare function createEngineQuerySource(options: EngineQuerySourceOptions): AnalysisQuerySource;
|
|
91
|
-
/**
|
|
92
|
-
* Convenience: wrap a storage engine + tenant ctx in a source and dispatch.
|
|
93
|
-
* Equivalent to wrapping `createEngineQuerySource`, with omitted searchType
|
|
94
|
-
* defaulted to web at this public helper boundary.
|
|
95
|
-
*/
|
|
96
|
-
declare function runAnalyzerWithEngine(deps: {
|
|
97
|
-
engine: StorageEngine;
|
|
98
|
-
}, ctx: TenantCtx, params: AnalysisParams, registry: AnalyzerRegistry): Promise<AnalysisResult>;
|
|
99
|
-
interface TypedQuery<TRow> {
|
|
100
|
-
state: BuilderState;
|
|
101
|
-
readonly __row?: TRow;
|
|
102
|
-
}
|
|
103
|
-
declare function typedQuery<TRow>(state: BuilderState): TypedQuery<TRow>;
|
|
104
|
-
declare function queryRows<TRow = QueryRow>(source: AnalysisQuerySource, query: BuilderState | TypedQuery<TRow>): Promise<TRow[]>;
|
|
105
|
-
declare function queryComparisonRows<TRow = QueryRow>(source: AnalysisQuerySource, current: BuilderState | TypedQuery<TRow>, previous: BuilderState | TypedQuery<TRow>): Promise<{
|
|
106
|
-
current: TRow[];
|
|
107
|
-
previous: TRow[];
|
|
108
|
-
}>;
|
|
1
|
+
import { AnalysisQuerySource, AnalysisSourceKind, ExecuteSqlOptions, FileSet, QueryRow, SourceCapabilities } from "../_chunks/registry.mjs";
|
|
2
|
+
import { AttachedTableMissingError, AttachedTableRunner, AttachedTableSourceOptions, CreateSqlQuerySourceOptions, ENGINE_QUERY_CAPABILITIES, EngineQuerySourceOptions, TypedQuery, createAttachedTableSource, createEngineQuerySource, createSqlQuerySource, queryComparisonRows, queryRows, rewriteForTableSource, runAnalyzerWithEngine, typedQuery } from "../_chunks/index.mjs";
|
|
109
3
|
export { type AnalysisQuerySource, type AnalysisSourceKind, AttachedTableMissingError, type AttachedTableRunner, type AttachedTableSourceOptions, type CreateSqlQuerySourceOptions, ENGINE_QUERY_CAPABILITIES, EngineQuerySourceOptions, type ExecuteSqlOptions, type FileSet, type QueryRow, type SourceCapabilities, TypedQuery, createAttachedTableSource, createEngineQuerySource, createSqlQuerySource, queryComparisonRows, queryRows, rewriteForTableSource, runAnalyzerWithEngine, typedQuery };
|
package/dist/source/index.mjs
CHANGED
|
@@ -1,152 +1,2 @@
|
|
|
1
|
-
import { coerceRows } from "../_chunks/coerce.mjs";
|
|
2
|
-
import "../_chunks/storage.mjs";
|
|
3
|
-
import { assertDimensionsSupported, getFilterDimensions, pgResolverAdapter, resolveToSQL } from "../_chunks/resolver.mjs";
|
|
4
|
-
import { runAnalyzerFromSource } from "../_chunks/dispatch.mjs";
|
|
5
|
-
var AttachedTableMissingError = class extends Error {
|
|
6
|
-
missing;
|
|
7
|
-
constructor(missing) {
|
|
8
|
-
super(`attached-table source: required table(s) not attached: ${missing.join(", ")}`);
|
|
9
|
-
this.missing = missing;
|
|
10
|
-
this.name = "AttachedTableMissingError";
|
|
11
|
-
}
|
|
12
|
-
};
|
|
13
|
-
const ATTACHED_TABLE_CAPABILITIES = {
|
|
14
|
-
fileSets: true,
|
|
15
|
-
attachedTables: true,
|
|
16
|
-
regex: true
|
|
17
|
-
};
|
|
18
|
-
const ATTACHED_TABLE_CAPABILITIES_WITH_ADAPTER = {
|
|
19
|
-
...ATTACHED_TABLE_CAPABILITIES,
|
|
20
|
-
adapter: true
|
|
21
|
-
};
|
|
22
|
-
function rewriteForTableSource(sql, schema, fileSets) {
|
|
23
|
-
let out = sql;
|
|
24
|
-
for (const [key, fs] of Object.entries(fileSets)) {
|
|
25
|
-
const pattern = new RegExp(`read_parquet\\(\\s*\\{\\{${key}\\}\\}\\s*,\\s*union_by_name\\s*=\\s*true\\s*\\)`, "g");
|
|
26
|
-
out = out.replace(pattern, `${schema}.${fs.table}`);
|
|
27
|
-
}
|
|
28
|
-
return out;
|
|
29
|
-
}
|
|
30
|
-
function createAttachedTableSource(runner, options) {
|
|
31
|
-
const { schema, signal, attachedTables, adapter } = options;
|
|
32
|
-
const attachedSet = attachedTables ? new Set(attachedTables) : null;
|
|
33
|
-
return {
|
|
34
|
-
name: "attached-table",
|
|
35
|
-
kind: "browser",
|
|
36
|
-
capabilities: adapter ? ATTACHED_TABLE_CAPABILITIES_WITH_ADAPTER : ATTACHED_TABLE_CAPABILITIES,
|
|
37
|
-
adapter,
|
|
38
|
-
async queryRows() {
|
|
39
|
-
throw new Error("attached-table source: queryRows is not supported; use SQL analyzers");
|
|
40
|
-
},
|
|
41
|
-
async executeSql(sql, params, opts) {
|
|
42
|
-
signal?.throwIfAborted();
|
|
43
|
-
const fileSets = opts?.fileSets ?? {};
|
|
44
|
-
if (attachedSet) {
|
|
45
|
-
const missing = [];
|
|
46
|
-
for (const fs of Object.values(fileSets)) if (!attachedSet.has(fs.table)) missing.push(fs.table);
|
|
47
|
-
if (missing.length > 0) throw new AttachedTableMissingError(missing);
|
|
48
|
-
}
|
|
49
|
-
const rewritten = rewriteForTableSource(sql, schema, fileSets);
|
|
50
|
-
return coerceRows(await runner.query(rewritten, params ?? [], signal));
|
|
51
|
-
}
|
|
52
|
-
};
|
|
53
|
-
}
|
|
54
|
-
function createSqlQuerySource(options) {
|
|
55
|
-
const { name, kind, adapter, execute, siteId, extraCapabilities } = options;
|
|
56
|
-
return {
|
|
57
|
-
name,
|
|
58
|
-
kind,
|
|
59
|
-
capabilities: {
|
|
60
|
-
...adapter.capabilities,
|
|
61
|
-
...extraCapabilities,
|
|
62
|
-
adapter: true
|
|
63
|
-
},
|
|
64
|
-
adapter,
|
|
65
|
-
siteId,
|
|
66
|
-
async queryRows(state) {
|
|
67
|
-
const resolved = resolveToSQL(state, {
|
|
68
|
-
adapter,
|
|
69
|
-
siteId
|
|
70
|
-
});
|
|
71
|
-
return coerceRows(await execute(resolved.sql, resolved.params));
|
|
72
|
-
},
|
|
73
|
-
async executeSql(sql, params) {
|
|
74
|
-
return coerceRows(await execute(sql, params ?? []));
|
|
75
|
-
}
|
|
76
|
-
};
|
|
77
|
-
}
|
|
78
|
-
function isMetricDimension(dim) {
|
|
79
|
-
return [
|
|
80
|
-
"clicks",
|
|
81
|
-
"impressions",
|
|
82
|
-
"ctr",
|
|
83
|
-
"position"
|
|
84
|
-
].includes(dim);
|
|
85
|
-
}
|
|
86
|
-
const ENGINE_QUERY_CAPABILITIES = {
|
|
87
|
-
regex: true,
|
|
88
|
-
multiDataset: false,
|
|
89
|
-
comparisonJoin: false,
|
|
90
|
-
windowTotals: false
|
|
91
|
-
};
|
|
92
|
-
const ENGINE_SOURCE_CAPABILITIES = {
|
|
93
|
-
...ENGINE_QUERY_CAPABILITIES,
|
|
94
|
-
fileSets: true,
|
|
95
|
-
adapter: true
|
|
96
|
-
};
|
|
97
|
-
function createEngineQuerySource(options) {
|
|
98
|
-
const { engine, ctx, searchType } = options;
|
|
99
|
-
return {
|
|
100
|
-
name: "engine",
|
|
101
|
-
kind: "local",
|
|
102
|
-
capabilities: ENGINE_SOURCE_CAPABILITIES,
|
|
103
|
-
adapter: pgResolverAdapter,
|
|
104
|
-
async queryRows(state) {
|
|
105
|
-
const filterDims = getFilterDimensions(state.filter, isMetricDimension);
|
|
106
|
-
assertDimensionsSupported([...state.dimensions, ...filterDims], "stored", "engine query source");
|
|
107
|
-
if (state.dimensions.includes("queryCanonical") || filterDims.includes("queryCanonical")) throw new Error("engine query source does not support queryCanonical; use browser/sqlite query sources for derived dimensions");
|
|
108
|
-
return coerceRows((await engine.query({
|
|
109
|
-
...ctx,
|
|
110
|
-
...searchType !== void 0 ? { searchType } : {}
|
|
111
|
-
}, state)).rows);
|
|
112
|
-
},
|
|
113
|
-
async executeSql(sql, params, opts) {
|
|
114
|
-
const fileSets = opts?.fileSets;
|
|
115
|
-
if (!fileSets?.FILES) throw new Error("engine query source: executeSql requires opts.fileSets with a FILES entry");
|
|
116
|
-
const { rows } = await engine.runSQL({
|
|
117
|
-
ctx,
|
|
118
|
-
table: fileSets.FILES.table,
|
|
119
|
-
fileSets,
|
|
120
|
-
sql,
|
|
121
|
-
params: params ?? [],
|
|
122
|
-
...searchType !== void 0 ? { searchType } : {}
|
|
123
|
-
});
|
|
124
|
-
return coerceRows(rows);
|
|
125
|
-
}
|
|
126
|
-
};
|
|
127
|
-
}
|
|
128
|
-
async function runAnalyzerWithEngine(deps, ctx, params, registry) {
|
|
129
|
-
return runAnalyzerFromSource(createEngineQuerySource({
|
|
130
|
-
engine: deps.engine,
|
|
131
|
-
ctx,
|
|
132
|
-
searchType: params.searchType ?? "web"
|
|
133
|
-
}), params, registry);
|
|
134
|
-
}
|
|
135
|
-
function typedQuery(state) {
|
|
136
|
-
return { state };
|
|
137
|
-
}
|
|
138
|
-
function isTypedQuery(value) {
|
|
139
|
-
return "state" in value;
|
|
140
|
-
}
|
|
141
|
-
async function queryRows(source, query) {
|
|
142
|
-
const state = isTypedQuery(query) ? query.state : query;
|
|
143
|
-
return await source.queryRows(state);
|
|
144
|
-
}
|
|
145
|
-
async function queryComparisonRows(source, current, previous) {
|
|
146
|
-
const [currentRows, previousRows] = await Promise.all([queryRows(source, current), queryRows(source, previous)]);
|
|
147
|
-
return {
|
|
148
|
-
current: currentRows,
|
|
149
|
-
previous: previousRows
|
|
150
|
-
};
|
|
151
|
-
}
|
|
1
|
+
import { AttachedTableMissingError, ENGINE_QUERY_CAPABILITIES, createAttachedTableSource, createEngineQuerySource, createSqlQuerySource, queryComparisonRows, queryRows, rewriteForTableSource, runAnalyzerWithEngine, typedQuery } from "../_chunks/source.mjs";
|
|
152
2
|
export { AttachedTableMissingError, ENGINE_QUERY_CAPABILITIES, createAttachedTableSource, createEngineQuerySource, createSqlQuerySource, queryComparisonRows, queryRows, rewriteForTableSource, runAnalyzerWithEngine, typedQuery };
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@gscdump/engine",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.23.1",
|
|
4
|
+
"version": "0.23.2",
|
|
5
5
|
"description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "Harlan Wilton",
|
|
@@ -180,8 +180,8 @@
|
|
|
180
180
|
"drizzle-orm": "^0.45.2",
|
|
181
181
|
"icebird": "^0.8.6",
|
|
182
182
|
"proper-lockfile": "^4.1.2",
|
|
183
|
-
"@gscdump/contracts": "0.23.1",
|
|
184
|
-
"gscdump": "0.23.1"
|
|
183
|
+
"@gscdump/contracts": "0.23.2",
|
|
184
|
+
"gscdump": "0.23.2"
|
|
185
185
|
},
|
|
186
186
|
"devDependencies": {
|
|
187
187
|
"@duckdb/duckdb-wasm": "^1.32.0",
|
package/dist/_chunks/coerce.mjs
DELETED
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
function coerceRow(row) {
|
|
2
|
-
let mutated = null;
|
|
3
|
-
for (const [k, v] of Object.entries(row)) if (typeof v === "bigint") {
|
|
4
|
-
if (!mutated) mutated = { ...row };
|
|
5
|
-
mutated[k] = Number(v);
|
|
6
|
-
}
|
|
7
|
-
return mutated ?? row;
|
|
8
|
-
}
|
|
9
|
-
function coerceRows(rows) {
|
|
10
|
-
const out = Array.from({ length: rows.length });
|
|
11
|
-
for (let i = 0; i < rows.length; i++) out[i] = coerceRow(rows[i]);
|
|
12
|
-
return out;
|
|
13
|
-
}
|
|
14
|
-
export { coerceRow, coerceRows };
|