@gscdump/engine 0.23.1 → 0.23.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,57 +1,111 @@
1
- import { AnalysisParams } from "./analysis-types.mjs";
2
- type WindowPreset = 'last-7d' | 'last-28d' | 'last-30d' | 'last-90d' | 'last-180d' | 'last-365d' | 'mtd' | 'ytd' | 'custom';
3
- type ComparisonMode = 'none' | 'prev-period' | 'yoy';
4
- interface ResolveWindowOptions {
5
- preset: WindowPreset;
6
- comparison?: ComparisonMode;
7
- anchor?: string;
8
- start?: string;
9
- end?: string;
1
+ import { Row, SearchType as SearchType$1, StorageEngine, TenantCtx } from "./storage.mjs";
2
+ import { AnalysisParams, AnalysisResult } from "./analysis-types.mjs";
3
+ import { ResolverAdapter } from "./types.mjs";
4
+ import { AnalysisQuerySource, AnalysisSourceKind, AnalyzerRegistry, FileSet, QueryRow, SourceCapabilities } from "./registry.mjs";
5
+ import { PlannerCapabilities } from "gscdump/query/plan";
6
+ import { BuilderState } from "gscdump/query";
7
+ interface AttachedTableRunner {
8
+ /**
9
+ * Run a query with positional (`?`) bound parameters. Return objects keyed
10
+ * by column name. BIGINT → number coercion is applied by the source factory
11
+ * (see `coerceRows`); runners only need to handle DATE → ISO string (or
12
+ * let the analyzer reducer normalize via `num(v)`/`str(v)`).
13
+ */
14
+ query: (sql: string, params?: unknown[], signal?: AbortSignal) => Promise<Row[]>;
10
15
  }
11
- interface ResolvedWindow {
12
- start: string;
13
- end: string;
14
- days: number;
15
- comparison?: {
16
- start: string;
17
- end: string;
18
- };
16
+ interface AttachedTableSourceOptions {
17
+ /** Schema name the exported DuckDB file was attached under — e.g. `gsc`. */
18
+ schema: string;
19
+ /**
20
+ * Abort in-flight queries when the caller no longer cares about the
21
+ * result. Every `runner.query` call receives the same signal.
22
+ */
23
+ signal?: AbortSignal;
24
+ /**
25
+ * List of table names actually attached to this connection. When provided,
26
+ * `executeSql` short-circuits with a specific "table not attached" error
27
+ * if the SQL plan references a table that isn't in this list — letting
28
+ * callers (e.g. the analytics layer) route to cloud fallback without
29
+ * paying the SQL execution cost. Omit to disable the check.
30
+ */
31
+ attachedTables?: readonly string[];
32
+ /**
33
+ * Dialect adapter surfaced on the source for analyzers that compose SQL
34
+ * from a `BuilderState` at plan-build time (e.g. `data-query`,
35
+ * `data-detail`). Attached-table sources execute pg-flavored DuckDB SQL,
36
+ * so callers should pass `pgResolverAdapter` here.
37
+ */
38
+ adapter?: ResolverAdapter<any>;
19
39
  }
20
- interface AnalysisPeriod {
21
- startDate: string;
22
- endDate: string;
40
+ declare class AttachedTableMissingError extends Error {
41
+ readonly missing: readonly string[];
42
+ constructor(missing: readonly string[]);
23
43
  }
24
- interface ComparisonPeriod {
25
- current: AnalysisPeriod;
26
- previous: AnalysisPeriod;
44
+ /**
45
+ * Swap `read_parquet({{KEY}}, union_by_name = true)` for `<schema>.<table>`.
46
+ * Tolerates whitespace variation. Preserves the rest of the SQL verbatim.
47
+ */
48
+ declare function rewriteForTableSource(sql: string, schema: string, fileSets: Record<string, FileSet>): string;
49
+ declare function createAttachedTableSource(runner: AttachedTableRunner, options: AttachedTableSourceOptions): AnalysisQuerySource;
50
+ interface CreateSqlQuerySourceOptions<TKey extends string> {
51
+ /** Debug-only identifier surfaced on the source for error messages. */
52
+ name: string;
53
+ /** Telemetry tag stamped onto analyzer result meta. */
54
+ kind?: AnalysisSourceKind;
55
+ /** Dialect-specific adapter; compiles `BuilderState` → `{ sql, params }`. */
56
+ adapter: ResolverAdapter<TKey>;
57
+ /** Drives the underlying DB. Called for both typed queries and raw SQL. */
58
+ execute: (sql: string, params: unknown[]) => Promise<QueryRow[]>;
59
+ /** Tenant id for multi-tenant dialects; forwarded to `resolveToSQL`. */
60
+ siteId?: string | number;
61
+ /** Search-type scope for multi-tenant dialects; forwarded to `resolveToSQL`. */
62
+ searchType?: string;
63
+ /** Additional capability flags merged on top of `adapter.capabilities`. */
64
+ extraCapabilities?: Partial<SourceCapabilities>;
27
65
  }
28
- declare function defaultEndDate(): string;
29
- declare function defaultStartDate(): string;
30
- declare function periodOf(params: AnalysisParams): AnalysisPeriod;
31
- declare function comparisonOf(params: AnalysisParams): ComparisonPeriod;
32
- declare function resolveWindow(opts: ResolveWindowOptions): ResolvedWindow;
33
- /** Convert a ResolvedWindow into the AnalysisPeriod / ComparisonPeriod shape. */
34
- declare function windowToPeriod(w: ResolvedWindow): AnalysisPeriod;
35
- declare function windowToComparisonPeriod(w: ResolvedWindow): ComparisonPeriod | undefined;
36
- interface PadTimeseriesOptions<T> {
37
- /** ISO date (YYYY-MM-DD), inclusive lower bound. */
38
- startDate: string;
39
- /** ISO date (YYYY-MM-DD), inclusive upper bound. */
40
- endDate: string;
66
+ declare function createSqlQuerySource<TKey extends string>(options: CreateSqlQuerySourceOptions<TKey>): AnalysisQuerySource;
67
+ /**
68
+ * Capabilities the engine query path honors. Matches what the DuckDB compiler
69
+ * passes to `buildLogicalPlan`: regex pushes down; comparison joins and
70
+ * multi-dataset queries belong to the analyzer dispatcher, not the engine's
71
+ * builder-state query path.
72
+ */
73
+ declare const ENGINE_QUERY_CAPABILITIES: PlannerCapabilities;
74
+ interface EngineQuerySourceOptions {
75
+ engine: StorageEngine;
76
+ ctx: TenantCtx;
41
77
  /**
42
- * Row to insert for missing dates. Defaults to `{ clicks: 0, impressions: 0, ctr: 0, position: 0 }`.
43
- * The `date` field is set automatically.
78
+ * Restrict every manifest lookup the source performs to a single search-type
79
+ * slice. Threads into `engine.query` and `engine.runSQL` so the wrapped
80
+ * source returns rows from one cohort instead of unioning web + non-web
81
+ * parquet. Undefined preserves legacy cross-type behaviour for web-only
82
+ * tenants and admin paths.
44
83
  */
45
- fill?: Omit<T, 'date'>;
46
- /** Row-field that carries the ISO date. Defaults to `date`. */
47
- dateKey?: string;
84
+ searchType?: SearchType$1;
48
85
  }
49
- type DateRowShape = Record<string, unknown> & {
50
- date?: unknown;
51
- };
52
86
  /**
53
- * Pad rows so every calendar day in `[startDate, endDate]` appears at least
54
- * once. Existing dates keep all their rows (grouped timeseries safe).
87
+ * Wraps a storage engine as an `AnalysisQuerySource` with SQL execution.
88
+ * `queryRows` runs typed builder-state queries; `executeSql` delegates to
89
+ * `engine.runSQL` and requires `opts.fileSets` (with a `FILES` entry so the
90
+ * target table can be resolved for partition lookup).
55
91
  */
56
- declare function padTimeseries<T extends DateRowShape = DateRowShape>(rows: readonly T[], options: PadTimeseriesOptions<T>): T[];
57
- export { AnalysisPeriod, ComparisonMode, ComparisonPeriod, PadTimeseriesOptions, ResolveWindowOptions, ResolvedWindow, WindowPreset, comparisonOf, defaultEndDate, defaultStartDate, padTimeseries, periodOf, resolveWindow, windowToComparisonPeriod, windowToPeriod };
92
+ declare function createEngineQuerySource(options: EngineQuerySourceOptions): AnalysisQuerySource;
93
+ /**
94
+ * Convenience: wrap a storage engine + tenant ctx in a source and dispatch.
95
+ * Equivalent to wrapping `createEngineQuerySource`, with omitted searchType
96
+ * defaulted to web at this public helper boundary.
97
+ */
98
+ declare function runAnalyzerWithEngine(deps: {
99
+ engine: StorageEngine;
100
+ }, ctx: TenantCtx, params: AnalysisParams, registry: AnalyzerRegistry): Promise<AnalysisResult>;
101
+ interface TypedQuery<TRow> {
102
+ state: BuilderState;
103
+ readonly __row?: TRow;
104
+ }
105
+ declare function typedQuery<TRow>(state: BuilderState): TypedQuery<TRow>;
106
+ declare function queryRows<TRow = QueryRow>(source: AnalysisQuerySource, query: BuilderState | TypedQuery<TRow>): Promise<TRow[]>;
107
+ declare function queryComparisonRows<TRow = QueryRow>(source: AnalysisQuerySource, current: BuilderState | TypedQuery<TRow>, previous: BuilderState | TypedQuery<TRow>): Promise<{
108
+ current: TRow[];
109
+ previous: TRow[];
110
+ }>;
111
+ export { AttachedTableMissingError, AttachedTableRunner, AttachedTableSourceOptions, CreateSqlQuerySourceOptions, ENGINE_QUERY_CAPABILITIES, EngineQuerySourceOptions, TypedQuery, createAttachedTableSource, createEngineQuerySource, createSqlQuerySource, queryComparisonRows, queryRows, rewriteForTableSource, runAnalyzerWithEngine, typedQuery };
@@ -0,0 +1,57 @@
1
+ import { AnalysisParams } from "./analysis-types.mjs";
2
+ type WindowPreset = 'last-7d' | 'last-28d' | 'last-30d' | 'last-90d' | 'last-180d' | 'last-365d' | 'mtd' | 'ytd' | 'custom';
3
+ type ComparisonMode = 'none' | 'prev-period' | 'yoy';
4
+ interface ResolveWindowOptions {
5
+ preset: WindowPreset;
6
+ comparison?: ComparisonMode;
7
+ anchor?: string;
8
+ start?: string;
9
+ end?: string;
10
+ }
11
+ interface ResolvedWindow {
12
+ start: string;
13
+ end: string;
14
+ days: number;
15
+ comparison?: {
16
+ start: string;
17
+ end: string;
18
+ };
19
+ }
20
+ interface AnalysisPeriod {
21
+ startDate: string;
22
+ endDate: string;
23
+ }
24
+ interface ComparisonPeriod {
25
+ current: AnalysisPeriod;
26
+ previous: AnalysisPeriod;
27
+ }
28
+ declare function defaultEndDate(): string;
29
+ declare function defaultStartDate(): string;
30
+ declare function periodOf(params: AnalysisParams): AnalysisPeriod;
31
+ declare function comparisonOf(params: AnalysisParams): ComparisonPeriod;
32
+ declare function resolveWindow(opts: ResolveWindowOptions): ResolvedWindow;
33
+ /** Convert a ResolvedWindow into the AnalysisPeriod / ComparisonPeriod shape. */
34
+ declare function windowToPeriod(w: ResolvedWindow): AnalysisPeriod;
35
+ declare function windowToComparisonPeriod(w: ResolvedWindow): ComparisonPeriod | undefined;
36
+ interface PadTimeseriesOptions<T> {
37
+ /** ISO date (YYYY-MM-DD), inclusive lower bound. */
38
+ startDate: string;
39
+ /** ISO date (YYYY-MM-DD), inclusive upper bound. */
40
+ endDate: string;
41
+ /**
42
+ * Row to insert for missing dates. Defaults to `{ clicks: 0, impressions: 0, ctr: 0, position: 0 }`.
43
+ * The `date` field is set automatically.
44
+ */
45
+ fill?: Omit<T, 'date'>;
46
+ /** Row-field that carries the ISO date. Defaults to `date`. */
47
+ dateKey?: string;
48
+ }
49
+ type DateRowShape = Record<string, unknown> & {
50
+ date?: unknown;
51
+ };
52
+ /**
53
+ * Pad rows so every calendar day in `[startDate, endDate]` appears at least
54
+ * once. Existing dates keep all their rows (grouped timeseries safe).
55
+ */
56
+ declare function padTimeseries<T extends DateRowShape = DateRowShape>(rows: readonly T[], options: PadTimeseriesOptions<T>): T[];
57
+ export { AnalysisPeriod, ComparisonMode, ComparisonPeriod, PadTimeseriesOptions, ResolveWindowOptions, ResolvedWindow, WindowPreset, comparisonOf, defaultEndDate, defaultStartDate, padTimeseries, periodOf, resolveWindow, windowToComparisonPeriod, windowToPeriod };
@@ -0,0 +1,28 @@
1
+ import { ResolverAdapter } from "./types.mjs";
2
+ import { TableName } from "@gscdump/contracts";
3
+ type PgTableKey = TableName;
4
+ declare const pgResolverAdapter: ResolverAdapter<PgTableKey>;
5
+ /**
6
+ * Parquet-aware variant of {@link pgResolverAdapter}. Identical SQL output
7
+ * except FROM clauses emit `read_parquet({{FILES}}, union_by_name = true) AS
8
+ * "${tk}"`. The runSQL pipeline substitutes `{{FILES}}` with R2 object keys
9
+ * resolved from the manifest. The `AS "${tk}"` alias is mandatory — drizzle
10
+ * compiles `colRef` to table-qualified `"pages"."url"`, which would not
11
+ * resolve against an unaliased FROM.
12
+ *
13
+ * Single-use: build a fresh adapter per query. Cheap (no I/O) and avoids
14
+ * accidental adapter caching that would lock in a stale `{{FILES}}` set.
15
+ */
16
+ declare function createParquetResolverAdapter(): ResolverAdapter<PgTableKey>;
17
+ /**
18
+ * Multi-tenant pg-flavored adapter for the Iceberg / R2 SQL read path.
19
+ * Identical SQL output to `pgResolverAdapter` except WHERE clauses inject
20
+ * `site_id = ?` AND `search_type = ?` automatically when those scopes are
21
+ * passed to `resolveToSQL`. Required for the Iceberg fact tables which are
22
+ * shared across tenants — querying without these predicates would leak
23
+ * cross-tenant data. Single-use: the adapter has no `tableRef` override,
24
+ * so callers must rewrite bare table names to their qualified form (e.g.
25
+ * `${namespace}.pages`) before sending to R2 SQL.
26
+ */
27
+ declare function createIcebergResolverAdapter(): ResolverAdapter<PgTableKey>;
28
+ export { PgTableKey, createIcebergResolverAdapter, createParquetResolverAdapter, pgResolverAdapter };
@@ -2,7 +2,7 @@ import { SCHEMAS, drizzleSchema } from "./schema.mjs";
2
2
  import { enumeratePartitions } from "./parquet-plan.mjs";
3
3
  import { escapeLike } from "../sql-fragments.mjs";
4
4
  import "../planner.mjs";
5
- import { PgDialect } from "drizzle-orm/pg-core";
5
+ import { PgDialect, pgTable, varchar } from "drizzle-orm/pg-core";
6
6
  import { UnresolvableDatasetError, buildLogicalComparisonPlan, buildLogicalPlan, inferDataset as inferLogicalDataset, isDatasetResolvable } from "gscdump/query/plan";
7
7
  import { normalizeUrl } from "gscdump/normalize";
8
8
  import { sql } from "drizzle-orm";
@@ -180,7 +180,7 @@ function buildDimensionColumnMap(datasetToTableKey) {
180
180
  return Object.fromEntries(entries);
181
181
  }
182
182
  function createSqlFragments(config) {
183
- const { schema, datasetToTableKey, metricCast, regexPredicate, tableLabel, includeSiteId, urlToPathExpr: urlToPathExprOverride, tableRef: tableRefOverride } = config;
183
+ const { schema, datasetToTableKey, metricCast, regexPredicate, tableLabel, includeSiteId, includeSearchType, urlToPathExpr: urlToPathExprOverride, tableRef: tableRefOverride } = config;
184
184
  const DIM_COLUMN_MAP = buildDimensionColumnMap(datasetToTableKey);
185
185
  function isMetricDimension(dim) {
186
186
  return METRIC_NAMES.includes(dim);
@@ -211,6 +211,9 @@ function createSqlFragments(config) {
211
211
  function siteIdColRef(tableKey) {
212
212
  return colRef(tableKey, "site_id");
213
213
  }
214
+ function searchTypeColRef(tableKey) {
215
+ return colRef(tableKey, "search_type");
216
+ }
214
217
  function dimExprSql(dim, tableKey) {
215
218
  const colName = dimColumn(dim, tableKey);
216
219
  if (dim === "page") return sql.raw(urlToPathExpr(colName));
@@ -335,6 +338,7 @@ function createSqlFragments(config) {
335
338
  tableRef,
336
339
  dateColRef,
337
340
  siteIdColRef: includeSiteId ? siteIdColRef : void 0,
341
+ searchTypeColRef: includeSearchType ? searchTypeColRef : void 0,
338
342
  dimExprSql,
339
343
  metricSql,
340
344
  havingPredicates,
@@ -357,6 +361,7 @@ function createResolverAdapter(config) {
357
361
  dateColRef: runtime.dateColRef,
358
362
  urlToPathExpr: runtime.urlToPathExpr,
359
363
  siteIdColRef: runtime.siteIdColRef,
364
+ searchTypeColRef: runtime.searchTypeColRef,
360
365
  dimExprSql: runtime.dimExprSql,
361
366
  metricSql: runtime.metricSql,
362
367
  dimensionPredicates: runtime.dimensionPredicates,
@@ -366,6 +371,83 @@ function createResolverAdapter(config) {
366
371
  compile: config.compile
367
372
  };
368
373
  }
374
+ const pgDialect = new PgDialect();
375
+ function withTenantCols(tableName, baseTable) {
376
+ const t = pgTable(tableName, {
377
+ site_id: varchar("site_id").notNull(),
378
+ search_type: varchar("search_type").notNull()
379
+ });
380
+ return {
381
+ ...baseTable,
382
+ site_id: t.site_id,
383
+ search_type: t.search_type
384
+ };
385
+ }
386
+ const icebergSchema = {
387
+ pages: withTenantCols("pages", drizzleSchema.pages),
388
+ queries: withTenantCols("queries", drizzleSchema.queries),
389
+ countries: withTenantCols("countries", drizzleSchema.countries),
390
+ page_queries: withTenantCols("page_queries", drizzleSchema.page_queries),
391
+ dates: withTenantCols("dates", drizzleSchema.dates),
392
+ search_appearance: withTenantCols("search_appearance", drizzleSchema.search_appearance),
393
+ search_appearance_pages: withTenantCols("search_appearance_pages", drizzleSchema.search_appearance_pages),
394
+ search_appearance_queries: withTenantCols("search_appearance_queries", drizzleSchema.search_appearance_queries),
395
+ search_appearance_page_queries: withTenantCols("search_appearance_page_queries", drizzleSchema.search_appearance_page_queries),
396
+ hourly_pages: withTenantCols("hourly_pages", drizzleSchema.hourly_pages)
397
+ };
398
+ function compilePg(query) {
399
+ const compiled = pgDialect.sqlToQuery(query);
400
+ return {
401
+ sql: compiled.sql,
402
+ params: compiled.params
403
+ };
404
+ }
405
+ const PG_BASE_CONFIG = {
406
+ schema: drizzleSchema,
407
+ datasetToTableKey: {
408
+ pages: "pages",
409
+ queries: "queries",
410
+ page_queries: "page_queries",
411
+ countries: "countries",
412
+ dates: "dates",
413
+ search_appearance: "search_appearance",
414
+ search_appearance_pages: "search_appearance_pages",
415
+ search_appearance_queries: "search_appearance_queries",
416
+ search_appearance_page_queries: "search_appearance_page_queries",
417
+ hourly_pages: "hourly_pages"
418
+ },
419
+ metricCast: "DOUBLE",
420
+ regexPredicate: (expr, pattern, negate) => negate ? sql`NOT regexp_matches(${expr}, ${pattern})` : sql`regexp_matches(${expr}, ${pattern})`,
421
+ urlToPathExpr: (col) => `CASE WHEN ${col} LIKE 'http%' THEN COALESCE(NULLIF(regexp_replace(${col}, '^https?://[^/]+', ''), ''), '/') ELSE ${col} END`,
422
+ includeSiteId: false,
423
+ compile: compilePg,
424
+ capabilities: {
425
+ regex: true,
426
+ comparisonJoin: true,
427
+ windowTotals: true
428
+ }
429
+ };
430
+ const pgResolverAdapter = createResolverAdapter({
431
+ ...PG_BASE_CONFIG,
432
+ tableLabel: "pg-resolver-adapter"
433
+ });
434
+ function createParquetResolverAdapter() {
435
+ return createResolverAdapter({
436
+ ...PG_BASE_CONFIG,
437
+ tableLabel: "parquet-resolver-adapter",
438
+ tableRef: (tk) => sql.raw(`read_parquet({{FILES}}, union_by_name = true) AS "${tk}"`)
439
+ });
440
+ }
441
+ function createIcebergResolverAdapter() {
442
+ return createResolverAdapter({
443
+ ...PG_BASE_CONFIG,
444
+ schema: icebergSchema,
445
+ includeSiteId: true,
446
+ includeSearchType: true,
447
+ tableLabel: "iceberg-resolver-adapter",
448
+ tableRef: (tk) => sql.raw(`"${tk}"`)
449
+ });
450
+ }
369
451
  const COMPARISON_FILTER_SQL = {
370
452
  new: sql`AND (p.impressions IS NULL OR p.impressions = 0)`,
371
453
  lost: sql`AND p.impressions > 0 AND c.impressions = 0`,
@@ -440,7 +522,7 @@ function compileFilterTree(node, adapter, tableKey) {
440
522
  return sql`(${sql.join(childSqls, sep)})`;
441
523
  }
442
524
  function buildScope(state, options) {
443
- const { adapter, siteId } = options;
525
+ const { adapter, siteId, searchType } = options;
444
526
  const plan = buildLogicalPlan(state, adapter.capabilities);
445
527
  const tableKey = adapter.tableKeyForDataset(plan.dataset);
446
528
  const dimFilters = toInternalDimensionFilters(plan.dimensionFilters);
@@ -451,6 +533,7 @@ function buildScope(state, options) {
451
533
  const metrics = plan.metrics;
452
534
  const wherePredicates = [];
453
535
  if (adapter.siteIdColRef && siteId != null) wherePredicates.push(sql`${adapter.siteIdColRef(tableKey)} = ${siteId}`);
536
+ if (adapter.searchTypeColRef && searchType != null) wherePredicates.push(sql`${adapter.searchTypeColRef(tableKey)} = ${searchType}`);
454
537
  wherePredicates.push(sql`${adapter.dateColRef(tableKey)} >= ${plan.dateRange.startDate}`);
455
538
  wherePredicates.push(sql`${adapter.dateColRef(tableKey)} <= ${plan.dateRange.endDate}`);
456
539
  wherePredicates.push(...adapter.prefilterPredicates(prefilters, tableKey));
@@ -572,7 +655,7 @@ function buildTotalsSql(state, options) {
572
655
  return compileCollapsed(adapter, wherePredicates.length > 0 ? sql`SELECT ${joinComma(selectExprs)} FROM ${table} WHERE ${joinAnd(wherePredicates)}` : sql`SELECT ${joinComma(selectExprs)} FROM ${table}`);
573
656
  }
574
657
  function resolveComparisonSQL(current, previous, options, comparisonFilter) {
575
- const { adapter, siteId } = options;
658
+ const { adapter, siteId, searchType } = options;
576
659
  const comparisonPlan = buildComparisonPlan(current, previous, adapter.capabilities);
577
660
  const currentScope = buildScope(current, options);
578
661
  const previousScope = buildScope(previous, options);
@@ -590,6 +673,7 @@ function resolveComparisonSQL(current, previous, options, comparisonFilter) {
590
673
  const groupByExprs = groupByDims.map((d) => adapter.dimExprSql(d, tableKey));
591
674
  const prevWhere = [];
592
675
  if (adapter.siteIdColRef && siteId != null) prevWhere.push(sql`${adapter.siteIdColRef(tableKey)} = ${siteId}`);
676
+ if (adapter.searchTypeColRef && searchType != null) prevWhere.push(sql`${adapter.searchTypeColRef(tableKey)} = ${searchType}`);
593
677
  if (previousScope.startDate) prevWhere.push(sql`${adapter.dateColRef(tableKey)} >= ${previousScope.startDate}`);
594
678
  if (previousScope.endDate) prevWhere.push(sql`${adapter.dateColRef(tableKey)} <= ${previousScope.endDate}`);
595
679
  const prevDimSql = comparisonPlan.current.dimensionFilterTree ? compileFilterTree(comparisonPlan.current.dimensionFilterTree, adapter, tableKey) : void 0;
@@ -627,7 +711,7 @@ function resolveComparisonSQL(current, previous, options, comparisonFilter) {
627
711
  };
628
712
  }
629
713
  function buildExtrasQueries(state, options) {
630
- const { adapter, siteId } = options;
714
+ const { adapter, siteId, searchType } = options;
631
715
  const plan = buildLogicalPlan(state, adapter.capabilities);
632
716
  const dims = plan.groupByDimensions;
633
717
  const extras = [];
@@ -637,6 +721,7 @@ function buildExtrasQueries(state, options) {
637
721
  const table = adapter.tableRef(queriesKey);
638
722
  const whereParts = [];
639
723
  if (adapter.siteIdColRef && siteId != null) whereParts.push(sql`${adapter.siteIdColRef(queriesKey)} = ${siteId}`);
724
+ if (adapter.searchTypeColRef && searchType != null) whereParts.push(sql`${adapter.searchTypeColRef(queriesKey)} = ${searchType}`);
640
725
  whereParts.push(sql`${adapter.dateColRef(queriesKey)} >= ${plan.dateRange.startDate}`);
641
726
  whereParts.push(sql`${adapter.dateColRef(queriesKey)} <= ${plan.dateRange.endDate}`);
642
727
  const whereExpr = whereParts.length > 0 ? sql`WHERE ${joinAnd(whereParts)}` : sql``;
@@ -771,50 +856,6 @@ function matchesMetricFilter(row, filter) {
771
856
  function matchesTopLevelPage(row) {
772
857
  return (normalizeUrl(dimensionValue(row, "page")).match(/\//g)?.length ?? 0) <= 1;
773
858
  }
774
- const pgDialect = new PgDialect();
775
- function compilePg(query) {
776
- const compiled = pgDialect.sqlToQuery(query);
777
- return {
778
- sql: compiled.sql,
779
- params: compiled.params
780
- };
781
- }
782
- const PG_BASE_CONFIG = {
783
- schema: drizzleSchema,
784
- datasetToTableKey: {
785
- pages: "pages",
786
- queries: "queries",
787
- page_queries: "page_queries",
788
- countries: "countries",
789
- dates: "dates",
790
- search_appearance: "search_appearance",
791
- search_appearance_pages: "search_appearance_pages",
792
- search_appearance_queries: "search_appearance_queries",
793
- search_appearance_page_queries: "search_appearance_page_queries",
794
- hourly_pages: "hourly_pages"
795
- },
796
- metricCast: "DOUBLE",
797
- regexPredicate: (expr, pattern, negate) => negate ? sql`NOT regexp_matches(${expr}, ${pattern})` : sql`regexp_matches(${expr}, ${pattern})`,
798
- urlToPathExpr: (col) => `CASE WHEN ${col} LIKE 'http%' THEN COALESCE(NULLIF(regexp_replace(${col}, '^https?://[^/]+', ''), ''), '/') ELSE ${col} END`,
799
- includeSiteId: false,
800
- compile: compilePg,
801
- capabilities: {
802
- regex: true,
803
- comparisonJoin: true,
804
- windowTotals: true
805
- }
806
- };
807
- const pgResolverAdapter = createResolverAdapter({
808
- ...PG_BASE_CONFIG,
809
- tableLabel: "pg-resolver-adapter"
810
- });
811
- function createParquetResolverAdapter() {
812
- return createResolverAdapter({
813
- ...PG_BASE_CONFIG,
814
- tableLabel: "parquet-resolver-adapter",
815
- tableRef: (tk) => sql.raw(`read_parquet({{FILES}}, union_by_name = true) AS "${tk}"`)
816
- });
817
- }
818
859
  function runArgs(ctx, partitions) {
819
860
  return {
820
861
  ctx: {
@@ -912,4 +953,4 @@ function assertSchemaInSync(options) {
912
953
  if (missing.length > 0 || extra.length > 0) throw new Error(`${label} drizzle schema for '${key}' drifted from SCHEMAS. Missing: [${missing.join(", ")}]. Extra: [${extra.join(", ")}].`);
913
954
  }
914
955
  }
915
- export { DIMENSION_SURFACES, LOGICAL_DATASETS, UnresolvableDatasetError, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, isDatasetResolvable, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, runComparisonQuery, runOptimizedQuery, supportsDimensionOnSurface };
956
+ export { DIMENSION_SURFACES, LOGICAL_DATASETS, UnresolvableDatasetError, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createIcebergResolverAdapter, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, isDatasetResolvable, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, runComparisonQuery, runOptimizedQuery, supportsDimensionOnSurface };
@@ -0,0 +1,165 @@
1
+ import "./storage.mjs";
2
+ import { assertDimensionsSupported, getFilterDimensions, pgResolverAdapter, resolveToSQL } from "./resolver.mjs";
3
+ import { runAnalyzerFromSource } from "./dispatch.mjs";
4
+ function coerceRow(row) {
5
+ let mutated = null;
6
+ for (const [k, v] of Object.entries(row)) if (typeof v === "bigint") {
7
+ if (!mutated) mutated = { ...row };
8
+ mutated[k] = Number(v);
9
+ }
10
+ return mutated ?? row;
11
+ }
12
+ function coerceRows(rows) {
13
+ const out = Array.from({ length: rows.length });
14
+ for (let i = 0; i < rows.length; i++) out[i] = coerceRow(rows[i]);
15
+ return out;
16
+ }
17
+ var AttachedTableMissingError = class extends Error {
18
+ missing;
19
+ constructor(missing) {
20
+ super(`attached-table source: required table(s) not attached: ${missing.join(", ")}`);
21
+ this.missing = missing;
22
+ this.name = "AttachedTableMissingError";
23
+ }
24
+ };
25
+ const ATTACHED_TABLE_CAPABILITIES = {
26
+ fileSets: true,
27
+ attachedTables: true,
28
+ regex: true
29
+ };
30
+ const ATTACHED_TABLE_CAPABILITIES_WITH_ADAPTER = {
31
+ ...ATTACHED_TABLE_CAPABILITIES,
32
+ adapter: true
33
+ };
34
+ function rewriteForTableSource(sql, schema, fileSets) {
35
+ let out = sql;
36
+ for (const [key, fs] of Object.entries(fileSets)) {
37
+ const pattern = new RegExp(`read_parquet\\(\\s*\\{\\{${key}\\}\\}\\s*,\\s*union_by_name\\s*=\\s*true\\s*\\)`, "g");
38
+ out = out.replace(pattern, `${schema}.${fs.table}`);
39
+ }
40
+ return out;
41
+ }
42
+ function createAttachedTableSource(runner, options) {
43
+ const { schema, signal, attachedTables, adapter } = options;
44
+ const attachedSet = attachedTables ? new Set(attachedTables) : null;
45
+ return {
46
+ name: "attached-table",
47
+ kind: "browser",
48
+ capabilities: adapter ? ATTACHED_TABLE_CAPABILITIES_WITH_ADAPTER : ATTACHED_TABLE_CAPABILITIES,
49
+ adapter,
50
+ async queryRows() {
51
+ throw new Error("attached-table source: queryRows is not supported; use SQL analyzers");
52
+ },
53
+ async executeSql(sql, params, opts) {
54
+ signal?.throwIfAborted();
55
+ const fileSets = opts?.fileSets ?? {};
56
+ if (attachedSet) {
57
+ const missing = [];
58
+ for (const fs of Object.values(fileSets)) if (!attachedSet.has(fs.table)) missing.push(fs.table);
59
+ if (missing.length > 0) throw new AttachedTableMissingError(missing);
60
+ }
61
+ const rewritten = rewriteForTableSource(sql, schema, fileSets);
62
+ return coerceRows(await runner.query(rewritten, params ?? [], signal));
63
+ }
64
+ };
65
+ }
66
+ function createSqlQuerySource(options) {
67
+ const { name, kind, adapter, execute, siteId, searchType, extraCapabilities } = options;
68
+ return {
69
+ name,
70
+ kind,
71
+ capabilities: {
72
+ ...adapter.capabilities,
73
+ ...extraCapabilities,
74
+ adapter: true
75
+ },
76
+ adapter,
77
+ siteId,
78
+ async queryRows(state) {
79
+ const resolved = resolveToSQL(state, {
80
+ adapter,
81
+ siteId,
82
+ searchType
83
+ });
84
+ return coerceRows(await execute(resolved.sql, resolved.params));
85
+ },
86
+ async executeSql(sql, params) {
87
+ return coerceRows(await execute(sql, params ?? []));
88
+ }
89
+ };
90
+ }
91
+ function isMetricDimension(dim) {
92
+ return [
93
+ "clicks",
94
+ "impressions",
95
+ "ctr",
96
+ "position"
97
+ ].includes(dim);
98
+ }
99
+ const ENGINE_QUERY_CAPABILITIES = {
100
+ regex: true,
101
+ multiDataset: false,
102
+ comparisonJoin: false,
103
+ windowTotals: false
104
+ };
105
+ const ENGINE_SOURCE_CAPABILITIES = {
106
+ ...ENGINE_QUERY_CAPABILITIES,
107
+ fileSets: true,
108
+ adapter: true
109
+ };
110
+ function createEngineQuerySource(options) {
111
+ const { engine, ctx, searchType } = options;
112
+ return {
113
+ name: "engine",
114
+ kind: "local",
115
+ capabilities: ENGINE_SOURCE_CAPABILITIES,
116
+ adapter: pgResolverAdapter,
117
+ async queryRows(state) {
118
+ const filterDims = getFilterDimensions(state.filter, isMetricDimension);
119
+ assertDimensionsSupported([...state.dimensions, ...filterDims], "stored", "engine query source");
120
+ if (state.dimensions.includes("queryCanonical") || filterDims.includes("queryCanonical")) throw new Error("engine query source does not support queryCanonical; use browser/sqlite query sources for derived dimensions");
121
+ return coerceRows((await engine.query({
122
+ ...ctx,
123
+ ...searchType !== void 0 ? { searchType } : {}
124
+ }, state)).rows);
125
+ },
126
+ async executeSql(sql, params, opts) {
127
+ const fileSets = opts?.fileSets;
128
+ if (!fileSets?.FILES) throw new Error("engine query source: executeSql requires opts.fileSets with a FILES entry");
129
+ const { rows } = await engine.runSQL({
130
+ ctx,
131
+ table: fileSets.FILES.table,
132
+ fileSets,
133
+ sql,
134
+ params: params ?? [],
135
+ ...searchType !== void 0 ? { searchType } : {}
136
+ });
137
+ return coerceRows(rows);
138
+ }
139
+ };
140
+ }
141
+ async function runAnalyzerWithEngine(deps, ctx, params, registry) {
142
+ return runAnalyzerFromSource(createEngineQuerySource({
143
+ engine: deps.engine,
144
+ ctx,
145
+ searchType: params.searchType ?? "web"
146
+ }), params, registry);
147
+ }
148
+ function typedQuery(state) {
149
+ return { state };
150
+ }
151
+ function isTypedQuery(value) {
152
+ return "state" in value;
153
+ }
154
+ async function queryRows(source, query) {
155
+ const state = isTypedQuery(query) ? query.state : query;
156
+ return await source.queryRows(state);
157
+ }
158
+ async function queryComparisonRows(source, current, previous) {
159
+ const [currentRows, previousRows] = await Promise.all([queryRows(source, current), queryRows(source, previous)]);
160
+ return {
161
+ current: currentRows,
162
+ previous: previousRows
163
+ };
164
+ }
165
+ export { AttachedTableMissingError, ENGINE_QUERY_CAPABILITIES, coerceRow, coerceRows, createAttachedTableSource, createEngineQuerySource, createSqlQuerySource, queryComparisonRows, queryRows, rewriteForTableSource, runAnalyzerWithEngine, typedQuery };
@@ -13,6 +13,7 @@ interface ResolverAdapter<TableKey extends string = string> {
13
13
  dateColRef: (tableKey: TableKey) => SQL;
14
14
  urlToPathExpr: (col: string) => string;
15
15
  siteIdColRef?: (tableKey: TableKey) => SQL;
16
+ searchTypeColRef?: (tableKey: TableKey) => SQL;
16
17
  dimExprSql: (dim: Dimension, tableKey: TableKey) => SQL;
17
18
  metricSql: (metric: Metric, tableKey: TableKey) => SQL;
18
19
  dimensionPredicates: (filters: InternalFilter[], tableKey: TableKey) => SQL[];
@@ -29,6 +30,8 @@ interface ResolverOptions<TableKey extends string = string> {
29
30
  adapter: ResolverAdapter<TableKey>;
30
31
  /** Optional site scope. Required for multi-tenant D1; omitted for parquet. */
31
32
  siteId?: string | number;
33
+ /** Optional searchType scope. Required for multi-tenant Iceberg; omitted for parquet. */
34
+ searchType?: string;
32
35
  }
33
36
  interface ResolvedSQL {
34
37
  sql: string;
package/dist/index.d.mts CHANGED
@@ -5,7 +5,9 @@ import { InspectionVerdict, SchedulePolicy, ScheduleState, fixedPolicy, inspecti
5
5
  import { CommitRetryOptions, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, IcebergAppendSinkOptions, IcebergCatalogConfig, IcebergColumn, IcebergColumnType, IcebergConnection, IcebergListedDataFile, IcebergPartitionField, IcebergPartitionSpec, IcebergPartitionSpecField, IcebergPartitionTransform, IcebergPrimitiveType, IcebergS3Config, IcebergSchema, IcebergSchemaField, IcebergTableName, IcebergTableOpResult, IcebergTableSpec, ListIcebergDataFilesOptions, LocalIcebergSinkOptions, Sink, SinkCapabilities, SinkCloseResult, SinkOptions, SinkSlice, SinkWriteResult, connectIcebergCatalog, createIcebergTables, dropIcebergTables, ensureIcebergNamespace, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, isCommitRateLimited, listIcebergDataFiles, listIcebergTables } from "./_chunks/sink.mjs";
6
6
  import { GscApiRow, IngestOptions, RowAccumulator, RowAccumulatorOptions, assembleDatesRow, createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
7
7
  import { FILES_PLACEHOLDER, ResolvedQuery, resolveParquetSQL, substituteNamedFiles } from "./_chunks/planner.mjs";
8
+ import { createIcebergResolverAdapter, createParquetResolverAdapter, pgResolverAdapter } from "./_chunks/pg-adapter.mjs";
8
9
  import { rebuildDailyFromHourly } from "./rollups.mjs";
10
+ import { ENGINE_QUERY_CAPABILITIES, createSqlQuerySource } from "./_chunks/index.mjs";
9
11
  import { bindLiterals, formatLiteral } from "./sql-bind.mjs";
10
12
  import { Grain as Grain$1, Row as Row$1, TableName as TableName$1 } from "@gscdump/contracts";
11
13
  declare function coerceRow(row: Row$1): Row$1;
@@ -179,4 +181,4 @@ declare const MIN_SYNC_IMPRESSIONS = 1;
179
181
  declare const MIN_COUNTRY_IMPRESSIONS = 10;
180
182
  declare const MAX_SITEMAP_URLS_PER_SITE = 50000;
181
183
  declare const MAX_TRACKED_URLS_PER_SITE = 200000;
182
- export { type CodecCtx, type ColumnDef, type ColumnType, type CommitRetryOptions, type CompactionThresholds, type CompactionTier, type CreateIngestAccumulatorOptions, DEFAULT_SEARCH_TYPE, type DataSource, type DateWeight, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, type EngineOptions, FILES_PLACEHOLDER, type FileSetRef, type FinalizeOptions, type FinalizeResult, type GcCtx, type Grain, type GscApiRow, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, type IcebergAppendSink, type IcebergAppendSinkOptions, type IcebergCatalogConfig, type IcebergColumn, type IcebergColumnType, type IcebergConnection, type IcebergListedDataFile, type IcebergPartitionField, type IcebergPartitionSpec, type IcebergPartitionSpecField, type IcebergPartitionTransform, type IcebergPrimitiveType, type IcebergS3Config, type IcebergSchema, type IcebergSchemaField, type IcebergTableName, type IcebergTableOpResult, type IcebergTableSpec, type InMemorySink, type IngestAccumulator, type IngestAccumulatorCtx, type IngestAccumulatorEngine, type IngestAccumulatorHooks, type IngestOptions, type InspectionVerdict, type ListIcebergDataFilesOptions, type ListLiveFilter, type LocalIcebergSinkOptions, type LockScope, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SchedulePolicy, type ScheduleState, type SearchType, type Sink, type SinkCapabilities, type SinkCloseResult, type SinkOptions, type SinkSlice, type SinkWriteResult, type StorageEngine, type StoredRow, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, type SyncTableName, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, type TableName, type TableSchema, type TableTier, type TenantCtx, type TieredTableName, WEIGHT_PRIORITY, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, assembleDatesRow, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, connectIcebergCatalog, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIcebergAppendSink, createIcebergTables, createInMemorySink, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dates, dayPartition, dedupeOverlappingTiers, dimensionToColumn, drizzleSchema, dropIcebergTables, ensureIcebergNamespace, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, isCommitRateLimited, listIcebergDataFiles, listIcebergTables, objectKey, page_queries, pages, parseEnabledSearchTypes, queries, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, splitOverlappingTiers, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
184
+ export { type CodecCtx, type ColumnDef, type ColumnType, type CommitRetryOptions, type CompactionThresholds, type CompactionTier, type CreateIngestAccumulatorOptions, DEFAULT_SEARCH_TYPE, type DataSource, type DateWeight, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, ENGINE_QUERY_CAPABILITIES, type EngineOptions, FILES_PLACEHOLDER, type FileSetRef, type FinalizeOptions, type FinalizeResult, type GcCtx, type Grain, type GscApiRow, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, type IcebergAppendSink, type IcebergAppendSinkOptions, type IcebergCatalogConfig, type IcebergColumn, type IcebergColumnType, type IcebergConnection, type IcebergListedDataFile, type IcebergPartitionField, type IcebergPartitionSpec, type IcebergPartitionSpecField, type IcebergPartitionTransform, type IcebergPrimitiveType, type IcebergS3Config, type IcebergSchema, type IcebergSchemaField, type IcebergTableName, type IcebergTableOpResult, type IcebergTableSpec, type InMemorySink, type IngestAccumulator, type IngestAccumulatorCtx, type IngestAccumulatorEngine, type IngestAccumulatorHooks, type IngestOptions, type InspectionVerdict, type ListIcebergDataFilesOptions, type ListLiveFilter, type LocalIcebergSinkOptions, type LockScope, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SchedulePolicy, type ScheduleState, type SearchType, type Sink, type SinkCapabilities, type SinkCloseResult, type SinkOptions, type SinkSlice, type SinkWriteResult, type StorageEngine, type StoredRow, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, type SyncTableName, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, type TableName, type TableSchema, type TableTier, type TenantCtx, type TieredTableName, WEIGHT_PRIORITY, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, assembleDatesRow, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, connectIcebergCatalog, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIcebergAppendSink, createIcebergResolverAdapter, createIcebergTables, createInMemorySink, createIngestAccumulator, createNoopIngestAccumulator, createParquetResolverAdapter, createRowAccumulator, createSqlQuerySource, createStorageEngine, currentSchemaVersion, dates, dayPartition, dedupeOverlappingTiers, dimensionToColumn, drizzleSchema, dropIcebergTables, ensureIcebergNamespace, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, isCommitRateLimited, listIcebergDataFiles, listIcebergTables, objectKey, page_queries, pages, parseEnabledSearchTypes, pgResolverAdapter, queries, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, splitOverlappingTiers, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { coerceRow, coerceRows } from "./_chunks/coerce.mjs";
1
+ import { ENGINE_QUERY_CAPABILITIES, coerceRow, coerceRows, createSqlQuerySource } from "./_chunks/source.mjs";
2
2
  import { SCHEMAS, TABLE_METADATA, allTables, countries, currentSchemaVersion, dates, dimensionToColumn, drizzleSchema, hourly_pages, inferTable, page_queries, pages, queries } from "./_chunks/schema.mjs";
3
3
  import { DEFAULT_SEARCH_TYPE, dayPartition, hourPartition, inferLegacyTier, inferSearchType, objectKey } from "./_chunks/storage.mjs";
4
4
  import { FILES_PLACEHOLDER, RAW_DAILY_COMPACT_THRESHOLD, countRawDailies, dedupeOverlappingTiers, enumeratePartitions, resolveParquetSQL, splitOverlappingTiers, substituteNamedFiles } from "./_chunks/parquet-plan.mjs";
@@ -7,6 +7,7 @@ import { MAX_DAY_BYTES, canonicalEmptyParquetSchema, createDuckDBCodec, createDu
7
7
  import { ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, icebergTableSpec } from "./_chunks/iceberg-schema.mjs";
8
8
  import { assembleDatesRow, createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
9
9
  import "./planner.mjs";
10
+ import { createIcebergResolverAdapter, createParquetResolverAdapter, pgResolverAdapter } from "./_chunks/resolver.mjs";
10
11
  import { rebuildDailyFromHourly } from "./rollups.mjs";
11
12
  import { fixedPolicy, inspectionPolicy, sitemapPolicy } from "./schedule.mjs";
12
13
  import { icebergAppend, icebergCreateTable, icebergDropTable, icebergManifests, restCatalogConnect, restCatalogCreateNamespace, restCatalogListTables, restCatalogLoadTable, s3SignedResolver } from "icebird";
@@ -529,4 +530,4 @@ const MIN_SYNC_IMPRESSIONS = 1;
529
530
  const MIN_COUNTRY_IMPRESSIONS = 10;
530
531
  const MAX_SITEMAP_URLS_PER_SITE = 5e4;
531
532
  const MAX_TRACKED_URLS_PER_SITE = 2e5;
532
- export { DEFAULT_SEARCH_TYPE, FILES_PLACEHOLDER, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, SCHEMAS, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, WEIGHT_PRIORITY, allTables, assembleDatesRow, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, connectIcebergCatalog, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIcebergAppendSink, createIcebergTables, createInMemorySink, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dates, dayPartition, dedupeOverlappingTiers, dimensionToColumn, drizzleSchema, dropIcebergTables, ensureIcebergNamespace, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, isCommitRateLimited, listIcebergDataFiles, listIcebergTables, objectKey, page_queries, pages, parseEnabledSearchTypes, queries, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, splitOverlappingTiers, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
533
+ export { DEFAULT_SEARCH_TYPE, ENGINE_QUERY_CAPABILITIES, FILES_PLACEHOLDER, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, SCHEMAS, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, WEIGHT_PRIORITY, allTables, assembleDatesRow, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, connectIcebergCatalog, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIcebergAppendSink, createIcebergResolverAdapter, createIcebergTables, createInMemorySink, createIngestAccumulator, createNoopIngestAccumulator, createParquetResolverAdapter, createRowAccumulator, createSqlQuerySource, createStorageEngine, currentSchemaVersion, dates, dayPartition, dedupeOverlappingTiers, dimensionToColumn, drizzleSchema, dropIcebergTables, ensureIcebergNamespace, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, isCommitRateLimited, listIcebergDataFiles, listIcebergTables, objectKey, page_queries, pages, parseEnabledSearchTypes, pgResolverAdapter, queries, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, splitOverlappingTiers, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
@@ -1,2 +1,2 @@
1
- import { AnalysisPeriod, ComparisonMode, ComparisonPeriod, PadTimeseriesOptions, ResolveWindowOptions, ResolvedWindow, WindowPreset, comparisonOf, defaultEndDate, defaultStartDate, padTimeseries, periodOf, resolveWindow, windowToComparisonPeriod, windowToPeriod } from "../_chunks/index.mjs";
1
+ import { AnalysisPeriod, ComparisonMode, ComparisonPeriod, PadTimeseriesOptions, ResolveWindowOptions, ResolvedWindow, WindowPreset, comparisonOf, defaultEndDate, defaultStartDate, padTimeseries, periodOf, resolveWindow, windowToComparisonPeriod, windowToPeriod } from "../_chunks/index2.mjs";
2
2
  export { AnalysisPeriod, ComparisonMode, ComparisonPeriod, PadTimeseriesOptions, ResolveWindowOptions, ResolvedWindow, WindowPreset, comparisonOf, defaultEndDate, defaultStartDate, padTimeseries, periodOf, resolveWindow, windowToComparisonPeriod, windowToPeriod };
@@ -1,5 +1,5 @@
1
1
  import { AnalysisParams, AnalysisResult } from "../_chunks/analysis-types.mjs";
2
- import { ComparisonMode, ResolvedWindow, WindowPreset } from "../_chunks/index.mjs";
2
+ import { ComparisonMode, ResolvedWindow, WindowPreset } from "../_chunks/index2.mjs";
3
3
  /** Status vocabulary mirrors `ActionPrioritySourceStatus`. */
4
4
  type ReportStepStatus = 'pending' | 'running' | 'done' | 'skipped' | 'error';
5
5
  type ReportSeverity = 'info' | 'low' | 'medium' | 'high';
@@ -1,5 +1,6 @@
1
1
  import { SearchType as SearchType$1, TableName as TableName$1 } from "../_chunks/storage.mjs";
2
2
  import { ComparisonFilter, ExtraQuery, ResolvedComparisonSQL, ResolvedSQL, ResolvedSQLOptimized, ResolverAdapter, ResolverOptions } from "../_chunks/types.mjs";
3
+ import { PgTableKey, createIcebergResolverAdapter, createParquetResolverAdapter, pgResolverAdapter } from "../_chunks/pg-adapter.mjs";
3
4
  import { LogicalDataset, LogicalDataset as LogicalDataset$1, PlannerCapabilities, UnresolvableDatasetError, inferDataset as inferLogicalDataset, isDatasetResolvable } from "gscdump/query/plan";
4
5
  import { SQL } from "drizzle-orm";
5
6
  import { Grain, TableName } from "@gscdump/contracts";
@@ -24,6 +25,7 @@ interface SqlFragmentsConfig<TableKey extends string> {
24
25
  regexPredicate: (expr: SQL, pattern: string, negate: boolean) => SQL;
25
26
  tableLabel: string;
26
27
  includeSiteId: boolean;
28
+ includeSearchType?: boolean;
27
29
  urlToPathExpr?: (col: string) => string;
28
30
  /**
29
31
  * Override the FROM-clause table reference. Default emits the bound drizzle
@@ -46,6 +48,7 @@ interface SqlFragments<TableKey extends string> {
46
48
  tableRef: (tableKey: TableKey) => SQL;
47
49
  dateColRef: (tableKey: TableKey) => SQL;
48
50
  siteIdColRef?: (tableKey: TableKey) => SQL;
51
+ searchTypeColRef?: (tableKey: TableKey) => SQL;
49
52
  dimExprSql: (dim: Dimension, tableKey: TableKey) => SQL;
50
53
  metricSql: (metric: Metric, tableKey: TableKey) => SQL;
51
54
  havingPredicates: (filters: InternalFilter[], tableKey: TableKey) => SQL[];
@@ -82,20 +85,6 @@ declare function dimensionValue(row: Record<string, unknown>, dimension: string)
82
85
  declare function matchesDimensionFilter(row: Record<string, unknown>, filter: InternalFilter): boolean;
83
86
  declare function matchesMetricFilter(row: Record<string, unknown>, filter: InternalFilter): boolean;
84
87
  declare function matchesTopLevelPage(row: Record<string, unknown>): boolean;
85
- type PgTableKey = TableName;
86
- declare const pgResolverAdapter: ResolverAdapter<PgTableKey>;
87
- /**
88
- * Parquet-aware variant of {@link pgResolverAdapter}. Identical SQL output
89
- * except FROM clauses emit `read_parquet({{FILES}}, union_by_name = true) AS
90
- * "${tk}"`. The runSQL pipeline substitutes `{{FILES}}` with R2 object keys
91
- * resolved from the manifest. The `AS "${tk}"` alias is mandatory — drizzle
92
- * compiles `colRef` to table-qualified `"pages"."url"`, which would not
93
- * resolve against an unaliased FROM.
94
- *
95
- * Single-use: build a fresh adapter per query. Cheap (no I/O) and avoids
96
- * accidental adapter caching that would lock in a stale `{{FILES}}` set.
97
- */
98
- declare function createParquetResolverAdapter(): ResolverAdapter<PgTableKey>;
99
88
  interface RunQueryCtx {
100
89
  userId: string;
101
90
  siteId: string;
@@ -175,4 +164,4 @@ interface AssertSchemaInSyncOptions {
175
164
  mode: 'exact' | 'superset';
176
165
  }
177
166
  declare function assertSchemaInSync(options: AssertSchemaInSyncOptions): void;
178
- export { type AssertSchemaInSyncOptions, type ComparisonFilter, type ComparisonQueryResult, type CreateResolverAdapterConfig, DIMENSION_SURFACES, type DimensionBinding, type DimensionSurface, type ExtraQuery, LOGICAL_DATASETS, type LogicalDataset, type LogicalDatasetDefinition, type OptimizedQueryResult, type PgTableKey, type ResolvedComparisonSQL, type ResolvedSQL, type ResolvedSQLOptimized, type ResolverAdapter, type ResolverOptions, type RunQueryCtx, type RunSQLFn, type SqlFragments, type SqlFragmentsConfig, UnresolvableDatasetError, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, isDatasetResolvable, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, runComparisonQuery, runOptimizedQuery, supportsDimensionOnSurface };
167
+ export { type AssertSchemaInSyncOptions, type ComparisonFilter, type ComparisonQueryResult, type CreateResolverAdapterConfig, DIMENSION_SURFACES, type DimensionBinding, type DimensionSurface, type ExtraQuery, LOGICAL_DATASETS, type LogicalDataset, type LogicalDatasetDefinition, type OptimizedQueryResult, type PgTableKey, type ResolvedComparisonSQL, type ResolvedSQL, type ResolvedSQLOptimized, type ResolverAdapter, type ResolverOptions, type RunQueryCtx, type RunSQLFn, type SqlFragments, type SqlFragmentsConfig, UnresolvableDatasetError, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createIcebergResolverAdapter, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, isDatasetResolvable, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, runComparisonQuery, runOptimizedQuery, supportsDimensionOnSurface };
@@ -1,2 +1,2 @@
1
- import { DIMENSION_SURFACES, LOGICAL_DATASETS, UnresolvableDatasetError, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, isDatasetResolvable, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, runComparisonQuery, runOptimizedQuery, supportsDimensionOnSurface } from "../_chunks/resolver.mjs";
2
- export { DIMENSION_SURFACES, LOGICAL_DATASETS, UnresolvableDatasetError, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, isDatasetResolvable, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, runComparisonQuery, runOptimizedQuery, supportsDimensionOnSurface };
1
+ import { DIMENSION_SURFACES, LOGICAL_DATASETS, UnresolvableDatasetError, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createIcebergResolverAdapter, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, isDatasetResolvable, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, runComparisonQuery, runOptimizedQuery, supportsDimensionOnSurface } from "../_chunks/resolver.mjs";
2
+ export { DIMENSION_SURFACES, LOGICAL_DATASETS, UnresolvableDatasetError, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createIcebergResolverAdapter, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, isDatasetResolvable, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, runComparisonQuery, runOptimizedQuery, supportsDimensionOnSurface };
@@ -1,109 +1,3 @@
1
- import { Row, SearchType as SearchType$1, StorageEngine, TenantCtx } from "../_chunks/storage.mjs";
2
- import { AnalysisParams, AnalysisResult } from "../_chunks/analysis-types.mjs";
3
- import { ResolverAdapter } from "../_chunks/types.mjs";
4
- import { AnalysisQuerySource, AnalysisSourceKind, AnalyzerRegistry, ExecuteSqlOptions, FileSet, QueryRow, SourceCapabilities } from "../_chunks/registry.mjs";
5
- import { PlannerCapabilities } from "gscdump/query/plan";
6
- import { BuilderState } from "gscdump/query";
7
- interface AttachedTableRunner {
8
- /**
9
- * Run a query with positional (`?`) bound parameters. Return objects keyed
10
- * by column name. BIGINT → number coercion is applied by the source factory
11
- * (see `coerceRows`); runners only need to handle DATE → ISO string (or
12
- * let the analyzer reducer normalize via `num(v)`/`str(v)`).
13
- */
14
- query: (sql: string, params?: unknown[], signal?: AbortSignal) => Promise<Row[]>;
15
- }
16
- interface AttachedTableSourceOptions {
17
- /** Schema name the exported DuckDB file was attached under — e.g. `gsc`. */
18
- schema: string;
19
- /**
20
- * Abort in-flight queries when the caller no longer cares about the
21
- * result. Every `runner.query` call receives the same signal.
22
- */
23
- signal?: AbortSignal;
24
- /**
25
- * List of table names actually attached to this connection. When provided,
26
- * `executeSql` short-circuits with a specific "table not attached" error
27
- * if the SQL plan references a table that isn't in this list — letting
28
- * callers (e.g. the analytics layer) route to cloud fallback without
29
- * paying the SQL execution cost. Omit to disable the check.
30
- */
31
- attachedTables?: readonly string[];
32
- /**
33
- * Dialect adapter surfaced on the source for analyzers that compose SQL
34
- * from a `BuilderState` at plan-build time (e.g. `data-query`,
35
- * `data-detail`). Attached-table sources execute pg-flavored DuckDB SQL,
36
- * so callers should pass `pgResolverAdapter` here.
37
- */
38
- adapter?: ResolverAdapter<any>;
39
- }
40
- declare class AttachedTableMissingError extends Error {
41
- readonly missing: readonly string[];
42
- constructor(missing: readonly string[]);
43
- }
44
- /**
45
- * Swap `read_parquet({{KEY}}, union_by_name = true)` for `<schema>.<table>`.
46
- * Tolerates whitespace variation. Preserves the rest of the SQL verbatim.
47
- */
48
- declare function rewriteForTableSource(sql: string, schema: string, fileSets: Record<string, FileSet>): string;
49
- declare function createAttachedTableSource(runner: AttachedTableRunner, options: AttachedTableSourceOptions): AnalysisQuerySource;
50
- interface CreateSqlQuerySourceOptions<TKey extends string> {
51
- /** Debug-only identifier surfaced on the source for error messages. */
52
- name: string;
53
- /** Telemetry tag stamped onto analyzer result meta. */
54
- kind?: AnalysisSourceKind;
55
- /** Dialect-specific adapter; compiles `BuilderState` → `{ sql, params }`. */
56
- adapter: ResolverAdapter<TKey>;
57
- /** Drives the underlying DB. Called for both typed queries and raw SQL. */
58
- execute: (sql: string, params: unknown[]) => Promise<QueryRow[]>;
59
- /** Tenant id for multi-tenant dialects; forwarded to `resolveToSQL`. */
60
- siteId?: string | number;
61
- /** Additional capability flags merged on top of `adapter.capabilities`. */
62
- extraCapabilities?: Partial<SourceCapabilities>;
63
- }
64
- declare function createSqlQuerySource<TKey extends string>(options: CreateSqlQuerySourceOptions<TKey>): AnalysisQuerySource;
65
- /**
66
- * Capabilities the engine query path honors. Matches what the DuckDB compiler
67
- * passes to `buildLogicalPlan`: regex pushes down; comparison joins and
68
- * multi-dataset queries belong to the analyzer dispatcher, not the engine's
69
- * builder-state query path.
70
- */
71
- declare const ENGINE_QUERY_CAPABILITIES: PlannerCapabilities;
72
- interface EngineQuerySourceOptions {
73
- engine: StorageEngine;
74
- ctx: TenantCtx;
75
- /**
76
- * Restrict every manifest lookup the source performs to a single search-type
77
- * slice. Threads into `engine.query` and `engine.runSQL` so the wrapped
78
- * source returns rows from one cohort instead of unioning web + non-web
79
- * parquet. Undefined preserves legacy cross-type behaviour for web-only
80
- * tenants and admin paths.
81
- */
82
- searchType?: SearchType$1;
83
- }
84
- /**
85
- * Wraps a storage engine as an `AnalysisQuerySource` with SQL execution.
86
- * `queryRows` runs typed builder-state queries; `executeSql` delegates to
87
- * `engine.runSQL` and requires `opts.fileSets` (with a `FILES` entry so the
88
- * target table can be resolved for partition lookup).
89
- */
90
- declare function createEngineQuerySource(options: EngineQuerySourceOptions): AnalysisQuerySource;
91
- /**
92
- * Convenience: wrap a storage engine + tenant ctx in a source and dispatch.
93
- * Equivalent to wrapping `createEngineQuerySource`, with omitted searchType
94
- * defaulted to web at this public helper boundary.
95
- */
96
- declare function runAnalyzerWithEngine(deps: {
97
- engine: StorageEngine;
98
- }, ctx: TenantCtx, params: AnalysisParams, registry: AnalyzerRegistry): Promise<AnalysisResult>;
99
- interface TypedQuery<TRow> {
100
- state: BuilderState;
101
- readonly __row?: TRow;
102
- }
103
- declare function typedQuery<TRow>(state: BuilderState): TypedQuery<TRow>;
104
- declare function queryRows<TRow = QueryRow>(source: AnalysisQuerySource, query: BuilderState | TypedQuery<TRow>): Promise<TRow[]>;
105
- declare function queryComparisonRows<TRow = QueryRow>(source: AnalysisQuerySource, current: BuilderState | TypedQuery<TRow>, previous: BuilderState | TypedQuery<TRow>): Promise<{
106
- current: TRow[];
107
- previous: TRow[];
108
- }>;
1
+ import { AnalysisQuerySource, AnalysisSourceKind, ExecuteSqlOptions, FileSet, QueryRow, SourceCapabilities } from "../_chunks/registry.mjs";
2
+ import { AttachedTableMissingError, AttachedTableRunner, AttachedTableSourceOptions, CreateSqlQuerySourceOptions, ENGINE_QUERY_CAPABILITIES, EngineQuerySourceOptions, TypedQuery, createAttachedTableSource, createEngineQuerySource, createSqlQuerySource, queryComparisonRows, queryRows, rewriteForTableSource, runAnalyzerWithEngine, typedQuery } from "../_chunks/index.mjs";
109
3
  export { type AnalysisQuerySource, type AnalysisSourceKind, AttachedTableMissingError, type AttachedTableRunner, type AttachedTableSourceOptions, type CreateSqlQuerySourceOptions, ENGINE_QUERY_CAPABILITIES, EngineQuerySourceOptions, type ExecuteSqlOptions, type FileSet, type QueryRow, type SourceCapabilities, TypedQuery, createAttachedTableSource, createEngineQuerySource, createSqlQuerySource, queryComparisonRows, queryRows, rewriteForTableSource, runAnalyzerWithEngine, typedQuery };
@@ -1,152 +1,2 @@
1
- import { coerceRows } from "../_chunks/coerce.mjs";
2
- import "../_chunks/storage.mjs";
3
- import { assertDimensionsSupported, getFilterDimensions, pgResolverAdapter, resolveToSQL } from "../_chunks/resolver.mjs";
4
- import { runAnalyzerFromSource } from "../_chunks/dispatch.mjs";
5
- var AttachedTableMissingError = class extends Error {
6
- missing;
7
- constructor(missing) {
8
- super(`attached-table source: required table(s) not attached: ${missing.join(", ")}`);
9
- this.missing = missing;
10
- this.name = "AttachedTableMissingError";
11
- }
12
- };
13
- const ATTACHED_TABLE_CAPABILITIES = {
14
- fileSets: true,
15
- attachedTables: true,
16
- regex: true
17
- };
18
- const ATTACHED_TABLE_CAPABILITIES_WITH_ADAPTER = {
19
- ...ATTACHED_TABLE_CAPABILITIES,
20
- adapter: true
21
- };
22
- function rewriteForTableSource(sql, schema, fileSets) {
23
- let out = sql;
24
- for (const [key, fs] of Object.entries(fileSets)) {
25
- const pattern = new RegExp(`read_parquet\\(\\s*\\{\\{${key}\\}\\}\\s*,\\s*union_by_name\\s*=\\s*true\\s*\\)`, "g");
26
- out = out.replace(pattern, `${schema}.${fs.table}`);
27
- }
28
- return out;
29
- }
30
- function createAttachedTableSource(runner, options) {
31
- const { schema, signal, attachedTables, adapter } = options;
32
- const attachedSet = attachedTables ? new Set(attachedTables) : null;
33
- return {
34
- name: "attached-table",
35
- kind: "browser",
36
- capabilities: adapter ? ATTACHED_TABLE_CAPABILITIES_WITH_ADAPTER : ATTACHED_TABLE_CAPABILITIES,
37
- adapter,
38
- async queryRows() {
39
- throw new Error("attached-table source: queryRows is not supported; use SQL analyzers");
40
- },
41
- async executeSql(sql, params, opts) {
42
- signal?.throwIfAborted();
43
- const fileSets = opts?.fileSets ?? {};
44
- if (attachedSet) {
45
- const missing = [];
46
- for (const fs of Object.values(fileSets)) if (!attachedSet.has(fs.table)) missing.push(fs.table);
47
- if (missing.length > 0) throw new AttachedTableMissingError(missing);
48
- }
49
- const rewritten = rewriteForTableSource(sql, schema, fileSets);
50
- return coerceRows(await runner.query(rewritten, params ?? [], signal));
51
- }
52
- };
53
- }
54
- function createSqlQuerySource(options) {
55
- const { name, kind, adapter, execute, siteId, extraCapabilities } = options;
56
- return {
57
- name,
58
- kind,
59
- capabilities: {
60
- ...adapter.capabilities,
61
- ...extraCapabilities,
62
- adapter: true
63
- },
64
- adapter,
65
- siteId,
66
- async queryRows(state) {
67
- const resolved = resolveToSQL(state, {
68
- adapter,
69
- siteId
70
- });
71
- return coerceRows(await execute(resolved.sql, resolved.params));
72
- },
73
- async executeSql(sql, params) {
74
- return coerceRows(await execute(sql, params ?? []));
75
- }
76
- };
77
- }
78
- function isMetricDimension(dim) {
79
- return [
80
- "clicks",
81
- "impressions",
82
- "ctr",
83
- "position"
84
- ].includes(dim);
85
- }
86
- const ENGINE_QUERY_CAPABILITIES = {
87
- regex: true,
88
- multiDataset: false,
89
- comparisonJoin: false,
90
- windowTotals: false
91
- };
92
- const ENGINE_SOURCE_CAPABILITIES = {
93
- ...ENGINE_QUERY_CAPABILITIES,
94
- fileSets: true,
95
- adapter: true
96
- };
97
- function createEngineQuerySource(options) {
98
- const { engine, ctx, searchType } = options;
99
- return {
100
- name: "engine",
101
- kind: "local",
102
- capabilities: ENGINE_SOURCE_CAPABILITIES,
103
- adapter: pgResolverAdapter,
104
- async queryRows(state) {
105
- const filterDims = getFilterDimensions(state.filter, isMetricDimension);
106
- assertDimensionsSupported([...state.dimensions, ...filterDims], "stored", "engine query source");
107
- if (state.dimensions.includes("queryCanonical") || filterDims.includes("queryCanonical")) throw new Error("engine query source does not support queryCanonical; use browser/sqlite query sources for derived dimensions");
108
- return coerceRows((await engine.query({
109
- ...ctx,
110
- ...searchType !== void 0 ? { searchType } : {}
111
- }, state)).rows);
112
- },
113
- async executeSql(sql, params, opts) {
114
- const fileSets = opts?.fileSets;
115
- if (!fileSets?.FILES) throw new Error("engine query source: executeSql requires opts.fileSets with a FILES entry");
116
- const { rows } = await engine.runSQL({
117
- ctx,
118
- table: fileSets.FILES.table,
119
- fileSets,
120
- sql,
121
- params: params ?? [],
122
- ...searchType !== void 0 ? { searchType } : {}
123
- });
124
- return coerceRows(rows);
125
- }
126
- };
127
- }
128
- async function runAnalyzerWithEngine(deps, ctx, params, registry) {
129
- return runAnalyzerFromSource(createEngineQuerySource({
130
- engine: deps.engine,
131
- ctx,
132
- searchType: params.searchType ?? "web"
133
- }), params, registry);
134
- }
135
- function typedQuery(state) {
136
- return { state };
137
- }
138
- function isTypedQuery(value) {
139
- return "state" in value;
140
- }
141
- async function queryRows(source, query) {
142
- const state = isTypedQuery(query) ? query.state : query;
143
- return await source.queryRows(state);
144
- }
145
- async function queryComparisonRows(source, current, previous) {
146
- const [currentRows, previousRows] = await Promise.all([queryRows(source, current), queryRows(source, previous)]);
147
- return {
148
- current: currentRows,
149
- previous: previousRows
150
- };
151
- }
1
+ import { AttachedTableMissingError, ENGINE_QUERY_CAPABILITIES, createAttachedTableSource, createEngineQuerySource, createSqlQuerySource, queryComparisonRows, queryRows, rewriteForTableSource, runAnalyzerWithEngine, typedQuery } from "../_chunks/source.mjs";
152
2
  export { AttachedTableMissingError, ENGINE_QUERY_CAPABILITIES, createAttachedTableSource, createEngineQuerySource, createSqlQuerySource, queryComparisonRows, queryRows, rewriteForTableSource, runAnalyzerWithEngine, typedQuery };
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@gscdump/engine",
3
3
  "type": "module",
4
- "version": "0.23.1",
4
+ "version": "0.23.2",
5
5
  "description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
6
6
  "author": {
7
7
  "name": "Harlan Wilton",
@@ -180,8 +180,8 @@
180
180
  "drizzle-orm": "^0.45.2",
181
181
  "icebird": "^0.8.6",
182
182
  "proper-lockfile": "^4.1.2",
183
- "@gscdump/contracts": "0.23.1",
184
- "gscdump": "0.23.1"
183
+ "@gscdump/contracts": "0.23.2",
184
+ "gscdump": "0.23.2"
185
185
  },
186
186
  "devDependencies": {
187
187
  "@duckdb/duckdb-wasm": "^1.32.0",
@@ -1,14 +0,0 @@
1
- function coerceRow(row) {
2
- let mutated = null;
3
- for (const [k, v] of Object.entries(row)) if (typeof v === "bigint") {
4
- if (!mutated) mutated = { ...row };
5
- mutated[k] = Number(v);
6
- }
7
- return mutated ?? row;
8
- }
9
- function coerceRows(rows) {
10
- const out = Array.from({ length: rows.length });
11
- for (let i = 0; i < rows.length; i++) out[i] = coerceRow(rows[i]);
12
- return out;
13
- }
14
- export { coerceRow, coerceRows };