@gscdump/engine 0.17.3 → 0.17.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_chunks/analysis-types.d.mts +3 -1
- package/dist/_chunks/compiler.mjs +6 -2
- package/dist/_chunks/engine.mjs +1 -1
- package/dist/_chunks/resolver.mjs +91 -2
- package/dist/_chunks/storage.d.mts +6 -1
- package/dist/index.d.mts +106 -3
- package/dist/index.mjs +209 -2
- package/dist/planner.d.mts +1 -1
- package/dist/planner.mjs +1 -1
- package/dist/resolver/index.d.mts +59 -1
- package/dist/resolver/index.mjs +2 -2
- package/dist/rollups.d.mts +22 -1
- package/dist/rollups.mjs +28 -1
- package/dist/source/index.mjs +1 -1
- package/package.json +3 -3
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { BuilderState } from "gscdump/query";
|
|
1
|
+
import { BuilderState, SearchType } from "gscdump/query";
|
|
2
2
|
type AnalysisTool = 'striking-distance' | 'opportunity' | 'movers' | 'decay' | 'zero-click' | 'brand' | 'cannibalization' | 'clustering' | 'concentration' | 'seasonality' | 'trends' | 'ctr-anomaly' | 'position-volatility' | 'long-tail' | 'intent-atlas' | 'query-migration' | 'bayesian-ctr' | 'stl-decompose' | 'change-point' | 'bipartite-pagerank' | 'survival' | 'position-distribution' | 'ctr-curve' | 'dark-traffic' | 'content-velocity' | 'keyword-breadth' | 'device-gap' | 'data-query' | 'data-detail';
|
|
3
3
|
interface AnalysisParams {
|
|
4
4
|
type: AnalysisTool;
|
|
@@ -37,6 +37,8 @@ interface AnalysisParams {
|
|
|
37
37
|
qc?: BuilderState;
|
|
38
38
|
/** data-query comparison filter applied to joined current/previous rows. */
|
|
39
39
|
comparisonFilter?: 'new' | 'lost' | 'improving' | 'declining';
|
|
40
|
+
/** GSC slice the analysis is scoped to. Undefined = analyzer runs cross-type (today's behaviour for web-only sites). */
|
|
41
|
+
searchType?: SearchType;
|
|
40
42
|
}
|
|
41
43
|
interface AnalysisResult {
|
|
42
44
|
results: Record<string, unknown>[];
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import { i as dimensionToColumn, r as currentSchemaVersion } from "./schema.mjs";
|
|
2
2
|
import { a as mondayOfWeek, c as quarterOfMonth, d as weekPartition, i as inferSearchType, l as quarterPartition, n as dayPartition, o as monthPartition, s as objectKey } from "./storage.mjs";
|
|
3
3
|
import { METRIC_EXPR, escapeLike, topLevelPagePredicateSql } from "../sql-fragments.mjs";
|
|
4
|
-
import { buildLogicalPlan } from "gscdump/query/plan";
|
|
5
4
|
import { MS_PER_DAY } from "gscdump";
|
|
5
|
+
import { buildLogicalPlan } from "gscdump/query/plan";
|
|
6
6
|
const DAILY_PARTITION_RE = /^daily\/(\d{4}-\d{2}-\d{2})$/;
|
|
7
7
|
const WEEKLY_PARTITION_RE = /^weekly\/(\d{4}-\d{2}-\d{2})$/;
|
|
8
8
|
const MONTHLY_PARTITION_RE = /^monthly\/(\d{4}-\d{2})$/;
|
|
@@ -11,6 +11,10 @@ const DEFAULT_THRESHOLDS = {
|
|
|
11
11
|
d7: 30,
|
|
12
12
|
d30: 90
|
|
13
13
|
};
|
|
14
|
+
/**
 * Threshold exported alongside `countRawDailies`.
 * NOTE(review): presumably the raw-daily count at which compaction is triggered — confirm against the caller.
 */
const RAW_DAILY_COMPACT_THRESHOLD = 7;

/**
 * Counts the entries that represent raw daily files.
 *
 * An entry is counted when its `tier` is exactly "raw", or when it carries no
 * tier at all (null/undefined) and its partition name starts with "daily/".
 *
 * @param entries Entries exposing an optional `tier` and a `partition` string.
 * @returns Number of matching entries.
 */
function countRawDailies(entries) {
	return entries.reduce((total, entry) => {
		if (entry.tier === "raw") return total + 1;
		// Untiered entries are identified by their partition prefix instead.
		if (entry.tier == null && entry.partition.startsWith("daily/")) return total + 1;
		return total;
	}, 0);
}
|
|
14
18
|
const PENDING_WINDOW_DAYS = 4;
|
|
15
19
|
const STAGES = [
|
|
16
20
|
{
|
|
@@ -285,4 +289,4 @@ function substituteNamedFiles(sql, sets) {
|
|
|
285
289
|
for (const [name, keys] of Object.entries(sets)) out = out.replace(new RegExp(`\\{\\{${name}\\}\\}`, "g"), fileList(keys));
|
|
286
290
|
return out;
|
|
287
291
|
}
|
|
288
|
-
export {
|
|
292
|
+
export { RAW_DAILY_COMPACT_THRESHOLD as a, enumeratePartitions as c, substituteNamedFiles as i, compileLogicalQueryPlan as n, compactTieredImpl as o, resolveToSQL as r, countRawDailies as s, FILES_PLACEHOLDER as t };
|
package/dist/_chunks/engine.mjs
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { r as currentSchemaVersion, t as SCHEMAS } from "./schema.mjs";
|
|
2
2
|
import { i as inferSearchType, n as dayPartition, s as objectKey, u as tenantPrefix } from "./storage.mjs";
|
|
3
|
-
import {
|
|
3
|
+
import { i as substituteNamedFiles, n as compileLogicalQueryPlan, o as compactTieredImpl } from "./compiler.mjs";
|
|
4
4
|
import { sqlEscape } from "../sql-bind.mjs";
|
|
5
5
|
import { buildLogicalPlan } from "gscdump/query/plan";
|
|
6
6
|
import { normalizeUrl } from "gscdump/normalize";
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import { t as SCHEMAS, u as drizzleSchema } from "./schema.mjs";
|
|
2
|
+
import { c as enumeratePartitions } from "./compiler.mjs";
|
|
2
3
|
import { escapeLike } from "../sql-fragments.mjs";
|
|
3
|
-
import
|
|
4
|
+
import "../planner.mjs";
|
|
4
5
|
import { PgDialect } from "drizzle-orm/pg-core";
|
|
6
|
+
import { buildLogicalComparisonPlan, buildLogicalPlan } from "gscdump/query/plan";
|
|
5
7
|
import { normalizeUrl } from "gscdump/normalize";
|
|
6
8
|
import { sql } from "drizzle-orm";
|
|
7
9
|
const DIMENSION_SURFACES = {
|
|
@@ -756,6 +758,93 @@ function createParquetResolverAdapter() {
|
|
|
756
758
|
tableRef: (tk) => sql.raw(`read_parquet({{FILES}}, union_by_name = true) AS "${tk}"`)
|
|
757
759
|
});
|
|
758
760
|
}
|
|
761
|
+
/**
 * Builds the argument object shared by every `runSQL` call of one query run.
 *
 * The result carries the tenant identifiers, the table, and a single `FILES`
 * file set made of the table and the given partitions. `searchType` is only
 * present on the result when the context defines it.
 *
 * @param ctx Run context (`userId`, `siteId`, `table`, optional `searchType`).
 * @param partitions Partition names to expose as the `FILES` file set.
 * @returns Base options to spread into a `runSQL` call.
 */
function runArgs(ctx, partitions) {
	const { userId, siteId, table, searchType } = ctx;
	const args = {
		ctx: { userId, siteId },
		table,
		fileSets: { FILES: { table, partitions } }
	};
	// Leave the key off entirely rather than emitting `searchType: undefined`.
	return searchType === void 0 ? args : { ...args, searchType };
}
|
|
775
|
+
/**
 * Resolves a builder state to its optimized SQL plus any extras queries and
 * runs all of them concurrently over the partitions covering `dateRange`.
 *
 * The window-total columns (`totalCount`, `totalClicks`, `totalImpressions`,
 * `totalCtr`, `totalPosition`) are read from the first result row, converted
 * with `Number` (defaulting to 0), and removed from every returned row.
 *
 * @param runSQL Executor invoked once for the main query and once per extra.
 * @param ctx Run context forwarded to `runArgs`.
 * @param state Builder state handed to the resolver.
 * @param dateRange Object with `startDate` and `endDate`.
 * @returns `{ rows, totalCount, totals, extras }`.
 */
async function runOptimizedQuery(runSQL, ctx, state, dateRange) {
	const adapter = createParquetResolverAdapter();
	const partitions = enumeratePartitions(dateRange.startDate, dateRange.endDate);
	const shared = runArgs(ctx, partitions);
	// Each resolver call receives its own options object, as before.
	const resolverOptions = () => ({
		adapter,
		siteId: void 0
	});
	const mainQuery = resolveToSQLOptimized(state, resolverOptions());
	const extraQueries = buildExtrasQueries(state, resolverOptions());
	const execute = ({ sql, params }) => runSQL({
		...shared,
		sql,
		params
	});
	// Main query is started first, then the extras in their declared order.
	const pending = [execute(mainQuery), ...extraQueries.map((query) => execute(query))];
	const [mainResult, ...extraResults] = await Promise.all(pending);
	const head = mainResult.rows[0];
	const numberFromHead = (column) => Number(head?.[column] ?? 0);
	const totalCount = numberFromHead("totalCount");
	const totals = {
		clicks: numberFromHead("totalClicks"),
		impressions: numberFromHead("totalImpressions"),
		ctr: numberFromHead("totalCtr"),
		position: numberFromHead("totalPosition")
	};
	const rows = mainResult.rows.map((row) => {
		const { totalCount: _count, totalClicks: _clicks, totalImpressions: _impressions, totalCtr: _ctr, totalPosition: _position, ...dataColumns } = row;
		return dataColumns;
	});
	const extras = extraQueries.map((query, index) => ({
		key: query.key,
		rows: extraResults[index].rows
	}));
	return {
		rows,
		totalCount,
		totals,
		extras
	};
}
|
|
816
|
+
/**
 * Runs a current-vs-previous comparison: the comparison query, its count
 * query, and a totals query for the current state, one after another.
 *
 * All three share one file set spanning from the earlier of the two window
 * start dates to the later of the two window end dates.
 *
 * @param runSQL Executor invoked for each of the three statements.
 * @param ctx Run context forwarded to `runArgs`.
 * @param current Builder state for the current window.
 * @param previous Builder state for the previous window.
 * @param windows `{ current, previous }`, each with `startDate` and `endDate`.
 * @param filter Comparison filter forwarded to `resolveComparisonSQL`.
 * @returns `{ rows, totalCount, totals }`; `totals` is `{}` when the totals
 *   query yields no row, and `totalCount` is 0 when the count query yields none.
 */
async function runComparisonQuery(runSQL, ctx, current, previous, windows, filter) {
	const adapter = createParquetResolverAdapter();
	const comparison = resolveComparisonSQL(current, previous, {
		adapter,
		siteId: void 0
	}, filter);
	const totals = buildTotalsSql(current, {
		adapter,
		siteId: void 0
	});
	const { current: currentWindow, previous: previousWindow } = windows;
	// Date strings are compared directly, exactly as the original expression did.
	const earliestStart = currentWindow.startDate < previousWindow.startDate ? currentWindow.startDate : previousWindow.startDate;
	const latestEnd = currentWindow.endDate > previousWindow.endDate ? currentWindow.endDate : previousWindow.endDate;
	const shared = runArgs(ctx, enumeratePartitions(earliestStart, latestEnd));
	const execute = (sql, params) => runSQL({
		...shared,
		sql,
		params
	});
	// Kept strictly sequential: each statement starts only after the previous one settles.
	const mainResult = await execute(comparison.sql, comparison.params);
	const countResult = await execute(comparison.countSql, comparison.countParams);
	const totalsResult = await execute(totals.sql, totals.params);
	return {
		rows: mainResult.rows,
		totalCount: Number(countResult.rows[0]?.total ?? 0),
		totals: totalsResult.rows[0] ?? {}
	};
}
|
|
759
848
|
function assertSchemaInSync(options) {
|
|
760
849
|
const { label, schema, tableKeyToName, mode } = options;
|
|
761
850
|
for (const [key, table] of Object.entries(schema)) {
|
|
@@ -766,4 +855,4 @@ function assertSchemaInSync(options) {
|
|
|
766
855
|
if (missing.length > 0 || extra.length > 0) throw new Error(`${label} drizzle schema for '${key}' drifted from SCHEMAS. Missing: [${missing.join(", ")}]. Extra: [${extra.join(", ")}].`);
|
|
767
856
|
}
|
|
768
857
|
}
|
|
769
|
-
export {
|
|
858
|
+
export { LOGICAL_DATASETS as C, supportsDimensionOnSurface as D, inferLogicalDataset as E, DIMENSION_SURFACES as S, dimensionColumn as T, resolveComparisonSQL as _, pgResolverAdapter as a, createResolverAdapter as b, getFilterDimensions as c, matchesMetricFilter as d, matchesTopLevelPage as f, mergeExtras as g, buildTotalsSql as h, createParquetResolverAdapter as i, getInternalFilters as l, buildExtrasQueries as m, runComparisonQuery as n, dimensionValue as o, metricValue as p, runOptimizedQuery as r, getDimensionFilters as s, assertSchemaInSync as t, matchesDimensionFilter as u, resolveToSQL as v, assertDimensionsSupported as w, createSqlFragments as x, resolveToSQLOptimized as y };
|
|
@@ -11,6 +11,11 @@ interface CompactionThresholds {
|
|
|
11
11
|
d7?: number;
|
|
12
12
|
d30?: number;
|
|
13
13
|
}
|
|
14
|
+
declare const RAW_DAILY_COMPACT_THRESHOLD = 7;
|
|
15
|
+
declare function countRawDailies(entries: ReadonlyArray<{
|
|
16
|
+
tier?: string | null;
|
|
17
|
+
partition: string;
|
|
18
|
+
}>): number;
|
|
14
19
|
declare function enumeratePartitions(startDate: string, endDate: string): string[];
|
|
15
20
|
/**
|
|
16
21
|
* Default `searchType` for entries written before the field landed and for
|
|
@@ -468,4 +473,4 @@ interface EngineOptions {
|
|
|
468
473
|
}
|
|
469
474
|
declare function dayPartition(date: string): string;
|
|
470
475
|
declare function objectKey(ctx: TenantCtx, table: TableName, partition: string, version: number, searchType?: SearchType): string;
|
|
471
|
-
export { SyncStateScope as A, inferSearchType as B, RunSQLOptions as C, SyncStateDetail as D, SyncState as E, WatermarkScope as F, CompactionThresholds as H, WriteCtx as I, WriteResult as L, TenantCtx$1 as M, Watermark as N, SyncStateFilter as O, WatermarkFilter as P, dayPartition as R, Row$1 as S, StorageEngine as T,
|
|
476
|
+
export { SyncStateScope as A, inferSearchType as B, RunSQLOptions as C, SyncStateDetail as D, SyncState as E, WatermarkScope as F, enumeratePartitions as G, CompactionThresholds as H, WriteCtx as I, WriteResult as L, TenantCtx$1 as M, Watermark as N, SyncStateFilter as O, WatermarkFilter as P, dayPartition as R, Row$1 as S, StorageEngine as T, RAW_DAILY_COMPACT_THRESHOLD as U, objectKey as V, countRawDailies as W, QueryCtx as _, EngineOptions as a, QueryExecutor as b, ListLiveFilter as c, ManifestPurgeResult as d, ManifestStore as f, PurgeUrlsResult as g, PurgeResult as h, DataSource as i, TableName$1 as j, SyncStateKind as k, LockScope as l, PurgeFilter as m, CompactionTier as n, FileSetRef as o, ParquetCodec as p, DEFAULT_SEARCH_TYPE as r, GcCtx as s, CodecCtx as t, ManifestEntry as u, QueryExecuteOptions as v, SearchType$1 as w, QueryResult as x, QueryExecuteResult as y, inferLegacyTier as z };
|
package/dist/index.d.mts
CHANGED
|
@@ -1,13 +1,116 @@
|
|
|
1
|
-
import { A as SyncStateScope, B as inferSearchType, C as RunSQLOptions, D as SyncStateDetail, E as SyncState, F as WatermarkScope, H as CompactionThresholds, I as WriteCtx, L as WriteResult, M as TenantCtx, N as Watermark, O as SyncStateFilter, P as WatermarkFilter, R as dayPartition, S as Row, T as StorageEngine, U as
|
|
1
|
+
import { A as SyncStateScope, B as inferSearchType, C as RunSQLOptions, D as SyncStateDetail, E as SyncState, F as WatermarkScope, G as enumeratePartitions, H as CompactionThresholds, I as WriteCtx, L as WriteResult, M as TenantCtx, N as Watermark, O as SyncStateFilter, P as WatermarkFilter, R as dayPartition, S as Row, T as StorageEngine, U as RAW_DAILY_COMPACT_THRESHOLD, V as objectKey, W as countRawDailies, _ as QueryCtx, a as EngineOptions, b as QueryExecutor, c as ListLiveFilter, d as ManifestPurgeResult, f as ManifestStore, g as PurgeUrlsResult, h as PurgeResult, i as DataSource, j as TableName, k as SyncStateKind, l as LockScope, m as PurgeFilter, n as CompactionTier, o as FileSetRef, p as ParquetCodec, r as DEFAULT_SEARCH_TYPE, s as GcCtx, t as CodecCtx, u as ManifestEntry, v as QueryExecuteOptions, w as SearchType, x as QueryResult, y as QueryExecuteResult, z as inferLegacyTier } from "./_chunks/storage.mjs";
|
|
2
2
|
import { a as createDuckDBExecutor, i as createDuckDBCodec, n as DuckDBHandle, r as canonicalEmptyParquetSchema, t as DuckDBFactory } from "./_chunks/duckdb.mjs";
|
|
3
3
|
import { _ as pages, a as allTables, c as inferTable, d as TABLE_METADATA, f as countries, g as page_keywords, h as keywords, i as TableSchema, m as drizzleSchema, n as ColumnType, o as currentSchemaVersion, p as devices, r as SCHEMAS, s as dimensionToColumn, t as ColumnDef, u as DrizzleSchema } from "./_chunks/schema.mjs";
|
|
4
4
|
import { InspectionVerdict, SchedulePolicy, ScheduleState, fixedPolicy, inspectionPolicy, sitemapPolicy } from "./schedule.mjs";
|
|
5
5
|
import { GscApiRow, IngestOptions, RowAccumulator, RowAccumulatorOptions, createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
|
|
6
6
|
import { a as substituteNamedFiles, i as resolveToSQL, n as ResolvedQuery, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
|
|
7
7
|
import { bindLiterals, formatLiteral } from "./sql-bind.mjs";
|
|
8
|
-
import { Row as Row$1 } from "@gscdump/contracts";
|
|
8
|
+
import { Row as Row$1, TableName as TableName$1 } from "@gscdump/contracts";
|
|
9
9
|
declare function coerceRow(row: Row$1): Row$1;
|
|
10
10
|
declare function coerceRows(rows: readonly Row$1[]): Row$1[];
|
|
11
11
|
declare const MAX_DAY_BYTES: number;
|
|
12
12
|
declare function createStorageEngine(opts: EngineOptions): StorageEngine;
|
|
13
|
-
|
|
13
|
+
interface IngestAccumulatorEngine {
|
|
14
|
+
writeDay: (scope: TenantCtx & {
|
|
15
|
+
table: TableName$1;
|
|
16
|
+
date: string;
|
|
17
|
+
searchType?: SearchType;
|
|
18
|
+
}, rows: Row$1[]) => Promise<void>;
|
|
19
|
+
setSyncState: (scope: TenantCtx & {
|
|
20
|
+
table: TableName$1;
|
|
21
|
+
date: string;
|
|
22
|
+
searchType?: SearchType;
|
|
23
|
+
}, state: 'done' | 'failed', info?: {
|
|
24
|
+
error?: string;
|
|
25
|
+
}) => Promise<void>;
|
|
26
|
+
}
|
|
27
|
+
interface IngestAccumulatorCtx {
|
|
28
|
+
userId: string | number;
|
|
29
|
+
siteId: string;
|
|
30
|
+
searchType?: SearchType;
|
|
31
|
+
}
|
|
32
|
+
interface IngestAccumulatorHooks {
|
|
33
|
+
/**
|
|
34
|
+
* Called once per (table, date) when the job must abandon in-memory rows
|
|
35
|
+
* (overflow or `hasMore` continuation). Host queues a forced re-sync from
|
|
36
|
+
* the source. Return true iff a recovery job was actually queued.
|
|
37
|
+
*/
|
|
38
|
+
onRecover: (table: TableName$1, date: string) => Promise<boolean>;
|
|
39
|
+
/**
|
|
40
|
+
* Called when an engine.writeDay fails or recovery itself errors. Host
|
|
41
|
+
* logs to its error sink (e.g. `r2_write_errors` D1 table).
|
|
42
|
+
*/
|
|
43
|
+
onWriteError: (info: {
|
|
44
|
+
table: TableName$1 | null;
|
|
45
|
+
date: string | null;
|
|
46
|
+
error: unknown;
|
|
47
|
+
}) => Promise<void>;
|
|
48
|
+
/**
|
|
49
|
+
* Called after a successful writeDay for a (table, date). Host typically
|
|
50
|
+
* busts the manifest cache here so the next read sees the new parquet.
|
|
51
|
+
*/
|
|
52
|
+
onWritten?: (info: {
|
|
53
|
+
table: TableName$1;
|
|
54
|
+
date: string;
|
|
55
|
+
rowCount: number;
|
|
56
|
+
}) => void | Promise<void>;
|
|
57
|
+
/**
|
|
58
|
+
* Called once at end of `finalize`, only when at least one (table, date)
|
|
59
|
+
* actually landed. Host queues rollup rebuild + compaction.
|
|
60
|
+
*/
|
|
61
|
+
onJobComplete?: (info: {
|
|
62
|
+
flushed: number;
|
|
63
|
+
rowsWritten: number;
|
|
64
|
+
}) => Promise<void>;
|
|
65
|
+
}
|
|
66
|
+
interface FinalizeOptions {
|
|
67
|
+
/**
|
|
68
|
+
* The GSC `hasMore` flag for the whole job. When true, in-memory buckets
|
|
69
|
+
* only reflect this job's slice; we re-queue forced single-day re-syncs
|
|
70
|
+
* via `onRecover` so R2 stays authoritative.
|
|
71
|
+
*/
|
|
72
|
+
hasMore: boolean;
|
|
73
|
+
}
|
|
74
|
+
interface FinalizeResult {
|
|
75
|
+
flushed: number;
|
|
76
|
+
recovered: number;
|
|
77
|
+
failed: number;
|
|
78
|
+
rowsWritten: number;
|
|
79
|
+
}
|
|
80
|
+
interface IngestAccumulator {
|
|
81
|
+
push: (table: TableName$1, rows: readonly GscApiRow[]) => boolean;
|
|
82
|
+
finalize: (opts: FinalizeOptions) => Promise<FinalizeResult>;
|
|
83
|
+
}
|
|
84
|
+
interface CreateIngestAccumulatorOptions extends RowAccumulatorOptions {
|
|
85
|
+
engine: IngestAccumulatorEngine;
|
|
86
|
+
ctx: IngestAccumulatorCtx;
|
|
87
|
+
hooks: IngestAccumulatorHooks;
|
|
88
|
+
}
|
|
89
|
+
declare function createNoopIngestAccumulator(): IngestAccumulator;
|
|
90
|
+
declare function createIngestAccumulator(opts: CreateIngestAccumulatorOptions): IngestAccumulator;
|
|
91
|
+
type SyncTableName = Extract<TableName$1, 'pages' | 'keywords' | 'countries' | 'devices' | 'page_keywords'>;
|
|
92
|
+
declare const TABLES_BY_SEARCH_TYPE: Record<SearchType, readonly SyncTableName[]>;
|
|
93
|
+
declare function parseEnabledSearchTypes(raw: string | null | undefined): SearchType[];
|
|
94
|
+
declare function validateEnabledSearchTypes(value: unknown): SearchType[];
|
|
95
|
+
declare const TABLE_TIERS: {
|
|
96
|
+
readonly pages: "critical";
|
|
97
|
+
readonly keywords: "critical";
|
|
98
|
+
readonly countries: "standard";
|
|
99
|
+
readonly devices: "standard";
|
|
100
|
+
readonly page_keywords: "extended";
|
|
101
|
+
};
|
|
102
|
+
type TieredTableName = keyof typeof TABLE_TIERS;
|
|
103
|
+
type TableTier = 'critical' | 'standard' | 'extended';
|
|
104
|
+
type DateWeight = 'fresh' | 'recent' | 'historical';
|
|
105
|
+
declare function getTableTier(table: string): TableTier;
|
|
106
|
+
declare function getTablesForTier(tier: TableTier): TieredTableName[];
|
|
107
|
+
declare function getDateWeight(date: string, now?: Date): DateWeight;
|
|
108
|
+
declare const TIER_PRIORITY: Record<TableTier, number>;
|
|
109
|
+
declare const WEIGHT_PRIORITY: Record<DateWeight, number>;
|
|
110
|
+
declare const MAX_GSC_PAGES_R2 = 40;
|
|
111
|
+
declare const ROW_LIMIT_R2 = 10000;
|
|
112
|
+
declare const MIN_SYNC_IMPRESSIONS = 1;
|
|
113
|
+
declare const MIN_COUNTRY_IMPRESSIONS = 10;
|
|
114
|
+
declare const MAX_SITEMAP_URLS_PER_SITE = 50000;
|
|
115
|
+
declare const MAX_TRACKED_URLS_PER_SITE = 200000;
|
|
116
|
+
export { type CodecCtx, type ColumnDef, type ColumnType, type CompactionThresholds, type CompactionTier, type CreateIngestAccumulatorOptions, DEFAULT_SEARCH_TYPE, type DataSource, type DateWeight, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, type EngineOptions, FILES_PLACEHOLDER, type FileSetRef, type FinalizeOptions, type FinalizeResult, type GcCtx, type GscApiRow, type IngestAccumulator, type IngestAccumulatorCtx, type IngestAccumulatorEngine, type IngestAccumulatorHooks, type IngestOptions, type InspectionVerdict, type ListLiveFilter, type LockScope, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SchedulePolicy, type ScheduleState, type SearchType, type StorageEngine, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, type SyncTableName, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, type TableName, type TableSchema, type TableTier, type TenantCtx, type TieredTableName, WEIGHT_PRIORITY, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, getDateWeight, getTableTier, getTablesForTier, 
inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, keywords, objectKey, page_keywords, pages, parseEnabledSearchTypes, resolveToSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
|
package/dist/index.mjs
CHANGED
|
@@ -1,10 +1,217 @@
|
|
|
1
1
|
import { n as coerceRows, t as coerceRow } from "./_chunks/coerce.mjs";
|
|
2
2
|
import { a as inferTable, c as countries, d as keywords, f as page_keywords, i as dimensionToColumn, l as devices, n as allTables, p as pages, r as currentSchemaVersion, s as TABLE_METADATA, t as SCHEMAS, u as drizzleSchema } from "./_chunks/schema.mjs";
|
|
3
3
|
import { i as inferSearchType, n as dayPartition, r as inferLegacyTier, s as objectKey, t as DEFAULT_SEARCH_TYPE } from "./_chunks/storage.mjs";
|
|
4
|
-
import {
|
|
4
|
+
import { a as RAW_DAILY_COMPACT_THRESHOLD, c as enumeratePartitions, i as substituteNamedFiles, r as resolveToSQL, s as countRawDailies, t as FILES_PLACEHOLDER } from "./_chunks/compiler.mjs";
|
|
5
5
|
import { bindLiterals, formatLiteral } from "./sql-bind.mjs";
|
|
6
6
|
import { a as createDuckDBExecutor, i as createDuckDBCodec, n as createStorageEngine, r as canonicalEmptyParquetSchema, t as MAX_DAY_BYTES } from "./_chunks/engine.mjs";
|
|
7
7
|
import { createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
|
|
8
8
|
import "./planner.mjs";
|
|
9
9
|
import { fixedPolicy, inspectionPolicy, sitemapPolicy } from "./schedule.mjs";
|
|
10
|
-
|
|
10
|
+
/**
 * Template for the all-zero finalize result. Frozen so it can never be
 * altered; callers always receive a copy.
 */
const NOOP_RESULT = Object.freeze({
	flushed: 0,
	recovered: 0,
	failed: 0,
	rowsWritten: 0
});

/**
 * Builds the scope object passed to the engine for one (table, date) pair.
 *
 * `userId` is stringified; `searchType` is only included when the context
 * defines it.
 *
 * @param ctx Accumulator context (`userId`, `siteId`, optional `searchType`).
 * @param table Table name.
 * @param date Date string for the bucket.
 * @returns Scope with `userId`, `siteId`, `table`, `date` and, when set, `searchType`.
 */
function scopeOf(ctx, table, date) {
	return {
		userId: String(ctx.userId),
		siteId: ctx.siteId,
		table,
		date,
		...ctx.searchType !== void 0 ? { searchType: ctx.searchType } : {}
	};
}

/**
 * Creates an accumulator that stores nothing: `push` always returns false and
 * `finalize` resolves to an all-zero result.
 *
 * Each `finalize` call resolves to a fresh object, so a caller that mutates
 * the result it received (for example while aggregating counters) cannot
 * affect the value seen by any later call.
 *
 * @returns An accumulator exposing `push` and `finalize`.
 */
function createNoopIngestAccumulator() {
	return {
		push() {
			return false;
		},
		async finalize() {
			return { ...NOOP_RESULT };
		}
	};
}
|
|
35
|
+
/**
 * Creates an ingest accumulator that buffers rows through a row accumulator
 * and, on `finalize`, either writes each (table, date) bucket through the
 * engine or hands every bucket to the host for recovery.
 *
 * Engine and hook calls are awaited inside `try/catch`, so a failure is
 * handled the same way whether the callee rejects or throws synchronously:
 * it is reported through `hooks.onWriteError` and counted for that bucket
 * only, instead of rejecting `finalize` or discarding the results of the
 * other buckets.
 *
 * @param opts `engine`, `ctx` and `hooks`, plus the remaining options that are
 *   forwarded unchanged to `createRowAccumulator`.
 * @returns An accumulator exposing `push` and `finalize`.
 */
function createIngestAccumulator(opts) {
	const { engine, ctx, hooks, ...accOpts } = opts;
	const acc = createRowAccumulator(accOpts);

	/** Reports an error to the host; failures of the reporter itself are ignored. */
	async function reportError(table, date, error) {
		try {
			await hooks.onWriteError({
				table,
				date,
				error
			});
		} catch {
			// Best effort: error reporting must never fail the job.
		}
	}

	/** Writes one bucket, marks it done and notifies `onWritten`; resolves to `{ ok, rows? }`. */
	async function writeOne(table, date, rows) {
		const scope = scopeOf(ctx, table, date);
		try {
			await engine.writeDay(scope, rows);
			await engine.setSyncState(scope, "done");
			await hooks.onWritten?.({
				table,
				date,
				rowCount: rows.length
			});
			return {
				ok: true,
				rows: rows.length
			};
		} catch (err) {
			await reportError(table, date, err);
			return { ok: false };
		}
	}

	/** Marks one bucket failed and asks the host to recover it; resolves to the hook's answer, or false on error. */
	async function recover(table, date) {
		const scope = scopeOf(ctx, table, date);
		try {
			await engine.setSyncState(scope, "failed", { error: "mid-continuation-skip" });
		} catch {
			// Best effort: recovery is still requested when the state update fails.
		}
		try {
			return await hooks.onRecover(table, date);
		} catch (err) {
			await reportError(table, date, err);
			return false;
		}
	}

	return {
		push(table, rows) {
			return acc.push(table, rows);
		},
		async finalize({ hasMore }) {
			// Both values are captured before drain() is called.
			const overflowed = acc.overflowed;
			const totalRows = acc.totalRows;
			const buckets = acc.drain();
			if (overflowed || hasMore) {
				// In-memory rows are abandoned: every bucket goes through recovery instead of being written.
				const tasks = [];
				for (const [table, byDate] of buckets) for (const date of byDate.keys()) tasks.push(recover(table, date));
				let results;
				try {
					results = await Promise.all(tasks);
				} catch (err) {
					await reportError(null, null, err);
					results = [];
				}
				if (overflowed) await reportError(null, null, new Error(`ingest accumulator overflow at ${totalRows} rows; recovering via forced re-sync`));
				return {
					flushed: 0,
					recovered: results.filter(Boolean).length,
					failed: 0,
					rowsWritten: 0
				};
			}
			const writes = [];
			for (const [table, byDate] of buckets) for (const [date, rows] of byDate) writes.push(writeOne(table, date, rows));
			const outcomes = await Promise.all(writes);
			let flushed = 0;
			let failed = 0;
			let rowsWritten = 0;
			for (const outcome of outcomes) if (outcome.ok) {
				flushed++;
				rowsWritten += outcome.rows;
			} else failed++;
			if (flushed > 0) try {
				await hooks.onJobComplete?.({
					flushed,
					rowsWritten
				});
			} catch {
				// Best effort: a failing completion hook does not change the result.
			}
			return {
				flushed,
				recovered: 0,
				failed,
				rowsWritten
			};
		}
	};
}
|
|
125
|
+
/** Tables available for each search type. */
const TABLES_BY_SEARCH_TYPE = {
	web: [
		"pages",
		"keywords",
		"countries",
		"devices",
		"page_keywords"
	],
	discover: [
		"pages",
		"countries",
		"devices"
	],
	news: [
		"pages",
		"countries",
		"devices"
	],
	googleNews: [
		"pages",
		"countries",
		"devices"
	],
	image: [
		"pages",
		"countries",
		"devices"
	],
	video: [
		"pages",
		"countries",
		"devices"
	]
};

/**
 * True when `value` is a string naming one of the keys declared directly on
 * `TABLES_BY_SEARCH_TYPE`. An own-property check is used because the `in`
 * operator also matches inherited members such as "toString" or "constructor".
 */
function isKnownSearchType(value) {
	return typeof value === "string" && Object.prototype.hasOwnProperty.call(TABLES_BY_SEARCH_TYPE, value);
}

/**
 * Leniently parses a stored JSON list of enabled search types.
 *
 * Falls back to `["web"]` when `raw` is empty, is not valid JSON, is not a
 * non-empty array, or contains no known search type. Unknown entries are
 * dropped, and "web" is prepended when it is missing from the valid entries.
 *
 * @param raw JSON text, or null/undefined.
 * @returns The enabled search types, always containing "web".
 */
function parseEnabledSearchTypes(raw) {
	if (!raw) return ["web"];
	let parsed;
	try {
		parsed = JSON.parse(raw);
	} catch {
		// Malformed JSON is treated like every other unusable value.
		return ["web"];
	}
	if (!Array.isArray(parsed) || parsed.length === 0) return ["web"];
	const valid = parsed.filter((v) => isKnownSearchType(v));
	if (valid.length === 0) return ["web"];
	if (!valid.includes("web")) valid.unshift("web");
	return valid;
}

/**
 * Strictly validates a list of enabled search types.
 *
 * @param value Candidate value of unknown shape.
 * @returns The entries in their original order with duplicates removed.
 * @throws Error when `value` is not a non-empty array, when any entry is not a
 *   known search type, or when "web" is not included.
 */
function validateEnabledSearchTypes(value) {
	if (!Array.isArray(value) || value.length === 0) throw new Error("enabledSearchTypes must be a non-empty array");
	const seen = new Set();
	const out = [];
	for (const v of value) {
		if (!isKnownSearchType(v)) throw new Error(`enabledSearchTypes: unknown searchType ${String(v)}`);
		if (seen.has(v)) continue;
		seen.add(v);
		out.push(v);
	}
	if (!out.includes("web")) throw new Error("enabledSearchTypes must include \"web\"");
	return out;
}
|
|
181
|
+
/** Tier assigned to each tiered table. */
const TABLE_TIERS = {
	pages: "critical",
	keywords: "critical",
	countries: "standard",
	devices: "standard",
	page_keywords: "extended"
};

/**
 * Looks up the tier of a table.
 *
 * Only keys declared directly on `TABLE_TIERS` are consulted, so names that
 * collide with inherited object members (e.g. "constructor", "toString")
 * resolve to the fallback instead of leaking a non-tier value.
 *
 * @param table Table name.
 * @returns The table's tier, or "extended" when the table is not listed.
 */
function getTableTier(table) {
	return Object.prototype.hasOwnProperty.call(TABLE_TIERS, table) ? TABLE_TIERS[table] : "extended";
}

/**
 * Lists the tables assigned to a tier, in `TABLE_TIERS` declaration order.
 *
 * @param tier Tier to match.
 * @returns Names of the tables in that tier (empty when none match).
 */
function getTablesForTier(tier) {
	return Object.entries(TABLE_TIERS).filter(([_, t]) => t === tier).map(([name]) => name);
}
|
|
194
|
+
/**
 * Classifies a date by its age in whole days relative to `now`.
 *
 * Up to 3 days old (including dates after `now`) is "fresh", up to 60 days is
 * "recent", anything else is "historical". A date that cannot be parsed
 * yields a NaN age, which fails both comparisons and is therefore
 * "historical".
 *
 * @param date Value accepted by the `Date` constructor.
 * @param now Reference instant; defaults to the current time.
 * @returns "fresh", "recent" or "historical".
 */
function getDateWeight(date, now = new Date()) {
	const millisPerDay = 24 * 60 * 60 * 1e3;
	const elapsedMillis = now.getTime() - new Date(date).getTime();
	const daysAgo = Math.floor(elapsedMillis / millisPerDay);
	// Comparisons stay in "<=" form so a NaN age falls through to "historical".
	return daysAgo <= 3 ? "fresh" : daysAgo <= 60 ? "recent" : "historical";
}
|
|
201
|
+
/** Numeric rank per table tier. NOTE(review): presumably lower sorts first — confirm with the consumer. */
const TIER_PRIORITY = { critical: 0, standard: 1, extended: 2 };

/** Numeric rank per date weight. NOTE(review): presumably lower sorts first — confirm with the consumer. */
const WEIGHT_PRIORITY = { fresh: 0, recent: 1, historical: 2 };

// Sync and tracking limits. NOTE(review): the exact meaning of each limit is not
// visible here — confirm against the code that reads them.
const MAX_GSC_PAGES_R2 = 40;
const ROW_LIMIT_R2 = 10000;
const MIN_SYNC_IMPRESSIONS = 1;
const MIN_COUNTRY_IMPRESSIONS = 10;
const MAX_SITEMAP_URLS_PER_SITE = 50000;
const MAX_TRACKED_URLS_PER_SITE = 200000;
|
|
217
|
+
export { DEFAULT_SEARCH_TYPE, FILES_PLACEHOLDER, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, SCHEMAS, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, WEIGHT_PRIORITY, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, getDateWeight, getTableTier, getTablesForTier, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, keywords, objectKey, page_keywords, pages, parseEnabledSearchTypes, resolveToSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
|
package/dist/planner.d.mts
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { G as enumeratePartitions } from "./_chunks/storage.mjs";
|
|
2
2
|
import { a as substituteNamedFiles, i as resolveToSQL, n as ResolvedQuery, r as compileLogicalQueryPlan, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
|
|
3
3
|
export { FILES_PLACEHOLDER, ResolvedQuery, compileLogicalQueryPlan, enumeratePartitions, resolveToSQL, substituteNamedFiles };
|
package/dist/planner.mjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { c as enumeratePartitions, i as substituteNamedFiles, n as compileLogicalQueryPlan, r as resolveToSQL, t as FILES_PLACEHOLDER } from "./_chunks/compiler.mjs";
|
|
2
2
|
export { FILES_PLACEHOLDER, compileLogicalQueryPlan, enumeratePartitions, resolveToSQL, substituteNamedFiles };
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { j as TableName$1, w as SearchType$1 } from "../_chunks/storage.mjs";
|
|
1
2
|
import { a as ResolvedSQLOptimized, i as ResolvedSQL, n as ExtraQuery, o as ResolverAdapter, r as ResolvedComparisonSQL, s as ResolverOptions, t as ComparisonFilter } from "../_chunks/types.mjs";
|
|
2
3
|
import { LogicalDataset, LogicalDataset as LogicalDataset$1, PlannerCapabilities } from "gscdump/query/plan";
|
|
3
4
|
import { SQL } from "drizzle-orm";
|
|
@@ -96,6 +97,63 @@ declare const pgResolverAdapter: ResolverAdapter<PgTableKey>;
|
|
|
96
97
|
* accidental adapter caching that would lock in a stale `{{FILES}}` set.
|
|
97
98
|
*/
|
|
98
99
|
declare function createParquetResolverAdapter(): ResolverAdapter<PgTableKey>;
|
|
100
|
+
interface RunQueryCtx {
|
|
101
|
+
userId: string;
|
|
102
|
+
siteId: string;
|
|
103
|
+
table: TableName$1;
|
|
104
|
+
searchType?: SearchType$1;
|
|
105
|
+
}
|
|
106
|
+
interface RunSQLFn {
|
|
107
|
+
(opts: {
|
|
108
|
+
ctx: {
|
|
109
|
+
userId: string;
|
|
110
|
+
siteId: string;
|
|
111
|
+
};
|
|
112
|
+
table: TableName$1;
|
|
113
|
+
fileSets: Record<string, {
|
|
114
|
+
table: TableName$1;
|
|
115
|
+
partitions: string[];
|
|
116
|
+
}>;
|
|
117
|
+
sql: string;
|
|
118
|
+
params: unknown[];
|
|
119
|
+
searchType?: SearchType$1;
|
|
120
|
+
}): Promise<{
|
|
121
|
+
rows: Array<Record<string, unknown>>;
|
|
122
|
+
}>;
|
|
123
|
+
}
|
|
124
|
+
interface OptimizedQueryResult {
|
|
125
|
+
rows: Array<Record<string, unknown>>;
|
|
126
|
+
totalCount: number;
|
|
127
|
+
totals: {
|
|
128
|
+
clicks: number;
|
|
129
|
+
impressions: number;
|
|
130
|
+
ctr: number;
|
|
131
|
+
position: number;
|
|
132
|
+
};
|
|
133
|
+
extras: Array<{
|
|
134
|
+
key: string;
|
|
135
|
+
rows: Array<Record<string, unknown>>;
|
|
136
|
+
}>;
|
|
137
|
+
}
|
|
138
|
+
interface ComparisonQueryResult {
|
|
139
|
+
rows: Array<Record<string, unknown>>;
|
|
140
|
+
totalCount: number;
|
|
141
|
+
totals: Record<string, unknown>;
|
|
142
|
+
}
|
|
143
|
+
declare function runOptimizedQuery(runSQL: RunSQLFn, ctx: RunQueryCtx, state: BuilderState, dateRange: {
|
|
144
|
+
startDate: string;
|
|
145
|
+
endDate: string;
|
|
146
|
+
}): Promise<OptimizedQueryResult>;
|
|
147
|
+
declare function runComparisonQuery(runSQL: RunSQLFn, ctx: RunQueryCtx, current: BuilderState, previous: BuilderState, windows: {
|
|
148
|
+
current: {
|
|
149
|
+
startDate: string;
|
|
150
|
+
endDate: string;
|
|
151
|
+
};
|
|
152
|
+
previous: {
|
|
153
|
+
startDate: string;
|
|
154
|
+
endDate: string;
|
|
155
|
+
};
|
|
156
|
+
}, filter?: ComparisonFilter): Promise<ComparisonQueryResult>;
|
|
99
157
|
interface AssertSchemaInSyncOptions {
|
|
100
158
|
/** Label used in the thrown error (e.g. 'browser', 'sqlite'). */
|
|
101
159
|
label: string;
|
|
@@ -109,4 +167,4 @@ interface AssertSchemaInSyncOptions {
|
|
|
109
167
|
mode: 'exact' | 'superset';
|
|
110
168
|
}
|
|
111
169
|
declare function assertSchemaInSync(options: AssertSchemaInSyncOptions): void;
|
|
112
|
-
export { type AssertSchemaInSyncOptions, type ComparisonFilter, type CreateResolverAdapterConfig, DIMENSION_SURFACES, type DimensionBinding, type DimensionSurface, type ExtraQuery, LOGICAL_DATASETS, type LogicalDataset, type LogicalDatasetDefinition, type PgTableKey, type ResolvedComparisonSQL, type ResolvedSQL, type ResolvedSQLOptimized, type ResolverAdapter, type ResolverOptions, type SqlFragments, type SqlFragmentsConfig, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, supportsDimensionOnSurface };
|
|
170
|
+
export { type AssertSchemaInSyncOptions, type ComparisonFilter, type ComparisonQueryResult, type CreateResolverAdapterConfig, DIMENSION_SURFACES, type DimensionBinding, type DimensionSurface, type ExtraQuery, LOGICAL_DATASETS, type LogicalDataset, type LogicalDatasetDefinition, type OptimizedQueryResult, type PgTableKey, type ResolvedComparisonSQL, type ResolvedSQL, type ResolvedSQLOptimized, type ResolverAdapter, type ResolverOptions, type RunQueryCtx, type RunSQLFn, type SqlFragments, type SqlFragmentsConfig, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, runComparisonQuery, runOptimizedQuery, supportsDimensionOnSurface };
|
package/dist/resolver/index.mjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { C as
|
|
2
|
-
export { DIMENSION_SURFACES, LOGICAL_DATASETS, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, supportsDimensionOnSurface };
|
|
1
|
+
import { C as LOGICAL_DATASETS, D as supportsDimensionOnSurface, E as inferLogicalDataset, S as DIMENSION_SURFACES, T as dimensionColumn, _ as resolveComparisonSQL, a as pgResolverAdapter, b as createResolverAdapter, c as getFilterDimensions, d as matchesMetricFilter, f as matchesTopLevelPage, g as mergeExtras, h as buildTotalsSql, i as createParquetResolverAdapter, l as getInternalFilters, m as buildExtrasQueries, n as runComparisonQuery, o as dimensionValue, p as metricValue, r as runOptimizedQuery, s as getDimensionFilters, t as assertSchemaInSync, u as matchesDimensionFilter, v as resolveToSQL, w as assertDimensionsSupported, x as createSqlFragments, y as resolveToSQLOptimized } from "../_chunks/resolver.mjs";
|
|
2
|
+
export { DIMENSION_SURFACES, LOGICAL_DATASETS, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, runComparisonQuery, runOptimizedQuery, supportsDimensionOnSurface };
|
package/dist/rollups.d.mts
CHANGED
|
@@ -58,6 +58,14 @@ interface RollupDef {
|
|
|
58
58
|
parquetColumns?: readonly ColumnDef[];
|
|
59
59
|
/** Sort-key column names for parquet row-group stats. Optional. */
|
|
60
60
|
parquetSortKey?: readonly string[];
|
|
61
|
+
/**
|
|
62
|
+
* When true, this rollup's payload is independent of GSC slice (e.g. entity
|
|
63
|
+
* rollups sourced from sitemap / indexing snapshots, not slice-partitioned
|
|
64
|
+
* fact tables). The runner rejects calls that pass `searchType` alongside
|
|
65
|
+
* a slice-orthogonal def so the output never lands under a per-slice prefix
|
|
66
|
+
* that the read path won't look at.
|
|
67
|
+
*/
|
|
68
|
+
sliceOrthogonal?: boolean;
|
|
61
69
|
build: (deps: {
|
|
62
70
|
engine: RollupEngine;
|
|
63
71
|
ctx: TenantCtx;
|
|
@@ -102,6 +110,19 @@ interface ParquetRollupPointer {
|
|
|
102
110
|
}
|
|
103
111
|
declare function rollupKey(ctx: TenantCtx, id: string, builtAt: number, searchType?: SearchType): string;
|
|
104
112
|
declare function rollupParquetKey(ctx: TenantCtx, id: string, builtAt: number, searchType?: SearchType): string;
|
|
113
|
+
interface RollupBucket {
|
|
114
|
+
list: (opts: {
|
|
115
|
+
prefix: string;
|
|
116
|
+
}) => Promise<{
|
|
117
|
+
objects: Array<{
|
|
118
|
+
key: string;
|
|
119
|
+
}>;
|
|
120
|
+
}>;
|
|
121
|
+
get: (key: string) => Promise<{
|
|
122
|
+
text: () => Promise<string>;
|
|
123
|
+
} | null>;
|
|
124
|
+
}
|
|
125
|
+
declare function readLatestRollup<T = unknown>(bucket: RollupBucket, ctx: TenantCtx, id: string, searchType?: SearchType): Promise<RollupEnvelope<T> | null>;
|
|
105
126
|
interface RebuildRollupsOptions {
|
|
106
127
|
engine: RollupEngine;
|
|
107
128
|
dataSource: DataSource;
|
|
@@ -215,4 +236,4 @@ declare const sitemapHealthRollup: RollupDef;
|
|
|
215
236
|
*/
|
|
216
237
|
declare const sitemapChanges28dRollup: RollupDef;
|
|
217
238
|
declare const DEFAULT_ROLLUPS: readonly RollupDef[];
|
|
218
|
-
export { DEFAULT_ROLLUPS, ParquetRollupPointer, RebuildRollupResult, RebuildRollupsOptions, RollupCtx, RollupDef, RollupEngine, RollupEnvelope, dailyTotalsRollup, indexPercentRollup, indexingHealthRollup, indexingMetadataRollup, rebuildRollups, rollupKey, rollupParquetKey, sitemapChanges28dRollup, sitemapHealthRollup, topCountries28dRollup, topKeywords28dParquetRollup, topKeywords28dRollup, topPages28dRollup, weeklyTotalsRollup };
|
|
239
|
+
export { DEFAULT_ROLLUPS, ParquetRollupPointer, RebuildRollupResult, RebuildRollupsOptions, RollupBucket, RollupCtx, RollupDef, RollupEngine, RollupEnvelope, dailyTotalsRollup, indexPercentRollup, indexingHealthRollup, indexingMetadataRollup, readLatestRollup, rebuildRollups, rollupKey, rollupParquetKey, sitemapChanges28dRollup, sitemapHealthRollup, topCountries28dRollup, topKeywords28dParquetRollup, topKeywords28dRollup, topPages28dRollup, weeklyTotalsRollup };
|
package/dist/rollups.mjs
CHANGED
|
@@ -11,10 +11,33 @@ function rollupKey(ctx, id, builtAt, searchType) {
|
|
|
11
11
|
function rollupParquetKey(ctx, id, builtAt, searchType) {
|
|
12
12
|
return `${rollupPrefix(ctx, searchType)}/${id}__v${builtAt}.parquet`;
|
|
13
13
|
}
|
|
14
|
+
const ROLLUP_FILE_RE = /^(?<id>[a-z0-9_]+)__v(?<ts>\d+)\.json$/;
|
|
15
|
+
async function readLatestRollup(bucket, ctx, id, searchType) {
|
|
16
|
+
const prefix = `${rollupPrefix(ctx, searchType)}/`;
|
|
17
|
+
const listing = await bucket.list({ prefix }).catch(() => null);
|
|
18
|
+
if (!listing) return null;
|
|
19
|
+
let newest = null;
|
|
20
|
+
for (const obj of listing.objects) {
|
|
21
|
+
const m = ROLLUP_FILE_RE.exec(obj.key.slice(prefix.length));
|
|
22
|
+
if (!m?.groups || m.groups.id !== id) continue;
|
|
23
|
+
const ts = Number(m.groups.ts);
|
|
24
|
+
if (!newest || ts > newest.ts) newest = {
|
|
25
|
+
ts,
|
|
26
|
+
key: obj.key
|
|
27
|
+
};
|
|
28
|
+
}
|
|
29
|
+
if (!newest) return null;
|
|
30
|
+
const obj = await bucket.get(newest.key).catch(() => null);
|
|
31
|
+
if (!obj) return null;
|
|
32
|
+
return JSON.parse(await obj.text());
|
|
33
|
+
}
|
|
14
34
|
async function rebuildRollups(opts) {
|
|
15
35
|
const now = opts.now ?? (() => Date.now());
|
|
16
36
|
const results = [];
|
|
17
37
|
const searchType = opts.searchType;
|
|
38
|
+
if (searchType !== void 0) {
|
|
39
|
+
for (const def of opts.defs) if (def.sliceOrthogonal === true) throw new Error(`rollup def '${def.id}' is slice-orthogonal; do not pass searchType`);
|
|
40
|
+
}
|
|
18
41
|
for (const def of opts.defs) {
|
|
19
42
|
const builtAt = now();
|
|
20
43
|
const payload = await def.build({
|
|
@@ -348,6 +371,7 @@ const indexingMetadataRollup = {
|
|
|
348
371
|
const indexingHealthRollup = {
|
|
349
372
|
id: "indexing_health",
|
|
350
373
|
windowDays: 90,
|
|
374
|
+
sliceOrthogonal: true,
|
|
351
375
|
async build({ engine, ctx, dataSource, builtAt }) {
|
|
352
376
|
const key = inspectionParquetKey(ctx);
|
|
353
377
|
if (!await dataSource.head?.(key)) return { days: [] };
|
|
@@ -391,6 +415,7 @@ const indexingHealthRollup = {
|
|
|
391
415
|
const indexPercentRollup = {
|
|
392
416
|
id: "index_percent",
|
|
393
417
|
windowDays: 90,
|
|
418
|
+
sliceOrthogonal: true,
|
|
394
419
|
async build({ engine, ctx, dataSource, builtAt, searchType }) {
|
|
395
420
|
const urlsKey = sitemapUrlsIndexKey(ctx);
|
|
396
421
|
if (!await dataSource.head?.(urlsKey)) return {
|
|
@@ -452,6 +477,7 @@ const indexPercentRollup = {
|
|
|
452
477
|
const sitemapHealthRollup = {
|
|
453
478
|
id: "sitemap_health",
|
|
454
479
|
windowDays: 90,
|
|
480
|
+
sliceOrthogonal: true,
|
|
455
481
|
async build({ dataSource, ctx, builtAt }) {
|
|
456
482
|
const index = await createSitemapStore({ dataSource }).loadIndex(ctx);
|
|
457
483
|
const records = Object.values(index.records);
|
|
@@ -495,6 +521,7 @@ const sitemapHealthRollup = {
|
|
|
495
521
|
const sitemapChanges28dRollup = {
|
|
496
522
|
id: "sitemap_changes_28d",
|
|
497
523
|
windowDays: 28,
|
|
524
|
+
sliceOrthogonal: true,
|
|
498
525
|
async build({ dataSource, ctx, builtAt }) {
|
|
499
526
|
const store = createSitemapStore({ dataSource });
|
|
500
527
|
const from = utcDateMinusDays(builtAt, 28);
|
|
@@ -562,4 +589,4 @@ const DEFAULT_ROLLUPS = [
|
|
|
562
589
|
sitemapHealthRollup,
|
|
563
590
|
sitemapChanges28dRollup
|
|
564
591
|
];
|
|
565
|
-
export { DEFAULT_ROLLUPS, dailyTotalsRollup, indexPercentRollup, indexingHealthRollup, indexingMetadataRollup, rebuildRollups, rollupKey, rollupParquetKey, sitemapChanges28dRollup, sitemapHealthRollup, topCountries28dRollup, topKeywords28dParquetRollup, topKeywords28dRollup, topPages28dRollup, weeklyTotalsRollup };
|
|
592
|
+
export { DEFAULT_ROLLUPS, dailyTotalsRollup, indexPercentRollup, indexingHealthRollup, indexingMetadataRollup, readLatestRollup, rebuildRollups, rollupKey, rollupParquetKey, sitemapChanges28dRollup, sitemapHealthRollup, topCountries28dRollup, topKeywords28dParquetRollup, topKeywords28dRollup, topPages28dRollup, weeklyTotalsRollup };
|
package/dist/source/index.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { n as coerceRows } from "../_chunks/coerce.mjs";
|
|
2
|
-
import {
|
|
2
|
+
import { a as pgResolverAdapter, c as getFilterDimensions, v as resolveToSQL, w as assertDimensionsSupported } from "../_chunks/resolver.mjs";
|
|
3
3
|
import { n as runAnalyzerFromSource } from "../_chunks/dispatch.mjs";
|
|
4
4
|
var AttachedTableMissingError = class extends Error {
|
|
5
5
|
missing;
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@gscdump/engine",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.17.
|
|
4
|
+
"version": "0.17.5",
|
|
5
5
|
"description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "Harlan Wilton",
|
|
@@ -169,8 +169,8 @@
|
|
|
169
169
|
"dependencies": {
|
|
170
170
|
"drizzle-orm": "^0.45.2",
|
|
171
171
|
"proper-lockfile": "^4.1.2",
|
|
172
|
-
"@gscdump/contracts": "0.17.
|
|
173
|
-
"gscdump": "0.17.
|
|
172
|
+
"@gscdump/contracts": "0.17.5",
|
|
173
|
+
"gscdump": "0.17.5"
|
|
174
174
|
},
|
|
175
175
|
"devDependencies": {
|
|
176
176
|
"@duckdb/duckdb-wasm": "^1.32.0",
|