@gscdump/engine 0.6.1 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/dist/_chunks/compiler.mjs +288 -0
  2. package/dist/_chunks/duckdb.d.mts +26 -0
  3. package/dist/_chunks/engine.mjs +578 -0
  4. package/dist/_chunks/pg-adapter.mjs +676 -0
  5. package/dist/_chunks/planner.d.mts +15 -0
  6. package/dist/_chunks/schema.d.mts +1258 -0
  7. package/dist/_chunks/schema.mjs +139 -0
  8. package/dist/_chunks/storage.d.mts +476 -0
  9. package/dist/_chunks/storage.mjs +39 -0
  10. package/dist/_chunks/types.d.mts +53 -0
  11. package/dist/adapters/duckdb-node.d.mts +1 -13
  12. package/dist/adapters/duckdb-node.mjs +1 -7
  13. package/dist/adapters/filesystem.d.mts +1 -193
  14. package/dist/adapters/filesystem.mjs +2 -9
  15. package/dist/adapters/http.d.mts +1 -193
  16. package/dist/adapters/http.mjs +1 -5
  17. package/dist/adapters/hyparquet.d.mts +6 -83
  18. package/dist/adapters/hyparquet.mjs +1 -105
  19. package/dist/adapters/inspection-sqlite-browser.d.mts +1 -7
  20. package/dist/adapters/inspection-sqlite-node.d.mts +1 -7
  21. package/dist/adapters/inspection-sqlite-node.mjs +1 -1
  22. package/dist/adapters/node-harness.d.mts +3 -306
  23. package/dist/adapters/node-harness.mjs +4 -1866
  24. package/dist/adapters/r2-manifest.d.mts +4 -149
  25. package/dist/adapters/r2-manifest.mjs +1 -8
  26. package/dist/adapters/r2.d.mts +1 -47
  27. package/dist/contracts.d.mts +1 -435
  28. package/dist/entities.d.mts +1 -47
  29. package/dist/index.d.mts +8 -1844
  30. package/dist/index.mjs +8 -1962
  31. package/dist/ingest.d.mts +1 -1
  32. package/dist/planner.d.mts +3 -16
  33. package/dist/planner.mjs +1 -320
  34. package/dist/resolver/index.d.mts +3 -51
  35. package/dist/resolver/index.mjs +2 -780
  36. package/dist/rollups.d.mts +6 -51
  37. package/dist/rollups.mjs +2 -209
  38. package/dist/schema.d.mts +2 -1258
  39. package/dist/schema.mjs +1 -138
  40. package/package.json +2 -2
package/dist/ingest.d.mts CHANGED
@@ -1,4 +1,4 @@
1
- import { Row, TableName } from "gscdump/contracts";
1
+ import { N as TableName, w as Row } from "./_chunks/storage.mjs";
2
2
  /**
3
3
  * Canonical GSC API dimension order per table. Consumers hitting the raw
4
4
  * `searchanalytics.query` endpoint must request dimensions in this order so
package/dist/planner.d.mts CHANGED
@@ -1,16 +1,3 @@
1
- import { LogicalQueryPlan } from "gscdump/query/plan";
2
- import { TableName } from "gscdump/contracts";
3
- import { BuilderState } from "gscdump/query";
4
- declare function enumeratePartitions(startDate: string, endDate: string): string[];
5
- interface ResolvedQuery {
6
- sql: string;
7
- params: unknown[];
8
- partitions: string[];
9
- table: TableName;
10
- filesPlaceholder: string;
11
- }
12
- declare const FILES_PLACEHOLDER = "{{FILES}}";
13
- declare function compileLogicalQueryPlan(plan: LogicalQueryPlan, table?: TableName): ResolvedQuery;
14
- declare function resolveToSQL(state: BuilderState, table?: TableName): ResolvedQuery;
15
- declare function substituteNamedFiles(sql: string, sets: Record<string, string[]>): string;
16
- export { FILES_PLACEHOLDER, type ResolvedQuery, compileLogicalQueryPlan, enumeratePartitions, resolveToSQL, substituteNamedFiles };
1
+ import { X as enumeratePartitions } from "./_chunks/storage.mjs";
2
+ import { a as substituteNamedFiles, i as resolveToSQL, n as ResolvedQuery, r as compileLogicalQueryPlan, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
3
+ export { FILES_PLACEHOLDER, ResolvedQuery, compileLogicalQueryPlan, enumeratePartitions, resolveToSQL, substituteNamedFiles };
package/dist/planner.mjs CHANGED
@@ -1,321 +1,2 @@
1
- import { MS_PER_DAY, toIsoDate } from "gscdump";
2
- import { date, doublePrecision, getTableConfig, integer, pgTable, varchar } from "drizzle-orm/pg-core";
3
- import { buildLogicalPlan } from "gscdump/query/plan";
4
- function metricCols() {
5
- return {
6
- clicks: integer("clicks").notNull(),
7
- impressions: integer("impressions").notNull(),
8
- sum_position: doublePrecision("sum_position").notNull()
9
- };
10
- }
11
- const dateCol = () => date("date").notNull();
12
- const drizzleSchema = {
13
- pages: pgTable("pages", {
14
- url: varchar("url").notNull(),
15
- date: dateCol(),
16
- ...metricCols()
17
- }),
18
- keywords: pgTable("keywords", {
19
- query: varchar("query").notNull(),
20
- query_canonical: varchar("query_canonical"),
21
- date: dateCol(),
22
- ...metricCols()
23
- }),
24
- countries: pgTable("countries", {
25
- country: varchar("country").notNull(),
26
- date: dateCol(),
27
- ...metricCols()
28
- }),
29
- devices: pgTable("devices", {
30
- device: varchar("device").notNull(),
31
- date: dateCol(),
32
- ...metricCols()
33
- }),
34
- page_keywords: pgTable("page_keywords", {
35
- url: varchar("url").notNull(),
36
- query: varchar("query").notNull(),
37
- query_canonical: varchar("query_canonical"),
38
- date: dateCol(),
39
- ...metricCols()
40
- }),
41
- search_appearance: pgTable("search_appearance", {
42
- searchAppearance: varchar("searchAppearance").notNull(),
43
- date: dateCol(),
44
- ...metricCols()
45
- })
46
- };
47
- const TABLE_METADATA = {
48
- pages: {
49
- sortKey: ["date", "url"],
50
- version: 1
51
- },
52
- keywords: {
53
- sortKey: ["date", "query"],
54
- version: 2
55
- },
56
- countries: {
57
- sortKey: ["date", "country"],
58
- version: 1
59
- },
60
- devices: {
61
- sortKey: ["date", "device"],
62
- version: 1
63
- },
64
- page_keywords: {
65
- sortKey: [
66
- "date",
67
- "url",
68
- "query"
69
- ],
70
- version: 2
71
- },
72
- search_appearance: {
73
- sortKey: ["date", "searchAppearance"],
74
- version: 1
75
- }
76
- };
77
- function pgSqlTypeToColumnType(sqlType) {
78
- const t = sqlType.toLowerCase();
79
- if (t.startsWith("varchar") || t === "text" || t.startsWith("char")) return "VARCHAR";
80
- if (t === "date" || t.startsWith("timestamp")) return "DATE";
81
- if (t.startsWith("double") || t === "real" || t.startsWith("numeric") || t.startsWith("decimal")) return "DOUBLE";
82
- if (t === "bigint" || t === "int8") return "BIGINT";
83
- if (t === "integer" || t === "int" || t === "int4" || t === "smallint" || t === "int2") return "INTEGER";
84
- throw new Error(`unmapped pg type '${sqlType}' — extend pgSqlTypeToColumnType in @gscdump/engine/schema`);
85
- }
86
- function tableSchemaFrom(tableName) {
87
- const columns = getTableConfig(drizzleSchema[tableName]).columns.map((col) => ({
88
- name: col.name,
89
- type: pgSqlTypeToColumnType(col.getSQLType()),
90
- nullable: !col.notNull
91
- }));
92
- const meta = TABLE_METADATA[tableName];
93
- return {
94
- name: tableName,
95
- columns,
96
- sortKey: meta.sortKey,
97
- version: meta.version
98
- };
99
- }
100
- Object.fromEntries([
101
- "pages",
102
- "keywords",
103
- "countries",
104
- "devices",
105
- "page_keywords",
106
- "search_appearance"
107
- ].map((t) => [t, tableSchemaFrom(t)]));
108
- function dimensionToColumn(dim, _table) {
109
- if (dim === "page") return "url";
110
- if (dim === "queryCanonical") return "query_canonical";
111
- return dim;
112
- }
113
- function dayPartition(date) {
114
- return `daily/${date}`;
115
- }
116
- function monthPartition(month) {
117
- return `monthly/${month}`;
118
- }
119
- function weekPartition(mondayIsoDate) {
120
- return `weekly/${mondayIsoDate}`;
121
- }
122
- function quarterPartition(quarter) {
123
- return `quarterly/${quarter}`;
124
- }
125
- function mondayOfWeek(isoDate) {
126
- const ms = Date.parse(`${isoDate}T00:00:00Z`);
127
- const dow = new Date(ms).getUTCDay();
128
- const offset = dow === 0 ? -6 : 1 - dow;
129
- return toIsoDate(new Date(ms + offset * MS_PER_DAY));
130
- }
131
- function quarterOfMonth(month) {
132
- const [y, m] = month.split("-").map(Number);
133
- return `${y}-Q${Math.floor((m - 1) / 3) + 1}`;
134
- }
135
- const DEFAULT_THRESHOLDS = {
136
- raw: 7,
137
- d7: 30,
138
- d30: 90
139
- };
140
- DEFAULT_THRESHOLDS.raw, DEFAULT_THRESHOLDS.d7, DEFAULT_THRESHOLDS.d30;
141
- function enumeratePartitions(startDate, endDate) {
142
- const out = [];
143
- const [sy, sm, sd] = startDate.split("-").map(Number);
144
- const [ey, em, ed] = endDate.split("-").map(Number);
145
- const start = Date.UTC(sy, sm - 1, sd);
146
- const end = Date.UTC(ey, em - 1, ed);
147
- if (end < start) return out;
148
- const seenWeeks = /* @__PURE__ */ new Set();
149
- const seenMonths = /* @__PURE__ */ new Set();
150
- const seenQuarters = /* @__PURE__ */ new Set();
151
- for (let t = start; t <= end; t += 864e5) {
152
- const d = new Date(t);
153
- const y = d.getUTCFullYear();
154
- const m = String(d.getUTCMonth() + 1).padStart(2, "0");
155
- const isoDay = `${y}-${m}-${String(d.getUTCDate()).padStart(2, "0")}`;
156
- const isoMonth = `${y}-${m}`;
157
- out.push(dayPartition(isoDay));
158
- const monday = mondayOfWeek(isoDay);
159
- if (!seenWeeks.has(monday)) {
160
- seenWeeks.add(monday);
161
- out.push(weekPartition(monday));
162
- }
163
- if (!seenMonths.has(isoMonth)) {
164
- seenMonths.add(isoMonth);
165
- out.push(monthPartition(isoMonth));
166
- }
167
- const quarter = quarterOfMonth(isoMonth);
168
- if (!seenQuarters.has(quarter)) {
169
- seenQuarters.add(quarter);
170
- out.push(quarterPartition(quarter));
171
- }
172
- }
173
- return out;
174
- }
175
- function escapeLike(value) {
176
- return value.replace(/\\/g, "\\\\").replace(/%/g, "\\%").replace(/_/g, "\\_");
177
- }
178
- const METRIC_EXPR = {
179
- clicks: "CAST(SUM(clicks) AS DOUBLE)",
180
- impressions: "CAST(SUM(impressions) AS DOUBLE)",
181
- ctr: "CAST(SUM(clicks) AS DOUBLE) / NULLIF(SUM(impressions), 0)",
182
- position: "SUM(sum_position) / NULLIF(SUM(impressions), 0) + 1"
183
- };
184
- function topLevelPagePredicateSql(pathExpr) {
185
- return `LENGTH(${pathExpr}) - LENGTH(REPLACE(${pathExpr}, '/', '')) <= 1`;
186
- }
187
- const FILES_PLACEHOLDER = "{{FILES}}";
188
- function buildDimensionWhere(filters, table) {
189
- const clauses = [];
190
- const params = [];
191
- for (const filter of filters) {
192
- const column = dimensionToColumn(filter.dimension, table);
193
- switch (filter.operator) {
194
- case "equals":
195
- clauses.push(`${column} = ?`);
196
- params.push(filter.expression);
197
- break;
198
- case "notEquals":
199
- clauses.push(`${column} != ?`);
200
- params.push(filter.expression);
201
- break;
202
- case "contains":
203
- clauses.push(`${column} LIKE ? ESCAPE '\\'`);
204
- params.push(`%${escapeLike(filter.expression)}%`);
205
- break;
206
- case "notContains":
207
- clauses.push(`${column} NOT LIKE ? ESCAPE '\\'`);
208
- params.push(`%${escapeLike(filter.expression)}%`);
209
- break;
210
- case "includingRegex":
211
- clauses.push(`regexp_matches(${column}, ?)`);
212
- params.push(filter.expression);
213
- break;
214
- case "excludingRegex":
215
- clauses.push(`NOT regexp_matches(${column}, ?)`);
216
- params.push(filter.expression);
217
- break;
218
- }
219
- }
220
- return {
221
- clause: clauses.join(" AND "),
222
- params
223
- };
224
- }
225
- function buildTopLevelWhere(plan, table) {
226
- if (!plan.specialFilters.topLevel) return "";
227
- return topLevelPagePredicateSql(dimensionToColumn("page", table));
228
- }
229
- function buildHaving(filters) {
230
- if (filters.length === 0) return {
231
- clause: "",
232
- params: []
233
- };
234
- const clauses = [];
235
- const params = [];
236
- for (const filter of filters) {
237
- const expr = METRIC_EXPR[filter.metric];
238
- switch (filter.operator) {
239
- case "metricGte":
240
- clauses.push(`${expr} >= ?`);
241
- params.push(filter.expression);
242
- break;
243
- case "metricGt":
244
- clauses.push(`${expr} > ?`);
245
- params.push(filter.expression);
246
- break;
247
- case "metricLte":
248
- clauses.push(`${expr} <= ?`);
249
- params.push(filter.expression);
250
- break;
251
- case "metricLt":
252
- clauses.push(`${expr} < ?`);
253
- params.push(filter.expression);
254
- break;
255
- case "metricBetween":
256
- clauses.push(`${expr} >= ? AND ${expr} <= ?`);
257
- params.push(filter.expression, filter.expression2 ?? filter.expression);
258
- break;
259
- }
260
- }
261
- return {
262
- clause: clauses.length > 0 ? `HAVING ${clauses.join(" AND ")}` : "",
263
- params
264
- };
265
- }
266
- function compileLogicalQueryPlan(plan, table = plan.dataset) {
267
- const partitions = enumeratePartitions(plan.dateRange.startDate, plan.dateRange.endDate);
268
- const metricSelects = plan.metrics.map((metric) => `${METRIC_EXPR[metric]} AS ${metric}`);
269
- const dimSelects = plan.groupByDimensions.map((dimension) => {
270
- const column = dimensionToColumn(dimension, table);
271
- return column !== dimension ? `${column} AS ${dimension}` : dimension;
272
- });
273
- const whereClauses = ["date >= ?", "date <= ?"];
274
- const whereParams = [plan.dateRange.startDate, plan.dateRange.endDate];
275
- const dimWhere = buildDimensionWhere(plan.dimensionFilters, table);
276
- if (dimWhere.clause) {
277
- whereClauses.push(dimWhere.clause);
278
- whereParams.push(...dimWhere.params);
279
- }
280
- const topLevelClause = buildTopLevelWhere(plan, table);
281
- if (topLevelClause) whereClauses.push(topLevelClause);
282
- const having = buildHaving(plan.metricFilters);
283
- const groupByCols = [...plan.groupByDimensions.map((dimension) => dimensionToColumn(dimension, table)), ...plan.hasDate ? ["date"] : []];
284
- const groupBy = groupByCols.length > 0 ? `GROUP BY ${groupByCols.join(", ")}` : "";
285
- const orderBy = plan.orderBy ? `ORDER BY ${plan.orderBy.column} ${plan.orderBy.dir.toUpperCase()}` : "ORDER BY clicks DESC";
286
- const limit = `LIMIT ${plan.rowLimit ?? 1e3}`;
287
- const offset = plan.startRow ? `OFFSET ${plan.startRow}` : "";
288
- return {
289
- sql: [
290
- `SELECT ${[
291
- ...dimSelects,
292
- ...plan.hasDate ? ["date"] : [],
293
- ...metricSelects
294
- ].join(", ")}`,
295
- `FROM read_parquet(${FILES_PLACEHOLDER}, union_by_name = true)`,
296
- `WHERE ${whereClauses.join(" AND ")}`,
297
- groupBy,
298
- having.clause,
299
- orderBy,
300
- limit,
301
- offset
302
- ].filter(Boolean).join(" ").replace(/\s+/g, " ").trim(),
303
- params: [...whereParams, ...having.params],
304
- partitions,
305
- table,
306
- filesPlaceholder: FILES_PLACEHOLDER
307
- };
308
- }
309
- function resolveToSQL(state, table) {
310
- const plan = buildLogicalPlan(state, { regex: true });
311
- return compileLogicalQueryPlan(plan, table ?? plan.dataset);
312
- }
313
- function fileList(keys) {
314
- return keys.length === 0 ? "[]" : `[${keys.map((key) => `'${key.replace(/'/g, "''")}'`).join(", ")}]`;
315
- }
316
- function substituteNamedFiles(sql, sets) {
317
- let out = sql;
318
- for (const [name, keys] of Object.entries(sets)) out = out.replace(new RegExp(`\\{\\{${name}\\}\\}`, "g"), fileList(keys));
319
- return out;
320
- }
1
+ import { i as substituteNamedFiles, n as compileLogicalQueryPlan, o as enumeratePartitions, r as resolveToSQL, t as FILES_PLACEHOLDER } from "./_chunks/compiler.mjs";
321
2
  export { FILES_PLACEHOLDER, compileLogicalQueryPlan, enumeratePartitions, resolveToSQL, substituteNamedFiles };
package/dist/resolver/index.d.mts CHANGED
@@ -1,7 +1,8 @@
1
- import { SQL } from "drizzle-orm";
1
+ import { a as ResolvedSQLOptimized, i as ResolvedSQL, n as ExtraQuery, o as ResolverAdapter, r as ResolvedComparisonSQL, s as ResolverOptions, t as ComparisonFilter } from "../_chunks/types.mjs";
2
2
  import { LogicalDataset, LogicalDataset as LogicalDataset$1, PlannerCapabilities } from "gscdump/query/plan";
3
- import { BuilderState, Dimension, FilterInput, InternalFilter, Metric } from "gscdump/query";
3
+ import { SQL } from "drizzle-orm";
4
4
  import { TableName } from "gscdump/contracts";
5
+ import { BuilderState, Dimension, FilterInput, InternalFilter, Metric } from "gscdump/query";
5
6
  type DimensionSurface = 'api' | 'stored' | 'derived';
6
7
  interface DimensionBinding {
7
8
  column: string;
@@ -52,55 +53,6 @@ interface SqlFragments<TableKey extends string> {
52
53
  topLevelPredicate: (filters: InternalFilter[], tableKey: TableKey) => SQL | undefined;
53
54
  }
54
55
  declare function createSqlFragments<TableKey extends string>(config: SqlFragmentsConfig<TableKey>): SqlFragments<TableKey>;
55
- interface ResolverAdapter<TableKey extends string = string> {
56
- readonly METRIC_NAMES: readonly Metric[];
57
- readonly capabilities: PlannerCapabilities;
58
- readonly schema: Record<TableKey, unknown>;
59
- tableKeyForDataset: (dataset: LogicalDataset$1) => TableKey;
60
- inferTable: (dimensions: Dimension[], filterDims?: Dimension[]) => TableKey;
61
- dimColumn: (dim: Dimension, tableKey: TableKey) => string;
62
- isMetricDimension: (dim: string) => dim is Metric;
63
- tableRef: (tableKey: TableKey) => SQL;
64
- dateColRef: (tableKey: TableKey) => SQL;
65
- urlToPathExpr: (col: string) => string;
66
- siteIdColRef?: (tableKey: TableKey) => SQL;
67
- dimExprSql: (dim: Dimension, tableKey: TableKey) => SQL;
68
- metricSql: (metric: Metric, tableKey: TableKey) => SQL;
69
- dimensionPredicates: (filters: InternalFilter[], tableKey: TableKey) => SQL[];
70
- havingPredicates: (filters: InternalFilter[], tableKey: TableKey) => SQL[];
71
- topLevelPredicate: (filters: InternalFilter[], tableKey: TableKey) => SQL | undefined;
72
- compile: (query: SQL) => {
73
- sql: string;
74
- params: unknown[];
75
- };
76
- }
77
- type ComparisonFilter = 'new' | 'lost' | 'improving' | 'declining';
78
- interface ResolverOptions<TableKey extends string = string> {
79
- adapter: ResolverAdapter<TableKey>;
80
- /** Optional site scope. Required for multi-tenant D1; omitted for parquet. */
81
- siteId?: string | number;
82
- }
83
- interface ResolvedSQL {
84
- sql: string;
85
- params: unknown[];
86
- countSql: string;
87
- countParams: unknown[];
88
- }
89
- interface ResolvedSQLOptimized {
90
- sql: string;
91
- params: unknown[];
92
- }
93
- interface ResolvedComparisonSQL {
94
- sql: string;
95
- params: unknown[];
96
- countSql: string;
97
- countParams: unknown[];
98
- }
99
- interface ExtraQuery {
100
- key: string;
101
- sql: string;
102
- params: unknown[];
103
- }
104
56
  interface CreateResolverAdapterConfig<TableKey extends string> extends SqlFragmentsConfig<TableKey> {
105
57
  compile: (query: SQL) => {
106
58
  sql: string;