@gscdump/engine 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/LICENSE +21 -0
  2. package/README.md +53 -0
  3. package/dist/adapters/duckdb-node.d.mts +19 -0
  4. package/dist/adapters/duckdb-node.mjs +78 -0
  5. package/dist/adapters/filesystem.d.mts +206 -0
  6. package/dist/adapters/filesystem.mjs +320 -0
  7. package/dist/adapters/http.d.mts +227 -0
  8. package/dist/adapters/http.mjs +119 -0
  9. package/dist/adapters/hyparquet.d.mts +107 -0
  10. package/dist/adapters/hyparquet.mjs +250 -0
  11. package/dist/adapters/inspection-sqlite-browser.d.mts +9 -0
  12. package/dist/adapters/inspection-sqlite-browser.mjs +42 -0
  13. package/dist/adapters/inspection-sqlite-node.d.mts +9 -0
  14. package/dist/adapters/inspection-sqlite-node.mjs +32 -0
  15. package/dist/adapters/node-harness.d.mts +334 -0
  16. package/dist/adapters/node-harness.mjs +1907 -0
  17. package/dist/adapters/r2-manifest.d.mts +227 -0
  18. package/dist/adapters/r2-manifest.mjs +355 -0
  19. package/dist/adapters/r2.d.mts +93 -0
  20. package/dist/adapters/r2.mjs +65 -0
  21. package/dist/arrow-utils.d.mts +14 -0
  22. package/dist/arrow-utils.mjs +8 -0
  23. package/dist/contracts.d.mts +436 -0
  24. package/dist/contracts.mjs +1 -0
  25. package/dist/entities.d.mts +238 -0
  26. package/dist/entities.mjs +359 -0
  27. package/dist/index.d.mts +1849 -0
  28. package/dist/index.mjs +1976 -0
  29. package/dist/ingest.d.mts +96 -0
  30. package/dist/ingest.mjs +187 -0
  31. package/dist/planner.d.mts +16 -0
  32. package/dist/planner.mjs +321 -0
  33. package/dist/resolver/index.d.mts +207 -0
  34. package/dist/resolver/index.mjs +869 -0
  35. package/dist/rollups.d.mts +207 -0
  36. package/dist/rollups.mjs +553 -0
  37. package/dist/schema.d.mts +1258 -0
  38. package/dist/schema.mjs +139 -0
  39. package/dist/scope.d.mts +38 -0
  40. package/dist/scope.mjs +28 -0
  41. package/dist/snapshot.d.mts +14 -0
  42. package/dist/snapshot.mjs +1 -0
  43. package/dist/sql-bind.d.mts +19 -0
  44. package/dist/sql-bind.mjs +92 -0
  45. package/dist/sql-fragments.d.mts +21 -0
  46. package/dist/sql-fragments.mjs +13 -0
  47. package/package.json +168 -0
@@ -0,0 +1,96 @@
1
+ import { Row, TableName } from "gscdump/contracts";
2
+ /**
3
+ * Canonical GSC API dimension order per table. Consumers hitting the raw
4
+ * `searchanalytics.query` endpoint must request dimensions in this order so
5
+ * that `transformGscRow` / `createRowAccumulator` can decode the resulting
6
+ * `keys[]` tuples. Storage-column names (e.g. `page` → `url`) are handled
7
+ * inside `transformGscRow` — this record stays in GSC-API vocabulary.
8
+ */
9
+ declare const TABLE_DIMS: Record<TableName, string[]>;
10
/**
 * One row of a GSC `searchanalytics.query` response, in the shape accepted by
 * `transformGscRow` and `RowAccumulator.push`.
 */
+ interface GscApiRow {
11
/** Dimension values for the row; decoded positionally, so they must follow the table's `TABLE_DIMS` order. */
+ keys: string[];
12
/** Click count for the row. */
+ clicks: number;
13
/** Impression count for the row; also used as the weight when encoding `sum_position`. */
+ impressions: number;
14
+ /** Unused by ingest — the `sum_position` column encodes weighted position. */
15
+ ctr?: number;
16
/** Raw (1-indexed) GSC position; stored only in weighted form via `toSumPosition`. */
+ position: number;
17
+ }
18
+ interface IngestOptions {
19
+ /**
20
+ * Canonical form of a query string, stored alongside `query` as
21
+ * `query_canonical`. Site-specific (e.g. synonym groups, stemming); if
22
+ * omitted, `query_canonical` is null. Applied to `keywords` +
23
+ * `page_keywords` tables only.
24
+ */
25
+ normalizeQuery?: (query: string) => string | null | undefined;
26
+ }
27
+ /**
28
+ * Strip a GSC URL to its pathname. Core analytics stores pages by path so
29
+ * queries don't carry origin-prefix filters.
30
+ */
31
+ declare function toPath(gscUrl: string): string;
32
+ /**
33
+ * Encode weighted average position as `sum_position`. The raw GSC position
34
+ * is 1-indexed; subtract 1 and weight by impressions so a downstream
35
+ * `SUM(sum_position) / SUM(impressions) + 1` recovers the true mean without
36
+ * ever materialising per-row position values.
37
+ */
38
+ declare function toSumPosition(apiPosition: number, impressions: number): number;
39
+ /**
40
+ * Map one GSC API row into `{ date, row }` for the given table, or null if
41
+ * the row has no keys (GSC occasionally emits empty-keys placeholders).
42
+ */
43
+ declare function transformGscRow(table: TableName, apiRow: GscApiRow, options?: IngestOptions): {
44
+ date: string;
45
+ row: Row;
46
+ } | null;
47
+ interface RowAccumulator {
48
+ /**
49
+ * Push a batch of GSC API rows into the accumulator. Returns `false` if
50
+ * the batch pushed total row count past `maxRows`; subsequent pushes
51
+ * become no-ops until `drain()` is called.
52
+ */
53
+ push: (table: TableName, rows: readonly GscApiRow[]) => boolean;
54
+ /**
55
+ * Consume accumulated rows, grouped by `table → date → rows`. Resets
56
+ * internal state; subsequent pushes behave as on a fresh accumulator.
57
+ */
58
+ drain: () => Map<TableName, Map<string, Row[]>>;
59
+ /**
60
+ * Drain only buckets for dates strictly older than the most-recent date
61
+ * seen for each table. Requires `trackDateBoundary` to be enabled — without
62
+ * it, returns an empty map. GSC's date-as-dimension queries return rows
63
+ * sorted by date, so any date older than the latest seen is logically
64
+ * complete within the current job slice and safe to flush mid-job.
65
+ *
66
+ * Returned buckets are removed from internal state and `totalRows` is
67
+ * decremented accordingly. Latest-date buckets stay in place for the
68
+ * eventual `drain()` at job end.
69
+ */
70
+ drainCompleted: () => Map<TableName, Map<string, Row[]>>;
71
+ /** Total row count across all tables/dates since last drain. */
72
+ readonly totalRows: number;
73
+ /** Whether the accumulator has overflowed since last drain. */
74
+ readonly overflowed: boolean;
75
+ }
76
+ interface RowAccumulatorOptions extends IngestOptions {
77
+ /**
78
+ * Soft cap on total accumulated rows before `push` starts returning
79
+ * `false` and dropping rows. Defaults to 500_000 — matches the
80
+ * ~128 MB CF Workers isolate budget at ~200 bytes/row with headroom.
81
+ */
82
+ maxRows?: number;
83
+ /**
84
+ * Track the most-recent date seen per table so `drainCompleted()` can
85
+ * return older-date buckets mid-job. Off by default — callers that don't
86
+ * stream-flush pay zero overhead for the bookkeeping.
87
+ *
88
+ * Caller contract: only safe when GSC dimensions include `date` so the
89
+ * API returns rows in date-ascending order; without that ordering,
90
+ * "older than latest" doesn't mean "complete" and partial buckets would
91
+ * be flushed prematurely.
92
+ */
93
+ trackDateBoundary?: boolean;
94
+ }
95
+ declare function createRowAccumulator(options?: RowAccumulatorOptions): RowAccumulator;
96
+ export { GscApiRow, IngestOptions, RowAccumulator, RowAccumulatorOptions, TABLE_DIMS, createRowAccumulator, toPath, toSumPosition, transformGscRow };
@@ -0,0 +1,187 @@
1
/**
 * GSC API dimensions requested for each storage table, in the exact order in
 * which `transformGscRow` reads the `keys[]` tuple of a response row. Names are
 * in GSC-API vocabulary (`page`), not storage-column vocabulary (`url`).
 */
const TABLE_DIMS = {
  pages: ["page", "date"],
  keywords: ["query", "date"],
  countries: ["country", "date"],
  devices: ["device", "date"],
  page_keywords: ["page", "query", "date"],
  search_appearance: ["searchAppearance", "date"]
};
13
/**
 * Reduce a GSC page URL to its pathname (origin, query string and fragment
 * are dropped).
 *
 * @param gscUrl Page URL as reported by GSC.
 * @returns The URL's pathname, or the input unchanged when it cannot be
 *   parsed as an absolute URL.
 */
function toPath(gscUrl) {
  let parsed;
  try {
    parsed = new URL(gscUrl);
  } catch {
    // Not an absolute URL — hand the value back untouched.
    return gscUrl;
  }
  return parsed.pathname;
}
20
/**
 * Encode a position as an impression-weighted, zero-based sum so that
 * `SUM(sum_position) / SUM(impressions) + 1` recovers the mean position.
 *
 * @param apiPosition 1-indexed position from the GSC API.
 * @param impressions Impression count used as the weight; values below 1 are
 *   weighted as 1.
 * @returns `(apiPosition - 1)` multiplied by the weight.
 */
function toSumPosition(apiPosition, impressions) {
  const zeroBased = apiPosition - 1;
  const weight = Math.max(impressions, 1);
  return zeroBased * weight;
}
23
/**
 * Convert one GSC API row into a storage row for `table`.
 *
 * `keys[]` is decoded positionally: `[dimension, date]` for the
 * single-dimension tables and `[page, query, date]` for anything else (the
 * `page_keywords` shape is the fall-through). Page URLs are reduced with
 * `toPath`; position is stored as `sum_position` via `toSumPosition`.
 *
 * @param table Target table name.
 * @param apiRow Row from the GSC API; falsy `clicks` / `impressions` /
 *   `position` are treated as 0.
 * @param options Optional `normalizeQuery` used to fill `query_canonical`
 *   (null when absent or when it returns null/undefined).
 * @returns `{ date, row }`, or null when the row has no keys.
 */
function transformGscRow(table, apiRow, options = {}) {
  const keys = apiRow.keys;
  if (!keys || keys.length === 0) return null;

  const clicks = apiRow.clicks || 0;
  const impressions = apiRow.impressions || 0;
  // Spread last into every row so the metric columns keep their position.
  const metrics = {
    clicks,
    impressions,
    sum_position: toSumPosition(apiRow.position || 0, impressions)
  };
  const keyAt = (index) => String(keys[index] ?? "");
  const canonicalOf = (query) => options.normalizeQuery?.(query) ?? null;

  switch (table) {
    case "pages": {
      const date = keyAt(1);
      return { date, row: { url: toPath(keyAt(0)), date, ...metrics } };
    }
    case "keywords": {
      const query = keyAt(0);
      const date = keyAt(1);
      return { date, row: { query, query_canonical: canonicalOf(query), date, ...metrics } };
    }
    case "countries": {
      const date = keyAt(1);
      return { date, row: { country: keyAt(0), date, ...metrics } };
    }
    case "devices": {
      const date = keyAt(1);
      return { date, row: { device: keyAt(0), date, ...metrics } };
    }
    case "search_appearance": {
      const date = keyAt(1);
      return { date, row: { searchAppearance: keyAt(0), date, ...metrics } };
    }
    default: {
      // Every remaining table is decoded with the page_keywords layout.
      const query = keyAt(1);
      const date = keyAt(2);
      const query_canonical = canonicalOf(query);
      return { date, row: { url: toPath(keyAt(0)), query, query_canonical, date, ...metrics } };
    }
  }
}
112
/** Row cap applied when `options.maxRows` is not supplied. */
const DEFAULT_MAX_ROWS = 5e5;

/**
 * Create an in-memory accumulator that groups transformed GSC rows by
 * `table → date → rows`.
 *
 * @param options `maxRows` (soft cap, default `DEFAULT_MAX_ROWS`),
 *   `trackDateBoundary` (enables `drainCompleted`), plus any ingest options,
 *   which are forwarded to `transformGscRow`.
 * @returns An object exposing `push`, `drain`, `drainCompleted` and the
 *   read-only `totalRows` / `overflowed` getters.
 */
function createRowAccumulator(options = {}) {
  const maxRows = options.maxRows ?? DEFAULT_MAX_ROWS;
  const trackDateBoundary = options.trackDateBoundary === true;
  let buckets = new Map();
  const latestDate = new Map();
  let total = 0;
  let overflowed = false;

  // Look up (creating on demand) the row list for one table/date pair.
  const rowsFor = (table, date) => {
    if (!buckets.has(table)) buckets.set(table, new Map());
    const byDate = buckets.get(table);
    if (!byDate.has(date)) byDate.set(date, []);
    return byDate.get(date);
  };

  // Remember the greatest date string seen for a table.
  const noteDate = (table, date) => {
    const newest = latestDate.get(table);
    if (!newest || date > newest) latestDate.set(table, date);
  };

  return {
    get totalRows() {
      return total;
    },
    get overflowed() {
      return overflowed;
    },
    push(table, rows) {
      // Once overflowed, pushes are no-ops until drain() resets the flag.
      if (overflowed) return false;
      for (const apiRow of rows) {
        const transformed = transformGscRow(table, apiRow, options);
        // Skip rows without keys and rows whose date decoded to "".
        if (!transformed || !transformed.date) continue;
        rowsFor(table, transformed.date).push(transformed.row);
        total += 1;
        if (trackDateBoundary) noteDate(table, transformed.date);
        if (total > maxRows) {
          // The row that crossed the cap is kept; the rest of the batch is dropped.
          overflowed = true;
          return false;
        }
      }
      return true;
    },
    drain() {
      const drained = buckets;
      buckets = new Map();
      latestDate.clear();
      total = 0;
      overflowed = false;
      return drained;
    },
    drainCompleted() {
      const out = new Map();
      if (!trackDateBoundary) return out;
      for (const [table, byDate] of buckets) {
        const latest = latestDate.get(table);
        if (!latest) continue;
        // Snapshot the matches first so the map is not mutated while iterated.
        const finished = [...byDate].filter(([date]) => date < latest);
        if (finished.length === 0) continue;
        const outBy = new Map();
        out.set(table, outBy);
        for (const [date, dateRows] of finished) {
          outBy.set(date, dateRows);
          byDate.delete(date);
          total -= dateRows.length;
        }
      }
      return out;
    }
  };
}
187
+ export { TABLE_DIMS, createRowAccumulator, toPath, toSumPosition, transformGscRow };
@@ -0,0 +1,16 @@
1
+ import { LogicalQueryPlan } from "gscdump/query/plan";
2
+ import { TableName } from "gscdump/contracts";
3
+ import { BuilderState } from "gscdump/query";
4
+ declare function enumeratePartitions(startDate: string, endDate: string): string[];
5
/** Result of compiling a logical query plan into parameterised SQL over Parquet files. */
+ interface ResolvedQuery {
6
/** SQL text using `?` bind placeholders and reading from `read_parquet(<filesPlaceholder>, ...)`. */
+ sql: string;
7
/** Bind values for the `?` placeholders: WHERE parameters first, then HAVING parameters. */
+ params: unknown[];
8
/** Partition keys produced by `enumeratePartitions` for the plan's date range. */
+ partitions: string[];
9
/** Table the query was compiled against. */
+ table: TableName;
10
/** Token inside `sql` that stands in for the file list (the `FILES_PLACEHOLDER` value). */
+ filesPlaceholder: string;
11
+ }
12
+ declare const FILES_PLACEHOLDER = "{{FILES}}";
13
+ declare function compileLogicalQueryPlan(plan: LogicalQueryPlan, table?: TableName): ResolvedQuery;
14
+ declare function resolveToSQL(state: BuilderState, table?: TableName): ResolvedQuery;
15
+ declare function substituteNamedFiles(sql: string, sets: Record<string, string[]>): string;
16
+ export { FILES_PLACEHOLDER, type ResolvedQuery, compileLogicalQueryPlan, enumeratePartitions, resolveToSQL, substituteNamedFiles };
@@ -0,0 +1,321 @@
1
+ import { MS_PER_DAY, toIsoDate } from "gscdump";
2
+ import { date, doublePrecision, getTableConfig, integer, pgTable, varchar } from "drizzle-orm/pg-core";
3
+ import { buildLogicalPlan } from "gscdump/query/plan";
4
/**
 * Build a fresh set of the three metric column definitions shared by every
 * table: `clicks`, `impressions` (integers) and `sum_position` (double), all
 * NOT NULL.
 */
function metricCols() {
  const clicks = integer("clicks").notNull();
  const impressions = integer("impressions").notNull();
  const sum_position = doublePrecision("sum_position").notNull();
  return { clicks, impressions, sum_position };
}

/** Build a fresh NOT NULL `date` column definition. */
const dateCol = () => {
  return date("date").notNull();
};
12
/**
 * Drizzle table definitions for every analytics table. Each table is its
 * dimension column(s), then `date`, then the shared metric columns; the
 * declaration order is the column order reported by `getTableConfig`.
 */
const drizzleSchema = {
  pages: pgTable("pages", { url: varchar("url").notNull(), date: dateCol(), ...metricCols() }),
  keywords: pgTable("keywords", {
    query: varchar("query").notNull(),
    query_canonical: varchar("query_canonical"),
    date: dateCol(),
    ...metricCols()
  }),
  countries: pgTable("countries", { country: varchar("country").notNull(), date: dateCol(), ...metricCols() }),
  devices: pgTable("devices", { device: varchar("device").notNull(), date: dateCol(), ...metricCols() }),
  page_keywords: pgTable("page_keywords", {
    url: varchar("url").notNull(),
    query: varchar("query").notNull(),
    query_canonical: varchar("query_canonical"),
    date: dateCol(),
    ...metricCols()
  }),
  search_appearance: pgTable("search_appearance", {
    searchAppearance: varchar("searchAppearance").notNull(),
    date: dateCol(),
    ...metricCols()
  })
};
47
/**
 * Per-table metadata merged into the schema by `tableSchemaFrom`: the sort
 * key (always led by `date`) and a schema version number.
 */
const TABLE_METADATA = {
  pages: { sortKey: ["date", "url"], version: 1 },
  keywords: { sortKey: ["date", "query"], version: 2 },
  countries: { sortKey: ["date", "country"], version: 1 },
  devices: { sortKey: ["date", "device"], version: 1 },
  page_keywords: { sortKey: ["date", "url", "query"], version: 2 },
  search_appearance: { sortKey: ["date", "searchAppearance"], version: 1 }
};
77
/**
 * Map a Postgres SQL type name onto one of the engine's column types
 * (`VARCHAR`, `DATE`, `DOUBLE`, `BIGINT`, `INTEGER`). Matching is
 * case-insensitive; rules are tried in order and the first hit wins.
 *
 * @param sqlType Type string as returned by a column's `getSQLType()`.
 * @returns The engine column type name.
 * @throws Error when no rule matches the type.
 */
function pgSqlTypeToColumnType(sqlType) {
  const normalized = sqlType.toLowerCase();
  const isOneOf = (...names) => names.includes(normalized);
  const startsWithAny = (...prefixes) => prefixes.some((prefix) => normalized.startsWith(prefix));
  const rules = [
    { column: "VARCHAR", applies: () => startsWithAny("varchar", "char") || isOneOf("text") },
    { column: "DATE", applies: () => isOneOf("date") || startsWithAny("timestamp") },
    { column: "DOUBLE", applies: () => startsWithAny("double", "numeric", "decimal") || isOneOf("real") },
    { column: "BIGINT", applies: () => isOneOf("bigint", "int8") },
    { column: "INTEGER", applies: () => isOneOf("integer", "int", "int4", "smallint", "int2") }
  ];
  const hit = rules.find((rule) => rule.applies());
  if (hit) return hit.column;
  throw new Error(`unmapped pg type '${sqlType}' — extend pgSqlTypeToColumnType in @gscdump/engine/schema`);
}
86
/**
 * Derive the engine's table schema for `tableName` from its Drizzle
 * definition plus `TABLE_METADATA`.
 *
 * @param tableName Key of `drizzleSchema` / `TABLE_METADATA`.
 * @returns `{ name, columns, sortKey, version }`, where each column is
 *   `{ name, type, nullable }`.
 * @throws Error (from `pgSqlTypeToColumnType`) when a column type is unmapped.
 */
function tableSchemaFrom(tableName) {
  const drizzleColumns = getTableConfig(drizzleSchema[tableName]).columns;
  const columns = drizzleColumns.map((col) => {
    const type = pgSqlTypeToColumnType(col.getSQLType());
    return { name: col.name, type, nullable: !col.notNull };
  });
  const { sortKey, version } = TABLE_METADATA[tableName];
  return { name: tableName, columns, sortKey, version };
}

// Evaluated eagerly at module load for every table; the resulting record is
// not bound to a name here, so the only visible effect is that an unmapped
// column type throws at import time.
Object.fromEntries(
  ["pages", "keywords", "countries", "devices", "page_keywords", "search_appearance"].map((t) => [t, tableSchemaFrom(t)])
);
108
/**
 * Translate a GSC-API dimension name into its storage column name.
 *
 * @param dim Dimension name (`page`, `queryCanonical`, ...).
 * @param _table Unused; accepted for call-site symmetry.
 * @returns `url` for `page`, `query_canonical` for `queryCanonical`,
 *   otherwise `dim` unchanged.
 */
function dimensionToColumn(dim, _table) {
  switch (dim) {
    case "page":
      return "url";
    case "queryCanonical":
      return "query_canonical";
    default:
      return dim;
  }
}
113
/** Partition key for a single day, e.g. `daily/2024-03-05`. */
function dayPartition(date) {
  return "daily/".concat(date);
}

/** Partition key for a calendar month, e.g. `monthly/2024-03`. */
function monthPartition(month) {
  return "monthly/".concat(month);
}

/** Partition key for a week, identified by the ISO date of its Monday. */
function weekPartition(mondayIsoDate) {
  return "weekly/".concat(mondayIsoDate);
}

/** Partition key for a quarter, e.g. `quarterly/2024-Q1`. */
function quarterPartition(quarter) {
  return "quarterly/".concat(quarter);
}
125
/**
 * Return the ISO date of the Monday that starts the week containing
 * `isoDate` (weeks run Monday–Sunday, evaluated in UTC).
 *
 * @param isoDate Date in `YYYY-MM-DD` form.
 * @returns The Monday, formatted by `toIsoDate`.
 */
function mondayOfWeek(isoDate) {
  const midnightUtc = Date.parse(`${isoDate}T00:00:00Z`);
  const weekday = new Date(midnightUtc).getUTCDay();
  // getUTCDay(): 0 = Sunday … 6 = Saturday; convert to "days since Monday".
  const daysSinceMonday = (weekday + 6) % 7;
  return toIsoDate(new Date(midnightUtc - daysSinceMonday * MS_PER_DAY));
}
131
/**
 * Return the quarter label (`YYYY-Qn`) containing a `YYYY-MM` month.
 *
 * @param month Month in `YYYY-MM` form.
 * @returns Quarter label, e.g. `2024-Q2` for `2024-04`.
 */
function quarterOfMonth(month) {
  const parts = month.split("-").map(Number);
  const year = parts[0];
  const monthNumber = parts[1];
  const quarterNumber = Math.floor((monthNumber - 1) / 3) + 1;
  return `${year}-Q${quarterNumber}`;
}
135
+ const DEFAULT_THRESHOLDS = {
136
+ raw: 7,
137
+ d7: 30,
138
+ d30: 90
139
+ };
140
+ DEFAULT_THRESHOLDS.raw, DEFAULT_THRESHOLDS.d7, DEFAULT_THRESHOLDS.d30;
141
/**
 * List every partition key touched by the inclusive date range
 * `[startDate, endDate]`.
 *
 * Days are walked in UTC. Each day contributes its daily key, followed — the
 * first time each is encountered — by the weekly, monthly and quarterly keys
 * it belongs to.
 *
 * @param startDate First day, `YYYY-MM-DD`.
 * @param endDate Last day, `YYYY-MM-DD`.
 * @returns Partition keys in emission order; empty when `endDate` precedes
 *   `startDate`.
 */
function enumeratePartitions(startDate, endDate) {
  const DAY_MS = 86400000;
  const toUtcMidnight = (iso) => {
    const [year, month, day] = iso.split("-").map(Number);
    return Date.UTC(year, month - 1, day);
  };
  const pad2 = (value) => String(value).padStart(2, "0");

  const partitions = [];
  const first = toUtcMidnight(startDate);
  const last = toUtcMidnight(endDate);
  if (last < first) return partitions;

  const seenWeeks = new Set();
  const seenMonths = new Set();
  const seenQuarters = new Set();
  // Emit a coarser-grained partition only the first time its key shows up.
  const emitOnce = (seen, key, toPartition) => {
    if (seen.has(key)) return;
    seen.add(key);
    partitions.push(toPartition(key));
  };

  for (let t = first; t <= last; t += DAY_MS) {
    const day = new Date(t);
    const isoMonth = `${day.getUTCFullYear()}-${pad2(day.getUTCMonth() + 1)}`;
    const isoDay = `${isoMonth}-${pad2(day.getUTCDate())}`;
    partitions.push(dayPartition(isoDay));
    emitOnce(seenWeeks, mondayOfWeek(isoDay), weekPartition);
    emitOnce(seenMonths, isoMonth, monthPartition);
    emitOnce(seenQuarters, quarterOfMonth(isoMonth), quarterPartition);
  }
  return partitions;
}
175
/**
 * Escape the characters that are special inside a SQL `LIKE` pattern when
 * the backslash is the escape character: `\`, `%` and `_` each get a
 * leading backslash.
 *
 * @param value Literal text to embed in a LIKE pattern.
 * @returns The escaped text.
 */
function escapeLike(value) {
  return value.replace(/[\\%_]/g, (special) => "\\" + special);
}
178
/**
 * SQL aggregate expression for each metric. `ctr` and `position` divide by
 * `NULLIF(SUM(impressions), 0)`, so groups without impressions produce NULL
 * rather than a division error; `position` adds 1 back because
 * `sum_position` is stored zero-based.
 */
const METRIC_EXPR = (() => {
  const sumAsDouble = (column) => `CAST(SUM(${column}) AS DOUBLE)`;
  const perImpression = (numerator) => `${numerator} / NULLIF(SUM(impressions), 0)`;
  return {
    clicks: sumAsDouble("clicks"),
    impressions: sumAsDouble("impressions"),
    ctr: perImpression(sumAsDouble("clicks")),
    position: `${perImpression("SUM(sum_position)")} + 1`
  };
})();
184
/**
 * Build a SQL predicate that is true when `pathExpr` contains at most one
 * `/` character, by comparing its length with and without slashes.
 *
 * @param pathExpr SQL expression yielding the path (inserted verbatim).
 * @returns The predicate text.
 */
function topLevelPagePredicateSql(pathExpr) {
  const fullLength = `LENGTH(${pathExpr})`;
  const lengthWithoutSlashes = `LENGTH(REPLACE(${pathExpr}, '/', ''))`;
  return `${fullLength} - ${lengthWithoutSlashes} <= 1`;
}
187
+ const FILES_PLACEHOLDER = "{{FILES}}";
188
/**
 * Turn dimension filters into a parameterised WHERE fragment.
 *
 * Each filter's dimension is mapped to its storage column with
 * `dimensionToColumn`; the filter value is always passed as a bind
 * parameter. `contains` / `notContains` wrap the LIKE-escaped value in `%`.
 * Filters with an unrecognised operator contribute nothing.
 *
 * @param filters Filters with `dimension`, `operator` and `expression`.
 * @param table Table name, forwarded to `dimensionToColumn`.
 * @returns `{ clause, params }`: the clauses joined with ` AND ` (empty
 *   string when there are none) and the bind values in placeholder order.
 */
function buildDimensionWhere(filters, table) {
  const verbatim = (expression) => expression;
  const likePattern = (expression) => `%${escapeLike(expression)}%`;
  const operators = new Map([
    ["equals", { sql: (column) => `${column} = ?`, param: verbatim }],
    ["notEquals", { sql: (column) => `${column} != ?`, param: verbatim }],
    ["contains", { sql: (column) => `${column} LIKE ? ESCAPE '\\'`, param: likePattern }],
    ["notContains", { sql: (column) => `${column} NOT LIKE ? ESCAPE '\\'`, param: likePattern }],
    ["includingRegex", { sql: (column) => `regexp_matches(${column}, ?)`, param: verbatim }],
    ["excludingRegex", { sql: (column) => `NOT regexp_matches(${column}, ?)`, param: verbatim }]
  ]);

  const clauses = [];
  const params = [];
  for (const filter of filters) {
    const column = dimensionToColumn(filter.dimension, table);
    const handler = operators.get(filter.operator);
    if (!handler) continue;
    clauses.push(handler.sql(column));
    params.push(handler.param(filter.expression));
  }
  return { clause: clauses.join(" AND "), params };
}
225
/**
 * Produce the "top-level pages only" predicate when the plan asks for it.
 *
 * @param plan Logical plan; only `specialFilters.topLevel` is read.
 * @param table Table name, forwarded to `dimensionToColumn`.
 * @returns The predicate over the page column, or `""` when the flag is off.
 */
function buildTopLevelWhere(plan, table) {
  if (plan.specialFilters.topLevel) {
    const pageColumn = dimensionToColumn("page", table);
    return topLevelPagePredicateSql(pageColumn);
  }
  return "";
}
229
/**
 * Turn metric filters into a parameterised HAVING clause over the aggregate
 * expressions in `METRIC_EXPR`.
 *
 * `metricBetween` expands to an inclusive range and falls back to
 * `expression` for the upper bound when `expression2` is null/undefined.
 * Filters with an unrecognised operator contribute nothing.
 *
 * @param filters Filters with `metric`, `operator`, `expression` and
 *   optionally `expression2`.
 * @returns `{ clause, params }`; `clause` starts with `HAVING` or is `""`
 *   when nothing was emitted.
 */
function buildHaving(filters) {
  if (filters.length === 0) return { clause: "", params: [] };

  const comparators = new Map([
    ["metricGte", ">="],
    ["metricGt", ">"],
    ["metricLte", "<="],
    ["metricLt", "<"]
  ]);
  const clauses = [];
  const params = [];
  for (const filter of filters) {
    const expr = METRIC_EXPR[filter.metric];
    if (filter.operator === "metricBetween") {
      clauses.push(`${expr} >= ? AND ${expr} <= ?`);
      params.push(filter.expression, filter.expression2 ?? filter.expression);
      continue;
    }
    const comparator = comparators.get(filter.operator);
    if (comparator === undefined) continue;
    clauses.push(`${expr} ${comparator} ?`);
    params.push(filter.expression);
  }
  const clause = clauses.length > 0 ? `HAVING ${clauses.join(" AND ")}` : "";
  return { clause, params };
}
266
/**
 * Compile a logical query plan into a single parameterised SQL statement
 * over `read_parquet(<FILES_PLACEHOLDER>, union_by_name = true)`.
 *
 * The statement selects the grouped dimensions (aliased back to their
 * GSC-API names where the storage column differs), `date` when
 * `plan.hasDate`, and the requested metrics. It always filters on the plan's
 * date range and defaults to `ORDER BY clicks DESC` and `LIMIT 1000`.
 * `orderBy`, `rowLimit` and `startRow` are interpolated into the SQL text,
 * not bound.
 *
 * @param plan Logical query plan.
 * @param table Table to compile against; defaults to `plan.dataset`.
 * @returns `{ sql, params, partitions, table, filesPlaceholder }`, with
 *   `params` ordered WHERE values first, then HAVING values.
 */
function compileLogicalQueryPlan(plan, table = plan.dataset) {
  const { startDate, endDate } = plan.dateRange;
  const partitions = enumeratePartitions(startDate, endDate);

  const metricSelects = plan.metrics.map((metric) => `${METRIC_EXPR[metric]} AS ${metric}`);
  const groupColumns = plan.groupByDimensions.map((dimension) => dimensionToColumn(dimension, table));
  const dimSelects = plan.groupByDimensions.map((dimension, index) => {
    const column = groupColumns[index];
    return column === dimension ? dimension : `${column} AS ${dimension}`;
  });
  const dateColumn = plan.hasDate ? ["date"] : [];

  const conditions = ["date >= ?", "date <= ?"];
  const conditionParams = [startDate, endDate];
  const dimWhere = buildDimensionWhere(plan.dimensionFilters, table);
  if (dimWhere.clause) {
    conditions.push(dimWhere.clause);
    conditionParams.push(...dimWhere.params);
  }
  const topLevelClause = buildTopLevelWhere(plan, table);
  if (topLevelClause) conditions.push(topLevelClause);

  const having = buildHaving(plan.metricFilters);
  const groupByColumns = [...groupColumns, ...dateColumn];

  // Assemble the statement piece by piece, skipping optional clauses.
  const pieces = [
    `SELECT ${[...dimSelects, ...dateColumn, ...metricSelects].join(", ")}`,
    `FROM read_parquet(${FILES_PLACEHOLDER}, union_by_name = true)`,
    `WHERE ${conditions.join(" AND ")}`
  ];
  if (groupByColumns.length > 0) pieces.push(`GROUP BY ${groupByColumns.join(", ")}`);
  if (having.clause) pieces.push(having.clause);
  pieces.push(
    plan.orderBy ? `ORDER BY ${plan.orderBy.column} ${plan.orderBy.dir.toUpperCase()}` : "ORDER BY clicks DESC"
  );
  pieces.push(`LIMIT ${plan.rowLimit ?? 1e3}`);
  if (plan.startRow) pieces.push(`OFFSET ${plan.startRow}`);

  return {
    sql: pieces.join(" ").replace(/\s+/g, " ").trim(),
    params: [...conditionParams, ...having.params],
    partitions,
    table,
    filesPlaceholder: FILES_PLACEHOLDER
  };
}
309
/**
 * Build a logical plan from query-builder state (with regex filters
 * enabled) and compile it to SQL.
 *
 * @param state Query-builder state.
 * @param table Optional table override; defaults to the plan's `dataset`.
 * @returns The compiled query.
 */
function resolveToSQL(state, table) {
  const logicalPlan = buildLogicalPlan(state, { regex: true });
  const targetTable = table ?? logicalPlan.dataset;
  return compileLogicalQueryPlan(logicalPlan, targetTable);
}
313
/**
 * Render file keys as a SQL list literal of single-quoted strings, doubling
 * any embedded single quote.
 *
 * @param keys File keys.
 * @returns `[]` for no keys, otherwise e.g. `['a.parquet', 'b.parquet']`.
 */
function fileList(keys) {
  if (keys.length === 0) return "[]";
  const quoted = keys.map((key) => `'${key.replace(/'/g, "''")}'`);
  return `[${quoted.join(", ")}]`;
}

/**
 * Replace every `{{NAME}}` placeholder in `sql` with the SQL list literal
 * for `sets[NAME]`.
 *
 * Placeholders are matched as literal text and the list is inserted
 * verbatim. Substitution goes through `split`/`join` rather than
 * `String.prototype.replace`, so `$`-sequences in file keys (`$&`, `$$`,
 * `$'`, …) are not expanded as replacement patterns, and regex
 * metacharacters in a set name cannot match unrelated placeholders.
 *
 * @param sql SQL text containing `{{NAME}}` placeholders.
 * @param sets Map from placeholder name to the file keys to substitute.
 * @returns The SQL with all known placeholders substituted; placeholders
 *   with no entry in `sets` are left untouched.
 */
function substituteNamedFiles(sql, sets) {
  let out = sql;
  for (const [name, keys] of Object.entries(sets)) {
    out = out.split(`{{${name}}}`).join(fileList(keys));
  }
  return out;
}
321
+ export { FILES_PLACEHOLDER, compileLogicalQueryPlan, enumeratePartitions, resolveToSQL, substituteNamedFiles };