@gscdump/engine 0.6.1 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/dist/_chunks/compiler.mjs +288 -0
  2. package/dist/_chunks/duckdb.d.mts +26 -0
  3. package/dist/_chunks/engine.mjs +578 -0
  4. package/dist/_chunks/pg-adapter.mjs +676 -0
  5. package/dist/_chunks/planner.d.mts +15 -0
  6. package/dist/_chunks/schema.d.mts +1258 -0
  7. package/dist/_chunks/schema.mjs +139 -0
  8. package/dist/_chunks/storage.d.mts +476 -0
  9. package/dist/_chunks/storage.mjs +39 -0
  10. package/dist/_chunks/types.d.mts +53 -0
  11. package/dist/adapters/duckdb-node.d.mts +1 -13
  12. package/dist/adapters/duckdb-node.mjs +1 -7
  13. package/dist/adapters/filesystem.d.mts +1 -193
  14. package/dist/adapters/filesystem.mjs +2 -9
  15. package/dist/adapters/http.d.mts +1 -193
  16. package/dist/adapters/http.mjs +1 -5
  17. package/dist/adapters/hyparquet.d.mts +6 -83
  18. package/dist/adapters/hyparquet.mjs +1 -105
  19. package/dist/adapters/inspection-sqlite-browser.d.mts +1 -7
  20. package/dist/adapters/inspection-sqlite-node.d.mts +1 -7
  21. package/dist/adapters/inspection-sqlite-node.mjs +1 -1
  22. package/dist/adapters/node-harness.d.mts +3 -306
  23. package/dist/adapters/node-harness.mjs +4 -1866
  24. package/dist/adapters/r2-manifest.d.mts +4 -149
  25. package/dist/adapters/r2-manifest.mjs +1 -8
  26. package/dist/adapters/r2.d.mts +1 -47
  27. package/dist/contracts.d.mts +1 -435
  28. package/dist/entities.d.mts +1 -47
  29. package/dist/index.d.mts +8 -1844
  30. package/dist/index.mjs +8 -1962
  31. package/dist/ingest.d.mts +1 -1
  32. package/dist/planner.d.mts +3 -16
  33. package/dist/planner.mjs +1 -320
  34. package/dist/resolver/index.d.mts +3 -51
  35. package/dist/resolver/index.mjs +2 -780
  36. package/dist/rollups.d.mts +6 -51
  37. package/dist/rollups.mjs +2 -209
  38. package/dist/schema.d.mts +2 -1258
  39. package/dist/schema.mjs +1 -138
  40. package/package.json +2 -2
@@ -0,0 +1,288 @@
1
+ import { i as dimensionToColumn, r as currentSchemaVersion } from "./schema.mjs";
2
+ import { a as mondayOfWeek, c as quarterOfMonth, d as weekPartition, i as inferSearchType, l as quarterPartition, n as dayPartition, o as monthPartition, s as objectKey } from "./storage.mjs";
3
+ import { METRIC_EXPR, escapeLike, topLevelPagePredicateSql } from "../sql-fragments.mjs";
4
+ import { buildLogicalPlan } from "gscdump/query/plan";
5
+ import { MS_PER_DAY } from "gscdump";
6
// Partition-name patterns. Each one captures the date portion that follows
// the tier prefix: `daily/YYYY-MM-DD`, `weekly/YYYY-MM-DD`, `monthly/YYYY-MM`.
const DAILY_PARTITION_RE = /^daily\/(\d{4}-\d{2}-\d{2})$/;
const WEEKLY_PARTITION_RE = /^weekly\/(\d{4}-\d{2}-\d{2})$/;
const MONTHLY_PARTITION_RE = /^monthly\/(\d{4}-\d{2})$/;

// Default age cutoffs, in days, keyed by the tier a compaction stage reads.
const DEFAULT_THRESHOLDS = { raw: 7, d7: 30, d30: 90 };

// Lower bound, in days, that `runStage` applies to every stage cutoff.
const PENDING_WINDOW_DAYS = 4;
15
/**
 * Compaction stages in execution order: raw → d7, d7 → d30, d30 → d90.
 *
 * Each stage provides:
 * - `bucketKey(entry)`: the bucket an input manifest entry belongs to, or
 *   `undefined` when its partition name does not match the stage's pattern;
 * - `bucketLatestMs(bucket)`: the epoch-ms instant compared against the cutoff;
 * - `outputPartition(bucket)`: the partition name the bucket is written to.
 */
const STAGES = [
  {
    inputTier: "raw",
    outputTier: "d7",
    cutoffDays: DEFAULT_THRESHOLDS.raw,
    // Daily partitions are grouped under the Monday of their week.
    bucketKey: (entry) => {
      const hit = entry.partition.match(DAILY_PARTITION_RE);
      return hit ? mondayOfWeek(hit[1]) : undefined;
    },
    // Midnight UTC of the Monday plus six days.
    bucketLatestMs: (weekStart) => Date.parse(`${weekStart}T00:00:00Z`) + 6 * MS_PER_DAY,
    outputPartition: weekPartition
  },
  {
    inputTier: "d7",
    outputTier: "d30",
    cutoffDays: DEFAULT_THRESHOLDS.d7,
    // Weekly partitions are grouped by the `YYYY-MM` prefix of their date.
    bucketKey: (entry) => {
      const hit = entry.partition.match(WEEKLY_PARTITION_RE);
      return hit ? hit[1].slice(0, 7) : undefined;
    },
    bucketLatestMs: monthEndMs,
    outputPartition: monthPartition
  },
  {
    inputTier: "d30",
    outputTier: "d90",
    cutoffDays: DEFAULT_THRESHOLDS.d30,
    // Monthly partitions are grouped by the quarter of their month.
    bucketKey: (entry) => {
      const hit = entry.partition.match(MONTHLY_PARTITION_RE);
      return hit ? quarterOfMonth(hit[1]) : undefined;
    },
    bucketLatestMs: quarterEndMs,
    outputPartition: quarterPartition
  }
];
53
/**
 * Runs the tiered compaction pipeline by executing every entry of `STAGES`
 * in order, awaiting each stage before starting the next.
 *
 * @param deps Collaborators forwarded unchanged to `runStage`.
 * @param ctx Scope forwarded unchanged to `runStage`.
 * @param now Reference time in epoch milliseconds that cutoffs are measured from.
 * @param overrides Optional cutoffs in days keyed by input tier (`raw`, `d7`,
 *   `d30`). Keys that are absent or explicitly `undefined` keep their
 *   `DEFAULT_THRESHOLDS` value.
 */
async function compactTieredImpl(deps, ctx, now, overrides = {}) {
  const thresholds = { ...DEFAULT_THRESHOLDS };
  // Copy only overrides that carry a value. A plain spread would let
  // `{ raw: undefined }` replace the default; `Math.max(undefined, n)` is NaN,
  // so the stage cutoff would become NaN and the "too recent" check
  // (`latest >= cutoff`) would never skip anything.
  for (const [tier, days] of Object.entries(overrides ?? {})) {
    if (days !== undefined) thresholds[tier] = days;
  }
  const stagesWithThresholds = STAGES.map((s) => ({
    ...s,
    cutoffDays: s.outputTier === "d7" ? thresholds.raw : s.outputTier === "d30" ? thresholds.d7 : thresholds.d30
  }));
  for (const stage of stagesWithThresholds) await runStage(deps, ctx, stage, now);
}
64
/**
 * Executes one compaction stage.
 *
 * Lists the live manifest entries of the stage's input tier, groups them by
 * (search type, bucket), and for every group whose bucket ended before the
 * cutoff compacts the group's objects into a single object in the stage's
 * output partition, then registers that object as superseding the inputs.
 *
 * @param deps Uses `manifestStore` (`listLive`, `withLock`, `registerVersion`),
 *   `codec.compactRows` and `dataSource`.
 * @param ctx Supplies `userId`, `siteId` and `table`.
 * @param stage One entry of `STAGES`, possibly with an overridden `cutoffDays`.
 * @param now Reference time in epoch milliseconds.
 */
async function runStage(deps, ctx, stage, now) {
  // The cutoff is never closer to `now` than PENDING_WINDOW_DAYS.
  const effectiveDays = Math.max(stage.cutoffDays, PENDING_WINDOW_DAYS);
  const cutoff = now - effectiveDays * MS_PER_DAY;

  const liveEntries = await deps.manifestStore.listLive({
    userId: ctx.userId,
    siteId: ctx.siteId,
    table: ctx.table,
    tier: stage.inputTier
  });

  // Group eligible entries under "<searchType>\0<bucket>".
  const groups = /* @__PURE__ */ new Map();
  for (const entry of liveEntries) {
    const bucket = stage.bucketKey(entry);
    if (!bucket || stage.bucketLatestMs(bucket) >= cutoff) continue;
    const groupId = `${inferSearchType(entry)}\0${bucket}`;
    const members = groups.get(groupId);
    if (members) members.push(entry);
    else groups.set(groupId, [entry]);
  }

  for (const [groupId, members] of groups) {
    const [searchType, bucket] = groupId.split("\0");
    const targetPartition = stage.outputPartition(bucket);
    // A lone entry that already sits in the target partition needs no work.
    const alreadyCompacted = members.length === 1 && members[0].partition === targetPartition;
    if (alreadyCompacted) continue;

    const lockScope = {
      userId: ctx.userId,
      siteId: ctx.siteId,
      table: ctx.table,
      partition: targetPartition
    };
    await deps.manifestStore.withLock(lockScope, async () => {
      const outputKey = objectKey(ctx, ctx.table, targetPartition, now, searchType);
      const sourceKeys = members.map((member) => member.objectKey);
      const { bytes, rowCount } = await deps.codec.compactRows({ table: ctx.table }, sourceKeys, outputKey, deps.dataSource);
      const replacement = {
        userId: ctx.userId,
        siteId: ctx.siteId,
        table: ctx.table,
        partition: targetPartition,
        objectKey: outputKey,
        rowCount,
        bytes,
        createdAt: now,
        schemaVersion: currentSchemaVersion(ctx.table),
        tier: stage.outputTier
      };
      // `searchType` is recorded only when it is not "web".
      if (searchType !== "web") replacement.searchType = searchType;
      await deps.manifestStore.registerVersion(replacement, members);
    });
  }
}
110
/**
 * Lists every partition name that can hold data for an inclusive date range.
 *
 * Walks the range one UTC day at a time. Each day contributes its daily
 * partition, and the first day seen in a given week, month or quarter also
 * contributes that weekly, monthly or quarterly partition.
 *
 * @param startDate First day, `YYYY-MM-DD`.
 * @param endDate Last day, `YYYY-MM-DD`.
 * @returns Partition names in encounter order; empty when `endDate` precedes `startDate`.
 */
function enumeratePartitions(startDate, endDate) {
  const toUtcMidnight = (isoDate) => {
    const [year, month, day] = isoDate.split("-").map(Number);
    return Date.UTC(year, month - 1, day);
  };
  const pad2 = (value) => String(value).padStart(2, "0");

  const partitions = [];
  const firstDay = toUtcMidnight(startDate);
  const lastDay = toUtcMidnight(endDate);
  if (lastDay < firstDay) return partitions;

  const weeks = /* @__PURE__ */ new Set();
  const months = /* @__PURE__ */ new Set();
  const quarters = /* @__PURE__ */ new Set();
  // Appends `toPartition(id)` the first time `id` is seen in `seen`.
  const emitOnce = (seen, id, toPartition) => {
    if (seen.has(id)) return;
    seen.add(id);
    partitions.push(toPartition(id));
  };

  for (let cursor = firstDay; cursor <= lastDay; cursor += 864e5) {
    const date = new Date(cursor);
    const isoMonth = `${date.getUTCFullYear()}-${pad2(date.getUTCMonth() + 1)}`;
    const isoDay = `${isoMonth}-${pad2(date.getUTCDate())}`;
    partitions.push(dayPartition(isoDay));
    emitOnce(weeks, mondayOfWeek(isoDay), (monday) => weekPartition(monday));
    emitOnce(months, isoMonth, (month) => monthPartition(month));
    emitOnce(quarters, quarterOfMonth(isoMonth), (quarter) => quarterPartition(quarter));
  }
  return partitions;
}
144
/**
 * Returns the last millisecond (UTC) of a calendar month.
 *
 * @param month Month as `YYYY-MM`.
 * @returns Epoch milliseconds of 23:59:59.999 UTC on the month's final day.
 */
function monthEndMs(month) {
  const parts = month.split("-");
  const year = Number(parts[0]);
  const monthNumber = Number(parts[1]);
  // `Date.UTC` months are 0-based, so the 1-based month number addresses the
  // following month and day 0 rolls back to the last day of `month`.
  return Date.UTC(year, monthNumber, 0, 23, 59, 59, 999);
}
148
/**
 * Returns the last millisecond (UTC) of a calendar quarter.
 *
 * @param quarter Quarter as `YYYY-Qn`.
 * @returns Epoch milliseconds of 23:59:59.999 UTC on the quarter's final day.
 */
function quarterEndMs(quarter) {
  const [yearPart, quarterPart] = quarter.split("-Q");
  // Quarter n ends with month 3n; used as a 0-based index that is the month
  // after, and day 0 rolls back to the quarter's last day.
  const monthAfterQuarter = Number(quarterPart) * 3;
  return Date.UTC(Number(yearPart), monthAfterQuarter, 0, 23, 59, 59, 999);
}
154
// Token that `compileLogicalQueryPlan` writes inside `read_parquet(...)` and
// returns as `filesPlaceholder`.
const FILES_PLACEHOLDER = "{{FILES}}";
155
/**
 * Translates dimension filters into a parameterised SQL predicate.
 *
 * Each filter with a recognised operator yields one clause and one bound
 * parameter; filters with any other operator contribute nothing.
 *
 * @param filters Objects with `dimension`, `operator` and `expression`.
 * @param table Table used to resolve each dimension to a column name.
 * @returns `clause` (clauses joined with ` AND `, or `""`) and `params` in clause order.
 */
function buildDimensionWhere(filters, table) {
  const likePattern = (text) => `%${escapeLike(text)}%`;
  // operator → (column, expression) → [sql clause, bound parameter]
  const compilers = new Map([
    ["equals", (column, expression) => [`${column} = ?`, expression]],
    ["notEquals", (column, expression) => [`${column} != ?`, expression]],
    ["contains", (column, expression) => [`${column} LIKE ? ESCAPE '\\'`, likePattern(expression)]],
    ["notContains", (column, expression) => [`${column} NOT LIKE ? ESCAPE '\\'`, likePattern(expression)]],
    ["includingRegex", (column, expression) => [`regexp_matches(${column}, ?)`, expression]],
    ["excludingRegex", (column, expression) => [`NOT regexp_matches(${column}, ?)`, expression]]
  ]);

  const clauses = [];
  const params = [];
  for (const filter of filters) {
    const column = dimensionToColumn(filter.dimension, table);
    const compile = compilers.get(filter.operator);
    if (!compile) continue;
    const [clause, param] = compile(column, filter.expression);
    clauses.push(clause);
    params.push(param);
  }
  return { clause: clauses.join(" AND "), params };
}
192
/**
 * Returns the top-level-page predicate for the table's `page` column when
 * `plan.specialFilters.topLevel` is truthy, otherwise an empty string.
 */
function buildTopLevelWhere(plan, table) {
  const topLevelOnly = Boolean(plan.specialFilters.topLevel);
  return topLevelOnly ? topLevelPagePredicateSql(dimensionToColumn("page", table)) : "";
}
196
/**
 * Translates metric filters into a parameterised `HAVING` clause.
 *
 * @param filters Objects with `metric`, `operator`, `expression` and, for
 *   `metricBetween`, an optional `expression2` upper bound.
 * @returns `clause` (`"HAVING …"`, or `""` when no clause was produced) and
 *   `params` in clause order.
 */
function buildHaving(filters) {
  if (filters.length === 0) return { clause: "", params: [] };

  // Single-operand operators and their SQL comparison symbol.
  const comparisons = new Map([
    ["metricGte", ">="],
    ["metricGt", ">"],
    ["metricLte", "<="],
    ["metricLt", "<"]
  ]);

  const clauses = [];
  const params = [];
  for (const filter of filters) {
    const expr = METRIC_EXPR[filter.metric];
    if (filter.operator === "metricBetween") {
      clauses.push(`${expr} >= ? AND ${expr} <= ?`);
      // Without an upper bound the lower bound is used for both ends.
      params.push(filter.expression, filter.expression2 ?? filter.expression);
      continue;
    }
    const symbol = comparisons.get(filter.operator);
    if (symbol === undefined) continue;
    clauses.push(`${expr} ${symbol} ?`);
    params.push(filter.expression);
  }

  const clause = clauses.length > 0 ? `HAVING ${clauses.join(" AND ")}` : "";
  return { clause, params };
}
233
/**
 * Compiles a logical query plan into parameterised SQL over `read_parquet`.
 *
 * @param plan Logical plan; reads `dateRange`, `metrics`, `groupByDimensions`,
 *   `dimensionFilters`, `specialFilters`, `metricFilters`, `hasDate`,
 *   `orderBy`, `rowLimit`, `startRow` and `dataset`.
 * @param table Table to compile against; defaults to `plan.dataset`.
 * @returns `sql` (still containing the files placeholder), `params` (WHERE
 *   parameters followed by HAVING parameters), `partitions` covering the date
 *   range, `table`, and `filesPlaceholder`.
 */
function compileLogicalQueryPlan(plan, table = plan.dataset) {
  const { startDate, endDate } = plan.dateRange;
  const partitions = enumeratePartitions(startDate, endDate);
  const dateColumns = plan.hasDate ? ["date"] : [];

  // SELECT list: grouped dimensions, then `date` if requested, then metrics.
  const metricSelects = plan.metrics.map((metric) => `${METRIC_EXPR[metric]} AS ${metric}`);
  const dimensionSelects = plan.groupByDimensions.map((dimension) => {
    const column = dimensionToColumn(dimension, table);
    // Alias only when the column name differs from the dimension name.
    return column === dimension ? dimension : `${column} AS ${dimension}`;
  });
  const selectList = [...dimensionSelects, ...dateColumns, ...metricSelects].join(", ");

  // WHERE: the date range is always present; the other predicates are optional.
  const dimensionWhere = buildDimensionWhere(plan.dimensionFilters, table);
  const topLevelClause = buildTopLevelWhere(plan, table);
  const whereSql = ["date >= ?", "date <= ?", dimensionWhere.clause, topLevelClause].filter(Boolean).join(" AND ");
  const whereParams = [startDate, endDate, ...(dimensionWhere.clause ? dimensionWhere.params : [])];

  const having = buildHaving(plan.metricFilters);

  const groupByColumns = plan.groupByDimensions.map((dimension) => dimensionToColumn(dimension, table)).concat(dateColumns);
  const groupBy = groupByColumns.length > 0 ? `GROUP BY ${groupByColumns.join(", ")}` : "";

  // NOTE(review): the order-by column/direction, row limit and start row are
  // interpolated into the SQL text rather than bound — confirm they are
  // validated wherever the plan is built.
  let orderBy = "ORDER BY clicks DESC";
  if (plan.orderBy) orderBy = `ORDER BY ${plan.orderBy.column} ${plan.orderBy.dir.toUpperCase()}`;
  const limit = `LIMIT ${plan.rowLimit ?? 1e3}`;
  const offset = plan.startRow ? `OFFSET ${plan.startRow}` : "";

  const sql = [
    `SELECT ${selectList}`,
    `FROM read_parquet(${FILES_PLACEHOLDER}, union_by_name = true)`,
    `WHERE ${whereSql}`,
    groupBy,
    having.clause,
    orderBy,
    limit,
    offset
  ].filter(Boolean).join(" ").replace(/\s+/g, " ").trim();

  return {
    sql,
    params: [...whereParams, ...having.params],
    partitions,
    table,
    filesPlaceholder: FILES_PLACEHOLDER
  };
}
276
/**
 * Builds a logical plan from `state` (with the `regex` option enabled) and
 * compiles it to SQL.
 *
 * @param state Input handed to `buildLogicalPlan`.
 * @param table Optional table; when null or undefined the plan's `dataset` is used.
 * @returns The result of `compileLogicalQueryPlan`.
 */
function resolveToSQL(state, table) {
  const logicalPlan = buildLogicalPlan(state, { regex: true });
  const targetTable = table ?? logicalPlan.dataset;
  return compileLogicalQueryPlan(logicalPlan, targetTable);
}
280
/**
 * Renders object keys as a SQL list literal of single-quoted strings, e.g.
 * `['a', 'b']`. Single quotes inside a key are doubled.
 *
 * @param keys Object keys to render.
 * @returns `"[]"` for an empty array, otherwise the bracketed list.
 */
function fileList(keys) {
  if (keys.length === 0) return "[]";
  const quoted = keys.map((key) => `'${key.replace(/'/g, "''")}'`);
  return `[${quoted.join(", ")}]`;
}

/**
 * Replaces every `{{NAME}}` placeholder in `sql` with the list literal built
 * from `sets[NAME]`.
 *
 * @param sql SQL text containing `{{NAME}}` placeholders.
 * @param sets Map of placeholder name to the object keys to substitute.
 * @returns The SQL with all known placeholders substituted; placeholders
 *   without an entry in `sets` are left untouched.
 */
function substituteNamedFiles(sql, sets) {
  let out = sql;
  for (const [name, keys] of Object.entries(sets)) {
    // Literal split/join instead of RegExp + String.replace: the name is not
    // interpreted as a pattern, and `$&`, `$'`, `$1`… inside an object key are
    // inserted verbatim rather than expanded as replacement patterns.
    out = out.split(`{{${name}}}`).join(fileList(keys));
  }
  return out;
}
288
+ export { compactTieredImpl as a, substituteNamedFiles as i, compileLogicalQueryPlan as n, enumeratePartitions as o, resolveToSQL as r, FILES_PLACEHOLDER as t };
@@ -0,0 +1,26 @@
1
+ import { N as TableName, S as QueryExecutor, h as ParquetCodec, w as Row } from "./storage.mjs";
2
/**
 * Operations a DuckDB instance exposes; this is the type that
 * `DuckDBFactory.getDuckDB` resolves to.
 */
interface DuckDBHandle {
  /** Takes SQL text plus optional parameter values and resolves to `Row[]`. */
  query: (sql: string, params?: unknown[]) => Promise<Row[]>;
  /**
   * Takes a file name and its bytes.
   * NOTE(review): presumably makes the bytes readable by DuckDB under `name` — confirm.
   */
  registerFileBuffer: (name: string, bytes: Uint8Array) => Promise<void>;
  /** Takes a file name and resolves to that file's bytes. */
  copyFileToBuffer: (name: string) => Promise<Uint8Array>;
  /**
   * Takes a list of file names.
   * NOTE(review): presumably releases files registered earlier — confirm.
   */
  dropFiles: (names: string[]) => Promise<void>;
  /**
   * Produces a unique path that can be used as the target of `COPY TO '…'`
   * and then read back with `copyFileToBuffer`. Under Node the path is
   * absolute and lives in `os.tmpdir()`, which keeps DuckDB from littering
   * the CWD; in browsers/Workers it is a plain virtual-FS name.
   */
  makeTempPath: (ext: string) => string;
}
14
/**
 * Source of a `DuckDBHandle`; accepted by `createDuckDBCodec` and
 * `createDuckDBExecutor`.
 */
interface DuckDBFactory {
  /** Resolves to a `DuckDBHandle`. */
  getDuckDB: () => Promise<DuckDBHandle>;
}
17
/**
 * Creates a `ParquetCodec` from a `DuckDBFactory`.
 *
 * @param factory Provider of the `DuckDBHandle`.
 */
declare function createDuckDBCodec(factory: DuckDBFactory): ParquetCodec;
18
/**
 * Creates a `QueryExecutor` from a `DuckDBFactory`.
 *
 * @param factory Provider of the `DuckDBHandle`.
 */
declare function createDuckDBExecutor(factory: DuckDBFactory): QueryExecutor;
19
/**
 * Returns the canonical "empty-file" SELECT clause for `table`.
 *
 * A codec that has to write a schema-correct empty Parquet file can embed the
 * clause as
 *   `COPY (SELECT * FROM <clause> WHERE FALSE) TO '<key>' (FORMAT PARQUET)`
 * which satisfies the ParquetCodec empty-rows invariant.
 *
 * @param table Table whose clause is requested.
 */
declare function canonicalEmptyParquetSchema(table: TableName): string;
26
+ export { createDuckDBExecutor as a, createDuckDBCodec as i, DuckDBHandle as n, canonicalEmptyParquetSchema as r, DuckDBFactory as t };