@gscdump/engine 0.19.0 → 0.19.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_chunks/engine.mjs +28 -5
- package/dist/_chunks/parquet-plan.mjs +15 -6
- package/dist/_chunks/resolver.mjs +20 -15
- package/dist/_chunks/schema.d.mts +22 -2
- package/dist/_chunks/schema.mjs +15 -1
- package/dist/_chunks/storage.d.mts +16 -2
- package/dist/adapters/hyparquet.mjs +4 -3
- package/dist/index.d.mts +1 -1
- package/dist/index.mjs +1 -1
- package/dist/resolver/index.d.mts +2 -3
- package/dist/resolver/index.mjs +2 -2
- package/dist/schema.d.mts +2 -2
- package/dist/schema.mjs +2 -2
- package/dist/source/index.mjs +1 -1
- package/package.json +3 -3
package/dist/_chunks/engine.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { r as currentSchemaVersion, t as SCHEMAS } from "./schema.mjs";
|
|
1
|
+
import { i as dedupeByNaturalKey, r as currentSchemaVersion, t as SCHEMAS } from "./schema.mjs";
|
|
2
2
|
import { a as inferSearchType, c as objectKey, d as tenantPrefix, n as dayPartition, r as hourPartition } from "./storage.mjs";
|
|
3
3
|
import { c as dedupeOverlappingTiers, i as substituteNamedFiles, n as compileLogicalQueryPlan, o as compactTieredImpl } from "./parquet-plan.mjs";
|
|
4
4
|
import { sqlEscape } from "../sql-bind.mjs";
|
|
@@ -57,7 +57,7 @@ function createDuckDBCodec(factory) {
|
|
|
57
57
|
const outName = db.makeTempPath("parquet");
|
|
58
58
|
const fileList = inputUris.map((u) => `'${sqlEscape(u)}'`).join(", ");
|
|
59
59
|
try {
|
|
60
|
-
await db.query(`COPY (
|
|
60
|
+
await db.query(`COPY (${dedupedMergeSql(ctx.table, fileList)}) TO '${sqlEscape(outName)}' (FORMAT PARQUET)`);
|
|
61
61
|
const bytes = await db.copyFileToBuffer(outName);
|
|
62
62
|
const countRows = await db.query(`SELECT count(*)::BIGINT AS n FROM read_parquet('${sqlEscape(outName)}')`);
|
|
63
63
|
const rowCount = Number(countRows[0]?.n ?? 0);
|
|
@@ -82,7 +82,7 @@ function createDuckDBCodec(factory) {
|
|
|
82
82
|
}
|
|
83
83
|
try {
|
|
84
84
|
const fileList = inNames.map((n) => `'${sqlEscape(n)}'`).join(", ");
|
|
85
|
-
await db.query(`COPY (
|
|
85
|
+
await db.query(`COPY (${dedupedMergeSql(ctx.table, fileList)}) TO '${sqlEscape(outName)}' (FORMAT PARQUET)`);
|
|
86
86
|
registered.push(outName);
|
|
87
87
|
const bytes = await db.copyFileToBuffer(outName);
|
|
88
88
|
const countRows = await db.query(`SELECT count(*)::BIGINT AS n FROM read_parquet('${sqlEscape(outName)}')`);
|
|
@@ -98,6 +98,12 @@ function createDuckDBCodec(factory) {
|
|
|
98
98
|
}
|
|
99
99
|
};
|
|
100
100
|
}
|
|
101
|
+
function dedupedMergeSql(table, fileListSql) {
|
|
102
|
+
const base = `SELECT * FROM read_parquet([${fileListSql}], union_by_name = true)`;
|
|
103
|
+
const key = SCHEMAS[table].sortKey;
|
|
104
|
+
if (key.length === 0) return base;
|
|
105
|
+
return `${base} QUALIFY row_number() OVER (PARTITION BY ${key.map((c) => `"${c.replace(/"/g, "\"\"")}"`).join(", ")}) = 1`;
|
|
106
|
+
}
|
|
101
107
|
function rewriteEmptyFileSets(sql, placeholders, defaultTable, placeholderTables) {
|
|
102
108
|
let out = sql;
|
|
103
109
|
for (const [name, keys] of Object.entries(placeholders)) {
|
|
@@ -273,6 +279,23 @@ function normalizeRow(table, row) {
|
|
|
273
279
|
url: normalized
|
|
274
280
|
};
|
|
275
281
|
}
|
|
282
|
+
const DAILY_PARTITION_RE = /^daily\/(\d{4}-\d{2}-\d{2})$/;
|
|
283
|
+
function queryRangeOf(partitions) {
|
|
284
|
+
if (!partitions) return void 0;
|
|
285
|
+
let min;
|
|
286
|
+
let max;
|
|
287
|
+
for (const p of partitions) {
|
|
288
|
+
const m = DAILY_PARTITION_RE.exec(p);
|
|
289
|
+
if (!m) continue;
|
|
290
|
+
const d = m[1];
|
|
291
|
+
if (min === void 0 || d < min) min = d;
|
|
292
|
+
if (max === void 0 || d > max) max = d;
|
|
293
|
+
}
|
|
294
|
+
return min !== void 0 ? {
|
|
295
|
+
start: min,
|
|
296
|
+
end: max
|
|
297
|
+
} : void 0;
|
|
298
|
+
}
|
|
276
299
|
function createStorageEngine(opts) {
|
|
277
300
|
const { dataSource, manifestStore, codec, executor } = opts;
|
|
278
301
|
const defaultNow = opts.now ?? (() => Date.now());
|
|
@@ -295,7 +318,7 @@ function createStorageEngine(opts) {
|
|
|
295
318
|
partitions: [partition],
|
|
296
319
|
searchType: inferSearchType({ searchType })
|
|
297
320
|
});
|
|
298
|
-
const normalizedRows = rows.map((r) => normalizeRow(ctx.table, r));
|
|
321
|
+
const normalizedRows = dedupeByNaturalKey(ctx.table, rows.map((r) => normalizeRow(ctx.table, r)));
|
|
299
322
|
const key = objectKey(ctx, ctx.table, partition, now, searchType);
|
|
300
323
|
const { bytes: writtenBytes, rowCount } = await codec.writeRows({ table: ctx.table }, normalizedRows, key, dataSource);
|
|
301
324
|
let bytes = writtenBytes;
|
|
@@ -406,7 +429,7 @@ function createStorageEngine(opts) {
|
|
|
406
429
|
table: ref.table,
|
|
407
430
|
partitions: ref.partitions,
|
|
408
431
|
...opts.searchType !== void 0 ? { searchType: opts.searchType } : {}
|
|
409
|
-
})).map((e) => e.objectKey)];
|
|
432
|
+
}), queryRangeOf(ref.partitions)).map((e) => e.objectKey)];
|
|
410
433
|
}));
|
|
411
434
|
opts.signal?.throwIfAborted();
|
|
412
435
|
const fileKeys = {};
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { a as dimensionToColumn, r as currentSchemaVersion } from "./schema.mjs";
|
|
2
2
|
import { a as inferSearchType, c as objectKey, f as weekPartition, l as quarterOfMonth, n as dayPartition, o as mondayOfWeek, s as monthPartition, u as quarterPartition } from "./storage.mjs";
|
|
3
3
|
import { METRIC_EXPR, escapeLike, topLevelPagePredicateSql } from "../sql-fragments.mjs";
|
|
4
4
|
import { MS_PER_DAY } from "gscdump";
|
|
@@ -186,9 +186,12 @@ function partitionSpan(partition) {
|
|
|
186
186
|
};
|
|
187
187
|
}
|
|
188
188
|
}
|
|
189
|
-
function splitOverlappingTiers(entries) {
|
|
189
|
+
function splitOverlappingTiers(entries, queryRange) {
|
|
190
|
+
const rangeStartMs = queryRange ? Date.parse(`${queryRange.start}T00:00:00Z`) : void 0;
|
|
191
|
+
const rangeEndMs = queryRange ? Date.parse(`${queryRange.end}T00:00:00Z`) : void 0;
|
|
190
192
|
const spanned = [];
|
|
191
193
|
const kept = [];
|
|
194
|
+
const subsumed = [];
|
|
192
195
|
for (const entry of entries) {
|
|
193
196
|
const span = partitionSpan(entry.partition);
|
|
194
197
|
if (!span) {
|
|
@@ -196,7 +199,14 @@ function splitOverlappingTiers(entries) {
|
|
|
196
199
|
continue;
|
|
197
200
|
}
|
|
198
201
|
const days = [];
|
|
199
|
-
for (let t = span.startMs; t <= span.endMs; t += MS_PER_DAY)
|
|
202
|
+
for (let t = span.startMs; t <= span.endMs; t += MS_PER_DAY) {
|
|
203
|
+
if (rangeStartMs !== void 0 && (t < rangeStartMs || t > rangeEndMs)) continue;
|
|
204
|
+
days.push(t);
|
|
205
|
+
}
|
|
206
|
+
if (queryRange && days.length === 0) {
|
|
207
|
+
subsumed.push(entry);
|
|
208
|
+
continue;
|
|
209
|
+
}
|
|
200
210
|
spanned.push({
|
|
201
211
|
entry,
|
|
202
212
|
rank: span.rank,
|
|
@@ -205,7 +215,6 @@ function splitOverlappingTiers(entries) {
|
|
|
205
215
|
}
|
|
206
216
|
spanned.sort((a, b) => a.rank - b.rank || b.entry.createdAt - a.entry.createdAt);
|
|
207
217
|
const coveredBySearchType = /* @__PURE__ */ new Map();
|
|
208
|
-
const subsumed = [];
|
|
209
218
|
for (const { entry, days } of spanned) {
|
|
210
219
|
const slice = inferSearchType(entry);
|
|
211
220
|
let covered = coveredBySearchType.get(slice);
|
|
@@ -225,8 +234,8 @@ function splitOverlappingTiers(entries) {
|
|
|
225
234
|
subsumed
|
|
226
235
|
};
|
|
227
236
|
}
|
|
228
|
-
function dedupeOverlappingTiers(entries) {
|
|
229
|
-
return splitOverlappingTiers(entries).kept;
|
|
237
|
+
function dedupeOverlappingTiers(entries, queryRange) {
|
|
238
|
+
return splitOverlappingTiers(entries, queryRange).kept;
|
|
230
239
|
}
|
|
231
240
|
function monthEndMs(month) {
|
|
232
241
|
const [y, m] = month.split("-").map(Number);
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { f as drizzleSchema, t as SCHEMAS } from "./schema.mjs";
|
|
2
2
|
import { l as enumeratePartitions } from "./parquet-plan.mjs";
|
|
3
3
|
import { escapeLike } from "../sql-fragments.mjs";
|
|
4
4
|
import "../planner.mjs";
|
|
5
5
|
import { PgDialect } from "drizzle-orm/pg-core";
|
|
6
|
-
import { buildLogicalComparisonPlan, buildLogicalPlan } from "gscdump/query/plan";
|
|
6
|
+
import { UnresolvableDatasetError, buildLogicalComparisonPlan, buildLogicalPlan, inferDataset as inferLogicalDataset, isDatasetResolvable } from "gscdump/query/plan";
|
|
7
7
|
import { normalizeUrl } from "gscdump/normalize";
|
|
8
8
|
import { sql } from "drizzle-orm";
|
|
9
9
|
const DIMENSION_SURFACES = {
|
|
@@ -88,19 +88,22 @@ const LOGICAL_DATASETS = {
|
|
|
88
88
|
column: "date",
|
|
89
89
|
surfaces: ["api", "stored"]
|
|
90
90
|
}
|
|
91
|
+
} },
|
|
92
|
+
hourly_pages: { dimensions: {
|
|
93
|
+
page: {
|
|
94
|
+
column: "url",
|
|
95
|
+
surfaces: ["api", "stored"]
|
|
96
|
+
},
|
|
97
|
+
date: {
|
|
98
|
+
column: "date",
|
|
99
|
+
surfaces: ["api", "stored"]
|
|
100
|
+
},
|
|
101
|
+
hour: {
|
|
102
|
+
column: "hour",
|
|
103
|
+
surfaces: ["api", "stored"]
|
|
104
|
+
}
|
|
91
105
|
} }
|
|
92
106
|
};
|
|
93
|
-
function inferLogicalDataset(dimensions, filterDims = []) {
|
|
94
|
-
const allDims = new Set([...dimensions, ...filterDims]);
|
|
95
|
-
const has = (d) => allDims.has(d);
|
|
96
|
-
if (has("searchAppearance")) return "search_appearance";
|
|
97
|
-
if (has("page") && (has("query") || has("queryCanonical"))) return "page_keywords";
|
|
98
|
-
if (has("query") || has("queryCanonical")) return "keywords";
|
|
99
|
-
if (has("page")) return "pages";
|
|
100
|
-
if (has("country")) return "countries";
|
|
101
|
-
if (has("device")) return "devices";
|
|
102
|
-
return "devices";
|
|
103
|
-
}
|
|
104
107
|
function dimensionColumn(dim, dataset) {
|
|
105
108
|
return LOGICAL_DATASETS[dataset].dimensions[dim]?.column ?? dim;
|
|
106
109
|
}
|
|
@@ -141,6 +144,7 @@ function createSqlFragments(config) {
|
|
|
141
144
|
return datasetToTableKey[dataset];
|
|
142
145
|
}
|
|
143
146
|
function inferTable(dimensions, filterDims = []) {
|
|
147
|
+
if (!isDatasetResolvable(dimensions, filterDims)) throw new UnresolvableDatasetError(dimensions, filterDims);
|
|
144
148
|
return tableKeyForDataset(inferLogicalDataset(dimensions, filterDims));
|
|
145
149
|
}
|
|
146
150
|
const urlToPathExpr = urlToPathExprOverride ?? defaultSqliteUrlToPathExpr;
|
|
@@ -735,7 +739,8 @@ const PG_BASE_CONFIG = {
|
|
|
735
739
|
page_keywords: "page_keywords",
|
|
736
740
|
countries: "countries",
|
|
737
741
|
devices: "devices",
|
|
738
|
-
search_appearance: "search_appearance"
|
|
742
|
+
search_appearance: "search_appearance",
|
|
743
|
+
hourly_pages: "hourly_pages"
|
|
739
744
|
},
|
|
740
745
|
metricCast: "DOUBLE",
|
|
741
746
|
regexPredicate: (expr, pattern, negate) => negate ? sql`NOT regexp_matches(${expr}, ${pattern})` : sql`regexp_matches(${expr}, ${pattern})`,
|
|
@@ -856,4 +861,4 @@ function assertSchemaInSync(options) {
|
|
|
856
861
|
if (missing.length > 0 || extra.length > 0) throw new Error(`${label} drizzle schema for '${key}' drifted from SCHEMAS. Missing: [${missing.join(", ")}]. Extra: [${extra.join(", ")}].`);
|
|
857
862
|
}
|
|
858
863
|
}
|
|
859
|
-
export { LOGICAL_DATASETS as C,
|
|
864
|
+
export { LOGICAL_DATASETS as C, inferLogicalDataset as D, dimensionColumn as E, isDatasetResolvable as O, DIMENSION_SURFACES as S, assertDimensionsSupported as T, resolveComparisonSQL as _, pgResolverAdapter as a, createResolverAdapter as b, getFilterDimensions as c, matchesMetricFilter as d, matchesTopLevelPage as f, mergeExtras as g, buildTotalsSql as h, createParquetResolverAdapter as i, supportsDimensionOnSurface as k, getInternalFilters as l, buildExtrasQueries as m, runComparisonQuery as n, dimensionValue as o, metricValue as p, runOptimizedQuery as r, getDimensionFilters as s, assertSchemaInSync as t, matchesDimensionFilter as u, resolveToSQL as v, UnresolvableDatasetError as w, createSqlFragments as x, resolveToSQLOptimized as y };
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { ColumnDef, ColumnType, TableName, TableSchema, TableSchema as TableSchema$1 } from "@gscdump/contracts";
|
|
1
|
+
import { ColumnDef, ColumnType, Row, TableName, TableSchema, TableSchema as TableSchema$1 } from "@gscdump/contracts";
|
|
2
2
|
declare const pages: import("drizzle-orm/pg-core").PgTableWithColumns<{
|
|
3
3
|
name: "pages";
|
|
4
4
|
schema: undefined;
|
|
@@ -1479,5 +1479,25 @@ declare function currentSchemaVersion(table: TableName): number;
|
|
|
1479
1479
|
declare function schemaFor(table: TableName): TableSchema;
|
|
1480
1480
|
declare function allTables(): readonly TableName[];
|
|
1481
1481
|
declare function inferTable(dimensions: readonly string[]): TableName;
|
|
1482
|
+
/**
|
|
1483
|
+
* Natural-key columns for a table: the `sortKey` dimensions that uniquely
|
|
1484
|
+
* identify a metric row (e.g. `['date', 'url']` for `pages`).
|
|
1485
|
+
*/
|
|
1486
|
+
declare function naturalKeyColumns(table: TableName): readonly string[];
|
|
1487
|
+
/**
|
|
1488
|
+
* Collapse rows that share a natural key (see {@link naturalKeyColumns}) to a
|
|
1489
|
+
* single survivor.
|
|
1490
|
+
*
|
|
1491
|
+
* Correct write and compaction inputs never collide on the natural key — each
|
|
1492
|
+
* (date, dimension) tuple is produced exactly once per slice — so on healthy
|
|
1493
|
+
* data this is a no-op. It exists as a recurrence guard: the 2026-04 monthly
|
|
1494
|
+
* compaction corruption folded a complete month back on top of its own daily
|
|
1495
|
+
* inputs, doubling every row. Absorbing duplicates at every write/compaction
|
|
1496
|
+
* boundary keeps that class of bug from inflating impressions downstream.
|
|
1497
|
+
*
|
|
1498
|
+
* Last occurrence wins. Observed duplicates are byte-identical, so the choice
|
|
1499
|
+
* of survivor is immaterial.
|
|
1500
|
+
*/
|
|
1501
|
+
declare function dedupeByNaturalKey(table: TableName, rows: readonly Row[]): Row[];
|
|
1482
1502
|
declare function dimensionToColumn(dim: string, _table: TableName): string;
|
|
1483
|
-
export {
|
|
1503
|
+
export { hourly_pages as _, allTables as a, pages as b, dimensionToColumn as c, schemaFor as d, DrizzleSchema as f, drizzleSchema as g, devices as h, TableSchema$1 as i, inferTable as l, countries as m, ColumnType as n, currentSchemaVersion as o, TABLE_METADATA as p, SCHEMAS as r, dedupeByNaturalKey as s, ColumnDef as t, naturalKeyColumns as u, keywords as v, search_appearance as x, page_keywords as y };
|
package/dist/_chunks/schema.mjs
CHANGED
|
@@ -147,9 +147,23 @@ function inferTable(dimensions) {
|
|
|
147
147
|
if (dims.has("searchAppearance")) return "search_appearance";
|
|
148
148
|
return "devices";
|
|
149
149
|
}
|
|
150
|
+
function naturalKeyColumns(table) {
|
|
151
|
+
return TABLE_METADATA[table].sortKey;
|
|
152
|
+
}
|
|
153
|
+
function dedupeByNaturalKey(table, rows) {
|
|
154
|
+
const key = TABLE_METADATA[table].sortKey;
|
|
155
|
+
if (key.length === 0) return rows.slice();
|
|
156
|
+
const seen = /* @__PURE__ */ new Map();
|
|
157
|
+
for (const row of rows) {
|
|
158
|
+
const r = row;
|
|
159
|
+
const k = key.map((col) => `${r[col] ?? ""}`).join("\0");
|
|
160
|
+
seen.set(k, row);
|
|
161
|
+
}
|
|
162
|
+
return [...seen.values()];
|
|
163
|
+
}
|
|
150
164
|
function dimensionToColumn(dim, _table) {
|
|
151
165
|
if (dim === "page") return "url";
|
|
152
166
|
if (dim === "queryCanonical") return "query_canonical";
|
|
153
167
|
return dim;
|
|
154
168
|
}
|
|
155
|
-
export {
|
|
169
|
+
export { search_appearance as _, dimensionToColumn as a, schemaFor as c, devices as d, drizzleSchema as f, pages as g, page_keywords as h, dedupeByNaturalKey as i, TABLE_METADATA as l, keywords as m, allTables as n, inferTable as o, hourly_pages as p, currentSchemaVersion as r, naturalKeyColumns as s, SCHEMAS as t, countries as u };
|
|
@@ -38,13 +38,27 @@ declare function enumeratePartitions(startDate: string, endDate: string): string
|
|
|
38
38
|
* monthly) still double-counts those boundary days — eliminating that needs
|
|
39
39
|
* per-file date predicates in the SQL, tracked separately. Unrecognised
|
|
40
40
|
* partition shapes (`hourly/`, sidecar keys) are always kept.
|
|
41
|
+
*
|
|
42
|
+
* `queryRange` clamps every entry's day-span to the window the caller will
|
|
43
|
+
* actually read. This is required when `entries` came from a partition-
|
|
44
|
+
* filtered `listLive` (`runSQL` enumerates only the partitions intersecting
|
|
45
|
+
* the query): a `monthly/2026-04` whose Apr 27-30 falls past the query end
|
|
46
|
+
* must not be judged "unsubsumed" just because `weekly/2026-04-27` wasn't
|
|
47
|
+
* enumerated — those out-of-window days are SQL-filtered to nothing anyway.
|
|
48
|
+
* Omit `queryRange` when `entries` is the full manifest (e.g. analysis-sources).
|
|
41
49
|
*/
|
|
42
|
-
declare function splitOverlappingTiers(entries: ManifestEntry[]
|
|
50
|
+
declare function splitOverlappingTiers(entries: ManifestEntry[], queryRange?: {
|
|
51
|
+
start: string;
|
|
52
|
+
end: string;
|
|
53
|
+
}): {
|
|
43
54
|
kept: ManifestEntry[];
|
|
44
55
|
subsumed: ManifestEntry[];
|
|
45
56
|
};
|
|
46
57
|
/** Entries worth reading — see {@link splitOverlappingTiers}. */
|
|
47
|
-
declare function dedupeOverlappingTiers(entries: ManifestEntry[]
|
|
58
|
+
declare function dedupeOverlappingTiers(entries: ManifestEntry[], queryRange?: {
|
|
59
|
+
start: string;
|
|
60
|
+
end: string;
|
|
61
|
+
}): ManifestEntry[];
|
|
48
62
|
/**
|
|
49
63
|
* Default `searchType` for entries written before the field landed and for
|
|
50
64
|
* sync paths that don't request a specific type. GSC's own default; the
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { i as dedupeByNaturalKey, l as TABLE_METADATA, t as SCHEMAS } from "../_chunks/schema.mjs";
|
|
2
2
|
import { parquetReadObjects } from "hyparquet";
|
|
3
3
|
import { parquetWriteBuffer } from "hyparquet-writer";
|
|
4
4
|
const ROW_GROUP_SIZE = 25e3;
|
|
@@ -134,11 +134,12 @@ function createHyparquetCodec(options = {}) {
|
|
|
134
134
|
const rows = await decodeParquetToRows(await dataSource.read(key));
|
|
135
135
|
allRows.push(...rows);
|
|
136
136
|
}
|
|
137
|
-
const
|
|
137
|
+
const rows = dedupeByNaturalKey(ctx.table, allRows);
|
|
138
|
+
const bytes = encodeRowsToParquet(ctx.table, rows);
|
|
138
139
|
await dataSource.write(outputKey, bytes);
|
|
139
140
|
return {
|
|
140
141
|
bytes: bytes.byteLength,
|
|
141
|
-
rowCount:
|
|
142
|
+
rowCount: rows.length
|
|
142
143
|
};
|
|
143
144
|
}
|
|
144
145
|
};
|
package/dist/index.d.mts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { A as SyncStateKind, B as hourPartition, C as Row, D as SyncState, E as StorageEngine, F as WatermarkFilter, G as RAW_DAILY_COMPACT_THRESHOLD, H as inferSearchType, I as WatermarkScope, J as enumeratePartitions, K as countRawDailies, L as WriteCtx, M as TableName, N as TenantCtx, O as SyncStateDetail, P as Watermark, R as WriteResult, S as QueryResult, T as SearchType, U as objectKey, V as inferLegacyTier, W as CompactionThresholds, Y as splitOverlappingTiers, _ as PurgeUrlsResult, a as EngineOptions, b as QueryExecuteResult, c as Grain, d as ManifestEntry, f as ManifestPurgeResult, g as PurgeResult, h as PurgeFilter, i as DataSource, j as SyncStateScope, k as SyncStateFilter, l as ListLiveFilter, m as ParquetCodec, n as CompactionTier, o as FileSetRef, p as ManifestStore, q as dedupeOverlappingTiers, r as DEFAULT_SEARCH_TYPE, s as GcCtx, t as CodecCtx, u as LockScope, v as QueryCtx, w as RunSQLOptions, x as QueryExecutor, y as QueryExecuteOptions, z as dayPartition } from "./_chunks/storage.mjs";
|
|
2
2
|
import { a as createDuckDBExecutor, i as createDuckDBCodec, n as DuckDBHandle, r as canonicalEmptyParquetSchema, t as DuckDBFactory } from "./_chunks/duckdb.mjs";
|
|
3
|
-
import { _ as
|
|
3
|
+
import { _ as hourly_pages, a as allTables, b as pages, c as dimensionToColumn, f as DrizzleSchema, g as drizzleSchema, h as devices, i as TableSchema, l as inferTable, m as countries, n as ColumnType, o as currentSchemaVersion, p as TABLE_METADATA, r as SCHEMAS, t as ColumnDef, v as keywords, y as page_keywords } from "./_chunks/schema.mjs";
|
|
4
4
|
import { InspectionVerdict, SchedulePolicy, ScheduleState, fixedPolicy, inspectionPolicy, sitemapPolicy } from "./schedule.mjs";
|
|
5
5
|
import { GscApiRow, IngestOptions, RowAccumulator, RowAccumulatorOptions, createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
|
|
6
6
|
import { a as substituteNamedFiles, i as resolveParquetSQL, n as ResolvedQuery, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
|
package/dist/index.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { n as coerceRows, t as coerceRow } from "./_chunks/coerce.mjs";
|
|
2
|
-
import { a as
|
|
2
|
+
import { a as dimensionToColumn, d as devices, f as drizzleSchema, g as pages, h as page_keywords, l as TABLE_METADATA, m as keywords, n as allTables, o as inferTable, p as hourly_pages, r as currentSchemaVersion, t as SCHEMAS, u as countries } from "./_chunks/schema.mjs";
|
|
3
3
|
import { a as inferSearchType, c as objectKey, i as inferLegacyTier, n as dayPartition, r as hourPartition, t as DEFAULT_SEARCH_TYPE } from "./_chunks/storage.mjs";
|
|
4
4
|
import { a as RAW_DAILY_COMPACT_THRESHOLD, c as dedupeOverlappingTiers, i as substituteNamedFiles, l as enumeratePartitions, r as resolveParquetSQL, s as countRawDailies, t as FILES_PLACEHOLDER, u as splitOverlappingTiers } from "./_chunks/parquet-plan.mjs";
|
|
5
5
|
import { bindLiterals, formatLiteral } from "./sql-bind.mjs";
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { M as TableName$1, T as SearchType$1 } from "../_chunks/storage.mjs";
|
|
2
2
|
import { a as ResolvedSQLOptimized, i as ResolvedSQL, n as ExtraQuery, o as ResolverAdapter, r as ResolvedComparisonSQL, s as ResolverOptions, t as ComparisonFilter } from "../_chunks/types.mjs";
|
|
3
|
-
import { LogicalDataset, LogicalDataset as LogicalDataset$1, PlannerCapabilities } from "gscdump/query/plan";
|
|
3
|
+
import { LogicalDataset, LogicalDataset as LogicalDataset$1, PlannerCapabilities, UnresolvableDatasetError, inferDataset as inferLogicalDataset, isDatasetResolvable } from "gscdump/query/plan";
|
|
4
4
|
import { SQL } from "drizzle-orm";
|
|
5
5
|
import { Grain, TableName } from "@gscdump/contracts";
|
|
6
6
|
import { BuilderState, Dimension, FilterInput, InternalFilter, Metric } from "gscdump/query";
|
|
@@ -14,7 +14,6 @@ interface LogicalDatasetDefinition {
|
|
|
14
14
|
}
|
|
15
15
|
declare const DIMENSION_SURFACES: Record<Dimension, readonly DimensionSurface[]>;
|
|
16
16
|
declare const LOGICAL_DATASETS: Record<LogicalDataset, LogicalDatasetDefinition>;
|
|
17
|
-
declare function inferLogicalDataset(dimensions: readonly Dimension[], filterDims?: readonly Dimension[]): LogicalDataset;
|
|
18
17
|
declare function dimensionColumn(dim: Dimension, dataset: LogicalDataset): string;
|
|
19
18
|
declare function supportsDimensionOnSurface(dim: Dimension, surface: DimensionSurface): boolean;
|
|
20
19
|
declare function assertDimensionsSupported(dimensions: readonly Dimension[], surface: DimensionSurface, context: string): void;
|
|
@@ -175,4 +174,4 @@ interface AssertSchemaInSyncOptions {
|
|
|
175
174
|
mode: 'exact' | 'superset';
|
|
176
175
|
}
|
|
177
176
|
declare function assertSchemaInSync(options: AssertSchemaInSyncOptions): void;
|
|
178
|
-
export { type AssertSchemaInSyncOptions, type ComparisonFilter, type ComparisonQueryResult, type CreateResolverAdapterConfig, DIMENSION_SURFACES, type DimensionBinding, type DimensionSurface, type ExtraQuery, LOGICAL_DATASETS, type LogicalDataset, type LogicalDatasetDefinition, type OptimizedQueryResult, type PgTableKey, type ResolvedComparisonSQL, type ResolvedSQL, type ResolvedSQLOptimized, type ResolverAdapter, type ResolverOptions, type RunQueryCtx, type RunSQLFn, type SqlFragments, type SqlFragmentsConfig, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, runComparisonQuery, runOptimizedQuery, supportsDimensionOnSurface };
|
|
177
|
+
export { type AssertSchemaInSyncOptions, type ComparisonFilter, type ComparisonQueryResult, type CreateResolverAdapterConfig, DIMENSION_SURFACES, type DimensionBinding, type DimensionSurface, type ExtraQuery, LOGICAL_DATASETS, type LogicalDataset, type LogicalDatasetDefinition, type OptimizedQueryResult, type PgTableKey, type ResolvedComparisonSQL, type ResolvedSQL, type ResolvedSQLOptimized, type ResolverAdapter, type ResolverOptions, type RunQueryCtx, type RunSQLFn, type SqlFragments, type SqlFragmentsConfig, UnresolvableDatasetError, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, isDatasetResolvable, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, runComparisonQuery, runOptimizedQuery, supportsDimensionOnSurface };
|
package/dist/resolver/index.mjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { C as LOGICAL_DATASETS, D as
|
|
2
|
-
export { DIMENSION_SURFACES, LOGICAL_DATASETS, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, runComparisonQuery, runOptimizedQuery, supportsDimensionOnSurface };
|
|
1
|
+
import { C as LOGICAL_DATASETS, D as inferLogicalDataset, E as dimensionColumn, O as isDatasetResolvable, S as DIMENSION_SURFACES, T as assertDimensionsSupported, _ as resolveComparisonSQL, a as pgResolverAdapter, b as createResolverAdapter, c as getFilterDimensions, d as matchesMetricFilter, f as matchesTopLevelPage, g as mergeExtras, h as buildTotalsSql, i as createParquetResolverAdapter, k as supportsDimensionOnSurface, l as getInternalFilters, m as buildExtrasQueries, n as runComparisonQuery, o as dimensionValue, p as metricValue, r as runOptimizedQuery, s as getDimensionFilters, t as assertSchemaInSync, u as matchesDimensionFilter, v as resolveToSQL, w as UnresolvableDatasetError, x as createSqlFragments, y as resolveToSQLOptimized } from "../_chunks/resolver.mjs";
|
|
2
|
+
export { DIMENSION_SURFACES, LOGICAL_DATASETS, UnresolvableDatasetError, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, isDatasetResolvable, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, runComparisonQuery, runOptimizedQuery, supportsDimensionOnSurface };
|
package/dist/schema.d.mts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { _ as
|
|
2
|
-
export { type ColumnDef, type ColumnType, type DrizzleSchema, SCHEMAS, TABLE_METADATA, type TableSchema, allTables, countries, currentSchemaVersion, devices, dimensionToColumn, drizzleSchema, hourly_pages, inferTable, keywords, page_keywords, pages, schemaFor, search_appearance };
|
|
1
|
+
import { _ as hourly_pages, a as allTables, b as pages, c as dimensionToColumn, d as schemaFor, f as DrizzleSchema, g as drizzleSchema, h as devices, i as TableSchema, l as inferTable, m as countries, n as ColumnType, o as currentSchemaVersion, p as TABLE_METADATA, r as SCHEMAS, s as dedupeByNaturalKey, t as ColumnDef, u as naturalKeyColumns, v as keywords, x as search_appearance, y as page_keywords } from "./_chunks/schema.mjs";
|
|
2
|
+
export { type ColumnDef, type ColumnType, type DrizzleSchema, SCHEMAS, TABLE_METADATA, type TableSchema, allTables, countries, currentSchemaVersion, dedupeByNaturalKey, devices, dimensionToColumn, drizzleSchema, hourly_pages, inferTable, keywords, naturalKeyColumns, page_keywords, pages, schemaFor, search_appearance };
|
package/dist/schema.mjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { a as
|
|
2
|
-
export { SCHEMAS, TABLE_METADATA, allTables, countries, currentSchemaVersion, devices, dimensionToColumn, drizzleSchema, hourly_pages, inferTable, keywords, page_keywords, pages, schemaFor, search_appearance };
|
|
1
|
+
import { _ as search_appearance, a as dimensionToColumn, c as schemaFor, d as devices, f as drizzleSchema, g as pages, h as page_keywords, i as dedupeByNaturalKey, l as TABLE_METADATA, m as keywords, n as allTables, o as inferTable, p as hourly_pages, r as currentSchemaVersion, s as naturalKeyColumns, t as SCHEMAS, u as countries } from "./_chunks/schema.mjs";
|
|
2
|
+
export { SCHEMAS, TABLE_METADATA, allTables, countries, currentSchemaVersion, dedupeByNaturalKey, devices, dimensionToColumn, drizzleSchema, hourly_pages, inferTable, keywords, naturalKeyColumns, page_keywords, pages, schemaFor, search_appearance };
|
package/dist/source/index.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { n as coerceRows } from "../_chunks/coerce.mjs";
|
|
2
|
-
import { a as pgResolverAdapter, c as getFilterDimensions, v as resolveToSQL
|
|
2
|
+
import { T as assertDimensionsSupported, a as pgResolverAdapter, c as getFilterDimensions, v as resolveToSQL } from "../_chunks/resolver.mjs";
|
|
3
3
|
import { n as runAnalyzerFromSource } from "../_chunks/dispatch.mjs";
|
|
4
4
|
var AttachedTableMissingError = class extends Error {
|
|
5
5
|
missing;
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@gscdump/engine",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.19.
|
|
4
|
+
"version": "0.19.2",
|
|
5
5
|
"description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "Harlan Wilton",
|
|
@@ -169,8 +169,8 @@
|
|
|
169
169
|
"dependencies": {
|
|
170
170
|
"drizzle-orm": "^0.45.2",
|
|
171
171
|
"proper-lockfile": "^4.1.2",
|
|
172
|
-
"gscdump": "0.19.
|
|
173
|
-
"@gscdump/contracts": "0.19.
|
|
172
|
+
"gscdump": "0.19.2",
|
|
173
|
+
"@gscdump/contracts": "0.19.2"
|
|
174
174
|
},
|
|
175
175
|
"devDependencies": {
|
|
176
176
|
"@duckdb/duckdb-wasm": "^1.32.0",
|