@gscdump/engine 0.30.0 → 0.31.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_chunks/engine.mjs +7 -4
- package/dist/_chunks/entities.mjs +100 -1
- package/dist/_chunks/libs/icebird.d.mts +2 -2
- package/dist/_chunks/resolver.mjs +8 -6
- package/dist/_chunks/sink.d.mts +28 -1
- package/dist/entities.d.mts +55 -1
- package/dist/entities.mjs +2 -2
- package/dist/iceberg/index.d.mts +2 -2
- package/dist/iceberg/index.mjs +40 -3
- package/dist/rollups.d.mts +6 -0
- package/dist/rollups.mjs +38 -17
- package/package.json +4 -3
package/dist/_chunks/engine.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { dayPartition, hourPartition, inferSearchType, objectKey, tenantPrefix } from "./layout.mjs";
|
|
2
|
-
import { SCHEMAS, currentSchemaVersion, dateColumnsFor, dedupeByNaturalKey } from "./schema.mjs";
|
|
2
|
+
import { SCHEMAS, TABLE_METADATA, currentSchemaVersion, dateColumnsFor, dedupeByNaturalKey } from "./schema.mjs";
|
|
3
3
|
import { compactTieredImpl, dedupeOverlappingTiers, splitOverlappingTiers } from "./compaction.mjs";
|
|
4
4
|
import { dateReplaceClause as dateReplaceClause$1 } from "../sql-fragments.mjs";
|
|
5
5
|
import { compileLogicalQueryPlan, substituteNamedFiles } from "./parquet-plan.mjs";
|
|
@@ -100,11 +100,14 @@ function createDuckDBCodec(factory) {
|
|
|
100
100
|
}
|
|
101
101
|
};
|
|
102
102
|
}
|
|
103
|
+
const quoteCol = (c) => `"${c.replace(/"/g, "\"\"")}"`;
|
|
103
104
|
function dedupedMergeSql(table, fileListSql) {
|
|
104
105
|
const base = `SELECT * FROM read_parquet([${fileListSql}], union_by_name = true)`;
|
|
105
|
-
const
|
|
106
|
-
|
|
107
|
-
|
|
106
|
+
const sortKey = SCHEMAS[table].sortKey;
|
|
107
|
+
const clusterKey = TABLE_METADATA[table].clusterKey;
|
|
108
|
+
const dedup = sortKey.length === 0 ? base : `${base} QUALIFY row_number() OVER (PARTITION BY ${sortKey.map(quoteCol).join(", ")}) = 1`;
|
|
109
|
+
if (clusterKey.length === 0) return dedup;
|
|
110
|
+
return `${dedup} ORDER BY ${clusterKey.map(quoteCol).join(", ")}`;
|
|
108
111
|
}
|
|
109
112
|
function rewriteEmptyFileSets(sql, placeholders, defaultTable, placeholderTables) {
|
|
110
113
|
let out = sql;
|
|
@@ -16,6 +16,105 @@ async function readOptional(ds, key, signal) {
|
|
|
16
16
|
throw e;
|
|
17
17
|
});
|
|
18
18
|
}
|
|
19
|
+
const QUERY_DIM_COLUMNS = [
|
|
20
|
+
{
|
|
21
|
+
name: "query",
|
|
22
|
+
type: "VARCHAR",
|
|
23
|
+
nullable: false
|
|
24
|
+
},
|
|
25
|
+
{
|
|
26
|
+
name: "query_canonical",
|
|
27
|
+
type: "VARCHAR",
|
|
28
|
+
nullable: false
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
name: "intent_code",
|
|
32
|
+
type: "INTEGER",
|
|
33
|
+
nullable: false
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
name: "normalizer_version",
|
|
37
|
+
type: "INTEGER",
|
|
38
|
+
nullable: false
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
name: "intent_version",
|
|
42
|
+
type: "INTEGER",
|
|
43
|
+
nullable: false
|
|
44
|
+
}
|
|
45
|
+
];
|
|
46
|
+
function queryDimPrefix(ctx) {
|
|
47
|
+
return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/entities/query_dim` : `u_${ctx.userId}/entities/query_dim`;
|
|
48
|
+
}
|
|
49
|
+
function queryDimParquetKey(ctx) {
|
|
50
|
+
return `${queryDimPrefix(ctx)}/index.parquet`;
|
|
51
|
+
}
|
|
52
|
+
function queryDimMetaKey(ctx) {
|
|
53
|
+
return `${queryDimPrefix(ctx)}/index.json`;
|
|
54
|
+
}
|
|
55
|
+
function buildQueryDimRecords(queries, deps) {
|
|
56
|
+
const seen = /* @__PURE__ */ new Set();
|
|
57
|
+
const out = [];
|
|
58
|
+
for (const raw of queries) {
|
|
59
|
+
const query = String(raw);
|
|
60
|
+
if (query.trim() === "" || seen.has(query)) continue;
|
|
61
|
+
seen.add(query);
|
|
62
|
+
const canonical = deps.normalizeQuery(query);
|
|
63
|
+
out.push({
|
|
64
|
+
query,
|
|
65
|
+
query_canonical: canonical === "" ? query : canonical,
|
|
66
|
+
intent_code: deps.classifyIntentCode(query),
|
|
67
|
+
normalizer_version: deps.normalizerVersion,
|
|
68
|
+
intent_version: deps.intentVersion
|
|
69
|
+
});
|
|
70
|
+
}
|
|
71
|
+
return out;
|
|
72
|
+
}
|
|
73
|
+
function createQueryDimStore({ dataSource }) {
|
|
74
|
+
async function exists(key, prefix) {
|
|
75
|
+
return (await dataSource.list(prefix)).includes(key);
|
|
76
|
+
}
|
|
77
|
+
return {
|
|
78
|
+
parquetKey: queryDimParquetKey,
|
|
79
|
+
async write(ctx, records, builtAt) {
|
|
80
|
+
const parquetKey = queryDimParquetKey(ctx);
|
|
81
|
+
const bytes = encodeRowsToParquetFlex(records, {
|
|
82
|
+
columns: QUERY_DIM_COLUMNS,
|
|
83
|
+
sortKey: ["query"]
|
|
84
|
+
});
|
|
85
|
+
await dataSource.write(parquetKey, bytes);
|
|
86
|
+
const meta = {
|
|
87
|
+
version: 1,
|
|
88
|
+
builtAt,
|
|
89
|
+
rowCount: records.length,
|
|
90
|
+
normalizerVersion: records[0]?.normalizer_version ?? 0,
|
|
91
|
+
intentVersion: records[0]?.intent_version ?? 0
|
|
92
|
+
};
|
|
93
|
+
await dataSource.write(queryDimMetaKey(ctx), new TextEncoder().encode(JSON.stringify(meta)));
|
|
94
|
+
return {
|
|
95
|
+
parquetKey,
|
|
96
|
+
rowCount: records.length
|
|
97
|
+
};
|
|
98
|
+
},
|
|
99
|
+
async loadMeta(ctx) {
|
|
100
|
+
const key = queryDimMetaKey(ctx);
|
|
101
|
+
if (!await exists(key, `${queryDimPrefix(ctx)}/`)) return null;
|
|
102
|
+
const bytes = await dataSource.read(key);
|
|
103
|
+
return JSON.parse(new TextDecoder().decode(bytes));
|
|
104
|
+
},
|
|
105
|
+
async loadRecords(ctx) {
|
|
106
|
+
const key = queryDimParquetKey(ctx);
|
|
107
|
+
if (!await exists(key, `${queryDimPrefix(ctx)}/`)) return [];
|
|
108
|
+
return (await decodeParquetToRows(await dataSource.read(key))).map((r) => ({
|
|
109
|
+
query: String(r.query),
|
|
110
|
+
query_canonical: String(r.query_canonical),
|
|
111
|
+
intent_code: Number(r.intent_code),
|
|
112
|
+
normalizer_version: Number(r.normalizer_version),
|
|
113
|
+
intent_version: Number(r.intent_version)
|
|
114
|
+
}));
|
|
115
|
+
}
|
|
116
|
+
};
|
|
117
|
+
}
|
|
19
118
|
const YEAR_MONTH_RE = /^(\d{4})-(\d{2})-/;
|
|
20
119
|
function inspectionIndexKey(ctx) {
|
|
21
120
|
return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/entities/inspections/index.json` : `u_${ctx.userId}/entities/inspections/index.json`;
|
|
@@ -893,4 +992,4 @@ function createEmptyTypesStore(opts) {
|
|
|
893
992
|
}
|
|
894
993
|
};
|
|
895
994
|
}
|
|
896
|
-
export { INSPECTION_EVENT_COLUMNS, INSPECTION_HISTORY_MAX_BYTES, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionBaseKey, inspectionEventKey, inspectionEventsPrefix, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix };
|
|
995
|
+
export { INSPECTION_EVENT_COLUMNS, INSPECTION_HISTORY_MAX_BYTES, buildQueryDimRecords, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createQueryDimStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionBaseKey, inspectionEventKey, inspectionEventsPrefix, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, queryDimMetaKey, queryDimParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix };
|
|
@@ -133,7 +133,7 @@ interface Snapshot {
|
|
|
133
133
|
'sequence-number': number;
|
|
134
134
|
'timestamp-ms': number;
|
|
135
135
|
'manifest-list': string;
|
|
136
|
-
manifests?: Manifest
|
|
136
|
+
manifests?: Manifest[];
|
|
137
137
|
summary: {
|
|
138
138
|
// spec: "value of these fields should be of string type"
|
|
139
139
|
operation: string; // 'spark.app.id'?: string
|
|
@@ -192,7 +192,7 @@ interface MetadataLog {
|
|
|
192
192
|
'timestamp-ms': number;
|
|
193
193
|
'metadata-file': string;
|
|
194
194
|
}
|
|
195
|
-
interface Manifest
|
|
195
|
+
interface Manifest {
|
|
196
196
|
manifest_path: string;
|
|
197
197
|
manifest_length: bigint;
|
|
198
198
|
partition_spec_id: number;
|
|
@@ -297,8 +297,10 @@ function createSqlFragments(config) {
|
|
|
297
297
|
if (isMetricDimension(f.dimension)) continue;
|
|
298
298
|
if (f.dimension === "date") continue;
|
|
299
299
|
if (f.operator === "topLevel") continue;
|
|
300
|
-
const
|
|
301
|
-
const
|
|
300
|
+
const dim = f.dimension;
|
|
301
|
+
const cRef = colRef(tableKey, dimColumn(dim, tableKey));
|
|
302
|
+
const matchExpr = dim === "page" || dim === "queryCanonical" ? dimExprSql(dim, tableKey) : cRef;
|
|
303
|
+
const patternExpr = dim === "queryCanonical" ? matchExpr : cRef;
|
|
302
304
|
switch (f.operator) {
|
|
303
305
|
case "equals":
|
|
304
306
|
preds.push(sql`${matchExpr} = ${f.expression}`);
|
|
@@ -307,16 +309,16 @@ function createSqlFragments(config) {
|
|
|
307
309
|
preds.push(sql`${matchExpr} != ${f.expression}`);
|
|
308
310
|
break;
|
|
309
311
|
case "contains":
|
|
310
|
-
preds.push(sql`${
|
|
312
|
+
preds.push(sql`${patternExpr} LIKE ${`%${escapeLike(f.expression)}%`} ESCAPE '\\'`);
|
|
311
313
|
break;
|
|
312
314
|
case "notContains":
|
|
313
|
-
preds.push(sql`${
|
|
315
|
+
preds.push(sql`${patternExpr} NOT LIKE ${`%${escapeLike(f.expression)}%`} ESCAPE '\\'`);
|
|
314
316
|
break;
|
|
315
317
|
case "includingRegex":
|
|
316
|
-
preds.push(regexPredicate(
|
|
318
|
+
preds.push(regexPredicate(patternExpr, f.expression, false));
|
|
317
319
|
break;
|
|
318
320
|
case "excludingRegex":
|
|
319
|
-
preds.push(regexPredicate(
|
|
321
|
+
preds.push(regexPredicate(patternExpr, f.expression, true));
|
|
320
322
|
break;
|
|
321
323
|
}
|
|
322
324
|
}
|
package/dist/_chunks/sink.d.mts
CHANGED
|
@@ -185,6 +185,18 @@ interface IcebergPartitionSpec {
|
|
|
185
185
|
'spec-id': number;
|
|
186
186
|
'fields': IcebergPartitionSpecField[];
|
|
187
187
|
}
|
|
188
|
+
/** A field in an icebird `SortOrder`. */
|
|
189
|
+
interface IcebergSortOrderField {
|
|
190
|
+
'source-id': number;
|
|
191
|
+
'transform': 'identity';
|
|
192
|
+
'direction': 'asc' | 'desc';
|
|
193
|
+
'null-order': 'nulls-first' | 'nulls-last';
|
|
194
|
+
}
|
|
195
|
+
/** An icebird `SortOrder` (Iceberg write-order). */
|
|
196
|
+
interface IcebergSortOrder {
|
|
197
|
+
'order-id': number;
|
|
198
|
+
'fields': IcebergSortOrderField[];
|
|
199
|
+
}
|
|
188
200
|
/** Everything needed to talk to the R2 Data Catalog. */
|
|
189
201
|
interface IcebergCatalogConfig {
|
|
190
202
|
/** REST catalog URI, e.g. `https://catalog.cloudflarestorage.com/<acct>/<warehouse>`. */
|
|
@@ -220,6 +232,21 @@ declare function icebergSchemaFor(table: IcebergTableName, encoding?: PartitionK
|
|
|
220
232
|
* {@link icebergSchemaFor}.
|
|
221
233
|
*/
|
|
222
234
|
declare function icebergPartitionSpecFor(table: IcebergTableName, encoding?: PartitionKeyEncoding): IcebergPartitionSpec;
|
|
235
|
+
/**
|
|
236
|
+
* Build the icebird `SortOrder` for a fact table from its `clusterKey`
|
|
237
|
+
* (dimension-first, then `date`) — e.g. `pages` → sort by `url`, then `date`.
|
|
238
|
+
*
|
|
239
|
+
* Declared so any sort-aware compaction (a self-run `icebergRewrite`, or R2
|
|
240
|
+
* managed compaction if/when it honors sort order) re-clusters merged files the
|
|
241
|
+
* same way the append path already orders them ({@link sortByClusterKey} in
|
|
242
|
+
* `append-sink.ts`). R2's managed compaction currently only bin-packs small
|
|
243
|
+
* files without re-sorting, so this is forward-looking: it costs nothing today
|
|
244
|
+
* (the table simply carries the metadata) and means a future sort-aware pass
|
|
245
|
+
* produces globally clustered files for free, maximizing row-group skipping on
|
|
246
|
+
* the DuckDB-over-R2 read path. clusterKey columns are all non-null, so the
|
|
247
|
+
* null ordering is moot; `identity`/`asc` mirrors the physical write order.
|
|
248
|
+
*/
|
|
249
|
+
declare function icebergSortOrderFor(table: IcebergTableName, encoding?: PartitionKeyEncoding): IcebergSortOrder;
|
|
223
250
|
/** Options for {@link connectIcebergCatalog}. */
|
|
224
251
|
interface ConnectIcebergOptions {
|
|
225
252
|
/**
|
|
@@ -501,4 +528,4 @@ interface LocalIcebergSinkOptions extends SinkOptions {
|
|
|
501
528
|
/** S3-compatible warehouse location (POC: MinIO). */
|
|
502
529
|
warehouse: string;
|
|
503
530
|
}
|
|
504
|
-
export { CatalogCache, CommitRetryOptions, ConnectIcebergOptions, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_SCHEMAS_INT, ICEBERG_TABLES, INT_SEARCH_TYPE, IcebergAppendSinkOptions, IcebergCatalogConfig, IcebergColumn, IcebergColumnType, IcebergConnection, IcebergListedDataFile, IcebergPartitionField, IcebergPartitionSpec, IcebergPartitionSpecField, IcebergPartitionTransform, IcebergPrimitiveType, IcebergS3Config, IcebergSchema, IcebergSchemaField, IcebergTableName, IcebergTableOpResult, IcebergTableSpec, ListIcebergDataFilesOptions, LocalIcebergSinkOptions, PartitionKeyEncoding, SEARCH_TYPE_INT, Sink, SinkCapabilities, SinkCloseResult, SinkOptions, SinkSlice, SinkWriteResult, assertIcebergTable, connectIcebergCatalog, createIcebergTables, dropIcebergTables, ensureIcebergNamespace, icebergAppendRetrying, icebergPartitionColumns, icebergPartitionSpecFor, icebergSchemaFor, icebergSchemasFor, icebergTableSpec, isCommitRateLimited, isIcebergTable, listIcebergDataFiles, listIcebergTables };
|
|
531
|
+
export { CatalogCache, CommitRetryOptions, ConnectIcebergOptions, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_SCHEMAS_INT, ICEBERG_TABLES, INT_SEARCH_TYPE, IcebergAppendSinkOptions, IcebergCatalogConfig, IcebergColumn, IcebergColumnType, IcebergConnection, IcebergListedDataFile, IcebergPartitionField, IcebergPartitionSpec, IcebergPartitionSpecField, IcebergPartitionTransform, IcebergPrimitiveType, IcebergS3Config, IcebergSchema, IcebergSchemaField, IcebergSortOrder, IcebergSortOrderField, IcebergTableName, IcebergTableOpResult, IcebergTableSpec, ListIcebergDataFilesOptions, LocalIcebergSinkOptions, PartitionKeyEncoding, SEARCH_TYPE_INT, Sink, SinkCapabilities, SinkCloseResult, SinkOptions, SinkSlice, SinkWriteResult, assertIcebergTable, connectIcebergCatalog, createIcebergTables, dropIcebergTables, ensureIcebergNamespace, icebergAppendRetrying, icebergPartitionColumns, icebergPartitionSpecFor, icebergSchemaFor, icebergSchemasFor, icebergSortOrderFor, icebergTableSpec, isCommitRateLimited, isIcebergTable, listIcebergDataFiles, listIcebergTables };
|
package/dist/entities.d.mts
CHANGED
|
@@ -1,6 +1,60 @@
|
|
|
1
1
|
import { DataSource } from "./_chunks/storage.mjs";
|
|
2
2
|
import { ScheduleState } from "./schedule.mjs";
|
|
3
3
|
import { ColumnDef, TenantCtx } from "@gscdump/contracts";
|
|
4
|
+
interface QueryDimRecord {
|
|
5
|
+
query: string;
|
|
6
|
+
/** Lexical canonical, never empty: NULL/'' folds to the raw query. */
|
|
7
|
+
query_canonical: string;
|
|
8
|
+
/** Packed search-intent code (see `@gscdump/analysis` `encodeIntent`). */
|
|
9
|
+
intent_code: number;
|
|
10
|
+
normalizer_version: number;
|
|
11
|
+
intent_version: number;
|
|
12
|
+
}
|
|
13
|
+
/** JSON sidecar: versions + freshness, readable without decoding the parquet. */
|
|
14
|
+
interface QueryDimMeta {
|
|
15
|
+
version: 1;
|
|
16
|
+
builtAt: number;
|
|
17
|
+
rowCount: number;
|
|
18
|
+
normalizerVersion: number;
|
|
19
|
+
intentVersion: number;
|
|
20
|
+
}
|
|
21
|
+
declare function queryDimParquetKey(ctx: TenantCtx): string;
|
|
22
|
+
declare function queryDimMetaKey(ctx: TenantCtx): string;
|
|
23
|
+
/**
|
|
24
|
+
* Injected derivation. `engine` never imports `@gscdump/analysis`; the host
|
|
25
|
+
* passes `normalizeQuery` / `classifyIntentCode` (e.g. `encodeIntent ∘
|
|
26
|
+
* classifyQueryIntent`) plus their version constants.
|
|
27
|
+
*/
|
|
28
|
+
interface QueryDimDeps {
|
|
29
|
+
normalizeQuery: (query: string) => string;
|
|
30
|
+
normalizerVersion: number;
|
|
31
|
+
/** Returns the packed intent code for a raw query. */
|
|
32
|
+
classifyIntentCode: (query: string) => number;
|
|
33
|
+
intentVersion: number;
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* Pure: distinct raw queries → dimension records. De-dupes, drops empties, and
|
|
37
|
+
* folds an empty/whitespace canonical back to the raw query so the key is
|
|
38
|
+
* total (matches the read path's `COALESCE(NULLIF(query_canonical, ''), query)`).
|
|
39
|
+
*/
|
|
40
|
+
declare function buildQueryDimRecords(queries: Iterable<string>, deps: QueryDimDeps): QueryDimRecord[];
|
|
41
|
+
interface QueryDimStore {
|
|
42
|
+
parquetKey: (ctx: TenantCtx) => string;
|
|
43
|
+
/** Write the parquet + JSON sidecar. Last-write-wins; no history. */
|
|
44
|
+
write: (ctx: TenantCtx, records: readonly QueryDimRecord[], builtAt: number) => Promise<{
|
|
45
|
+
parquetKey: string;
|
|
46
|
+
rowCount: number;
|
|
47
|
+
}>;
|
|
48
|
+
/** Read the sidecar (versions + freshness), or null on first build. */
|
|
49
|
+
loadMeta: (ctx: TenantCtx) => Promise<QueryDimMeta | null>;
|
|
50
|
+
/** Decode the dimension rows (test/inspection; reads JOIN the parquet by key). */
|
|
51
|
+
loadRecords: (ctx: TenantCtx) => Promise<QueryDimRecord[]>;
|
|
52
|
+
}
|
|
53
|
+
declare function createQueryDimStore({
|
|
54
|
+
dataSource
|
|
55
|
+
}: {
|
|
56
|
+
dataSource: DataSource;
|
|
57
|
+
}): QueryDimStore;
|
|
4
58
|
/**
|
|
5
59
|
* GSC URL inspection result fields we persist. Mirrors the
|
|
6
60
|
* `searchconsole_v1.Schema$UrlInspectionResult` shape but as plain JSON
|
|
@@ -442,4 +496,4 @@ interface CreateEmptyTypesStoreOptions {
|
|
|
442
496
|
now?: () => number;
|
|
443
497
|
}
|
|
444
498
|
declare function createEmptyTypesStore(opts: CreateEmptyTypesStoreOptions): EmptyTypesStore;
|
|
445
|
-
export { CreateEmptyTypesStoreOptions, CreateIndexingMetadataStoreOptions, CreateInspectionStoreOptions, CreateSitemapStoreOptions, DateRange, DeltaEntry, EmptyTypesDoc, EmptyTypesStore, INSPECTION_EVENT_COLUMNS, INSPECTION_HISTORY_MAX_BYTES, IndexingMetadataIndex, IndexingMetadataRecord, IndexingMetadataStore, InspectionEventRow, InspectionHistoryShard, InspectionIndex, InspectionParquetRow, InspectionRecord, InspectionStore, LoadUrlsOptions, ParsedUrl, ReconcileResult, SitemapHistoryDoc, SitemapIndex, SitemapRecord, SitemapStore, SitemapUrlRecord, SnapshotUrlsResult, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionBaseKey, inspectionEventKey, inspectionEventsPrefix, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix };
|
|
499
|
+
export { CreateEmptyTypesStoreOptions, CreateIndexingMetadataStoreOptions, CreateInspectionStoreOptions, CreateSitemapStoreOptions, DateRange, DeltaEntry, EmptyTypesDoc, EmptyTypesStore, INSPECTION_EVENT_COLUMNS, INSPECTION_HISTORY_MAX_BYTES, IndexingMetadataIndex, IndexingMetadataRecord, IndexingMetadataStore, InspectionEventRow, InspectionHistoryShard, InspectionIndex, InspectionParquetRow, InspectionRecord, InspectionStore, LoadUrlsOptions, ParsedUrl, QueryDimDeps, QueryDimMeta, QueryDimRecord, QueryDimStore, ReconcileResult, SitemapHistoryDoc, SitemapIndex, SitemapRecord, SitemapStore, SitemapUrlRecord, SnapshotUrlsResult, buildQueryDimRecords, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createQueryDimStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionBaseKey, inspectionEventKey, inspectionEventsPrefix, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, queryDimMetaKey, queryDimParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix };
|
package/dist/entities.mjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { INSPECTION_EVENT_COLUMNS, INSPECTION_HISTORY_MAX_BYTES, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionBaseKey, inspectionEventKey, inspectionEventsPrefix, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix } from "./_chunks/entities.mjs";
|
|
2
|
-
export { INSPECTION_EVENT_COLUMNS, INSPECTION_HISTORY_MAX_BYTES, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionBaseKey, inspectionEventKey, inspectionEventsPrefix, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix };
|
|
1
|
+
import { INSPECTION_EVENT_COLUMNS, INSPECTION_HISTORY_MAX_BYTES, buildQueryDimRecords, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createQueryDimStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionBaseKey, inspectionEventKey, inspectionEventsPrefix, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, queryDimMetaKey, queryDimParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix } from "./_chunks/entities.mjs";
|
|
2
|
+
export { INSPECTION_EVENT_COLUMNS, INSPECTION_HISTORY_MAX_BYTES, buildQueryDimRecords, createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createQueryDimStore, createSitemapStore, emptyTypesKey, hashUrl, hashUrlList, indexingMetadataIndexKey, inspectionBaseKey, inspectionEventKey, inspectionEventsPrefix, inspectionHistoryPrefix, inspectionHistoryShardKey, inspectionIndexKey, inspectionParquetKey, queryDimMetaKey, queryDimParquetKey, sitemapHistoryKey, sitemapIndexKey, sitemapUrlsDeltaKey, sitemapUrlsIndexKey, sitemapUrlsIndexPrefix };
|
package/dist/iceberg/index.d.mts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { CatalogCache, CommitRetryOptions, ConnectIcebergOptions, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_SCHEMAS_INT, ICEBERG_TABLES, INT_SEARCH_TYPE, IcebergAppendSinkOptions, IcebergCatalogConfig, IcebergColumn, IcebergColumnType, IcebergConnection, IcebergListedDataFile, IcebergPartitionField, IcebergPartitionSpec, IcebergPartitionSpecField, IcebergPartitionTransform, IcebergPrimitiveType, IcebergS3Config, IcebergSchema, IcebergSchemaField, IcebergTableName, IcebergTableOpResult, IcebergTableSpec, ListIcebergDataFilesOptions, PartitionKeyEncoding, SEARCH_TYPE_INT, Sink, assertIcebergTable, connectIcebergCatalog, createIcebergTables, dropIcebergTables, ensureIcebergNamespace, icebergAppendRetrying, icebergPartitionColumns, icebergPartitionSpecFor, icebergSchemaFor, icebergSchemasFor, icebergTableSpec, isCommitRateLimited, isIcebergTable, listIcebergDataFiles, listIcebergTables } from "../_chunks/sink.mjs";
|
|
1
|
+
import { CatalogCache, CommitRetryOptions, ConnectIcebergOptions, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_SCHEMAS_INT, ICEBERG_TABLES, INT_SEARCH_TYPE, IcebergAppendSinkOptions, IcebergCatalogConfig, IcebergColumn, IcebergColumnType, IcebergConnection, IcebergListedDataFile, IcebergPartitionField, IcebergPartitionSpec, IcebergPartitionSpecField, IcebergPartitionTransform, IcebergPrimitiveType, IcebergS3Config, IcebergSchema, IcebergSchemaField, IcebergSortOrder, IcebergSortOrderField, IcebergTableName, IcebergTableOpResult, IcebergTableSpec, ListIcebergDataFilesOptions, PartitionKeyEncoding, SEARCH_TYPE_INT, Sink, assertIcebergTable, connectIcebergCatalog, createIcebergTables, dropIcebergTables, ensureIcebergNamespace, icebergAppendRetrying, icebergPartitionColumns, icebergPartitionSpecFor, icebergSchemaFor, icebergSchemasFor, icebergSortOrderFor, icebergTableSpec, isCommitRateLimited, isIcebergTable, listIcebergDataFiles, listIcebergTables } from "../_chunks/sink.mjs";
|
|
2
2
|
import { icebergCreateTable, icebergManifests, restCatalogLoadTable } from "../_chunks/libs/icebird.mjs";
|
|
3
3
|
type IcebergAppendSink = Sink;
|
|
4
4
|
/**
|
|
@@ -10,4 +10,4 @@ type IcebergAppendSink = Sink;
|
|
|
10
10
|
* with no rows never touches the network.
|
|
11
11
|
*/
|
|
12
12
|
declare function createIcebergAppendSink(options: IcebergAppendSinkOptions): IcebergAppendSink;
|
|
13
|
-
export { type CatalogCache, type CommitRetryOptions, type ConnectIcebergOptions, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_SCHEMAS_INT, ICEBERG_TABLES, INT_SEARCH_TYPE, type IcebergAppendSink, type IcebergAppendSinkOptions, type IcebergCatalogConfig, type IcebergColumn, type IcebergColumnType, type IcebergConnection, type IcebergListedDataFile, type IcebergPartitionField, type IcebergPartitionSpec, type IcebergPartitionSpecField, type IcebergPartitionTransform, type IcebergPrimitiveType, type IcebergS3Config, type IcebergSchema, type IcebergSchemaField, type IcebergTableName, type IcebergTableOpResult, type IcebergTableSpec, type ListIcebergDataFilesOptions, type PartitionKeyEncoding, SEARCH_TYPE_INT, assertIcebergTable, connectIcebergCatalog, createIcebergAppendSink, createIcebergTables, dropIcebergTables, ensureIcebergNamespace, icebergAppendRetrying, icebergCreateTable, icebergManifests, icebergPartitionColumns, icebergPartitionSpecFor, icebergSchemaFor, icebergSchemasFor, icebergTableSpec, isCommitRateLimited, isIcebergTable, listIcebergDataFiles, listIcebergTables, restCatalogLoadTable };
|
|
13
|
+
export { type CatalogCache, type CommitRetryOptions, type ConnectIcebergOptions, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_SCHEMAS_INT, ICEBERG_TABLES, INT_SEARCH_TYPE, type IcebergAppendSink, type IcebergAppendSinkOptions, type IcebergCatalogConfig, type IcebergColumn, type IcebergColumnType, type IcebergConnection, type IcebergListedDataFile, type IcebergPartitionField, type IcebergPartitionSpec, type IcebergPartitionSpecField, type IcebergPartitionTransform, type IcebergPrimitiveType, type IcebergS3Config, type IcebergSchema, type IcebergSchemaField, type IcebergSortOrder, type IcebergSortOrderField, type IcebergTableName, type IcebergTableOpResult, type IcebergTableSpec, type ListIcebergDataFilesOptions, type PartitionKeyEncoding, SEARCH_TYPE_INT, assertIcebergTable, connectIcebergCatalog, createIcebergAppendSink, createIcebergTables, dropIcebergTables, ensureIcebergNamespace, icebergAppendRetrying, icebergCreateTable, icebergManifests, icebergPartitionColumns, icebergPartitionSpecFor, icebergSchemaFor, icebergSchemasFor, icebergSortOrderFor, icebergTableSpec, isCommitRateLimited, isIcebergTable, listIcebergDataFiles, listIcebergTables, restCatalogLoadTable };
|
package/dist/iceberg/index.mjs
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { TABLE_METADATA } from "../_chunks/schema.mjs";
|
|
1
2
|
import { engineErrors } from "../errors.mjs";
|
|
2
3
|
import { ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_SCHEMAS_INT, ICEBERG_TABLES, INT_SEARCH_TYPE, SEARCH_TYPE_INT, assertIcebergTable, icebergPartitionColumns, icebergSchemasFor, icebergTableSpec, isIcebergTable } from "../_chunks/schema2.mjs";
|
|
3
4
|
import { cachingResolver, icebergAppend, icebergCreateTable, icebergDropTable, icebergManifests, restCatalogConnect, restCatalogCreateNamespace, restCatalogListTables, restCatalogLoadTable, s3SignedResolver } from "../_chunks/libs/icebird.mjs";
|
|
@@ -107,6 +108,23 @@ function icebergPartitionSpecFor(table, encoding = "string") {
|
|
|
107
108
|
}))
|
|
108
109
|
};
|
|
109
110
|
}
|
|
111
|
+
function icebergSortOrderFor(table, encoding = "string") {
|
|
112
|
+
const fields = icebergSchemasFor(encoding)[table].columns;
|
|
113
|
+
const fieldId = (name) => {
|
|
114
|
+
const col = fields.find((c) => c.name === name);
|
|
115
|
+
if (!col) throw new Error(`iceberg-catalog: table '${table}' has no '${name}' column`);
|
|
116
|
+
return col.fieldId;
|
|
117
|
+
};
|
|
118
|
+
return {
|
|
119
|
+
"order-id": 1,
|
|
120
|
+
"fields": TABLE_METADATA[table].clusterKey.map((col) => ({
|
|
121
|
+
"source-id": fieldId(col),
|
|
122
|
+
"transform": "identity",
|
|
123
|
+
"direction": "asc",
|
|
124
|
+
"null-order": "nulls-last"
|
|
125
|
+
}))
|
|
126
|
+
};
|
|
127
|
+
}
|
|
110
128
|
const CATALOG_CONFIG_TTL_MS = 3600 * 1e3;
|
|
111
129
|
function catalogConfigKey(config) {
|
|
112
130
|
return `gsc-catalog-cfg\0${config.catalogUri}\0${config.warehouse}`;
|
|
@@ -187,7 +205,8 @@ async function createIcebergTables(conn, tables = ICEBERG_TABLES, encoding = "st
|
|
|
187
205
|
namespace: conn.namespace,
|
|
188
206
|
table,
|
|
189
207
|
schema: icebergSchemaFor(table, encoding),
|
|
190
|
-
partitionSpec: icebergPartitionSpecFor(table, encoding)
|
|
208
|
+
partitionSpec: icebergPartitionSpecFor(table, encoding),
|
|
209
|
+
sortOrder: icebergSortOrderFor(table, encoding)
|
|
191
210
|
}).then(() => results.push({
|
|
192
211
|
table,
|
|
193
212
|
outcome: ok(void 0)
|
|
@@ -369,6 +388,24 @@ function dedupeByIdentity(table, records) {
|
|
|
369
388
|
}
|
|
370
389
|
return seen.size === records.length ? records : [...seen.values()];
|
|
371
390
|
}
|
|
391
|
+
function sortByClusterKey(table, records) {
|
|
392
|
+
const cols = TABLE_METADATA[table].clusterKey;
|
|
393
|
+
if (cols.length === 0 || records.length < 2) return records;
|
|
394
|
+
return records.slice().sort((a, b) => {
|
|
395
|
+
for (const col of cols) {
|
|
396
|
+
const av = a[col];
|
|
397
|
+
const bv = b[col];
|
|
398
|
+
if (av === bv) continue;
|
|
399
|
+
if (av == null) return -1;
|
|
400
|
+
if (bv == null) return 1;
|
|
401
|
+
if (typeof av === "number" && typeof bv === "number") return av - bv;
|
|
402
|
+
const as = String(av);
|
|
403
|
+
const bs = String(bv);
|
|
404
|
+
if (as !== bs) return as < bs ? -1 : 1;
|
|
405
|
+
}
|
|
406
|
+
return 0;
|
|
407
|
+
});
|
|
408
|
+
}
|
|
372
409
|
function toRecords(slice, rows, encoding) {
|
|
373
410
|
const siteVal = encoding === "int" ? toIntPartitionSiteId(slice.ctx.siteId) : slice.ctx.siteId ?? "";
|
|
374
411
|
const searchVal = encoding === "int" ? SEARCH_TYPE_INT[slice.searchType] : slice.searchType;
|
|
@@ -423,7 +460,7 @@ function createIcebergAppendSink(options) {
|
|
|
423
460
|
}
|
|
424
461
|
for (const [table, records] of buffers) {
|
|
425
462
|
if (records.length === 0) continue;
|
|
426
|
-
const deduped = dedupeByIdentity(table, records);
|
|
463
|
+
const deduped = sortByClusterKey(table, dedupeByIdentity(table, records));
|
|
427
464
|
await icebergAppendRetrying({
|
|
428
465
|
catalog: conn.catalog,
|
|
429
466
|
namespace: conn.namespace,
|
|
@@ -447,4 +484,4 @@ function createIcebergAppendSink(options) {
|
|
|
447
484
|
}
|
|
448
485
|
};
|
|
449
486
|
}
|
|
450
|
-
export { ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_SCHEMAS_INT, ICEBERG_TABLES, INT_SEARCH_TYPE, SEARCH_TYPE_INT, assertIcebergTable, connectIcebergCatalog, createIcebergAppendSink, createIcebergTables, dropIcebergTables, ensureIcebergNamespace, icebergAppendRetrying, icebergCreateTable, icebergManifests, icebergPartitionColumns, icebergPartitionSpecFor, icebergSchemaFor, icebergSchemasFor, icebergTableSpec, isCommitRateLimited, isIcebergTable, listIcebergDataFiles, listIcebergTables, restCatalogLoadTable };
|
|
487
|
+
export { ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_SCHEMAS_INT, ICEBERG_TABLES, INT_SEARCH_TYPE, SEARCH_TYPE_INT, assertIcebergTable, connectIcebergCatalog, createIcebergAppendSink, createIcebergTables, dropIcebergTables, ensureIcebergNamespace, icebergAppendRetrying, icebergCreateTable, icebergManifests, icebergPartitionColumns, icebergPartitionSpecFor, icebergSchemaFor, icebergSchemasFor, icebergSortOrderFor, icebergTableSpec, isCommitRateLimited, isIcebergTable, listIcebergDataFiles, listIcebergTables, restCatalogLoadTable };
|
package/dist/rollups.d.mts
CHANGED
|
@@ -244,6 +244,12 @@ declare function runWindowed(opts: {
|
|
|
244
244
|
start: string;
|
|
245
245
|
end: string;
|
|
246
246
|
}) => string;
|
|
247
|
+
/**
|
|
248
|
+
* Extra named file sets merged into every window's `runSQL` (alongside the
|
|
249
|
+
* windowed `FILES`). Use to JOIN a non-windowed sidecar (e.g. the query
|
|
250
|
+
* dimension parquet via `{ QUERY_DIM: { keys: [...] } }`) inside `sqlFor`.
|
|
251
|
+
*/
|
|
252
|
+
extraFileSets?: Record<string, FileSetRef>;
|
|
247
253
|
}): Promise<Row$1[]>;
|
|
248
254
|
/**
|
|
249
255
|
* Daily totals across the full history. One row per (date, table) with
|
package/dist/rollups.mjs
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import "./_chunks/layout.mjs";
|
|
2
2
|
import { engineErrors } from "./errors.mjs";
|
|
3
3
|
import { encodeRowsToParquetFlex } from "./adapters/hyparquet.mjs";
|
|
4
|
-
import { createIndexingMetadataStore, createSitemapStore, inspectionParquetKey, sitemapUrlsIndexPrefix } from "./_chunks/entities.mjs";
|
|
4
|
+
import { createIndexingMetadataStore, createQueryDimStore, createSitemapStore, inspectionParquetKey, sitemapUrlsIndexPrefix } from "./_chunks/entities.mjs";
|
|
5
5
|
import { MS_PER_DAY } from "gscdump";
|
|
6
6
|
function rollupPrefix(ctx, searchType) {
|
|
7
7
|
const base = ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/rollups` : `u_${ctx.userId}/rollups`;
|
|
@@ -237,10 +237,13 @@ async function runWindowed(opts) {
|
|
|
237
237
|
const result = await opts.engine.runSQL({
|
|
238
238
|
ctx: opts.ctx,
|
|
239
239
|
table: opts.table,
|
|
240
|
-
fileSets: {
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
240
|
+
fileSets: {
|
|
241
|
+
FILES: {
|
|
242
|
+
table: opts.table,
|
|
243
|
+
partitions: w.partitions
|
|
244
|
+
},
|
|
245
|
+
...opts.extraFileSets
|
|
246
|
+
},
|
|
244
247
|
sql: opts.sqlFor(w),
|
|
245
248
|
...opts.searchType !== void 0 ? { searchType: opts.searchType } : {}
|
|
246
249
|
});
|
|
@@ -638,23 +641,41 @@ const queryCanonicalDailyRollup = {
|
|
|
638
641
|
}
|
|
639
642
|
],
|
|
640
643
|
parquetSortKey: ["date", "query_canonical"],
|
|
641
|
-
async build({ engine, ctx, searchType }) {
|
|
644
|
+
async build({ engine, ctx, dataSource, searchType }) {
|
|
645
|
+
const dimStore = createQueryDimStore({ dataSource });
|
|
646
|
+
const useDim = await dimStore.loadMeta(ctx) !== null;
|
|
647
|
+
const canonExpr = useDim ? `COALESCE(qd.query_canonical, NULLIF(q.query_canonical, ''), q.query)` : `COALESCE(NULLIF(query_canonical, ''), query)`;
|
|
642
648
|
return (await runWindowed({
|
|
643
649
|
engine,
|
|
644
650
|
ctx,
|
|
645
651
|
table: "queries",
|
|
646
652
|
...searchType !== void 0 ? { searchType } : {},
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
653
|
+
...useDim ? { extraFileSets: { QUERY_DIM: {
|
|
654
|
+
table: "queries",
|
|
655
|
+
keys: [dimStore.parquetKey(ctx)]
|
|
656
|
+
} } } : {},
|
|
657
|
+
sqlFor: useDim ? (w) => `
|
|
658
|
+
SELECT
|
|
659
|
+
${canonExpr} AS query_canonical,
|
|
660
|
+
CAST(q.date AS VARCHAR) AS date,
|
|
661
|
+
SUM(q.clicks)::BIGINT AS clicks,
|
|
662
|
+
SUM(q.impressions)::BIGINT AS impressions,
|
|
663
|
+
SUM(q.sum_position)::DOUBLE AS sum_position
|
|
664
|
+
FROM read_parquet({{FILES}}, union_by_name = true) q
|
|
665
|
+
LEFT JOIN read_parquet({{QUERY_DIM}}, union_by_name = true) qd ON q.query = qd.query
|
|
666
|
+
WHERE q.date >= '${w.start}' AND q.date <= '${w.end}'
|
|
667
|
+
GROUP BY ${canonExpr}, q.date
|
|
668
|
+
` : (w) => `
|
|
669
|
+
SELECT
|
|
670
|
+
${canonExpr} AS query_canonical,
|
|
671
|
+
CAST(date AS VARCHAR) AS date,
|
|
672
|
+
SUM(clicks)::BIGINT AS clicks,
|
|
673
|
+
SUM(impressions)::BIGINT AS impressions,
|
|
674
|
+
SUM(sum_position)::DOUBLE AS sum_position
|
|
675
|
+
FROM read_parquet({{FILES}}, union_by_name = true)
|
|
676
|
+
WHERE date >= '${w.start}' AND date <= '${w.end}'
|
|
677
|
+
GROUP BY ${canonExpr}, date
|
|
678
|
+
`
|
|
658
679
|
})).map((r) => ({
|
|
659
680
|
query_canonical: String(r.query_canonical),
|
|
660
681
|
date: String(r.date),
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@gscdump/engine",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.30.0",
|
|
4
|
+
"version": "0.31.1",
|
|
5
5
|
"description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "Harlan Wilton",
|
|
@@ -191,8 +191,8 @@
|
|
|
191
191
|
"hyparquet": "^1.26.1",
|
|
192
192
|
"hyparquet-writer": "^0.16.1",
|
|
193
193
|
"proper-lockfile": "^4.1.2",
|
|
194
|
-
"@gscdump/contracts": "0.
|
|
195
|
-
"gscdump": "0.
|
|
194
|
+
"@gscdump/contracts": "0.31.1",
|
|
195
|
+
"gscdump": "0.31.1"
|
|
196
196
|
},
|
|
197
197
|
"devDependencies": {
|
|
198
198
|
"@duckdb/duckdb-wasm": "^1.32.0",
|
|
@@ -208,6 +208,7 @@
|
|
|
208
208
|
"build": "obuild",
|
|
209
209
|
"typecheck": "tsc --noEmit",
|
|
210
210
|
"test": "vitest",
|
|
211
|
+
"benchmark-store": "tsx scripts/benchmark-store.mts",
|
|
211
212
|
"r2-harness": "tsx scripts/r2-contention-harness.ts",
|
|
212
213
|
"backfill-audit": "tsx scripts/backfill-audit.ts"
|
|
213
214
|
}
|