@gscdump/engine 0.25.14 → 0.26.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,202 @@
1
/**
 * Factory table for engine error values.
 *
 * Every factory returns a plain object carrying a `kind` discriminator, a
 * human-readable `message`, and whatever structured fields the factory was
 * given (e.g. `tool`, `missing`, `table`, `cause`). Nothing here throws; the
 * returned objects are values to be passed around or converted later.
 */
const engineErrors = {
  // NOTE(review): this message shares the capability-missing wording with a
  // hard-coded `[executeSql]` — confirm that is intentional for this kind.
  analyzerNotFound: (tool) => ({
    kind: "analyzer-not-found",
    tool,
    message: `analyzer "${tool}" requires capabilities [executeSql] not provided by source`
  }),

  // `missing` is joined with ", " so it is expected to be an array of names.
  analyzerCapabilityMissing: (tool, missing) => ({
    kind: "analyzer-capability-missing",
    tool,
    missing,
    message: `analyzer "${tool}" requires capabilities [${missing.join(", ")}] not provided by source`
  }),

  // --- invalid-sql-literal -------------------------------------------------
  nonFiniteNumberLiteral: (value) => ({
    kind: "invalid-sql-literal",
    message: `cannot inline non-finite number: ${value}`
  }),

  controlCharsInLiteral: () => ({
    kind: "invalid-sql-literal",
    message: "string literal contains disallowed control characters"
  }),

  uninlinableLiteralType: (type) => ({
    kind: "invalid-sql-literal",
    message: `cannot inline value of type ${type}`
  }),

  // --- placeholder-arity-mismatch -----------------------------------------
  morePlaceholdersThanParams: (have) => ({
    kind: "placeholder-arity-mismatch",
    message: `bindLiterals: more '?' placeholders than params (have ${have})`
  }),

  // The leading `$` in the template is literal; `${n}` is the interpolation.
  dollarPlaceholderOutOfRange: (n, have) => ({
    kind: "placeholder-arity-mismatch",
    message: `bindLiterals: $${n} out of range (have ${have} params)`
  }),

  mixedPlaceholderStyles: () => ({
    kind: "placeholder-arity-mismatch",
    message: "bindLiterals: cannot mix '?' and '$N' placeholders in the same query"
  }),

  unusedParams: (unused) => ({
    kind: "placeholder-arity-mismatch",
    message: `bindLiterals: ${unused} params unused`
  }),

  // --- invalid-search-types -----------------------------------------------
  searchTypesNotArray: () => ({
    kind: "invalid-search-types",
    message: "enabledSearchTypes must be a non-empty array"
  }),

  // String() keeps the message well-defined for symbols and other non-strings.
  unknownSearchType: (value) => ({
    kind: "invalid-search-types",
    message: `enabledSearchTypes: unknown searchType ${String(value)}`
  }),

  searchTypesMissingWeb: () => ({
    kind: "invalid-search-types",
    message: "enabledSearchTypes must include \"web\""
  }),

  // --- attached tables / manifest -----------------------------------------
  attachedTableMissing: (missing) => ({
    kind: "attached-table-missing",
    missing,
    message: `attached-table source: required table(s) not attached: ${missing.join(", ")}`
  }),

  manifestCasExhausted: (siteId, table, attempts) => ({
    kind: "manifest-cas-exhausted",
    siteId,
    table,
    attempts,
    message: `R2 manifest CAS exceeded ${attempts} retries for ${siteId}/${table}`
  }),

  // --- snapshot index ------------------------------------------------------
  invalidSnapshotFilename: (fileName) => ({
    kind: "invalid-snapshot-filename",
    fileName,
    message: `snapshotAlias: unrecognised filename ${JSON.stringify(fileName)}`
  }),

  unsupportedSnapshotIndexVersion: (version) => ({
    kind: "unsupported-snapshot-index-version",
    version,
    message: `attachSnapshotIndex: unsupported snapshot index version ${String(version)}; expected 1`
  }),

  invalidSchemaIdentifier: (schema) => ({
    kind: "invalid-schema-identifier",
    schema,
    message: `attachSnapshotIndex: invalid schema identifier ${JSON.stringify(schema)}`
  }),

  invalidYearMonth: (value) => ({
    kind: "invalid-year-month",
    value,
    message: `attachSnapshotIndex: invalid YYYY-MM entry ${JSON.stringify(value)} in index.cold`
  }),

  missingAttachUrl: (fileName) => ({
    kind: "missing-attach-url",
    fileName,
    message: `attachSnapshotIndex: attachUrls missing entry for ${fileName}`
  }),

  manifestCasRoundLost: (siteId, table, attempt) => ({
    kind: "manifest-cas-round-lost",
    siteId,
    table,
    attempt,
    message: `R2 manifest CAS round ${attempt} lost the conditional-PUT race for ${siteId}/${table}`
  }),

  // --- wrapped failures: the original `cause` is kept on the value ---------
  icebergTableOpFailed: (op, table, cause) => ({
    kind: "iceberg-table-op-failed",
    op,
    table,
    cause,
    message: String(cause)
  }),

  sinkTableFlushFailed: (table, cause) => ({
    kind: "sink-table-flush-failed",
    table,
    cause,
    message: String(cause)
  }),

  rollupBuildFailed: (id, cause) => {
    // Prefer the stack for Error causes, fall back to the message when the
    // stack is empty/absent, and stringify anything that is not an Error.
    let message;
    if (cause instanceof Error) {
      message = cause.stack || cause.message;
    } else {
      message = String(cause);
    }
    return {
      kind: "rollup-build-failed",
      id,
      cause,
      message
    };
  },

  lockAcquireTimeout: (scope, timeoutMs) => ({
    kind: "lock-acquire-timeout",
    scope,
    timeoutMs,
    message: `withLock: timed out acquiring ${scope} after ${timeoutMs}ms`
  })
};
171
/**
 * The closed set of `kind` discriminators recognised as engine errors.
 * Several factories share one kind (e.g. every literal-formatting failure is
 * "invalid-sql-literal"), so this list is shorter than the factory table.
 */
const ENGINE_ERROR_KINDS = new Set();
for (const kind of [
  "analyzer-not-found",
  "analyzer-capability-missing",
  "invalid-sql-literal",
  "placeholder-arity-mismatch",
  "invalid-search-types",
  "attached-table-missing",
  "manifest-cas-exhausted",
  "invalid-snapshot-filename",
  "unsupported-snapshot-index-version",
  "invalid-schema-identifier",
  "invalid-year-month",
  "missing-attach-url",
  "manifest-cas-round-lost",
  "iceberg-table-op-failed",
  "sink-table-flush-failed",
  "rollup-build-failed",
  "lock-acquire-timeout"
]) {
  ENGINE_ERROR_KINDS.add(kind);
}
190
/**
 * Type guard for engine error values.
 *
 * @param {unknown} value - Anything; primitives and `null` are rejected.
 * @returns {boolean} `true` when `value` is a non-null object whose `kind` is
 *   a member of `ENGINE_ERROR_KINDS` and whose `message` is a string.
 */
function isEngineError(value) {
  // typeof null === "object", so null needs its own check.
  if (typeof value !== "object" || value === null) {
    return false;
  }
  if (!ENGINE_ERROR_KINDS.has(value.kind)) {
    return false;
  }
  return typeof value.message === "string";
}
193
/**
 * Render an engine error for display.
 *
 * @param {{ message: string }} error - An engine error value.
 * @returns {string} The error's `message`, unchanged.
 */
function formatEngineError(error) {
  const { message } = error;
  return message;
}
196
/**
 * Convert an engine error value into a throwable `Error`.
 *
 * @param {object} error - An engine error value with at least a `message`.
 * @returns {Error} A new `Error` whose message is `error.message`. The
 *   original value is attached as `engineError`, and `cause` is copied across
 *   only when the value has a `cause` that is not `undefined`.
 */
function engineErrorToException(error) {
  const exception = new Error(error.message);
  const carriesCause = "cause" in error && error.cause !== undefined;
  if (carriesCause) {
    exception.cause = error.cause;
  }
  exception.engineError = error;
  return exception;
}
202
+ export { engineErrorToException, engineErrors, formatEngineError, isEngineError };
@@ -1,4 +1,6 @@
1
+ import { engineErrors } from "../errors.mjs";
1
2
  import { ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, assertIcebergTable, icebergTableSpec, isIcebergTable } from "../_chunks/schema2.mjs";
3
+ import { err, ok } from "gscdump/result";
2
4
  import { icebergAppend, icebergCreateTable, icebergDropTable, icebergManifests, restCatalogConnect, restCatalogCreateNamespace, restCatalogListTables, restCatalogLoadTable, s3SignedResolver } from "icebird";
3
5
  const ICEBERG_TYPE_MAP = {
4
6
  STRING: "string",
@@ -88,16 +90,15 @@ async function createIcebergTables(conn, tables = ICEBERG_TABLES) {
88
90
  partitionSpec: icebergPartitionSpecFor(table)
89
91
  }).then(() => results.push({
90
92
  table,
91
- ok: true
93
+ outcome: ok(void 0)
92
94
  }), (e) => results.push({
93
95
  table,
94
- ok: false,
95
- error: String(e)
96
+ outcome: err(engineErrors.icebergTableOpFailed("create", table, e))
96
97
  }));
97
98
  return results;
98
99
  }
99
100
  async function listIcebergTables(conn) {
100
- return restCatalogListTables(conn.catalog, { namespace: conn.namespace }).then((list) => list.map((t) => t.name).sort(), () => []);
101
+ return (await restCatalogListTables(conn.catalog, { namespace: conn.namespace })).map((t) => t.name).sort();
101
102
  }
102
103
  function monthsInRange(range) {
103
104
  const [sy, sm] = range.start.split("-").map(Number);
@@ -156,7 +157,7 @@ async function listIcebergDataFiles(conn, opts) {
156
157
  return out;
157
158
  }
158
159
  async function dropIcebergTables(conn, tables) {
159
- const targets = tables ?? await restCatalogListTables(conn.catalog, { namespace: conn.namespace }).then((list) => list.map((t) => t.name), () => []);
160
+ const targets = tables ?? (await restCatalogListTables(conn.catalog, { namespace: conn.namespace })).map((t) => t.name);
160
161
  const results = [];
161
162
  for (const table of targets) await icebergDropTable({
162
163
  catalog: conn.catalog,
@@ -165,11 +166,10 @@ async function dropIcebergTables(conn, tables) {
165
166
  purgeRequested: true
166
167
  }).then(() => results.push({
167
168
  table,
168
- ok: true
169
+ outcome: ok(void 0)
169
170
  }), (e) => results.push({
170
171
  table,
171
- ok: false,
172
- error: String(e)
172
+ outcome: err(engineErrors.icebergTableOpFailed("drop", table, e))
173
173
  }));
174
174
  return results;
175
175
  }
@@ -226,14 +226,14 @@ function createIcebergAppendSink(options) {
226
226
  flushed,
227
227
  failed
228
228
  };
229
- const conn = await connect().then((c) => c, (err) => {
229
+ const conn = await connect().then((c) => c, (cause) => {
230
230
  connection = void 0;
231
- return { error: String(err) };
231
+ return { connectError: cause };
232
232
  });
233
- if ("error" in conn) {
233
+ if ("connectError" in conn) {
234
234
  for (const [table, records] of buffers) if (records.length > 0) failed.push({
235
235
  table,
236
- error: conn.error
236
+ error: engineErrors.sinkTableFlushFailed(table, conn.connectError)
237
237
  });
238
238
  buffers.clear();
239
239
  return {
@@ -251,10 +251,10 @@ function createIcebergAppendSink(options) {
251
251
  records
252
252
  }, options.commitRetry).then(() => {
253
253
  flushed.push(table);
254
- }, (err) => {
254
+ }, (cause) => {
255
255
  failed.push({
256
256
  table,
257
- error: String(err)
257
+ error: engineErrors.sinkTableFlushFailed(table, cause)
258
258
  });
259
259
  });
260
260
  }
package/dist/index.d.mts CHANGED
@@ -1,6 +1,7 @@
1
1
  import { CodecCtx, CompactionThresholds, CompactionTier, DataSource, EngineOptions, FileSetRef, GcCtx, Grain, ListLiveFilter, LockScope, ManifestEntry, ManifestPurgeResult, ManifestStore, ParquetCodec, PurgeFilter, PurgeResult, PurgeUrlsResult, QueryCtx, QueryExecuteOptions, QueryExecuteResult, QueryExecutor, QueryResult, Row, RunSQLOptions, SearchType, StorageEngine, SyncState, SyncStateDetail, SyncStateFilter, SyncStateKind, SyncStateScope, TableName, TenantCtx, Watermark, WatermarkFilter, WatermarkScope, WriteCtx, WriteResult, enumeratePartitions } from "./_chunks/storage.mjs";
2
2
  import { DuckDBFactory, DuckDBHandle, canonicalEmptyParquetSchema, createDuckDBCodec, createDuckDBExecutor } from "./_chunks/duckdb.mjs";
3
3
  import { ColumnDef, ColumnType, DrizzleSchema, SCHEMAS, TABLE_METADATA, TableSchema, allTables, countries, currentSchemaVersion, dates, dimensionToColumn, drizzleSchema, hourly_pages, inferTable, page_queries, pages, queries } from "./_chunks/schema.mjs";
4
+ import { EngineError, EngineErrorKind, engineErrorToException, engineErrors, formatEngineError, isEngineError } from "./_chunks/errors.mjs";
4
5
  import { InspectionVerdict, SchedulePolicy, ScheduleState, fixedPolicy, inspectionPolicy, sitemapPolicy } from "./schedule.mjs";
5
6
  import { IcebergTableName, Sink, SinkCapabilities, SinkCloseResult, SinkOptions, SinkSlice, SinkWriteResult } from "./_chunks/sink.mjs";
6
7
  import { GscApiRow, IngestOptions, RowAccumulator, RowAccumulatorOptions, assembleDatesRow, createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
@@ -9,8 +10,9 @@ import { createIcebergResolverAdapter, createParquetResolverAdapter, pgResolverA
9
10
  import { rebuildDailyFromHourly } from "./rollups.mjs";
10
11
  import { ENGINE_QUERY_CAPABILITIES, createSqlQuerySource } from "./_chunks/index.mjs";
11
12
  import { bindLiterals, formatLiteral } from "./sql-bind.mjs";
12
- import { Grain as Grain$1, Row as Row$1, TableName as TableName$1, TenantCtx as TenantCtx$1 } from "@gscdump/contracts";
13
+ import { Result } from "gscdump/result";
13
14
  import { SearchType as SearchType$1 } from "gscdump/query";
15
+ import { Grain as Grain$1, Row as Row$1, TableName as TableName$1, TenantCtx as TenantCtx$1 } from "@gscdump/contracts";
14
16
  declare function coerceRow(row: Row$1): Row$1;
15
17
  declare function coerceRows(rows: readonly Row$1[]): Row$1[];
16
18
  declare const MAX_DAY_BYTES: number;
@@ -160,6 +162,12 @@ declare function createInMemorySink(): InMemorySink;
160
162
  type SyncTableName = Extract<TableName$1, 'pages' | 'queries' | 'countries' | 'page_queries' | 'dates'>;
161
163
  declare const TABLES_BY_SEARCH_TYPE: Record<SearchType, readonly SyncTableName[]>;
162
164
  declare function parseEnabledSearchTypes(raw: string | null | undefined): SearchType[];
165
+ /**
166
+ * Errors-as-values core for {@link validateEnabledSearchTypes}: returns a typed
167
+ * `invalid-search-types` `EngineError` instead of throwing, so a host saving the
168
+ * persisted config can map a bad value to a 4xx rather than a 500.
169
+ */
170
+ declare function validateEnabledSearchTypesResult(value: unknown): Result<SearchType[], EngineError>;
163
171
  declare function validateEnabledSearchTypes(value: unknown): SearchType[];
164
172
  declare const TABLE_TIERS: {
165
173
  readonly pages: "critical";
@@ -182,4 +190,4 @@ declare const MIN_SYNC_IMPRESSIONS = 1;
182
190
  declare const MIN_COUNTRY_IMPRESSIONS = 10;
183
191
  declare const MAX_SITEMAP_URLS_PER_SITE = 50000;
184
192
  declare const MAX_TRACKED_URLS_PER_SITE = 200000;
185
- export { type CodecCtx, type ColumnDef, type ColumnType, type CompactionThresholds, type CompactionTier, type CreateIngestAccumulatorOptions, DEFAULT_SEARCH_TYPE, type DataSource, type DateWeight, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, ENGINE_QUERY_CAPABILITIES, type EngineOptions, FILES_PLACEHOLDER, type FileSetRef, type FinalizeOptions, type FinalizeResult, type GcCtx, type Grain, type GscApiRow, type InMemorySink, type IngestAccumulator, type IngestAccumulatorCtx, type IngestAccumulatorEngine, type IngestAccumulatorHooks, type IngestOptions, type InspectionVerdict, type ListLiveFilter, type LockScope, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, ROW_LIMIT_R2, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SchedulePolicy, type ScheduleState, type SearchType, type Sink, type SinkCapabilities, type SinkCloseResult, type SinkOptions, type SinkSlice, type SinkWriteResult, type StorageEngine, type StoredRow, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, type SyncTableName, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, type TableName, type TableSchema, type TableTier, type TenantCtx, type TieredTableName, WEIGHT_PRIORITY, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, assembleDatesRow, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countries, createDuckDBCodec, createDuckDBExecutor, createIcebergResolverAdapter, createInMemorySink, createIngestAccumulator, createNoopIngestAccumulator, 
createParquetResolverAdapter, createRowAccumulator, createSqlQuerySource, createStorageEngine, currentSchemaVersion, dates, dayPartition, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, objectKey, page_queries, pages, parseEnabledSearchTypes, pgResolverAdapter, queries, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
193
+ export { type CodecCtx, type ColumnDef, type ColumnType, type CompactionThresholds, type CompactionTier, type CreateIngestAccumulatorOptions, DEFAULT_SEARCH_TYPE, type DataSource, type DateWeight, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, ENGINE_QUERY_CAPABILITIES, EngineError, EngineErrorKind, type EngineOptions, FILES_PLACEHOLDER, type FileSetRef, type FinalizeOptions, type FinalizeResult, type GcCtx, type Grain, type GscApiRow, type InMemorySink, type IngestAccumulator, type IngestAccumulatorCtx, type IngestAccumulatorEngine, type IngestAccumulatorHooks, type IngestOptions, type InspectionVerdict, type ListLiveFilter, type LockScope, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, ROW_LIMIT_R2, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SchedulePolicy, type ScheduleState, type SearchType, type Sink, type SinkCapabilities, type SinkCloseResult, type SinkOptions, type SinkSlice, type SinkWriteResult, type StorageEngine, type StoredRow, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, type SyncTableName, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, type TableName, type TableSchema, type TableTier, type TenantCtx, type TieredTableName, WEIGHT_PRIORITY, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, assembleDatesRow, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countries, createDuckDBCodec, createDuckDBExecutor, createIcebergResolverAdapter, createInMemorySink, createIngestAccumulator, 
createNoopIngestAccumulator, createParquetResolverAdapter, createRowAccumulator, createSqlQuerySource, createStorageEngine, currentSchemaVersion, dates, dayPartition, dimensionToColumn, drizzleSchema, engineErrorToException, engineErrors, enumeratePartitions, fixedPolicy, formatEngineError, formatLiteral, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, isEngineError, objectKey, page_queries, pages, parseEnabledSearchTypes, pgResolverAdapter, queries, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes, validateEnabledSearchTypesResult };
package/dist/index.mjs CHANGED
@@ -3,6 +3,7 @@ import { DEFAULT_SEARCH_TYPE, dayPartition, hourPartition, inferLegacyTier, infe
3
3
  import { SCHEMAS, TABLE_METADATA, allTables, countries, currentSchemaVersion, dates, dimensionToColumn, drizzleSchema, hourly_pages, inferTable, page_queries, pages, queries } from "./_chunks/schema.mjs";
4
4
  import { enumeratePartitions } from "./_chunks/compaction.mjs";
5
5
  import { FILES_PLACEHOLDER, resolveParquetSQL, substituteNamedFiles } from "./_chunks/parquet-plan.mjs";
6
+ import { engineErrorToException, engineErrors, formatEngineError, isEngineError } from "./errors.mjs";
6
7
  import { bindLiterals, formatLiteral } from "./sql-bind.mjs";
7
8
  import { MAX_DAY_BYTES, canonicalEmptyParquetSchema, createDuckDBCodec, createDuckDBExecutor, createStorageEngine } from "./_chunks/engine.mjs";
8
9
  import { assembleDatesRow, createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
@@ -10,6 +11,7 @@ import "./planner.mjs";
10
11
  import { createIcebergResolverAdapter, createParquetResolverAdapter, pgResolverAdapter } from "./_chunks/resolver.mjs";
11
12
  import { rebuildDailyFromHourly } from "./rollups.mjs";
12
13
  import { fixedPolicy, inspectionPolicy, sitemapPolicy } from "./schedule.mjs";
14
+ import { err, ok, unwrapResult } from "gscdump/result";
13
15
  const NOOP_RESULT = {
14
16
  flushed: 0,
15
17
  recovered: 0,
@@ -228,18 +230,21 @@ function parseEnabledSearchTypes(raw) {
228
230
  if (!valid.includes("web")) valid.unshift("web");
229
231
  return valid;
230
232
  }
231
- function validateEnabledSearchTypes(value) {
232
- if (!Array.isArray(value) || value.length === 0) throw new Error("enabledSearchTypes must be a non-empty array");
233
+ function validateEnabledSearchTypesResult(value) {
234
+ if (!Array.isArray(value) || value.length === 0) return err(engineErrors.searchTypesNotArray());
233
235
  const seen = /* @__PURE__ */ new Set();
234
236
  const out = [];
235
237
  for (const v of value) {
236
- if (typeof v !== "string" || !(v in TABLES_BY_SEARCH_TYPE)) throw new Error(`enabledSearchTypes: unknown searchType ${String(v)}`);
238
+ if (typeof v !== "string" || !(v in TABLES_BY_SEARCH_TYPE)) return err(engineErrors.unknownSearchType(v));
237
239
  if (seen.has(v)) continue;
238
240
  seen.add(v);
239
241
  out.push(v);
240
242
  }
241
- if (!out.includes("web")) throw new Error("enabledSearchTypes must include \"web\"");
242
- return out;
243
+ if (!out.includes("web")) return err(engineErrors.searchTypesMissingWeb());
244
+ return ok(out);
245
+ }
246
+ function validateEnabledSearchTypes(value) {
247
+ return unwrapResult(validateEnabledSearchTypesResult(value), engineErrorToException);
243
248
  }
244
249
  const TABLE_TIERS = {
245
250
  pages: "critical",
@@ -277,4 +282,4 @@ const MIN_SYNC_IMPRESSIONS = 1;
277
282
  const MIN_COUNTRY_IMPRESSIONS = 10;
278
283
  const MAX_SITEMAP_URLS_PER_SITE = 5e4;
279
284
  const MAX_TRACKED_URLS_PER_SITE = 2e5;
280
- export { DEFAULT_SEARCH_TYPE, ENGINE_QUERY_CAPABILITIES, FILES_PLACEHOLDER, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, ROW_LIMIT_R2, SCHEMAS, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, WEIGHT_PRIORITY, allTables, assembleDatesRow, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countries, createDuckDBCodec, createDuckDBExecutor, createIcebergResolverAdapter, createInMemorySink, createIngestAccumulator, createNoopIngestAccumulator, createParquetResolverAdapter, createRowAccumulator, createSqlQuerySource, createStorageEngine, currentSchemaVersion, dates, dayPartition, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, objectKey, page_queries, pages, parseEnabledSearchTypes, pgResolverAdapter, queries, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
285
+ export { DEFAULT_SEARCH_TYPE, ENGINE_QUERY_CAPABILITIES, FILES_PLACEHOLDER, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, ROW_LIMIT_R2, SCHEMAS, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, WEIGHT_PRIORITY, allTables, assembleDatesRow, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countries, createDuckDBCodec, createDuckDBExecutor, createIcebergResolverAdapter, createInMemorySink, createIngestAccumulator, createNoopIngestAccumulator, createParquetResolverAdapter, createRowAccumulator, createSqlQuerySource, createStorageEngine, currentSchemaVersion, dates, dayPartition, dimensionToColumn, drizzleSchema, engineErrorToException, engineErrors, enumeratePartitions, fixedPolicy, formatEngineError, formatLiteral, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, isEngineError, objectKey, page_queries, pages, parseEnabledSearchTypes, pgResolverAdapter, queries, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes, validateEnabledSearchTypesResult };
@@ -3,8 +3,8 @@ import { ComparisonFilter, ExtraQuery, ResolvedComparisonSQL, ResolvedSQL, Resol
3
3
  import { PgTableKey, createIcebergResolverAdapter, createParquetResolverAdapter, pgResolverAdapter } from "../_chunks/pg-adapter.mjs";
4
4
  import { LogicalDataset, LogicalDataset as LogicalDataset$1, PlannerCapabilities, UnresolvableDatasetError, inferDataset as inferLogicalDataset, isDatasetResolvable } from "gscdump/query/plan";
5
5
  import { SQL } from "drizzle-orm";
6
- import { Grain, TableName } from "@gscdump/contracts";
7
6
  import { BuilderState, Dimension, FilterInput, InternalFilter, Metric } from "gscdump/query";
7
+ import { Grain, TableName } from "@gscdump/contracts";
8
8
  type DimensionSurface = 'api' | 'stored' | 'derived';
9
9
  interface DimensionBinding {
10
10
  column: string;
@@ -1,7 +1,8 @@
1
1
  import { DataSource, FileSetRef, Row as Row$1 } from "./_chunks/storage.mjs";
2
2
  import { ColumnDef } from "./_chunks/schema.mjs";
3
- import { TenantCtx } from "@gscdump/contracts";
3
+ import { EngineError } from "./_chunks/errors.mjs";
4
4
  import { SearchType } from "gscdump/query";
5
+ import { TenantCtx } from "@gscdump/contracts";
5
6
  interface RollupCtx extends TenantCtx {
6
7
  /** When the rollup was built. Stamped into payload + filename. */
7
8
  builtAt: number;
@@ -181,9 +182,10 @@ interface RebuildRollupResult {
181
182
  /**
182
183
  * Set when this def's build/encode/write failed. The runner records the
183
184
  * failure and continues with the remaining defs so one bad rollup never
184
- * aborts the rest. Successful defs have no `error`.
185
+ * aborts the rest. Successful defs have no `error`. The human-readable
186
+ * message (including the stack when available) lives on `error.message`.
185
187
  */
186
- error?: string;
188
+ error?: EngineError;
187
189
  }
188
190
  declare function rebuildRollups(opts: RebuildRollupsOptions): Promise<RebuildRollupResult[]>;
189
191
  /**
package/dist/rollups.mjs CHANGED
@@ -1,6 +1,7 @@
1
1
  import "./_chunks/layout.mjs";
2
+ import { engineErrors } from "./errors.mjs";
2
3
  import { encodeRowsToParquetFlex } from "./adapters/hyparquet.mjs";
3
- import { createIndexingMetadataStore, createSitemapStore, inspectionParquetKey, sitemapUrlsIndexPrefix } from "./entities.mjs";
4
+ import { createIndexingMetadataStore, createSitemapStore, inspectionParquetKey, sitemapUrlsIndexPrefix } from "./_chunks/entities.mjs";
4
5
  import { MS_PER_DAY } from "gscdump";
5
6
  function rollupPrefix(ctx, searchType) {
6
7
  const base = ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/rollups` : `u_${ctx.userId}/rollups`;
@@ -21,8 +22,7 @@ async function readLatestRollup(bucket, ctx, id, searchType) {
21
22
  const listing = await bucket.list({
22
23
  prefix,
23
24
  cursor
24
- }).catch(() => null);
25
- if (!listing) return null;
25
+ });
26
26
  for (const obj of listing.objects) {
27
27
  const m = ROLLUP_FILE_RE.exec(obj.key.slice(prefix.length));
28
28
  if (!m?.groups || m.groups.id !== id) continue;
@@ -35,7 +35,7 @@ async function readLatestRollup(bucket, ctx, id, searchType) {
35
35
  cursor = listing.truncated ? listing.cursor : void 0;
36
36
  } while (cursor !== void 0);
37
37
  if (!newest) return null;
38
- const obj = await bucket.get(newest.key).catch(() => null);
38
+ const obj = await bucket.get(newest.key);
39
39
  if (!obj) return null;
40
40
  return JSON.parse(await obj.text());
41
41
  }
@@ -111,7 +111,7 @@ async function rebuildRollups(opts) {
111
111
  objectKey: "",
112
112
  bytes: 0,
113
113
  builtAt,
114
- error: err instanceof Error ? err.stack || err.message : String(err)
114
+ error: engineErrors.rollupBuildFailed(def.id, err)
115
115
  });
116
116
  }
117
117
  }
@@ -1,5 +1,12 @@
1
+ import { EngineError } from "./_chunks/errors.mjs";
2
+ import { Result } from "gscdump/result";
1
3
  /** Escape single quotes for inlining inside a SQL string literal (SQL-standard `''` escaping). */
2
4
  declare function sqlEscape(s: string): string;
5
+ /**
6
+ * Errors-as-values core for {@link formatLiteral}: returns a typed
7
+ * `invalid-sql-literal` `EngineError` for values that can't be safely inlined.
8
+ */
9
+ declare function formatLiteralResult(value: unknown): Result<string, EngineError>;
3
10
  declare function formatLiteral(value: unknown): string;
4
11
  /**
5
12
  * Replace `?` and `$N` placeholders with inline SQL literals. Single-quoted
@@ -15,5 +22,12 @@ declare function formatLiteral(value: unknown): string;
15
22
  * Throws when placeholder count and params length disagree, or when a `$N`
16
23
  * index is out of range.
17
24
  */
25
+ /**
26
+ * Errors-as-values core for {@link bindLiterals}: returns a typed
27
+ * `placeholder-arity-mismatch` / `invalid-sql-literal` `EngineError` instead of
28
+ * throwing, so the edge RPC / proxy callers that build SQL from untrusted params
29
+ * can branch on the failure.
30
+ */
31
+ declare function bindLiteralsResult(sql: string, params: readonly unknown[]): Result<string, EngineError>;
18
32
  declare function bindLiterals(sql: string, params: readonly unknown[]): string;
19
- export { bindLiterals, formatLiteral, sqlEscape };
33
+ export { bindLiterals, bindLiteralsResult, formatLiteral, formatLiteralResult, sqlEscape };
package/dist/sql-bind.mjs CHANGED
@@ -1,3 +1,5 @@
1
+ import { engineErrorToException, engineErrors } from "./errors.mjs";
2
+ import { err, ok, unwrapResult } from "gscdump/result";
1
3
  function containsDisallowedControlChars(value) {
2
4
  for (let i = 0; i < value.length; i++) {
3
5
  const code = value.charCodeAt(i);
@@ -8,23 +10,26 @@ function containsDisallowedControlChars(value) {
8
10
  function sqlEscape(s) {
9
11
  return s.replace(/'/g, "''");
10
12
  }
11
- function formatLiteral(value) {
12
- if (value == null) return "NULL";
13
+ function formatLiteralResult(value) {
14
+ if (value == null) return ok("NULL");
13
15
  if (typeof value === "number") {
14
- if (!Number.isFinite(value)) throw new Error(`cannot inline non-finite number: ${value}`);
15
- return String(value);
16
+ if (!Number.isFinite(value)) return err(engineErrors.nonFiniteNumberLiteral(value));
17
+ return ok(String(value));
16
18
  }
17
- if (typeof value === "boolean") return value ? "TRUE" : "FALSE";
18
- if (typeof value === "bigint") return value.toString();
19
- if (value instanceof Date) return `'${value.toISOString()}'`;
19
+ if (typeof value === "boolean") return ok(value ? "TRUE" : "FALSE");
20
+ if (typeof value === "bigint") return ok(value.toString());
21
+ if (value instanceof Date) return ok(`'${value.toISOString()}'`);
20
22
  if (typeof value === "string") {
21
- if (containsDisallowedControlChars(value)) throw new Error("string literal contains disallowed control characters");
22
- return `'${value.replace(/'/g, "''")}'`;
23
+ if (containsDisallowedControlChars(value)) return err(engineErrors.controlCharsInLiteral());
24
+ return ok(`'${value.replace(/'/g, "''")}'`);
23
25
  }
24
- throw new Error(`cannot inline value of type ${typeof value}`);
26
+ return err(engineErrors.uninlinableLiteralType(typeof value));
25
27
  }
26
- function bindLiterals(sql, params) {
27
- if (params.length === 0) return sql;
28
+ function formatLiteral(value) {
29
+ return unwrapResult(formatLiteralResult(value), engineErrorToException);
30
+ }
31
+ function bindLiteralsResult(sql, params) {
32
+ if (params.length === 0) return ok(sql);
28
33
  let out = "";
29
34
  let i = 0;
30
35
  let qmarkIdx = 0;
@@ -66,8 +71,10 @@ function bindLiterals(sql, params) {
66
71
  continue;
67
72
  }
68
73
  if (c === "?") {
69
- if (qmarkIdx >= params.length) throw new Error(`bindLiterals: more '?' placeholders than params (have ${params.length})`);
70
- out += formatLiteral(params[qmarkIdx++]);
74
+ if (qmarkIdx >= params.length) return err(engineErrors.morePlaceholdersThanParams(params.length));
75
+ const literal = formatLiteralResult(params[qmarkIdx++]);
76
+ if (!literal.ok) return literal;
77
+ out += literal.value;
71
78
  i++;
72
79
  continue;
73
80
  }
@@ -75,18 +82,23 @@ function bindLiterals(sql, params) {
75
82
  let j = i + 1;
76
83
  while (j < sql.length && sql[j] >= "0" && sql[j] <= "9") j++;
77
84
  const n = Number(sql.slice(i + 1, j));
78
- if (n < 1 || n > params.length) throw new Error(`bindLiterals: $${n} out of range (have ${params.length} params)`);
85
+ if (n < 1 || n > params.length) return err(engineErrors.dollarPlaceholderOutOfRange(n, params.length));
79
86
  usedDollar.add(n - 1);
80
- out += formatLiteral(params[n - 1]);
87
+ const literal = formatLiteralResult(params[n - 1]);
88
+ if (!literal.ok) return literal;
89
+ out += literal.value;
81
90
  i = j;
82
91
  continue;
83
92
  }
84
93
  out += c;
85
94
  i++;
86
95
  }
87
- if (qmarkIdx > 0 && usedDollar.size > 0) throw new Error("bindLiterals: cannot mix '?' and '$N' placeholders in the same query");
96
+ if (qmarkIdx > 0 && usedDollar.size > 0) return err(engineErrors.mixedPlaceholderStyles());
88
97
  const used = qmarkIdx > 0 ? qmarkIdx : usedDollar.size;
89
- if (used !== params.length) throw new Error(`bindLiterals: ${params.length - used} params unused`);
90
- return out;
98
+ if (used !== params.length) return err(engineErrors.unusedParams(params.length - used));
99
+ return ok(out);
100
+ }
101
+ function bindLiterals(sql, params) {
102
+ return unwrapResult(bindLiteralsResult(sql, params), engineErrorToException);
91
103
  }
92
- export { bindLiterals, formatLiteral, sqlEscape };
104
+ export { bindLiterals, bindLiteralsResult, formatLiteral, formatLiteralResult, sqlEscape };
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@gscdump/engine",
3
3
  "type": "module",
4
- "version": "0.25.14",
4
+ "version": "0.26.1",
5
5
  "description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
6
6
  "author": {
7
7
  "name": "Harlan Wilton",
@@ -31,6 +31,11 @@
31
31
  "import": "./dist/contracts.mjs",
32
32
  "default": "./dist/contracts.mjs"
33
33
  },
34
+ "./errors": {
35
+ "types": "./dist/errors.d.mts",
36
+ "import": "./dist/errors.mjs",
37
+ "default": "./dist/errors.mjs"
38
+ },
34
39
  "./snapshot": {
35
40
  "types": "./dist/snapshot.d.mts",
36
41
  "import": "./dist/snapshot.mjs",
@@ -185,8 +190,8 @@
185
190
  "drizzle-orm": "1.0.0-rc.3",
186
191
  "icebird": "^0.8.8",
187
192
  "proper-lockfile": "^4.1.2",
188
- "@gscdump/contracts": "0.25.14",
189
- "gscdump": "0.25.14"
193
+ "@gscdump/contracts": "0.26.1",
194
+ "gscdump": "0.26.1"
190
195
  },
191
196
  "devDependencies": {
192
197
  "@duckdb/duckdb-wasm": "^1.32.0",