@gscdump/engine 0.24.1 → 0.25.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_chunks/compaction.mjs +247 -0
- package/dist/_chunks/engine.mjs +22 -4
- package/dist/_chunks/parquet-plan.mjs +3 -248
- package/dist/_chunks/resolver.mjs +3 -3
- package/dist/_chunks/{iceberg-schema.mjs → schema2.mjs} +9 -2
- package/dist/_chunks/sink.d.mts +11 -1
- package/dist/_chunks/source.mjs +1 -1
- package/dist/_chunks/storage.d.mts +24 -33
- package/dist/adapters/filesystem.mjs +1 -1
- package/dist/adapters/node.mjs +1 -1
- package/dist/adapters/r2-manifest.mjs +1 -1
- package/dist/compaction-public.d.mts +15 -0
- package/dist/compaction-public.mjs +5 -0
- package/dist/iceberg/index.d.mts +12 -0
- package/dist/iceberg/index.mjs +269 -0
- package/dist/index.d.mts +30 -29
- package/dist/index.mjs +5 -272
- package/dist/planner.mjs +2 -1
- package/dist/rollups.mjs +1 -1
- package/dist/sink-node.d.mts +1 -1
- package/dist/sink-node.mjs +1 -1
- package/package.json +13 -8
- package/dist/_chunks/{storage.mjs → layout.mjs} +11 -11
package/dist/index.mjs
CHANGED
|
@@ -1,189 +1,15 @@
|
|
|
1
1
|
import { ENGINE_QUERY_CAPABILITIES, coerceRow, coerceRows, createSqlQuerySource } from "./_chunks/source.mjs";
|
|
2
|
+
import { DEFAULT_SEARCH_TYPE, dayPartition, hourPartition, inferLegacyTier, inferSearchType, objectKey } from "./_chunks/layout.mjs";
|
|
2
3
|
import { SCHEMAS, TABLE_METADATA, allTables, countries, currentSchemaVersion, dates, dimensionToColumn, drizzleSchema, hourly_pages, inferTable, page_queries, pages, queries } from "./_chunks/schema.mjs";
|
|
3
|
-
import {
|
|
4
|
-
import { FILES_PLACEHOLDER,
|
|
4
|
+
import { enumeratePartitions } from "./_chunks/compaction.mjs";
|
|
5
|
+
import { FILES_PLACEHOLDER, resolveParquetSQL, substituteNamedFiles } from "./_chunks/parquet-plan.mjs";
|
|
5
6
|
import { bindLiterals, formatLiteral } from "./sql-bind.mjs";
|
|
6
|
-
import { MAX_DAY_BYTES, canonicalEmptyParquetSchema, createDuckDBCodec, createDuckDBExecutor, createStorageEngine
|
|
7
|
-
import { ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, icebergTableSpec } from "./_chunks/iceberg-schema.mjs";
|
|
7
|
+
import { MAX_DAY_BYTES, canonicalEmptyParquetSchema, createDuckDBCodec, createDuckDBExecutor, createStorageEngine } from "./_chunks/engine.mjs";
|
|
8
8
|
import { assembleDatesRow, createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
|
|
9
9
|
import "./planner.mjs";
|
|
10
10
|
import { createIcebergResolverAdapter, createParquetResolverAdapter, pgResolverAdapter } from "./_chunks/resolver.mjs";
|
|
11
11
|
import { rebuildDailyFromHourly } from "./rollups.mjs";
|
|
12
12
|
import { fixedPolicy, inspectionPolicy, sitemapPolicy } from "./schedule.mjs";
|
|
13
|
-
import { icebergAppend, icebergCreateTable, icebergDropTable, icebergManifests, restCatalogConnect, restCatalogCreateNamespace, restCatalogListTables, restCatalogLoadTable, s3SignedResolver } from "icebird";
|
|
14
|
-
const ICEBERG_TYPE_MAP = {
|
|
15
|
-
STRING: "string",
|
|
16
|
-
INT: "int",
|
|
17
|
-
LONG: "long",
|
|
18
|
-
DOUBLE: "double",
|
|
19
|
-
DATE: "date"
|
|
20
|
-
};
|
|
21
|
-
function icebergSchemaFor(table) {
|
|
22
|
-
return {
|
|
23
|
-
"type": "struct",
|
|
24
|
-
"schema-id": 0,
|
|
25
|
-
"fields": ICEBERG_SCHEMAS[table].columns.map((col) => ({
|
|
26
|
-
id: col.fieldId,
|
|
27
|
-
name: col.name,
|
|
28
|
-
required: col.required,
|
|
29
|
-
type: ICEBERG_TYPE_MAP[col.type]
|
|
30
|
-
}))
|
|
31
|
-
};
|
|
32
|
-
}
|
|
33
|
-
function icebergPartitionSpecFor(table) {
|
|
34
|
-
const fields = ICEBERG_SCHEMAS[table].columns;
|
|
35
|
-
const fieldId = (name) => {
|
|
36
|
-
const col = fields.find((c) => c.name === name);
|
|
37
|
-
if (!col) throw new Error(`iceberg-catalog: table '${table}' has no '${name}' column`);
|
|
38
|
-
return col.fieldId;
|
|
39
|
-
};
|
|
40
|
-
return {
|
|
41
|
-
"spec-id": 0,
|
|
42
|
-
"fields": ICEBERG_PARTITION_SPEC.map((p, i) => ({
|
|
43
|
-
"source-id": fieldId(p.sourceColumn),
|
|
44
|
-
"field-id": 1e3 + i,
|
|
45
|
-
"name": p.name,
|
|
46
|
-
"transform": p.transform
|
|
47
|
-
}))
|
|
48
|
-
};
|
|
49
|
-
}
|
|
50
|
-
async function connectIcebergCatalog(config) {
|
|
51
|
-
return {
|
|
52
|
-
catalog: await restCatalogConnect({
|
|
53
|
-
url: config.catalogUri,
|
|
54
|
-
warehouse: config.warehouse,
|
|
55
|
-
requestInit: { headers: { Authorization: `Bearer ${config.catalogToken}` } }
|
|
56
|
-
}),
|
|
57
|
-
resolver: s3SignedResolver({
|
|
58
|
-
accessKeyId: config.s3.accessKeyId,
|
|
59
|
-
secretAccessKey: config.s3.secretAccessKey,
|
|
60
|
-
region: config.s3.region ?? "auto",
|
|
61
|
-
endpoint: config.s3.endpoint,
|
|
62
|
-
pathStyle: true
|
|
63
|
-
}),
|
|
64
|
-
namespace: config.namespace
|
|
65
|
-
};
|
|
66
|
-
}
|
|
67
|
-
function isCommitRateLimited(err) {
|
|
68
|
-
if (err && typeof err === "object" && err.status === 429) return true;
|
|
69
|
-
const msg = (err instanceof Error ? err.message : String(err)).toLowerCase();
|
|
70
|
-
return msg.includes("429") || msg.includes("too many commits") || msg.includes("rate limit");
|
|
71
|
-
}
|
|
72
|
-
function defaultCommitSleep(ms) {
|
|
73
|
-
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
74
|
-
}
|
|
75
|
-
async function icebergAppendRetrying(args, options = {}) {
|
|
76
|
-
const maxAttempts = options.maxAttempts ?? 6;
|
|
77
|
-
const baseDelayMs = options.baseDelayMs ?? 1e3;
|
|
78
|
-
const maxDelayMs = options.maxDelayMs ?? 2e4;
|
|
79
|
-
const sleep = options.sleep ?? defaultCommitSleep;
|
|
80
|
-
const random = options.random ?? Math.random;
|
|
81
|
-
for (let attempt = 0; attempt < maxAttempts; attempt++) {
|
|
82
|
-
const err = await icebergAppend(args).then(() => void 0, (e) => e);
|
|
83
|
-
if (err === void 0) return;
|
|
84
|
-
if (!isCommitRateLimited(err) || attempt === maxAttempts - 1) throw err;
|
|
85
|
-
const ceiling = Math.min(maxDelayMs, baseDelayMs * 2 ** attempt);
|
|
86
|
-
await sleep(Math.floor(random() * ceiling));
|
|
87
|
-
}
|
|
88
|
-
}
|
|
89
|
-
async function ensureIcebergNamespace(conn) {
|
|
90
|
-
await restCatalogCreateNamespace(conn.catalog, { namespace: conn.namespace }).catch(() => {});
|
|
91
|
-
}
|
|
92
|
-
async function createIcebergTables(conn, tables = ICEBERG_TABLES) {
|
|
93
|
-
const results = [];
|
|
94
|
-
for (const table of tables) await icebergCreateTable({
|
|
95
|
-
catalog: conn.catalog,
|
|
96
|
-
namespace: conn.namespace,
|
|
97
|
-
table,
|
|
98
|
-
schema: icebergSchemaFor(table),
|
|
99
|
-
partitionSpec: icebergPartitionSpecFor(table)
|
|
100
|
-
}).then(() => results.push({
|
|
101
|
-
table,
|
|
102
|
-
ok: true
|
|
103
|
-
}), (e) => results.push({
|
|
104
|
-
table,
|
|
105
|
-
ok: false,
|
|
106
|
-
error: String(e)
|
|
107
|
-
}));
|
|
108
|
-
return results;
|
|
109
|
-
}
|
|
110
|
-
async function listIcebergTables(conn) {
|
|
111
|
-
return restCatalogListTables(conn.catalog, { namespace: conn.namespace }).then((list) => list.map((t) => t.name).sort(), () => []);
|
|
112
|
-
}
|
|
113
|
-
function monthsInRange(range) {
|
|
114
|
-
const [sy, sm] = range.start.split("-").map(Number);
|
|
115
|
-
const [ey, em] = range.end.split("-").map(Number);
|
|
116
|
-
const out = [];
|
|
117
|
-
let y = sy;
|
|
118
|
-
let m = sm;
|
|
119
|
-
while (y < ey || y === ey && m <= em) {
|
|
120
|
-
out.push(`${y}-${String(m).padStart(2, "0")}`);
|
|
121
|
-
m++;
|
|
122
|
-
if (m > 12) {
|
|
123
|
-
m = 1;
|
|
124
|
-
y++;
|
|
125
|
-
}
|
|
126
|
-
}
|
|
127
|
-
return out;
|
|
128
|
-
}
|
|
129
|
-
function monthsSinceEpoch(ym) {
|
|
130
|
-
const [y, m] = ym.split("-").map(Number);
|
|
131
|
-
return (y - 1970) * 12 + (m - 1);
|
|
132
|
-
}
|
|
133
|
-
function stripBucket(filePath) {
|
|
134
|
-
if (!filePath.startsWith("s3://")) return filePath;
|
|
135
|
-
const rest = filePath.slice(5);
|
|
136
|
-
const slash = rest.indexOf("/");
|
|
137
|
-
return slash >= 0 ? rest.slice(slash + 1) : rest;
|
|
138
|
-
}
|
|
139
|
-
async function listIcebergDataFiles(conn, opts) {
|
|
140
|
-
const { metadata } = await restCatalogLoadTable(conn.catalog, {
|
|
141
|
-
namespace: conn.namespace,
|
|
142
|
-
table: opts.table
|
|
143
|
-
});
|
|
144
|
-
if (metadata["current-snapshot-id"] == null) return [];
|
|
145
|
-
const wantedMonths = new Set(monthsInRange(opts.range).map(monthsSinceEpoch));
|
|
146
|
-
const manifests = await icebergManifests({
|
|
147
|
-
metadata,
|
|
148
|
-
resolver: conn.resolver
|
|
149
|
-
});
|
|
150
|
-
const out = [];
|
|
151
|
-
for (const m of manifests) for (const entry of m.entries) {
|
|
152
|
-
if (entry.status === 2) continue;
|
|
153
|
-
const df = entry.data_file;
|
|
154
|
-
if (df.content !== 0) continue;
|
|
155
|
-
const part = df.partition;
|
|
156
|
-
if (part.site_id !== opts.siteId) continue;
|
|
157
|
-
if (part.search_type !== opts.searchType) continue;
|
|
158
|
-
const month = part.date_month;
|
|
159
|
-
if (typeof month !== "number" || !wantedMonths.has(month)) continue;
|
|
160
|
-
out.push({
|
|
161
|
-
filePath: df.file_path,
|
|
162
|
-
objectKey: stripBucket(df.file_path),
|
|
163
|
-
bytes: Number(df.file_size_in_bytes),
|
|
164
|
-
rowCount: Number(df.record_count)
|
|
165
|
-
});
|
|
166
|
-
}
|
|
167
|
-
return out;
|
|
168
|
-
}
|
|
169
|
-
async function dropIcebergTables(conn, tables) {
|
|
170
|
-
const targets = tables ?? await restCatalogListTables(conn.catalog, { namespace: conn.namespace }).then((list) => list.map((t) => t.name), () => []);
|
|
171
|
-
const results = [];
|
|
172
|
-
for (const table of targets) await icebergDropTable({
|
|
173
|
-
catalog: conn.catalog,
|
|
174
|
-
namespace: conn.namespace,
|
|
175
|
-
table,
|
|
176
|
-
purgeRequested: true
|
|
177
|
-
}).then(() => results.push({
|
|
178
|
-
table,
|
|
179
|
-
ok: true
|
|
180
|
-
}), (e) => results.push({
|
|
181
|
-
table,
|
|
182
|
-
ok: false,
|
|
183
|
-
error: String(e)
|
|
184
|
-
}));
|
|
185
|
-
return results;
|
|
186
|
-
}
|
|
187
13
|
const NOOP_RESULT = {
|
|
188
14
|
flushed: 0,
|
|
189
15
|
recovered: 0,
|
|
@@ -299,99 +125,6 @@ function createIngestAccumulator(opts) {
|
|
|
299
125
|
}
|
|
300
126
|
};
|
|
301
127
|
}
|
|
302
|
-
const DAY_MILLIS = 864e5;
|
|
303
|
-
function toIcebergDate(value) {
|
|
304
|
-
if (typeof value === "string") {
|
|
305
|
-
const ms = Date.parse(`${value}T00:00:00Z`);
|
|
306
|
-
if (Number.isNaN(ms)) throw new TypeError(`toIcebergDate: invalid date string '${value}'`);
|
|
307
|
-
return Math.floor(ms / DAY_MILLIS);
|
|
308
|
-
}
|
|
309
|
-
if (value instanceof Date) {
|
|
310
|
-
const ms = value.getTime();
|
|
311
|
-
if (Number.isNaN(ms)) throw new TypeError("toIcebergDate: invalid Date (NaN)");
|
|
312
|
-
return Math.floor(ms / DAY_MILLIS);
|
|
313
|
-
}
|
|
314
|
-
return value;
|
|
315
|
-
}
|
|
316
|
-
function coerceJsonSafe(value) {
|
|
317
|
-
if (typeof value === "bigint") return Number(value);
|
|
318
|
-
return value;
|
|
319
|
-
}
|
|
320
|
-
function toRecords(slice, rows) {
|
|
321
|
-
const siteId = slice.ctx.siteId ?? "";
|
|
322
|
-
return rows.map((row) => {
|
|
323
|
-
const out = {};
|
|
324
|
-
for (const k in row) out[k] = coerceJsonSafe(row[k]);
|
|
325
|
-
out.date = toIcebergDate(out.date);
|
|
326
|
-
out.site_id = siteId;
|
|
327
|
-
out.search_type = slice.searchType;
|
|
328
|
-
return out;
|
|
329
|
-
});
|
|
330
|
-
}
|
|
331
|
-
function createIcebergAppendSink(options) {
|
|
332
|
-
let connection;
|
|
333
|
-
const buffers = /* @__PURE__ */ new Map();
|
|
334
|
-
function connect() {
|
|
335
|
-
connection ??= connectIcebergCatalog(options.catalog);
|
|
336
|
-
return connection;
|
|
337
|
-
}
|
|
338
|
-
return {
|
|
339
|
-
capabilities: { appendOnly: true },
|
|
340
|
-
async emit(slice, rows) {
|
|
341
|
-
if (rows.length === 0) return { rowCount: 0 };
|
|
342
|
-
const records = toRecords(slice, rows);
|
|
343
|
-
const buffer = buffers.get(slice.table);
|
|
344
|
-
if (buffer) for (let i = 0; i < records.length; i++) buffer.push(records[i]);
|
|
345
|
-
else buffers.set(slice.table, records);
|
|
346
|
-
return { rowCount: records.length };
|
|
347
|
-
},
|
|
348
|
-
async close() {
|
|
349
|
-
const flushed = [];
|
|
350
|
-
const failed = [];
|
|
351
|
-
if (buffers.size === 0) return {
|
|
352
|
-
flushed,
|
|
353
|
-
failed
|
|
354
|
-
};
|
|
355
|
-
const conn = await connect().then((c) => c, (err) => {
|
|
356
|
-
connection = void 0;
|
|
357
|
-
return { error: String(err) };
|
|
358
|
-
});
|
|
359
|
-
if ("error" in conn) {
|
|
360
|
-
for (const [table, records] of buffers) if (records.length > 0) failed.push({
|
|
361
|
-
table,
|
|
362
|
-
error: conn.error
|
|
363
|
-
});
|
|
364
|
-
buffers.clear();
|
|
365
|
-
return {
|
|
366
|
-
flushed,
|
|
367
|
-
failed
|
|
368
|
-
};
|
|
369
|
-
}
|
|
370
|
-
for (const [table, records] of buffers) {
|
|
371
|
-
if (records.length === 0) continue;
|
|
372
|
-
await icebergAppendRetrying({
|
|
373
|
-
catalog: conn.catalog,
|
|
374
|
-
namespace: conn.namespace,
|
|
375
|
-
table,
|
|
376
|
-
resolver: conn.resolver,
|
|
377
|
-
records
|
|
378
|
-
}, options.commitRetry).then(() => {
|
|
379
|
-
flushed.push(table);
|
|
380
|
-
}, (err) => {
|
|
381
|
-
failed.push({
|
|
382
|
-
table,
|
|
383
|
-
error: String(err)
|
|
384
|
-
});
|
|
385
|
-
});
|
|
386
|
-
}
|
|
387
|
-
buffers.clear();
|
|
388
|
-
return {
|
|
389
|
-
flushed,
|
|
390
|
-
failed
|
|
391
|
-
};
|
|
392
|
-
}
|
|
393
|
-
};
|
|
394
|
-
}
|
|
395
128
|
const KEY_SEP = "\0";
|
|
396
129
|
function partitionKey(slice) {
|
|
397
130
|
return [
|
|
@@ -544,4 +277,4 @@ const MIN_SYNC_IMPRESSIONS = 1;
|
|
|
544
277
|
const MIN_COUNTRY_IMPRESSIONS = 10;
|
|
545
278
|
const MAX_SITEMAP_URLS_PER_SITE = 5e4;
|
|
546
279
|
const MAX_TRACKED_URLS_PER_SITE = 2e5;
|
|
547
|
-
export { DEFAULT_SEARCH_TYPE, ENGINE_QUERY_CAPABILITIES, FILES_PLACEHOLDER,
|
|
280
|
+
export { DEFAULT_SEARCH_TYPE, ENGINE_QUERY_CAPABILITIES, FILES_PLACEHOLDER, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, ROW_LIMIT_R2, SCHEMAS, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, WEIGHT_PRIORITY, allTables, assembleDatesRow, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countries, createDuckDBCodec, createDuckDBExecutor, createIcebergResolverAdapter, createInMemorySink, createIngestAccumulator, createNoopIngestAccumulator, createParquetResolverAdapter, createRowAccumulator, createSqlQuerySource, createStorageEngine, currentSchemaVersion, dates, dayPartition, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, objectKey, page_queries, pages, parseEnabledSearchTypes, pgResolverAdapter, queries, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
|
package/dist/planner.mjs
CHANGED
|
@@ -1,2 +1,3 @@
|
|
|
1
|
-
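import { FILES_PLACEHOLDER, compileLogicalQueryPlan, enumeratePartitions, resolveParquetSQL, substituteNamedFiles } from "./_chunks/parquet-plan.mjs";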
|
|
1
|
+
import { enumeratePartitions } from "./_chunks/compaction.mjs";
|
|
2
|
+
import { FILES_PLACEHOLDER, compileLogicalQueryPlan, resolveParquetSQL, substituteNamedFiles } from "./_chunks/parquet-plan.mjs";
|
|
2
3
|
export { FILES_PLACEHOLDER, compileLogicalQueryPlan, enumeratePartitions, resolveParquetSQL, substituteNamedFiles };
|
package/dist/rollups.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import "./_chunks/storage.mjs";
|
|
1
|
+
import "./_chunks/layout.mjs";
|
|
2
2
|
import { encodeRowsToParquetFlex } from "./adapters/hyparquet.mjs";
|
|
3
3
|
import { createIndexingMetadataStore, createSitemapStore, inspectionParquetKey, sitemapUrlsIndexPrefix } from "./entities.mjs";
|
|
4
4
|
import { MS_PER_DAY } from "gscdump";
|
package/dist/sink-node.d.mts
CHANGED
|
@@ -20,4 +20,4 @@ interface LocalIcebergSink extends Sink {
|
|
|
20
20
|
* use this sink must skip when the stack is unreachable.
|
|
21
21
|
*/
|
|
22
22
|
declare function createLocalIcebergSink(options: LocalIcebergSinkFullOptions): LocalIcebergSink;
|
|
23
|
-
export { type LocalIcebergSink, type LocalIcebergSinkFullOptions, createLocalIcebergSink };
|
|
23
|
+
export { type LocalIcebergSink, type LocalIcebergSinkFullOptions, type LocalIcebergSinkOptions, createLocalIcebergSink };
|
package/dist/sink-node.mjs
CHANGED
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@gscdump/engine",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.24.1",
|
|
4
|
+
"version": "0.25.1",
|
|
5
5
|
"description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "Harlan Wilton",
|
|
@@ -41,6 +41,11 @@
|
|
|
41
41
|
"import": "./dist/planner.mjs",
|
|
42
42
|
"default": "./dist/planner.mjs"
|
|
43
43
|
},
|
|
44
|
+
"./compaction": {
|
|
45
|
+
"types": "./dist/compaction-public.d.mts",
|
|
46
|
+
"import": "./dist/compaction-public.mjs",
|
|
47
|
+
"default": "./dist/compaction-public.mjs"
|
|
48
|
+
},
|
|
44
49
|
"./schema": {
|
|
45
50
|
"types": "./dist/schema.d.mts",
|
|
46
51
|
"import": "./dist/schema.mjs",
|
|
@@ -66,11 +71,6 @@
|
|
|
66
71
|
"import": "./dist/sql-fragments.mjs",
|
|
67
72
|
"default": "./dist/sql-fragments.mjs"
|
|
68
73
|
},
|
|
69
|
-
"./schedule": {
|
|
70
|
-
"types": "./dist/schedule.d.mts",
|
|
71
|
-
"import": "./dist/schedule.mjs",
|
|
72
|
-
"default": "./dist/schedule.mjs"
|
|
73
|
-
},
|
|
74
74
|
"./entities": {
|
|
75
75
|
"types": "./dist/entities.d.mts",
|
|
76
76
|
"import": "./dist/entities.mjs",
|
|
@@ -81,6 +81,11 @@
|
|
|
81
81
|
"import": "./dist/rollups.mjs",
|
|
82
82
|
"default": "./dist/rollups.mjs"
|
|
83
83
|
},
|
|
84
|
+
"./iceberg": {
|
|
85
|
+
"types": "./dist/iceberg/index.d.mts",
|
|
86
|
+
"import": "./dist/iceberg/index.mjs",
|
|
87
|
+
"default": "./dist/iceberg/index.mjs"
|
|
88
|
+
},
|
|
84
89
|
"./node": {
|
|
85
90
|
"types": "./dist/adapters/node.d.mts",
|
|
86
91
|
"import": "./dist/adapters/node.mjs",
|
|
@@ -180,8 +185,8 @@
|
|
|
180
185
|
"drizzle-orm": "1.0.0-rc.3",
|
|
181
186
|
"icebird": "^0.8.6",
|
|
182
187
|
"proper-lockfile": "^4.1.2",
|
|
183
|
-
"@gscdump/contracts": "0.24.1",
|
|
184
|
-
"gscdump": "0.24.1"
|
|
188
|
+
"@gscdump/contracts": "0.25.1",
|
|
189
|
+
"gscdump": "0.25.1"
|
|
185
190
|
},
|
|
186
191
|
"devDependencies": {
|
|
187
192
|
"@duckdb/duckdb-wasm": "^1.32.0",
|
|
@@ -1,13 +1,4 @@
|
|
|
1
1
|
import { MS_PER_DAY, toIsoDate } from "gscdump";
|
|
2
|
-
const DEFAULT_SEARCH_TYPE = "web";
|
|
3
|
-
function inferSearchType(entry) {
|
|
4
|
-
return entry.searchType ?? "web";
|
|
5
|
-
}
|
|
6
|
-
function inferLegacyTier(entry) {
|
|
7
|
-
if (entry.tier !== void 0) return entry.tier;
|
|
8
|
-
if (entry.partition.startsWith("daily/")) return "raw";
|
|
9
|
-
if (entry.partition.startsWith("monthly/")) return "d30";
|
|
10
|
-
}
|
|
11
2
|
function dayPartition(date) {
|
|
12
3
|
return `daily/${date}`;
|
|
13
4
|
}
|
|
@@ -33,10 +24,19 @@ function quarterOfMonth(month) {
|
|
|
33
24
|
const [y, m] = month.split("-").map(Number);
|
|
34
25
|
return `${y}-Q${Math.floor((m - 1) / 3) + 1}`;
|
|
35
26
|
}
|
|
27
|
+
function tenantPrefix(ctx) {
|
|
28
|
+
return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/` : `u_${ctx.userId}/`;
|
|
29
|
+
}
|
|
30
|
+
const DEFAULT_SEARCH_TYPE = "web";
|
|
36
31
|
function objectKey(ctx, table, partition, version, searchType) {
|
|
37
32
|
return `${ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/${table}` : `u_${ctx.userId}/${table}`}/${searchType !== void 0 && searchType !== "web" ? `${searchType}/` : ""}${partition}__v${version}.parquet`;
|
|
38
33
|
}
|
|
39
|
-
function tenantPrefix(ctx) {
|
|
40
|
-
return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/` : `u_${ctx.userId}/`;
|
|
34
|
+
function inferSearchType(entry) {
|
|
35
|
+
return entry.searchType ?? "web";
|
|
36
|
+
}
|
|
37
|
+
function inferLegacyTier(entry) {
|
|
38
|
+
if (entry.tier !== void 0) return entry.tier;
|
|
39
|
+
if (entry.partition.startsWith("daily/")) return "raw";
|
|
40
|
+
if (entry.partition.startsWith("monthly/")) return "d30";
|
|
41
41
|
}
|
|
42
42
|
export { DEFAULT_SEARCH_TYPE, dayPartition, hourPartition, inferLegacyTier, inferSearchType, mondayOfWeek, monthPartition, objectKey, quarterOfMonth, quarterPartition, tenantPrefix, weekPartition };
|