@gscdump/engine 0.19.7 → 0.20.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/resolver/index.d.mts +2 -1
- package/dist/rollups.d.mts +12 -5
- package/dist/rollups.mjs +5 -3
- package/dist/source/index.d.mts +2 -2
- package/dist/source/index.mjs +3 -1
- package/package.json +4 -4
package/dist/rollups.d.mts
CHANGED
|
@@ -30,9 +30,9 @@ interface RollupEngine {
|
|
|
30
30
|
/**
|
|
31
31
|
* Read the live manifest for a (tenant, table[, searchType]) cohort —
|
|
32
32
|
* cheap, no parquet decode. Builders use this to chunk a full-history scan
|
|
33
|
-
* into byte-bounded windows so a single `runSQL`
|
|
34
|
-
*
|
|
35
|
-
* (32MiB hard cap).
|
|
33
|
+
* into byte-bounded windows (see `WINDOW_BYTE_BUDGET`) so a single `runSQL`
|
|
34
|
+
* call never ships an oversized Arrow IPC payload across the Workers
|
|
35
|
+
* service-binding RPC (32MiB hard cap).
|
|
36
36
|
*/
|
|
37
37
|
listPartitions: (opts: {
|
|
38
38
|
ctx: TenantCtx;
|
|
@@ -174,8 +174,15 @@ interface RebuildRollupResult {
|
|
|
174
174
|
}
|
|
175
175
|
declare function rebuildRollups(opts: RebuildRollupsOptions): Promise<RebuildRollupResult[]>;
|
|
176
176
|
/**
|
|
177
|
-
*
|
|
178
|
-
*
|
|
177
|
+
* Per-window budget, measured in *parquet* bytes (manifest `bytes`), used by
|
|
178
|
+
* `planRollupWindows` to chunk a full-history scan.
|
|
179
|
+
*
|
|
180
|
+
* The executor decodes a window's parquet and ships it as an Arrow IPC stream
|
|
181
|
+
* over the service binding; that IPC is hard-guarded at 28MiB
|
|
182
|
+
* (`IPC_PLACEHOLDER_BUDGET` in @gscdump/cloudflare). Parquet is compressed and
|
|
183
|
+
* the IPC stream is not, so a window inflates on the wire — keep this
|
|
184
|
+
* conservatively below the guard. Re-measure the parquet→IPC ratio against
|
|
185
|
+
* production and raise if headroom allows.
|
|
179
186
|
*/
|
|
180
187
|
declare const WINDOW_BYTE_BUDGET: number;
|
|
181
188
|
/**
|
package/dist/rollups.mjs
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import "./_chunks/storage.mjs";
|
|
1
2
|
import { encodeRowsToParquetFlex } from "./adapters/hyparquet.mjs";
|
|
2
3
|
import { createIndexingMetadataStore, createSitemapStore, inspectionParquetKey, sitemapUrlsIndexPrefix } from "./entities.mjs";
|
|
3
4
|
import { MS_PER_DAY } from "gscdump";
|
|
@@ -111,7 +112,7 @@ function utcDateMinusDays(at, days) {
|
|
|
111
112
|
const d = new Date(at - days * MS_PER_DAY);
|
|
112
113
|
return `${d.getUTCFullYear()}-${String(d.getUTCMonth() + 1).padStart(2, "0")}-${String(d.getUTCDate()).padStart(2, "0")}`;
|
|
113
114
|
}
|
|
114
|
-
const WINDOW_BYTE_BUDGET =
|
|
115
|
+
const WINDOW_BYTE_BUDGET = 10 * 1024 * 1024;
|
|
115
116
|
const DAY_RE = /^daily\/(\d{4})-(\d{2})-(\d{2})$/;
|
|
116
117
|
const WEEK_RE = /^weekly\/(\d{4})-(\d{2})-(\d{2})$/;
|
|
117
118
|
const MONTH_RE = /^monthly\/(\d{4})-(\d{2})$/;
|
|
@@ -618,10 +619,11 @@ const indexPercentRollup = {
|
|
|
618
619
|
days: []
|
|
619
620
|
};
|
|
620
621
|
const cutoff = utcDateMinusDays(builtAt, 90);
|
|
622
|
+
const factSearchType = searchType ?? "web";
|
|
621
623
|
const pagesPartitions = partitionsInRange(await engine.listPartitions({
|
|
622
624
|
ctx,
|
|
623
625
|
table: "pages",
|
|
624
|
-
|
|
626
|
+
searchType: factSearchType
|
|
625
627
|
}), cutoff, utcDateMinusDays(builtAt, 0));
|
|
626
628
|
const numerator = await engine.runSQL({
|
|
627
629
|
ctx,
|
|
@@ -636,7 +638,7 @@ const indexPercentRollup = {
|
|
|
636
638
|
keys: urlsKeys
|
|
637
639
|
}
|
|
638
640
|
},
|
|
639
|
-
|
|
641
|
+
searchType: factSearchType,
|
|
640
642
|
sql: `
|
|
641
643
|
SELECT
|
|
642
644
|
p.date AS date,
|
package/dist/source/index.d.mts
CHANGED
|
@@ -90,8 +90,8 @@ interface EngineQuerySourceOptions {
|
|
|
90
90
|
declare function createEngineQuerySource(options: EngineQuerySourceOptions): AnalysisQuerySource;
|
|
91
91
|
/**
|
|
92
92
|
* Convenience: wrap a storage engine + tenant ctx in a source and dispatch.
|
|
93
|
-
* Equivalent to
|
|
94
|
-
*
|
|
93
|
+
* Equivalent to wrapping `createEngineQuerySource`, with omitted searchType
|
|
94
|
+
* defaulted to web at this public helper boundary.
|
|
95
95
|
*/
|
|
96
96
|
declare function runAnalyzerWithEngine(deps: {
|
|
97
97
|
engine: StorageEngine;
|
package/dist/source/index.mjs
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { n as coerceRows } from "../_chunks/coerce.mjs";
|
|
2
|
+
import "../_chunks/storage.mjs";
|
|
2
3
|
import { T as assertDimensionsSupported, a as pgResolverAdapter, c as getFilterDimensions, v as resolveToSQL } from "../_chunks/resolver.mjs";
|
|
3
4
|
import { n as runAnalyzerFromSource } from "../_chunks/dispatch.mjs";
|
|
4
5
|
var AttachedTableMissingError = class extends Error {
|
|
@@ -127,7 +128,8 @@ function createEngineQuerySource(options) {
|
|
|
127
128
|
async function runAnalyzerWithEngine(deps, ctx, params, registry) {
|
|
128
129
|
return runAnalyzerFromSource(createEngineQuerySource({
|
|
129
130
|
engine: deps.engine,
|
|
130
|
-
ctx
|
|
131
|
+
ctx,
|
|
132
|
+
searchType: params.searchType ?? "web"
|
|
131
133
|
}), params, registry);
|
|
132
134
|
}
|
|
133
135
|
function typedQuery(state) {
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@gscdump/engine",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.19.7",
|
|
4
|
+
"version": "0.20.1",
|
|
5
5
|
"description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "Harlan Wilton",
|
|
@@ -169,8 +169,8 @@
|
|
|
169
169
|
"dependencies": {
|
|
170
170
|
"drizzle-orm": "^0.45.2",
|
|
171
171
|
"proper-lockfile": "^4.1.2",
|
|
172
|
-
"gscdump": "0.
|
|
173
|
-
"@gscdump/contracts": "0.
|
|
172
|
+
"gscdump": "0.20.1",
|
|
173
|
+
"@gscdump/contracts": "0.20.1"
|
|
174
174
|
},
|
|
175
175
|
"devDependencies": {
|
|
176
176
|
"@duckdb/duckdb-wasm": "^1.32.0",
|
|
@@ -178,7 +178,7 @@
|
|
|
178
178
|
"aws4fetch": "^1.0.20",
|
|
179
179
|
"hyparquet": "^1.25.8",
|
|
180
180
|
"hyparquet-writer": "^0.15.1",
|
|
181
|
-
"tsx": "^4.22.
|
|
181
|
+
"tsx": "^4.22.3",
|
|
182
182
|
"vitest": "^4.1.6"
|
|
183
183
|
},
|
|
184
184
|
"scripts": {
|