@chkit/plugin-backfill 0.1.0-beta.20 → 0.1.0-beta.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +130 -0
- package/dist/async-backfill.d.ts +4 -4
- package/dist/async-backfill.d.ts.map +1 -1
- package/dist/async-backfill.js +2 -2
- package/dist/async-backfill.js.map +1 -1
- package/dist/chunking/analyze.d.ts +5 -35
- package/dist/chunking/analyze.d.ts.map +1 -1
- package/dist/chunking/analyze.js +3 -71
- package/dist/chunking/analyze.js.map +1 -1
- package/dist/chunking/boundary-codec.d.ts +4 -0
- package/dist/chunking/boundary-codec.d.ts.map +1 -0
- package/dist/chunking/boundary-codec.js +79 -0
- package/dist/chunking/boundary-codec.js.map +1 -0
- package/dist/chunking/e2e/constants.d.ts +2 -0
- package/dist/chunking/e2e/constants.d.ts.map +1 -0
- package/dist/chunking/e2e/constants.js +2 -0
- package/dist/chunking/e2e/constants.js.map +1 -0
- package/dist/chunking/e2e/seed-datasets.script.d.ts +20 -0
- package/dist/chunking/e2e/seed-datasets.script.d.ts.map +1 -0
- package/dist/chunking/e2e/seed-datasets.script.js +134 -0
- package/dist/chunking/e2e/seed-datasets.script.js.map +1 -0
- package/dist/chunking/introspect.d.ts +6 -0
- package/dist/chunking/introspect.d.ts.map +1 -1
- package/dist/chunking/introspect.js +113 -22
- package/dist/chunking/introspect.js.map +1 -1
- package/dist/chunking/partition-slices.d.ts +14 -0
- package/dist/chunking/partition-slices.d.ts.map +1 -0
- package/dist/chunking/partition-slices.js +111 -0
- package/dist/chunking/partition-slices.js.map +1 -0
- package/dist/chunking/planner.d.ts +3 -0
- package/dist/chunking/planner.d.ts.map +1 -0
- package/dist/chunking/planner.js +343 -0
- package/dist/chunking/planner.js.map +1 -0
- package/dist/chunking/services/distribution-source.d.ts +11 -0
- package/dist/chunking/services/distribution-source.d.ts.map +1 -0
- package/dist/chunking/services/distribution-source.js +60 -0
- package/dist/chunking/services/distribution-source.js.map +1 -0
- package/dist/chunking/services/metadata-source.d.ts +4 -0
- package/dist/chunking/services/metadata-source.d.ts.map +1 -0
- package/dist/chunking/services/metadata-source.js +138 -0
- package/dist/chunking/services/metadata-source.js.map +1 -0
- package/dist/chunking/services/row-probe.d.ts +12 -0
- package/dist/chunking/services/row-probe.d.ts.map +1 -0
- package/dist/chunking/services/row-probe.js +49 -0
- package/dist/chunking/services/row-probe.js.map +1 -0
- package/dist/chunking/sql.d.ts +11 -5
- package/dist/chunking/sql.d.ts.map +1 -1
- package/dist/chunking/sql.js +190 -107
- package/dist/chunking/sql.js.map +1 -1
- package/dist/chunking/strategies/equal-width-split.d.ts +3 -0
- package/dist/chunking/strategies/equal-width-split.d.ts.map +1 -0
- package/dist/chunking/strategies/equal-width-split.js +46 -0
- package/dist/chunking/strategies/equal-width-split.js.map +1 -0
- package/dist/chunking/strategies/group-by-key-split.d.ts +3 -0
- package/dist/chunking/strategies/group-by-key-split.d.ts.map +1 -0
- package/dist/chunking/strategies/group-by-key-split.js +54 -0
- package/dist/chunking/strategies/group-by-key-split.js.map +1 -0
- package/dist/chunking/strategies/metadata-single-chunk.d.ts +3 -0
- package/dist/chunking/strategies/metadata-single-chunk.d.ts.map +1 -0
- package/dist/chunking/strategies/metadata-single-chunk.js +5 -0
- package/dist/chunking/strategies/metadata-single-chunk.js.map +1 -0
- package/dist/chunking/strategies/quantile-range-split.d.ts +5 -0
- package/dist/chunking/strategies/quantile-range-split.d.ts.map +1 -0
- package/dist/chunking/strategies/quantile-range-split.js +132 -0
- package/dist/chunking/strategies/quantile-range-split.js.map +1 -0
- package/dist/chunking/strategies/refinement.d.ts +3 -0
- package/dist/chunking/strategies/refinement.d.ts.map +1 -0
- package/dist/chunking/strategies/refinement.js +61 -0
- package/dist/chunking/strategies/refinement.js.map +1 -0
- package/dist/chunking/strategies/string-prefix-split.d.ts +4 -0
- package/dist/chunking/strategies/string-prefix-split.d.ts.map +1 -0
- package/dist/chunking/strategies/string-prefix-split.js +73 -0
- package/dist/chunking/strategies/string-prefix-split.js.map +1 -0
- package/dist/chunking/strategies/temporal-bucket-split.d.ts +3 -0
- package/dist/chunking/strategies/temporal-bucket-split.d.ts.map +1 -0
- package/dist/chunking/strategies/temporal-bucket-split.js +67 -0
- package/dist/chunking/strategies/temporal-bucket-split.js.map +1 -0
- package/dist/chunking/strategy-policy.d.ts +3 -0
- package/dist/chunking/strategy-policy.d.ts.map +1 -0
- package/dist/chunking/strategy-policy.js +4 -0
- package/dist/chunking/strategy-policy.js.map +1 -0
- package/dist/chunking/types.d.ts +126 -18
- package/dist/chunking/types.d.ts.map +1 -1
- package/dist/chunking/utils/binary-string.d.ts +8 -0
- package/dist/chunking/utils/binary-string.d.ts.map +1 -0
- package/dist/chunking/utils/binary-string.js +52 -0
- package/dist/chunking/utils/binary-string.js.map +1 -0
- package/dist/chunking/utils/ids.d.ts +4 -0
- package/dist/chunking/utils/ids.d.ts.map +1 -0
- package/dist/chunking/utils/ids.js +11 -0
- package/dist/chunking/utils/ids.js.map +1 -0
- package/dist/chunking/utils/ranges.d.ts +5 -0
- package/dist/chunking/utils/ranges.d.ts.map +1 -0
- package/dist/chunking/utils/ranges.js +19 -0
- package/dist/chunking/utils/ranges.js.map +1 -0
- package/dist/detect.d.ts +0 -5
- package/dist/detect.d.ts.map +1 -1
- package/dist/detect.js +0 -41
- package/dist/detect.js.map +1 -1
- package/dist/index.d.ts +0 -5
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +0 -2
- package/dist/index.js.map +1 -1
- package/dist/logging.d.ts +12 -0
- package/dist/logging.d.ts.map +1 -0
- package/dist/logging.js +61 -0
- package/dist/logging.js.map +1 -0
- package/dist/options.d.ts +6 -15
- package/dist/options.d.ts.map +1 -1
- package/dist/options.js +1 -3
- package/dist/options.js.map +1 -1
- package/dist/payload.d.ts.map +1 -1
- package/dist/payload.js +4 -6
- package/dist/payload.js.map +1 -1
- package/dist/planner.d.ts +2 -1
- package/dist/planner.d.ts.map +1 -1
- package/dist/planner.js +31 -52
- package/dist/planner.js.map +1 -1
- package/dist/plugin.d.ts.map +1 -1
- package/dist/plugin.js +27 -11
- package/dist/plugin.js.map +1 -1
- package/dist/queries.js +3 -3
- package/dist/queries.js.map +1 -1
- package/dist/sdk.d.ts +12 -0
- package/dist/sdk.d.ts.map +1 -0
- package/dist/sdk.js +9 -0
- package/dist/sdk.js.map +1 -0
- package/dist/state.d.ts.map +1 -1
- package/dist/state.js +16 -5
- package/dist/state.js.map +1 -1
- package/dist/types.d.ts +12 -25
- package/dist/types.d.ts.map +1 -1
- package/package.json +11 -3
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import { buildSliceFromRows } from '../partition-slices.js';
|
|
2
|
+
import { probeStringKeyDistribution, } from '../services/distribution-source.js';
|
|
3
|
+
import { compareBinaryStrings, maxBinaryString, minBinaryString } from '../utils/binary-string.js';
|
|
4
|
+
import { getChunkRange, replaceChunkRange } from '../utils/ranges.js';
|
|
5
|
+
// Passed as the final argument to probeStringKeyDistribution by splitSliceWithGroupByKey.
// NOTE(review): presumably the maximum number of distinct keys the probe may return — confirm
// against probeStringKeyDistribution.
const KEY_LIMIT = 100;
|
|
6
|
+
/**
 * Splits a slice into per-key sub-slices using a probed key distribution.
 *
 * Resolves to `undefined` (the strategy does not apply) when:
 *  - there is no sort key at `dimensionIndex`, or its category is not 'string';
 *  - the slice's range on that dimension is missing its `from` or `to` bound;
 *  - the distribution probe yields nothing (falsy or empty result).
 *
 * @param context Planner context, forwarded to the distribution probe.
 * @param partition Partition the slice belongs to; forwarded to buildKeySlices.
 * @param slice Slice to split; its `partitionId` and `ranges` drive the probe.
 * @param sortKeys Sort keys of the table, indexed by dimension.
 * @param dimensionIndex Dimension (index into `sortKeys`) to split on.
 * @returns The slices produced by buildKeySlices, or `undefined`.
 */
export async function splitSliceWithGroupByKey(context, partition, slice, sortKeys, dimensionIndex) {
    const dimensionKey = sortKeys[dimensionIndex];
    if (dimensionKey?.category !== 'string') {
        return undefined;
    }
    const bounds = getChunkRange(slice, dimensionIndex);
    const isBounded = bounds.from !== undefined && bounds.to !== undefined;
    if (!isBounded) {
        return undefined;
    }
    const distribution = await probeStringKeyDistribution(
        context,
        slice.partitionId,
        slice.ranges,
        dimensionKey,
        dimensionIndex,
        sortKeys,
        KEY_LIMIT,
    );
    if (!distribution || distribution.length === 0) {
        return undefined;
    }
    // Order a copy by key value so the resulting slices come out in range order;
    // the probe result itself is left untouched.
    const ordered = Array.from(distribution);
    ordered.sort((left, right) => compareBinaryStrings(left.value, right.value));
    return buildKeySlices(partition, slice, dimensionIndex, bounds.from, bounds.to, ordered);
}
|
|
20
|
+
/**
 * Builds one slice per probed key bucket, clamped to the parent slice's range.
 *
 * For each bucket the candidate range is [value, value + "\0"), intersected with
 * [rangeFrom, rangeTo). Buckets whose clamped range is empty are dropped.
 *
 * Ranges between consecutive keys are intentionally NOT emitted: per the original
 * implementation's note, the probed distribution accounts for every row, so those
 * gaps hold zero rows. (The previous version computed the gap bounds and then
 * discarded them; that dead computation has been removed.)
 *
 * @param partition Partition forwarded to buildSliceFromRows.
 * @param parentSlice Slice being split; supplies the other dimensions' ranges and the lineage.
 * @param dimensionIndex Dimension whose range is replaced in each child slice.
 * @param rangeFrom Lower bound of the parent range on this dimension.
 * @param rangeTo Upper bound of the parent range on this dimension.
 * @param sortedBuckets Buckets (`{ value, rowCount }`), already sorted by value.
 * @returns Array of slices, in bucket order.
 */
function buildKeySlices(partition, parentSlice, dimensionIndex, rangeFrom, rangeTo, sortedBuckets) {
    const slices = [];
    for (const bucket of sortedBuckets) {
        // value + NUL is the smallest string sorting strictly after `value` in byte order.
        // NOTE(review): this assumes ranges are half-open [from, to) — confirm against the SQL builder.
        const keyTo = `${bucket.value}\0`;
        const sliceFrom = maxBinaryString(bucket.value, rangeFrom);
        const sliceTo = minBinaryString(keyTo, rangeTo);
        if (compareBinaryStrings(sliceFrom, sliceTo) >= 0) {
            // Key lies outside the parent range (or the clamped range is empty).
            continue;
        }
        slices.push(buildSliceFromRows(partition, {
            ranges: replaceChunkRange(parentSlice, dimensionIndex, sliceFrom, sliceTo),
            rows: bucket.rowCount,
            focusedValue: { dimensionIndex, value: bucket.value },
            confidence: 'high',
            reason: 'group-by-key-distribution',
            lineage: parentSlice.analysis.lineage.concat([{
                    strategyId: 'group-by-key-split',
                    dimensionIndex,
                    reason: 'split slice using full GROUP BY key distribution',
                }]),
        }));
    }
    return slices;
}
|
|
54
|
+
//# sourceMappingURL=group-by-key-split.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"group-by-key-split.js","sourceRoot":"","sources":["../../../src/chunking/strategies/group-by-key-split.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAA;AAC3D,OAAO,EAEL,0BAA0B,GAC3B,MAAM,oCAAoC,CAAA;AAO3C,OAAO,EAAE,oBAAoB,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,2BAA2B,CAAA;AAClG,OAAO,EAAE,aAAa,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAA;AAErE,MAAM,SAAS,GAAG,GAAG,CAAA;AAErB,MAAM,CAAC,KAAK,UAAU,wBAAwB,CAC5C,OAAuB,EACvB,SAAoB,EACpB,KAAqB,EACrB,QAAmB,EACnB,cAAsB;IAEtB,MAAM,OAAO,GAAG,QAAQ,CAAC,cAAc,CAAC,CAAA;IACxC,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,QAAQ,KAAK,QAAQ;QAAE,OAAO,SAAS,CAAA;IAE/D,MAAM,KAAK,GAAG,aAAa,CAAC,KAAK,EAAE,cAAc,CAAC,CAAA;IAClD,IAAI,KAAK,CAAC,IAAI,KAAK,SAAS,IAAI,KAAK,CAAC,EAAE,KAAK,SAAS;QAAE,OAAO,SAAS,CAAA;IAExE,MAAM,OAAO,GAAG,MAAM,0BAA0B,CAC9C,OAAO,EACP,KAAK,CAAC,WAAW,EACjB,KAAK,CAAC,MAAM,EACZ,OAAO,EACP,cAAc,EACd,QAAQ,EACR,SAAS,CACV,CAAA;IAED,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,SAAS,CAAA;IAEtD,qDAAqD;IACrD,MAAM,MAAM,GAAG,CAAC,GAAG,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,oBAAoB,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAA;IAElF,OAAO,cAAc,CAAC,SAAS,EAAE,KAAK,EAAE,cAAc,EAAE,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,EAAE,EAAE,MAAM,CAAC,CAAA;AACvF,CAAC;AAED,SAAS,cAAc,CACrB,SAAoB,EACpB,WAA2B,EAC3B,cAAsB,EACtB,SAAiB,EACjB,OAAe,EACf,aAAgC;IAEhC,MAAM,MAAM,GAAqB,EAAE,CAAA;IACnC,IAAI,MAAM,GAAG,SAAS,CAAA;IAEtB,KAAK,MAAM,MAAM,IAAI,aAAa,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,CAAA;QAC5B,MAAM,KAAK,GAAG,GAAG,MAAM,CAAC,KAAK,IAAI,CAAA;QAEjC,4DAA4D;QAC5D,MAAM,OAAO,GAAG,eAAe,CAAC,MAAM,EAAE,SAAS,CAAC,CAAA;QAClD,MAAM,KAAK,GAAG,eAAe,CAAC,OAAO,EAAE,OAAO,CAAC,CAAA;QAC/C,IAAI,oBAAoB,CAAC,OAAO,EAAE,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;YAC7C,iEAAiE;YACjE,gEAAgE;QAClE,CAAC;QAED,kBAAkB;QAClB,MAAM,SAAS,GAAG,eAAe,CAAC,OAAO,EAAE,SAAS,CAAC,CAAA;QACrD,MAAM,OAAO,GAAG,eAAe,CAAC,KAAK,EAAE,OAAO,CAAC,CAAA;QAC/C,IAAI,oBAAoB,CAAC,SAAS,EAAE,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;YACjD,MAAM,CAAC,IAAI,CAAC,kBAAkB,CAAC,SAAS,EAAE;gBACxC,MAAM,EAAE,iBAAiB,CA
AC,WAAW,EAAE,cAAc,EAAE,SAAS,EAAE,OAAO,CAAC;gBAC1E,IAAI,EAAE,MAAM,CAAC,QAAQ;gBACrB,YAAY,EAAE,EAAE,cAAc,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,EAAE;gBACrD,UAAU,EAAE,MAAM;gBAClB,MAAM,EAAE,2BAA2B;gBACnC,OAAO,EAAE,WAAW,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;wBAC5C,UAAU,EAAE,oBAAoB;wBAChC,cAAc;wBACd,MAAM,EAAE,kDAAkD;qBAC3D,CAAC,CAAC;aACJ,CAAC,CAAC,CAAA;QACL,CAAC;QAED,MAAM,GAAG,KAAK,CAAA;IAChB,CAAC;IAED,OAAO,MAAM,CAAA;AACf,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"metadata-single-chunk.d.ts","sourceRoot":"","sources":["../../../src/chunking/strategies/metadata-single-chunk.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,cAAc,EAAE,MAAM,aAAa,CAAA;AAE5D,wBAAgB,yBAAyB,CAAC,SAAS,EAAE,SAAS,GAAG,cAAc,EAAE,CAEhF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"metadata-single-chunk.js","sourceRoot":"","sources":["../../../src/chunking/strategies/metadata-single-chunk.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAA;AAGvD,MAAM,UAAU,yBAAyB,CAAC,SAAoB;IAC5D,OAAO,CAAC,cAAc,CAAC,SAAS,CAAC,CAAC,CAAA;AACpC,CAAC"}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import type { Partition, PartitionSlice, PlannerContext, SortKey } from '../types.js';
|
|
2
|
+
/**
 * Splits `slice` on dimension `dimensionIndex` into one sub-slice per pair of adjacent
 * entries in `boundaries`. Pairs with an undefined or identical endpoint are skipped, and
 * intervals whose row estimate is <= 0 are dropped from the result.
 */
export declare function splitSliceWithQuantiles(context: PlannerContext, partition: Partition, slice: PartitionSlice, sortKeys: SortKey[], dimensionIndex: number, boundaries: string[]): Promise<PartitionSlice[]>;
|
|
3
|
+
/**
 * Searches (by bounded bisection over row estimates) for a boundary value on dimension
 * `dimensionIndex` at which the rows from the start of the slice's range reach `targetCumRows`.
 * Rejects with an Error when there is no sort key at that dimension or when the slice's
 * range on it lacks a `from` or `to` bound.
 */
export declare function findQuantileBoundaryOnDimension(context: PlannerContext, slice: PartitionSlice, sortKeys: SortKey[], dimensionIndex: number, targetCumRows: number): Promise<string>;
|
|
4
|
+
/**
 * Returns `subCount + 1` boundaries evenly spaced between `rangeFrom` and `rangeTo`
 * (just `[rangeFrom, rangeTo]` when `subCount <= 1`). Spacing is computed on timestamps for
 * 'datetime' keys (emitted as ISO strings), on numbers for 'numeric' keys, and on the
 * big-integer encoding of the strings otherwise.
 */
export declare function buildEvenlySpacedBoundaries(rangeFrom: string, rangeTo: string, subCount: number, sortKey: SortKey): string[];
|
|
5
|
+
//# sourceMappingURL=quantile-range-split.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"quantile-range-split.d.ts","sourceRoot":"","sources":["../../../src/chunking/strategies/quantile-range-split.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EACV,SAAS,EACT,cAAc,EACd,cAAc,EACd,OAAO,EACR,MAAM,aAAa,CAAA;AAOpB,wBAAsB,uBAAuB,CAC3C,OAAO,EAAE,cAAc,EACvB,SAAS,EAAE,SAAS,EACpB,KAAK,EAAE,cAAc,EACrB,QAAQ,EAAE,OAAO,EAAE,EACnB,cAAc,EAAE,MAAM,EACtB,UAAU,EAAE,MAAM,EAAE,GACnB,OAAO,CAAC,cAAc,EAAE,CAAC,CAsC3B;AAED,wBAAsB,+BAA+B,CACnD,OAAO,EAAE,cAAc,EACvB,KAAK,EAAE,cAAc,EACrB,QAAQ,EAAE,OAAO,EAAE,EACnB,cAAc,EAAE,MAAM,EACtB,aAAa,EAAE,MAAM,GACpB,OAAO,CAAC,MAAM,CAAC,CAkBjB;AAED,wBAAgB,2BAA2B,CACzC,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,OAAO,GACf,MAAM,EAAE,CA6BV"}
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
import pMap from 'p-map';
|
|
2
|
+
import { buildSliceFromRows } from '../partition-slices.js';
|
|
3
|
+
import { estimateRows, parsePlannerDateTime } from '../services/row-probe.js';
|
|
4
|
+
import { bigIntToStr, strToBigInt } from '../utils/binary-string.js';
|
|
5
|
+
import { getChunkRange, replaceChunkRange } from '../utils/ranges.js';
|
|
6
|
+
// Upper bound on bisection iterations in the find*Boundary helpers; each iteration
// issues one awaited row estimate, so this also caps the probes per boundary search.
const BINARY_SEARCH_STEPS = 24;
|
|
7
|
+
// Value passed as pMap's `concurrency` option when splitSliceWithQuantiles estimates rows per interval.
const ESTIMATE_CONCURRENCY = 50;
|
|
8
|
+
/**
 * Splits `slice` on one dimension into sub-slices delimited by consecutive `boundaries`.
 *
 * Adjacent boundary pairs become candidate intervals; a pair is skipped when either
 * endpoint is undefined or both endpoints are identical. Each interval's row count is
 * obtained through estimateRows (run through pMap with ESTIMATE_CONCURRENCY), and
 * intervals reporting `rows <= 0` are omitted from the result.
 *
 * @param context Planner context; `rowProbeStrategy === 'count'` marks results as exact.
 * @param partition Partition being planned; supplies `partitionId` for the estimate.
 * @param slice Parent slice; supplies the other dimensions' ranges, focused value and lineage.
 * @param sortKeys Sort keys of the table, forwarded to estimateRows.
 * @param dimensionIndex Dimension whose range is replaced per interval.
 * @param boundaries Ordered boundary values; N boundaries give at most N-1 slices.
 * @returns The non-empty sub-slices, in boundary order.
 */
export async function splitSliceWithQuantiles(context, partition, slice, sortKeys, dimensionIndex, boundaries) {
    // Pair up neighbours: (b[0], b[1]), (b[1], b[2]), ...
    const intervals = [];
    for (let next = 1; next < boundaries.length; next += 1) {
        const from = boundaries[next - 1];
        const to = boundaries[next];
        const usable = from !== undefined && to !== undefined && from !== to;
        if (usable) {
            intervals.push({ from, to });
        }
    }
    const estimateInterval = async (interval) => {
        const ranges = replaceChunkRange(slice, dimensionIndex, interval.from, interval.to);
        const query = { partitionId: partition.partitionId, ranges };
        const rows = await estimateRows(context, query, sortKeys);
        if (rows <= 0) {
            return null;
        }
        const lineageEntry = {
            strategyId: 'quantile-range-split',
            dimensionIndex,
            reason: 'split slice into quantile-aligned ranges',
        };
        let confidence = 'high';
        let reason = 'quantile-estimate';
        if (context.rowProbeStrategy === 'count') {
            confidence = 'exact';
            reason = 'exact-count';
        }
        return buildSliceFromRows(partition, {
            ranges,
            rows,
            focusedValue: slice.analysis.focusedValue,
            confidence,
            reason,
            lineage: slice.analysis.lineage.concat([lineageEntry]),
        });
    };
    const estimated = await pMap(intervals, estimateInterval, { concurrency: ESTIMATE_CONCURRENCY });
    return estimated.filter((candidate) => candidate !== null);
}
|
|
39
|
+
/**
 * Locates a boundary value on one dimension for a cumulative row target by delegating
 * to the search routine that matches the sort key's category.
 *
 * Dispatch: 'string' -> findStringBoundary, 'datetime' -> findDateTimeBoundary,
 * anything else -> findNumericBoundary.
 *
 * @param context Planner context, forwarded to the search routine.
 * @param slice Slice whose range on `dimensionIndex` bounds the search.
 * @param sortKeys Sort keys of the table, indexed by dimension.
 * @param dimensionIndex Dimension to search on.
 * @param targetCumRows Cumulative row count the boundary should reach.
 * @returns The boundary value as a string.
 * @throws {Error} When no sort key exists at `dimensionIndex`, or the slice's range on
 *   that dimension has an undefined `from` or `to`.
 */
export async function findQuantileBoundaryOnDimension(context, slice, sortKeys, dimensionIndex, targetCumRows) {
    const sortKey = sortKeys[dimensionIndex];
    if (!sortKey) {
        throw new Error(`Missing sort key at dimension ${dimensionIndex}`);
    }
    const bounds = getChunkRange(slice, dimensionIndex);
    if (bounds.from === undefined || bounds.to === undefined) {
        throw new Error(`Missing range for quantile split on dimension ${dimensionIndex}`);
    }
    let locate;
    switch (sortKey.category) {
        case 'string':
            locate = findStringBoundary;
            break;
        case 'datetime':
            locate = findDateTimeBoundary;
            break;
        default:
            locate = findNumericBoundary;
            break;
    }
    return locate(context, slice, sortKeys, dimensionIndex, bounds.from, bounds.to, targetCumRows);
}
|
|
56
|
+
/**
 * Produces `subCount + 1` boundaries evenly spaced from `rangeFrom` to `rangeTo`.
 *
 * - `subCount <= 1`: returns `[rangeFrom, rangeTo]` unchanged.
 * - 'datetime' keys: interpolates on parsePlannerDateTime() timestamps and emits ISO strings.
 * - 'numeric' keys: when both bounds are plain integer strings (optional '-', digits only)
 *   and `subCount` is an integer, interpolation is done in BigInt so that values beyond
 *   Number.MAX_SAFE_INTEGER (e.g. 64-bit integer keys) are not rounded. Offsets are floored,
 *   matching the floating-point path, which is still used for any other numeric input.
 * - other keys: interpolates on the big-integer encoding of the strings (strToBigInt /
 *   bigIntToStr) and restores the original endpoint strings.
 *
 * @param rangeFrom Lower bound, as a string.
 * @param rangeTo Upper bound, as a string.
 * @param subCount Number of sub-ranges wanted.
 * @param sortKey Sort key; only `category` is read.
 * @returns Array of boundary strings (may contain duplicates when the range is narrow).
 */
export function buildEvenlySpacedBoundaries(rangeFrom, rangeTo, subCount, sortKey) {
    if (subCount <= 1)
        return [rangeFrom, rangeTo];
    if (sortKey.category === 'datetime') {
        const start = parsePlannerDateTime(rangeFrom);
        const end = parsePlannerDateTime(rangeTo);
        return Array.from({ length: subCount + 1 }, (_, index) => new Date(start + Math.floor(((end - start) * index) / subCount)).toISOString());
    }
    if (sortKey.category === 'numeric') {
        const integerPattern = /^-?\d+$/;
        if (Number.isInteger(subCount) && integerPattern.test(rangeFrom) && integerPattern.test(rangeTo)) {
            // Exact integer path: Number() would silently round bounds above 2^53.
            const start = BigInt(rangeFrom);
            const span = BigInt(rangeTo) - start;
            const parts = BigInt(subCount);
            return Array.from({ length: subCount + 1 }, (_, index) => {
                const scaled = span * BigInt(index);
                let offset = scaled / parts;
                // BigInt division truncates toward zero; adjust to floor for negative spans.
                if (scaled < 0n && scaled % parts !== 0n)
                    offset -= 1n;
                return String(start + offset);
            });
        }
        const start = Number(rangeFrom);
        const end = Number(rangeTo);
        return Array.from({ length: subCount + 1 }, (_, index) => String(start + Math.floor(((end - start) * index) / subCount)));
    }
    const width = Math.max(rangeFrom.length, rangeTo.length);
    const start = strToBigInt(rangeFrom, width);
    const end = strToBigInt(rangeTo, width);
    const boundaries = Array.from({ length: subCount + 1 }, (_, index) => bigIntToStr(start + ((end - start) * BigInt(index)) / BigInt(subCount), width, width));
    // Use original values at endpoints to avoid round-trip length changes
    boundaries[0] = rangeFrom;
    boundaries[boundaries.length - 1] = rangeTo;
    return boundaries;
}
|
|
78
|
+
/**
 * Bisects a string range for the point where the estimated rows from `rangeFrom`
 * reach `targetCumRows`.
 *
 * Both bounds are converted to big integers at a common width (the longer of the two
 * string lengths) via strToBigInt; midpoints are converted back with bigIntToStr for each
 * probe. The loop stops after BINARY_SEARCH_STEPS probes or as soon as the midpoint
 * coincides with either end of the bracket.
 *
 * @returns The string form of the final bracket's midpoint.
 */
async function findStringBoundary(context, slice, sortKeys, dimensionIndex, rangeFrom, rangeTo, targetCumRows) {
    const width = Math.max(rangeFrom.length, rangeTo.length);
    const decode = (value) => bigIntToStr(value, width, width);
    const halfway = (a, b) => (a + b) / 2n;
    let lower = strToBigInt(rangeFrom, width);
    let upper = strToBigInt(rangeTo, width);
    let stepsLeft = BINARY_SEARCH_STEPS;
    while (stepsLeft > 0) {
        stepsLeft -= 1;
        const candidate = halfway(lower, upper);
        if (candidate === lower || candidate === upper) {
            // Bracket can no longer shrink.
            break;
        }
        const cumulativeRows = await estimateRowsUntil(context, slice, sortKeys, dimensionIndex, rangeFrom, decode(candidate));
        if (cumulativeRows < targetCumRows) {
            lower = candidate;
        }
        else {
            upper = candidate;
        }
    }
    return decode(halfway(lower, upper));
}
|
|
95
|
+
/**
 * Bisects a datetime range for the point where the estimated rows from `rangeFrom`
 * reach `targetCumRows`.
 *
 * Bounds are turned into numbers with parsePlannerDateTime; each probe's midpoint is
 * rendered with `Date#toISOString`. The loop stops after BINARY_SEARCH_STEPS probes or once
 * the (floored) midpoint equals either end of the bracket.
 *
 * @returns ISO-8601 string of the final bracket's floored midpoint.
 */
async function findDateTimeBoundary(context, slice, sortKeys, dimensionIndex, rangeFrom, rangeTo, targetCumRows) {
    const toIso = (timestamp) => new Date(timestamp).toISOString();
    const halfway = (a, b) => Math.floor((a + b) / 2);
    let lower = parsePlannerDateTime(rangeFrom);
    let upper = parsePlannerDateTime(rangeTo);
    let stepsLeft = BINARY_SEARCH_STEPS;
    while (stepsLeft > 0) {
        stepsLeft -= 1;
        const candidate = halfway(lower, upper);
        if (candidate === lower || candidate === upper) {
            // Bracket can no longer shrink.
            break;
        }
        const cumulativeRows = await estimateRowsUntil(context, slice, sortKeys, dimensionIndex, rangeFrom, toIso(candidate));
        if (cumulativeRows < targetCumRows) {
            lower = candidate;
        }
        else {
            upper = candidate;
        }
    }
    return toIso(halfway(lower, upper));
}
|
|
111
|
+
/**
 * Bisects a numeric range for the point where the estimated rows from `rangeFrom`
 * reach `targetCumRows`.
 *
 * When both bounds are plain integer strings (optional '-', digits only) the search runs
 * in BigInt, so keys beyond Number.MAX_SAFE_INTEGER (e.g. 64-bit integers) are probed and
 * returned exactly instead of being rounded by Number(). Any other input keeps the
 * floating-point search. In both modes midpoints are floored, the loop runs at most
 * BINARY_SEARCH_STEPS probes, and it stops early once the midpoint equals either end
 * of the bracket.
 *
 * @param context Planner context, forwarded to estimateRowsUntil.
 * @param slice Slice being split, forwarded to estimateRowsUntil.
 * @param sortKeys Sort keys of the table, forwarded to estimateRowsUntil.
 * @param dimensionIndex Dimension being searched.
 * @param rangeFrom Lower bound (string form of a number).
 * @param rangeTo Upper bound (string form of a number).
 * @param targetCumRows Cumulative row count the boundary should reach.
 * @returns String form of the final bracket's floored midpoint.
 */
async function findNumericBoundary(context, slice, sortKeys, dimensionIndex, rangeFrom, rangeTo, targetCumRows) {
    const integerPattern = /^-?\d+$/;
    const exact = integerPattern.test(rangeFrom) && integerPattern.test(rangeTo);
    const parse = exact ? BigInt : Number;
    const midpointOf = exact
        ? (a, b) => {
            const sum = a + b;
            const half = sum / 2n;
            // BigInt division truncates toward zero; adjust to floor for negative odd sums.
            return sum < 0n && sum % 2n !== 0n ? half - 1n : half;
        }
        : (a, b) => Math.floor((a + b) / 2);
    let low = parse(rangeFrom);
    let high = parse(rangeTo);
    for (let step = 0; step < BINARY_SEARCH_STEPS; step++) {
        const midpoint = midpointOf(low, high);
        if (midpoint === low || midpoint === high)
            break;
        const rows = await estimateRowsUntil(context, slice, sortKeys, dimensionIndex, rangeFrom, String(midpoint));
        if (rows < targetCumRows)
            low = midpoint;
        else
            high = midpoint;
    }
    return String(midpointOf(low, high));
}
|
|
126
|
+
/**
 * Estimates the rows of `slice` when its range on `dimensionIndex` is replaced by
 * [rangeFrom, rangeTo]; all other ranges come from the slice via replaceChunkRange.
 *
 * @returns Whatever estimateRows resolves to for the slice's partition and the adjusted ranges.
 */
async function estimateRowsUntil(context, slice, sortKeys, dimensionIndex, rangeFrom, rangeTo) {
    const { partitionId } = slice;
    const ranges = replaceChunkRange(slice, dimensionIndex, rangeFrom, rangeTo);
    const probe = { partitionId, ranges };
    return estimateRows(context, probe, sortKeys);
}
|
|
132
|
+
//# sourceMappingURL=quantile-range-split.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"quantile-range-split.js","sourceRoot":"","sources":["../../../src/chunking/strategies/quantile-range-split.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,OAAO,CAAA;AACxB,OAAO,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAA;AAC3D,OAAO,EAAE,YAAY,EAAE,oBAAoB,EAAE,MAAM,0BAA0B,CAAA;AAO7E,OAAO,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,2BAA2B,CAAA;AACpE,OAAO,EAAE,aAAa,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAA;AAErE,MAAM,mBAAmB,GAAG,EAAE,CAAA;AAC9B,MAAM,oBAAoB,GAAG,EAAE,CAAA;AAE/B,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAC3C,OAAuB,EACvB,SAAoB,EACpB,KAAqB,EACrB,QAAmB,EACnB,cAAsB,EACtB,UAAoB;IAEpB,MAAM,SAAS,GAAwC,EAAE,CAAA;IACzD,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC;QAC3D,MAAM,IAAI,GAAG,UAAU,CAAC,KAAK,CAAC,CAAA;QAC9B,MAAM,EAAE,GAAG,UAAU,CAAC,KAAK,GAAG,CAAC,CAAC,CAAA;QAChC,IAAI,IAAI,KAAK,SAAS,IAAI,EAAE,KAAK,SAAS,IAAI,IAAI,KAAK,EAAE;YAAE,SAAQ;QACnE,SAAS,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC,CAAA;IAC9B,CAAC;IAED,MAAM,OAAO,GAAG,MAAM,IAAI,CACxB,SAAS,EACT,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE,EAAE,EAAE;QACrB,MAAM,MAAM,GAAG,iBAAiB,CAAC,KAAK,EAAE,cAAc,EAAE,IAAI,EAAE,EAAE,CAAC,CAAA;QACjE,MAAM,IAAI,GAAG,MAAM,YAAY,CAC7B,OAAO,EACP,EAAE,WAAW,EAAE,SAAS,CAAC,WAAW,EAAE,MAAM,EAAE,EAC9C,QAAQ,CACT,CAAA;QACD,IAAI,IAAI,IAAI,CAAC;YAAE,OAAO,IAAI,CAAA;QAC1B,OAAO,kBAAkB,CAAC,SAAS,EAAE;YACnC,MAAM;YACN,IAAI;YACJ,YAAY,EAAE,KAAK,CAAC,QAAQ,CAAC,YAAY;YACzC,UAAU,EAAE,OAAO,CAAC,gBAAgB,KAAK,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM;YACnE,MAAM,EAAE,OAAO,CAAC,gBAAgB,KAAK,OAAO,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,mBAAmB;YAClF,OAAO,EAAE,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAM,CAAC;gBACrC;oBACE,UAAU,EAAE,sBAAsB;oBAClC,cAAc;oBACd,MAAM,EAAE,0CAA0C;iBACnD;aACF,CAAC;SACH,CAAC,CAAA;IACJ,CAAC,EACD,EAAE,WAAW,EAAE,oBAAoB,EAAE,CACtC,CAAA;IAED,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAuB,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,CAAA;AAC/D,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,+BAA+B,CACnD,OAAuB,EACvB,KAAqB,EACrB,QAAmB,EACnB,cAAsB,EACtB,aAAqB;IAErB,MAAM,OAAO,GAAG,QAAQ,CAAC,cAAc,CAAC,CAAA;IACxC,IAAI,CAAC,OAAO,EAAE,CAAC;Q
ACb,MAAM,IAAI,KAAK,CAAC,iCAAiC,cAAc,EAAE,CAAC,CAAA;IACpE,CAAC;IAED,MAAM,KAAK,GAAG,aAAa,CAAC,KAAK,EAAE,cAAc,CAAC,CAAA;IAClD,IAAI,KAAK,CAAC,IAAI,KAAK,SAAS,IAAI,KAAK,CAAC,EAAE,KAAK,SAAS,EAAE,CAAC;QACvD,MAAM,IAAI,KAAK,CAAC,iDAAiD,cAAc,EAAE,CAAC,CAAA;IACpF,CAAC;IAED,IAAI,OAAO,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;QAClC,OAAO,kBAAkB,CAAC,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,cAAc,EAAE,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,EAAE,EAAE,aAAa,CAAC,CAAA;IAC1G,CAAC;IACD,IAAI,OAAO,CAAC,QAAQ,KAAK,UAAU,EAAE,CAAC;QACpC,OAAO,oBAAoB,CAAC,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,cAAc,EAAE,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,EAAE,EAAE,aAAa,CAAC,CAAA;IAC5G,CAAC;IACD,OAAO,mBAAmB,CAAC,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,cAAc,EAAE,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,EAAE,EAAE,aAAa,CAAC,CAAA;AAC3G,CAAC;AAED,MAAM,UAAU,2BAA2B,CACzC,SAAiB,EACjB,OAAe,EACf,QAAgB,EAChB,OAAgB;IAEhB,IAAI,QAAQ,IAAI,CAAC;QAAE,OAAO,CAAC,SAAS,EAAE,OAAO,CAAC,CAAA;IAE9C,IAAI,OAAO,CAAC,QAAQ,KAAK,UAAU,EAAE,CAAC;QACpC,MAAM,KAAK,GAAG,oBAAoB,CAAC,SAAS,CAAC,CAAA;QAC7C,MAAM,GAAG,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAA;QACzC,OAAO,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,QAAQ,GAAG,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,CACvD,IAAI,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,GAAG,KAAK,CAAC,GAAG,KAAK,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAC,WAAW,EAAE,CAC/E,CAAA;IACH,CAAC;IAED,IAAI,OAAO,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QACnC,MAAM,KAAK,GAAG,MAAM,CAAC,SAAS,CAAC,CAAA;QAC/B,MAAM,GAAG,GAAG,MAAM,CAAC,OAAO,CAAC,CAAA;QAC3B,OAAO,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,QAAQ,GAAG,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,CACvD,MAAM,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,GAAG,KAAK,CAAC,GAAG,KAAK,CAAC,GAAG,QAAQ,CAAC,CAAC,CAC/D,CAAA;IACH,CAAC;IAED,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,CAAA;IACxD,MAAM,KAAK,GAAG,WAAW,CAAC,SAAS,EAAE,KAAK,CAAC,CAAA;IAC3C,MAAM,GAAG,GAAG,WAAW,CAAC,OAAO,EAAE,KAAK,CAAC,CAAA;IACvC,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,QAAQ,GAAG,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,CACnE,WAAW,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,GAAG,KAAK,CAAC,GAAG
,MAAM,CAAC,KAAK,CAAC,CAAC,GAAG,MAAM,CAAC,QAAQ,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,CACtF,CAAA;IACD,sEAAsE;IACtE,UAAU,CAAC,CAAC,CAAC,GAAG,SAAS,CAAA;IACzB,UAAU,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,OAAO,CAAA;IAC3C,OAAO,UAAU,CAAA;AACnB,CAAC;AAED,KAAK,UAAU,kBAAkB,CAC/B,OAAuB,EACvB,KAAqB,EACrB,QAAmB,EACnB,cAAsB,EACtB,SAAiB,EACjB,OAAe,EACf,aAAqB;IAErB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,CAAA;IACxD,IAAI,GAAG,GAAG,WAAW,CAAC,SAAS,EAAE,KAAK,CAAC,CAAA;IACvC,IAAI,IAAI,GAAG,WAAW,CAAC,OAAO,EAAE,KAAK,CAAC,CAAA;IAEtC,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,GAAG,mBAAmB,EAAE,IAAI,EAAE,EAAE,CAAC;QACtD,MAAM,QAAQ,GAAG,CAAC,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAA;QAClC,IAAI,QAAQ,KAAK,GAAG,IAAI,QAAQ,KAAK,IAAI;YAAE,MAAK;QAEhD,MAAM,GAAG,GAAG,WAAW,CAAC,QAAQ,EAAE,KAAK,EAAE,KAAK,CAAC,CAAA;QAC/C,MAAM,IAAI,GAAG,MAAM,iBAAiB,CAAC,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,cAAc,EAAE,SAAS,EAAE,GAAG,CAAC,CAAA;QAC9F,IAAI,IAAI,GAAG,aAAa;YAAE,GAAG,GAAG,QAAQ,CAAA;;YACnC,IAAI,GAAG,QAAQ,CAAA;IACtB,CAAC;IAED,OAAO,WAAW,CAAC,CAAC,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,EAAE,KAAK,EAAE,KAAK,CAAC,CAAA;AACrD,CAAC;AAED,KAAK,UAAU,oBAAoB,CACjC,OAAuB,EACvB,KAAqB,EACrB,QAAmB,EACnB,cAAsB,EACtB,SAAiB,EACjB,OAAe,EACf,aAAqB;IAErB,IAAI,GAAG,GAAG,oBAAoB,CAAC,SAAS,CAAC,CAAA;IACzC,IAAI,IAAI,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAA;IAExC,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,GAAG,mBAAmB,EAAE,IAAI,EAAE,EAAE,CAAC;QACtD,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAA;QAC7C,IAAI,QAAQ,KAAK,GAAG,IAAI,QAAQ,KAAK,IAAI;YAAE,MAAK;QAEhD,MAAM,GAAG,GAAG,IAAI,IAAI,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAA;QAC5C,MAAM,IAAI,GAAG,MAAM,iBAAiB,CAAC,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,cAAc,EAAE,SAAS,EAAE,GAAG,CAAC,CAAA;QAC9F,IAAI,IAAI,GAAG,aAAa;YAAE,GAAG,GAAG,QAAQ,CAAA;;YACnC,IAAI,GAAG,QAAQ,CAAA;IACtB,CAAC;IAED,OAAO,IAAI,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAA;AAC7D,CAAC;AAED,KAAK,UAAU,mBAAmB,CAChC,OAAuB,EACvB,KAAqB,EACrB,QAAmB,EACnB,cAAsB,EACtB,SAAiB,EACjB,OAAe,EACf,aAAqB;IAErB,IAAI,GA
AG,GAAG,MAAM,CAAC,SAAS,CAAC,CAAA;IAC3B,IAAI,IAAI,GAAG,MAAM,CAAC,OAAO,CAAC,CAAA;IAE1B,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,GAAG,mBAAmB,EAAE,IAAI,EAAE,EAAE,CAAC;QACtD,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAA;QAC7C,IAAI,QAAQ,KAAK,GAAG,IAAI,QAAQ,KAAK,IAAI;YAAE,MAAK;QAEhD,MAAM,IAAI,GAAG,MAAM,iBAAiB,CAAC,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,cAAc,EAAE,SAAS,EAAE,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAA;QAC3G,IAAI,IAAI,GAAG,aAAa;YAAE,GAAG,GAAG,QAAQ,CAAA;;YACnC,IAAI,GAAG,QAAQ,CAAA;IACtB,CAAC;IAED,OAAO,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;AAC7C,CAAC;AAED,KAAK,UAAU,iBAAiB,CAC9B,OAAuB,EACvB,KAAqB,EACrB,QAAmB,EACnB,cAAsB,EACtB,SAAiB,EACjB,OAAe;IAEf,OAAO,YAAY,CACjB,OAAO,EACP;QACE,WAAW,EAAE,KAAK,CAAC,WAAW;QAC9B,MAAM,EAAE,iBAAiB,CAAC,KAAK,EAAE,cAAc,EAAE,SAAS,EAAE,OAAO,CAAC;KACrE,EACD,QAAQ,CACT,CAAA;AACH,CAAC"}
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
import type { Partition, PartitionBuildResult, PartitionSlice, PlannerContext, SortKey } from '../types.js';
|
|
2
|
+
/**
 * Re-counts slices whose estimate confidence is 'low' with exact counts, builds partition
 * diagnostics, and — only when the row-probe strategy is 'explain-estimate' and the
 * diagnostics flag the estimate as suspicious — re-counts every slice exactly.
 * Resolves to the resulting slices together with their diagnostics.
 */
export declare function refinePartitionSlices(context: PlannerContext, partition: Partition, slices: PartitionSlice[], sortKeys: SortKey[], usedDistributionFallback: boolean): Promise<PartitionBuildResult>;
|
|
3
|
+
//# sourceMappingURL=refinement.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"refinement.d.ts","sourceRoot":"","sources":["../../../src/chunking/strategies/refinement.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EACV,SAAS,EACT,oBAAoB,EAEpB,cAAc,EACd,cAAc,EACd,OAAO,EACR,MAAM,aAAa,CAAA;AAKpB,wBAAsB,qBAAqB,CACzC,OAAO,EAAE,cAAc,EACvB,SAAS,EAAE,SAAS,EACpB,MAAM,EAAE,cAAc,EAAE,EACxB,QAAQ,EAAE,OAAO,EAAE,EACnB,wBAAwB,EAAE,OAAO,GAChC,OAAO,CAAC,oBAAoB,CAAC,CAmC/B"}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import { buildSliceEstimate } from '../partition-slices.js';
|
|
2
|
+
import { countRowsExact, getRowProbeStrategy } from '../services/row-probe.js';
|
|
3
|
+
// Lower bound for (sum of slice row estimates / partition.rows); a ratio below this
// marks the partition's estimate as suspicious in buildPartitionDiagnostics.
const ESTIMATE_RATIO_MIN = 0.7;
|
|
4
|
+
// Upper bound for (sum of slice row estimates / partition.rows); a ratio above this
// marks the partition's estimate as suspicious in buildPartitionDiagnostics.
const ESTIMATE_RATIO_MAX = 1.3;
|
|
5
|
+
/**
 * Tightens the row estimates of a partition's slices and reports diagnostics.
 *
 * 1. If any slice has `estimate.confidence === 'low'`, the slices go through
 *    refineLowConfidenceSlices.
 * 2. Diagnostics are built for the resulting slices.
 * 3. Only when the row-probe strategy is 'explain-estimate' AND the diagnostics flag the
 *    estimate as suspicious, every slice is re-counted via refineAllSlices and the
 *    diagnostics are rebuilt with `usedExactCountFallback = true`.
 *
 * @param context Planner context.
 * @param partition Partition the slices belong to.
 * @param slices Candidate slices with estimates.
 * @param sortKeys Sort keys of the table.
 * @param usedDistributionFallback Flag copied into the diagnostics.
 * @returns `{ slices, diagnostics }`.
 */
export async function refinePartitionSlices(context, partition, slices, sortKeys, usedDistributionFallback) {
    const hasLowConfidence = slices.some(({ estimate }) => estimate.confidence === 'low');
    const workingSlices = hasLowConfidence
        ? await refineLowConfidenceSlices(context, partition, slices, sortKeys)
        : slices;
    const diagnose = (candidates, usedExactCountFallback) => buildPartitionDiagnostics(partition, candidates, usedDistributionFallback, hasLowConfidence, usedExactCountFallback);
    const diagnostics = diagnose(workingSlices, false);
    const needsExactRecount = getRowProbeStrategy(context) === 'explain-estimate' && diagnostics.suspiciousEstimate;
    if (!needsExactRecount) {
        return { slices: workingSlices, diagnostics };
    }
    const refinedSlices = await refineAllSlices(context, partition, workingSlices, sortKeys);
    return {
        slices: refinedSlices,
        diagnostics: diagnose(refinedSlices, true),
    };
}
|
|
23
|
+
/**
 * Summarises how well the slices' row estimates add up to the partition's row count.
 *
 * The ratio is `sum(slice.estimate.rows) / partition.rows`, or 1 when `partition.rows`
 * is not greater than zero. The estimate is flagged suspicious when that ratio falls
 * outside [ESTIMATE_RATIO_MIN, ESTIMATE_RATIO_MAX].
 *
 * @param partition Partition; `rows` is used as the reference row count.
 * @param slices Slices whose `estimate.rows` / `estimate.confidence` are inspected.
 * @param usedDistributionFallback Copied into the result unchanged.
 * @param usedLowConfidenceChunkRefinement Copied into the result unchanged.
 * @param usedExactCountFallback Copied into the result unchanged.
 * @returns Diagnostics object for the partition.
 */
function buildPartitionDiagnostics(partition, slices, usedDistributionFallback, usedLowConfidenceChunkRefinement, usedExactCountFallback) {
    let estimatedRowSum = 0;
    let lowConfidenceChunkCount = 0;
    for (const { estimate } of slices) {
        estimatedRowSum += estimate.rows;
        if (estimate.confidence === 'low') {
            lowConfidenceChunkCount += 1;
        }
    }
    let estimateToExactRatio = 1;
    if (partition.rows > 0) {
        estimateToExactRatio = estimatedRowSum / partition.rows;
    }
    const belowMinimum = estimateToExactRatio < ESTIMATE_RATIO_MIN;
    const aboveMaximum = estimateToExactRatio > ESTIMATE_RATIO_MAX;
    return {
        estimatedRowSum,
        exactPartitionRows: partition.rows,
        estimateToExactRatio,
        suspiciousEstimate: belowMinimum || aboveMaximum,
        lowConfidenceChunkCount,
        usedDistributionFallback,
        usedLowConfidenceChunkRefinement,
        usedExactCountFallback,
    };
}
|
|
37
|
+
/**
 * Returns the slices in their original order, with every slice whose
 * `estimate.confidence` is 'low' replaced by its refineSlice() result.
 * Low-confidence slices are refined one at a time (each awaited before the next).
 */
async function refineLowConfidenceSlices(context, partition, slices, sortKeys) {
    const output = [];
    for (const candidate of slices) {
        const needsRecount = candidate.estimate.confidence === 'low';
        const resolved = needsRecount
            ? await refineSlice(context, partition, candidate, sortKeys)
            : candidate;
        output.push(resolved);
    }
    return output;
}
|
|
48
|
+
/**
 * Refines every slice via `refineSlice`. All refinements are started up front
 * and awaited together, so the result keeps the order of `slices` and the
 * returned promise rejects as soon as any single refinement rejects.
 */
async function refineAllSlices(context, partition, slices, sortKeys) {
    const pending = slices.map((slice) => refineSlice(context, partition, slice, sortKeys));
    return Promise.all(pending);
}
|
|
51
|
+
/**
 * Replaces a slice's estimate with one built from the row count returned by
 * `countRowsExact` for the slice's ranges within the partition.
 *
 * @returns A shallow copy of `slice` whose `estimate` is rebuilt with
 *   confidence `'exact'` and reason `'exact-count'`.
 */
async function refineSlice(context, partition, slice, sortKeys) {
    const target = {
        partitionId: partition.partitionId,
        ranges: slice.ranges,
    };
    const exactRows = await countRowsExact(context, target, sortKeys);
    const estimate = buildSliceEstimate(partition, exactRows, 'exact', 'exact-count');
    return { ...slice, estimate };
}
|
|
61
|
+
//# sourceMappingURL=refinement.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"refinement.js","sourceRoot":"","sources":["../../../src/chunking/strategies/refinement.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAA;AAC3D,OAAO,EAAE,cAAc,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAA;AAU9E,MAAM,kBAAkB,GAAG,GAAG,CAAA;AAC9B,MAAM,kBAAkB,GAAG,GAAG,CAAA;AAE9B,MAAM,CAAC,KAAK,UAAU,qBAAqB,CACzC,OAAuB,EACvB,SAAoB,EACpB,MAAwB,EACxB,QAAmB,EACnB,wBAAiC;IAEjC,IAAI,aAAa,GAAG,MAAM,CAAA;IAC1B,IAAI,gCAAgC,GAAG,KAAK,CAAA;IAE5C,IAAI,MAAM,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,UAAU,KAAK,KAAK,CAAC,EAAE,CAAC;QAChE,aAAa,GAAG,MAAM,yBAAyB,CAAC,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAA;QACrF,gCAAgC,GAAG,IAAI,CAAA;IACzC,CAAC;IAED,MAAM,WAAW,GAAG,yBAAyB,CAC3C,SAAS,EACT,aAAa,EACb,wBAAwB,EACxB,gCAAgC,EAChC,KAAK,CACN,CAAA;IAED,IACE,mBAAmB,CAAC,OAAO,CAAC,KAAK,kBAAkB;QACnD,CAAC,WAAW,CAAC,kBAAkB,EAC/B,CAAC;QACD,OAAO,EAAE,MAAM,EAAE,aAAa,EAAE,WAAW,EAAE,CAAA;IAC/C,CAAC;IAED,MAAM,aAAa,GAAG,MAAM,eAAe,CAAC,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,QAAQ,CAAC,CAAA;IACxF,OAAO;QACL,MAAM,EAAE,aAAa;QACrB,WAAW,EAAE,yBAAyB,CACpC,SAAS,EACT,aAAa,EACb,wBAAwB,EACxB,gCAAgC,EAChC,IAAI,CACL;KACF,CAAA;AACH,CAAC;AAED,SAAS,yBAAyB,CAChC,SAAoB,EACpB,MAAwB,EACxB,wBAAiC,EACjC,gCAAyC,EACzC,sBAA+B;IAE/B,MAAM,eAAe,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,KAAK,EAAE,EAAE,CAAC,GAAG,GAAG,KAAK,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,CAAA;IACnF,MAAM,oBAAoB,GAAG,SAAS,CAAC,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,eAAe,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAA;IAEtF,OAAO;QACL,eAAe;QACf,kBAAkB,EAAE,SAAS,CAAC,IAAI;QAClC,oBAAoB;QACpB,kBAAkB,EAChB,oBAAoB,GAAG,kBAAkB,IAAI,oBAAoB,GAAG,kBAAkB;QACxF,uBAAuB,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,UAAU,KAAK,KAAK,CAAC,CAAC,MAAM;QAC7F,wBAAwB;QACxB,gCAAgC;QAChC,sBAAsB;KACvB,CAAA;AACH,CAAC;AAED,KAAK,UAAU,yBAAyB,CACtC,OAAuB,EACvB,SAAoB,EACpB,MAAwB,EACxB,QAAmB;IAEnB,MAAM,OAAO,GAAqB,EAAE,CAAA;IAEpC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,IAAI,KAAK,CAAC,QAAQ,CAAC,UAAU,KAAK,KAAK,EAAE,CAAC;YACxC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA
;YACnB,SAAQ;QACV,CAAC;QACD,OAAO,CAAC,IAAI,CAAC,MAAM,WAAW,CAAC,OAAO,EAAE,SAAS,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC,CAAA;IACtE,CAAC;IAED,OAAO,OAAO,CAAA;AAChB,CAAC;AAED,KAAK,UAAU,eAAe,CAC5B,OAAuB,EACvB,SAAoB,EACpB,MAAwB,EACxB,QAAmB;IAEnB,OAAO,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,WAAW,CAAC,OAAO,EAAE,SAAS,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAA;AAC7F,CAAC;AAED,KAAK,UAAU,WAAW,CACxB,OAAuB,EACvB,SAAoB,EACpB,KAAqB,EACrB,QAAmB;IAEnB,MAAM,IAAI,GAAG,MAAM,cAAc,CAC/B,OAAO,EACP;QACE,WAAW,EAAE,SAAS,CAAC,WAAW;QAClC,MAAM,EAAE,KAAK,CAAC,MAAM;KACrB,EACD,QAAQ,CACT,CAAA;IAED,OAAO;QACL,GAAG,KAAK;QACR,QAAQ,EAAE,kBAAkB,CAAC,SAAS,EAAE,IAAI,EAAE,OAAO,EAAE,aAAa,CAAC;KACtE,CAAA;AACH,CAAC"}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import type { Partition, PartitionSlice, PlannerContext, SortKey } from '../types.js';
|
|
2
|
+
/**
 * Splits `slice` along the sort key at `dimensionIndex` using string prefix
 * distribution buckets.
 *
 * Resolves to an empty array when that sort key is missing or its category is
 * not `'string'`, or when the slice's range on that dimension lacks a `from`
 * or `to` bound.
 */
export declare function splitSliceWithStringPrefixes(context: PlannerContext, partition: Partition, slice: PartitionSlice, sortKeys: SortKey[], dimensionIndex: number): Promise<PartitionSlice[]>;
|
|
3
|
+
/**
 * Returns the upper-bound string derived from `maxValue`; the implementation
 * forwards directly to `buildObservedStringUpperBound`.
 */
export declare function buildRootStringUpperBound(maxValue: string): string;
|
|
4
|
+
//# sourceMappingURL=string-prefix-split.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"string-prefix-split.d.ts","sourceRoot":"","sources":["../../../src/chunking/strategies/string-prefix-split.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EACV,SAAS,EACT,cAAc,EACd,cAAc,EACd,OAAO,EAER,MAAM,aAAa,CAAA;AAapB,wBAAsB,4BAA4B,CAChD,OAAO,EAAE,cAAc,EACvB,SAAS,EAAE,SAAS,EACpB,KAAK,EAAE,cAAc,EACrB,QAAQ,EAAE,OAAO,EAAE,EACnB,cAAc,EAAE,MAAM,GACrB,OAAO,CAAC,cAAc,EAAE,CAAC,CAiB3B;AAED,wBAAgB,yBAAyB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAElE"}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import { buildSliceFromRows } from '../partition-slices.js';
|
|
2
|
+
import { probeStringPrefixDistribution } from '../services/distribution-source.js';
|
|
3
|
+
import { buildObservedStringUpperBound, maxBinaryString, minBinaryString, nextPrefixValue, } from '../utils/binary-string.js';
|
|
4
|
+
import { getChunkRange, replaceChunkRange } from '../utils/ranges.js';
|
|
5
|
+
// Multiplier applied to context.targetChunkBytes: a bucket slice is accepted
// without further splitting while its estimated bytes stay within 115% of the target.
const TARGET_BYTES_FUZZ_FACTOR = 1.15;
|
|
6
|
+
// Depth passed to the first prefix-distribution probe
// (presumably the prefix length - confirm against probeStringPrefixDistribution).
const PREFIX_START_DEPTH = 1;
|
|
7
|
+
// Recursion limit: an oversized bucket is only split further while depth < PREFIX_MAX_DEPTH.
const PREFIX_MAX_DEPTH = 4;
|
|
8
|
+
/**
 * Entry point of the string-prefix split strategy.
 *
 * Resolves to `[]` when the sort key at `dimensionIndex` is missing or not of
 * category `'string'`, or when the slice's range on that dimension is missing
 * either bound. Otherwise delegates to `buildPrefixSlices`, starting at
 * `PREFIX_START_DEPTH` with the slice's current bounds.
 */
export async function splitSliceWithStringPrefixes(context, partition, slice, sortKeys, dimensionIndex) {
    if (sortKeys[dimensionIndex]?.category !== 'string') {
        return [];
    }
    const { from, to } = getChunkRange(slice, dimensionIndex);
    const isBounded = from !== undefined && to !== undefined;
    if (!isBounded) {
        return [];
    }
    return buildPrefixSlices(context, partition, slice, sortKeys, dimensionIndex, from, to, PREFIX_START_DEPTH);
}
|
|
17
|
+
/**
 * Public alias for `buildObservedStringUpperBound`: forwards `maxValue`
 * unchanged and returns whatever that helper produces.
 */
export function buildRootStringUpperBound(maxValue) {
    const upperBound = buildObservedStringUpperBound(maxValue);
    return upperBound;
}
|
|
20
|
+
/**
 * Probes the string prefix distribution of `slice` restricted to
 * [`rangeFrom`, `rangeTo`) on `dimensionIndex` and turns each non-empty bucket
 * into a slice, recursing one level deeper for buckets that are still too big.
 *
 * A bucket is kept as-is when its estimated uncompressed bytes are within
 * `context.targetChunkBytes * TARGET_BYTES_FUZZ_FACTOR`. An oversized bucket is
 * split further only if it is not an exact value and `depth` is still below
 * `PREFIX_MAX_DEPTH`.
 *
 * @param context Planner context; `targetChunkBytes` is read here.
 * @param partition Partition the slice belongs to.
 * @param slice Parent slice whose other dimensions are carried over.
 * @param sortKeys Sort keys of the table; `sortKeys[dimensionIndex]` must exist.
 * @param dimensionIndex Index of the sort key being split.
 * @param rangeFrom Lower bound of the range to probe.
 * @param rangeTo Upper bound of the range to probe.
 * @param depth Current probe depth, forwarded to `probeStringPrefixDistribution`.
 * @returns Slices in bucket order; empty when the sort key is missing or the
 *   probe yields no usable buckets.
 */
async function buildPrefixSlices(context, partition, slice, sortKeys, dimensionIndex, rangeFrom, rangeTo, depth) {
    const sortKey = sortKeys[dimensionIndex];
    if (!sortKey)
        return [];
    const buckets = await probeStringPrefixDistribution(context, partition.partitionId, replaceChunkRange(slice, dimensionIndex, rangeFrom, rangeTo), sortKey, dimensionIndex, depth, sortKeys);
    const maxBucketBytes = context.targetChunkBytes * TARGET_BYTES_FUZZ_FACTOR;
    const slices = [];
    for (const bucket of buckets) {
        if (bucket.rowCount <= 0)
            continue;
        const bucketSlice = buildBucketSlice(partition, slice, dimensionIndex, rangeFrom, rangeTo, bucket);
        if (!bucketSlice)
            continue;
        const fitsTarget = bucketSlice.estimate.bytesUncompressed <= maxBucketBytes;
        const canSplitFurther = !bucket.isExactValue && depth < PREFIX_MAX_DEPTH;
        if (!fitsTarget && canSplitFurther) {
            const bucketRange = getChunkRange(bucketSlice, dimensionIndex);
            if (bucketRange.from !== undefined && bucketRange.to !== undefined) {
                const nested = await buildPrefixSlices(context, partition, slice, sortKeys, dimensionIndex, bucketRange.from, bucketRange.to, depth + 1);
                if (nested.length > 0) {
                    slices.push(...nested);
                    continue;
                }
                // The deeper probe produced nothing usable. Dropping the bucket here
                // would silently remove its rows from the plan, so fall through and
                // keep the oversized bucket slice instead.
            }
        }
        slices.push(bucketSlice);
    }
    return slices;
}
|
|
47
|
+
/**
 * Converts one prefix-distribution bucket into a slice clipped to
 * [`rangeFrom`, `rangeTo`) on `dimensionIndex`.
 *
 * The lower bound is the larger of `rangeFrom` and the bucket value. The upper
 * bound is the smaller of `rangeTo` and either the bucket value followed by a
 * NUL character (exact-value buckets) or `nextPrefixValue(bucket.value)`.
 *
 * @returns The new slice, or `undefined` when no upper bound can be computed
 *   or the clipped bounds are equal.
 */
function buildBucketSlice(partition, parentSlice, dimensionIndex, rangeFrom, rangeTo, bucket) {
    const lowerBound = maxBinaryString(rangeFrom, bucket.value);
    let rawUpperBound;
    if (bucket.isExactValue) {
        rawUpperBound = `${bucket.value}\0`;
    }
    else {
        rawUpperBound = nextPrefixValue(bucket.value);
    }
    if (rawUpperBound === undefined) {
        return undefined;
    }
    const upperBound = minBinaryString(rangeTo, rawUpperBound);
    if (lowerBound === upperBound) {
        return undefined;
    }
    // Exact-value buckets pin the focused value; prefix buckets inherit the parent's.
    let focusedValue;
    if (bucket.isExactValue) {
        focusedValue = { dimensionIndex, value: bucket.value };
    }
    else {
        focusedValue = parentSlice.analysis.focusedValue;
    }
    const splitStep = {
        strategyId: 'string-prefix-split',
        dimensionIndex,
        reason: 'split slice using string prefix distribution',
    };
    return buildSliceFromRows(partition, {
        ranges: replaceChunkRange(parentSlice, dimensionIndex, lowerBound, upperBound),
        rows: bucket.rowCount,
        focusedValue,
        confidence: 'high',
        reason: 'string-prefix-distribution',
        lineage: parentSlice.analysis.lineage.concat([splitStep]),
    });
}
|
|
73
|
+
//# sourceMappingURL=string-prefix-split.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"string-prefix-split.js","sourceRoot":"","sources":["../../../src/chunking/strategies/string-prefix-split.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAA;AAC3D,OAAO,EAAE,6BAA6B,EAAE,MAAM,oCAAoC,CAAA;AAQlF,OAAO,EACL,6BAA6B,EAC7B,eAAe,EACf,eAAe,EACf,eAAe,GAChB,MAAM,2BAA2B,CAAA;AAClC,OAAO,EAAE,aAAa,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAA;AAErE,MAAM,wBAAwB,GAAG,IAAI,CAAA;AACrC,MAAM,kBAAkB,GAAG,CAAC,CAAA;AAC5B,MAAM,gBAAgB,GAAG,CAAC,CAAA;AAE1B,MAAM,CAAC,KAAK,UAAU,4BAA4B,CAChD,OAAuB,EACvB,SAAoB,EACpB,KAAqB,EACrB,QAAmB,EACnB,cAAsB;IAEtB,MAAM,OAAO,GAAG,QAAQ,CAAC,cAAc,CAAC,CAAA;IACxC,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,QAAQ,KAAK,QAAQ;QAAE,OAAO,EAAE,CAAA;IAExD,MAAM,KAAK,GAAG,aAAa,CAAC,KAAK,EAAE,cAAc,CAAC,CAAA;IAClD,IAAI,KAAK,CAAC,IAAI,KAAK,SAAS,IAAI,KAAK,CAAC,EAAE,KAAK,SAAS;QAAE,OAAO,EAAE,CAAA;IAEjE,OAAO,iBAAiB,CACtB,OAAO,EACP,SAAS,EACT,KAAK,EACL,QAAQ,EACR,cAAc,EACd,KAAK,CAAC,IAAI,EACV,KAAK,CAAC,EAAE,EACR,kBAAkB,CACnB,CAAA;AACH,CAAC;AAED,MAAM,UAAU,yBAAyB,CAAC,QAAgB;IACxD,OAAO,6BAA6B,CAAC,QAAQ,CAAC,CAAA;AAChD,CAAC;AAED,KAAK,UAAU,iBAAiB,CAC9B,OAAuB,EACvB,SAAoB,EACpB,KAAqB,EACrB,QAAmB,EACnB,cAAsB,EACtB,SAAiB,EACjB,OAAe,EACf,KAAa;IAEb,MAAM,OAAO,GAAG,QAAQ,CAAC,cAAc,CAAC,CAAA;IACxC,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,CAAA;IAEvB,MAAM,OAAO,GAAG,MAAM,6BAA6B,CACjD,OAAO,EACP,SAAS,CAAC,WAAW,EACrB,iBAAiB,CAAC,KAAK,EAAE,cAAc,EAAE,SAAS,EAAE,OAAO,CAAC,EAC5D,OAAO,EACP,cAAc,EACd,KAAK,EACL,QAAQ,CACT,CAAA;IAED,MAAM,MAAM,GAAqB,EAAE,CAAA;IACnC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,IAAI,MAAM,CAAC,QAAQ,IAAI,CAAC;YAAE,SAAQ;QAElC,MAAM,WAAW,GAAG,gBAAgB,CAAC,SAAS,EAAE,KAAK,EAAE,cAAc,EAAE,SAAS,EAAE,OAAO,EAAE,MAAM,CAAC,CAAA;QAClG,IAAI,CAAC,WAAW;YAAE,SAAQ;QAE1B,IAAI,WAAW,CAAC,QAAQ,CAAC,iBAAiB,IAAI,OAAO,CAAC,gBAAgB,GAAG,wBAAwB,EAAE,CAAC;YAClG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;YACxB,SAAQ;QACV,CAAC;QAED,IAAI,CAAC,MAAM,CAAC,YAAY,IAAI,KAAK,GAAG,gBAAgB,EAAE,CAAC;YACrD,MAAM,WAAW,GAAG,aAAa,CAAC,WAAW,EAAE,cAAc,CAAC,CAAA;YAC9D,IAAI,WAAW,CAAC,IAAI,KAAK,SAAS,IAAI,WAAW,CAAC,EAAE,KAAK,SAAS,E
AAE,CAAC;gBACnE,MAAM,CAAC,IAAI,CACT,GAAG,CAAC,MAAM,iBAAiB,CACzB,OAAO,EACP,SAAS,EACT,KAAK,EACL,QAAQ,EACR,cAAc,EACd,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,EAAE,EACd,KAAK,GAAG,CAAC,CACV,CAAC,CACH,CAAA;gBACD,SAAQ;YACV,CAAC;QACH,CAAC;QAED,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;IAC1B,CAAC;IAED,OAAO,MAAM,CAAA;AACf,CAAC;AAED,SAAS,gBAAgB,CACvB,SAAoB,EACpB,WAA2B,EAC3B,cAAsB,EACtB,SAAiB,EACjB,OAAe,EACf,MAA0B;IAE1B,MAAM,UAAU,GAAG,eAAe,CAAC,SAAS,EAAE,MAAM,CAAC,KAAK,CAAC,CAAA;IAC3D,MAAM,WAAW,GAAG,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,eAAe,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;IAC7F,IAAI,WAAW,KAAK,SAAS;QAAE,OAAO,SAAS,CAAA;IAE/C,MAAM,QAAQ,GAAG,eAAe,CAAC,OAAO,EAAE,WAAW,CAAC,CAAA;IACtD,IAAI,UAAU,KAAK,QAAQ;QAAE,OAAO,SAAS,CAAA;IAE7C,MAAM,YAAY,GAAG,MAAM,CAAC,YAAY;QACtC,CAAC,CAAC,EAAE,cAAc,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,EAAE;QACzC,CAAC,CAAC,WAAW,CAAC,QAAQ,CAAC,YAAY,CAAA;IAErC,OAAO,kBAAkB,CAAC,SAAS,EAAE;QACnC,MAAM,EAAE,iBAAiB,CAAC,WAAW,EAAE,cAAc,EAAE,UAAU,EAAE,QAAQ,CAAC;QAC5E,IAAI,EAAE,MAAM,CAAC,QAAQ;QACrB,YAAY;QACZ,UAAU,EAAE,MAAM;QAClB,MAAM,EAAE,4BAA4B;QACpC,OAAO,EAAE,WAAW,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAM,CAAC;YAC3C;gBACE,UAAU,EAAE,qBAAqB;gBACjC,cAAc;gBACd,MAAM,EAAE,8CAA8C;aACvD;SACF,CAAC;KACH,CAAC,CAAA;AACJ,CAAC"}
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
import type { Partition, PartitionSlice, PlannerContext, SortKey } from '../types.js';
|
|
2
|
+
/**
 * Splits `slice` along the sort key at `dimensionIndex` using temporal
 * distribution buckets: day buckets first, then hour buckets when some
 * day-based slice still exceeds the target chunk size (with tolerance).
 *
 * Resolves to `[slice]` when the day probe returns no buckets.
 */
export declare function splitSliceWithTemporalBuckets(context: PlannerContext, partition: Partition, slice: PartitionSlice, sortKeys: SortKey[], dimensionIndex: number): Promise<PartitionSlice[]>;
|
|
3
|
+
//# sourceMappingURL=temporal-bucket-split.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"temporal-bucket-split.d.ts","sourceRoot":"","sources":["../../../src/chunking/strategies/temporal-bucket-split.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EACV,SAAS,EACT,cAAc,EACd,cAAc,EACd,OAAO,EAER,MAAM,aAAa,CAAA;AAKpB,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,cAAc,EACvB,SAAS,EAAE,SAAS,EACpB,KAAK,EAAE,cAAc,EACrB,QAAQ,EAAE,OAAO,EAAE,EACnB,cAAc,EAAE,MAAM,GACrB,OAAO,CAAC,cAAc,EAAE,CAAC,CA2B3B"}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import { buildSliceFromRows, getTargetChunkRows } from '../partition-slices.js';
|
|
2
|
+
import { probeTemporalDistribution } from '../services/distribution-source.js';
|
|
3
|
+
import { parsePlannerDateTime } from '../services/row-probe.js';
|
|
4
|
+
import { getChunkRange, replaceChunkRange } from '../utils/ranges.js';
|
|
5
|
+
// Tolerance multiplier: slices may reach 115% of the target chunk size
// (in bytes or in rows) before they count as oversized.
const TARGET_BYTES_FUZZ_FACTOR = 1.15;
|
|
6
|
+
/**
 * Entry point of the temporal bucket split strategy.
 *
 * 1. Probe the slice at `'day'` granularity; with no buckets, return `[slice]`.
 * 2. Build slices from the day buckets and return them if every one is within
 *    `context.targetChunkBytes * TARGET_BYTES_FUZZ_FACTOR`.
 * 3. Otherwise probe at `'hour'` granularity and return the hour-based slices,
 *    falling back to the day-based slices when the hour probe is empty.
 */
export async function splitSliceWithTemporalBuckets(context, partition, slice, sortKeys, dimensionIndex) {
    const probe = (granularity) => probeTemporalDistribution(context, partition.partitionId, slice.ranges, sortKeys, dimensionIndex, granularity);
    const slicesFrom = (buckets) => buildTemporalSlices(partition, slice, dimensionIndex, buckets, context.targetChunkBytes);
    const dayBuckets = await probe('day');
    if (dayBuckets.length === 0) {
        return [slice];
    }
    const daySlices = slicesFrom(dayBuckets);
    const fitsTarget = (candidate) => candidate.estimate.bytesUncompressed <= context.targetChunkBytes * TARGET_BYTES_FUZZ_FACTOR;
    if (daySlices.every(fitsTarget)) {
        return daySlices;
    }
    const hourBuckets = await probe('hour');
    if (hourBuckets.length === 0) {
        return daySlices;
    }
    return slicesFrom(hourBuckets);
}
|
|
19
|
+
/**
 * Returns an ISO-8601 timestamp 1000 units after the parsed
 * `partition.maxTime`, used as an exclusive upper bound when a slice has no
 * `to` bound of its own.
 * NOTE(review): assumes parsePlannerDateTime returns epoch milliseconds, which
 * makes the offset one second - confirm against its definition.
 */
function getPartitionEndExclusive(partition) {
    const maxTimeValue = parsePlannerDateTime(partition.maxTime);
    const endExclusive = new Date(maxTimeValue + 1000);
    return endExclusive.toISOString();
}
|
|
22
|
+
/**
 * Greedily packs consecutive temporal buckets into slices.
 *
 * Buckets are accumulated until adding the next one would push the running row
 * count above `getTargetChunkRows(...) * TARGET_BYTES_FUZZ_FACTOR`; at that
 * point the accumulated run is closed at the next bucket's start. The final run
 * is closed at the parent's `to` bound, or at `getPartitionEndExclusive` when
 * the parent has none. Bucket starts that precede the parent's `from` bound are
 * clamped to it.
 *
 * @returns The packed slices, or `[parentSlice]` when none could be built.
 */
function buildTemporalSlices(partition, parentSlice, dimensionIndex, buckets, targetChunkBytes) {
    const targetChunkRows = getTargetChunkRows(partition, targetChunkBytes);
    const rowBudget = targetChunkRows * TARGET_BYTES_FUZZ_FACTOR;
    const { from: lowerBound, to: parentUpperBound } = getChunkRange(parentSlice, dimensionIndex);
    const upperBound = parentUpperBound ?? getPartitionEndExclusive(partition);
    const lastIndex = buckets.length - 1;
    const packed = [];
    let openStart;
    let openRows = 0;
    for (const [index, bucket] of buckets.entries()) {
        if (!bucket) {
            continue;
        }
        const clampedStart = lowerBound && bucket.start < lowerBound ? lowerBound : bucket.start;
        if (openStart === undefined) {
            openStart = clampedStart;
        }
        // Close the open run before this bucket if it would overflow the row budget.
        const overBudget = openRows > 0 && openRows + bucket.rowCount > rowBudget;
        if (overBudget && openStart !== undefined && openStart < clampedStart) {
            packed.push(buildSlice(parentSlice, partition, dimensionIndex, openStart, clampedStart, openRows));
            openStart = clampedStart;
            openRows = 0;
        }
        openRows += bucket.rowCount;
        // The last bucket flushes whatever is still open, up to the slice end.
        const isLastBucket = index === lastIndex;
        if (isLastBucket && openStart !== undefined && openStart < upperBound) {
            packed.push(buildSlice(parentSlice, partition, dimensionIndex, openStart, upperBound, openRows));
        }
    }
    if (packed.length > 0) {
        return packed;
    }
    return [parentSlice];
}
|
|
51
|
+
/**
 * Creates a child slice of `parentSlice` covering [`from`, `to`) on
 * `dimensionIndex` with the given row count. The slice inherits the parent's
 * focused value, is marked with `'low'` confidence, and has a
 * `'temporal-bucket-split'` step appended to the parent's lineage.
 */
function buildSlice(parentSlice, partition, dimensionIndex, from, to, rows) {
    const ranges = replaceChunkRange(parentSlice, dimensionIndex, from, to);
    const splitStep = {
        strategyId: 'temporal-bucket-split',
        dimensionIndex,
        reason: 'split slice using temporal distribution buckets',
    };
    const { analysis } = parentSlice;
    return buildSliceFromRows(partition, {
        ranges,
        rows,
        focusedValue: analysis.focusedValue,
        confidence: 'low',
        reason: 'temporal-distribution',
        lineage: analysis.lineage.concat([splitStep]),
    });
}
|
|
67
|
+
//# sourceMappingURL=temporal-bucket-split.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"temporal-bucket-split.js","sourceRoot":"","sources":["../../../src/chunking/strategies/temporal-bucket-split.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAA;AAC/E,OAAO,EAAE,yBAAyB,EAAE,MAAM,oCAAoC,CAAA;AAC9E,OAAO,EAAE,oBAAoB,EAAE,MAAM,0BAA0B,CAAA;AAQ/D,OAAO,EAAE,aAAa,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAA;AAErE,MAAM,wBAAwB,GAAG,IAAI,CAAA;AAErC,MAAM,CAAC,KAAK,UAAU,6BAA6B,CACjD,OAAuB,EACvB,SAAoB,EACpB,KAAqB,EACrB,QAAmB,EACnB,cAAsB;IAEtB,MAAM,UAAU,GAAG,MAAM,yBAAyB,CAChD,OAAO,EACP,SAAS,CAAC,WAAW,EACrB,KAAK,CAAC,MAAM,EACZ,QAAQ,EACR,cAAc,EACd,KAAK,CACN,CAAA;IACD,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,KAAK,CAAC,CAAA;IAE3C,MAAM,SAAS,GAAG,mBAAmB,CAAC,SAAS,EAAE,KAAK,EAAE,cAAc,EAAE,UAAU,EAAE,OAAO,CAAC,gBAAgB,CAAC,CAAA;IAC7G,IAAI,SAAS,CAAC,KAAK,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,iBAAiB,IAAI,OAAO,CAAC,gBAAgB,GAAG,wBAAwB,CAAC,EAAE,CAAC;QAChI,OAAO,SAAS,CAAA;IAClB,CAAC;IAED,MAAM,WAAW,GAAG,MAAM,yBAAyB,CACjD,OAAO,EACP,SAAS,CAAC,WAAW,EACrB,KAAK,CAAC,MAAM,EACZ,QAAQ,EACR,cAAc,EACd,MAAM,CACP,CAAA;IACD,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,SAAS,CAAA;IAE9C,OAAO,mBAAmB,CAAC,SAAS,EAAE,KAAK,EAAE,cAAc,EAAE,WAAW,EAAE,OAAO,CAAC,gBAAgB,CAAC,CAAA;AACrG,CAAC;AAED,SAAS,wBAAwB,CAAC,SAAoB;IACpD,OAAO,IAAI,IAAI,CAAC,oBAAoB,CAAC,SAAS,CAAC,OAAO,CAAC,GAAG,IAAI,CAAC,CAAC,WAAW,EAAE,CAAA;AAC/E,CAAC;AAED,SAAS,mBAAmB,CAC1B,SAAoB,EACpB,WAA2B,EAC3B,cAAsB,EACtB,OAAyB,EACzB,gBAAwB;IAExB,MAAM,eAAe,GAAG,kBAAkB,CAAC,SAAS,EAAE,gBAAgB,CAAC,CAAA;IACvE,MAAM,MAAM,GAAqB,EAAE,CAAA;IACnC,IAAI,YAAgC,CAAA;IACpC,IAAI,WAAW,GAAG,CAAC,CAAA;IACnB,MAAM,WAAW,GAAG,aAAa,CAAC,WAAW,EAAE,cAAc,CAAC,CAAA;IAC9D,MAAM,UAAU,GAAG,WAAW,CAAC,IAAI,CAAA;IACnC,MAAM,QAAQ,GAAG,WAAW,CAAC,EAAE,IAAI,wBAAwB,CAAC,SAAS,CAAC,CAAA;IAEtE,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,OAAO,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE,CAAC;QACpD,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,CAAA;QAC7B,IAAI,CAAC,MAAM;YAAE,SAAQ;QAErB,MAAM,WAAW,GAAG,UAAU,IAAI,MAAM,CAAC,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,MAAM,CA
AC,KAAK,CAAA;QACvF,IAAI,YAAY,KAAK,SAAS,EAAE,CAAC;YAC/B,YAAY,GAAG,WAAW,CAAA;QAC5B,CAAC;QAED,MAAM,WAAW,GAAG,WAAW,GAAG,CAAC,IAAI,WAAW,GAAG,MAAM,CAAC,QAAQ,GAAG,eAAe,GAAG,wBAAwB,CAAA;QACjH,IAAI,WAAW,IAAI,YAAY,KAAK,SAAS,IAAI,YAAY,GAAG,WAAW,EAAE,CAAC;YAC5E,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,WAAW,EAAE,SAAS,EAAE,cAAc,EAAE,YAAY,EAAE,WAAW,EAAE,WAAW,CAAC,CAAC,CAAA;YACvG,YAAY,GAAG,WAAW,CAAA;YAC1B,WAAW,GAAG,CAAC,CAAA;QACjB,CAAC;QAED,WAAW,IAAI,MAAM,CAAC,QAAQ,CAAA;QAE9B,IAAI,KAAK,KAAK,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,YAAY,KAAK,SAAS,IAAI,YAAY,GAAG,QAAQ,EAAE,CAAC;YAC1F,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,WAAW,EAAE,SAAS,EAAE,cAAc,EAAE,YAAY,EAAE,QAAQ,EAAE,WAAW,CAAC,CAAC,CAAA;QACtG,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAA;AACnD,CAAC;AAED,SAAS,UAAU,CACjB,WAA2B,EAC3B,SAAoB,EACpB,cAAsB,EACtB,IAAY,EACZ,EAAU,EACV,IAAY;IAEZ,OAAO,kBAAkB,CAAC,SAAS,EAAE;QACnC,MAAM,EAAE,iBAAiB,CAAC,WAAW,EAAE,cAAc,EAAE,IAAI,EAAE,EAAE,CAAC;QAChE,IAAI;QACJ,YAAY,EAAE,WAAW,CAAC,QAAQ,CAAC,YAAY;QAC/C,UAAU,EAAE,KAAK;QACjB,MAAM,EAAE,uBAAuB;QAC/B,OAAO,EAAE,WAAW,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAM,CAAC;YAC3C;gBACE,UAAU,EAAE,uBAAuB;gBACnC,cAAc;gBACd,MAAM,EAAE,iDAAiD;aAC1D;SACF,CAAC;KACH,CAAC,CAAA;AACJ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"strategy-policy.d.ts","sourceRoot":"","sources":["../../src/chunking/strategy-policy.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,OAAO,EAAE,MAAM,YAAY,CAAA;AAEzD,wBAAgB,sBAAsB,CACpC,QAAQ,EAAE,OAAO,EAAE,EACnB,MAAM,CAAC,EAAE,cAAc,GACtB,MAAM,EAAE,CAEV"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"strategy-policy.js","sourceRoot":"","sources":["../../src/chunking/strategy-policy.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,sBAAsB,CACpC,QAAmB,EACnB,MAAuB;IAEvB,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,CAAA;AAC1C,CAAC"}
|