@milaboratories/pl-middle-layer 1.43.58 → 1.43.60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/debug/index.cjs +0 -1
- package/dist/debug/index.cjs.map +1 -1
- package/dist/debug/index.d.ts +0 -1
- package/dist/debug/index.d.ts.map +1 -1
- package/dist/debug/index.js +0 -1
- package/dist/debug/index.js.map +1 -1
- package/dist/js_render/computable_context.cjs +6 -2
- package/dist/js_render/computable_context.cjs.map +1 -1
- package/dist/js_render/computable_context.d.ts +1 -1
- package/dist/js_render/computable_context.d.ts.map +1 -1
- package/dist/js_render/computable_context.js +6 -2
- package/dist/js_render/computable_context.js.map +1 -1
- package/dist/middle_layer/driver_kit.cjs +6 -1
- package/dist/middle_layer/driver_kit.cjs.map +1 -1
- package/dist/middle_layer/driver_kit.js +7 -2
- package/dist/middle_layer/driver_kit.js.map +1 -1
- package/dist/middle_layer/ops.cjs +4 -8
- package/dist/middle_layer/ops.cjs.map +1 -1
- package/dist/middle_layer/ops.d.ts +1 -1
- package/dist/middle_layer/ops.d.ts.map +1 -1
- package/dist/middle_layer/ops.js +4 -8
- package/dist/middle_layer/ops.js.map +1 -1
- package/dist/middle_layer/project.cjs +3 -0
- package/dist/middle_layer/project.cjs.map +1 -1
- package/dist/middle_layer/project.d.ts.map +1 -1
- package/dist/middle_layer/project.js +3 -0
- package/dist/middle_layer/project.js.map +1 -1
- package/dist/pool/data.cjs +0 -15
- package/dist/pool/data.cjs.map +1 -1
- package/dist/pool/data.d.ts +1 -2
- package/dist/pool/data.d.ts.map +1 -1
- package/dist/pool/data.js +1 -15
- package/dist/pool/data.js.map +1 -1
- package/dist/pool/driver.cjs +88 -906
- package/dist/pool/driver.cjs.map +1 -1
- package/dist/pool/driver.d.ts +14 -86
- package/dist/pool/driver.d.ts.map +1 -1
- package/dist/pool/driver.js +88 -907
- package/dist/pool/driver.js.map +1 -1
- package/package.json +17 -16
- package/src/debug/index.ts +0 -2
- package/src/js_render/computable_context.ts +8 -6
- package/src/middle_layer/driver_kit.ts +6 -6
- package/src/middle_layer/ops.ts +2 -9
- package/src/middle_layer/project.ts +3 -0
- package/src/pool/data.ts +0 -22
- package/src/pool/driver.ts +141 -1208
package/src/pool/driver.ts
CHANGED
|
@@ -1,140 +1,121 @@
|
|
|
1
|
-
import type { DownloadDriver } from '@milaboratories/pl-drivers';
|
|
2
|
-
import { PFrameInternal } from '@milaboratories/pl-model-middle-layer';
|
|
3
|
-
import type { PlTreeEntry, PlTreeNodeAccessor } from '@milaboratories/pl-tree';
|
|
4
|
-
import { isPlTreeNodeAccessor } from '@milaboratories/pl-tree';
|
|
5
|
-
import type {
|
|
6
|
-
Computable,
|
|
7
|
-
ComputableCtx,
|
|
8
|
-
ComputableStableDefined,
|
|
9
|
-
} from '@milaboratories/computable';
|
|
10
|
-
import type {
|
|
11
|
-
CalculateTableDataRequest,
|
|
12
|
-
CalculateTableDataResponse,
|
|
13
|
-
FindColumnsRequest,
|
|
14
|
-
FindColumnsResponse,
|
|
15
|
-
LocalBlobHandleAndSize,
|
|
16
|
-
PColumnIdAndSpec,
|
|
17
|
-
PColumnSpec,
|
|
18
|
-
PFrameHandle,
|
|
19
|
-
PObjectId,
|
|
20
|
-
PTableColumnSpec,
|
|
21
|
-
PTableHandle,
|
|
22
|
-
PTableShape,
|
|
23
|
-
PTableVector,
|
|
24
|
-
TableRange,
|
|
25
|
-
UniqueValuesRequest,
|
|
26
|
-
UniqueValuesResponse,
|
|
27
|
-
PFrameDriver as SdkPFrameDriver,
|
|
28
|
-
PColumn,
|
|
29
|
-
PFrameDef,
|
|
30
|
-
JoinEntry,
|
|
31
|
-
PTableDef,
|
|
32
|
-
ValueType,
|
|
33
|
-
PTableRecordSingleValueFilterV2,
|
|
34
|
-
PTableRecordFilter,
|
|
35
|
-
PColumnValues,
|
|
36
|
-
DataInfo,
|
|
37
|
-
PColumnValue,
|
|
38
|
-
RemoteBlobHandleAndSize,
|
|
39
|
-
RemoteBlobHandle,
|
|
40
|
-
ContentHandler,
|
|
41
|
-
} from '@platforma-sdk/model';
|
|
42
1
|
import {
|
|
43
|
-
mapPObjectData,
|
|
44
|
-
mapPTableDef,
|
|
45
|
-
extractAllColumns,
|
|
46
2
|
mapDataInfo,
|
|
47
3
|
isDataInfo,
|
|
48
4
|
ensureError,
|
|
49
5
|
PFrameDriverError,
|
|
50
6
|
isAbortError,
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
7
|
+
type LocalBlobHandleAndSize,
|
|
8
|
+
type RemoteBlobHandleAndSize,
|
|
9
|
+
type RemoteBlobHandle,
|
|
10
|
+
type ContentHandler,
|
|
11
|
+
type PColumnSpec,
|
|
12
|
+
type PColumnDataUniversal,
|
|
55
13
|
} from '@platforma-sdk/model';
|
|
56
|
-
import {
|
|
57
|
-
import {
|
|
58
|
-
makeDataInfoFromPColumnValues,
|
|
59
|
-
parseDataInfoResource,
|
|
60
|
-
traverseParquetChunkResource,
|
|
61
|
-
} from './data';
|
|
62
|
-
import { createHash } from 'node:crypto';
|
|
63
|
-
import { type MiLogger } from '@milaboratories/ts-helpers';
|
|
64
|
-
import { mapValues } from 'es-toolkit';
|
|
14
|
+
import { PFrameInternal } from '@milaboratories/pl-model-middle-layer';
|
|
65
15
|
import {
|
|
66
|
-
assertNever,
|
|
67
16
|
emptyDir,
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
type
|
|
17
|
+
RefCountPoolBase,
|
|
18
|
+
type PoolEntry,
|
|
19
|
+
type MiLogger,
|
|
71
20
|
} from '@milaboratories/ts-helpers';
|
|
72
|
-
import
|
|
73
|
-
import {
|
|
21
|
+
import type { DownloadDriver } from '@milaboratories/pl-drivers';
|
|
22
|
+
import {
|
|
23
|
+
isPlTreeNodeAccessor,
|
|
24
|
+
type PlTreeEntry,
|
|
25
|
+
type PlTreeNodeAccessor,
|
|
26
|
+
} from '@milaboratories/pl-tree';
|
|
27
|
+
import type {
|
|
28
|
+
Computable,
|
|
29
|
+
ComputableStableDefined,
|
|
30
|
+
} from '@milaboratories/computable';
|
|
31
|
+
import {
|
|
32
|
+
makeJsonDataInfo,
|
|
33
|
+
AbstractPFrameDriver,
|
|
34
|
+
AbstractPFrameDriverOpsDefaults,
|
|
35
|
+
type AbstractInternalPFrameDriver,
|
|
36
|
+
type AbstractPFrameDriverOps,
|
|
37
|
+
type LocalBlobProvider,
|
|
38
|
+
type RemoteBlobProvider,
|
|
39
|
+
} from '@milaboratories/pf-driver';
|
|
40
|
+
import { HttpHelpers } from '@milaboratories/pframes-rs-node';
|
|
74
41
|
import path from 'node:path';
|
|
75
|
-
import { getDebugFlags } from '../debug';
|
|
76
42
|
import { Readable } from 'node:stream';
|
|
43
|
+
import {
|
|
44
|
+
parseDataInfoResource,
|
|
45
|
+
traverseParquetChunkResource,
|
|
46
|
+
} from './data';
|
|
77
47
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
function makeBlobId(res: PlTreeEntry): string {
|
|
48
|
+
function makeBlobId(res: PlTreeEntry): PFrameInternal.PFrameBlobId {
|
|
81
49
|
return String(res.rid);
|
|
82
50
|
}
|
|
83
51
|
|
|
84
|
-
type
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
52
|
+
type LocalBlob = ComputableStableDefined<LocalBlobHandleAndSize>;
|
|
53
|
+
class LocalBlobProviderImpl
|
|
54
|
+
extends RefCountPoolBase<PlTreeEntry, PFrameInternal.PFrameBlobId, LocalBlob>
|
|
55
|
+
implements LocalBlobProvider<PlTreeEntry> {
|
|
56
|
+
constructor(
|
|
57
|
+
private readonly blobDriver: DownloadDriver,
|
|
58
|
+
private readonly logger: PFrameInternal.Logger,
|
|
59
|
+
) {
|
|
90
60
|
super();
|
|
91
61
|
}
|
|
92
62
|
|
|
93
|
-
protected calculateParamsKey(params: PlTreeEntry):
|
|
63
|
+
protected calculateParamsKey(params: PlTreeEntry): PFrameInternal.PFrameBlobId {
|
|
94
64
|
return makeBlobId(params);
|
|
95
65
|
}
|
|
96
66
|
|
|
97
|
-
protected createNewResource(params: PlTreeEntry, _key:
|
|
67
|
+
protected createNewResource(params: PlTreeEntry, _key: PFrameInternal.PFrameBlobId): LocalBlob {
|
|
98
68
|
return this.blobDriver.getDownloadedBlob(params);
|
|
99
69
|
}
|
|
100
70
|
|
|
101
|
-
public getByKey(blobId:
|
|
71
|
+
public getByKey(blobId: PFrameInternal.PFrameBlobId): LocalBlob {
|
|
102
72
|
const resource = super.tryGetByKey(blobId);
|
|
103
|
-
if (!resource) throw new PFrameDriverError(`
|
|
73
|
+
if (!resource) throw new PFrameDriverError(`Local blob with id ${blobId} not found.`);
|
|
104
74
|
return resource;
|
|
105
75
|
}
|
|
106
76
|
|
|
107
|
-
public
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
77
|
+
public makeDataSource(signal: AbortSignal): Omit<PFrameInternal.PFrameDataSourceV2, 'parquetServer'> {
|
|
78
|
+
return {
|
|
79
|
+
preloadBlob: async (blobIds: PFrameInternal.PFrameBlobId[]) => {
|
|
80
|
+
try {
|
|
81
|
+
await Promise.all(blobIds.map((blobId) => this.getByKey(blobId).awaitStableFullValue(signal)));
|
|
82
|
+
} catch (err: unknown) {
|
|
83
|
+
if (!isAbortError(err)) throw err;
|
|
84
|
+
}
|
|
85
|
+
},
|
|
86
|
+
resolveBlobContent: async (blobId: PFrameInternal.PFrameBlobId) => {
|
|
87
|
+
const computable = this.getByKey(blobId);
|
|
88
|
+
const blob = await computable.awaitStableValue(signal);
|
|
89
|
+
return await this.blobDriver.getContent(blob.handle, { signal });
|
|
90
|
+
},
|
|
91
|
+
};
|
|
92
|
+
}
|
|
120
93
|
}
|
|
121
94
|
|
|
122
|
-
type
|
|
123
|
-
|
|
95
|
+
type RemoteBlob = Computable<RemoteBlobHandleAndSize>;
|
|
124
96
|
class RemoteBlobPool
|
|
125
|
-
extends
|
|
126
|
-
constructor(
|
|
97
|
+
extends RefCountPoolBase<PlTreeEntry, PFrameInternal.PFrameBlobId, RemoteBlob> {
|
|
98
|
+
constructor(
|
|
99
|
+
private readonly blobDriver: DownloadDriver,
|
|
100
|
+
private readonly logger: PFrameInternal.Logger,
|
|
101
|
+
) {
|
|
127
102
|
super();
|
|
128
103
|
}
|
|
129
104
|
|
|
130
|
-
protected calculateParamsKey(params: PlTreeEntry):
|
|
131
|
-
return
|
|
105
|
+
protected calculateParamsKey(params: PlTreeEntry): PFrameInternal.PFrameBlobId {
|
|
106
|
+
return makeBlobId(params);
|
|
132
107
|
}
|
|
133
108
|
|
|
134
|
-
protected createNewResource(params: PlTreeEntry, _key:
|
|
109
|
+
protected createNewResource(params: PlTreeEntry, _key: PFrameInternal.PFrameBlobId): RemoteBlob {
|
|
135
110
|
return this.blobDriver.getOnDemandBlob(params);
|
|
136
111
|
}
|
|
137
112
|
|
|
113
|
+
public getByKey(blobId: PFrameInternal.PFrameBlobId): RemoteBlob {
|
|
114
|
+
const resource = super.tryGetByKey(blobId);
|
|
115
|
+
if (!resource) throw new PFrameDriverError(`Remote blob with id ${blobId} not found.`);
|
|
116
|
+
return resource;
|
|
117
|
+
}
|
|
118
|
+
|
|
138
119
|
public async withContent<T>(
|
|
139
120
|
handle: RemoteBlobHandle,
|
|
140
121
|
options: {
|
|
@@ -155,15 +136,15 @@ class RemoteBlobPool
|
|
|
155
136
|
}
|
|
156
137
|
|
|
157
138
|
interface BlobStoreOptions extends PFrameInternal.ObjectStoreOptions {
|
|
158
|
-
|
|
139
|
+
remoteBlobProvider: RemoteBlobPool;
|
|
159
140
|
};
|
|
160
141
|
|
|
161
142
|
class BlobStore extends PFrameInternal.BaseObjectStore {
|
|
162
|
-
private readonly
|
|
143
|
+
private readonly remoteBlobProvider: RemoteBlobPool;
|
|
163
144
|
|
|
164
145
|
constructor(options: BlobStoreOptions) {
|
|
165
146
|
super(options);
|
|
166
|
-
this.
|
|
147
|
+
this.remoteBlobProvider = options.remoteBlobProvider;
|
|
167
148
|
}
|
|
168
149
|
|
|
169
150
|
public override async request(
|
|
@@ -187,7 +168,7 @@ class BlobStore extends PFrameInternal.BaseObjectStore {
|
|
|
187
168
|
};
|
|
188
169
|
|
|
189
170
|
try {
|
|
190
|
-
const computable = this.
|
|
171
|
+
const computable = this.remoteBlobProvider.tryGetByKey(blobId);
|
|
191
172
|
if (!computable) return await respond({ type: 'NotFound' });
|
|
192
173
|
|
|
193
174
|
let blob: RemoteBlobHandleAndSize;
|
|
@@ -221,7 +202,7 @@ class BlobStore extends PFrameInternal.BaseObjectStore {
|
|
|
221
202
|
`PFrames blob store requesting content for ${blobId}, `
|
|
222
203
|
+ `range [${translatedRange.start}..=${translatedRange.end}]`,
|
|
223
204
|
);
|
|
224
|
-
return await this.
|
|
205
|
+
return await this.remoteBlobProvider.withContent(blob.handle, {
|
|
225
206
|
range: translatedRange,
|
|
226
207
|
signal: params.signal,
|
|
227
208
|
handler: async (data) => {
|
|
@@ -245,1134 +226,86 @@ class BlobStore extends PFrameInternal.BaseObjectStore {
|
|
|
245
226
|
}
|
|
246
227
|
}
|
|
247
228
|
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
const valueTypes: ValueType[] = ['Int', 'Long', 'Float', 'Double', 'String', 'Bytes'] as const;
|
|
251
|
-
|
|
252
|
-
function migrateFilters(
|
|
253
|
-
filters: PTableRecordFilter[],
|
|
254
|
-
logger: PFrameInternal.Logger,
|
|
255
|
-
): PTableRecordFilter[] {
|
|
256
|
-
const filtersV1 = [];
|
|
257
|
-
const filtersV2: PTableRecordSingleValueFilterV2[] = [];
|
|
258
|
-
for (const filter of filters) {
|
|
259
|
-
if ((filter.type as unknown) === 'bySingleColumn') {
|
|
260
|
-
filtersV1.push(filter);
|
|
261
|
-
filtersV2.push({
|
|
262
|
-
...filter,
|
|
263
|
-
type: 'bySingleColumnV2',
|
|
264
|
-
});
|
|
265
|
-
} else {
|
|
266
|
-
filtersV2.push(filter);
|
|
267
|
-
}
|
|
268
|
-
}
|
|
269
|
-
if (filtersV1.length > 0) {
|
|
270
|
-
const filtersV1Json = JSON.stringify(filtersV1);
|
|
271
|
-
logger('warn',
|
|
272
|
-
`type overriten from 'bySingleColumn' to 'bySingleColumnV2' for filters: ${filtersV1Json}`,
|
|
273
|
-
);
|
|
274
|
-
}
|
|
275
|
-
return filtersV2;
|
|
276
|
-
}
|
|
277
|
-
|
|
278
|
-
function migratePTableFilters<T>(
|
|
279
|
-
def: Omit<PTableDef<T>, 'partitionFilters'> | PTableDef<T>,
|
|
280
|
-
logger: PFrameInternal.Logger,
|
|
281
|
-
): PTableDef<T> {
|
|
282
|
-
if (!('partitionFilters' in def)) {
|
|
283
|
-
// For old blocks assume all axes filters to be partition filters
|
|
284
|
-
return {
|
|
285
|
-
...def,
|
|
286
|
-
partitionFilters: migrateFilters(def.filters.filter((f) => f.column.type === 'axis'), logger),
|
|
287
|
-
filters: migrateFilters(def.filters.filter((f) => f.column.type === 'column'), logger),
|
|
288
|
-
};
|
|
289
|
-
}
|
|
290
|
-
return {
|
|
291
|
-
...def,
|
|
292
|
-
partitionFilters: migrateFilters(def.partitionFilters, logger),
|
|
293
|
-
filters: migrateFilters(def.filters, logger),
|
|
294
|
-
};
|
|
295
|
-
}
|
|
296
|
-
|
|
297
|
-
function hasArtificialColumns<T>(entry: JoinEntry<T>): boolean {
|
|
298
|
-
switch (entry.type) {
|
|
299
|
-
case 'column':
|
|
300
|
-
case 'slicedColumn':
|
|
301
|
-
case 'inlineColumn':
|
|
302
|
-
return false;
|
|
303
|
-
case 'artificialColumn':
|
|
304
|
-
return true;
|
|
305
|
-
case 'full':
|
|
306
|
-
case 'inner':
|
|
307
|
-
return entry.entries.some(hasArtificialColumns);
|
|
308
|
-
case 'outer':
|
|
309
|
-
return hasArtificialColumns(entry.primary) || entry.secondary.some(hasArtificialColumns);
|
|
310
|
-
default:
|
|
311
|
-
assertNever(entry);
|
|
312
|
-
}
|
|
313
|
-
}
|
|
314
|
-
|
|
315
|
-
const bigintReplacer = (_: string, v: unknown) => (typeof v === 'bigint' ? v.toString() : v);
|
|
316
|
-
|
|
317
|
-
class PFramePool extends RefCountResourcePool<InternalPFrameData, PFrameHolder> {
|
|
229
|
+
class RemoteBlobProviderImpl implements RemoteBlobProvider<PlTreeEntry> {
|
|
318
230
|
constructor(
|
|
319
|
-
private readonly
|
|
320
|
-
private readonly
|
|
321
|
-
|
|
322
|
-
private readonly logger: PFrameInternal.Logger,
|
|
323
|
-
private readonly spillPath: string,
|
|
324
|
-
) {
|
|
325
|
-
super();
|
|
326
|
-
}
|
|
327
|
-
|
|
328
|
-
protected calculateParamsKey(params: InternalPFrameData): string {
|
|
329
|
-
try {
|
|
330
|
-
return stableKeyFromPFrameData(params);
|
|
331
|
-
} catch (err: unknown) {
|
|
332
|
-
if (isPFrameDriverError(err)) throw err;
|
|
333
|
-
throw new PFrameDriverError(
|
|
334
|
-
`PFrame handle calculation failed, `
|
|
335
|
-
+ `request: ${JSON.stringify(params, bigintReplacer)}, `
|
|
336
|
-
+ `error: ${ensureError(err)}`,
|
|
337
|
-
);
|
|
338
|
-
}
|
|
339
|
-
}
|
|
340
|
-
|
|
341
|
-
protected createNewResource(params: InternalPFrameData, key: string): PFrameHolder {
|
|
342
|
-
if (getDebugFlags().logPFrameRequests) {
|
|
343
|
-
this.logger('info',
|
|
344
|
-
`PFrame creation (pFrameHandle = ${key}): `
|
|
345
|
-
+ `${JSON.stringify(params, bigintReplacer)}`,
|
|
346
|
-
);
|
|
347
|
-
}
|
|
348
|
-
return new PFrameHolder(
|
|
349
|
-
this.parquetServer,
|
|
350
|
-
this.localBlobPool,
|
|
351
|
-
this.remoteBlobPool,
|
|
352
|
-
this.logger,
|
|
353
|
-
this.spillPath,
|
|
354
|
-
params,
|
|
355
|
-
);
|
|
356
|
-
}
|
|
357
|
-
|
|
358
|
-
public getByKey(key: PFrameHandle): PFrameHolder {
|
|
359
|
-
const resource = super.tryGetByKey(key);
|
|
360
|
-
if (!resource) throw new PFrameDriverError(`PFrame not found, handle = ${key}`);
|
|
361
|
-
return resource;
|
|
362
|
-
}
|
|
363
|
-
}
|
|
364
|
-
|
|
365
|
-
class PTableDefPool extends RefCountResourcePool<FullPTableDef, PTableDefHolder> {
|
|
366
|
-
constructor(private readonly logger: PFrameInternal.Logger) {
|
|
367
|
-
super();
|
|
368
|
-
}
|
|
369
|
-
|
|
370
|
-
protected calculateParamsKey(params: FullPTableDef): string {
|
|
371
|
-
return stableKeyFromFullPTableDef(params);
|
|
372
|
-
}
|
|
373
|
-
|
|
374
|
-
protected createNewResource(params: FullPTableDef, key: string): PTableDefHolder {
|
|
375
|
-
return new PTableDefHolder(params, key as PTableHandle, this.logger);
|
|
376
|
-
}
|
|
377
|
-
|
|
378
|
-
public getByKey(key: PTableHandle): PTableDefHolder {
|
|
379
|
-
const resource = super.tryGetByKey(key);
|
|
380
|
-
if (!resource) throw new PFrameDriverError(`PTable definition not found, handle = ${key}`);
|
|
381
|
-
return resource;
|
|
382
|
-
}
|
|
383
|
-
}
|
|
384
|
-
|
|
385
|
-
class PTablePool extends RefCountResourcePool<FullPTableDef, PTableHolder> {
|
|
386
|
-
constructor(
|
|
387
|
-
private readonly pFrames: PFramePool,
|
|
388
|
-
private readonly pTableDefs: PTableDefPool,
|
|
389
|
-
private readonly logger: PFrameInternal.Logger,
|
|
390
|
-
) {
|
|
391
|
-
super();
|
|
392
|
-
}
|
|
393
|
-
|
|
394
|
-
protected calculateParamsKey(params: FullPTableDef): string {
|
|
395
|
-
return stableKeyFromFullPTableDef(params);
|
|
396
|
-
}
|
|
397
|
-
|
|
398
|
-
protected createNewResource(params: FullPTableDef, key: string): PTableHolder {
|
|
399
|
-
if (getDebugFlags().logPFrameRequests) {
|
|
400
|
-
this.logger('info',
|
|
401
|
-
`PTable creation (pTableHandle = ${key}): `
|
|
402
|
-
+ `${JSON.stringify(params, bigintReplacer)}`,
|
|
403
|
-
);
|
|
404
|
-
}
|
|
405
|
-
|
|
406
|
-
const handle = params.pFrameHandle;
|
|
407
|
-
const { pFramePromise, disposeSignal } = this.pFrames.getByKey(handle);
|
|
408
|
-
|
|
409
|
-
const defDisposeSignal = this.pTableDefs.tryGetByKey(key)?.disposeSignal;
|
|
410
|
-
const combinedSignal = AbortSignal.any([disposeSignal, defDisposeSignal].filter((s) => !!s));
|
|
411
|
-
|
|
412
|
-
// 3. Sort
|
|
413
|
-
if (params.def.sorting.length > 0) {
|
|
414
|
-
const predecessor = this.acquire({
|
|
415
|
-
...params,
|
|
416
|
-
def: {
|
|
417
|
-
...params.def,
|
|
418
|
-
sorting: [],
|
|
419
|
-
},
|
|
420
|
-
});
|
|
421
|
-
const { resource: { pTablePromise } } = predecessor;
|
|
422
|
-
const sortedTable = pTablePromise.then((pTable) => pTable.sort(params.def.sorting));
|
|
423
|
-
return new PTableHolder(handle, combinedSignal, sortedTable, predecessor);
|
|
424
|
-
}
|
|
425
|
-
|
|
426
|
-
// 2. Filter (except the case with artificial columns where cartesian creates too many rows)
|
|
427
|
-
if (!hasArtificialColumns(params.def.src) && params.def.filters.length > 0) {
|
|
428
|
-
const predecessor = this.acquire({
|
|
429
|
-
...params,
|
|
430
|
-
def: {
|
|
431
|
-
...params.def,
|
|
432
|
-
filters: [],
|
|
433
|
-
},
|
|
434
|
-
});
|
|
435
|
-
const { resource: { pTablePromise } } = predecessor;
|
|
436
|
-
const filteredTable = pTablePromise.then((pTable) => pTable.filter(params.def.filters));
|
|
437
|
-
return new PTableHolder(handle, combinedSignal, filteredTable, predecessor);
|
|
438
|
-
}
|
|
439
|
-
|
|
440
|
-
// 1. Join
|
|
441
|
-
const table = pFramePromise.then((pFrame) => pFrame.createTable({
|
|
442
|
-
src: joinEntryToInternal(params.def.src),
|
|
443
|
-
// `params.def.filters` would be non-empty only when join has artificial columns
|
|
444
|
-
filters: [...params.def.partitionFilters, ...params.def.filters],
|
|
445
|
-
}));
|
|
446
|
-
return new PTableHolder(handle, combinedSignal, table);
|
|
447
|
-
}
|
|
448
|
-
|
|
449
|
-
public getByKey(key: PTableHandle): PTableHolder {
|
|
450
|
-
const resource = super.tryGetByKey(key);
|
|
451
|
-
if (!resource) throw new PFrameDriverError(`PTable not found, handle = ${key}`);
|
|
452
|
-
return resource;
|
|
453
|
-
}
|
|
454
|
-
}
|
|
455
|
-
|
|
456
|
-
class PTableCacheUi {
|
|
457
|
-
private readonly perFrame = new Map<PFrameHandle, LRUCache<PTableHandle, PoolResource<PTableHolder>>>();
|
|
458
|
-
private readonly global: LRUCache<PTableHandle, PoolResource<PTableHolder>>;
|
|
459
|
-
private readonly disposeListeners = new Set<PTableHandle>();
|
|
460
|
-
|
|
461
|
-
constructor(
|
|
462
|
-
private readonly logger: PFrameInternal.Logger,
|
|
463
|
-
private readonly ops: Pick<PFrameDriverOps, 'pFramesCacheMaxSize' | 'pFrameCacheMaxCount'>,
|
|
464
|
-
) {
|
|
465
|
-
this.global = new LRUCache<PTableHandle, PoolResource<PTableHolder>>({
|
|
466
|
-
maxSize: this.ops.pFramesCacheMaxSize,
|
|
467
|
-
dispose: (resource, key, reason) => {
|
|
468
|
-
if (reason === 'evict') {
|
|
469
|
-
this.perFrame.get(resource.resource.pFrame)?.delete(key);
|
|
470
|
-
}
|
|
471
|
-
|
|
472
|
-
if (this.perFrame.get(resource.resource.pFrame)?.size === 0) {
|
|
473
|
-
this.perFrame.delete(resource.resource.pFrame);
|
|
474
|
-
}
|
|
475
|
-
|
|
476
|
-
resource.unref();
|
|
477
|
-
if (getDebugFlags().logPFrameRequests) {
|
|
478
|
-
logger('info', `calculateTableData cache - removed PTable ${key} (reason: ${reason})`);
|
|
479
|
-
}
|
|
480
|
-
},
|
|
481
|
-
});
|
|
482
|
-
}
|
|
483
|
-
|
|
484
|
-
public cache(resource: PoolResource<PTableHolder>, size: number): void {
|
|
485
|
-
const key = resource.key as PTableHandle;
|
|
486
|
-
if (getDebugFlags().logPFrameRequests) {
|
|
487
|
-
this.logger('info', `calculateTableData cache - added PTable ${key} with size ${size}`);
|
|
488
|
-
}
|
|
489
|
-
|
|
490
|
-
this.global.set(key, resource, { size: Math.max(size, 1) }); // 1 is minimum size to avoid cache evictions
|
|
491
|
-
|
|
492
|
-
let perFrame = this.perFrame.get(resource.resource.pFrame);
|
|
493
|
-
if (!perFrame) {
|
|
494
|
-
perFrame = new LRUCache<PTableHandle, PoolResource<PTableHolder>>({
|
|
495
|
-
max: this.ops.pFrameCacheMaxCount,
|
|
496
|
-
dispose: (_resource, key, reason) => {
|
|
497
|
-
if (reason === 'evict') {
|
|
498
|
-
this.global.delete(key);
|
|
499
|
-
}
|
|
500
|
-
},
|
|
501
|
-
});
|
|
502
|
-
this.perFrame.set(resource.resource.pFrame, perFrame);
|
|
503
|
-
}
|
|
504
|
-
perFrame.set(key, resource);
|
|
505
|
-
|
|
506
|
-
if (!this.disposeListeners.has(key)) {
|
|
507
|
-
const disposeListener = () => {
|
|
508
|
-
this.perFrame.get(resource.resource.pFrame)?.delete(key);
|
|
509
|
-
this.global.delete(key);
|
|
510
|
-
|
|
511
|
-
this.disposeListeners.delete(key);
|
|
512
|
-
resource.resource.disposeSignal.removeEventListener('abort', disposeListener);
|
|
513
|
-
};
|
|
514
|
-
this.disposeListeners.add(key);
|
|
515
|
-
resource.resource.disposeSignal.addEventListener('abort', disposeListener);
|
|
516
|
-
}
|
|
517
|
-
}
|
|
518
|
-
}
|
|
519
|
-
|
|
520
|
-
class PTableCacheModel {
|
|
521
|
-
private readonly global: LRUCache<PTableHandle, PoolResource<PTableHolder>>;
|
|
522
|
-
private readonly disposeListeners = new Set<PTableHandle>();
|
|
523
|
-
|
|
524
|
-
constructor(
|
|
525
|
-
private readonly logger: PFrameInternal.Logger,
|
|
526
|
-
ops: Pick<PFrameDriverOps, 'pTablesCacheMaxSize'>,
|
|
527
|
-
) {
|
|
528
|
-
this.global = new LRUCache<PTableHandle, PoolResource<PTableHolder>>({
|
|
529
|
-
maxSize: ops.pTablesCacheMaxSize,
|
|
530
|
-
dispose: (resource, key, reason) => {
|
|
531
|
-
resource.unref();
|
|
532
|
-
if (getDebugFlags().logPFrameRequests) {
|
|
533
|
-
logger('info', `createPTable cache - removed PTable ${key} (reason: ${reason})`);
|
|
534
|
-
}
|
|
535
|
-
},
|
|
536
|
-
});
|
|
537
|
-
}
|
|
538
|
-
|
|
539
|
-
public cache(resource: PoolResource<PTableHolder>, size: number, defDisposeSignal: AbortSignal): void {
|
|
540
|
-
const key = resource.key as PTableHandle;
|
|
541
|
-
if (getDebugFlags().logPFrameRequests) {
|
|
542
|
-
this.logger('info', `createPTable cache - added PTable ${key} with size ${size}`);
|
|
543
|
-
}
|
|
544
|
-
|
|
545
|
-
const status: LRUCache.Status<PoolResource<PTableHolder>> = {};
|
|
546
|
-
this.global.set(key, resource, { size: Math.max(size, 1), status }); // 1 is minimum size to avoid cache evictions
|
|
547
|
-
|
|
548
|
-
if (status.maxEntrySizeExceeded) {
|
|
549
|
-
resource.unref();
|
|
550
|
-
if (getDebugFlags().logPFrameRequests) {
|
|
551
|
-
this.logger('info', `createPTable cache - removed PTable ${key} (maxEntrySizeExceeded)`);
|
|
552
|
-
}
|
|
553
|
-
} else {
|
|
554
|
-
if (!this.disposeListeners.has(key)) {
|
|
555
|
-
const disposeListener = () => {
|
|
556
|
-
this.global.delete(key);
|
|
557
|
-
|
|
558
|
-
this.disposeListeners.delete(key);
|
|
559
|
-
defDisposeSignal.removeEventListener('abort', disposeListener);
|
|
560
|
-
};
|
|
561
|
-
this.disposeListeners.add(key);
|
|
562
|
-
defDisposeSignal.addEventListener('abort', disposeListener);
|
|
563
|
-
}
|
|
564
|
-
}
|
|
565
|
-
}
|
|
566
|
-
}
|
|
567
|
-
|
|
568
|
-
class PFrameHolder implements PFrameInternal.PFrameDataSourceV2, AsyncDisposable {
|
|
569
|
-
public readonly pFramePromise: Promise<PFrameInternal.PFrameV12>;
|
|
570
|
-
private readonly abortController = new AbortController();
|
|
571
|
-
private readonly localBlobs: LocalBlobPoolEntry[] = [];
|
|
572
|
-
private readonly remoteBlobs: RemoteBlobPoolEntry[] = [];
|
|
231
|
+
private readonly pool: RemoteBlobPool,
|
|
232
|
+
private readonly server: PFrameInternal.HttpServer,
|
|
233
|
+
) {}
|
|
573
234
|
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
private readonly localBlobPool: LocalBlobPool,
|
|
577
|
-
private readonly remoteBlobPool: RemoteBlobPool,
|
|
235
|
+
public static async init(
|
|
236
|
+
blobDriver: DownloadDriver,
|
|
578
237
|
logger: PFrameInternal.Logger,
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
const
|
|
583
|
-
const localBlob = this.localBlobPool.acquire(blob);
|
|
584
|
-
this.localBlobs.push(localBlob);
|
|
585
|
-
return localBlob.key;
|
|
586
|
-
};
|
|
238
|
+
serverOptions: Omit<PFrameInternal.HttpServerOptions, 'handler'>,
|
|
239
|
+
): Promise<RemoteBlobProviderImpl> {
|
|
240
|
+
const pool = new RemoteBlobPool(blobDriver, logger);
|
|
241
|
+
const store = new BlobStore({ remoteBlobProvider: pool, logger });
|
|
587
242
|
|
|
588
|
-
const
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
return remoteBlob.key + PFrameInternal.ParquetExtension;
|
|
592
|
-
};
|
|
243
|
+
const handler = HttpHelpers.createRequestHandler({ store });
|
|
244
|
+
const server = await HttpHelpers.createHttpServer({ ...serverOptions, handler });
|
|
245
|
+
logger('info', `PFrames HTTP server started on ${server.info.url}`);
|
|
593
246
|
|
|
594
|
-
|
|
595
|
-
switch (data.type) {
|
|
596
|
-
case 'Json':
|
|
597
|
-
return { ...data };
|
|
598
|
-
case 'JsonPartitioned':
|
|
599
|
-
return {
|
|
600
|
-
...data,
|
|
601
|
-
parts: mapValues(data.parts, makeLocalBlobId),
|
|
602
|
-
};
|
|
603
|
-
case 'BinaryPartitioned':
|
|
604
|
-
return {
|
|
605
|
-
...data,
|
|
606
|
-
parts: mapValues(data.parts, (v) => ({
|
|
607
|
-
index: makeLocalBlobId(v.index),
|
|
608
|
-
values: makeLocalBlobId(v.values),
|
|
609
|
-
})),
|
|
610
|
-
};
|
|
611
|
-
case 'ParquetPartitioned':
|
|
612
|
-
return {
|
|
613
|
-
...data,
|
|
614
|
-
parts: mapValues(data.parts, (v) => ({
|
|
615
|
-
...v,
|
|
616
|
-
data: makeRemoteBlobId(v.data),
|
|
617
|
-
})),
|
|
618
|
-
};
|
|
619
|
-
default:
|
|
620
|
-
assertNever(data);
|
|
621
|
-
}
|
|
622
|
-
};
|
|
623
|
-
|
|
624
|
-
const jsonifiedColumns = columns.map((column) => ({
|
|
625
|
-
...column,
|
|
626
|
-
data: mapColumnData(column.data),
|
|
627
|
-
}));
|
|
628
|
-
|
|
629
|
-
try {
|
|
630
|
-
const pFrame = PFrameFactory.createPFrame({ spillPath: this.spillPath, logger });
|
|
631
|
-
pFrame.setDataSource(this);
|
|
632
|
-
|
|
633
|
-
const promises: Promise<void>[] = [];
|
|
634
|
-
for (const column of jsonifiedColumns) {
|
|
635
|
-
pFrame.addColumnSpec(column.id, column.spec);
|
|
636
|
-
promises.push(pFrame.setColumnData(column.id, column.data, { signal: this.disposeSignal }));
|
|
637
|
-
}
|
|
638
|
-
|
|
639
|
-
this.pFramePromise = Promise.all(promises)
|
|
640
|
-
.then(() => pFrame)
|
|
641
|
-
.catch((err) => {
|
|
642
|
-
this.dispose();
|
|
643
|
-
pFrame.dispose();
|
|
644
|
-
throw new PFrameDriverError(
|
|
645
|
-
`PFrame creation failed asynchronously, `
|
|
646
|
-
+ `columns: ${JSON.stringify(jsonifiedColumns)}, `
|
|
647
|
-
+ `error: ${ensureError(err)}`,
|
|
648
|
-
);
|
|
649
|
-
});
|
|
650
|
-
} catch (err: unknown) {
|
|
651
|
-
throw new PFrameDriverError(
|
|
652
|
-
`PFrame creation failed synchronously, `
|
|
653
|
-
+ `columns: ${JSON.stringify(jsonifiedColumns)}, `
|
|
654
|
-
+ `error: ${ensureError(err)}`,
|
|
655
|
-
);
|
|
656
|
-
}
|
|
657
|
-
}
|
|
658
|
-
|
|
659
|
-
public readonly preloadBlob = async (blobIds: string[]): Promise<void> => {
|
|
660
|
-
return await this.localBlobPool.preloadBlob(blobIds, this.disposeSignal);
|
|
661
|
-
};
|
|
662
|
-
|
|
663
|
-
public readonly resolveBlobContent = async (blobId: string): Promise<Uint8Array> => {
|
|
664
|
-
return await this.localBlobPool.resolveBlobContent(blobId, this.disposeSignal);
|
|
665
|
-
};
|
|
666
|
-
|
|
667
|
-
public get disposeSignal(): AbortSignal {
|
|
668
|
-
return this.abortController.signal;
|
|
669
|
-
}
|
|
670
|
-
|
|
671
|
-
private dispose(): void {
|
|
672
|
-
this.abortController.abort();
|
|
673
|
-
this.localBlobs.forEach((entry) => entry.unref());
|
|
674
|
-
this.remoteBlobs.forEach((entry) => entry.unref());
|
|
675
|
-
}
|
|
676
|
-
|
|
677
|
-
async [Symbol.asyncDispose](): Promise<void> {
|
|
678
|
-
this.dispose();
|
|
679
|
-
await this.pFramePromise
|
|
680
|
-
.then((pFrame) => pFrame.dispose())
|
|
681
|
-
.catch(() => { /* mute error */ });
|
|
682
|
-
}
|
|
683
|
-
}
|
|
684
|
-
|
|
685
|
-
class PTableDefHolder implements Disposable {
|
|
686
|
-
private readonly abortController = new AbortController();
|
|
687
|
-
|
|
688
|
-
constructor(
|
|
689
|
-
public readonly def: FullPTableDef,
|
|
690
|
-
private readonly pTableHandle: PTableHandle,
|
|
691
|
-
private readonly logger: PFrameInternal.Logger,
|
|
692
|
-
) {
|
|
693
|
-
if (getDebugFlags().logPFrameRequests) {
|
|
694
|
-
this.logger('info', `PTable definition saved (pTableHandle = ${this.pTableHandle})`);
|
|
695
|
-
}
|
|
696
|
-
}
|
|
697
|
-
|
|
698
|
-
public get disposeSignal(): AbortSignal {
|
|
699
|
-
return this.abortController.signal;
|
|
700
|
-
}
|
|
701
|
-
|
|
702
|
-
[Symbol.dispose](): void {
|
|
703
|
-
this.abortController.abort();
|
|
704
|
-
if (getDebugFlags().logPFrameRequests) {
|
|
705
|
-
this.logger('info', `PTable definition disposed (pTableHandle = ${this.pTableHandle})`);
|
|
706
|
-
}
|
|
247
|
+
return new RemoteBlobProviderImpl(pool, server);
|
|
707
248
|
}
|
|
708
|
-
}
|
|
709
|
-
|
|
710
|
-
class PTableHolder implements AsyncDisposable {
|
|
711
|
-
private readonly abortController = new AbortController();
|
|
712
|
-
private readonly combinedDisposeSignal: AbortSignal;
|
|
713
249
|
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
pFrameDisposeSignal: AbortSignal,
|
|
717
|
-
public readonly pTablePromise: Promise<PFrameInternal.PTableV7>,
|
|
718
|
-
private readonly predecessor?: PoolResource<PTableHolder>,
|
|
719
|
-
) {
|
|
720
|
-
this.combinedDisposeSignal = AbortSignal.any([pFrameDisposeSignal, this.abortController.signal]);
|
|
250
|
+
public acquire(params: PlTreeEntry): PoolEntry {
|
|
251
|
+
return this.pool.acquire(params);
|
|
721
252
|
}
|
|
722
253
|
|
|
723
|
-
public
|
|
724
|
-
return this.
|
|
254
|
+
public httpServerInfo(): PFrameInternal.HttpServerInfo {
|
|
255
|
+
return this.server.info;
|
|
725
256
|
}
|
|
726
257
|
|
|
727
258
|
async [Symbol.asyncDispose](): Promise<void> {
|
|
728
|
-
this.
|
|
729
|
-
await this.pTablePromise
|
|
730
|
-
.then((pTable) => pTable.dispose())
|
|
731
|
-
.catch(() => { /* mute error */ });
|
|
732
|
-
this.predecessor?.unref();
|
|
259
|
+
await this.server.stop();
|
|
733
260
|
}
|
|
734
261
|
}
|
|
735
262
|
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
def: PTableDef<PObjectId>;
|
|
739
|
-
};
|
|
263
|
+
export interface InternalPFrameDriver
|
|
264
|
+
extends AbstractInternalPFrameDriver<PColumnDataUniversal<PlTreeNodeAccessor>> {};
|
|
740
265
|
|
|
741
|
-
export type PFrameDriverOps = {
|
|
266
|
+
export type PFrameDriverOps = AbstractPFrameDriverOps & {
|
|
742
267
|
/** Port to run parquet HTTP server on. */
|
|
743
268
|
parquetServerPort: number;
|
|
744
|
-
/** Concurrency limits for `getUniqueValues` and `calculateTableData` requests */
|
|
745
|
-
pFrameConcurrency: number;
|
|
746
|
-
/** Concurrency limits for `getShape` and `getData` requests */
|
|
747
|
-
pTableConcurrency: number;
|
|
748
|
-
/** Maximum number of `calculateTableData` results cached for each PFrame */
|
|
749
|
-
pFrameCacheMaxCount: number;
|
|
750
|
-
/**
|
|
751
|
-
* Maximum size of `calculateTableData` results cached for PFrames overall.
|
|
752
|
-
* The limit is soft, as the same table could be materialized with other requests and will not be deleted in such case.
|
|
753
|
-
* Also, each table has predecessors; overlapping predecessors will be counted twice, so the effective limit is smaller.
|
|
754
|
-
*/
|
|
755
|
-
pFramesCacheMaxSize: number;
|
|
756
|
-
/**
|
|
757
|
-
* Maximum size of `createPTable` results cached on disk.
|
|
758
|
-
* The limit is soft, as the same table could be materialized with other requests and will not be deleted in such case.
|
|
759
|
-
* Also, each table has predecessors; overlapping predecessors will be counted twice, so the effective limit is smaller.
|
|
760
|
-
*/
|
|
761
|
-
pTablesCacheMaxSize: number;
|
|
762
269
|
};
|
|
763
270
|
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
export interface InternalPFrameDriver extends SdkPFrameDriver, AsyncDisposable {
|
|
769
|
-
/** Dispose the driver and all its resources. */
|
|
770
|
-
dispose(): Promise<void>;
|
|
771
|
-
|
|
772
|
-
/**
|
|
773
|
-
* Dump active PFrames allocations in pprof format.
|
|
774
|
-
* The result of this function should be saved as `profile.pb.gz`.
|
|
775
|
-
* Use {@link https://pprof.me/} or {@link https://www.speedscope.app/}
|
|
776
|
-
* to view the allocation flamechart.
|
|
777
|
-
* @warning This method will always reject on Windows!
|
|
778
|
-
*/
|
|
779
|
-
pprofDump(): Promise<Uint8Array>;
|
|
780
|
-
|
|
781
|
-
/** Create a new PFrame */
|
|
782
|
-
createPFrame(
|
|
783
|
-
def: PFrameDef<PColumnDataUniversal>,
|
|
784
|
-
ctx: ComputableCtx,
|
|
785
|
-
): PFrameHandle;
|
|
786
|
-
|
|
787
|
-
/** Create a new PTable */
|
|
788
|
-
createPTable(
|
|
789
|
-
def: PTableDef<PColumn<PColumnDataUniversal>>,
|
|
790
|
-
ctx: ComputableCtx,
|
|
791
|
-
): PTableHandle;
|
|
792
|
-
|
|
793
|
-
/** Calculates data for the table and returns complete data representation of it */
|
|
794
|
-
calculateTableData(
|
|
795
|
-
handle: PFrameHandle,
|
|
796
|
-
request: CalculateTableDataRequest<PObjectId>,
|
|
797
|
-
range: TableRange | undefined,
|
|
798
|
-
signal?: AbortSignal
|
|
799
|
-
): Promise<CalculateTableDataResponse>;
|
|
800
|
-
|
|
801
|
-
/** Calculate set of unique values for a specific axis for the filtered set of records */
|
|
802
|
-
getUniqueValues(
|
|
803
|
-
handle: PFrameHandle,
|
|
804
|
-
request: UniqueValuesRequest,
|
|
805
|
-
signal?: AbortSignal
|
|
806
|
-
): Promise<UniqueValuesResponse>;
|
|
807
|
-
|
|
808
|
-
/** Unified table shape */
|
|
809
|
-
getShape(
|
|
810
|
-
handle: PTableHandle,
|
|
811
|
-
signal?: AbortSignal,
|
|
812
|
-
): Promise<PTableShape>;
|
|
813
|
-
|
|
814
|
-
/**
|
|
815
|
-
* Retrieve the data from the table. To retrieve only data required, it can be
|
|
816
|
-
* sliced both horizontally ({@link columnIndices}) and vertically
|
|
817
|
-
* ({@link range}).
|
|
818
|
-
*
|
|
819
|
-
* @param columnIndices unified indices of columns to be retrieved
|
|
820
|
-
* @param range optionally limit the range of records to retrieve
|
|
821
|
-
* */
|
|
822
|
-
getData(
|
|
823
|
-
handle: PTableHandle,
|
|
824
|
-
columnIndices: number[],
|
|
825
|
-
range: TableRange | undefined,
|
|
826
|
-
signal?: AbortSignal,
|
|
827
|
-
): Promise<PTableVector[]>;
|
|
828
|
-
}
|
|
829
|
-
|
|
830
|
-
export class PFrameDriver implements InternalPFrameDriver {
|
|
831
|
-
private readonly pFrames: PFramePool;
|
|
832
|
-
private readonly pTableDefs: PTableDefPool;
|
|
833
|
-
private readonly pTables: PTablePool;
|
|
834
|
-
|
|
835
|
-
private readonly pTableCacheUi: PTableCacheUi;
|
|
836
|
-
private readonly pTableCacheModel: PTableCacheModel;
|
|
837
|
-
|
|
838
|
-
private readonly frameConcurrencyLimiter: ConcurrencyLimitingExecutor;
|
|
839
|
-
private readonly tableConcurrencyLimiter: ConcurrencyLimitingExecutor;
|
|
840
|
-
|
|
841
|
-
public async pprofDump(): Promise<Uint8Array> {
|
|
842
|
-
return await PFrameFactory.pprofDump();
|
|
843
|
-
}
|
|
844
|
-
|
|
845
|
-
public static async init(
|
|
846
|
-
blobDriver: DownloadDriver,
|
|
847
|
-
miLogger: MiLogger,
|
|
848
|
-
spillPath: string,
|
|
849
|
-
ops: PFrameDriverOps,
|
|
850
|
-
): Promise<PFrameDriver> {
|
|
851
|
-
const resolvedSpillPath = path.resolve(spillPath);
|
|
852
|
-
await emptyDir(resolvedSpillPath);
|
|
853
|
-
|
|
854
|
-
const logger: PFrameInternal.Logger = (level, message) => miLogger[level](message);
|
|
855
|
-
const localBlobPool = new LocalBlobPool(blobDriver);
|
|
856
|
-
const remoteBlobPool = new RemoteBlobPool(blobDriver);
|
|
857
|
-
|
|
858
|
-
const store = new BlobStore({ remoteBlobPool, logger });
|
|
859
|
-
const handler = HttpHelpers.createRequestHandler({ store: store });
|
|
860
|
-
const server = await HttpHelpers.createHttpServer({ handler, port: ops.parquetServerPort });
|
|
861
|
-
|
|
862
|
-
return new PFrameDriver(logger, server, localBlobPool, remoteBlobPool, resolvedSpillPath, ops);
|
|
863
|
-
}
|
|
864
|
-
|
|
865
|
-
private constructor(
|
|
866
|
-
private readonly logger: PFrameInternal.Logger,
|
|
867
|
-
private readonly server: PFrameInternal.HttpServer,
|
|
868
|
-
localBlobPool: LocalBlobPool,
|
|
869
|
-
remoteBlobPool: RemoteBlobPool,
|
|
870
|
-
spillPath: string,
|
|
871
|
-
ops: PFrameDriverOps,
|
|
872
|
-
) {
|
|
873
|
-
const concurrencyLimiter = new ConcurrencyLimitingExecutor(ops.pFrameConcurrency);
|
|
874
|
-
this.frameConcurrencyLimiter = concurrencyLimiter;
|
|
875
|
-
this.tableConcurrencyLimiter = new ConcurrencyLimitingExecutor(ops.pTableConcurrency);
|
|
876
|
-
|
|
877
|
-
this.pFrames = new PFramePool(server.info, localBlobPool, remoteBlobPool, logger, spillPath);
|
|
878
|
-
this.pTableDefs = new PTableDefPool(logger);
|
|
879
|
-
this.pTables = new PTablePool(this.pFrames, this.pTableDefs, logger);
|
|
880
|
-
|
|
881
|
-
this.pTableCacheUi = new PTableCacheUi(logger, ops);
|
|
882
|
-
this.pTableCacheModel = new PTableCacheModel(logger, ops);
|
|
883
|
-
}
|
|
884
|
-
|
|
885
|
-
async dispose(): Promise<void> {
|
|
886
|
-
return await this.server.stop();
|
|
887
|
-
}
|
|
888
|
-
|
|
889
|
-
async [Symbol.asyncDispose](): Promise<void> {
|
|
890
|
-
return await this.dispose();
|
|
891
|
-
}
|
|
892
|
-
|
|
893
|
-
//
|
|
894
|
-
// Internal / Config API Methods
|
|
895
|
-
//
|
|
896
|
-
|
|
897
|
-
public createPFrame(
|
|
898
|
-
def: PFrameDef<PColumnDataUniversal>,
|
|
899
|
-
ctx: ComputableCtx,
|
|
900
|
-
): PFrameHandle {
|
|
901
|
-
const columns: InternalPFrameData = def
|
|
902
|
-
.filter((c) => valueTypes.find((t) => t === c.spec.valueType))
|
|
903
|
-
.map((c) =>
|
|
904
|
-
mapPObjectData(c, (d) =>
|
|
905
|
-
isPlTreeNodeAccessor(d)
|
|
906
|
-
? parseDataInfoResource(d)
|
|
907
|
-
: isDataInfo(d)
|
|
908
|
-
? d.type === 'ParquetPartitioned'
|
|
909
|
-
? mapDataInfo(d, (a) => traverseParquetChunkResource(a))
|
|
910
|
-
: mapDataInfo(d, (a) => a.persist())
|
|
911
|
-
: makeDataInfoFromPColumnValues(c.spec, d),
|
|
912
|
-
),
|
|
913
|
-
);
|
|
914
|
-
const distinctColumns = uniqueBy(columns, (column) => column.id);
|
|
915
|
-
|
|
916
|
-
const res = this.pFrames.acquire(distinctColumns);
|
|
917
|
-
ctx.addOnDestroy(res.unref);
|
|
918
|
-
return res.key as PFrameHandle;
|
|
919
|
-
}
|
|
920
|
-
|
|
921
|
-
public createPTable(
|
|
922
|
-
rawDef: PTableDef<PColumn<PColumnDataUniversal>>,
|
|
923
|
-
ctx: ComputableCtx,
|
|
924
|
-
): PTableHandle {
|
|
925
|
-
const def = migratePTableFilters(rawDef, this.logger);
|
|
926
|
-
const pFrameHandle = this.createPFrame(extractAllColumns(def.src), ctx);
|
|
927
|
-
const defIds = mapPTableDef(def, (c) => c.id);
|
|
928
|
-
const sortedDef = sortPTableDef(defIds);
|
|
929
|
-
|
|
930
|
-
const { key, unref } = this.pTableDefs.acquire({ def: sortedDef, pFrameHandle });
|
|
931
|
-
if (getDebugFlags().logPFrameRequests) {
|
|
932
|
-
this.logger('info', `Create PTable call (pFrameHandle = ${pFrameHandle}; pTableHandle = ${key})`);
|
|
933
|
-
}
|
|
934
|
-
ctx.addOnDestroy(unref); // in addition to pframe unref added in createPFrame above
|
|
935
|
-
return key as PTableHandle;
|
|
936
|
-
}
|
|
937
|
-
|
|
938
|
-
//
|
|
939
|
-
// PFrame instance methods
|
|
940
|
-
//
|
|
941
|
-
|
|
942
|
-
public async findColumns(
|
|
943
|
-
handle: PFrameHandle,
|
|
944
|
-
request: FindColumnsRequest,
|
|
945
|
-
): Promise<FindColumnsResponse> {
|
|
946
|
-
const iRequest: PFrameInternal.FindColumnsRequest = {
|
|
947
|
-
...request,
|
|
948
|
-
compatibleWith:
|
|
949
|
-
request.compatibleWith.length !== 0
|
|
950
|
-
? [{
|
|
951
|
-
axesSpec: [
|
|
952
|
-
...new Map(request.compatibleWith.map(
|
|
953
|
-
(item) => [canonicalize(item)!, item] as const,
|
|
954
|
-
)).values(),
|
|
955
|
-
],
|
|
956
|
-
qualifications: [],
|
|
957
|
-
}]
|
|
958
|
-
: [],
|
|
959
|
-
};
|
|
960
|
-
|
|
961
|
-
const { pFramePromise } = this.pFrames.getByKey(handle);
|
|
962
|
-
const pFrame = await pFramePromise;
|
|
963
|
-
|
|
964
|
-
const responce = await pFrame.findColumns(iRequest);
|
|
965
|
-
return {
|
|
966
|
-
hits: responce.hits
|
|
967
|
-
.filter((h) => // only exactly matching columns
|
|
968
|
-
h.mappingVariants.length === 0
|
|
969
|
-
|| h.mappingVariants.some((v) =>
|
|
970
|
-
v.qualifications.forHit.length === 0
|
|
971
|
-
&& v.qualifications.forQueries.every((q) => q.length === 0)))
|
|
972
|
-
.map((h) => h.hit),
|
|
973
|
-
};
|
|
974
|
-
}
|
|
975
|
-
|
|
976
|
-
public async getColumnSpec(handle: PFrameHandle, columnId: PObjectId): Promise<PColumnSpec> {
|
|
977
|
-
const { pFramePromise } = this.pFrames.getByKey(handle);
|
|
978
|
-
const pFrame = await pFramePromise;
|
|
979
|
-
return await pFrame.getColumnSpec(columnId);
|
|
980
|
-
}
|
|
981
|
-
|
|
982
|
-
public async listColumns(handle: PFrameHandle): Promise<PColumnIdAndSpec[]> {
|
|
983
|
-
const { pFramePromise } = this.pFrames.getByKey(handle);
|
|
984
|
-
const pFrame = await pFramePromise;
|
|
985
|
-
return await pFrame.listColumns();
|
|
986
|
-
}
|
|
987
|
-
|
|
988
|
-
public async calculateTableData(
|
|
989
|
-
handle: PFrameHandle,
|
|
990
|
-
request: CalculateTableDataRequest<PObjectId>,
|
|
991
|
-
range: TableRange | undefined,
|
|
992
|
-
signal?: AbortSignal,
|
|
993
|
-
): Promise<CalculateTableDataResponse> {
|
|
994
|
-
if (getDebugFlags().logPFrameRequests) {
|
|
995
|
-
this.logger('info',
|
|
996
|
-
`Call calculateTableData, handle = ${handle}, request = ${JSON.stringify(request, bigintReplacer)}`,
|
|
997
|
-
);
|
|
998
|
-
}
|
|
999
|
-
|
|
1000
|
-
const table = this.pTables.acquire({
|
|
1001
|
-
pFrameHandle: handle,
|
|
1002
|
-
def: sortPTableDef(migratePTableFilters(request, this.logger)),
|
|
1003
|
-
});
|
|
1004
|
-
const { pTablePromise, disposeSignal } = table.resource;
|
|
1005
|
-
const pTable = await pTablePromise;
|
|
1006
|
-
|
|
1007
|
-
const combinedSignal = AbortSignal.any([signal, disposeSignal].filter((s) => !!s));
|
|
1008
|
-
return await this.frameConcurrencyLimiter.run(async () => {
|
|
1009
|
-
try {
|
|
1010
|
-
const spec = pTable.getSpec();
|
|
1011
|
-
const data = await pTable.getData([...spec.keys()], {
|
|
1012
|
-
range,
|
|
1013
|
-
signal: combinedSignal,
|
|
1014
|
-
});
|
|
1015
|
-
|
|
1016
|
-
const resultSize = await pTable.getFootprint({
|
|
1017
|
-
withPredecessors: false,
|
|
1018
|
-
signal: combinedSignal,
|
|
1019
|
-
});
|
|
1020
|
-
if (resultSize >= 2 * 1024 * 1024 * 1024) {
|
|
1021
|
-
throw new PFrameDriverError(`Join results exceed 2GB, please add filters to shrink the result size`);
|
|
1022
|
-
}
|
|
1023
|
-
|
|
1024
|
-
const overallSize = await pTable.getFootprint({
|
|
1025
|
-
withPredecessors: true,
|
|
1026
|
-
signal: combinedSignal,
|
|
1027
|
-
});
|
|
1028
|
-
this.pTableCacheUi.cache(table, overallSize);
|
|
1029
|
-
|
|
1030
|
-
return spec.map((spec, i) => ({
|
|
1031
|
-
spec: spec,
|
|
1032
|
-
data: data[i],
|
|
1033
|
-
}));
|
|
1034
|
-
} catch (err: unknown) {
|
|
1035
|
-
table.unref();
|
|
1036
|
-
throw err;
|
|
1037
|
-
}
|
|
1038
|
-
});
|
|
1039
|
-
}
|
|
1040
|
-
|
|
1041
|
-
public async getUniqueValues(
|
|
1042
|
-
handle: PFrameHandle,
|
|
1043
|
-
request: UniqueValuesRequest,
|
|
1044
|
-
signal?: AbortSignal,
|
|
1045
|
-
): Promise<UniqueValuesResponse> {
|
|
1046
|
-
if (getDebugFlags().logPFrameRequests) {
|
|
1047
|
-
this.logger('info',
|
|
1048
|
-
`Call getUniqueValues, handle = ${handle}, request = ${JSON.stringify(request, bigintReplacer)}`,
|
|
1049
|
-
);
|
|
1050
|
-
}
|
|
1051
|
-
|
|
1052
|
-
const { pFramePromise, disposeSignal } = this.pFrames.getByKey(handle);
|
|
1053
|
-
const pFrame = await pFramePromise;
|
|
1054
|
-
|
|
1055
|
-
const combinedSignal = AbortSignal.any([signal, disposeSignal].filter((s) => !!s));
|
|
1056
|
-
return await this.frameConcurrencyLimiter.run(async () => {
|
|
1057
|
-
return await pFrame.getUniqueValues({
|
|
1058
|
-
...request,
|
|
1059
|
-
filters: migrateFilters(request.filters, this.logger),
|
|
1060
|
-
}, {
|
|
1061
|
-
signal: combinedSignal,
|
|
1062
|
-
});
|
|
1063
|
-
});
|
|
1064
|
-
}
|
|
1065
|
-
|
|
1066
|
-
//
|
|
1067
|
-
// PTable instance methods
|
|
1068
|
-
//
|
|
1069
|
-
|
|
1070
|
-
public async getSpec(handle: PTableHandle): Promise<PTableColumnSpec[]> {
|
|
1071
|
-
const { def } = this.pTableDefs.getByKey(handle);
|
|
1072
|
-
using table = this.pTables.acquire(def);
|
|
1073
|
-
|
|
1074
|
-
const { pTablePromise } = table.resource;
|
|
1075
|
-
const pTable = await pTablePromise;
|
|
1076
|
-
|
|
1077
|
-
return pTable.getSpec();
|
|
1078
|
-
}
|
|
1079
|
-
|
|
1080
|
-
public async getShape(handle: PTableHandle, signal?: AbortSignal): Promise<PTableShape> {
|
|
1081
|
-
const { def, disposeSignal: defDisposeSignal } = this.pTableDefs.getByKey(handle);
|
|
1082
|
-
const table = this.pTables.acquire(def);
|
|
1083
|
-
|
|
1084
|
-
const { pTablePromise, disposeSignal } = table.resource;
|
|
1085
|
-
const pTable = await pTablePromise;
|
|
1086
|
-
|
|
1087
|
-
const combinedSignal = AbortSignal.any([signal, disposeSignal].filter((s) => !!s));
|
|
1088
|
-
const { shape, overallSize } = await this.tableConcurrencyLimiter.run(async () => {
|
|
1089
|
-
const shape = await pTable.getShape({
|
|
1090
|
-
signal: combinedSignal,
|
|
1091
|
-
});
|
|
1092
|
-
|
|
1093
|
-
const overallSize = await pTable.getFootprint({
|
|
1094
|
-
withPredecessors: true,
|
|
1095
|
-
signal: combinedSignal,
|
|
1096
|
-
});
|
|
1097
|
-
|
|
1098
|
-
return { shape, overallSize };
|
|
1099
|
-
});
|
|
1100
|
-
|
|
1101
|
-
this.pTableCacheModel.cache(table, overallSize, defDisposeSignal);
|
|
1102
|
-
return shape;
|
|
1103
|
-
}
|
|
1104
|
-
|
|
1105
|
-
public async getData(
|
|
1106
|
-
handle: PTableHandle,
|
|
1107
|
-
columnIndices: number[],
|
|
1108
|
-
range: TableRange | undefined,
|
|
1109
|
-
signal?: AbortSignal,
|
|
1110
|
-
): Promise<PTableVector[]> {
|
|
1111
|
-
const { def, disposeSignal: defDisposeSignal } = this.pTableDefs.getByKey(handle);
|
|
1112
|
-
const table = this.pTables.acquire(def);
|
|
1113
|
-
|
|
1114
|
-
const { pTablePromise, disposeSignal } = table.resource;
|
|
1115
|
-
const pTable = await pTablePromise;
|
|
1116
|
-
|
|
1117
|
-
const combinedSignal = AbortSignal.any([signal, disposeSignal].filter((s) => !!s));
|
|
1118
|
-
const { data, overallSize } = await this.tableConcurrencyLimiter.run(async () => {
|
|
1119
|
-
const data = await pTable.getData(columnIndices, {
|
|
1120
|
-
range,
|
|
1121
|
-
signal: combinedSignal,
|
|
1122
|
-
});
|
|
1123
|
-
|
|
1124
|
-
const overallSize = await pTable.getFootprint({
|
|
1125
|
-
withPredecessors: true,
|
|
1126
|
-
signal: combinedSignal,
|
|
1127
|
-
});
|
|
1128
|
-
|
|
1129
|
-
return { data, overallSize };
|
|
1130
|
-
});
|
|
1131
|
-
|
|
1132
|
-
this.pTableCacheModel.cache(table, overallSize, defDisposeSignal);
|
|
1133
|
-
return data;
|
|
1134
|
-
}
|
|
1135
|
-
}
|
|
271
|
+
export const PFrameDriverOpsDefaults: PFrameDriverOps = {
|
|
272
|
+
...AbstractPFrameDriverOpsDefaults,
|
|
273
|
+
parquetServerPort: 0, // 0 means that some unused port will be assigned by the OS
|
|
274
|
+
};
|
|
1136
275
|
|
|
1137
|
-
function
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
return {
|
|
1154
|
-
type: 'artificialColumn',
|
|
1155
|
-
columnId: entry.column,
|
|
1156
|
-
newId: entry.newId,
|
|
1157
|
-
axesIndices: entry.axesIndices,
|
|
1158
|
-
};
|
|
1159
|
-
case 'inlineColumn':
|
|
1160
|
-
return {
|
|
1161
|
-
type: 'inlineColumn',
|
|
1162
|
-
newId: entry.column.id,
|
|
1163
|
-
spec: entry.column.spec,
|
|
1164
|
-
dataInfo: {
|
|
1165
|
-
type: 'Json',
|
|
1166
|
-
keyLength: entry.column.spec.axesSpec.length,
|
|
1167
|
-
data: entry.column.data.reduce((acc, row) => {
|
|
1168
|
-
acc[JSON.stringify(row.key)] = row.val;
|
|
1169
|
-
return acc;
|
|
1170
|
-
}, {} as Record<string, PColumnValue>),
|
|
1171
|
-
},
|
|
1172
|
-
};
|
|
1173
|
-
case 'inner':
|
|
1174
|
-
case 'full':
|
|
1175
|
-
return {
|
|
1176
|
-
type: entry.type,
|
|
1177
|
-
entries: entry.entries.map((col) => joinEntryToInternal(col)),
|
|
1178
|
-
};
|
|
1179
|
-
case 'outer':
|
|
1180
|
-
return {
|
|
1181
|
-
type: 'outer',
|
|
1182
|
-
primary: joinEntryToInternal(entry.primary),
|
|
1183
|
-
secondary: entry.secondary.map((col) => joinEntryToInternal(col)),
|
|
1184
|
-
};
|
|
1185
|
-
default:
|
|
1186
|
-
throw new PFrameDriverError(`unsupported PFrame join entry type: ${type satisfies never}`);
|
|
1187
|
-
}
|
|
1188
|
-
}
|
|
276
|
+
export async function createPFrameDriver(params: {
|
|
277
|
+
blobDriver: DownloadDriver;
|
|
278
|
+
logger: MiLogger;
|
|
279
|
+
spillPath: string;
|
|
280
|
+
options: PFrameDriverOps;
|
|
281
|
+
}): Promise<InternalPFrameDriver> {
|
|
282
|
+
const resolvedSpillPath = path.resolve(params.spillPath);
|
|
283
|
+
await emptyDir(resolvedSpillPath);
|
|
284
|
+
|
|
285
|
+
const logger: PFrameInternal.Logger = (level, message) => params.logger[level](message);
|
|
286
|
+
const localBlobProvider = new LocalBlobProviderImpl(params.blobDriver, logger);
|
|
287
|
+
const remoteBlobProvider = await RemoteBlobProviderImpl.init(
|
|
288
|
+
params.blobDriver,
|
|
289
|
+
logger,
|
|
290
|
+
{ port: params.options.parquetServerPort },
|
|
291
|
+
);
|
|
1189
292
|
|
|
1190
|
-
|
|
1191
|
-
|
|
1192
|
-
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
return lhs.column < (rhs as typeof lhs).column ? -1 : 1;
|
|
1199
|
-
case 'slicedColumn':
|
|
1200
|
-
case 'artificialColumn':
|
|
1201
|
-
return lhs.newId < (rhs as typeof lhs).newId ? -1 : 1;
|
|
1202
|
-
case 'inlineColumn': {
|
|
1203
|
-
return lhs.column.id < (rhs as typeof lhs).column.id ? -1 : 1;
|
|
1204
|
-
}
|
|
1205
|
-
case 'inner':
|
|
1206
|
-
case 'full': {
|
|
1207
|
-
const rhsInner = rhs as typeof lhs;
|
|
1208
|
-
if (lhs.entries.length !== rhsInner.entries.length) {
|
|
1209
|
-
return lhs.entries.length - rhsInner.entries.length;
|
|
1210
|
-
}
|
|
1211
|
-
for (let i = 0; i < lhs.entries.length; i++) {
|
|
1212
|
-
const cmp = cmpJoinEntries(lhs.entries[i], rhsInner.entries[i]);
|
|
1213
|
-
if (cmp !== 0) {
|
|
1214
|
-
return cmp;
|
|
1215
|
-
}
|
|
1216
|
-
}
|
|
1217
|
-
return 0;
|
|
1218
|
-
}
|
|
1219
|
-
case 'outer': {
|
|
1220
|
-
const rhsOuter = rhs as typeof lhs;
|
|
1221
|
-
const cmp = cmpJoinEntries(lhs.primary, rhsOuter.primary);
|
|
1222
|
-
if (cmp !== 0) {
|
|
1223
|
-
return cmp;
|
|
1224
|
-
}
|
|
1225
|
-
if (lhs.secondary.length !== rhsOuter.secondary.length) {
|
|
1226
|
-
return lhs.secondary.length - rhsOuter.secondary.length;
|
|
1227
|
-
}
|
|
1228
|
-
for (let i = 0; i < lhs.secondary.length; i++) {
|
|
1229
|
-
const cmp = cmpJoinEntries(lhs.secondary[i], rhsOuter.secondary[i]);
|
|
1230
|
-
if (cmp !== 0) {
|
|
1231
|
-
return cmp;
|
|
1232
|
-
}
|
|
1233
|
-
}
|
|
1234
|
-
return 0;
|
|
1235
|
-
}
|
|
1236
|
-
default:
|
|
1237
|
-
assertNever(type);
|
|
1238
|
-
}
|
|
1239
|
-
}
|
|
1240
|
-
function sortJoinEntry(entry: JoinEntry<PObjectId>): JoinEntry<PObjectId> {
|
|
1241
|
-
switch (entry.type) {
|
|
1242
|
-
case 'column':
|
|
1243
|
-
case 'slicedColumn':
|
|
1244
|
-
case 'inlineColumn':
|
|
1245
|
-
return entry;
|
|
1246
|
-
case 'artificialColumn': {
|
|
1247
|
-
const sortedAxesIndices = entry.axesIndices.toSorted((lhs, rhs) => lhs - rhs);
|
|
1248
|
-
return {
|
|
1249
|
-
...entry,
|
|
1250
|
-
axesIndices: sortedAxesIndices,
|
|
1251
|
-
};
|
|
1252
|
-
}
|
|
1253
|
-
case 'inner':
|
|
1254
|
-
case 'full': {
|
|
1255
|
-
const sortedEntries = entry.entries.map(sortJoinEntry);
|
|
1256
|
-
sortedEntries.sort(cmpJoinEntries);
|
|
1257
|
-
return {
|
|
1258
|
-
...entry,
|
|
1259
|
-
entries: sortedEntries,
|
|
1260
|
-
};
|
|
1261
|
-
}
|
|
1262
|
-
case 'outer': {
|
|
1263
|
-
const sortedSecondary = entry.secondary.map(sortJoinEntry);
|
|
1264
|
-
sortedSecondary.sort(cmpJoinEntries);
|
|
1265
|
-
return {
|
|
1266
|
-
...entry,
|
|
1267
|
-
primary: sortJoinEntry(entry.primary),
|
|
1268
|
-
secondary: sortedSecondary,
|
|
1269
|
-
};
|
|
1270
|
-
}
|
|
1271
|
-
default:
|
|
1272
|
-
assertNever(entry);
|
|
1273
|
-
}
|
|
1274
|
-
}
|
|
1275
|
-
function sortFilters(filters: PTableRecordFilter[]): PTableRecordFilter[] {
|
|
1276
|
-
return filters.toSorted((lhs, rhs) => {
|
|
1277
|
-
if (lhs.column.type === 'axis' && rhs.column.type === 'axis') {
|
|
1278
|
-
const lhsId = canonicalizeJson(getAxisId(lhs.column.id));
|
|
1279
|
-
const rhsId = canonicalizeJson(getAxisId(rhs.column.id));
|
|
1280
|
-
return lhsId < rhsId ? -1 : 1;
|
|
1281
|
-
} else if (lhs.column.type === 'column' && rhs.column.type === 'column') {
|
|
1282
|
-
return lhs.column.id < rhs.column.id ? -1 : 1;
|
|
1283
|
-
} else {
|
|
1284
|
-
return lhs.column.type === 'axis' ? -1 : 1;
|
|
1285
|
-
}
|
|
1286
|
-
});
|
|
1287
|
-
}
|
|
1288
|
-
return {
|
|
1289
|
-
src: sortJoinEntry(def.src),
|
|
1290
|
-
partitionFilters: sortFilters(def.partitionFilters),
|
|
1291
|
-
filters: sortFilters(def.filters),
|
|
1292
|
-
sorting: def.sorting,
|
|
293
|
+
const resolveDataInfo = (spec: PColumnSpec, data: PColumnDataUniversal<PlTreeNodeAccessor>) => {
|
|
294
|
+
return isPlTreeNodeAccessor(data)
|
|
295
|
+
? parseDataInfoResource(data)
|
|
296
|
+
: isDataInfo(data)
|
|
297
|
+
? data.type === 'ParquetPartitioned'
|
|
298
|
+
? mapDataInfo(data, (a) => traverseParquetChunkResource(a))
|
|
299
|
+
: mapDataInfo(data, (a) => a.persist())
|
|
300
|
+
: makeJsonDataInfo(spec, data);
|
|
1293
301
|
};
|
|
1294
|
-
}
|
|
1295
|
-
|
|
1296
|
-
function stableKeyFromFullPTableDef(data: FullPTableDef): string {
|
|
1297
|
-
try {
|
|
1298
|
-
const hash = createHash('sha256');
|
|
1299
|
-
hash.update(canonicalize(data)!);
|
|
1300
|
-
return hash.digest().toString('hex');
|
|
1301
|
-
} catch (err: unknown) {
|
|
1302
|
-
throw new PFrameDriverError(
|
|
1303
|
-
`PTable handle calculation failed, `
|
|
1304
|
-
+ `request: ${JSON.stringify(data)}, `
|
|
1305
|
-
+ `error: ${ensureError(err)}`,
|
|
1306
|
-
);
|
|
1307
|
-
}
|
|
1308
|
-
}
|
|
1309
|
-
|
|
1310
|
-
function stableKeyFromPFrameData(data: PColumn<PFrameInternal.DataInfo<PlTreeEntry>>[]): string {
|
|
1311
|
-
const orderedData = [...data].map((column) =>
|
|
1312
|
-
mapPObjectData(column, (r) => {
|
|
1313
|
-
let result: {
|
|
1314
|
-
type: string;
|
|
1315
|
-
keyLength: number;
|
|
1316
|
-
payload: {
|
|
1317
|
-
key: string;
|
|
1318
|
-
value: null | number | string | [string, string];
|
|
1319
|
-
}[];
|
|
1320
|
-
};
|
|
1321
|
-
const type = r.type;
|
|
1322
|
-
switch (type) {
|
|
1323
|
-
case 'Json':
|
|
1324
|
-
result = {
|
|
1325
|
-
type: r.type,
|
|
1326
|
-
keyLength: r.keyLength,
|
|
1327
|
-
payload: Object.entries(r.data).map(([part, value]) => ({
|
|
1328
|
-
key: part,
|
|
1329
|
-
value,
|
|
1330
|
-
})),
|
|
1331
|
-
};
|
|
1332
|
-
break;
|
|
1333
|
-
case 'JsonPartitioned':
|
|
1334
|
-
result = {
|
|
1335
|
-
type: r.type,
|
|
1336
|
-
keyLength: r.partitionKeyLength,
|
|
1337
|
-
payload: Object.entries(r.parts).map(([part, info]) => ({
|
|
1338
|
-
key: part,
|
|
1339
|
-
value: makeBlobId(info),
|
|
1340
|
-
})),
|
|
1341
|
-
};
|
|
1342
|
-
break;
|
|
1343
|
-
case 'BinaryPartitioned':
|
|
1344
|
-
result = {
|
|
1345
|
-
type: r.type,
|
|
1346
|
-
keyLength: r.partitionKeyLength,
|
|
1347
|
-
payload: Object.entries(r.parts).map(([part, info]) => ({
|
|
1348
|
-
key: part,
|
|
1349
|
-
value: [makeBlobId(info.index), makeBlobId(info.values)] as const,
|
|
1350
|
-
})),
|
|
1351
|
-
};
|
|
1352
|
-
break;
|
|
1353
|
-
case 'ParquetPartitioned':
|
|
1354
|
-
result = {
|
|
1355
|
-
type: r.type,
|
|
1356
|
-
keyLength: r.partitionKeyLength,
|
|
1357
|
-
payload: Object.entries(r.parts).map(([part, info]) => ({
|
|
1358
|
-
key: part,
|
|
1359
|
-
value: info.dataDigest || [
|
|
1360
|
-
makeBlobId(info.data),
|
|
1361
|
-
JSON.stringify({ axes: info.axes, column: info.column }),
|
|
1362
|
-
] as const,
|
|
1363
|
-
})),
|
|
1364
|
-
};
|
|
1365
|
-
break;
|
|
1366
|
-
default:
|
|
1367
|
-
throw new PFrameDriverError(`unsupported resource type: ${JSON.stringify(type satisfies never)}`);
|
|
1368
|
-
}
|
|
1369
|
-
result.payload.sort((lhs, rhs) => lhs.key < rhs.key ? -1 : 1);
|
|
1370
|
-
return result;
|
|
1371
|
-
}),
|
|
1372
|
-
);
|
|
1373
|
-
orderedData.sort((lhs, rhs) => lhs.id < rhs.id ? -1 : 1);
|
|
1374
302
|
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
|
|
303
|
+
return new AbstractPFrameDriver({
|
|
304
|
+
logger,
|
|
305
|
+
localBlobProvider,
|
|
306
|
+
remoteBlobProvider,
|
|
307
|
+
spillPath: resolvedSpillPath,
|
|
308
|
+
options: params.options,
|
|
309
|
+
resolveDataInfo,
|
|
310
|
+
});
|
|
1378
311
|
}
|