@milaboratories/pl-middle-layer 1.43.58 → 1.43.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/dist/debug/index.cjs +0 -1
  2. package/dist/debug/index.cjs.map +1 -1
  3. package/dist/debug/index.d.ts +0 -1
  4. package/dist/debug/index.d.ts.map +1 -1
  5. package/dist/debug/index.js +0 -1
  6. package/dist/debug/index.js.map +1 -1
  7. package/dist/js_render/computable_context.cjs +6 -2
  8. package/dist/js_render/computable_context.cjs.map +1 -1
  9. package/dist/js_render/computable_context.d.ts +1 -1
  10. package/dist/js_render/computable_context.d.ts.map +1 -1
  11. package/dist/js_render/computable_context.js +6 -2
  12. package/dist/js_render/computable_context.js.map +1 -1
  13. package/dist/middle_layer/driver_kit.cjs +6 -1
  14. package/dist/middle_layer/driver_kit.cjs.map +1 -1
  15. package/dist/middle_layer/driver_kit.js +7 -2
  16. package/dist/middle_layer/driver_kit.js.map +1 -1
  17. package/dist/middle_layer/ops.cjs +4 -8
  18. package/dist/middle_layer/ops.cjs.map +1 -1
  19. package/dist/middle_layer/ops.d.ts +1 -1
  20. package/dist/middle_layer/ops.d.ts.map +1 -1
  21. package/dist/middle_layer/ops.js +4 -8
  22. package/dist/middle_layer/ops.js.map +1 -1
  23. package/dist/middle_layer/project.cjs +3 -0
  24. package/dist/middle_layer/project.cjs.map +1 -1
  25. package/dist/middle_layer/project.d.ts.map +1 -1
  26. package/dist/middle_layer/project.js +3 -0
  27. package/dist/middle_layer/project.js.map +1 -1
  28. package/dist/pool/data.cjs +0 -15
  29. package/dist/pool/data.cjs.map +1 -1
  30. package/dist/pool/data.d.ts +1 -2
  31. package/dist/pool/data.d.ts.map +1 -1
  32. package/dist/pool/data.js +1 -15
  33. package/dist/pool/data.js.map +1 -1
  34. package/dist/pool/driver.cjs +88 -906
  35. package/dist/pool/driver.cjs.map +1 -1
  36. package/dist/pool/driver.d.ts +14 -86
  37. package/dist/pool/driver.d.ts.map +1 -1
  38. package/dist/pool/driver.js +88 -907
  39. package/dist/pool/driver.js.map +1 -1
  40. package/package.json +17 -16
  41. package/src/debug/index.ts +0 -2
  42. package/src/js_render/computable_context.ts +8 -6
  43. package/src/middle_layer/driver_kit.ts +6 -6
  44. package/src/middle_layer/ops.ts +2 -9
  45. package/src/middle_layer/project.ts +3 -0
  46. package/src/pool/data.ts +0 -22
  47. package/src/pool/driver.ts +141 -1208
@@ -1,140 +1,121 @@
1
- import type { DownloadDriver } from '@milaboratories/pl-drivers';
2
- import { PFrameInternal } from '@milaboratories/pl-model-middle-layer';
3
- import type { PlTreeEntry, PlTreeNodeAccessor } from '@milaboratories/pl-tree';
4
- import { isPlTreeNodeAccessor } from '@milaboratories/pl-tree';
5
- import type {
6
- Computable,
7
- ComputableCtx,
8
- ComputableStableDefined,
9
- } from '@milaboratories/computable';
10
- import type {
11
- CalculateTableDataRequest,
12
- CalculateTableDataResponse,
13
- FindColumnsRequest,
14
- FindColumnsResponse,
15
- LocalBlobHandleAndSize,
16
- PColumnIdAndSpec,
17
- PColumnSpec,
18
- PFrameHandle,
19
- PObjectId,
20
- PTableColumnSpec,
21
- PTableHandle,
22
- PTableShape,
23
- PTableVector,
24
- TableRange,
25
- UniqueValuesRequest,
26
- UniqueValuesResponse,
27
- PFrameDriver as SdkPFrameDriver,
28
- PColumn,
29
- PFrameDef,
30
- JoinEntry,
31
- PTableDef,
32
- ValueType,
33
- PTableRecordSingleValueFilterV2,
34
- PTableRecordFilter,
35
- PColumnValues,
36
- DataInfo,
37
- PColumnValue,
38
- RemoteBlobHandleAndSize,
39
- RemoteBlobHandle,
40
- ContentHandler,
41
- } from '@platforma-sdk/model';
42
1
  import {
43
- mapPObjectData,
44
- mapPTableDef,
45
- extractAllColumns,
46
2
  mapDataInfo,
47
3
  isDataInfo,
48
4
  ensureError,
49
5
  PFrameDriverError,
50
6
  isAbortError,
51
- isPFrameDriverError,
52
- uniqueBy,
53
- getAxisId,
54
- canonicalizeJson,
7
+ type LocalBlobHandleAndSize,
8
+ type RemoteBlobHandleAndSize,
9
+ type RemoteBlobHandle,
10
+ type ContentHandler,
11
+ type PColumnSpec,
12
+ type PColumnDataUniversal,
55
13
  } from '@platforma-sdk/model';
56
- import { LRUCache } from 'lru-cache';
57
- import {
58
- makeDataInfoFromPColumnValues,
59
- parseDataInfoResource,
60
- traverseParquetChunkResource,
61
- } from './data';
62
- import { createHash } from 'node:crypto';
63
- import { type MiLogger } from '@milaboratories/ts-helpers';
64
- import { mapValues } from 'es-toolkit';
14
+ import { PFrameInternal } from '@milaboratories/pl-model-middle-layer';
65
15
  import {
66
- assertNever,
67
16
  emptyDir,
68
- ConcurrencyLimitingExecutor,
69
- RefCountResourcePool,
70
- type PoolResource,
17
+ RefCountPoolBase,
18
+ type PoolEntry,
19
+ type MiLogger,
71
20
  } from '@milaboratories/ts-helpers';
72
- import canonicalize from 'canonicalize';
73
- import { PFrameFactory, HttpHelpers } from '@milaboratories/pframes-rs-node';
21
+ import type { DownloadDriver } from '@milaboratories/pl-drivers';
22
+ import {
23
+ isPlTreeNodeAccessor,
24
+ type PlTreeEntry,
25
+ type PlTreeNodeAccessor,
26
+ } from '@milaboratories/pl-tree';
27
+ import type {
28
+ Computable,
29
+ ComputableStableDefined,
30
+ } from '@milaboratories/computable';
31
+ import {
32
+ makeJsonDataInfo,
33
+ AbstractPFrameDriver,
34
+ AbstractPFrameDriverOpsDefaults,
35
+ type AbstractInternalPFrameDriver,
36
+ type AbstractPFrameDriverOps,
37
+ type LocalBlobProvider,
38
+ type RemoteBlobProvider,
39
+ } from '@milaboratories/pf-driver';
40
+ import { HttpHelpers } from '@milaboratories/pframes-rs-node';
74
41
  import path from 'node:path';
75
- import { getDebugFlags } from '../debug';
76
42
  import { Readable } from 'node:stream';
43
+ import {
44
+ parseDataInfoResource,
45
+ traverseParquetChunkResource,
46
+ } from './data';
77
47
 
78
- type PColumnDataUniversal = PlTreeNodeAccessor | DataInfo<PlTreeNodeAccessor> | PColumnValues;
79
-
80
- function makeBlobId(res: PlTreeEntry): string {
48
+ function makeBlobId(res: PlTreeEntry): PFrameInternal.PFrameBlobId {
81
49
  return String(res.rid);
82
50
  }
83
51
 
84
- type LocalBlobPoolEntry = PoolResource<ComputableStableDefined<LocalBlobHandleAndSize>>;
85
-
86
- class LocalBlobPool
87
- extends RefCountResourcePool<PlTreeEntry, ComputableStableDefined<LocalBlobHandleAndSize>>
88
- implements PFrameInternal.PFrameDataSourceV2 {
89
- constructor(private readonly blobDriver: DownloadDriver) {
52
+ type LocalBlob = ComputableStableDefined<LocalBlobHandleAndSize>;
53
+ class LocalBlobProviderImpl
54
+ extends RefCountPoolBase<PlTreeEntry, PFrameInternal.PFrameBlobId, LocalBlob>
55
+ implements LocalBlobProvider<PlTreeEntry> {
56
+ constructor(
57
+ private readonly blobDriver: DownloadDriver,
58
+ private readonly logger: PFrameInternal.Logger,
59
+ ) {
90
60
  super();
91
61
  }
92
62
 
93
- protected calculateParamsKey(params: PlTreeEntry): string {
63
+ protected calculateParamsKey(params: PlTreeEntry): PFrameInternal.PFrameBlobId {
94
64
  return makeBlobId(params);
95
65
  }
96
66
 
97
- protected createNewResource(params: PlTreeEntry, _key: string): ComputableStableDefined<LocalBlobHandleAndSize> {
67
+ protected createNewResource(params: PlTreeEntry, _key: PFrameInternal.PFrameBlobId): LocalBlob {
98
68
  return this.blobDriver.getDownloadedBlob(params);
99
69
  }
100
70
 
101
- public getByKey(blobId: string): ComputableStableDefined<LocalBlobHandleAndSize> {
71
+ public getByKey(blobId: PFrameInternal.PFrameBlobId): LocalBlob {
102
72
  const resource = super.tryGetByKey(blobId);
103
- if (!resource) throw new PFrameDriverError(`Blob with id ${blobId} not found.`);
73
+ if (!resource) throw new PFrameDriverError(`Local blob with id ${blobId} not found.`);
104
74
  return resource;
105
75
  }
106
76
 
107
- public async preloadBlob(blobIds: string[], signal?: AbortSignal): Promise<void> {
108
- try {
109
- await Promise.all(blobIds.map((blobId) => this.getByKey(blobId).awaitStableFullValue(signal)));
110
- } catch (err: unknown) {
111
- if (!isAbortError(err)) throw err;
112
- }
113
- };
114
-
115
- public async resolveBlobContent(blobId: string, signal?: AbortSignal): Promise<Uint8Array> {
116
- const computable = this.getByKey(blobId);
117
- const blob = await computable.awaitStableValue(signal);
118
- return await this.blobDriver.getContent(blob.handle, { signal });
119
- };
77
+ public makeDataSource(signal: AbortSignal): Omit<PFrameInternal.PFrameDataSourceV2, 'parquetServer'> {
78
+ return {
79
+ preloadBlob: async (blobIds: PFrameInternal.PFrameBlobId[]) => {
80
+ try {
81
+ await Promise.all(blobIds.map((blobId) => this.getByKey(blobId).awaitStableFullValue(signal)));
82
+ } catch (err: unknown) {
83
+ if (!isAbortError(err)) throw err;
84
+ }
85
+ },
86
+ resolveBlobContent: async (blobId: PFrameInternal.PFrameBlobId) => {
87
+ const computable = this.getByKey(blobId);
88
+ const blob = await computable.awaitStableValue(signal);
89
+ return await this.blobDriver.getContent(blob.handle, { signal });
90
+ },
91
+ };
92
+ }
120
93
  }
121
94
 
122
- type RemoteBlobPoolEntry = PoolResource<Computable<RemoteBlobHandleAndSize>>;
123
-
95
+ type RemoteBlob = Computable<RemoteBlobHandleAndSize>;
124
96
  class RemoteBlobPool
125
- extends RefCountResourcePool<PlTreeEntry, Computable<RemoteBlobHandleAndSize>> {
126
- constructor(private readonly blobDriver: DownloadDriver) {
97
+ extends RefCountPoolBase<PlTreeEntry, PFrameInternal.PFrameBlobId, RemoteBlob> {
98
+ constructor(
99
+ private readonly blobDriver: DownloadDriver,
100
+ private readonly logger: PFrameInternal.Logger,
101
+ ) {
127
102
  super();
128
103
  }
129
104
 
130
- protected calculateParamsKey(params: PlTreeEntry): string {
131
- return String(params.rid);
105
+ protected calculateParamsKey(params: PlTreeEntry): PFrameInternal.PFrameBlobId {
106
+ return makeBlobId(params);
132
107
  }
133
108
 
134
- protected createNewResource(params: PlTreeEntry, _key: string): Computable<RemoteBlobHandleAndSize> {
109
+ protected createNewResource(params: PlTreeEntry, _key: PFrameInternal.PFrameBlobId): RemoteBlob {
135
110
  return this.blobDriver.getOnDemandBlob(params);
136
111
  }
137
112
 
113
+ public getByKey(blobId: PFrameInternal.PFrameBlobId): RemoteBlob {
114
+ const resource = super.tryGetByKey(blobId);
115
+ if (!resource) throw new PFrameDriverError(`Remote blob with id ${blobId} not found.`);
116
+ return resource;
117
+ }
118
+
138
119
  public async withContent<T>(
139
120
  handle: RemoteBlobHandle,
140
121
  options: {
@@ -155,15 +136,15 @@ class RemoteBlobPool
155
136
  }
156
137
 
157
138
  interface BlobStoreOptions extends PFrameInternal.ObjectStoreOptions {
158
- remoteBlobPool: RemoteBlobPool;
139
+ remoteBlobProvider: RemoteBlobPool;
159
140
  };
160
141
 
161
142
  class BlobStore extends PFrameInternal.BaseObjectStore {
162
- private readonly remoteBlobPool: RemoteBlobPool;
143
+ private readonly remoteBlobProvider: RemoteBlobPool;
163
144
 
164
145
  constructor(options: BlobStoreOptions) {
165
146
  super(options);
166
- this.remoteBlobPool = options.remoteBlobPool;
147
+ this.remoteBlobProvider = options.remoteBlobProvider;
167
148
  }
168
149
 
169
150
  public override async request(
@@ -187,7 +168,7 @@ class BlobStore extends PFrameInternal.BaseObjectStore {
187
168
  };
188
169
 
189
170
  try {
190
- const computable = this.remoteBlobPool.tryGetByKey(blobId);
171
+ const computable = this.remoteBlobProvider.tryGetByKey(blobId);
191
172
  if (!computable) return await respond({ type: 'NotFound' });
192
173
 
193
174
  let blob: RemoteBlobHandleAndSize;
@@ -221,7 +202,7 @@ class BlobStore extends PFrameInternal.BaseObjectStore {
221
202
  `PFrames blob store requesting content for ${blobId}, `
222
203
  + `range [${translatedRange.start}..=${translatedRange.end}]`,
223
204
  );
224
- return await this.remoteBlobPool.withContent(blob.handle, {
205
+ return await this.remoteBlobProvider.withContent(blob.handle, {
225
206
  range: translatedRange,
226
207
  signal: params.signal,
227
208
  handler: async (data) => {
@@ -245,1134 +226,86 @@ class BlobStore extends PFrameInternal.BaseObjectStore {
245
226
  }
246
227
  }
247
228
 
248
- type InternalPFrameData = PFrameDef<PFrameInternal.DataInfo<PlTreeEntry>>;
249
-
250
- const valueTypes: ValueType[] = ['Int', 'Long', 'Float', 'Double', 'String', 'Bytes'] as const;
251
-
252
- function migrateFilters(
253
- filters: PTableRecordFilter[],
254
- logger: PFrameInternal.Logger,
255
- ): PTableRecordFilter[] {
256
- const filtersV1 = [];
257
- const filtersV2: PTableRecordSingleValueFilterV2[] = [];
258
- for (const filter of filters) {
259
- if ((filter.type as unknown) === 'bySingleColumn') {
260
- filtersV1.push(filter);
261
- filtersV2.push({
262
- ...filter,
263
- type: 'bySingleColumnV2',
264
- });
265
- } else {
266
- filtersV2.push(filter);
267
- }
268
- }
269
- if (filtersV1.length > 0) {
270
- const filtersV1Json = JSON.stringify(filtersV1);
271
- logger('warn',
272
- `type overriten from 'bySingleColumn' to 'bySingleColumnV2' for filters: ${filtersV1Json}`,
273
- );
274
- }
275
- return filtersV2;
276
- }
277
-
278
- function migratePTableFilters<T>(
279
- def: Omit<PTableDef<T>, 'partitionFilters'> | PTableDef<T>,
280
- logger: PFrameInternal.Logger,
281
- ): PTableDef<T> {
282
- if (!('partitionFilters' in def)) {
283
- // For old blocks assume all axes filters to be partition filters
284
- return {
285
- ...def,
286
- partitionFilters: migrateFilters(def.filters.filter((f) => f.column.type === 'axis'), logger),
287
- filters: migrateFilters(def.filters.filter((f) => f.column.type === 'column'), logger),
288
- };
289
- }
290
- return {
291
- ...def,
292
- partitionFilters: migrateFilters(def.partitionFilters, logger),
293
- filters: migrateFilters(def.filters, logger),
294
- };
295
- }
296
-
297
- function hasArtificialColumns<T>(entry: JoinEntry<T>): boolean {
298
- switch (entry.type) {
299
- case 'column':
300
- case 'slicedColumn':
301
- case 'inlineColumn':
302
- return false;
303
- case 'artificialColumn':
304
- return true;
305
- case 'full':
306
- case 'inner':
307
- return entry.entries.some(hasArtificialColumns);
308
- case 'outer':
309
- return hasArtificialColumns(entry.primary) || entry.secondary.some(hasArtificialColumns);
310
- default:
311
- assertNever(entry);
312
- }
313
- }
314
-
315
- const bigintReplacer = (_: string, v: unknown) => (typeof v === 'bigint' ? v.toString() : v);
316
-
317
- class PFramePool extends RefCountResourcePool<InternalPFrameData, PFrameHolder> {
229
+ class RemoteBlobProviderImpl implements RemoteBlobProvider<PlTreeEntry> {
318
230
  constructor(
319
- private readonly parquetServer: PFrameInternal.HttpServerInfo,
320
- private readonly localBlobPool: LocalBlobPool,
321
- private readonly remoteBlobPool: RemoteBlobPool,
322
- private readonly logger: PFrameInternal.Logger,
323
- private readonly spillPath: string,
324
- ) {
325
- super();
326
- }
327
-
328
- protected calculateParamsKey(params: InternalPFrameData): string {
329
- try {
330
- return stableKeyFromPFrameData(params);
331
- } catch (err: unknown) {
332
- if (isPFrameDriverError(err)) throw err;
333
- throw new PFrameDriverError(
334
- `PFrame handle calculation failed, `
335
- + `request: ${JSON.stringify(params, bigintReplacer)}, `
336
- + `error: ${ensureError(err)}`,
337
- );
338
- }
339
- }
340
-
341
- protected createNewResource(params: InternalPFrameData, key: string): PFrameHolder {
342
- if (getDebugFlags().logPFrameRequests) {
343
- this.logger('info',
344
- `PFrame creation (pFrameHandle = ${key}): `
345
- + `${JSON.stringify(params, bigintReplacer)}`,
346
- );
347
- }
348
- return new PFrameHolder(
349
- this.parquetServer,
350
- this.localBlobPool,
351
- this.remoteBlobPool,
352
- this.logger,
353
- this.spillPath,
354
- params,
355
- );
356
- }
357
-
358
- public getByKey(key: PFrameHandle): PFrameHolder {
359
- const resource = super.tryGetByKey(key);
360
- if (!resource) throw new PFrameDriverError(`PFrame not found, handle = ${key}`);
361
- return resource;
362
- }
363
- }
364
-
365
- class PTableDefPool extends RefCountResourcePool<FullPTableDef, PTableDefHolder> {
366
- constructor(private readonly logger: PFrameInternal.Logger) {
367
- super();
368
- }
369
-
370
- protected calculateParamsKey(params: FullPTableDef): string {
371
- return stableKeyFromFullPTableDef(params);
372
- }
373
-
374
- protected createNewResource(params: FullPTableDef, key: string): PTableDefHolder {
375
- return new PTableDefHolder(params, key as PTableHandle, this.logger);
376
- }
377
-
378
- public getByKey(key: PTableHandle): PTableDefHolder {
379
- const resource = super.tryGetByKey(key);
380
- if (!resource) throw new PFrameDriverError(`PTable definition not found, handle = ${key}`);
381
- return resource;
382
- }
383
- }
384
-
385
- class PTablePool extends RefCountResourcePool<FullPTableDef, PTableHolder> {
386
- constructor(
387
- private readonly pFrames: PFramePool,
388
- private readonly pTableDefs: PTableDefPool,
389
- private readonly logger: PFrameInternal.Logger,
390
- ) {
391
- super();
392
- }
393
-
394
- protected calculateParamsKey(params: FullPTableDef): string {
395
- return stableKeyFromFullPTableDef(params);
396
- }
397
-
398
- protected createNewResource(params: FullPTableDef, key: string): PTableHolder {
399
- if (getDebugFlags().logPFrameRequests) {
400
- this.logger('info',
401
- `PTable creation (pTableHandle = ${key}): `
402
- + `${JSON.stringify(params, bigintReplacer)}`,
403
- );
404
- }
405
-
406
- const handle = params.pFrameHandle;
407
- const { pFramePromise, disposeSignal } = this.pFrames.getByKey(handle);
408
-
409
- const defDisposeSignal = this.pTableDefs.tryGetByKey(key)?.disposeSignal;
410
- const combinedSignal = AbortSignal.any([disposeSignal, defDisposeSignal].filter((s) => !!s));
411
-
412
- // 3. Sort
413
- if (params.def.sorting.length > 0) {
414
- const predecessor = this.acquire({
415
- ...params,
416
- def: {
417
- ...params.def,
418
- sorting: [],
419
- },
420
- });
421
- const { resource: { pTablePromise } } = predecessor;
422
- const sortedTable = pTablePromise.then((pTable) => pTable.sort(params.def.sorting));
423
- return new PTableHolder(handle, combinedSignal, sortedTable, predecessor);
424
- }
425
-
426
- // 2. Filter (except the case with artificial columns where cartesian creates too many rows)
427
- if (!hasArtificialColumns(params.def.src) && params.def.filters.length > 0) {
428
- const predecessor = this.acquire({
429
- ...params,
430
- def: {
431
- ...params.def,
432
- filters: [],
433
- },
434
- });
435
- const { resource: { pTablePromise } } = predecessor;
436
- const filteredTable = pTablePromise.then((pTable) => pTable.filter(params.def.filters));
437
- return new PTableHolder(handle, combinedSignal, filteredTable, predecessor);
438
- }
439
-
440
- // 1. Join
441
- const table = pFramePromise.then((pFrame) => pFrame.createTable({
442
- src: joinEntryToInternal(params.def.src),
443
- // `params.def.filters` would be non-empty only when join has artificial columns
444
- filters: [...params.def.partitionFilters, ...params.def.filters],
445
- }));
446
- return new PTableHolder(handle, combinedSignal, table);
447
- }
448
-
449
- public getByKey(key: PTableHandle): PTableHolder {
450
- const resource = super.tryGetByKey(key);
451
- if (!resource) throw new PFrameDriverError(`PTable not found, handle = ${key}`);
452
- return resource;
453
- }
454
- }
455
-
456
- class PTableCacheUi {
457
- private readonly perFrame = new Map<PFrameHandle, LRUCache<PTableHandle, PoolResource<PTableHolder>>>();
458
- private readonly global: LRUCache<PTableHandle, PoolResource<PTableHolder>>;
459
- private readonly disposeListeners = new Set<PTableHandle>();
460
-
461
- constructor(
462
- private readonly logger: PFrameInternal.Logger,
463
- private readonly ops: Pick<PFrameDriverOps, 'pFramesCacheMaxSize' | 'pFrameCacheMaxCount'>,
464
- ) {
465
- this.global = new LRUCache<PTableHandle, PoolResource<PTableHolder>>({
466
- maxSize: this.ops.pFramesCacheMaxSize,
467
- dispose: (resource, key, reason) => {
468
- if (reason === 'evict') {
469
- this.perFrame.get(resource.resource.pFrame)?.delete(key);
470
- }
471
-
472
- if (this.perFrame.get(resource.resource.pFrame)?.size === 0) {
473
- this.perFrame.delete(resource.resource.pFrame);
474
- }
475
-
476
- resource.unref();
477
- if (getDebugFlags().logPFrameRequests) {
478
- logger('info', `calculateTableData cache - removed PTable ${key} (reason: ${reason})`);
479
- }
480
- },
481
- });
482
- }
483
-
484
- public cache(resource: PoolResource<PTableHolder>, size: number): void {
485
- const key = resource.key as PTableHandle;
486
- if (getDebugFlags().logPFrameRequests) {
487
- this.logger('info', `calculateTableData cache - added PTable ${key} with size ${size}`);
488
- }
489
-
490
- this.global.set(key, resource, { size: Math.max(size, 1) }); // 1 is minimum size to avoid cache evictions
491
-
492
- let perFrame = this.perFrame.get(resource.resource.pFrame);
493
- if (!perFrame) {
494
- perFrame = new LRUCache<PTableHandle, PoolResource<PTableHolder>>({
495
- max: this.ops.pFrameCacheMaxCount,
496
- dispose: (_resource, key, reason) => {
497
- if (reason === 'evict') {
498
- this.global.delete(key);
499
- }
500
- },
501
- });
502
- this.perFrame.set(resource.resource.pFrame, perFrame);
503
- }
504
- perFrame.set(key, resource);
505
-
506
- if (!this.disposeListeners.has(key)) {
507
- const disposeListener = () => {
508
- this.perFrame.get(resource.resource.pFrame)?.delete(key);
509
- this.global.delete(key);
510
-
511
- this.disposeListeners.delete(key);
512
- resource.resource.disposeSignal.removeEventListener('abort', disposeListener);
513
- };
514
- this.disposeListeners.add(key);
515
- resource.resource.disposeSignal.addEventListener('abort', disposeListener);
516
- }
517
- }
518
- }
519
-
520
- class PTableCacheModel {
521
- private readonly global: LRUCache<PTableHandle, PoolResource<PTableHolder>>;
522
- private readonly disposeListeners = new Set<PTableHandle>();
523
-
524
- constructor(
525
- private readonly logger: PFrameInternal.Logger,
526
- ops: Pick<PFrameDriverOps, 'pTablesCacheMaxSize'>,
527
- ) {
528
- this.global = new LRUCache<PTableHandle, PoolResource<PTableHolder>>({
529
- maxSize: ops.pTablesCacheMaxSize,
530
- dispose: (resource, key, reason) => {
531
- resource.unref();
532
- if (getDebugFlags().logPFrameRequests) {
533
- logger('info', `createPTable cache - removed PTable ${key} (reason: ${reason})`);
534
- }
535
- },
536
- });
537
- }
538
-
539
- public cache(resource: PoolResource<PTableHolder>, size: number, defDisposeSignal: AbortSignal): void {
540
- const key = resource.key as PTableHandle;
541
- if (getDebugFlags().logPFrameRequests) {
542
- this.logger('info', `createPTable cache - added PTable ${key} with size ${size}`);
543
- }
544
-
545
- const status: LRUCache.Status<PoolResource<PTableHolder>> = {};
546
- this.global.set(key, resource, { size: Math.max(size, 1), status }); // 1 is minimum size to avoid cache evictions
547
-
548
- if (status.maxEntrySizeExceeded) {
549
- resource.unref();
550
- if (getDebugFlags().logPFrameRequests) {
551
- this.logger('info', `createPTable cache - removed PTable ${key} (maxEntrySizeExceeded)`);
552
- }
553
- } else {
554
- if (!this.disposeListeners.has(key)) {
555
- const disposeListener = () => {
556
- this.global.delete(key);
557
-
558
- this.disposeListeners.delete(key);
559
- defDisposeSignal.removeEventListener('abort', disposeListener);
560
- };
561
- this.disposeListeners.add(key);
562
- defDisposeSignal.addEventListener('abort', disposeListener);
563
- }
564
- }
565
- }
566
- }
567
-
568
- class PFrameHolder implements PFrameInternal.PFrameDataSourceV2, AsyncDisposable {
569
- public readonly pFramePromise: Promise<PFrameInternal.PFrameV12>;
570
- private readonly abortController = new AbortController();
571
- private readonly localBlobs: LocalBlobPoolEntry[] = [];
572
- private readonly remoteBlobs: RemoteBlobPoolEntry[] = [];
231
+ private readonly pool: RemoteBlobPool,
232
+ private readonly server: PFrameInternal.HttpServer,
233
+ ) {}
573
234
 
574
- constructor(
575
- public readonly parquetServer: PFrameInternal.HttpServerInfo,
576
- private readonly localBlobPool: LocalBlobPool,
577
- private readonly remoteBlobPool: RemoteBlobPool,
235
+ public static async init(
236
+ blobDriver: DownloadDriver,
578
237
  logger: PFrameInternal.Logger,
579
- private readonly spillPath: string,
580
- columns: InternalPFrameData,
581
- ) {
582
- const makeLocalBlobId = (blob: PlTreeEntry): string => {
583
- const localBlob = this.localBlobPool.acquire(blob);
584
- this.localBlobs.push(localBlob);
585
- return localBlob.key;
586
- };
238
+ serverOptions: Omit<PFrameInternal.HttpServerOptions, 'handler'>,
239
+ ): Promise<RemoteBlobProviderImpl> {
240
+ const pool = new RemoteBlobPool(blobDriver, logger);
241
+ const store = new BlobStore({ remoteBlobProvider: pool, logger });
587
242
 
588
- const makeRemoteBlobId = (blob: PlTreeEntry): string => {
589
- const remoteBlob = this.remoteBlobPool.acquire(blob);
590
- this.remoteBlobs.push(remoteBlob);
591
- return remoteBlob.key + PFrameInternal.ParquetExtension;
592
- };
243
+ const handler = HttpHelpers.createRequestHandler({ store });
244
+ const server = await HttpHelpers.createHttpServer({ ...serverOptions, handler });
245
+ logger('info', `PFrames HTTP server started on ${server.info.url}`);
593
246
 
594
- const mapColumnData = (data: PFrameInternal.DataInfo<PlTreeEntry>): PFrameInternal.DataInfo<string> => {
595
- switch (data.type) {
596
- case 'Json':
597
- return { ...data };
598
- case 'JsonPartitioned':
599
- return {
600
- ...data,
601
- parts: mapValues(data.parts, makeLocalBlobId),
602
- };
603
- case 'BinaryPartitioned':
604
- return {
605
- ...data,
606
- parts: mapValues(data.parts, (v) => ({
607
- index: makeLocalBlobId(v.index),
608
- values: makeLocalBlobId(v.values),
609
- })),
610
- };
611
- case 'ParquetPartitioned':
612
- return {
613
- ...data,
614
- parts: mapValues(data.parts, (v) => ({
615
- ...v,
616
- data: makeRemoteBlobId(v.data),
617
- })),
618
- };
619
- default:
620
- assertNever(data);
621
- }
622
- };
623
-
624
- const jsonifiedColumns = columns.map((column) => ({
625
- ...column,
626
- data: mapColumnData(column.data),
627
- }));
628
-
629
- try {
630
- const pFrame = PFrameFactory.createPFrame({ spillPath: this.spillPath, logger });
631
- pFrame.setDataSource(this);
632
-
633
- const promises: Promise<void>[] = [];
634
- for (const column of jsonifiedColumns) {
635
- pFrame.addColumnSpec(column.id, column.spec);
636
- promises.push(pFrame.setColumnData(column.id, column.data, { signal: this.disposeSignal }));
637
- }
638
-
639
- this.pFramePromise = Promise.all(promises)
640
- .then(() => pFrame)
641
- .catch((err) => {
642
- this.dispose();
643
- pFrame.dispose();
644
- throw new PFrameDriverError(
645
- `PFrame creation failed asynchronously, `
646
- + `columns: ${JSON.stringify(jsonifiedColumns)}, `
647
- + `error: ${ensureError(err)}`,
648
- );
649
- });
650
- } catch (err: unknown) {
651
- throw new PFrameDriverError(
652
- `PFrame creation failed synchronously, `
653
- + `columns: ${JSON.stringify(jsonifiedColumns)}, `
654
- + `error: ${ensureError(err)}`,
655
- );
656
- }
657
- }
658
-
659
- public readonly preloadBlob = async (blobIds: string[]): Promise<void> => {
660
- return await this.localBlobPool.preloadBlob(blobIds, this.disposeSignal);
661
- };
662
-
663
- public readonly resolveBlobContent = async (blobId: string): Promise<Uint8Array> => {
664
- return await this.localBlobPool.resolveBlobContent(blobId, this.disposeSignal);
665
- };
666
-
667
- public get disposeSignal(): AbortSignal {
668
- return this.abortController.signal;
669
- }
670
-
671
- private dispose(): void {
672
- this.abortController.abort();
673
- this.localBlobs.forEach((entry) => entry.unref());
674
- this.remoteBlobs.forEach((entry) => entry.unref());
675
- }
676
-
677
- async [Symbol.asyncDispose](): Promise<void> {
678
- this.dispose();
679
- await this.pFramePromise
680
- .then((pFrame) => pFrame.dispose())
681
- .catch(() => { /* mute error */ });
682
- }
683
- }
684
-
685
- class PTableDefHolder implements Disposable {
686
- private readonly abortController = new AbortController();
687
-
688
- constructor(
689
- public readonly def: FullPTableDef,
690
- private readonly pTableHandle: PTableHandle,
691
- private readonly logger: PFrameInternal.Logger,
692
- ) {
693
- if (getDebugFlags().logPFrameRequests) {
694
- this.logger('info', `PTable definition saved (pTableHandle = ${this.pTableHandle})`);
695
- }
696
- }
697
-
698
- public get disposeSignal(): AbortSignal {
699
- return this.abortController.signal;
700
- }
701
-
702
- [Symbol.dispose](): void {
703
- this.abortController.abort();
704
- if (getDebugFlags().logPFrameRequests) {
705
- this.logger('info', `PTable definition disposed (pTableHandle = ${this.pTableHandle})`);
706
- }
247
+ return new RemoteBlobProviderImpl(pool, server);
707
248
  }
708
- }
709
-
710
- class PTableHolder implements AsyncDisposable {
711
- private readonly abortController = new AbortController();
712
- private readonly combinedDisposeSignal: AbortSignal;
713
249
 
714
- constructor(
715
- public readonly pFrame: PFrameHandle,
716
- pFrameDisposeSignal: AbortSignal,
717
- public readonly pTablePromise: Promise<PFrameInternal.PTableV7>,
718
- private readonly predecessor?: PoolResource<PTableHolder>,
719
- ) {
720
- this.combinedDisposeSignal = AbortSignal.any([pFrameDisposeSignal, this.abortController.signal]);
250
+ public acquire(params: PlTreeEntry): PoolEntry {
251
+ return this.pool.acquire(params);
721
252
  }
722
253
 
723
- public get disposeSignal(): AbortSignal {
724
- return this.combinedDisposeSignal;
254
+ public httpServerInfo(): PFrameInternal.HttpServerInfo {
255
+ return this.server.info;
725
256
  }
726
257
 
727
258
  async [Symbol.asyncDispose](): Promise<void> {
728
- this.abortController.abort();
729
- await this.pTablePromise
730
- .then((pTable) => pTable.dispose())
731
- .catch(() => { /* mute error */ });
732
- this.predecessor?.unref();
259
+ await this.server.stop();
733
260
  }
734
261
  }
735
262
 
736
- type FullPTableDef = {
737
- pFrameHandle: PFrameHandle;
738
- def: PTableDef<PObjectId>;
739
- };
263
+ export interface InternalPFrameDriver
264
+ extends AbstractInternalPFrameDriver<PColumnDataUniversal<PlTreeNodeAccessor>> {};
740
265
 
741
- export type PFrameDriverOps = {
266
+ export type PFrameDriverOps = AbstractPFrameDriverOps & {
742
267
  /** Port to run parquet HTTP server on. */
743
268
  parquetServerPort: number;
744
- /** Concurrency limits for `getUniqueValues` and `calculateTableData` requests */
745
- pFrameConcurrency: number;
746
- /** Concurrency limits for `getShape` and `getData` requests */
747
- pTableConcurrency: number;
748
- /** Maximum number of `calculateTableData` results cached for each PFrame */
749
- pFrameCacheMaxCount: number;
750
- /**
751
- * Maximum size of `calculateTableData` results cached for PFrames overall.
752
- * The limit is soft, as the same table could be materialized with other requests and will not be deleted in such case.
753
- * Also each table has predeccessors, overlapping predecessors will be counted twice, so the effective limit is smaller.
754
- */
755
- pFramesCacheMaxSize: number;
756
- /**
757
- * Maximum size of `createPTable` results cached on disk.
758
- * The limit is soft, as the same table could be materialized with other requests and will not be deleted in such case.
759
- * Also each table has predeccessors, overlapping predecessors will be counted twice, so the effective limit is smaller.
760
- */
761
- pTablesCacheMaxSize: number;
762
269
  };
763
270
 
764
- /**
765
- * Extends public and safe SDK's driver API with methods used internally in the middle
766
- * layer and in tests.
767
- */
768
- export interface InternalPFrameDriver extends SdkPFrameDriver, AsyncDisposable {
769
- /** Dispose the driver and all its resources. */
770
- dispose(): Promise<void>;
771
-
772
- /**
773
- * Dump active PFrames allocations in pprof format.
774
- * The result of this function should be saved as `profile.pb.gz`.
775
- * Use {@link https://pprof.me/} or {@link https://www.speedscope.app/}
776
- * to view the allocation flamechart.
777
- * @warning This method will always reject on Windows!
778
- */
779
- pprofDump(): Promise<Uint8Array>;
780
-
781
- /** Create a new PFrame */
782
- createPFrame(
783
- def: PFrameDef<PColumnDataUniversal>,
784
- ctx: ComputableCtx,
785
- ): PFrameHandle;
786
-
787
- /** Create a new PTable */
788
- createPTable(
789
- def: PTableDef<PColumn<PColumnDataUniversal>>,
790
- ctx: ComputableCtx,
791
- ): PTableHandle;
792
-
793
- /** Calculates data for the table and returns complete data representation of it */
794
- calculateTableData(
795
- handle: PFrameHandle,
796
- request: CalculateTableDataRequest<PObjectId>,
797
- range: TableRange | undefined,
798
- signal?: AbortSignal
799
- ): Promise<CalculateTableDataResponse>;
800
-
801
- /** Calculate set of unique values for a specific axis for the filtered set of records */
802
- getUniqueValues(
803
- handle: PFrameHandle,
804
- request: UniqueValuesRequest,
805
- signal?: AbortSignal
806
- ): Promise<UniqueValuesResponse>;
807
-
808
- /** Unified table shape */
809
- getShape(
810
- handle: PTableHandle,
811
- signal?: AbortSignal,
812
- ): Promise<PTableShape>;
813
-
814
- /**
815
- * Retrieve the data from the table. To retrieve only data required, it can be
816
- * sliced both horizontally ({@link columnIndices}) and vertically
817
- * ({@link range}).
818
- *
819
- * @param columnIndices unified indices of columns to be retrieved
820
- * @param range optionally limit the range of records to retrieve
821
- * */
822
- getData(
823
- handle: PTableHandle,
824
- columnIndices: number[],
825
- range: TableRange | undefined,
826
- signal?: AbortSignal,
827
- ): Promise<PTableVector[]>;
828
- }
829
-
830
- export class PFrameDriver implements InternalPFrameDriver {
831
- private readonly pFrames: PFramePool;
832
- private readonly pTableDefs: PTableDefPool;
833
- private readonly pTables: PTablePool;
834
-
835
- private readonly pTableCacheUi: PTableCacheUi;
836
- private readonly pTableCacheModel: PTableCacheModel;
837
-
838
- private readonly frameConcurrencyLimiter: ConcurrencyLimitingExecutor;
839
- private readonly tableConcurrencyLimiter: ConcurrencyLimitingExecutor;
840
-
841
- public async pprofDump(): Promise<Uint8Array> {
842
- return await PFrameFactory.pprofDump();
843
- }
844
-
845
- public static async init(
846
- blobDriver: DownloadDriver,
847
- miLogger: MiLogger,
848
- spillPath: string,
849
- ops: PFrameDriverOps,
850
- ): Promise<PFrameDriver> {
851
- const resolvedSpillPath = path.resolve(spillPath);
852
- await emptyDir(resolvedSpillPath);
853
-
854
- const logger: PFrameInternal.Logger = (level, message) => miLogger[level](message);
855
- const localBlobPool = new LocalBlobPool(blobDriver);
856
- const remoteBlobPool = new RemoteBlobPool(blobDriver);
857
-
858
- const store = new BlobStore({ remoteBlobPool, logger });
859
- const handler = HttpHelpers.createRequestHandler({ store: store });
860
- const server = await HttpHelpers.createHttpServer({ handler, port: ops.parquetServerPort });
861
-
862
- return new PFrameDriver(logger, server, localBlobPool, remoteBlobPool, resolvedSpillPath, ops);
863
- }
864
-
865
- private constructor(
866
- private readonly logger: PFrameInternal.Logger,
867
- private readonly server: PFrameInternal.HttpServer,
868
- localBlobPool: LocalBlobPool,
869
- remoteBlobPool: RemoteBlobPool,
870
- spillPath: string,
871
- ops: PFrameDriverOps,
872
- ) {
873
- const concurrencyLimiter = new ConcurrencyLimitingExecutor(ops.pFrameConcurrency);
874
- this.frameConcurrencyLimiter = concurrencyLimiter;
875
- this.tableConcurrencyLimiter = new ConcurrencyLimitingExecutor(ops.pTableConcurrency);
876
-
877
- this.pFrames = new PFramePool(server.info, localBlobPool, remoteBlobPool, logger, spillPath);
878
- this.pTableDefs = new PTableDefPool(logger);
879
- this.pTables = new PTablePool(this.pFrames, this.pTableDefs, logger);
880
-
881
- this.pTableCacheUi = new PTableCacheUi(logger, ops);
882
- this.pTableCacheModel = new PTableCacheModel(logger, ops);
883
- }
884
-
885
- async dispose(): Promise<void> {
886
- return await this.server.stop();
887
- }
888
-
889
- async [Symbol.asyncDispose](): Promise<void> {
890
- return await this.dispose();
891
- }
892
-
893
- //
894
- // Internal / Config API Methods
895
- //
896
-
897
- public createPFrame(
898
- def: PFrameDef<PColumnDataUniversal>,
899
- ctx: ComputableCtx,
900
- ): PFrameHandle {
901
- const columns: InternalPFrameData = def
902
- .filter((c) => valueTypes.find((t) => t === c.spec.valueType))
903
- .map((c) =>
904
- mapPObjectData(c, (d) =>
905
- isPlTreeNodeAccessor(d)
906
- ? parseDataInfoResource(d)
907
- : isDataInfo(d)
908
- ? d.type === 'ParquetPartitioned'
909
- ? mapDataInfo(d, (a) => traverseParquetChunkResource(a))
910
- : mapDataInfo(d, (a) => a.persist())
911
- : makeDataInfoFromPColumnValues(c.spec, d),
912
- ),
913
- );
914
- const distinctColumns = uniqueBy(columns, (column) => column.id);
915
-
916
- const res = this.pFrames.acquire(distinctColumns);
917
- ctx.addOnDestroy(res.unref);
918
- return res.key as PFrameHandle;
919
- }
920
-
921
- public createPTable(
922
- rawDef: PTableDef<PColumn<PColumnDataUniversal>>,
923
- ctx: ComputableCtx,
924
- ): PTableHandle {
925
- const def = migratePTableFilters(rawDef, this.logger);
926
- const pFrameHandle = this.createPFrame(extractAllColumns(def.src), ctx);
927
- const defIds = mapPTableDef(def, (c) => c.id);
928
- const sortedDef = sortPTableDef(defIds);
929
-
930
- const { key, unref } = this.pTableDefs.acquire({ def: sortedDef, pFrameHandle });
931
- if (getDebugFlags().logPFrameRequests) {
932
- this.logger('info', `Create PTable call (pFrameHandle = ${pFrameHandle}; pTableHandle = ${key})`);
933
- }
934
- ctx.addOnDestroy(unref); // in addition to pframe unref added in createPFrame above
935
- return key as PTableHandle;
936
- }
937
-
938
- //
939
- // PFrame istance methods
940
- //
941
-
942
- public async findColumns(
943
- handle: PFrameHandle,
944
- request: FindColumnsRequest,
945
- ): Promise<FindColumnsResponse> {
946
- const iRequest: PFrameInternal.FindColumnsRequest = {
947
- ...request,
948
- compatibleWith:
949
- request.compatibleWith.length !== 0
950
- ? [{
951
- axesSpec: [
952
- ...new Map(request.compatibleWith.map(
953
- (item) => [canonicalize(item)!, item] as const,
954
- )).values(),
955
- ],
956
- qualifications: [],
957
- }]
958
- : [],
959
- };
960
-
961
- const { pFramePromise } = this.pFrames.getByKey(handle);
962
- const pFrame = await pFramePromise;
963
-
964
- const responce = await pFrame.findColumns(iRequest);
965
- return {
966
- hits: responce.hits
967
- .filter((h) => // only exactly matching columns
968
- h.mappingVariants.length === 0
969
- || h.mappingVariants.some((v) =>
970
- v.qualifications.forHit.length === 0
971
- && v.qualifications.forQueries.every((q) => q.length === 0)))
972
- .map((h) => h.hit),
973
- };
974
- }
975
-
976
- public async getColumnSpec(handle: PFrameHandle, columnId: PObjectId): Promise<PColumnSpec> {
977
- const { pFramePromise } = this.pFrames.getByKey(handle);
978
- const pFrame = await pFramePromise;
979
- return await pFrame.getColumnSpec(columnId);
980
- }
981
-
982
- public async listColumns(handle: PFrameHandle): Promise<PColumnIdAndSpec[]> {
983
- const { pFramePromise } = this.pFrames.getByKey(handle);
984
- const pFrame = await pFramePromise;
985
- return await pFrame.listColumns();
986
- }
987
-
988
- public async calculateTableData(
989
- handle: PFrameHandle,
990
- request: CalculateTableDataRequest<PObjectId>,
991
- range: TableRange | undefined,
992
- signal?: AbortSignal,
993
- ): Promise<CalculateTableDataResponse> {
994
- if (getDebugFlags().logPFrameRequests) {
995
- this.logger('info',
996
- `Call calculateTableData, handle = ${handle}, request = ${JSON.stringify(request, bigintReplacer)}`,
997
- );
998
- }
999
-
1000
- const table = this.pTables.acquire({
1001
- pFrameHandle: handle,
1002
- def: sortPTableDef(migratePTableFilters(request, this.logger)),
1003
- });
1004
- const { pTablePromise, disposeSignal } = table.resource;
1005
- const pTable = await pTablePromise;
1006
-
1007
- const combinedSignal = AbortSignal.any([signal, disposeSignal].filter((s) => !!s));
1008
- return await this.frameConcurrencyLimiter.run(async () => {
1009
- try {
1010
- const spec = pTable.getSpec();
1011
- const data = await pTable.getData([...spec.keys()], {
1012
- range,
1013
- signal: combinedSignal,
1014
- });
1015
-
1016
- const resultSize = await pTable.getFootprint({
1017
- withPredecessors: false,
1018
- signal: combinedSignal,
1019
- });
1020
- if (resultSize >= 2 * 1024 * 1024 * 1024) {
1021
- throw new PFrameDriverError(`Join results exceed 2GB, please add filters to shrink the result size`);
1022
- }
1023
-
1024
- const overallSize = await pTable.getFootprint({
1025
- withPredecessors: true,
1026
- signal: combinedSignal,
1027
- });
1028
- this.pTableCacheUi.cache(table, overallSize);
1029
-
1030
- return spec.map((spec, i) => ({
1031
- spec: spec,
1032
- data: data[i],
1033
- }));
1034
- } catch (err: unknown) {
1035
- table.unref();
1036
- throw err;
1037
- }
1038
- });
1039
- }
1040
-
1041
- public async getUniqueValues(
1042
- handle: PFrameHandle,
1043
- request: UniqueValuesRequest,
1044
- signal?: AbortSignal,
1045
- ): Promise<UniqueValuesResponse> {
1046
- if (getDebugFlags().logPFrameRequests) {
1047
- this.logger('info',
1048
- `Call getUniqueValues, handle = ${handle}, request = ${JSON.stringify(request, bigintReplacer)}`,
1049
- );
1050
- }
1051
-
1052
- const { pFramePromise, disposeSignal } = this.pFrames.getByKey(handle);
1053
- const pFrame = await pFramePromise;
1054
-
1055
- const combinedSignal = AbortSignal.any([signal, disposeSignal].filter((s) => !!s));
1056
- return await this.frameConcurrencyLimiter.run(async () => {
1057
- return await pFrame.getUniqueValues({
1058
- ...request,
1059
- filters: migrateFilters(request.filters, this.logger),
1060
- }, {
1061
- signal: combinedSignal,
1062
- });
1063
- });
1064
- }
1065
-
1066
- //
1067
- // PTable istance methods
1068
- //
1069
-
1070
- public async getSpec(handle: PTableHandle): Promise<PTableColumnSpec[]> {
1071
- const { def } = this.pTableDefs.getByKey(handle);
1072
- using table = this.pTables.acquire(def);
1073
-
1074
- const { pTablePromise } = table.resource;
1075
- const pTable = await pTablePromise;
1076
-
1077
- return pTable.getSpec();
1078
- }
1079
-
1080
- public async getShape(handle: PTableHandle, signal?: AbortSignal): Promise<PTableShape> {
1081
- const { def, disposeSignal: defDisposeSignal } = this.pTableDefs.getByKey(handle);
1082
- const table = this.pTables.acquire(def);
1083
-
1084
- const { pTablePromise, disposeSignal } = table.resource;
1085
- const pTable = await pTablePromise;
1086
-
1087
- const combinedSignal = AbortSignal.any([signal, disposeSignal].filter((s) => !!s));
1088
- const { shape, overallSize } = await this.tableConcurrencyLimiter.run(async () => {
1089
- const shape = await pTable.getShape({
1090
- signal: combinedSignal,
1091
- });
1092
-
1093
- const overallSize = await pTable.getFootprint({
1094
- withPredecessors: true,
1095
- signal: combinedSignal,
1096
- });
1097
-
1098
- return { shape, overallSize };
1099
- });
1100
-
1101
- this.pTableCacheModel.cache(table, overallSize, defDisposeSignal);
1102
- return shape;
1103
- }
1104
-
1105
- public async getData(
1106
- handle: PTableHandle,
1107
- columnIndices: number[],
1108
- range: TableRange | undefined,
1109
- signal?: AbortSignal,
1110
- ): Promise<PTableVector[]> {
1111
- const { def, disposeSignal: defDisposeSignal } = this.pTableDefs.getByKey(handle);
1112
- const table = this.pTables.acquire(def);
1113
-
1114
- const { pTablePromise, disposeSignal } = table.resource;
1115
- const pTable = await pTablePromise;
1116
-
1117
- const combinedSignal = AbortSignal.any([signal, disposeSignal].filter((s) => !!s));
1118
- const { data, overallSize } = await this.tableConcurrencyLimiter.run(async () => {
1119
- const data = await pTable.getData(columnIndices, {
1120
- range,
1121
- signal: combinedSignal,
1122
- });
1123
-
1124
- const overallSize = await pTable.getFootprint({
1125
- withPredecessors: true,
1126
- signal: combinedSignal,
1127
- });
1128
-
1129
- return { data, overallSize };
1130
- });
1131
-
1132
- this.pTableCacheModel.cache(table, overallSize, defDisposeSignal);
1133
- return data;
1134
- }
1135
- }
271
+ export const PFrameDriverOpsDefaults: PFrameDriverOps = {
272
+ ...AbstractPFrameDriverOpsDefaults,
273
+ parquetServerPort: 0, // 0 means that some unused port will be assigned by the OS
274
+ };
1136
275
 
1137
- function joinEntryToInternal(entry: JoinEntry<PObjectId>): PFrameInternal.JoinEntryV4 {
1138
- const type = entry.type;
1139
- switch (type) {
1140
- case 'column':
1141
- return {
1142
- type: 'column',
1143
- columnId: entry.column,
1144
- };
1145
- case 'slicedColumn':
1146
- return {
1147
- type: 'slicedColumn',
1148
- columnId: entry.column,
1149
- newId: entry.newId,
1150
- axisFilters: entry.axisFilters,
1151
- };
1152
- case 'artificialColumn':
1153
- return {
1154
- type: 'artificialColumn',
1155
- columnId: entry.column,
1156
- newId: entry.newId,
1157
- axesIndices: entry.axesIndices,
1158
- };
1159
- case 'inlineColumn':
1160
- return {
1161
- type: 'inlineColumn',
1162
- newId: entry.column.id,
1163
- spec: entry.column.spec,
1164
- dataInfo: {
1165
- type: 'Json',
1166
- keyLength: entry.column.spec.axesSpec.length,
1167
- data: entry.column.data.reduce((acc, row) => {
1168
- acc[JSON.stringify(row.key)] = row.val;
1169
- return acc;
1170
- }, {} as Record<string, PColumnValue>),
1171
- },
1172
- };
1173
- case 'inner':
1174
- case 'full':
1175
- return {
1176
- type: entry.type,
1177
- entries: entry.entries.map((col) => joinEntryToInternal(col)),
1178
- };
1179
- case 'outer':
1180
- return {
1181
- type: 'outer',
1182
- primary: joinEntryToInternal(entry.primary),
1183
- secondary: entry.secondary.map((col) => joinEntryToInternal(col)),
1184
- };
1185
- default:
1186
- throw new PFrameDriverError(`unsupported PFrame join entry type: ${type satisfies never}`);
1187
- }
1188
- }
276
+ export async function createPFrameDriver(params: {
277
+ blobDriver: DownloadDriver;
278
+ logger: MiLogger;
279
+ spillPath: string;
280
+ options: PFrameDriverOps;
281
+ }): Promise<InternalPFrameDriver> {
282
+ const resolvedSpillPath = path.resolve(params.spillPath);
283
+ await emptyDir(resolvedSpillPath);
284
+
285
+ const logger: PFrameInternal.Logger = (level, message) => params.logger[level](message);
286
+ const localBlobProvider = new LocalBlobProviderImpl(params.blobDriver, logger);
287
+ const remoteBlobProvider = await RemoteBlobProviderImpl.init(
288
+ params.blobDriver,
289
+ logger,
290
+ { port: params.options.parquetServerPort },
291
+ );
1189
292
 
1190
- function sortPTableDef(def: PTableDef<PObjectId>): PTableDef<PObjectId> {
1191
- function cmpJoinEntries(lhs: JoinEntry<PObjectId>, rhs: JoinEntry<PObjectId>): number {
1192
- if (lhs.type !== rhs.type) {
1193
- return lhs.type < rhs.type ? -1 : 1;
1194
- }
1195
- const type = lhs.type;
1196
- switch (type) {
1197
- case 'column':
1198
- return lhs.column < (rhs as typeof lhs).column ? -1 : 1;
1199
- case 'slicedColumn':
1200
- case 'artificialColumn':
1201
- return lhs.newId < (rhs as typeof lhs).newId ? -1 : 1;
1202
- case 'inlineColumn': {
1203
- return lhs.column.id < (rhs as typeof lhs).column.id ? -1 : 1;
1204
- }
1205
- case 'inner':
1206
- case 'full': {
1207
- const rhsInner = rhs as typeof lhs;
1208
- if (lhs.entries.length !== rhsInner.entries.length) {
1209
- return lhs.entries.length - rhsInner.entries.length;
1210
- }
1211
- for (let i = 0; i < lhs.entries.length; i++) {
1212
- const cmp = cmpJoinEntries(lhs.entries[i], rhsInner.entries[i]);
1213
- if (cmp !== 0) {
1214
- return cmp;
1215
- }
1216
- }
1217
- return 0;
1218
- }
1219
- case 'outer': {
1220
- const rhsOuter = rhs as typeof lhs;
1221
- const cmp = cmpJoinEntries(lhs.primary, rhsOuter.primary);
1222
- if (cmp !== 0) {
1223
- return cmp;
1224
- }
1225
- if (lhs.secondary.length !== rhsOuter.secondary.length) {
1226
- return lhs.secondary.length - rhsOuter.secondary.length;
1227
- }
1228
- for (let i = 0; i < lhs.secondary.length; i++) {
1229
- const cmp = cmpJoinEntries(lhs.secondary[i], rhsOuter.secondary[i]);
1230
- if (cmp !== 0) {
1231
- return cmp;
1232
- }
1233
- }
1234
- return 0;
1235
- }
1236
- default:
1237
- assertNever(type);
1238
- }
1239
- }
1240
- function sortJoinEntry(entry: JoinEntry<PObjectId>): JoinEntry<PObjectId> {
1241
- switch (entry.type) {
1242
- case 'column':
1243
- case 'slicedColumn':
1244
- case 'inlineColumn':
1245
- return entry;
1246
- case 'artificialColumn': {
1247
- const sortedAxesIndices = entry.axesIndices.toSorted((lhs, rhs) => lhs - rhs);
1248
- return {
1249
- ...entry,
1250
- axesIndices: sortedAxesIndices,
1251
- };
1252
- }
1253
- case 'inner':
1254
- case 'full': {
1255
- const sortedEntries = entry.entries.map(sortJoinEntry);
1256
- sortedEntries.sort(cmpJoinEntries);
1257
- return {
1258
- ...entry,
1259
- entries: sortedEntries,
1260
- };
1261
- }
1262
- case 'outer': {
1263
- const sortedSecondary = entry.secondary.map(sortJoinEntry);
1264
- sortedSecondary.sort(cmpJoinEntries);
1265
- return {
1266
- ...entry,
1267
- primary: sortJoinEntry(entry.primary),
1268
- secondary: sortedSecondary,
1269
- };
1270
- }
1271
- default:
1272
- assertNever(entry);
1273
- }
1274
- }
1275
- function sortFilters(filters: PTableRecordFilter[]): PTableRecordFilter[] {
1276
- return filters.toSorted((lhs, rhs) => {
1277
- if (lhs.column.type === 'axis' && rhs.column.type === 'axis') {
1278
- const lhsId = canonicalizeJson(getAxisId(lhs.column.id));
1279
- const rhsId = canonicalizeJson(getAxisId(rhs.column.id));
1280
- return lhsId < rhsId ? -1 : 1;
1281
- } else if (lhs.column.type === 'column' && rhs.column.type === 'column') {
1282
- return lhs.column.id < rhs.column.id ? -1 : 1;
1283
- } else {
1284
- return lhs.column.type === 'axis' ? -1 : 1;
1285
- }
1286
- });
1287
- }
1288
- return {
1289
- src: sortJoinEntry(def.src),
1290
- partitionFilters: sortFilters(def.partitionFilters),
1291
- filters: sortFilters(def.filters),
1292
- sorting: def.sorting,
293
+ const resolveDataInfo = (spec: PColumnSpec, data: PColumnDataUniversal<PlTreeNodeAccessor>) => {
294
+ return isPlTreeNodeAccessor(data)
295
+ ? parseDataInfoResource(data)
296
+ : isDataInfo(data)
297
+ ? data.type === 'ParquetPartitioned'
298
+ ? mapDataInfo(data, (a) => traverseParquetChunkResource(a))
299
+ : mapDataInfo(data, (a) => a.persist())
300
+ : makeJsonDataInfo(spec, data);
1293
301
  };
1294
- }
1295
-
1296
- function stableKeyFromFullPTableDef(data: FullPTableDef): string {
1297
- try {
1298
- const hash = createHash('sha256');
1299
- hash.update(canonicalize(data)!);
1300
- return hash.digest().toString('hex');
1301
- } catch (err: unknown) {
1302
- throw new PFrameDriverError(
1303
- `PTable handle calculation failed, `
1304
- + `request: ${JSON.stringify(data)}, `
1305
- + `error: ${ensureError(err)}`,
1306
- );
1307
- }
1308
- }
1309
-
1310
- function stableKeyFromPFrameData(data: PColumn<PFrameInternal.DataInfo<PlTreeEntry>>[]): string {
1311
- const orderedData = [...data].map((column) =>
1312
- mapPObjectData(column, (r) => {
1313
- let result: {
1314
- type: string;
1315
- keyLength: number;
1316
- payload: {
1317
- key: string;
1318
- value: null | number | string | [string, string];
1319
- }[];
1320
- };
1321
- const type = r.type;
1322
- switch (type) {
1323
- case 'Json':
1324
- result = {
1325
- type: r.type,
1326
- keyLength: r.keyLength,
1327
- payload: Object.entries(r.data).map(([part, value]) => ({
1328
- key: part,
1329
- value,
1330
- })),
1331
- };
1332
- break;
1333
- case 'JsonPartitioned':
1334
- result = {
1335
- type: r.type,
1336
- keyLength: r.partitionKeyLength,
1337
- payload: Object.entries(r.parts).map(([part, info]) => ({
1338
- key: part,
1339
- value: makeBlobId(info),
1340
- })),
1341
- };
1342
- break;
1343
- case 'BinaryPartitioned':
1344
- result = {
1345
- type: r.type,
1346
- keyLength: r.partitionKeyLength,
1347
- payload: Object.entries(r.parts).map(([part, info]) => ({
1348
- key: part,
1349
- value: [makeBlobId(info.index), makeBlobId(info.values)] as const,
1350
- })),
1351
- };
1352
- break;
1353
- case 'ParquetPartitioned':
1354
- result = {
1355
- type: r.type,
1356
- keyLength: r.partitionKeyLength,
1357
- payload: Object.entries(r.parts).map(([part, info]) => ({
1358
- key: part,
1359
- value: info.dataDigest || [
1360
- makeBlobId(info.data),
1361
- JSON.stringify({ axes: info.axes, column: info.column }),
1362
- ] as const,
1363
- })),
1364
- };
1365
- break;
1366
- default:
1367
- throw new PFrameDriverError(`unsupported resource type: ${JSON.stringify(type satisfies never)}`);
1368
- }
1369
- result.payload.sort((lhs, rhs) => lhs.key < rhs.key ? -1 : 1);
1370
- return result;
1371
- }),
1372
- );
1373
- orderedData.sort((lhs, rhs) => lhs.id < rhs.id ? -1 : 1);
1374
302
 
1375
- const hash = createHash('sha256');
1376
- hash.update(canonicalize(orderedData)!);
1377
- return hash.digest().toString('hex');
303
+ return new AbstractPFrameDriver({
304
+ logger,
305
+ localBlobProvider,
306
+ remoteBlobProvider,
307
+ spillPath: resolvedSpillPath,
308
+ options: params.options,
309
+ resolveDataInfo,
310
+ });
1378
311
  }