@milaboratories/pl-middle-layer 1.43.12 → 1.43.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -65,7 +65,7 @@ import {
65
65
  emptyDir,
66
66
  ConcurrencyLimitingExecutor,
67
67
  RefCountResourcePool,
68
- type PollResource,
68
+ type PoolResource,
69
69
  } from '@milaboratories/ts-helpers';
70
70
  import canonicalize from 'canonicalize';
71
71
  import { PFrameFactory, HttpHelpers } from '@milaboratories/pframes-rs-node';
@@ -79,7 +79,7 @@ function makeBlobId(res: PlTreeEntry): string {
79
79
  return String(res.rid);
80
80
  }
81
81
 
82
- type LocalBlobPoolEntry = PollResource<ComputableStableDefined<LocalBlobHandleAndSize>>;
82
+ type LocalBlobPoolEntry = PoolResource<ComputableStableDefined<LocalBlobHandleAndSize>>;
83
83
 
84
84
  class LocalBlobPool
85
85
  extends RefCountResourcePool<PlTreeEntry, ComputableStableDefined<LocalBlobHandleAndSize>>
@@ -92,7 +92,7 @@ class LocalBlobPool
92
92
  return makeBlobId(params);
93
93
  }
94
94
 
95
- protected createNewResource(params: PlTreeEntry): ComputableStableDefined<LocalBlobHandleAndSize> {
95
+ protected createNewResource(params: PlTreeEntry, _key: string): ComputableStableDefined<LocalBlobHandleAndSize> {
96
96
  return this.blobDriver.getDownloadedBlob(params);
97
97
  }
98
98
 
@@ -117,7 +117,7 @@ class LocalBlobPool
117
117
  };
118
118
  }
119
119
 
120
- type RemoteBlobPoolEntry = PollResource<Computable<RemoteBlobHandleAndSize>>;
120
+ type RemoteBlobPoolEntry = PoolResource<Computable<RemoteBlobHandleAndSize>>;
121
121
 
122
122
  class RemoteBlobPool
123
123
  extends RefCountResourcePool<PlTreeEntry, Computable<RemoteBlobHandleAndSize>> {
@@ -129,7 +129,7 @@ class RemoteBlobPool
129
129
  return String(params.rid);
130
130
  }
131
131
 
132
- protected createNewResource(params: PlTreeEntry): Computable<RemoteBlobHandleAndSize> {
132
+ protected createNewResource(params: PlTreeEntry, _key: string): Computable<RemoteBlobHandleAndSize> {
133
133
  return this.blobDriver.getOnDemandBlob(params);
134
134
  }
135
135
 
@@ -178,8 +178,7 @@ class BlobStore extends PFrameInternal.BaseObjectStore {
178
178
  try {
179
179
  await params.callback(response);
180
180
  } catch (error: unknown) {
181
- this.logger(
182
- 'warn',
181
+ this.logger('warn',
183
182
  `PFrames blob store received unhandled rejection from HTTP handler: ${ensureError(error)}`,
184
183
  );
185
184
  }
@@ -193,8 +192,7 @@ class BlobStore extends PFrameInternal.BaseObjectStore {
193
192
  try {
194
193
  blob = await computable.getValue();
195
194
  } catch (error: unknown) {
196
- this.logger(
197
- 'error',
195
+ this.logger('error',
198
196
  `PFrames blob store failed to get blob from computable: ${ensureError(error)}`,
199
197
  );
200
198
  return await respond({ type: 'InternalError' });
@@ -217,9 +215,9 @@ class BlobStore extends PFrameInternal.BaseObjectStore {
217
215
  });
218
216
  }
219
217
 
220
- this.logger(
221
- 'info',
222
- `PFrames blob store requesting content for ${blobId}, range [${translatedRange.start}..=${translatedRange.end}]`,
218
+ this.logger('info',
219
+ `PFrames blob store requesting content for ${blobId}, `
220
+ + `range [${translatedRange.start}..=${translatedRange.end}]`,
223
221
  );
224
222
  return await this.remoteBlobPool.withContent(blob.handle, {
225
223
  range: translatedRange,
@@ -236,8 +234,7 @@ class BlobStore extends PFrameInternal.BaseObjectStore {
236
234
  });
237
235
  } catch (error: unknown) {
238
236
  if (!isAbortError(error)) {
239
- this.logger(
240
- 'warn',
237
+ this.logger('warn',
241
238
  `PFrames blob store unhandled error: ${ensureError(error)}`,
242
239
  );
243
240
  }
@@ -250,7 +247,10 @@ type InternalPFrameData = PFrameDef<PFrameInternal.DataInfo<PlTreeEntry>>;
250
247
 
251
248
  const valueTypes: ValueType[] = ['Int', 'Long', 'Float', 'Double', 'String', 'Bytes'] as const;
252
249
 
253
- function migrateFilters(filters: PTableRecordFilter[]): PTableRecordFilter[] {
250
+ function migrateFilters(
251
+ filters: PTableRecordFilter[],
252
+ logger: PFrameInternal.Logger,
253
+ ): PTableRecordFilter[] {
254
254
  const filtersV1 = [];
255
255
  const filtersV2: PTableRecordSingleValueFilterV2[] = [];
256
256
  for (const filter of filters) {
@@ -266,7 +266,7 @@ function migrateFilters(filters: PTableRecordFilter[]): PTableRecordFilter[] {
266
266
  }
267
267
  if (filtersV1.length > 0) {
268
268
  const filtersV1Json = JSON.stringify(filtersV1);
269
- console.warn(
269
+ logger('warn',
270
270
  `type overriten from 'bySingleColumn' to 'bySingleColumnV2' for filters: ${filtersV1Json}`,
271
271
  );
272
272
  }
@@ -275,19 +275,20 @@ function migrateFilters(filters: PTableRecordFilter[]): PTableRecordFilter[] {
275
275
 
276
276
  function migratePTableFilters<T>(
277
277
  def: Omit<PTableDef<T>, 'partitionFilters'> | PTableDef<T>,
278
+ logger: PFrameInternal.Logger,
278
279
  ): PTableDef<T> {
279
280
  if (!('partitionFilters' in def)) {
280
281
  // For old blocks assume all axes filters to be partition filters
281
282
  return {
282
283
  ...def,
283
- partitionFilters: migrateFilters(def.filters.filter((f) => f.column.type === 'axis')),
284
- filters: migrateFilters(def.filters.filter((f) => f.column.type === 'column')),
284
+ partitionFilters: migrateFilters(def.filters.filter((f) => f.column.type === 'axis'), logger),
285
+ filters: migrateFilters(def.filters.filter((f) => f.column.type === 'column'), logger),
285
286
  };
286
287
  }
287
288
  return {
288
289
  ...def,
289
- partitionFilters: migrateFilters(def.partitionFilters),
290
- filters: migrateFilters(def.filters),
290
+ partitionFilters: migrateFilters(def.partitionFilters, logger),
291
+ filters: migrateFilters(def.filters, logger),
291
292
  };
292
293
  }
293
294
 
@@ -311,16 +312,155 @@ function hasArtificialColumns<T>(entry: JoinEntry<T>): boolean {
311
312
 
312
313
  const bigintReplacer = (_: string, v: unknown) => (typeof v === 'bigint' ? v.toString() : v);
313
314
 
314
- class PTableCache {
315
- private readonly perFrame = new Map<PFrameHandle, LRUCache<PTableHandle, PollResource<PTableHolder>>>();
316
- private readonly global: LRUCache<PTableHandle, PollResource<PTableHolder>>;
317
- private readonly disposeListeners = new Map<PTableHandle, () => void>();
315
+ class PFramePool extends RefCountResourcePool<InternalPFrameData, PFrameHolder> {
316
+ constructor(
317
+ private readonly parquetServer: PFrameInternal.HttpServerInfo,
318
+ private readonly localBlobPool: LocalBlobPool,
319
+ private readonly remoteBlobPool: RemoteBlobPool,
320
+ private readonly logger: PFrameInternal.Logger,
321
+ private readonly spillPath: string,
322
+ ) {
323
+ super();
324
+ }
325
+
326
+ protected calculateParamsKey(params: InternalPFrameData): string {
327
+ try {
328
+ return stableKeyFromPFrameData(params);
329
+ } catch (err: unknown) {
330
+ if (isPFrameDriverError(err)) throw err;
331
+ throw new PFrameDriverError(
332
+ `PFrame handle calculation failed, `
333
+ + `request: ${JSON.stringify(params, bigintReplacer)}, `
334
+ + `error: ${ensureError(err)}`,
335
+ );
336
+ }
337
+ }
338
+
339
+ protected createNewResource(params: InternalPFrameData, key: string): PFrameHolder {
340
+ if (getDebugFlags().logPFrameRequests) {
341
+ this.logger('info',
342
+ `PFrame creation (pFrameHandle = ${key}): `
343
+ + `${JSON.stringify(params, bigintReplacer)}`,
344
+ );
345
+ }
346
+ return new PFrameHolder(
347
+ this.parquetServer,
348
+ this.localBlobPool,
349
+ this.remoteBlobPool,
350
+ this.logger,
351
+ this.spillPath,
352
+ params,
353
+ );
354
+ }
318
355
 
356
+ public getByKey(key: PFrameHandle): PFrameHolder {
357
+ const resource = super.tryGetByKey(key);
358
+ if (!resource) throw new PFrameDriverError(`PFrame not found, handle = ${key}`);
359
+ return resource;
360
+ }
361
+ }
362
+
363
+ class PTableDefPool extends RefCountResourcePool<FullPTableDef, PTableDefHolder> {
364
+ constructor(private readonly logger: PFrameInternal.Logger) {
365
+ super();
366
+ }
367
+
368
+ protected calculateParamsKey(params: FullPTableDef): string {
369
+ return stableKeyFromFullPTableDef(params);
370
+ }
371
+
372
+ protected createNewResource(params: FullPTableDef, key: string): PTableDefHolder {
373
+ return new PTableDefHolder(params, key as PTableHandle, this.logger);
374
+ }
375
+
376
+ public getByKey(key: PTableHandle): PTableDefHolder {
377
+ const resource = super.tryGetByKey(key);
378
+ if (!resource) throw new PFrameDriverError(`PTable definition not found, handle = ${key}`);
379
+ return resource;
380
+ }
381
+ }
382
+
383
+ class PTablePool extends RefCountResourcePool<FullPTableDef, PTableHolder> {
319
384
  constructor(
385
+ private readonly pFrames: PFramePool,
386
+ private readonly pTableDefs: PTableDefPool,
320
387
  private readonly logger: PFrameInternal.Logger,
321
- private readonly ops: PFrameDriverOps,
322
388
  ) {
323
- this.global = new LRUCache<PTableHandle, PollResource<PTableHolder>>({
389
+ super();
390
+ }
391
+
392
+ protected calculateParamsKey(params: FullPTableDef): string {
393
+ return stableKeyFromFullPTableDef(params);
394
+ }
395
+
396
+ protected createNewResource(params: FullPTableDef, key: string): PTableHolder {
397
+ if (getDebugFlags().logPFrameRequests) {
398
+ this.logger('info',
399
+ `PTable creation (pTableHandle = ${key}): `
400
+ + `${JSON.stringify(params, bigintReplacer)}`,
401
+ );
402
+ }
403
+
404
+ const handle = params.pFrameHandle;
405
+ const { pFramePromise, disposeSignal } = this.pFrames.getByKey(handle);
406
+
407
+ const defDisposeSignal = this.pTableDefs.tryGetByKey(key)?.disposeSignal;
408
+ const combinedSignal = AbortSignal.any([disposeSignal, defDisposeSignal].filter((s) => !!s));
409
+
410
+ // 3. Sort
411
+ if (params.def.sorting.length > 0) {
412
+ const predecessor = this.acquire({
413
+ ...params,
414
+ def: {
415
+ ...params.def,
416
+ sorting: [],
417
+ },
418
+ });
419
+ const { resource: { pTablePromise } } = predecessor;
420
+ const sortedTable = pTablePromise.then((pTable) => pTable.sort(params.def.sorting));
421
+ return new PTableHolder(handle, combinedSignal, sortedTable, predecessor);
422
+ }
423
+
424
+ // 2. Filter (except the case with artificial columns where cartesian creates too many rows)
425
+ if (!hasArtificialColumns(params.def.src) && params.def.filters.length > 0) {
426
+ const predecessor = this.acquire({
427
+ ...params,
428
+ def: {
429
+ ...params.def,
430
+ filters: [],
431
+ },
432
+ });
433
+ const { resource: { pTablePromise } } = predecessor;
434
+ const filteredTable = pTablePromise.then((pTable) => pTable.filter(params.def.filters));
435
+ return new PTableHolder(handle, combinedSignal, filteredTable, predecessor);
436
+ }
437
+
438
+ // 1. Join
439
+ const table = pFramePromise.then((pFrame) => pFrame.createTable({
440
+ src: joinEntryToInternal(params.def.src),
441
+ // `params.def.filters` would be non-empty only when join has artificial columns
442
+ filters: [...params.def.partitionFilters, ...params.def.filters],
443
+ }));
444
+ return new PTableHolder(handle, combinedSignal, table);
445
+ }
446
+
447
+ public getByKey(key: PTableHandle): PTableHolder {
448
+ const resource = super.tryGetByKey(key);
449
+ if (!resource) throw new PFrameDriverError(`PTable not found, handle = ${key}`);
450
+ return resource;
451
+ }
452
+ }
453
+
454
+ class PTableCacheUi {
455
+ private readonly perFrame = new Map<PFrameHandle, LRUCache<PTableHandle, PoolResource<PTableHolder>>>();
456
+ private readonly global: LRUCache<PTableHandle, PoolResource<PTableHolder>>;
457
+ private readonly disposeListeners = new Set<PTableHandle>();
458
+
459
+ constructor(
460
+ private readonly logger: PFrameInternal.Logger,
461
+ private readonly ops: Pick<PFrameDriverOps, 'pFramesCacheMaxSize' | 'pFrameCacheMaxCount'>,
462
+ ) {
463
+ this.global = new LRUCache<PTableHandle, PoolResource<PTableHolder>>({
324
464
  maxSize: this.ops.pFramesCacheMaxSize,
325
465
  dispose: (resource, key, reason) => {
326
466
  if (reason === 'evict') {
@@ -331,19 +471,15 @@ class PTableCache {
331
471
  this.perFrame.delete(resource.resource.pFrame);
332
472
  }
333
473
 
334
- const disposeListener = this.disposeListeners.get(key)!;
335
- this.disposeListeners.delete(key);
336
- resource.resource.disposeSignal.removeEventListener('abort', disposeListener);
337
-
338
474
  resource.unref();
339
475
  if (getDebugFlags().logPFrameRequests) {
340
- this.logger('info', `calculateTableData cache - removed PTable ${key}`);
476
+ logger('info', `calculateTableData cache - removed PTable ${key} (reason: ${reason})`);
341
477
  }
342
478
  },
343
479
  });
344
480
  }
345
481
 
346
- public cache(resource: PollResource<PTableHolder>, size: number): void {
482
+ public cache(resource: PoolResource<PTableHolder>, size: number): void {
347
483
  const key = resource.key as PTableHandle;
348
484
  if (getDebugFlags().logPFrameRequests) {
349
485
  this.logger('info', `calculateTableData cache - added PTable ${key} with size ${size}`);
@@ -353,7 +489,7 @@ class PTableCache {
353
489
 
354
490
  let perFrame = this.perFrame.get(resource.resource.pFrame);
355
491
  if (!perFrame) {
356
- perFrame = new LRUCache<PTableHandle, PollResource<PTableHolder>>({
492
+ perFrame = new LRUCache<PTableHandle, PoolResource<PTableHolder>>({
357
493
  max: this.ops.pFrameCacheMaxCount,
358
494
  dispose: (_resource, key, reason) => {
359
495
  if (reason === 'evict') {
@@ -365,12 +501,65 @@ class PTableCache {
365
501
  }
366
502
  perFrame.set(key, resource);
367
503
 
368
- const disposeListener = () => {
369
- this.perFrame.get(resource.resource.pFrame)?.delete(key);
370
- this.global.delete(key);
371
- };
372
- this.disposeListeners.set(key, disposeListener);
373
- resource.resource.disposeSignal.addEventListener('abort', disposeListener);
504
+ if (!this.disposeListeners.has(key)) {
505
+ const disposeListener = () => {
506
+ this.perFrame.get(resource.resource.pFrame)?.delete(key);
507
+ this.global.delete(key);
508
+
509
+ this.disposeListeners.delete(key);
510
+ resource.resource.disposeSignal.removeEventListener('abort', disposeListener);
511
+ };
512
+ this.disposeListeners.add(key);
513
+ resource.resource.disposeSignal.addEventListener('abort', disposeListener);
514
+ }
515
+ }
516
+ }
517
+
518
+ class PTableCacheModel {
519
+ private readonly global: LRUCache<PTableHandle, PoolResource<PTableHolder>>;
520
+ private readonly disposeListeners = new Set<PTableHandle>();
521
+
522
+ constructor(
523
+ private readonly logger: PFrameInternal.Logger,
524
+ ops: Pick<PFrameDriverOps, 'pTablesCacheMaxSize'>,
525
+ ) {
526
+ this.global = new LRUCache<PTableHandle, PoolResource<PTableHolder>>({
527
+ maxSize: ops.pTablesCacheMaxSize,
528
+ dispose: (resource, key, reason) => {
529
+ resource.unref();
530
+ if (getDebugFlags().logPFrameRequests) {
531
+ logger('info', `createPTable cache - removed PTable ${key} (reason: ${reason})`);
532
+ }
533
+ },
534
+ });
535
+ }
536
+
537
+ public cache(resource: PoolResource<PTableHolder>, size: number, defDisposeSignal: AbortSignal): void {
538
+ const key = resource.key as PTableHandle;
539
+ if (getDebugFlags().logPFrameRequests) {
540
+ this.logger('info', `createPTable cache - added PTable ${key} with size ${size}`);
541
+ }
542
+
543
+ const status: LRUCache.Status<PoolResource<PTableHolder>> = {};
544
+ this.global.set(key, resource, { size: Math.max(size, 1), status }); // 1 is minimum size to avoid cache evictions
545
+
546
+ if (status.maxEntrySizeExceeded) {
547
+ resource.unref();
548
+ if (getDebugFlags().logPFrameRequests) {
549
+ this.logger('info', `createPTable cache - removed PTable ${key} (maxEntrySizeExceeded)`);
550
+ }
551
+ } else {
552
+ if (!this.disposeListeners.has(key)) {
553
+ const disposeListener = () => {
554
+ this.global.delete(key);
555
+
556
+ this.disposeListeners.delete(key);
557
+ defDisposeSignal.removeEventListener('abort', disposeListener);
558
+ };
559
+ this.disposeListeners.add(key);
560
+ defDisposeSignal.addEventListener('abort', disposeListener);
561
+ }
562
+ }
374
563
  }
375
564
  }
376
565
 
@@ -451,12 +640,16 @@ class PFrameHolder implements PFrameInternal.PFrameDataSourceV2, AsyncDisposable
451
640
  this.dispose();
452
641
  pFrame.dispose();
453
642
  throw new PFrameDriverError(
454
- `PFrame creation failed asynchronously, columns: ${JSON.stringify(jsonifiedColumns)}, error: ${ensureError(err)}`,
643
+ `PFrame creation failed asynchronously, `
644
+ + `columns: ${JSON.stringify(jsonifiedColumns)}, `
645
+ + `error: ${ensureError(err)}`,
455
646
  );
456
647
  });
457
648
  } catch (err: unknown) {
458
649
  throw new PFrameDriverError(
459
- `PFrame creation failed synchronously, columns: ${JSON.stringify(jsonifiedColumns)}, error: ${ensureError(err)}`,
650
+ `PFrame creation failed synchronously, `
651
+ + `columns: ${JSON.stringify(jsonifiedColumns)}, `
652
+ + `error: ${ensureError(err)}`,
460
653
  );
461
654
  }
462
655
  }
@@ -487,6 +680,31 @@ class PFrameHolder implements PFrameInternal.PFrameDataSourceV2, AsyncDisposable
487
680
  }
488
681
  }
489
682
 
683
+ class PTableDefHolder implements Disposable {
684
+ private readonly abortController = new AbortController();
685
+
686
+ constructor(
687
+ public readonly def: FullPTableDef,
688
+ private readonly pTableHandle: PTableHandle,
689
+ private readonly logger: PFrameInternal.Logger,
690
+ ) {
691
+ if (getDebugFlags().logPFrameRequests) {
692
+ this.logger('info', `PTable definition saved (pTableHandle = ${this.pTableHandle})`);
693
+ }
694
+ }
695
+
696
+ public get disposeSignal(): AbortSignal {
697
+ return this.abortController.signal;
698
+ }
699
+
700
+ [Symbol.dispose](): void {
701
+ this.abortController.abort();
702
+ if (getDebugFlags().logPFrameRequests) {
703
+ this.logger('info', `PTable definition disposed (pTableHandle = ${this.pTableHandle})`);
704
+ }
705
+ }
706
+ }
707
+
490
708
  class PTableHolder implements AsyncDisposable {
491
709
  private readonly abortController = new AbortController();
492
710
  private readonly combinedDisposeSignal: AbortSignal;
@@ -495,7 +713,7 @@ class PTableHolder implements AsyncDisposable {
495
713
  public readonly pFrame: PFrameHandle,
496
714
  pFrameDisposeSignal: AbortSignal,
497
715
  public readonly pTablePromise: Promise<PFrameInternal.PTableV7>,
498
- public readonly predecessor?: PollResource<PTableHolder>,
716
+ private readonly predecessor?: PoolResource<PTableHolder>,
499
717
  ) {
500
718
  this.combinedDisposeSignal = AbortSignal.any([pFrameDisposeSignal, this.abortController.signal]);
501
719
  }
@@ -533,6 +751,12 @@ export type PFrameDriverOps = {
533
751
  * Also, each table has predecessors; overlapping predecessors will be counted twice, so the effective limit is smaller.
534
752
  */
535
753
  pFramesCacheMaxSize: number;
754
+ /**
755
+ * Maximum size of `createPTable` results cached on disk.
756
+ * The limit is soft, as the same table could be materialized with other requests and will not be deleted in such a case.
757
+ * Also, each table has predecessors; overlapping predecessors will be counted twice, so the effective limit is smaller.
758
+ */
759
+ pTablesCacheMaxSize: number;
536
760
  };
537
761
 
538
762
  /**
@@ -602,9 +826,13 @@ export interface InternalPFrameDriver extends SdkPFrameDriver, AsyncDisposable {
602
826
  }
603
827
 
604
828
  export class PFrameDriver implements InternalPFrameDriver {
605
- private readonly pFrames: RefCountResourcePool<InternalPFrameData, PFrameHolder>;
606
- private readonly pTables: RefCountResourcePool<FullPTableDef, PTableHolder>;
607
- private readonly pTableCache: PTableCache;
829
+ private readonly pFrames: PFramePool;
830
+ private readonly pTableDefs: PTableDefPool;
831
+ private readonly pTables: PTablePool;
832
+
833
+ private readonly pTableCacheUi: PTableCacheUi;
834
+ private readonly pTableCacheModel: PTableCacheModel;
835
+
608
836
  private readonly frameConcurrencyLimiter: ConcurrencyLimitingExecutor;
609
837
  private readonly tableConcurrencyLimiter: ConcurrencyLimitingExecutor;
610
838
 
@@ -644,119 +872,12 @@ export class PFrameDriver implements InternalPFrameDriver {
644
872
  this.frameConcurrencyLimiter = concurrencyLimiter;
645
873
  this.tableConcurrencyLimiter = new ConcurrencyLimitingExecutor(ops.pTableConcurrency);
646
874
 
647
- this.pTableCache = new PTableCache(logger, ops);
648
-
649
- this.pFrames = new (class extends RefCountResourcePool<InternalPFrameData, PFrameHolder> {
650
- constructor(
651
- private readonly parquetServer: PFrameInternal.HttpServerInfo,
652
- private readonly localBlobPool: LocalBlobPool,
653
- private readonly remoteBlobPool: RemoteBlobPool,
654
- private readonly logger: PFrameInternal.Logger,
655
- private readonly spillPath: string,
656
- ) {
657
- super();
658
- }
659
-
660
- public acquire(params: InternalPFrameData): PollResource<PFrameHolder> {
661
- return super.acquire(params);
662
- }
663
-
664
- public getByKey(key: PFrameHandle): PFrameHolder {
665
- const resource = super.tryGetByKey(key);
666
- if (!resource) throw new PFrameDriverError(`PFrame not found, handle = ${key}`);
667
- return resource;
668
- }
669
-
670
- protected createNewResource(params: InternalPFrameData): PFrameHolder {
671
- if (getDebugFlags().logPFrameRequests)
672
- this.logger('info',
673
- `PFrame creation (pFrameHandle = ${this.calculateParamsKey(params)}): ${JSON.stringify(params, bigintReplacer)}`,
674
- );
675
- return new PFrameHolder(this.parquetServer, this.localBlobPool, this.remoteBlobPool, this.logger, this.spillPath, params);
676
- }
677
-
678
- protected calculateParamsKey(params: InternalPFrameData): string {
679
- try {
680
- return stableKeyFromPFrameData(params);
681
- } catch (err: unknown) {
682
- if (isPFrameDriverError(err)) throw err;
683
- throw new PFrameDriverError(`PFrame handle calculation failed, request: ${JSON.stringify(params, bigintReplacer)}, error: ${ensureError(err)}`);
684
- }
685
- }
686
- })(server.info, localBlobPool, remoteBlobPool, logger, spillPath);
687
-
688
- this.pTables = new (class extends RefCountResourcePool<
689
- FullPTableDef,
690
- PTableHolder
691
- > {
692
- constructor(
693
- private readonly pFrames: RefCountResourcePool<InternalPFrameData, PFrameHolder>,
694
- private readonly logger: PFrameInternal.Logger,
695
- ) {
696
- super();
697
- }
698
-
699
- public getByKey(key: PTableHandle): PTableHolder {
700
- const resource = super.tryGetByKey(key);
701
- if (!resource) throw new PFrameDriverError(`PTable not found, handle = ${key}`);
702
- return resource;
703
- }
704
-
705
- protected createNewResource(params: FullPTableDef): PTableHolder {
706
- if (getDebugFlags().logPFrameRequests) {
707
- this.logger('info',
708
- `PTable creation (pTableHandle = ${this.calculateParamsKey(params)}): ${JSON.stringify(params, bigintReplacer)}`,
709
- );
710
- }
711
-
712
- const handle = params.pFrameHandle;
713
- const { pFramePromise, disposeSignal } = this.pFrames.getByKey(handle);
714
-
715
- // 3. Sort
716
- if (params.def.sorting.length > 0) {
717
- const predecessor = this.acquire({
718
- ...params,
719
- def: {
720
- ...params.def,
721
- sorting: [],
722
- },
723
- });
724
- const { resource: { pTablePromise } } = predecessor;
725
- const sortedTable = pTablePromise.then((pTable) => pTable.sort(params.def.sorting));
726
- return new PTableHolder(handle, disposeSignal, sortedTable, predecessor);
727
- }
875
+ this.pFrames = new PFramePool(server.info, localBlobPool, remoteBlobPool, logger, spillPath);
876
+ this.pTableDefs = new PTableDefPool(logger);
877
+ this.pTables = new PTablePool(this.pFrames, this.pTableDefs, logger);
728
878
 
729
- // 2. Filter (except the case with artificial columns where cartesian creates too many rows)
730
- if (!hasArtificialColumns(params.def.src) && params.def.filters.length > 0) {
731
- const predecessor = this.acquire({
732
- ...params,
733
- def: {
734
- ...params.def,
735
- filters: [],
736
- },
737
- });
738
- const { resource: { pTablePromise } } = predecessor;
739
- const filteredTable = pTablePromise.then((pTable) => pTable.filter(params.def.filters));
740
- return new PTableHolder(handle, disposeSignal, filteredTable, predecessor);
741
- }
742
-
743
- // 1. Join
744
- const table = pFramePromise.then((pFrame) => pFrame.createTable({
745
- src: joinEntryToInternal(params.def.src),
746
- // `params.def.filters` would be non-empty only when join has artificial columns
747
- filters: [...params.def.partitionFilters, ...params.def.filters],
748
- }));
749
- return new PTableHolder(handle, disposeSignal, table);
750
- }
751
-
752
- protected calculateParamsKey(params: FullPTableDef): string {
753
- try {
754
- return stableKeyFromFullPTableDef(params);
755
- } catch (err: unknown) {
756
- throw new PFrameDriverError(`PTable handle calculation failed, request: ${JSON.stringify(params)}, error: ${ensureError(err)}`);
757
- }
758
- }
759
- })(this.pFrames, logger);
879
+ this.pTableCacheUi = new PTableCacheUi(logger, ops);
880
+ this.pTableCacheModel = new PTableCacheModel(logger, ops);
760
881
  }
761
882
 
762
883
  async dispose(): Promise<void> {
@@ -799,20 +920,16 @@ export class PFrameDriver implements InternalPFrameDriver {
799
920
  rawDef: PTableDef<PColumn<PColumnDataUniversal>>,
800
921
  ctx: ComputableCtx,
801
922
  ): PTableHandle {
802
- const def = migratePTableFilters(rawDef);
923
+ const def = migratePTableFilters(rawDef, this.logger);
803
924
  const pFrameHandle = this.createPFrame(extractAllColumns(def.src), ctx);
804
925
  const defIds = mapPTableDef(def, (c) => c.id);
805
926
 
806
- const res = this.pTables.acquire({ def: defIds, pFrameHandle });
807
- if (getDebugFlags().logPFrameRequests)
808
- this.logger('info',
809
- `Create PTable call (pFrameHandle = ${pFrameHandle}; pTableHandle = ${JSON.stringify(res)}): ${JSON.stringify(
810
- mapPTableDef(def, (c) => c.spec),
811
- bigintReplacer,
812
- )}`,
813
- );
814
- ctx.addOnDestroy(res.unref); // in addition to pframe unref added in createPFrame above
815
- return res.key as PTableHandle;
927
+ const { key, unref } = this.pTableDefs.acquire({ def: defIds, pFrameHandle });
928
+ if (getDebugFlags().logPFrameRequests) {
929
+ this.logger('info', `Create PTable call (pFrameHandle = ${pFrameHandle}; pTableHandle = ${key})`);
930
+ }
931
+ ctx.addOnDestroy(unref); // in addition to pframe unref added in createPFrame above
932
+ return key as PTableHandle;
816
933
  }
817
934
 
818
935
  //
@@ -879,12 +996,12 @@ export class PFrameDriver implements InternalPFrameDriver {
879
996
 
880
997
  const table = this.pTables.acquire({
881
998
  pFrameHandle: handle,
882
- def: migratePTableFilters(request),
999
+ def: migratePTableFilters(request, this.logger),
883
1000
  });
884
- const { resource: { pTablePromise, disposeSignal } } = table;
1001
+ const { pTablePromise, disposeSignal } = table.resource;
885
1002
  const pTable = await pTablePromise;
886
- const combinedSignal = AbortSignal.any([signal, disposeSignal].filter((s) => !!s));
887
1003
 
1004
+ const combinedSignal = AbortSignal.any([signal, disposeSignal].filter((s) => !!s));
888
1005
  return await this.frameConcurrencyLimiter.run(async () => {
889
1006
  try {
890
1007
  const spec = pTable.getSpec();
@@ -905,7 +1022,7 @@ export class PFrameDriver implements InternalPFrameDriver {
905
1022
  withPredecessors: true,
906
1023
  signal: combinedSignal,
907
1024
  });
908
- this.pTableCache.cache(table, overallSize);
1025
+ this.pTableCacheUi.cache(table, overallSize);
909
1026
 
910
1027
  return spec.map((spec, i) => ({
911
1028
  spec: spec,
@@ -931,12 +1048,12 @@ export class PFrameDriver implements InternalPFrameDriver {
931
1048
 
932
1049
  const { pFramePromise, disposeSignal } = this.pFrames.getByKey(handle);
933
1050
  const pFrame = await pFramePromise;
934
- const combinedSignal = AbortSignal.any([signal, disposeSignal].filter((s) => !!s));
935
1051
 
1052
+ const combinedSignal = AbortSignal.any([signal, disposeSignal].filter((s) => !!s));
936
1053
  return await this.frameConcurrencyLimiter.run(async () => {
937
1054
  return await pFrame.getUniqueValues({
938
1055
  ...request,
939
- filters: migrateFilters(request.filters),
1056
+ filters: migrateFilters(request.filters, this.logger),
940
1057
  }, {
941
1058
  signal: combinedSignal,
942
1059
  });
@@ -948,21 +1065,38 @@ export class PFrameDriver implements InternalPFrameDriver {
948
1065
  //
949
1066
 
950
1067
  public async getSpec(handle: PTableHandle): Promise<PTableColumnSpec[]> {
951
- const { pTablePromise } = this.pTables.getByKey(handle);
1068
+ const { def } = this.pTableDefs.getByKey(handle);
1069
+ using table = this.pTables.acquire(def);
1070
+
1071
+ const { pTablePromise } = table.resource;
952
1072
  const pTable = await pTablePromise;
1073
+
953
1074
  return pTable.getSpec();
954
1075
  }
955
1076
 
956
1077
  public async getShape(handle: PTableHandle, signal?: AbortSignal): Promise<PTableShape> {
957
- const { pTablePromise, disposeSignal } = this.pTables.getByKey(handle);
1078
+ const { def, disposeSignal: defDisposeSignal } = this.pTableDefs.getByKey(handle);
1079
+ const table = this.pTables.acquire(def);
1080
+
1081
+ const { pTablePromise, disposeSignal } = table.resource;
958
1082
  const pTable = await pTablePromise;
1083
+
959
1084
  const combinedSignal = AbortSignal.any([signal, disposeSignal].filter((s) => !!s));
1085
+ const { shape, overallSize } = await this.tableConcurrencyLimiter.run(async () => {
1086
+ const shape = await pTable.getShape({
1087
+ signal: combinedSignal,
1088
+ });
960
1089
 
961
- return await this.tableConcurrencyLimiter.run(async () => {
962
- return await pTable.getShape({
1090
+ const overallSize = await pTable.getFootprint({
1091
+ withPredecessors: true,
963
1092
  signal: combinedSignal,
964
1093
  });
1094
+
1095
+ return { shape, overallSize };
965
1096
  });
1097
+
1098
+ this.pTableCacheModel.cache(table, overallSize, defDisposeSignal);
1099
+ return shape;
966
1100
  }
967
1101
 
968
1102
  public async getData(
@@ -971,16 +1105,29 @@ export class PFrameDriver implements InternalPFrameDriver {
971
1105
  range: TableRange | undefined,
972
1106
  signal?: AbortSignal,
973
1107
  ): Promise<PTableVector[]> {
974
- const { pTablePromise, disposeSignal } = this.pTables.getByKey(handle);
1108
+ const { def, disposeSignal: defDisposeSignal } = this.pTableDefs.getByKey(handle);
1109
+ const table = this.pTables.acquire(def);
1110
+
1111
+ const { pTablePromise, disposeSignal } = table.resource;
975
1112
  const pTable = await pTablePromise;
976
- const combinedSignal = AbortSignal.any([signal, disposeSignal].filter((s) => !!s));
977
1113
 
978
- return await this.tableConcurrencyLimiter.run(async () => {
979
- return await pTable.getData(columnIndices, {
1114
+ const combinedSignal = AbortSignal.any([signal, disposeSignal].filter((s) => !!s));
1115
+ const { data, overallSize } = await this.tableConcurrencyLimiter.run(async () => {
1116
+ const data = await pTable.getData(columnIndices, {
980
1117
  range,
981
1118
  signal: combinedSignal,
982
1119
  });
1120
+
1121
+ const overallSize = await pTable.getFootprint({
1122
+ withPredecessors: true,
1123
+ signal: combinedSignal,
1124
+ });
1125
+
1126
+ return { data, overallSize };
983
1127
  });
1128
+
1129
+ this.pTableCacheModel.cache(table, overallSize, defDisposeSignal);
1130
+ return data;
984
1131
  }
985
1132
  }
986
1133
 
@@ -1038,9 +1185,17 @@ function joinEntryToInternal(entry: JoinEntry<PObjectId>): PFrameInternal.JoinEn
1038
1185
  }
1039
1186
 
1040
1187
  function stableKeyFromFullPTableDef(data: FullPTableDef): string {
1041
- const hash = createHash('sha256');
1042
- hash.update(canonicalize(data)!);
1043
- return hash.digest().toString('hex');
1188
+ try {
1189
+ const hash = createHash('sha256');
1190
+ hash.update(canonicalize(data)!);
1191
+ return hash.digest().toString('hex');
1192
+ } catch (err: unknown) {
1193
+ throw new PFrameDriverError(
1194
+ `PTable handle calculation failed, `
1195
+ + `request: ${JSON.stringify(data)}, `
1196
+ + `error: ${ensureError(err)}`,
1197
+ );
1198
+ }
1044
1199
  }
1045
1200
 
1046
1201
  function stableKeyFromPFrameData(data: PColumn<PFrameInternal.DataInfo<PlTreeEntry>>[]): string {