@milaboratories/pl-middle-layer 1.38.1 → 1.39.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -41,7 +41,7 @@ import {
41
41
  ensureError,
42
42
  } from '@platforma-sdk/model';
43
43
  import { LRUCache } from 'lru-cache';
44
- import type { UnrefFn } from './ref_count_pool';
44
+ import type { PollResource } from './ref_count_pool';
45
45
  import { RefCountResourcePool } from './ref_count_pool';
46
46
  import { allBlobs, makeDataInfoFromPColumnValues, mapBlobs, parseDataInfoResource } from './data';
47
47
  import { createHash } from 'node:crypto';
@@ -53,7 +53,7 @@ import * as fsp from 'node:fs/promises';
53
53
  import * as path from 'node:path';
54
54
  import { getDebugFlags } from '../debug';
55
55
 
56
- export type PColumnDataUniversal = PlTreeNodeAccessor | DataInfo<PlTreeNodeAccessor> | PColumnValues;
56
+ type PColumnDataUniversal = PlTreeNodeAccessor | DataInfo<PlTreeNodeAccessor> | PColumnValues;
57
57
 
58
58
  function blobKey(res: ResourceInfo): string {
59
59
  return String(res.id);
@@ -106,10 +106,72 @@ function migratePTableFilters<T>(
106
106
 
107
107
  const bigintReplacer = (_: string, v: unknown) => (typeof v === 'bigint' ? v.toString() : v);
108
108
 
109
+ class PTableCache {
110
+ private readonly perFrame = new Map<PFrameHandle, LRUCache<PTableHandle, PollResource<PTableHolder>>>();
111
+ private readonly global: LRUCache<PTableHandle, PollResource<PTableHolder>>;
112
+ private readonly disposeListeners = new Map<PTableHandle, () => void>();
113
+
114
+ constructor(
115
+ private readonly logger: MiLogger,
116
+ private readonly ops: PFrameDriverOps,
117
+ ) {
118
+ this.global = new LRUCache<PTableHandle, PollResource<PTableHolder>>({
119
+ maxSize: this.ops.pFramesCacheMaxSize,
120
+ dispose: (resource, key, reason) => {
121
+ if (reason === 'evict') {
122
+ this.perFrame.get(resource.resource.pFrame)?.delete(key);
123
+ }
124
+
125
+ if (this.perFrame.get(resource.resource.pFrame)?.size === 0) {
126
+ this.perFrame.delete(resource.resource.pFrame);
127
+ }
128
+
129
+ const disposeListener = this.disposeListeners.get(key)!;
130
+ this.disposeListeners.delete(key);
131
+ resource.resource.disposeSignal.removeEventListener('abort', disposeListener);
132
+
133
+ resource.unref();
134
+ if (getDebugFlags().logPFrameRequests) {
135
+ this.logger.info(`calculateTableData cache - removed PTable ${key}`);
136
+ }
137
+ },
138
+ });
139
+ }
140
+
141
+ public cache(resource: PollResource<PTableHolder>, size: number): void {
142
+ const key = resource.key as PTableHandle;
143
+ if (getDebugFlags().logPFrameRequests) {
144
+ this.logger.info(`calculateTableData cache - added PTable ${key} with size ${size}`);
145
+ }
146
+
147
+ this.global.set(key, resource, { size });
148
+
149
+ let perFrame = this.perFrame.get(resource.resource.pFrame);
150
+ if (!perFrame) {
151
+ perFrame = new LRUCache<PTableHandle, PollResource<PTableHolder>>({
152
+ max: this.ops.pFrameCacheMaxCount,
153
+ dispose: (_resource, key, reason) => {
154
+ if (reason === 'evict') {
155
+ this.global.delete(key);
156
+ }
157
+ },
158
+ });
159
+ this.perFrame.set(resource.resource.pFrame, perFrame);
160
+ }
161
+ perFrame.set(key, resource);
162
+
163
+ const disposeListener = () => {
164
+ this.perFrame.get(resource.resource.pFrame)?.delete(key);
165
+ this.global.delete(key);
166
+ };
167
+ this.disposeListeners.set(key, disposeListener);
168
+ resource.resource.disposeSignal.addEventListener('abort', disposeListener);
169
+ }
170
+ }
171
+
109
172
  class PFrameHolder implements PFrameInternal.PFrameDataSource, Disposable {
110
- public readonly pFrame: PFrameInternal.PFrameV7;
173
+ public readonly pFrame: PFrameInternal.PFrameV8;
111
174
  private readonly abortController = new AbortController();
112
- private readonly pTableCache: LRUCache<PTableHandle, UnrefFn>;
113
175
  private readonly blobIdToResource = new Map<string, ResourceInfo>();
114
176
  private readonly blobHandleComputables = new Map<
115
177
  string,
@@ -151,11 +213,6 @@ class PFrameHolder implements PFrameInternal.PFrameDataSource, Disposable {
151
213
  `Rust PFrame creation failed, columns: ${JSON.stringify(distinctСolumns)}, error: ${err as Error}`,
152
214
  );
153
215
  }
154
-
155
- this.pTableCache = new LRUCache<PTableHandle, UnrefFn>({
156
- max: 5, // TODO: calculate size on disk, not number of PTables
157
- dispose: (unref) => unref(),
158
- });
159
216
  }
160
217
 
161
218
  private getOrCreateComputableForBlob(blobId: string) {
@@ -188,13 +245,8 @@ class PFrameHolder implements PFrameInternal.PFrameDataSource, Disposable {
188
245
  return this.abortController.signal;
189
246
  }
190
247
 
191
- public cache(handle: PTableHandle, unref: UnrefFn): void {
192
- this.pTableCache.set(handle, unref);
193
- }
194
-
195
248
  [Symbol.dispose](): void {
196
249
  this.abortController.abort();
197
- this.pTableCache.clear();
198
250
  for (const computable of this.blobHandleComputables.values()) computable.resetState();
199
251
  this.pFrame.dispose();
200
252
  }
@@ -205,9 +257,10 @@ class PTableHolder implements Disposable {
205
257
  private readonly combinedDisposeSignal: AbortSignal;
206
258
 
207
259
  constructor(
260
+ public readonly pFrame: PFrameHandle,
208
261
  pFrameDisposeSignal: AbortSignal,
209
- public readonly pTable: PFrameInternal.PTableV5,
210
- private readonly unrefPredecessor?: UnrefFn,
262
+ public readonly pTable: PFrameInternal.PTableV6,
263
+ public readonly predecessor?: PollResource<PTableHolder>,
211
264
  ) {
212
265
  this.combinedDisposeSignal = AbortSignal.any([pFrameDisposeSignal, this.abortController.signal]);
213
266
  }
@@ -219,7 +272,7 @@ class PTableHolder implements Disposable {
219
272
  [Symbol.dispose](): void {
220
273
  this.abortController.abort();
221
274
  this.pTable.dispose();
222
- this.unrefPredecessor?.();
275
+ this.predecessor?.unref();
223
276
  }
224
277
  }
225
278
 
@@ -228,6 +281,19 @@ type FullPTableDef = {
228
281
  def: PTableDef<PObjectId>;
229
282
  };
230
283
 
284
+ export type PFrameDriverOps = {
285
+ // Concurrency limits for `getUniqueValues` and `calculateTableData` requests
286
+ pFrameConcurrency: number;
287
+ // Concurrency limits for `getShape` and `getData` requests
288
+ pTableConcurrency: number;
289
+ // Maximum number of `calculateTableData` results cached for each PFrame
290
+ pFrameCacheMaxCount: number;
291
+ // Maximum size of `calculateTableData` results cached for PFrames overall.
292
+ // The limit is soft, as the same table could be materialized by other requests and will not be deleted in such a case.
293
+ // Also, each table has predecessors; overlapping predecessors will be counted twice, so the effective limit is smaller.
294
+ pFramesCacheMaxSize: number;
295
+ };
296
+
231
297
  /**
232
298
  * Extends public and safe SDK's driver API with methods used internally in the middle
233
299
  * layer and in tests.
@@ -294,6 +360,7 @@ export interface InternalPFrameDriver extends SdkPFrameDriver {
294
360
  export class PFrameDriver implements InternalPFrameDriver {
295
361
  private readonly pFrames: RefCountResourcePool<InternalPFrameData, PFrameHolder>;
296
362
  private readonly pTables: RefCountResourcePool<FullPTableDef, PTableHolder>;
363
+ private readonly pTableCache: PTableCache;
297
364
  private readonly frameConcurrencyLimiter: ConcurrencyLimitingExecutor;
298
365
  private readonly tableConcurrencyLimiter: ConcurrencyLimitingExecutor;
299
366
 
@@ -305,20 +372,24 @@ export class PFrameDriver implements InternalPFrameDriver {
305
372
  blobDriver: DownloadDriver,
306
373
  logger: MiLogger,
307
374
  spillPath: string,
375
+ ops: PFrameDriverOps,
308
376
  ): Promise<PFrameDriver> {
309
377
  const resolvedSpillPath = path.resolve(spillPath);
310
378
  await emptyDir(resolvedSpillPath);
311
- return new PFrameDriver(blobDriver, logger, resolvedSpillPath);
379
+ return new PFrameDriver(blobDriver, logger, resolvedSpillPath, ops);
312
380
  }
313
381
 
314
382
  private constructor(
315
383
  private readonly blobDriver: DownloadDriver,
316
384
  private readonly logger: MiLogger,
317
385
  private readonly spillPath: string,
386
+ ops: PFrameDriverOps,
318
387
  ) {
319
- const concurrencyLimiter = new ConcurrencyLimitingExecutor(1);
388
+ const concurrencyLimiter = new ConcurrencyLimitingExecutor(ops.pFrameConcurrency);
320
389
  this.frameConcurrencyLimiter = concurrencyLimiter;
321
- this.tableConcurrencyLimiter = new ConcurrencyLimitingExecutor(1);
390
+ this.tableConcurrencyLimiter = new ConcurrencyLimitingExecutor(ops.pTableConcurrency);
391
+
392
+ this.pTableCache = new PTableCache(this.logger, ops);
322
393
 
323
394
  this.pFrames = new (class extends RefCountResourcePool<InternalPFrameData, PFrameHolder> {
324
395
  constructor(
@@ -364,28 +435,30 @@ export class PFrameDriver implements InternalPFrameDriver {
364
435
 
365
436
  // 3. Sort
366
437
  if (params.def.sorting.length > 0) {
367
- const { resource: { pTable }, unref } = this.acquire({
438
+ const predecessor = this.acquire({
368
439
  ...params,
369
440
  def: {
370
441
  ...params.def,
371
442
  sorting: [],
372
443
  },
373
444
  });
445
+ const { resource: { pTable } } = predecessor;
374
446
  const sortedTable = pTable.sort(params.def.sorting);
375
- return new PTableHolder(disposeSignal, sortedTable, unref);
447
+ return new PTableHolder(handle, disposeSignal, sortedTable, predecessor);
376
448
  }
377
449
 
378
450
  // 2. Filter
379
451
  if (params.def.filters.length > 0) {
380
- const { resource: { pTable }, unref } = this.acquire({
452
+ const predecessor = this.acquire({
381
453
  ...params,
382
454
  def: {
383
455
  ...params.def,
384
456
  filters: [],
385
457
  },
386
458
  });
459
+ const { resource: { pTable } } = predecessor;
387
460
  const filteredTable = pTable.filter(params.def.filters);
388
- return new PTableHolder(disposeSignal, filteredTable, unref);
461
+ return new PTableHolder(handle, disposeSignal, filteredTable, predecessor);
389
462
  }
390
463
 
391
464
  // 1. Join
@@ -393,7 +466,7 @@ export class PFrameDriver implements InternalPFrameDriver {
393
466
  src: joinEntryToInternal(params.def.src),
394
467
  filters: params.def.partitionFilters,
395
468
  });
396
- return new PTableHolder(disposeSignal, table);
469
+ return new PTableHolder(handle, disposeSignal, table);
397
470
  }
398
471
 
399
472
  protected calculateParamsKey(params: FullPTableDef): string {
@@ -502,23 +575,35 @@ export class PFrameDriver implements InternalPFrameDriver {
502
575
  );
503
576
  }
504
577
 
505
- const { key: pTableHandle, resource: { pTable, disposeSignal }, unref } = this.pTables.acquire({
578
+ const table = this.pTables.acquire({
506
579
  pFrameHandle: handle,
507
580
  def: migratePTableFilters(request),
508
581
  });
582
+ const { resource: { pTable, disposeSignal } } = table;
509
583
  const combinedSignal = AbortSignal.any([signal, disposeSignal].filter((s) => !!s));
510
584
 
511
585
  return await this.frameConcurrencyLimiter.run(async () => {
512
- this.pFrames.getByKey(handle).cache(pTableHandle as PTableHandle, unref);
513
- const spec = pTable.getSpec();
514
- const data = await pTable.getData([...spec.keys()], {
515
- range,
516
- signal: combinedSignal,
517
- });
518
- return spec.map((spec, i) => ({
519
- spec: spec,
520
- data: data[i],
521
- }));
586
+ try {
587
+ const spec = pTable.getSpec();
588
+ const data = await pTable.getData([...spec.keys()], {
589
+ range,
590
+ signal: combinedSignal,
591
+ });
592
+
593
+ const size = await pTable.getFootprint({
594
+ withPredecessors: true,
595
+ signal: combinedSignal,
596
+ });
597
+ this.pTableCache.cache(table, size);
598
+
599
+ return spec.map((spec, i) => ({
600
+ spec: spec,
601
+ data: data[i],
602
+ }));
603
+ } catch (err: unknown) {
604
+ table.unref();
605
+ throw err;
606
+ }
522
607
  });
523
608
  }
524
609