@milaboratories/pl-middle-layer 1.43.59 → 1.43.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/dist/debug/index.cjs +0 -1
  2. package/dist/debug/index.cjs.map +1 -1
  3. package/dist/debug/index.d.ts +0 -1
  4. package/dist/debug/index.d.ts.map +1 -1
  5. package/dist/debug/index.js +0 -1
  6. package/dist/debug/index.js.map +1 -1
  7. package/dist/js_render/computable_context.cjs +6 -2
  8. package/dist/js_render/computable_context.cjs.map +1 -1
  9. package/dist/js_render/computable_context.d.ts +1 -1
  10. package/dist/js_render/computable_context.d.ts.map +1 -1
  11. package/dist/js_render/computable_context.js +6 -2
  12. package/dist/js_render/computable_context.js.map +1 -1
  13. package/dist/middle_layer/driver_kit.cjs +6 -1
  14. package/dist/middle_layer/driver_kit.cjs.map +1 -1
  15. package/dist/middle_layer/driver_kit.js +7 -2
  16. package/dist/middle_layer/driver_kit.js.map +1 -1
  17. package/dist/middle_layer/ops.cjs +4 -8
  18. package/dist/middle_layer/ops.cjs.map +1 -1
  19. package/dist/middle_layer/ops.d.ts +1 -1
  20. package/dist/middle_layer/ops.d.ts.map +1 -1
  21. package/dist/middle_layer/ops.js +4 -8
  22. package/dist/middle_layer/ops.js.map +1 -1
  23. package/dist/pool/data.cjs +0 -15
  24. package/dist/pool/data.cjs.map +1 -1
  25. package/dist/pool/data.d.ts +1 -2
  26. package/dist/pool/data.d.ts.map +1 -1
  27. package/dist/pool/data.js +1 -15
  28. package/dist/pool/data.js.map +1 -1
  29. package/dist/pool/driver.cjs +88 -906
  30. package/dist/pool/driver.cjs.map +1 -1
  31. package/dist/pool/driver.d.ts +14 -86
  32. package/dist/pool/driver.d.ts.map +1 -1
  33. package/dist/pool/driver.js +88 -907
  34. package/dist/pool/driver.js.map +1 -1
  35. package/package.json +20 -19
  36. package/src/debug/index.ts +0 -2
  37. package/src/js_render/computable_context.ts +8 -6
  38. package/src/middle_layer/driver_kit.ts +6 -6
  39. package/src/middle_layer/ops.ts +2 -9
  40. package/src/pool/data.ts +0 -22
  41. package/src/pool/driver.ts +141 -1208
@@ -1,26 +1,23 @@
1
- import { __addDisposableResource, __disposeResources } from '../__external/.pnpm/@rollup_plugin-typescript@12.1.4_rollup@4.52.4_tslib@2.7.0_typescript@5.6.3/__external/tslib/tslib.es6.js';
1
+ import { ensureError, isAbortError, PFrameDriverError, mapDataInfo, isDataInfo } from '@platforma-sdk/model';
2
2
  import { PFrameInternal } from '@milaboratories/pl-model-middle-layer';
3
+ import { emptyDir, RefCountPoolBase } from '@milaboratories/ts-helpers';
3
4
  import { isPlTreeNodeAccessor } from '@milaboratories/pl-tree';
4
- import { ensureError, isAbortError, mapPObjectData, mapDataInfo, isDataInfo, uniqueBy, extractAllColumns, mapPTableDef, PFrameDriverError, isPFrameDriverError, canonicalizeJson, getAxisId } from '@platforma-sdk/model';
5
- import { LRUCache } from 'lru-cache';
6
- import { parseDataInfoResource, makeDataInfoFromPColumnValues, traverseParquetChunkResource } from './data.js';
7
- import { createHash } from 'node:crypto';
8
- import { mapValues } from 'es-toolkit';
9
- import { emptyDir, ConcurrencyLimitingExecutor, RefCountResourcePool, assertNever } from '@milaboratories/ts-helpers';
10
- import canonicalize from 'canonicalize';
11
- import { PFrameFactory, HttpHelpers } from '@milaboratories/pframes-rs-node';
5
+ import { AbstractPFrameDriverOpsDefaults, AbstractPFrameDriver, makeJsonDataInfo } from '@milaboratories/pf-driver';
6
+ import { HttpHelpers } from '@milaboratories/pframes-rs-node';
12
7
  import path from 'node:path';
13
- import { getDebugFlags } from '../debug/index.js';
14
8
  import { Readable } from 'node:stream';
9
+ import { parseDataInfoResource, traverseParquetChunkResource } from './data.js';
15
10
 
16
11
  function makeBlobId(res) {
17
12
  return String(res.rid);
18
13
  }
19
- class LocalBlobPool extends RefCountResourcePool {
14
+ class LocalBlobProviderImpl extends RefCountPoolBase {
20
15
  blobDriver;
21
- constructor(blobDriver) {
16
+ logger;
17
+ constructor(blobDriver, logger) {
22
18
  super();
23
19
  this.blobDriver = blobDriver;
20
+ this.logger = logger;
24
21
  }
25
22
  calculateParamsKey(params) {
26
23
  return makeBlobId(params);
@@ -31,38 +28,48 @@ class LocalBlobPool extends RefCountResourcePool {
31
28
  getByKey(blobId) {
32
29
  const resource = super.tryGetByKey(blobId);
33
30
  if (!resource)
34
- throw new PFrameDriverError(`Blob with id ${blobId} not found.`);
31
+ throw new PFrameDriverError(`Local blob with id ${blobId} not found.`);
35
32
  return resource;
36
33
  }
37
- async preloadBlob(blobIds, signal) {
38
- try {
39
- await Promise.all(blobIds.map((blobId) => this.getByKey(blobId).awaitStableFullValue(signal)));
40
- }
41
- catch (err) {
42
- if (!isAbortError(err))
43
- throw err;
44
- }
45
- }
46
- ;
47
- async resolveBlobContent(blobId, signal) {
48
- const computable = this.getByKey(blobId);
49
- const blob = await computable.awaitStableValue(signal);
50
- return await this.blobDriver.getContent(blob.handle, { signal });
34
+ makeDataSource(signal) {
35
+ return {
36
+ preloadBlob: async (blobIds) => {
37
+ try {
38
+ await Promise.all(blobIds.map((blobId) => this.getByKey(blobId).awaitStableFullValue(signal)));
39
+ }
40
+ catch (err) {
41
+ if (!isAbortError(err))
42
+ throw err;
43
+ }
44
+ },
45
+ resolveBlobContent: async (blobId) => {
46
+ const computable = this.getByKey(blobId);
47
+ const blob = await computable.awaitStableValue(signal);
48
+ return await this.blobDriver.getContent(blob.handle, { signal });
49
+ },
50
+ };
51
51
  }
52
- ;
53
52
  }
54
- class RemoteBlobPool extends RefCountResourcePool {
53
+ class RemoteBlobPool extends RefCountPoolBase {
55
54
  blobDriver;
56
- constructor(blobDriver) {
55
+ logger;
56
+ constructor(blobDriver, logger) {
57
57
  super();
58
58
  this.blobDriver = blobDriver;
59
+ this.logger = logger;
59
60
  }
60
61
  calculateParamsKey(params) {
61
- return String(params.rid);
62
+ return makeBlobId(params);
62
63
  }
63
64
  createNewResource(params, _key) {
64
65
  return this.blobDriver.getOnDemandBlob(params);
65
66
  }
67
+ getByKey(blobId) {
68
+ const resource = super.tryGetByKey(blobId);
69
+ if (!resource)
70
+ throw new PFrameDriverError(`Remote blob with id ${blobId} not found.`);
71
+ return resource;
72
+ }
66
73
  async withContent(handle, options) {
67
74
  return await this.blobDriver.withContent(handle, {
68
75
  range: {
@@ -75,10 +82,10 @@ class RemoteBlobPool extends RefCountResourcePool {
75
82
  }
76
83
  }
77
84
  class BlobStore extends PFrameInternal.BaseObjectStore {
78
- remoteBlobPool;
85
+ remoteBlobProvider;
79
86
  constructor(options) {
80
87
  super(options);
81
- this.remoteBlobPool = options.remoteBlobPool;
88
+ this.remoteBlobProvider = options.remoteBlobProvider;
82
89
  }
83
90
  async request(filename, params) {
84
91
  const blobId = filename.slice(0, -PFrameInternal.ParquetExtension.length);
@@ -91,7 +98,7 @@ class BlobStore extends PFrameInternal.BaseObjectStore {
91
98
  }
92
99
  };
93
100
  try {
94
- const computable = this.remoteBlobPool.tryGetByKey(blobId);
101
+ const computable = this.remoteBlobProvider.tryGetByKey(blobId);
95
102
  if (!computable)
96
103
  return await respond({ type: 'NotFound' });
97
104
  let blob;
@@ -119,7 +126,7 @@ class BlobStore extends PFrameInternal.BaseObjectStore {
119
126
  }
120
127
  this.logger('info', `PFrames blob store requesting content for ${blobId}, `
121
128
  + `range [${translatedRange.start}..=${translatedRange.end}]`);
122
- return await this.remoteBlobPool.withContent(blob.handle, {
129
+ return await this.remoteBlobProvider.withContent(blob.handle, {
123
130
  range: translatedRange,
124
131
  signal: params.signal,
125
132
  handler: async (data) => {
@@ -141,885 +148,59 @@ class BlobStore extends PFrameInternal.BaseObjectStore {
141
148
  }
142
149
  }
143
150
  }
144
- const valueTypes = ['Int', 'Long', 'Float', 'Double', 'String', 'Bytes'];
145
- function migrateFilters(filters, logger) {
146
- const filtersV1 = [];
147
- const filtersV2 = [];
148
- for (const filter of filters) {
149
- if (filter.type === 'bySingleColumn') {
150
- filtersV1.push(filter);
151
- filtersV2.push({
152
- ...filter,
153
- type: 'bySingleColumnV2',
154
- });
155
- }
156
- else {
157
- filtersV2.push(filter);
158
- }
159
- }
160
- if (filtersV1.length > 0) {
161
- const filtersV1Json = JSON.stringify(filtersV1);
162
- logger('warn', `type overriten from 'bySingleColumn' to 'bySingleColumnV2' for filters: ${filtersV1Json}`);
163
- }
164
- return filtersV2;
165
- }
166
- function migratePTableFilters(def, logger) {
167
- if (!('partitionFilters' in def)) {
168
- // For old blocks assume all axes filters to be partition filters
169
- return {
170
- ...def,
171
- partitionFilters: migrateFilters(def.filters.filter((f) => f.column.type === 'axis'), logger),
172
- filters: migrateFilters(def.filters.filter((f) => f.column.type === 'column'), logger),
173
- };
174
- }
175
- return {
176
- ...def,
177
- partitionFilters: migrateFilters(def.partitionFilters, logger),
178
- filters: migrateFilters(def.filters, logger),
179
- };
180
- }
181
- function hasArtificialColumns(entry) {
182
- switch (entry.type) {
183
- case 'column':
184
- case 'slicedColumn':
185
- case 'inlineColumn':
186
- return false;
187
- case 'artificialColumn':
188
- return true;
189
- case 'full':
190
- case 'inner':
191
- return entry.entries.some(hasArtificialColumns);
192
- case 'outer':
193
- return hasArtificialColumns(entry.primary) || entry.secondary.some(hasArtificialColumns);
194
- default:
195
- assertNever(entry);
196
- }
197
- }
198
- const bigintReplacer = (_, v) => (typeof v === 'bigint' ? v.toString() : v);
199
- class PFramePool extends RefCountResourcePool {
200
- parquetServer;
201
- localBlobPool;
202
- remoteBlobPool;
203
- logger;
204
- spillPath;
205
- constructor(parquetServer, localBlobPool, remoteBlobPool, logger, spillPath) {
206
- super();
207
- this.parquetServer = parquetServer;
208
- this.localBlobPool = localBlobPool;
209
- this.remoteBlobPool = remoteBlobPool;
210
- this.logger = logger;
211
- this.spillPath = spillPath;
212
- }
213
- calculateParamsKey(params) {
214
- try {
215
- return stableKeyFromPFrameData(params);
216
- }
217
- catch (err) {
218
- if (isPFrameDriverError(err))
219
- throw err;
220
- throw new PFrameDriverError(`PFrame handle calculation failed, `
221
- + `request: ${JSON.stringify(params, bigintReplacer)}, `
222
- + `error: ${ensureError(err)}`);
223
- }
224
- }
225
- createNewResource(params, key) {
226
- if (getDebugFlags().logPFrameRequests) {
227
- this.logger('info', `PFrame creation (pFrameHandle = ${key}): `
228
- + `${JSON.stringify(params, bigintReplacer)}`);
229
- }
230
- return new PFrameHolder(this.parquetServer, this.localBlobPool, this.remoteBlobPool, this.logger, this.spillPath, params);
231
- }
232
- getByKey(key) {
233
- const resource = super.tryGetByKey(key);
234
- if (!resource)
235
- throw new PFrameDriverError(`PFrame not found, handle = ${key}`);
236
- return resource;
237
- }
238
- }
239
- class PTableDefPool extends RefCountResourcePool {
240
- logger;
241
- constructor(logger) {
242
- super();
243
- this.logger = logger;
244
- }
245
- calculateParamsKey(params) {
246
- return stableKeyFromFullPTableDef(params);
247
- }
248
- createNewResource(params, key) {
249
- return new PTableDefHolder(params, key, this.logger);
250
- }
251
- getByKey(key) {
252
- const resource = super.tryGetByKey(key);
253
- if (!resource)
254
- throw new PFrameDriverError(`PTable definition not found, handle = ${key}`);
255
- return resource;
256
- }
257
- }
258
- class PTablePool extends RefCountResourcePool {
259
- pFrames;
260
- pTableDefs;
261
- logger;
262
- constructor(pFrames, pTableDefs, logger) {
263
- super();
264
- this.pFrames = pFrames;
265
- this.pTableDefs = pTableDefs;
266
- this.logger = logger;
267
- }
268
- calculateParamsKey(params) {
269
- return stableKeyFromFullPTableDef(params);
270
- }
271
- createNewResource(params, key) {
272
- if (getDebugFlags().logPFrameRequests) {
273
- this.logger('info', `PTable creation (pTableHandle = ${key}): `
274
- + `${JSON.stringify(params, bigintReplacer)}`);
275
- }
276
- const handle = params.pFrameHandle;
277
- const { pFramePromise, disposeSignal } = this.pFrames.getByKey(handle);
278
- const defDisposeSignal = this.pTableDefs.tryGetByKey(key)?.disposeSignal;
279
- const combinedSignal = AbortSignal.any([disposeSignal, defDisposeSignal].filter((s) => !!s));
280
- // 3. Sort
281
- if (params.def.sorting.length > 0) {
282
- const predecessor = this.acquire({
283
- ...params,
284
- def: {
285
- ...params.def,
286
- sorting: [],
287
- },
288
- });
289
- const { resource: { pTablePromise } } = predecessor;
290
- const sortedTable = pTablePromise.then((pTable) => pTable.sort(params.def.sorting));
291
- return new PTableHolder(handle, combinedSignal, sortedTable, predecessor);
292
- }
293
- // 2. Filter (except the case with artificial columns where cartesian creates too many rows)
294
- if (!hasArtificialColumns(params.def.src) && params.def.filters.length > 0) {
295
- const predecessor = this.acquire({
296
- ...params,
297
- def: {
298
- ...params.def,
299
- filters: [],
300
- },
301
- });
302
- const { resource: { pTablePromise } } = predecessor;
303
- const filteredTable = pTablePromise.then((pTable) => pTable.filter(params.def.filters));
304
- return new PTableHolder(handle, combinedSignal, filteredTable, predecessor);
305
- }
306
- // 1. Join
307
- const table = pFramePromise.then((pFrame) => pFrame.createTable({
308
- src: joinEntryToInternal(params.def.src),
309
- // `params.def.filters` would be non-empty only when join has artificial columns
310
- filters: [...params.def.partitionFilters, ...params.def.filters],
311
- }));
312
- return new PTableHolder(handle, combinedSignal, table);
313
- }
314
- getByKey(key) {
315
- const resource = super.tryGetByKey(key);
316
- if (!resource)
317
- throw new PFrameDriverError(`PTable not found, handle = ${key}`);
318
- return resource;
319
- }
320
- }
321
- class PTableCacheUi {
322
- logger;
323
- ops;
324
- perFrame = new Map();
325
- global;
326
- disposeListeners = new Set();
327
- constructor(logger, ops) {
328
- this.logger = logger;
329
- this.ops = ops;
330
- this.global = new LRUCache({
331
- maxSize: this.ops.pFramesCacheMaxSize,
332
- dispose: (resource, key, reason) => {
333
- if (reason === 'evict') {
334
- this.perFrame.get(resource.resource.pFrame)?.delete(key);
335
- }
336
- if (this.perFrame.get(resource.resource.pFrame)?.size === 0) {
337
- this.perFrame.delete(resource.resource.pFrame);
338
- }
339
- resource.unref();
340
- if (getDebugFlags().logPFrameRequests) {
341
- logger('info', `calculateTableData cache - removed PTable ${key} (reason: ${reason})`);
342
- }
343
- },
344
- });
345
- }
346
- cache(resource, size) {
347
- const key = resource.key;
348
- if (getDebugFlags().logPFrameRequests) {
349
- this.logger('info', `calculateTableData cache - added PTable ${key} with size ${size}`);
350
- }
351
- this.global.set(key, resource, { size: Math.max(size, 1) }); // 1 is minimum size to avoid cache evictions
352
- let perFrame = this.perFrame.get(resource.resource.pFrame);
353
- if (!perFrame) {
354
- perFrame = new LRUCache({
355
- max: this.ops.pFrameCacheMaxCount,
356
- dispose: (_resource, key, reason) => {
357
- if (reason === 'evict') {
358
- this.global.delete(key);
359
- }
360
- },
361
- });
362
- this.perFrame.set(resource.resource.pFrame, perFrame);
363
- }
364
- perFrame.set(key, resource);
365
- if (!this.disposeListeners.has(key)) {
366
- const disposeListener = () => {
367
- this.perFrame.get(resource.resource.pFrame)?.delete(key);
368
- this.global.delete(key);
369
- this.disposeListeners.delete(key);
370
- resource.resource.disposeSignal.removeEventListener('abort', disposeListener);
371
- };
372
- this.disposeListeners.add(key);
373
- resource.resource.disposeSignal.addEventListener('abort', disposeListener);
374
- }
375
- }
376
- }
377
- class PTableCacheModel {
378
- logger;
379
- global;
380
- disposeListeners = new Set();
381
- constructor(logger, ops) {
382
- this.logger = logger;
383
- this.global = new LRUCache({
384
- maxSize: ops.pTablesCacheMaxSize,
385
- dispose: (resource, key, reason) => {
386
- resource.unref();
387
- if (getDebugFlags().logPFrameRequests) {
388
- logger('info', `createPTable cache - removed PTable ${key} (reason: ${reason})`);
389
- }
390
- },
391
- });
392
- }
393
- cache(resource, size, defDisposeSignal) {
394
- const key = resource.key;
395
- if (getDebugFlags().logPFrameRequests) {
396
- this.logger('info', `createPTable cache - added PTable ${key} with size ${size}`);
397
- }
398
- const status = {};
399
- this.global.set(key, resource, { size: Math.max(size, 1), status }); // 1 is minimum size to avoid cache evictions
400
- if (status.maxEntrySizeExceeded) {
401
- resource.unref();
402
- if (getDebugFlags().logPFrameRequests) {
403
- this.logger('info', `createPTable cache - removed PTable ${key} (maxEntrySizeExceeded)`);
404
- }
405
- }
406
- else {
407
- if (!this.disposeListeners.has(key)) {
408
- const disposeListener = () => {
409
- this.global.delete(key);
410
- this.disposeListeners.delete(key);
411
- defDisposeSignal.removeEventListener('abort', disposeListener);
412
- };
413
- this.disposeListeners.add(key);
414
- defDisposeSignal.addEventListener('abort', disposeListener);
415
- }
416
- }
417
- }
418
- }
419
- class PFrameHolder {
420
- parquetServer;
421
- localBlobPool;
422
- remoteBlobPool;
423
- spillPath;
424
- pFramePromise;
425
- abortController = new AbortController();
426
- localBlobs = [];
427
- remoteBlobs = [];
428
- constructor(parquetServer, localBlobPool, remoteBlobPool, logger, spillPath, columns) {
429
- this.parquetServer = parquetServer;
430
- this.localBlobPool = localBlobPool;
431
- this.remoteBlobPool = remoteBlobPool;
432
- this.spillPath = spillPath;
433
- const makeLocalBlobId = (blob) => {
434
- const localBlob = this.localBlobPool.acquire(blob);
435
- this.localBlobs.push(localBlob);
436
- return localBlob.key;
437
- };
438
- const makeRemoteBlobId = (blob) => {
439
- const remoteBlob = this.remoteBlobPool.acquire(blob);
440
- this.remoteBlobs.push(remoteBlob);
441
- return remoteBlob.key + PFrameInternal.ParquetExtension;
442
- };
443
- const mapColumnData = (data) => {
444
- switch (data.type) {
445
- case 'Json':
446
- return { ...data };
447
- case 'JsonPartitioned':
448
- return {
449
- ...data,
450
- parts: mapValues(data.parts, makeLocalBlobId),
451
- };
452
- case 'BinaryPartitioned':
453
- return {
454
- ...data,
455
- parts: mapValues(data.parts, (v) => ({
456
- index: makeLocalBlobId(v.index),
457
- values: makeLocalBlobId(v.values),
458
- })),
459
- };
460
- case 'ParquetPartitioned':
461
- return {
462
- ...data,
463
- parts: mapValues(data.parts, (v) => ({
464
- ...v,
465
- data: makeRemoteBlobId(v.data),
466
- })),
467
- };
468
- default:
469
- assertNever(data);
470
- }
471
- };
472
- const jsonifiedColumns = columns.map((column) => ({
473
- ...column,
474
- data: mapColumnData(column.data),
475
- }));
476
- try {
477
- const pFrame = PFrameFactory.createPFrame({ spillPath: this.spillPath, logger });
478
- pFrame.setDataSource(this);
479
- const promises = [];
480
- for (const column of jsonifiedColumns) {
481
- pFrame.addColumnSpec(column.id, column.spec);
482
- promises.push(pFrame.setColumnData(column.id, column.data, { signal: this.disposeSignal }));
483
- }
484
- this.pFramePromise = Promise.all(promises)
485
- .then(() => pFrame)
486
- .catch((err) => {
487
- this.dispose();
488
- pFrame.dispose();
489
- throw new PFrameDriverError(`PFrame creation failed asynchronously, `
490
- + `columns: ${JSON.stringify(jsonifiedColumns)}, `
491
- + `error: ${ensureError(err)}`);
492
- });
493
- }
494
- catch (err) {
495
- throw new PFrameDriverError(`PFrame creation failed synchronously, `
496
- + `columns: ${JSON.stringify(jsonifiedColumns)}, `
497
- + `error: ${ensureError(err)}`);
498
- }
499
- }
500
- preloadBlob = async (blobIds) => {
501
- return await this.localBlobPool.preloadBlob(blobIds, this.disposeSignal);
502
- };
503
- resolveBlobContent = async (blobId) => {
504
- return await this.localBlobPool.resolveBlobContent(blobId, this.disposeSignal);
505
- };
506
- get disposeSignal() {
507
- return this.abortController.signal;
508
- }
509
- dispose() {
510
- this.abortController.abort();
511
- this.localBlobs.forEach((entry) => entry.unref());
512
- this.remoteBlobs.forEach((entry) => entry.unref());
513
- }
514
- async [Symbol.asyncDispose]() {
515
- this.dispose();
516
- await this.pFramePromise
517
- .then((pFrame) => pFrame.dispose())
518
- .catch(() => { });
519
- }
520
- }
521
- class PTableDefHolder {
522
- def;
523
- pTableHandle;
524
- logger;
525
- abortController = new AbortController();
526
- constructor(def, pTableHandle, logger) {
527
- this.def = def;
528
- this.pTableHandle = pTableHandle;
529
- this.logger = logger;
530
- if (getDebugFlags().logPFrameRequests) {
531
- this.logger('info', `PTable definition saved (pTableHandle = ${this.pTableHandle})`);
532
- }
533
- }
534
- get disposeSignal() {
535
- return this.abortController.signal;
536
- }
537
- [Symbol.dispose]() {
538
- this.abortController.abort();
539
- if (getDebugFlags().logPFrameRequests) {
540
- this.logger('info', `PTable definition disposed (pTableHandle = ${this.pTableHandle})`);
541
- }
542
- }
543
- }
544
- class PTableHolder {
545
- pFrame;
546
- pTablePromise;
547
- predecessor;
548
- abortController = new AbortController();
549
- combinedDisposeSignal;
550
- constructor(pFrame, pFrameDisposeSignal, pTablePromise, predecessor) {
551
- this.pFrame = pFrame;
552
- this.pTablePromise = pTablePromise;
553
- this.predecessor = predecessor;
554
- this.combinedDisposeSignal = AbortSignal.any([pFrameDisposeSignal, this.abortController.signal]);
555
- }
556
- get disposeSignal() {
557
- return this.combinedDisposeSignal;
558
- }
559
- async [Symbol.asyncDispose]() {
560
- this.abortController.abort();
561
- await this.pTablePromise
562
- .then((pTable) => pTable.dispose())
563
- .catch(() => { });
564
- this.predecessor?.unref();
565
- }
566
- }
567
- class PFrameDriver {
568
- logger;
151
+ class RemoteBlobProviderImpl {
152
+ pool;
569
153
  server;
570
- pFrames;
571
- pTableDefs;
572
- pTables;
573
- pTableCacheUi;
574
- pTableCacheModel;
575
- frameConcurrencyLimiter;
576
- tableConcurrencyLimiter;
577
- async pprofDump() {
578
- return await PFrameFactory.pprofDump();
579
- }
580
- static async init(blobDriver, miLogger, spillPath, ops) {
581
- const resolvedSpillPath = path.resolve(spillPath);
582
- await emptyDir(resolvedSpillPath);
583
- const logger = (level, message) => miLogger[level](message);
584
- const localBlobPool = new LocalBlobPool(blobDriver);
585
- const remoteBlobPool = new RemoteBlobPool(blobDriver);
586
- const store = new BlobStore({ remoteBlobPool, logger });
587
- const handler = HttpHelpers.createRequestHandler({ store: store });
588
- const server = await HttpHelpers.createHttpServer({ handler, port: ops.parquetServerPort });
589
- return new PFrameDriver(logger, server, localBlobPool, remoteBlobPool, resolvedSpillPath, ops);
590
- }
591
- constructor(logger, server, localBlobPool, remoteBlobPool, spillPath, ops) {
592
- this.logger = logger;
154
+ constructor(pool, server) {
155
+ this.pool = pool;
593
156
  this.server = server;
594
- const concurrencyLimiter = new ConcurrencyLimitingExecutor(ops.pFrameConcurrency);
595
- this.frameConcurrencyLimiter = concurrencyLimiter;
596
- this.tableConcurrencyLimiter = new ConcurrencyLimitingExecutor(ops.pTableConcurrency);
597
- this.pFrames = new PFramePool(server.info, localBlobPool, remoteBlobPool, logger, spillPath);
598
- this.pTableDefs = new PTableDefPool(logger);
599
- this.pTables = new PTablePool(this.pFrames, this.pTableDefs, logger);
600
- this.pTableCacheUi = new PTableCacheUi(logger, ops);
601
- this.pTableCacheModel = new PTableCacheModel(logger, ops);
602
- }
603
- async dispose() {
604
- return await this.server.stop();
605
- }
606
- async [Symbol.asyncDispose]() {
607
- return await this.dispose();
608
- }
609
- //
610
- // Internal / Config API Methods
611
- //
612
- createPFrame(def, ctx) {
613
- const columns = def
614
- .filter((c) => valueTypes.find((t) => t === c.spec.valueType))
615
- .map((c) => mapPObjectData(c, (d) => isPlTreeNodeAccessor(d)
616
- ? parseDataInfoResource(d)
617
- : isDataInfo(d)
618
- ? d.type === 'ParquetPartitioned'
619
- ? mapDataInfo(d, (a) => traverseParquetChunkResource(a))
620
- : mapDataInfo(d, (a) => a.persist())
621
- : makeDataInfoFromPColumnValues(c.spec, d)));
622
- const distinctColumns = uniqueBy(columns, (column) => column.id);
623
- const res = this.pFrames.acquire(distinctColumns);
624
- ctx.addOnDestroy(res.unref);
625
- return res.key;
626
157
  }
627
- createPTable(rawDef, ctx) {
628
- const def = migratePTableFilters(rawDef, this.logger);
629
- const pFrameHandle = this.createPFrame(extractAllColumns(def.src), ctx);
630
- const defIds = mapPTableDef(def, (c) => c.id);
631
- const sortedDef = sortPTableDef(defIds);
632
- const { key, unref } = this.pTableDefs.acquire({ def: sortedDef, pFrameHandle });
633
- if (getDebugFlags().logPFrameRequests) {
634
- this.logger('info', `Create PTable call (pFrameHandle = ${pFrameHandle}; pTableHandle = ${key})`);
635
- }
636
- ctx.addOnDestroy(unref); // in addition to pframe unref added in createPFrame above
637
- return key;
158
+ static async init(blobDriver, logger, serverOptions) {
159
+ const pool = new RemoteBlobPool(blobDriver, logger);
160
+ const store = new BlobStore({ remoteBlobProvider: pool, logger });
161
+ const handler = HttpHelpers.createRequestHandler({ store });
162
+ const server = await HttpHelpers.createHttpServer({ ...serverOptions, handler });
163
+ logger('info', `PFrames HTTP server started on ${server.info.url}`);
164
+ return new RemoteBlobProviderImpl(pool, server);
638
165
  }
639
- //
640
- // PFrame istance methods
641
- //
642
- async findColumns(handle, request) {
643
- const iRequest = {
644
- ...request,
645
- compatibleWith: request.compatibleWith.length !== 0
646
- ? [{
647
- axesSpec: [
648
- ...new Map(request.compatibleWith.map((item) => [canonicalize(item), item])).values(),
649
- ],
650
- qualifications: [],
651
- }]
652
- : [],
653
- };
654
- const { pFramePromise } = this.pFrames.getByKey(handle);
655
- const pFrame = await pFramePromise;
656
- const responce = await pFrame.findColumns(iRequest);
657
- return {
658
- hits: responce.hits
659
- .filter((h) => // only exactly matching columns
660
- h.mappingVariants.length === 0
661
- || h.mappingVariants.some((v) => v.qualifications.forHit.length === 0
662
- && v.qualifications.forQueries.every((q) => q.length === 0)))
663
- .map((h) => h.hit),
664
- };
166
+ acquire(params) {
167
+ return this.pool.acquire(params);
665
168
  }
666
- async getColumnSpec(handle, columnId) {
667
- const { pFramePromise } = this.pFrames.getByKey(handle);
668
- const pFrame = await pFramePromise;
669
- return await pFrame.getColumnSpec(columnId);
169
+ httpServerInfo() {
170
+ return this.server.info;
670
171
  }
671
- async listColumns(handle) {
672
- const { pFramePromise } = this.pFrames.getByKey(handle);
673
- const pFrame = await pFramePromise;
674
- return await pFrame.listColumns();
675
- }
676
- async calculateTableData(handle, request, range, signal) {
677
- if (getDebugFlags().logPFrameRequests) {
678
- this.logger('info', `Call calculateTableData, handle = ${handle}, request = ${JSON.stringify(request, bigintReplacer)}`);
679
- }
680
- const table = this.pTables.acquire({
681
- pFrameHandle: handle,
682
- def: sortPTableDef(migratePTableFilters(request, this.logger)),
683
- });
684
- const { pTablePromise, disposeSignal } = table.resource;
685
- const pTable = await pTablePromise;
686
- const combinedSignal = AbortSignal.any([signal, disposeSignal].filter((s) => !!s));
687
- return await this.frameConcurrencyLimiter.run(async () => {
688
- try {
689
- const spec = pTable.getSpec();
690
- const data = await pTable.getData([...spec.keys()], {
691
- range,
692
- signal: combinedSignal,
693
- });
694
- const resultSize = await pTable.getFootprint({
695
- withPredecessors: false,
696
- signal: combinedSignal,
697
- });
698
- if (resultSize >= 2 * 1024 * 1024 * 1024) {
699
- throw new PFrameDriverError(`Join results exceed 2GB, please add filters to shrink the result size`);
700
- }
701
- const overallSize = await pTable.getFootprint({
702
- withPredecessors: true,
703
- signal: combinedSignal,
704
- });
705
- this.pTableCacheUi.cache(table, overallSize);
706
- return spec.map((spec, i) => ({
707
- spec: spec,
708
- data: data[i],
709
- }));
710
- }
711
- catch (err) {
712
- table.unref();
713
- throw err;
714
- }
715
- });
716
- }
717
- async getUniqueValues(handle, request, signal) {
718
- if (getDebugFlags().logPFrameRequests) {
719
- this.logger('info', `Call getUniqueValues, handle = ${handle}, request = ${JSON.stringify(request, bigintReplacer)}`);
720
- }
721
- const { pFramePromise, disposeSignal } = this.pFrames.getByKey(handle);
722
- const pFrame = await pFramePromise;
723
- const combinedSignal = AbortSignal.any([signal, disposeSignal].filter((s) => !!s));
724
- return await this.frameConcurrencyLimiter.run(async () => {
725
- return await pFrame.getUniqueValues({
726
- ...request,
727
- filters: migrateFilters(request.filters, this.logger),
728
- }, {
729
- signal: combinedSignal,
730
- });
731
- });
732
- }
733
- //
734
- // PTable istance methods
735
- //
736
- async getSpec(handle) {
737
- const env_1 = { stack: [], error: void 0, hasError: false };
738
- try {
739
- const { def } = this.pTableDefs.getByKey(handle);
740
- const table = __addDisposableResource(env_1, this.pTables.acquire(def), false);
741
- const { pTablePromise } = table.resource;
742
- const pTable = await pTablePromise;
743
- return pTable.getSpec();
744
- }
745
- catch (e_1) {
746
- env_1.error = e_1;
747
- env_1.hasError = true;
748
- }
749
- finally {
750
- __disposeResources(env_1);
751
- }
752
- }
753
- async getShape(handle, signal) {
754
- const { def, disposeSignal: defDisposeSignal } = this.pTableDefs.getByKey(handle);
755
- const table = this.pTables.acquire(def);
756
- const { pTablePromise, disposeSignal } = table.resource;
757
- const pTable = await pTablePromise;
758
- const combinedSignal = AbortSignal.any([signal, disposeSignal].filter((s) => !!s));
759
- const { shape, overallSize } = await this.tableConcurrencyLimiter.run(async () => {
760
- const shape = await pTable.getShape({
761
- signal: combinedSignal,
762
- });
763
- const overallSize = await pTable.getFootprint({
764
- withPredecessors: true,
765
- signal: combinedSignal,
766
- });
767
- return { shape, overallSize };
768
- });
769
- this.pTableCacheModel.cache(table, overallSize, defDisposeSignal);
770
- return shape;
771
- }
772
- async getData(handle, columnIndices, range, signal) {
773
- const { def, disposeSignal: defDisposeSignal } = this.pTableDefs.getByKey(handle);
774
- const table = this.pTables.acquire(def);
775
- const { pTablePromise, disposeSignal } = table.resource;
776
- const pTable = await pTablePromise;
777
- const combinedSignal = AbortSignal.any([signal, disposeSignal].filter((s) => !!s));
778
- const { data, overallSize } = await this.tableConcurrencyLimiter.run(async () => {
779
- const data = await pTable.getData(columnIndices, {
780
- range,
781
- signal: combinedSignal,
782
- });
783
- const overallSize = await pTable.getFootprint({
784
- withPredecessors: true,
785
- signal: combinedSignal,
786
- });
787
- return { data, overallSize };
788
- });
789
- this.pTableCacheModel.cache(table, overallSize, defDisposeSignal);
790
- return data;
791
- }
792
- }
793
- function joinEntryToInternal(entry) {
794
- const type = entry.type;
795
- switch (type) {
796
- case 'column':
797
- return {
798
- type: 'column',
799
- columnId: entry.column,
800
- };
801
- case 'slicedColumn':
802
- return {
803
- type: 'slicedColumn',
804
- columnId: entry.column,
805
- newId: entry.newId,
806
- axisFilters: entry.axisFilters,
807
- };
808
- case 'artificialColumn':
809
- return {
810
- type: 'artificialColumn',
811
- columnId: entry.column,
812
- newId: entry.newId,
813
- axesIndices: entry.axesIndices,
814
- };
815
- case 'inlineColumn':
816
- return {
817
- type: 'inlineColumn',
818
- newId: entry.column.id,
819
- spec: entry.column.spec,
820
- dataInfo: {
821
- type: 'Json',
822
- keyLength: entry.column.spec.axesSpec.length,
823
- data: entry.column.data.reduce((acc, row) => {
824
- acc[JSON.stringify(row.key)] = row.val;
825
- return acc;
826
- }, {}),
827
- },
828
- };
829
- case 'inner':
830
- case 'full':
831
- return {
832
- type: entry.type,
833
- entries: entry.entries.map((col) => joinEntryToInternal(col)),
834
- };
835
- case 'outer':
836
- return {
837
- type: 'outer',
838
- primary: joinEntryToInternal(entry.primary),
839
- secondary: entry.secondary.map((col) => joinEntryToInternal(col)),
840
- };
841
- default:
842
- throw new PFrameDriverError(`unsupported PFrame join entry type: ${type}`);
843
- }
844
- }
845
- function sortPTableDef(def) {
846
- function cmpJoinEntries(lhs, rhs) {
847
- if (lhs.type !== rhs.type) {
848
- return lhs.type < rhs.type ? -1 : 1;
849
- }
850
- const type = lhs.type;
851
- switch (type) {
852
- case 'column':
853
- return lhs.column < rhs.column ? -1 : 1;
854
- case 'slicedColumn':
855
- case 'artificialColumn':
856
- return lhs.newId < rhs.newId ? -1 : 1;
857
- case 'inlineColumn': {
858
- return lhs.column.id < rhs.column.id ? -1 : 1;
859
- }
860
- case 'inner':
861
- case 'full': {
862
- const rhsInner = rhs;
863
- if (lhs.entries.length !== rhsInner.entries.length) {
864
- return lhs.entries.length - rhsInner.entries.length;
865
- }
866
- for (let i = 0; i < lhs.entries.length; i++) {
867
- const cmp = cmpJoinEntries(lhs.entries[i], rhsInner.entries[i]);
868
- if (cmp !== 0) {
869
- return cmp;
870
- }
871
- }
872
- return 0;
873
- }
874
- case 'outer': {
875
- const rhsOuter = rhs;
876
- const cmp = cmpJoinEntries(lhs.primary, rhsOuter.primary);
877
- if (cmp !== 0) {
878
- return cmp;
879
- }
880
- if (lhs.secondary.length !== rhsOuter.secondary.length) {
881
- return lhs.secondary.length - rhsOuter.secondary.length;
882
- }
883
- for (let i = 0; i < lhs.secondary.length; i++) {
884
- const cmp = cmpJoinEntries(lhs.secondary[i], rhsOuter.secondary[i]);
885
- if (cmp !== 0) {
886
- return cmp;
887
- }
888
- }
889
- return 0;
890
- }
891
- default:
892
- assertNever(type);
893
- }
894
- }
895
- function sortJoinEntry(entry) {
896
- switch (entry.type) {
897
- case 'column':
898
- case 'slicedColumn':
899
- case 'inlineColumn':
900
- return entry;
901
- case 'artificialColumn': {
902
- const sortedAxesIndices = entry.axesIndices.toSorted((lhs, rhs) => lhs - rhs);
903
- return {
904
- ...entry,
905
- axesIndices: sortedAxesIndices,
906
- };
907
- }
908
- case 'inner':
909
- case 'full': {
910
- const sortedEntries = entry.entries.map(sortJoinEntry);
911
- sortedEntries.sort(cmpJoinEntries);
912
- return {
913
- ...entry,
914
- entries: sortedEntries,
915
- };
916
- }
917
- case 'outer': {
918
- const sortedSecondary = entry.secondary.map(sortJoinEntry);
919
- sortedSecondary.sort(cmpJoinEntries);
920
- return {
921
- ...entry,
922
- primary: sortJoinEntry(entry.primary),
923
- secondary: sortedSecondary,
924
- };
925
- }
926
- default:
927
- assertNever(entry);
928
- }
929
- }
930
- function sortFilters(filters) {
931
- return filters.toSorted((lhs, rhs) => {
932
- if (lhs.column.type === 'axis' && rhs.column.type === 'axis') {
933
- const lhsId = canonicalizeJson(getAxisId(lhs.column.id));
934
- const rhsId = canonicalizeJson(getAxisId(rhs.column.id));
935
- return lhsId < rhsId ? -1 : 1;
936
- }
937
- else if (lhs.column.type === 'column' && rhs.column.type === 'column') {
938
- return lhs.column.id < rhs.column.id ? -1 : 1;
939
- }
940
- else {
941
- return lhs.column.type === 'axis' ? -1 : 1;
942
- }
943
- });
944
- }
945
- return {
946
- src: sortJoinEntry(def.src),
947
- partitionFilters: sortFilters(def.partitionFilters),
948
- filters: sortFilters(def.filters),
949
- sorting: def.sorting,
172
+ async [Symbol.asyncDispose]() {
173
+ await this.server.stop();
174
+ }
175
+ }
176
+ const PFrameDriverOpsDefaults = {
177
+ ...AbstractPFrameDriverOpsDefaults,
178
+ parquetServerPort: 0, // 0 means that some unused port will be assigned by the OS
179
+ };
180
+ async function createPFrameDriver(params) {
181
+ const resolvedSpillPath = path.resolve(params.spillPath);
182
+ await emptyDir(resolvedSpillPath);
183
+ const logger = (level, message) => params.logger[level](message);
184
+ const localBlobProvider = new LocalBlobProviderImpl(params.blobDriver, logger);
185
+ const remoteBlobProvider = await RemoteBlobProviderImpl.init(params.blobDriver, logger, { port: params.options.parquetServerPort });
186
+ const resolveDataInfo = (spec, data) => {
187
+ return isPlTreeNodeAccessor(data)
188
+ ? parseDataInfoResource(data)
189
+ : isDataInfo(data)
190
+ ? data.type === 'ParquetPartitioned'
191
+ ? mapDataInfo(data, (a) => traverseParquetChunkResource(a))
192
+ : mapDataInfo(data, (a) => a.persist())
193
+ : makeJsonDataInfo(spec, data);
950
194
  };
951
- }
952
- function stableKeyFromFullPTableDef(data) {
953
- try {
954
- const hash = createHash('sha256');
955
- hash.update(canonicalize(data));
956
- return hash.digest().toString('hex');
957
- }
958
- catch (err) {
959
- throw new PFrameDriverError(`PTable handle calculation failed, `
960
- + `request: ${JSON.stringify(data)}, `
961
- + `error: ${ensureError(err)}`);
962
- }
963
- }
964
- function stableKeyFromPFrameData(data) {
965
- const orderedData = [...data].map((column) => mapPObjectData(column, (r) => {
966
- let result;
967
- const type = r.type;
968
- switch (type) {
969
- case 'Json':
970
- result = {
971
- type: r.type,
972
- keyLength: r.keyLength,
973
- payload: Object.entries(r.data).map(([part, value]) => ({
974
- key: part,
975
- value,
976
- })),
977
- };
978
- break;
979
- case 'JsonPartitioned':
980
- result = {
981
- type: r.type,
982
- keyLength: r.partitionKeyLength,
983
- payload: Object.entries(r.parts).map(([part, info]) => ({
984
- key: part,
985
- value: makeBlobId(info),
986
- })),
987
- };
988
- break;
989
- case 'BinaryPartitioned':
990
- result = {
991
- type: r.type,
992
- keyLength: r.partitionKeyLength,
993
- payload: Object.entries(r.parts).map(([part, info]) => ({
994
- key: part,
995
- value: [makeBlobId(info.index), makeBlobId(info.values)],
996
- })),
997
- };
998
- break;
999
- case 'ParquetPartitioned':
1000
- result = {
1001
- type: r.type,
1002
- keyLength: r.partitionKeyLength,
1003
- payload: Object.entries(r.parts).map(([part, info]) => ({
1004
- key: part,
1005
- value: info.dataDigest || [
1006
- makeBlobId(info.data),
1007
- JSON.stringify({ axes: info.axes, column: info.column }),
1008
- ],
1009
- })),
1010
- };
1011
- break;
1012
- default:
1013
- throw new PFrameDriverError(`unsupported resource type: ${JSON.stringify(type)}`);
1014
- }
1015
- result.payload.sort((lhs, rhs) => lhs.key < rhs.key ? -1 : 1);
1016
- return result;
1017
- }));
1018
- orderedData.sort((lhs, rhs) => lhs.id < rhs.id ? -1 : 1);
1019
- const hash = createHash('sha256');
1020
- hash.update(canonicalize(orderedData));
1021
- return hash.digest().toString('hex');
195
+ return new AbstractPFrameDriver({
196
+ logger,
197
+ localBlobProvider,
198
+ remoteBlobProvider,
199
+ spillPath: resolvedSpillPath,
200
+ options: params.options,
201
+ resolveDataInfo,
202
+ });
1022
203
  }
1023
204
 
1024
- export { PFrameDriver };
205
+ export { PFrameDriverOpsDefaults, createPFrameDriver };
1025
206
  //# sourceMappingURL=driver.js.map