@milaboratories/pl-middle-layer 1.43.59 → 1.43.60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/debug/index.cjs +0 -1
- package/dist/debug/index.cjs.map +1 -1
- package/dist/debug/index.d.ts +0 -1
- package/dist/debug/index.d.ts.map +1 -1
- package/dist/debug/index.js +0 -1
- package/dist/debug/index.js.map +1 -1
- package/dist/js_render/computable_context.cjs +6 -2
- package/dist/js_render/computable_context.cjs.map +1 -1
- package/dist/js_render/computable_context.d.ts +1 -1
- package/dist/js_render/computable_context.d.ts.map +1 -1
- package/dist/js_render/computable_context.js +6 -2
- package/dist/js_render/computable_context.js.map +1 -1
- package/dist/middle_layer/driver_kit.cjs +6 -1
- package/dist/middle_layer/driver_kit.cjs.map +1 -1
- package/dist/middle_layer/driver_kit.js +7 -2
- package/dist/middle_layer/driver_kit.js.map +1 -1
- package/dist/middle_layer/ops.cjs +4 -8
- package/dist/middle_layer/ops.cjs.map +1 -1
- package/dist/middle_layer/ops.d.ts +1 -1
- package/dist/middle_layer/ops.d.ts.map +1 -1
- package/dist/middle_layer/ops.js +4 -8
- package/dist/middle_layer/ops.js.map +1 -1
- package/dist/pool/data.cjs +0 -15
- package/dist/pool/data.cjs.map +1 -1
- package/dist/pool/data.d.ts +1 -2
- package/dist/pool/data.d.ts.map +1 -1
- package/dist/pool/data.js +1 -15
- package/dist/pool/data.js.map +1 -1
- package/dist/pool/driver.cjs +88 -906
- package/dist/pool/driver.cjs.map +1 -1
- package/dist/pool/driver.d.ts +14 -86
- package/dist/pool/driver.d.ts.map +1 -1
- package/dist/pool/driver.js +88 -907
- package/dist/pool/driver.js.map +1 -1
- package/package.json +20 -19
- package/src/debug/index.ts +0 -2
- package/src/js_render/computable_context.ts +8 -6
- package/src/middle_layer/driver_kit.ts +6 -6
- package/src/middle_layer/ops.ts +2 -9
- package/src/pool/data.ts +0 -22
- package/src/pool/driver.ts +141 -1208
package/dist/pool/driver.cjs
CHANGED
|
@@ -1,28 +1,25 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
var tslib_es6 = require('../__external/.pnpm/@rollup_plugin-typescript@12.1.4_rollup@4.52.4_tslib@2.7.0_typescript@5.6.3/__external/tslib/tslib.es6.cjs');
|
|
4
|
-
var plModelMiddleLayer = require('@milaboratories/pl-model-middle-layer');
|
|
5
|
-
var plTree = require('@milaboratories/pl-tree');
|
|
6
3
|
var model = require('@platforma-sdk/model');
|
|
7
|
-
var
|
|
8
|
-
var data = require('./data.cjs');
|
|
9
|
-
var node_crypto = require('node:crypto');
|
|
10
|
-
var esToolkit = require('es-toolkit');
|
|
4
|
+
var plModelMiddleLayer = require('@milaboratories/pl-model-middle-layer');
|
|
11
5
|
var tsHelpers = require('@milaboratories/ts-helpers');
|
|
12
|
-
var
|
|
6
|
+
var plTree = require('@milaboratories/pl-tree');
|
|
7
|
+
var pfDriver = require('@milaboratories/pf-driver');
|
|
13
8
|
var pframesRsNode = require('@milaboratories/pframes-rs-node');
|
|
14
9
|
var path = require('node:path');
|
|
15
|
-
var index = require('../debug/index.cjs');
|
|
16
10
|
var node_stream = require('node:stream');
|
|
11
|
+
var data = require('./data.cjs');
|
|
17
12
|
|
|
18
13
|
function makeBlobId(res) {
|
|
19
14
|
return String(res.rid);
|
|
20
15
|
}
|
|
21
|
-
class
|
|
16
|
+
class LocalBlobProviderImpl extends tsHelpers.RefCountPoolBase {
|
|
22
17
|
blobDriver;
|
|
23
|
-
|
|
18
|
+
logger;
|
|
19
|
+
constructor(blobDriver, logger) {
|
|
24
20
|
super();
|
|
25
21
|
this.blobDriver = blobDriver;
|
|
22
|
+
this.logger = logger;
|
|
26
23
|
}
|
|
27
24
|
calculateParamsKey(params) {
|
|
28
25
|
return makeBlobId(params);
|
|
@@ -33,38 +30,48 @@ class LocalBlobPool extends tsHelpers.RefCountResourcePool {
|
|
|
33
30
|
getByKey(blobId) {
|
|
34
31
|
const resource = super.tryGetByKey(blobId);
|
|
35
32
|
if (!resource)
|
|
36
|
-
throw new model.PFrameDriverError(`
|
|
33
|
+
throw new model.PFrameDriverError(`Local blob with id ${blobId} not found.`);
|
|
37
34
|
return resource;
|
|
38
35
|
}
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
36
|
+
makeDataSource(signal) {
|
|
37
|
+
return {
|
|
38
|
+
preloadBlob: async (blobIds) => {
|
|
39
|
+
try {
|
|
40
|
+
await Promise.all(blobIds.map((blobId) => this.getByKey(blobId).awaitStableFullValue(signal)));
|
|
41
|
+
}
|
|
42
|
+
catch (err) {
|
|
43
|
+
if (!model.isAbortError(err))
|
|
44
|
+
throw err;
|
|
45
|
+
}
|
|
46
|
+
},
|
|
47
|
+
resolveBlobContent: async (blobId) => {
|
|
48
|
+
const computable = this.getByKey(blobId);
|
|
49
|
+
const blob = await computable.awaitStableValue(signal);
|
|
50
|
+
return await this.blobDriver.getContent(blob.handle, { signal });
|
|
51
|
+
},
|
|
52
|
+
};
|
|
53
53
|
}
|
|
54
|
-
;
|
|
55
54
|
}
|
|
56
|
-
class RemoteBlobPool extends tsHelpers.
|
|
55
|
+
class RemoteBlobPool extends tsHelpers.RefCountPoolBase {
|
|
57
56
|
blobDriver;
|
|
58
|
-
|
|
57
|
+
logger;
|
|
58
|
+
constructor(blobDriver, logger) {
|
|
59
59
|
super();
|
|
60
60
|
this.blobDriver = blobDriver;
|
|
61
|
+
this.logger = logger;
|
|
61
62
|
}
|
|
62
63
|
calculateParamsKey(params) {
|
|
63
|
-
return
|
|
64
|
+
return makeBlobId(params);
|
|
64
65
|
}
|
|
65
66
|
createNewResource(params, _key) {
|
|
66
67
|
return this.blobDriver.getOnDemandBlob(params);
|
|
67
68
|
}
|
|
69
|
+
getByKey(blobId) {
|
|
70
|
+
const resource = super.tryGetByKey(blobId);
|
|
71
|
+
if (!resource)
|
|
72
|
+
throw new model.PFrameDriverError(`Remote blob with id ${blobId} not found.`);
|
|
73
|
+
return resource;
|
|
74
|
+
}
|
|
68
75
|
async withContent(handle, options) {
|
|
69
76
|
return await this.blobDriver.withContent(handle, {
|
|
70
77
|
range: {
|
|
@@ -77,10 +84,10 @@ class RemoteBlobPool extends tsHelpers.RefCountResourcePool {
|
|
|
77
84
|
}
|
|
78
85
|
}
|
|
79
86
|
class BlobStore extends plModelMiddleLayer.PFrameInternal.BaseObjectStore {
|
|
80
|
-
|
|
87
|
+
remoteBlobProvider;
|
|
81
88
|
constructor(options) {
|
|
82
89
|
super(options);
|
|
83
|
-
this.
|
|
90
|
+
this.remoteBlobProvider = options.remoteBlobProvider;
|
|
84
91
|
}
|
|
85
92
|
async request(filename, params) {
|
|
86
93
|
const blobId = filename.slice(0, -plModelMiddleLayer.PFrameInternal.ParquetExtension.length);
|
|
@@ -93,7 +100,7 @@ class BlobStore extends plModelMiddleLayer.PFrameInternal.BaseObjectStore {
|
|
|
93
100
|
}
|
|
94
101
|
};
|
|
95
102
|
try {
|
|
96
|
-
const computable = this.
|
|
103
|
+
const computable = this.remoteBlobProvider.tryGetByKey(blobId);
|
|
97
104
|
if (!computable)
|
|
98
105
|
return await respond({ type: 'NotFound' });
|
|
99
106
|
let blob;
|
|
@@ -121,7 +128,7 @@ class BlobStore extends plModelMiddleLayer.PFrameInternal.BaseObjectStore {
|
|
|
121
128
|
}
|
|
122
129
|
this.logger('info', `PFrames blob store requesting content for ${blobId}, `
|
|
123
130
|
+ `range [${translatedRange.start}..=${translatedRange.end}]`);
|
|
124
|
-
return await this.
|
|
131
|
+
return await this.remoteBlobProvider.withContent(blob.handle, {
|
|
125
132
|
range: translatedRange,
|
|
126
133
|
signal: params.signal,
|
|
127
134
|
handler: async (data) => {
|
|
@@ -143,885 +150,60 @@ class BlobStore extends plModelMiddleLayer.PFrameInternal.BaseObjectStore {
|
|
|
143
150
|
}
|
|
144
151
|
}
|
|
145
152
|
}
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
const filtersV1 = [];
|
|
149
|
-
const filtersV2 = [];
|
|
150
|
-
for (const filter of filters) {
|
|
151
|
-
if (filter.type === 'bySingleColumn') {
|
|
152
|
-
filtersV1.push(filter);
|
|
153
|
-
filtersV2.push({
|
|
154
|
-
...filter,
|
|
155
|
-
type: 'bySingleColumnV2',
|
|
156
|
-
});
|
|
157
|
-
}
|
|
158
|
-
else {
|
|
159
|
-
filtersV2.push(filter);
|
|
160
|
-
}
|
|
161
|
-
}
|
|
162
|
-
if (filtersV1.length > 0) {
|
|
163
|
-
const filtersV1Json = JSON.stringify(filtersV1);
|
|
164
|
-
logger('warn', `type overriten from 'bySingleColumn' to 'bySingleColumnV2' for filters: ${filtersV1Json}`);
|
|
165
|
-
}
|
|
166
|
-
return filtersV2;
|
|
167
|
-
}
|
|
168
|
-
function migratePTableFilters(def, logger) {
|
|
169
|
-
if (!('partitionFilters' in def)) {
|
|
170
|
-
// For old blocks assume all axes filters to be partition filters
|
|
171
|
-
return {
|
|
172
|
-
...def,
|
|
173
|
-
partitionFilters: migrateFilters(def.filters.filter((f) => f.column.type === 'axis'), logger),
|
|
174
|
-
filters: migrateFilters(def.filters.filter((f) => f.column.type === 'column'), logger),
|
|
175
|
-
};
|
|
176
|
-
}
|
|
177
|
-
return {
|
|
178
|
-
...def,
|
|
179
|
-
partitionFilters: migrateFilters(def.partitionFilters, logger),
|
|
180
|
-
filters: migrateFilters(def.filters, logger),
|
|
181
|
-
};
|
|
182
|
-
}
|
|
183
|
-
function hasArtificialColumns(entry) {
|
|
184
|
-
switch (entry.type) {
|
|
185
|
-
case 'column':
|
|
186
|
-
case 'slicedColumn':
|
|
187
|
-
case 'inlineColumn':
|
|
188
|
-
return false;
|
|
189
|
-
case 'artificialColumn':
|
|
190
|
-
return true;
|
|
191
|
-
case 'full':
|
|
192
|
-
case 'inner':
|
|
193
|
-
return entry.entries.some(hasArtificialColumns);
|
|
194
|
-
case 'outer':
|
|
195
|
-
return hasArtificialColumns(entry.primary) || entry.secondary.some(hasArtificialColumns);
|
|
196
|
-
default:
|
|
197
|
-
tsHelpers.assertNever(entry);
|
|
198
|
-
}
|
|
199
|
-
}
|
|
200
|
-
const bigintReplacer = (_, v) => (typeof v === 'bigint' ? v.toString() : v);
|
|
201
|
-
class PFramePool extends tsHelpers.RefCountResourcePool {
|
|
202
|
-
parquetServer;
|
|
203
|
-
localBlobPool;
|
|
204
|
-
remoteBlobPool;
|
|
205
|
-
logger;
|
|
206
|
-
spillPath;
|
|
207
|
-
constructor(parquetServer, localBlobPool, remoteBlobPool, logger, spillPath) {
|
|
208
|
-
super();
|
|
209
|
-
this.parquetServer = parquetServer;
|
|
210
|
-
this.localBlobPool = localBlobPool;
|
|
211
|
-
this.remoteBlobPool = remoteBlobPool;
|
|
212
|
-
this.logger = logger;
|
|
213
|
-
this.spillPath = spillPath;
|
|
214
|
-
}
|
|
215
|
-
calculateParamsKey(params) {
|
|
216
|
-
try {
|
|
217
|
-
return stableKeyFromPFrameData(params);
|
|
218
|
-
}
|
|
219
|
-
catch (err) {
|
|
220
|
-
if (model.isPFrameDriverError(err))
|
|
221
|
-
throw err;
|
|
222
|
-
throw new model.PFrameDriverError(`PFrame handle calculation failed, `
|
|
223
|
-
+ `request: ${JSON.stringify(params, bigintReplacer)}, `
|
|
224
|
-
+ `error: ${model.ensureError(err)}`);
|
|
225
|
-
}
|
|
226
|
-
}
|
|
227
|
-
createNewResource(params, key) {
|
|
228
|
-
if (index.getDebugFlags().logPFrameRequests) {
|
|
229
|
-
this.logger('info', `PFrame creation (pFrameHandle = ${key}): `
|
|
230
|
-
+ `${JSON.stringify(params, bigintReplacer)}`);
|
|
231
|
-
}
|
|
232
|
-
return new PFrameHolder(this.parquetServer, this.localBlobPool, this.remoteBlobPool, this.logger, this.spillPath, params);
|
|
233
|
-
}
|
|
234
|
-
getByKey(key) {
|
|
235
|
-
const resource = super.tryGetByKey(key);
|
|
236
|
-
if (!resource)
|
|
237
|
-
throw new model.PFrameDriverError(`PFrame not found, handle = ${key}`);
|
|
238
|
-
return resource;
|
|
239
|
-
}
|
|
240
|
-
}
|
|
241
|
-
class PTableDefPool extends tsHelpers.RefCountResourcePool {
|
|
242
|
-
logger;
|
|
243
|
-
constructor(logger) {
|
|
244
|
-
super();
|
|
245
|
-
this.logger = logger;
|
|
246
|
-
}
|
|
247
|
-
calculateParamsKey(params) {
|
|
248
|
-
return stableKeyFromFullPTableDef(params);
|
|
249
|
-
}
|
|
250
|
-
createNewResource(params, key) {
|
|
251
|
-
return new PTableDefHolder(params, key, this.logger);
|
|
252
|
-
}
|
|
253
|
-
getByKey(key) {
|
|
254
|
-
const resource = super.tryGetByKey(key);
|
|
255
|
-
if (!resource)
|
|
256
|
-
throw new model.PFrameDriverError(`PTable definition not found, handle = ${key}`);
|
|
257
|
-
return resource;
|
|
258
|
-
}
|
|
259
|
-
}
|
|
260
|
-
class PTablePool extends tsHelpers.RefCountResourcePool {
|
|
261
|
-
pFrames;
|
|
262
|
-
pTableDefs;
|
|
263
|
-
logger;
|
|
264
|
-
constructor(pFrames, pTableDefs, logger) {
|
|
265
|
-
super();
|
|
266
|
-
this.pFrames = pFrames;
|
|
267
|
-
this.pTableDefs = pTableDefs;
|
|
268
|
-
this.logger = logger;
|
|
269
|
-
}
|
|
270
|
-
calculateParamsKey(params) {
|
|
271
|
-
return stableKeyFromFullPTableDef(params);
|
|
272
|
-
}
|
|
273
|
-
createNewResource(params, key) {
|
|
274
|
-
if (index.getDebugFlags().logPFrameRequests) {
|
|
275
|
-
this.logger('info', `PTable creation (pTableHandle = ${key}): `
|
|
276
|
-
+ `${JSON.stringify(params, bigintReplacer)}`);
|
|
277
|
-
}
|
|
278
|
-
const handle = params.pFrameHandle;
|
|
279
|
-
const { pFramePromise, disposeSignal } = this.pFrames.getByKey(handle);
|
|
280
|
-
const defDisposeSignal = this.pTableDefs.tryGetByKey(key)?.disposeSignal;
|
|
281
|
-
const combinedSignal = AbortSignal.any([disposeSignal, defDisposeSignal].filter((s) => !!s));
|
|
282
|
-
// 3. Sort
|
|
283
|
-
if (params.def.sorting.length > 0) {
|
|
284
|
-
const predecessor = this.acquire({
|
|
285
|
-
...params,
|
|
286
|
-
def: {
|
|
287
|
-
...params.def,
|
|
288
|
-
sorting: [],
|
|
289
|
-
},
|
|
290
|
-
});
|
|
291
|
-
const { resource: { pTablePromise } } = predecessor;
|
|
292
|
-
const sortedTable = pTablePromise.then((pTable) => pTable.sort(params.def.sorting));
|
|
293
|
-
return new PTableHolder(handle, combinedSignal, sortedTable, predecessor);
|
|
294
|
-
}
|
|
295
|
-
// 2. Filter (except the case with artificial columns where cartesian creates too many rows)
|
|
296
|
-
if (!hasArtificialColumns(params.def.src) && params.def.filters.length > 0) {
|
|
297
|
-
const predecessor = this.acquire({
|
|
298
|
-
...params,
|
|
299
|
-
def: {
|
|
300
|
-
...params.def,
|
|
301
|
-
filters: [],
|
|
302
|
-
},
|
|
303
|
-
});
|
|
304
|
-
const { resource: { pTablePromise } } = predecessor;
|
|
305
|
-
const filteredTable = pTablePromise.then((pTable) => pTable.filter(params.def.filters));
|
|
306
|
-
return new PTableHolder(handle, combinedSignal, filteredTable, predecessor);
|
|
307
|
-
}
|
|
308
|
-
// 1. Join
|
|
309
|
-
const table = pFramePromise.then((pFrame) => pFrame.createTable({
|
|
310
|
-
src: joinEntryToInternal(params.def.src),
|
|
311
|
-
// `params.def.filters` would be non-empty only when join has artificial columns
|
|
312
|
-
filters: [...params.def.partitionFilters, ...params.def.filters],
|
|
313
|
-
}));
|
|
314
|
-
return new PTableHolder(handle, combinedSignal, table);
|
|
315
|
-
}
|
|
316
|
-
getByKey(key) {
|
|
317
|
-
const resource = super.tryGetByKey(key);
|
|
318
|
-
if (!resource)
|
|
319
|
-
throw new model.PFrameDriverError(`PTable not found, handle = ${key}`);
|
|
320
|
-
return resource;
|
|
321
|
-
}
|
|
322
|
-
}
|
|
323
|
-
class PTableCacheUi {
|
|
324
|
-
logger;
|
|
325
|
-
ops;
|
|
326
|
-
perFrame = new Map();
|
|
327
|
-
global;
|
|
328
|
-
disposeListeners = new Set();
|
|
329
|
-
constructor(logger, ops) {
|
|
330
|
-
this.logger = logger;
|
|
331
|
-
this.ops = ops;
|
|
332
|
-
this.global = new lruCache.LRUCache({
|
|
333
|
-
maxSize: this.ops.pFramesCacheMaxSize,
|
|
334
|
-
dispose: (resource, key, reason) => {
|
|
335
|
-
if (reason === 'evict') {
|
|
336
|
-
this.perFrame.get(resource.resource.pFrame)?.delete(key);
|
|
337
|
-
}
|
|
338
|
-
if (this.perFrame.get(resource.resource.pFrame)?.size === 0) {
|
|
339
|
-
this.perFrame.delete(resource.resource.pFrame);
|
|
340
|
-
}
|
|
341
|
-
resource.unref();
|
|
342
|
-
if (index.getDebugFlags().logPFrameRequests) {
|
|
343
|
-
logger('info', `calculateTableData cache - removed PTable ${key} (reason: ${reason})`);
|
|
344
|
-
}
|
|
345
|
-
},
|
|
346
|
-
});
|
|
347
|
-
}
|
|
348
|
-
cache(resource, size) {
|
|
349
|
-
const key = resource.key;
|
|
350
|
-
if (index.getDebugFlags().logPFrameRequests) {
|
|
351
|
-
this.logger('info', `calculateTableData cache - added PTable ${key} with size ${size}`);
|
|
352
|
-
}
|
|
353
|
-
this.global.set(key, resource, { size: Math.max(size, 1) }); // 1 is minimum size to avoid cache evictions
|
|
354
|
-
let perFrame = this.perFrame.get(resource.resource.pFrame);
|
|
355
|
-
if (!perFrame) {
|
|
356
|
-
perFrame = new lruCache.LRUCache({
|
|
357
|
-
max: this.ops.pFrameCacheMaxCount,
|
|
358
|
-
dispose: (_resource, key, reason) => {
|
|
359
|
-
if (reason === 'evict') {
|
|
360
|
-
this.global.delete(key);
|
|
361
|
-
}
|
|
362
|
-
},
|
|
363
|
-
});
|
|
364
|
-
this.perFrame.set(resource.resource.pFrame, perFrame);
|
|
365
|
-
}
|
|
366
|
-
perFrame.set(key, resource);
|
|
367
|
-
if (!this.disposeListeners.has(key)) {
|
|
368
|
-
const disposeListener = () => {
|
|
369
|
-
this.perFrame.get(resource.resource.pFrame)?.delete(key);
|
|
370
|
-
this.global.delete(key);
|
|
371
|
-
this.disposeListeners.delete(key);
|
|
372
|
-
resource.resource.disposeSignal.removeEventListener('abort', disposeListener);
|
|
373
|
-
};
|
|
374
|
-
this.disposeListeners.add(key);
|
|
375
|
-
resource.resource.disposeSignal.addEventListener('abort', disposeListener);
|
|
376
|
-
}
|
|
377
|
-
}
|
|
378
|
-
}
|
|
379
|
-
class PTableCacheModel {
|
|
380
|
-
logger;
|
|
381
|
-
global;
|
|
382
|
-
disposeListeners = new Set();
|
|
383
|
-
constructor(logger, ops) {
|
|
384
|
-
this.logger = logger;
|
|
385
|
-
this.global = new lruCache.LRUCache({
|
|
386
|
-
maxSize: ops.pTablesCacheMaxSize,
|
|
387
|
-
dispose: (resource, key, reason) => {
|
|
388
|
-
resource.unref();
|
|
389
|
-
if (index.getDebugFlags().logPFrameRequests) {
|
|
390
|
-
logger('info', `createPTable cache - removed PTable ${key} (reason: ${reason})`);
|
|
391
|
-
}
|
|
392
|
-
},
|
|
393
|
-
});
|
|
394
|
-
}
|
|
395
|
-
cache(resource, size, defDisposeSignal) {
|
|
396
|
-
const key = resource.key;
|
|
397
|
-
if (index.getDebugFlags().logPFrameRequests) {
|
|
398
|
-
this.logger('info', `createPTable cache - added PTable ${key} with size ${size}`);
|
|
399
|
-
}
|
|
400
|
-
const status = {};
|
|
401
|
-
this.global.set(key, resource, { size: Math.max(size, 1), status }); // 1 is minimum size to avoid cache evictions
|
|
402
|
-
if (status.maxEntrySizeExceeded) {
|
|
403
|
-
resource.unref();
|
|
404
|
-
if (index.getDebugFlags().logPFrameRequests) {
|
|
405
|
-
this.logger('info', `createPTable cache - removed PTable ${key} (maxEntrySizeExceeded)`);
|
|
406
|
-
}
|
|
407
|
-
}
|
|
408
|
-
else {
|
|
409
|
-
if (!this.disposeListeners.has(key)) {
|
|
410
|
-
const disposeListener = () => {
|
|
411
|
-
this.global.delete(key);
|
|
412
|
-
this.disposeListeners.delete(key);
|
|
413
|
-
defDisposeSignal.removeEventListener('abort', disposeListener);
|
|
414
|
-
};
|
|
415
|
-
this.disposeListeners.add(key);
|
|
416
|
-
defDisposeSignal.addEventListener('abort', disposeListener);
|
|
417
|
-
}
|
|
418
|
-
}
|
|
419
|
-
}
|
|
420
|
-
}
|
|
421
|
-
class PFrameHolder {
|
|
422
|
-
parquetServer;
|
|
423
|
-
localBlobPool;
|
|
424
|
-
remoteBlobPool;
|
|
425
|
-
spillPath;
|
|
426
|
-
pFramePromise;
|
|
427
|
-
abortController = new AbortController();
|
|
428
|
-
localBlobs = [];
|
|
429
|
-
remoteBlobs = [];
|
|
430
|
-
constructor(parquetServer, localBlobPool, remoteBlobPool, logger, spillPath, columns) {
|
|
431
|
-
this.parquetServer = parquetServer;
|
|
432
|
-
this.localBlobPool = localBlobPool;
|
|
433
|
-
this.remoteBlobPool = remoteBlobPool;
|
|
434
|
-
this.spillPath = spillPath;
|
|
435
|
-
const makeLocalBlobId = (blob) => {
|
|
436
|
-
const localBlob = this.localBlobPool.acquire(blob);
|
|
437
|
-
this.localBlobs.push(localBlob);
|
|
438
|
-
return localBlob.key;
|
|
439
|
-
};
|
|
440
|
-
const makeRemoteBlobId = (blob) => {
|
|
441
|
-
const remoteBlob = this.remoteBlobPool.acquire(blob);
|
|
442
|
-
this.remoteBlobs.push(remoteBlob);
|
|
443
|
-
return remoteBlob.key + plModelMiddleLayer.PFrameInternal.ParquetExtension;
|
|
444
|
-
};
|
|
445
|
-
const mapColumnData = (data) => {
|
|
446
|
-
switch (data.type) {
|
|
447
|
-
case 'Json':
|
|
448
|
-
return { ...data };
|
|
449
|
-
case 'JsonPartitioned':
|
|
450
|
-
return {
|
|
451
|
-
...data,
|
|
452
|
-
parts: esToolkit.mapValues(data.parts, makeLocalBlobId),
|
|
453
|
-
};
|
|
454
|
-
case 'BinaryPartitioned':
|
|
455
|
-
return {
|
|
456
|
-
...data,
|
|
457
|
-
parts: esToolkit.mapValues(data.parts, (v) => ({
|
|
458
|
-
index: makeLocalBlobId(v.index),
|
|
459
|
-
values: makeLocalBlobId(v.values),
|
|
460
|
-
})),
|
|
461
|
-
};
|
|
462
|
-
case 'ParquetPartitioned':
|
|
463
|
-
return {
|
|
464
|
-
...data,
|
|
465
|
-
parts: esToolkit.mapValues(data.parts, (v) => ({
|
|
466
|
-
...v,
|
|
467
|
-
data: makeRemoteBlobId(v.data),
|
|
468
|
-
})),
|
|
469
|
-
};
|
|
470
|
-
default:
|
|
471
|
-
tsHelpers.assertNever(data);
|
|
472
|
-
}
|
|
473
|
-
};
|
|
474
|
-
const jsonifiedColumns = columns.map((column) => ({
|
|
475
|
-
...column,
|
|
476
|
-
data: mapColumnData(column.data),
|
|
477
|
-
}));
|
|
478
|
-
try {
|
|
479
|
-
const pFrame = pframesRsNode.PFrameFactory.createPFrame({ spillPath: this.spillPath, logger });
|
|
480
|
-
pFrame.setDataSource(this);
|
|
481
|
-
const promises = [];
|
|
482
|
-
for (const column of jsonifiedColumns) {
|
|
483
|
-
pFrame.addColumnSpec(column.id, column.spec);
|
|
484
|
-
promises.push(pFrame.setColumnData(column.id, column.data, { signal: this.disposeSignal }));
|
|
485
|
-
}
|
|
486
|
-
this.pFramePromise = Promise.all(promises)
|
|
487
|
-
.then(() => pFrame)
|
|
488
|
-
.catch((err) => {
|
|
489
|
-
this.dispose();
|
|
490
|
-
pFrame.dispose();
|
|
491
|
-
throw new model.PFrameDriverError(`PFrame creation failed asynchronously, `
|
|
492
|
-
+ `columns: ${JSON.stringify(jsonifiedColumns)}, `
|
|
493
|
-
+ `error: ${model.ensureError(err)}`);
|
|
494
|
-
});
|
|
495
|
-
}
|
|
496
|
-
catch (err) {
|
|
497
|
-
throw new model.PFrameDriverError(`PFrame creation failed synchronously, `
|
|
498
|
-
+ `columns: ${JSON.stringify(jsonifiedColumns)}, `
|
|
499
|
-
+ `error: ${model.ensureError(err)}`);
|
|
500
|
-
}
|
|
501
|
-
}
|
|
502
|
-
preloadBlob = async (blobIds) => {
|
|
503
|
-
return await this.localBlobPool.preloadBlob(blobIds, this.disposeSignal);
|
|
504
|
-
};
|
|
505
|
-
resolveBlobContent = async (blobId) => {
|
|
506
|
-
return await this.localBlobPool.resolveBlobContent(blobId, this.disposeSignal);
|
|
507
|
-
};
|
|
508
|
-
get disposeSignal() {
|
|
509
|
-
return this.abortController.signal;
|
|
510
|
-
}
|
|
511
|
-
dispose() {
|
|
512
|
-
this.abortController.abort();
|
|
513
|
-
this.localBlobs.forEach((entry) => entry.unref());
|
|
514
|
-
this.remoteBlobs.forEach((entry) => entry.unref());
|
|
515
|
-
}
|
|
516
|
-
async [Symbol.asyncDispose]() {
|
|
517
|
-
this.dispose();
|
|
518
|
-
await this.pFramePromise
|
|
519
|
-
.then((pFrame) => pFrame.dispose())
|
|
520
|
-
.catch(() => { });
|
|
521
|
-
}
|
|
522
|
-
}
|
|
523
|
-
class PTableDefHolder {
|
|
524
|
-
def;
|
|
525
|
-
pTableHandle;
|
|
526
|
-
logger;
|
|
527
|
-
abortController = new AbortController();
|
|
528
|
-
constructor(def, pTableHandle, logger) {
|
|
529
|
-
this.def = def;
|
|
530
|
-
this.pTableHandle = pTableHandle;
|
|
531
|
-
this.logger = logger;
|
|
532
|
-
if (index.getDebugFlags().logPFrameRequests) {
|
|
533
|
-
this.logger('info', `PTable definition saved (pTableHandle = ${this.pTableHandle})`);
|
|
534
|
-
}
|
|
535
|
-
}
|
|
536
|
-
get disposeSignal() {
|
|
537
|
-
return this.abortController.signal;
|
|
538
|
-
}
|
|
539
|
-
[Symbol.dispose]() {
|
|
540
|
-
this.abortController.abort();
|
|
541
|
-
if (index.getDebugFlags().logPFrameRequests) {
|
|
542
|
-
this.logger('info', `PTable definition disposed (pTableHandle = ${this.pTableHandle})`);
|
|
543
|
-
}
|
|
544
|
-
}
|
|
545
|
-
}
|
|
546
|
-
class PTableHolder {
|
|
547
|
-
pFrame;
|
|
548
|
-
pTablePromise;
|
|
549
|
-
predecessor;
|
|
550
|
-
abortController = new AbortController();
|
|
551
|
-
combinedDisposeSignal;
|
|
552
|
-
constructor(pFrame, pFrameDisposeSignal, pTablePromise, predecessor) {
|
|
553
|
-
this.pFrame = pFrame;
|
|
554
|
-
this.pTablePromise = pTablePromise;
|
|
555
|
-
this.predecessor = predecessor;
|
|
556
|
-
this.combinedDisposeSignal = AbortSignal.any([pFrameDisposeSignal, this.abortController.signal]);
|
|
557
|
-
}
|
|
558
|
-
get disposeSignal() {
|
|
559
|
-
return this.combinedDisposeSignal;
|
|
560
|
-
}
|
|
561
|
-
async [Symbol.asyncDispose]() {
|
|
562
|
-
this.abortController.abort();
|
|
563
|
-
await this.pTablePromise
|
|
564
|
-
.then((pTable) => pTable.dispose())
|
|
565
|
-
.catch(() => { });
|
|
566
|
-
this.predecessor?.unref();
|
|
567
|
-
}
|
|
568
|
-
}
|
|
569
|
-
class PFrameDriver {
|
|
570
|
-
logger;
|
|
153
|
+
class RemoteBlobProviderImpl {
|
|
154
|
+
pool;
|
|
571
155
|
server;
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
pTables;
|
|
575
|
-
pTableCacheUi;
|
|
576
|
-
pTableCacheModel;
|
|
577
|
-
frameConcurrencyLimiter;
|
|
578
|
-
tableConcurrencyLimiter;
|
|
579
|
-
async pprofDump() {
|
|
580
|
-
return await pframesRsNode.PFrameFactory.pprofDump();
|
|
581
|
-
}
|
|
582
|
-
static async init(blobDriver, miLogger, spillPath, ops) {
|
|
583
|
-
const resolvedSpillPath = path.resolve(spillPath);
|
|
584
|
-
await tsHelpers.emptyDir(resolvedSpillPath);
|
|
585
|
-
const logger = (level, message) => miLogger[level](message);
|
|
586
|
-
const localBlobPool = new LocalBlobPool(blobDriver);
|
|
587
|
-
const remoteBlobPool = new RemoteBlobPool(blobDriver);
|
|
588
|
-
const store = new BlobStore({ remoteBlobPool, logger });
|
|
589
|
-
const handler = pframesRsNode.HttpHelpers.createRequestHandler({ store: store });
|
|
590
|
-
const server = await pframesRsNode.HttpHelpers.createHttpServer({ handler, port: ops.parquetServerPort });
|
|
591
|
-
return new PFrameDriver(logger, server, localBlobPool, remoteBlobPool, resolvedSpillPath, ops);
|
|
592
|
-
}
|
|
593
|
-
constructor(logger, server, localBlobPool, remoteBlobPool, spillPath, ops) {
|
|
594
|
-
this.logger = logger;
|
|
156
|
+
constructor(pool, server) {
|
|
157
|
+
this.pool = pool;
|
|
595
158
|
this.server = server;
|
|
596
|
-
const concurrencyLimiter = new tsHelpers.ConcurrencyLimitingExecutor(ops.pFrameConcurrency);
|
|
597
|
-
this.frameConcurrencyLimiter = concurrencyLimiter;
|
|
598
|
-
this.tableConcurrencyLimiter = new tsHelpers.ConcurrencyLimitingExecutor(ops.pTableConcurrency);
|
|
599
|
-
this.pFrames = new PFramePool(server.info, localBlobPool, remoteBlobPool, logger, spillPath);
|
|
600
|
-
this.pTableDefs = new PTableDefPool(logger);
|
|
601
|
-
this.pTables = new PTablePool(this.pFrames, this.pTableDefs, logger);
|
|
602
|
-
this.pTableCacheUi = new PTableCacheUi(logger, ops);
|
|
603
|
-
this.pTableCacheModel = new PTableCacheModel(logger, ops);
|
|
604
|
-
}
|
|
605
|
-
async dispose() {
|
|
606
|
-
return await this.server.stop();
|
|
607
|
-
}
|
|
608
|
-
async [Symbol.asyncDispose]() {
|
|
609
|
-
return await this.dispose();
|
|
610
|
-
}
|
|
611
|
-
//
|
|
612
|
-
// Internal / Config API Methods
|
|
613
|
-
//
|
|
614
|
-
createPFrame(def, ctx) {
|
|
615
|
-
const columns = def
|
|
616
|
-
.filter((c) => valueTypes.find((t) => t === c.spec.valueType))
|
|
617
|
-
.map((c) => model.mapPObjectData(c, (d) => plTree.isPlTreeNodeAccessor(d)
|
|
618
|
-
? data.parseDataInfoResource(d)
|
|
619
|
-
: model.isDataInfo(d)
|
|
620
|
-
? d.type === 'ParquetPartitioned'
|
|
621
|
-
? model.mapDataInfo(d, (a) => data.traverseParquetChunkResource(a))
|
|
622
|
-
: model.mapDataInfo(d, (a) => a.persist())
|
|
623
|
-
: data.makeDataInfoFromPColumnValues(c.spec, d)));
|
|
624
|
-
const distinctColumns = model.uniqueBy(columns, (column) => column.id);
|
|
625
|
-
const res = this.pFrames.acquire(distinctColumns);
|
|
626
|
-
ctx.addOnDestroy(res.unref);
|
|
627
|
-
return res.key;
|
|
628
|
-
}
|
|
629
|
-
createPTable(rawDef, ctx) {
|
|
630
|
-
const def = migratePTableFilters(rawDef, this.logger);
|
|
631
|
-
const pFrameHandle = this.createPFrame(model.extractAllColumns(def.src), ctx);
|
|
632
|
-
const defIds = model.mapPTableDef(def, (c) => c.id);
|
|
633
|
-
const sortedDef = sortPTableDef(defIds);
|
|
634
|
-
const { key, unref } = this.pTableDefs.acquire({ def: sortedDef, pFrameHandle });
|
|
635
|
-
if (index.getDebugFlags().logPFrameRequests) {
|
|
636
|
-
this.logger('info', `Create PTable call (pFrameHandle = ${pFrameHandle}; pTableHandle = ${key})`);
|
|
637
|
-
}
|
|
638
|
-
ctx.addOnDestroy(unref); // in addition to pframe unref added in createPFrame above
|
|
639
|
-
return key;
|
|
640
|
-
}
|
|
641
|
-
//
|
|
642
|
-
// PFrame istance methods
|
|
643
|
-
//
|
|
644
|
-
async findColumns(handle, request) {
|
|
645
|
-
const iRequest = {
|
|
646
|
-
...request,
|
|
647
|
-
compatibleWith: request.compatibleWith.length !== 0
|
|
648
|
-
? [{
|
|
649
|
-
axesSpec: [
|
|
650
|
-
...new Map(request.compatibleWith.map((item) => [canonicalize(item), item])).values(),
|
|
651
|
-
],
|
|
652
|
-
qualifications: [],
|
|
653
|
-
}]
|
|
654
|
-
: [],
|
|
655
|
-
};
|
|
656
|
-
const { pFramePromise } = this.pFrames.getByKey(handle);
|
|
657
|
-
const pFrame = await pFramePromise;
|
|
658
|
-
const responce = await pFrame.findColumns(iRequest);
|
|
659
|
-
return {
|
|
660
|
-
hits: responce.hits
|
|
661
|
-
.filter((h) => // only exactly matching columns
|
|
662
|
-
h.mappingVariants.length === 0
|
|
663
|
-
|| h.mappingVariants.some((v) => v.qualifications.forHit.length === 0
|
|
664
|
-
&& v.qualifications.forQueries.every((q) => q.length === 0)))
|
|
665
|
-
.map((h) => h.hit),
|
|
666
|
-
};
|
|
667
|
-
}
|
|
668
|
-
async getColumnSpec(handle, columnId) {
|
|
669
|
-
const { pFramePromise } = this.pFrames.getByKey(handle);
|
|
670
|
-
const pFrame = await pFramePromise;
|
|
671
|
-
return await pFrame.getColumnSpec(columnId);
|
|
672
|
-
}
|
|
673
|
-
async listColumns(handle) {
|
|
674
|
-
const { pFramePromise } = this.pFrames.getByKey(handle);
|
|
675
|
-
const pFrame = await pFramePromise;
|
|
676
|
-
return await pFrame.listColumns();
|
|
677
|
-
}
|
|
678
|
-
async calculateTableData(handle, request, range, signal) {
|
|
679
|
-
if (index.getDebugFlags().logPFrameRequests) {
|
|
680
|
-
this.logger('info', `Call calculateTableData, handle = ${handle}, request = ${JSON.stringify(request, bigintReplacer)}`);
|
|
681
|
-
}
|
|
682
|
-
const table = this.pTables.acquire({
|
|
683
|
-
pFrameHandle: handle,
|
|
684
|
-
def: sortPTableDef(migratePTableFilters(request, this.logger)),
|
|
685
|
-
});
|
|
686
|
-
const { pTablePromise, disposeSignal } = table.resource;
|
|
687
|
-
const pTable = await pTablePromise;
|
|
688
|
-
const combinedSignal = AbortSignal.any([signal, disposeSignal].filter((s) => !!s));
|
|
689
|
-
return await this.frameConcurrencyLimiter.run(async () => {
|
|
690
|
-
try {
|
|
691
|
-
const spec = pTable.getSpec();
|
|
692
|
-
const data = await pTable.getData([...spec.keys()], {
|
|
693
|
-
range,
|
|
694
|
-
signal: combinedSignal,
|
|
695
|
-
});
|
|
696
|
-
const resultSize = await pTable.getFootprint({
|
|
697
|
-
withPredecessors: false,
|
|
698
|
-
signal: combinedSignal,
|
|
699
|
-
});
|
|
700
|
-
if (resultSize >= 2 * 1024 * 1024 * 1024) {
|
|
701
|
-
throw new model.PFrameDriverError(`Join results exceed 2GB, please add filters to shrink the result size`);
|
|
702
|
-
}
|
|
703
|
-
const overallSize = await pTable.getFootprint({
|
|
704
|
-
withPredecessors: true,
|
|
705
|
-
signal: combinedSignal,
|
|
706
|
-
});
|
|
707
|
-
this.pTableCacheUi.cache(table, overallSize);
|
|
708
|
-
return spec.map((spec, i) => ({
|
|
709
|
-
spec: spec,
|
|
710
|
-
data: data[i],
|
|
711
|
-
}));
|
|
712
|
-
}
|
|
713
|
-
catch (err) {
|
|
714
|
-
table.unref();
|
|
715
|
-
throw err;
|
|
716
|
-
}
|
|
717
|
-
});
|
|
718
159
|
}
|
|
719
|
-
async
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
}
|
|
723
|
-
const {
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
return await this.frameConcurrencyLimiter.run(async () => {
|
|
727
|
-
return await pFrame.getUniqueValues({
|
|
728
|
-
...request,
|
|
729
|
-
filters: migrateFilters(request.filters, this.logger),
|
|
730
|
-
}, {
|
|
731
|
-
signal: combinedSignal,
|
|
732
|
-
});
|
|
733
|
-
});
|
|
160
|
+
static async init(blobDriver, logger, serverOptions) {
|
|
161
|
+
const pool = new RemoteBlobPool(blobDriver, logger);
|
|
162
|
+
const store = new BlobStore({ remoteBlobProvider: pool, logger });
|
|
163
|
+
const handler = pframesRsNode.HttpHelpers.createRequestHandler({ store });
|
|
164
|
+
const server = await pframesRsNode.HttpHelpers.createHttpServer({ ...serverOptions, handler });
|
|
165
|
+
logger('info', `PFrames HTTP server started on ${server.info.url}`);
|
|
166
|
+
return new RemoteBlobProviderImpl(pool, server);
|
|
734
167
|
}
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
//
|
|
738
|
-
async getSpec(handle) {
|
|
739
|
-
const env_1 = { stack: [], error: void 0, hasError: false };
|
|
740
|
-
try {
|
|
741
|
-
const { def } = this.pTableDefs.getByKey(handle);
|
|
742
|
-
const table = tslib_es6.__addDisposableResource(env_1, this.pTables.acquire(def), false);
|
|
743
|
-
const { pTablePromise } = table.resource;
|
|
744
|
-
const pTable = await pTablePromise;
|
|
745
|
-
return pTable.getSpec();
|
|
746
|
-
}
|
|
747
|
-
catch (e_1) {
|
|
748
|
-
env_1.error = e_1;
|
|
749
|
-
env_1.hasError = true;
|
|
750
|
-
}
|
|
751
|
-
finally {
|
|
752
|
-
tslib_es6.__disposeResources(env_1);
|
|
753
|
-
}
|
|
168
|
+
acquire(params) {
|
|
169
|
+
return this.pool.acquire(params);
|
|
754
170
|
}
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
const table = this.pTables.acquire(def);
|
|
758
|
-
const { pTablePromise, disposeSignal } = table.resource;
|
|
759
|
-
const pTable = await pTablePromise;
|
|
760
|
-
const combinedSignal = AbortSignal.any([signal, disposeSignal].filter((s) => !!s));
|
|
761
|
-
const { shape, overallSize } = await this.tableConcurrencyLimiter.run(async () => {
|
|
762
|
-
const shape = await pTable.getShape({
|
|
763
|
-
signal: combinedSignal,
|
|
764
|
-
});
|
|
765
|
-
const overallSize = await pTable.getFootprint({
|
|
766
|
-
withPredecessors: true,
|
|
767
|
-
signal: combinedSignal,
|
|
768
|
-
});
|
|
769
|
-
return { shape, overallSize };
|
|
770
|
-
});
|
|
771
|
-
this.pTableCacheModel.cache(table, overallSize, defDisposeSignal);
|
|
772
|
-
return shape;
|
|
171
|
+
httpServerInfo() {
|
|
172
|
+
return this.server.info;
|
|
773
173
|
}
|
|
774
|
-
async
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
const type = entry.type;
|
|
797
|
-
switch (type) {
|
|
798
|
-
case 'column':
|
|
799
|
-
return {
|
|
800
|
-
type: 'column',
|
|
801
|
-
columnId: entry.column,
|
|
802
|
-
};
|
|
803
|
-
case 'slicedColumn':
|
|
804
|
-
return {
|
|
805
|
-
type: 'slicedColumn',
|
|
806
|
-
columnId: entry.column,
|
|
807
|
-
newId: entry.newId,
|
|
808
|
-
axisFilters: entry.axisFilters,
|
|
809
|
-
};
|
|
810
|
-
case 'artificialColumn':
|
|
811
|
-
return {
|
|
812
|
-
type: 'artificialColumn',
|
|
813
|
-
columnId: entry.column,
|
|
814
|
-
newId: entry.newId,
|
|
815
|
-
axesIndices: entry.axesIndices,
|
|
816
|
-
};
|
|
817
|
-
case 'inlineColumn':
|
|
818
|
-
return {
|
|
819
|
-
type: 'inlineColumn',
|
|
820
|
-
newId: entry.column.id,
|
|
821
|
-
spec: entry.column.spec,
|
|
822
|
-
dataInfo: {
|
|
823
|
-
type: 'Json',
|
|
824
|
-
keyLength: entry.column.spec.axesSpec.length,
|
|
825
|
-
data: entry.column.data.reduce((acc, row) => {
|
|
826
|
-
acc[JSON.stringify(row.key)] = row.val;
|
|
827
|
-
return acc;
|
|
828
|
-
}, {}),
|
|
829
|
-
},
|
|
830
|
-
};
|
|
831
|
-
case 'inner':
|
|
832
|
-
case 'full':
|
|
833
|
-
return {
|
|
834
|
-
type: entry.type,
|
|
835
|
-
entries: entry.entries.map((col) => joinEntryToInternal(col)),
|
|
836
|
-
};
|
|
837
|
-
case 'outer':
|
|
838
|
-
return {
|
|
839
|
-
type: 'outer',
|
|
840
|
-
primary: joinEntryToInternal(entry.primary),
|
|
841
|
-
secondary: entry.secondary.map((col) => joinEntryToInternal(col)),
|
|
842
|
-
};
|
|
843
|
-
default:
|
|
844
|
-
throw new model.PFrameDriverError(`unsupported PFrame join entry type: ${type}`);
|
|
845
|
-
}
|
|
846
|
-
}
|
|
847
|
-
function sortPTableDef(def) {
|
|
848
|
-
function cmpJoinEntries(lhs, rhs) {
|
|
849
|
-
if (lhs.type !== rhs.type) {
|
|
850
|
-
return lhs.type < rhs.type ? -1 : 1;
|
|
851
|
-
}
|
|
852
|
-
const type = lhs.type;
|
|
853
|
-
switch (type) {
|
|
854
|
-
case 'column':
|
|
855
|
-
return lhs.column < rhs.column ? -1 : 1;
|
|
856
|
-
case 'slicedColumn':
|
|
857
|
-
case 'artificialColumn':
|
|
858
|
-
return lhs.newId < rhs.newId ? -1 : 1;
|
|
859
|
-
case 'inlineColumn': {
|
|
860
|
-
return lhs.column.id < rhs.column.id ? -1 : 1;
|
|
861
|
-
}
|
|
862
|
-
case 'inner':
|
|
863
|
-
case 'full': {
|
|
864
|
-
const rhsInner = rhs;
|
|
865
|
-
if (lhs.entries.length !== rhsInner.entries.length) {
|
|
866
|
-
return lhs.entries.length - rhsInner.entries.length;
|
|
867
|
-
}
|
|
868
|
-
for (let i = 0; i < lhs.entries.length; i++) {
|
|
869
|
-
const cmp = cmpJoinEntries(lhs.entries[i], rhsInner.entries[i]);
|
|
870
|
-
if (cmp !== 0) {
|
|
871
|
-
return cmp;
|
|
872
|
-
}
|
|
873
|
-
}
|
|
874
|
-
return 0;
|
|
875
|
-
}
|
|
876
|
-
case 'outer': {
|
|
877
|
-
const rhsOuter = rhs;
|
|
878
|
-
const cmp = cmpJoinEntries(lhs.primary, rhsOuter.primary);
|
|
879
|
-
if (cmp !== 0) {
|
|
880
|
-
return cmp;
|
|
881
|
-
}
|
|
882
|
-
if (lhs.secondary.length !== rhsOuter.secondary.length) {
|
|
883
|
-
return lhs.secondary.length - rhsOuter.secondary.length;
|
|
884
|
-
}
|
|
885
|
-
for (let i = 0; i < lhs.secondary.length; i++) {
|
|
886
|
-
const cmp = cmpJoinEntries(lhs.secondary[i], rhsOuter.secondary[i]);
|
|
887
|
-
if (cmp !== 0) {
|
|
888
|
-
return cmp;
|
|
889
|
-
}
|
|
890
|
-
}
|
|
891
|
-
return 0;
|
|
892
|
-
}
|
|
893
|
-
default:
|
|
894
|
-
tsHelpers.assertNever(type);
|
|
895
|
-
}
|
|
896
|
-
}
|
|
897
|
-
function sortJoinEntry(entry) {
|
|
898
|
-
switch (entry.type) {
|
|
899
|
-
case 'column':
|
|
900
|
-
case 'slicedColumn':
|
|
901
|
-
case 'inlineColumn':
|
|
902
|
-
return entry;
|
|
903
|
-
case 'artificialColumn': {
|
|
904
|
-
const sortedAxesIndices = entry.axesIndices.toSorted((lhs, rhs) => lhs - rhs);
|
|
905
|
-
return {
|
|
906
|
-
...entry,
|
|
907
|
-
axesIndices: sortedAxesIndices,
|
|
908
|
-
};
|
|
909
|
-
}
|
|
910
|
-
case 'inner':
|
|
911
|
-
case 'full': {
|
|
912
|
-
const sortedEntries = entry.entries.map(sortJoinEntry);
|
|
913
|
-
sortedEntries.sort(cmpJoinEntries);
|
|
914
|
-
return {
|
|
915
|
-
...entry,
|
|
916
|
-
entries: sortedEntries,
|
|
917
|
-
};
|
|
918
|
-
}
|
|
919
|
-
case 'outer': {
|
|
920
|
-
const sortedSecondary = entry.secondary.map(sortJoinEntry);
|
|
921
|
-
sortedSecondary.sort(cmpJoinEntries);
|
|
922
|
-
return {
|
|
923
|
-
...entry,
|
|
924
|
-
primary: sortJoinEntry(entry.primary),
|
|
925
|
-
secondary: sortedSecondary,
|
|
926
|
-
};
|
|
927
|
-
}
|
|
928
|
-
default:
|
|
929
|
-
tsHelpers.assertNever(entry);
|
|
930
|
-
}
|
|
931
|
-
}
|
|
932
|
-
function sortFilters(filters) {
|
|
933
|
-
return filters.toSorted((lhs, rhs) => {
|
|
934
|
-
if (lhs.column.type === 'axis' && rhs.column.type === 'axis') {
|
|
935
|
-
const lhsId = model.canonicalizeJson(model.getAxisId(lhs.column.id));
|
|
936
|
-
const rhsId = model.canonicalizeJson(model.getAxisId(rhs.column.id));
|
|
937
|
-
return lhsId < rhsId ? -1 : 1;
|
|
938
|
-
}
|
|
939
|
-
else if (lhs.column.type === 'column' && rhs.column.type === 'column') {
|
|
940
|
-
return lhs.column.id < rhs.column.id ? -1 : 1;
|
|
941
|
-
}
|
|
942
|
-
else {
|
|
943
|
-
return lhs.column.type === 'axis' ? -1 : 1;
|
|
944
|
-
}
|
|
945
|
-
});
|
|
946
|
-
}
|
|
947
|
-
return {
|
|
948
|
-
src: sortJoinEntry(def.src),
|
|
949
|
-
partitionFilters: sortFilters(def.partitionFilters),
|
|
950
|
-
filters: sortFilters(def.filters),
|
|
951
|
-
sorting: def.sorting,
|
|
174
|
+
async [Symbol.asyncDispose]() {
|
|
175
|
+
await this.server.stop();
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
const PFrameDriverOpsDefaults = {
|
|
179
|
+
...pfDriver.AbstractPFrameDriverOpsDefaults,
|
|
180
|
+
parquetServerPort: 0, // 0 means that some unused port will be assigned by the OS
|
|
181
|
+
};
|
|
182
|
+
async function createPFrameDriver(params) {
|
|
183
|
+
const resolvedSpillPath = path.resolve(params.spillPath);
|
|
184
|
+
await tsHelpers.emptyDir(resolvedSpillPath);
|
|
185
|
+
const logger = (level, message) => params.logger[level](message);
|
|
186
|
+
const localBlobProvider = new LocalBlobProviderImpl(params.blobDriver, logger);
|
|
187
|
+
const remoteBlobProvider = await RemoteBlobProviderImpl.init(params.blobDriver, logger, { port: params.options.parquetServerPort });
|
|
188
|
+
const resolveDataInfo = (spec, data$1) => {
|
|
189
|
+
return plTree.isPlTreeNodeAccessor(data$1)
|
|
190
|
+
? data.parseDataInfoResource(data$1)
|
|
191
|
+
: model.isDataInfo(data$1)
|
|
192
|
+
? data$1.type === 'ParquetPartitioned'
|
|
193
|
+
? model.mapDataInfo(data$1, (a) => data.traverseParquetChunkResource(a))
|
|
194
|
+
: model.mapDataInfo(data$1, (a) => a.persist())
|
|
195
|
+
: pfDriver.makeJsonDataInfo(spec, data$1);
|
|
952
196
|
};
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
throw new model.PFrameDriverError(`PTable handle calculation failed, `
|
|
962
|
-
+ `request: ${JSON.stringify(data)}, `
|
|
963
|
-
+ `error: ${model.ensureError(err)}`);
|
|
964
|
-
}
|
|
965
|
-
}
|
|
966
|
-
function stableKeyFromPFrameData(data) {
|
|
967
|
-
const orderedData = [...data].map((column) => model.mapPObjectData(column, (r) => {
|
|
968
|
-
let result;
|
|
969
|
-
const type = r.type;
|
|
970
|
-
switch (type) {
|
|
971
|
-
case 'Json':
|
|
972
|
-
result = {
|
|
973
|
-
type: r.type,
|
|
974
|
-
keyLength: r.keyLength,
|
|
975
|
-
payload: Object.entries(r.data).map(([part, value]) => ({
|
|
976
|
-
key: part,
|
|
977
|
-
value,
|
|
978
|
-
})),
|
|
979
|
-
};
|
|
980
|
-
break;
|
|
981
|
-
case 'JsonPartitioned':
|
|
982
|
-
result = {
|
|
983
|
-
type: r.type,
|
|
984
|
-
keyLength: r.partitionKeyLength,
|
|
985
|
-
payload: Object.entries(r.parts).map(([part, info]) => ({
|
|
986
|
-
key: part,
|
|
987
|
-
value: makeBlobId(info),
|
|
988
|
-
})),
|
|
989
|
-
};
|
|
990
|
-
break;
|
|
991
|
-
case 'BinaryPartitioned':
|
|
992
|
-
result = {
|
|
993
|
-
type: r.type,
|
|
994
|
-
keyLength: r.partitionKeyLength,
|
|
995
|
-
payload: Object.entries(r.parts).map(([part, info]) => ({
|
|
996
|
-
key: part,
|
|
997
|
-
value: [makeBlobId(info.index), makeBlobId(info.values)],
|
|
998
|
-
})),
|
|
999
|
-
};
|
|
1000
|
-
break;
|
|
1001
|
-
case 'ParquetPartitioned':
|
|
1002
|
-
result = {
|
|
1003
|
-
type: r.type,
|
|
1004
|
-
keyLength: r.partitionKeyLength,
|
|
1005
|
-
payload: Object.entries(r.parts).map(([part, info]) => ({
|
|
1006
|
-
key: part,
|
|
1007
|
-
value: info.dataDigest || [
|
|
1008
|
-
makeBlobId(info.data),
|
|
1009
|
-
JSON.stringify({ axes: info.axes, column: info.column }),
|
|
1010
|
-
],
|
|
1011
|
-
})),
|
|
1012
|
-
};
|
|
1013
|
-
break;
|
|
1014
|
-
default:
|
|
1015
|
-
throw new model.PFrameDriverError(`unsupported resource type: ${JSON.stringify(type)}`);
|
|
1016
|
-
}
|
|
1017
|
-
result.payload.sort((lhs, rhs) => lhs.key < rhs.key ? -1 : 1);
|
|
1018
|
-
return result;
|
|
1019
|
-
}));
|
|
1020
|
-
orderedData.sort((lhs, rhs) => lhs.id < rhs.id ? -1 : 1);
|
|
1021
|
-
const hash = node_crypto.createHash('sha256');
|
|
1022
|
-
hash.update(canonicalize(orderedData));
|
|
1023
|
-
return hash.digest().toString('hex');
|
|
197
|
+
return new pfDriver.AbstractPFrameDriver({
|
|
198
|
+
logger,
|
|
199
|
+
localBlobProvider,
|
|
200
|
+
remoteBlobProvider,
|
|
201
|
+
spillPath: resolvedSpillPath,
|
|
202
|
+
options: params.options,
|
|
203
|
+
resolveDataInfo,
|
|
204
|
+
});
|
|
1024
205
|
}
|
|
1025
206
|
|
|
1026
|
-
exports.
|
|
207
|
+
exports.PFrameDriverOpsDefaults = PFrameDriverOpsDefaults;
|
|
208
|
+
exports.createPFrameDriver = createPFrameDriver;
|
|
1027
209
|
//# sourceMappingURL=driver.cjs.map
|