@milaboratories/pl-model-common 1.19.3 → 1.19.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/drivers/pframe/data_info.d.ts +31 -18
- package/dist/drivers/pframe/data_info.d.ts.map +1 -1
- package/dist/index.js +1 -1
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +318 -287
- package/dist/index.mjs.map +1 -1
- package/package.json +3 -3
- package/src/drivers/pframe/data_info.ts +151 -124
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@milaboratories/pl-model-common",
|
|
3
|
-
"version": "1.19.3",
|
|
3
|
+
"version": "1.19.5",
|
|
4
4
|
"description": "Platforma SDK Model",
|
|
5
5
|
"types": "./dist/index.d.ts",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -26,8 +26,8 @@
|
|
|
26
26
|
"typescript": "~5.6.3",
|
|
27
27
|
"vite": "^6.3.5",
|
|
28
28
|
"vitest": "^2.1.9",
|
|
29
|
-
"@
|
|
30
|
-
"@
|
|
29
|
+
"@platforma-sdk/eslint-config": "1.0.3",
|
|
30
|
+
"@milaboratories/build-configs": "1.0.5"
|
|
31
31
|
},
|
|
32
32
|
"scripts": {
|
|
33
33
|
"type-check": "tsc --noEmit --composite false",
|
|
package/src/drivers/pframe/data_info.ts
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import { assertNever } from '../../util';
|
|
2
|
+
|
|
1
3
|
/**
|
|
2
4
|
* Represents a JavaScript representation of a value in a PColumn. Can be null, a number, or a string.
|
|
3
5
|
* These are the primitive types that can be stored directly in PColumns.
|
|
@@ -85,7 +87,7 @@ export type BinaryPartitionedDataInfo<Blob> = {
|
|
|
85
87
|
parts: Record<string, BinaryChunk<Blob>>;
|
|
86
88
|
};
|
|
87
89
|
|
|
88
|
-
type ParquetChunkMappingAxis = {
|
|
90
|
+
export type ParquetChunkMappingAxis = {
|
|
89
91
|
/** Data type (matches PColumn axis types) */
|
|
90
92
|
type: 'Int' | 'Long' | 'String';
|
|
91
93
|
|
|
@@ -93,7 +95,7 @@ type ParquetChunkMappingAxis = {
|
|
|
93
95
|
id: string;
|
|
94
96
|
};
|
|
95
97
|
|
|
96
|
-
type ParquetChunkMappingColumn = {
|
|
98
|
+
export type ParquetChunkMappingColumn = {
|
|
97
99
|
/** Data type (matches PColumn value type) */
|
|
98
100
|
type: 'Int' | 'Long' | 'Float' | 'Double' | 'String';
|
|
99
101
|
|
|
@@ -101,11 +103,19 @@ type ParquetChunkMappingColumn = {
|
|
|
101
103
|
id: string;
|
|
102
104
|
};
|
|
103
105
|
|
|
104
|
-
type ParquetChunkStats = {
|
|
106
|
+
export type ParquetChunkMapping = {
|
|
107
|
+
/** Axes mappings - Parquet file is sorted by these fields in this order */
|
|
108
|
+
axes: ParquetChunkMappingAxis[];
|
|
109
|
+
|
|
110
|
+
/** Column mapping */
|
|
111
|
+
column: ParquetChunkMappingColumn;
|
|
112
|
+
};
|
|
113
|
+
|
|
114
|
+
export type ParquetChunkStats = {
|
|
105
115
|
/** Number of rows in the chunk */
|
|
106
|
-
numberOfRows?: number;
|
|
116
|
+
numberOfRows: number;
|
|
107
117
|
/** Byte size information for storage optimization and query planning */
|
|
108
|
-
size?: {
|
|
118
|
+
size: {
|
|
109
119
|
/** Byte sizes for each axis column in the same order as axes mapping */
|
|
110
120
|
axes: number[];
|
|
111
121
|
/** Byte size for the data column */
|
|
@@ -113,23 +123,19 @@ type ParquetChunkStats = {
|
|
|
113
123
|
};
|
|
114
124
|
};
|
|
115
125
|
|
|
116
|
-
export type ParquetChunk<Blob> = {
|
|
117
|
-
/** Parquet file (PTable) containing column data */
|
|
118
|
-
data: Blob;
|
|
119
|
-
|
|
126
|
+
export type ParquetChunkMetadata = {
|
|
120
127
|
/** Content hash calculated for the specific axes and data this chunk represents */
|
|
121
|
-
dataDigest?: string;
|
|
122
|
-
|
|
123
|
-
/** Axes mappings - Parquet file is sorted by these fields in this order */
|
|
124
|
-
axes: ParquetChunkMappingAxis[];
|
|
125
|
-
|
|
126
|
-
/** Column mapping */
|
|
127
|
-
column: ParquetChunkMappingColumn;
|
|
128
|
+
dataDigest: string;
|
|
128
129
|
|
|
129
130
|
/** Pre-computed statistics for optimization without blob download */
|
|
130
|
-
stats?: ParquetChunkStats;
|
|
131
|
+
stats: Partial<ParquetChunkStats>;
|
|
131
132
|
};
|
|
132
133
|
|
|
134
|
+
export type ParquetChunk<Blob> = {
|
|
135
|
+
/** Parquet file (PTable) containing column data */
|
|
136
|
+
data: Blob;
|
|
137
|
+
} & ParquetChunkMapping & Partial<ParquetChunkMetadata>;
|
|
138
|
+
|
|
133
139
|
export type ParquetPartitionedDataInfo<Blob> = {
|
|
134
140
|
/** Identifier for this data format ('ParquetPartitioned') */
|
|
135
141
|
type: 'ParquetPartitioned';
|
|
@@ -138,7 +144,7 @@ export type ParquetPartitionedDataInfo<Blob> = {
|
|
|
138
144
|
partitionKeyLength: number;
|
|
139
145
|
|
|
140
146
|
/** Map of stringified partition keys to parquet files */
|
|
141
|
-
parts: Record<string, ParquetChunk<Blob>>;
|
|
147
|
+
parts: Record<string, Blob>;
|
|
142
148
|
};
|
|
143
149
|
|
|
144
150
|
/**
|
|
@@ -150,7 +156,8 @@ export type ParquetPartitionedDataInfo<Blob> = {
|
|
|
150
156
|
export type DataInfo<Blob> =
|
|
151
157
|
| JsonDataInfo
|
|
152
158
|
| JsonPartitionedDataInfo<Blob>
|
|
153
|
-
| BinaryPartitionedDataInfo<Blob>;
|
|
159
|
+
| BinaryPartitionedDataInfo<Blob>
|
|
160
|
+
| ParquetPartitionedDataInfo<Blob>;
|
|
154
161
|
|
|
155
162
|
/**
|
|
156
163
|
* Type guard function that checks if the given value is a valid DataInfo.
|
|
@@ -176,12 +183,8 @@ export function isDataInfo<Blob>(value: unknown): value is DataInfo<Blob> {
|
|
|
176
183
|
&& typeof data.data === 'object'
|
|
177
184
|
);
|
|
178
185
|
case 'JsonPartitioned':
|
|
179
|
-
return (
|
|
180
|
-
typeof data.partitionKeyLength === 'number'
|
|
181
|
-
&& data.parts !== undefined
|
|
182
|
-
&& typeof data.parts === 'object'
|
|
183
|
-
);
|
|
184
186
|
case 'BinaryPartitioned':
|
|
187
|
+
case 'ParquetPartitioned':
|
|
185
188
|
return (
|
|
186
189
|
typeof data.partitionKeyLength === 'number'
|
|
187
190
|
&& data.parts !== undefined
|
|
@@ -201,6 +204,14 @@ export function isDataInfo<Blob>(value: unknown): value is DataInfo<Blob> {
|
|
|
201
204
|
* @param mapFn - Function to transform blobs from type B1 to type B2
|
|
202
205
|
* @returns A new DataInfo object with transformed blob references
|
|
203
206
|
*/
|
|
207
|
+
export function mapDataInfo<B1, B2>(
|
|
208
|
+
dataInfo: ParquetPartitionedDataInfo<B1>,
|
|
209
|
+
mapFn: (blob: B1) => B2,
|
|
210
|
+
): ParquetPartitionedDataInfo<B2>;
|
|
211
|
+
export function mapDataInfo<B1, B2>(
|
|
212
|
+
dataInfo: Exclude<DataInfo<B1>, ParquetPartitionedDataInfo<B1>>,
|
|
213
|
+
mapFn: (blob: B1) => B2,
|
|
214
|
+
): Exclude<DataInfo<B2>, ParquetPartitionedDataInfo<B2>>;
|
|
204
215
|
export function mapDataInfo<B1, B2>(
|
|
205
216
|
dataInfo: DataInfo<B1>,
|
|
206
217
|
mapFn: (blob: B1) => B2,
|
|
@@ -242,6 +253,17 @@ export function mapDataInfo<B1, B2>(
|
|
|
242
253
|
parts: newParts,
|
|
243
254
|
};
|
|
244
255
|
}
|
|
256
|
+
case 'ParquetPartitioned': {
|
|
257
|
+
// Map each blob in parts
|
|
258
|
+
const newParts: Record<string, B2> = {};
|
|
259
|
+
for (const [key, blob] of Object.entries(dataInfo.parts)) {
|
|
260
|
+
newParts[key] = mapFn(blob);
|
|
261
|
+
}
|
|
262
|
+
return {
|
|
263
|
+
...dataInfo,
|
|
264
|
+
parts: newParts,
|
|
265
|
+
};
|
|
266
|
+
}
|
|
245
267
|
}
|
|
246
268
|
}
|
|
247
269
|
|
|
@@ -260,17 +282,20 @@ export function visitDataInfo<B>(
|
|
|
260
282
|
break;
|
|
261
283
|
case 'JsonPartitioned': {
|
|
262
284
|
// Visit each blob in parts
|
|
263
|
-
|
|
264
|
-
cb(blob);
|
|
265
|
-
}
|
|
285
|
+
Object.values(dataInfo.parts).forEach(cb);
|
|
266
286
|
break;
|
|
267
287
|
}
|
|
268
288
|
case 'BinaryPartitioned': {
|
|
269
289
|
// Visit each index and values blob in parts
|
|
270
|
-
|
|
290
|
+
Object.values(dataInfo.parts).forEach((chunk) => {
|
|
271
291
|
cb(chunk.index);
|
|
272
292
|
cb(chunk.values);
|
|
273
|
-
}
|
|
293
|
+
});
|
|
294
|
+
break;
|
|
295
|
+
}
|
|
296
|
+
case 'ParquetPartitioned': {
|
|
297
|
+
// Visit each blob in parts
|
|
298
|
+
Object.values(dataInfo.parts).forEach(cb);
|
|
274
299
|
break;
|
|
275
300
|
}
|
|
276
301
|
}
|
|
@@ -322,12 +347,21 @@ export interface BinaryPartitionedDataInfoEntries<Blob> {
|
|
|
322
347
|
parts: PColumnDataEntry<BinaryChunk<Blob>>[];
|
|
323
348
|
}
|
|
324
349
|
|
|
350
|
+
/**
|
|
351
|
+
* Entry-based representation of ParquetPartitionedDataInfo
|
|
352
|
+
*/
|
|
353
|
+
export interface ParquetPartitionedDataInfoEntries<Blob> {
|
|
354
|
+
type: 'ParquetPartitioned';
|
|
355
|
+
partitionKeyLength: number;
|
|
356
|
+
parts: PColumnDataEntry<Blob>[];
|
|
357
|
+
}
|
|
325
358
|
/**
|
|
326
359
|
* Union type representing all possible entry-based partitioned data storage formats
|
|
327
360
|
*/
|
|
328
361
|
export type PartitionedDataInfoEntries<Blob> =
|
|
329
362
|
| JsonPartitionedDataInfoEntries<Blob>
|
|
330
|
-
| BinaryPartitionedDataInfoEntries<Blob>;
|
|
363
|
+
| BinaryPartitionedDataInfoEntries<Blob>
|
|
364
|
+
| ParquetPartitionedDataInfoEntries<Blob>;
|
|
331
365
|
|
|
332
366
|
/**
|
|
333
367
|
* Union type representing all possible entry-based data storage formats
|
|
@@ -359,11 +393,8 @@ export function isDataInfoEntries<Blob>(value: unknown): value is DataInfoEntrie
|
|
|
359
393
|
&& Array.isArray(data.data)
|
|
360
394
|
);
|
|
361
395
|
case 'JsonPartitioned':
|
|
362
|
-
return (
|
|
363
|
-
typeof data.partitionKeyLength === 'number'
|
|
364
|
-
&& Array.isArray(data.parts)
|
|
365
|
-
);
|
|
366
396
|
case 'BinaryPartitioned':
|
|
397
|
+
case 'ParquetPartitioned':
|
|
367
398
|
return (
|
|
368
399
|
typeof data.partitionKeyLength === 'number'
|
|
369
400
|
&& Array.isArray(data.parts)
|
|
@@ -382,7 +413,14 @@ export function isDataInfoEntries<Blob>(value: unknown): value is DataInfoEntrie
|
|
|
382
413
|
*/
|
|
383
414
|
export function isPartitionedDataInfoEntries<Blob>(value: unknown): value is PartitionedDataInfoEntries<Blob> {
|
|
384
415
|
if (!isDataInfoEntries(value)) return false;
|
|
385
|
-
|
|
416
|
+
switch (value.type) {
|
|
417
|
+
case 'JsonPartitioned':
|
|
418
|
+
case 'BinaryPartitioned':
|
|
419
|
+
case 'ParquetPartitioned':
|
|
420
|
+
return true;
|
|
421
|
+
default:
|
|
422
|
+
return false;
|
|
423
|
+
}
|
|
386
424
|
}
|
|
387
425
|
|
|
388
426
|
/**
|
|
@@ -393,42 +431,40 @@ export function isPartitionedDataInfoEntries<Blob>(value: unknown): value is Par
|
|
|
393
431
|
*/
|
|
394
432
|
export function dataInfoToEntries<Blob>(dataInfo: DataInfo<Blob>): DataInfoEntries<Blob> {
|
|
395
433
|
switch (dataInfo.type) {
|
|
396
|
-
case 'Json': {
|
|
397
|
-
|
|
434
|
+
case 'Json': return {
|
|
435
|
+
type: 'Json',
|
|
436
|
+
keyLength: dataInfo.keyLength,
|
|
437
|
+
data: Object.entries(dataInfo.data).map(([keyStr, value]) => {
|
|
398
438
|
const key = JSON.parse(keyStr) as PColumnKey;
|
|
399
|
-
return { key, value }
|
|
400
|
-
})
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
};
|
|
407
|
-
}
|
|
408
|
-
case 'JsonPartitioned': {
|
|
409
|
-
const parts: PColumnDataEntry<Blob>[] = Object.entries(dataInfo.parts).map(([keyStr, blob]) => {
|
|
439
|
+
return { key, value } as PColumnDataEntry<PColumnValue>;
|
|
440
|
+
}),
|
|
441
|
+
};
|
|
442
|
+
case 'JsonPartitioned': return {
|
|
443
|
+
type: 'JsonPartitioned',
|
|
444
|
+
partitionKeyLength: dataInfo.partitionKeyLength,
|
|
445
|
+
parts: Object.entries(dataInfo.parts).map(([keyStr, blob]) => {
|
|
410
446
|
const key = JSON.parse(keyStr) as PColumnKey;
|
|
411
|
-
return { key, value: blob }
|
|
412
|
-
})
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
};
|
|
419
|
-
}
|
|
420
|
-
case 'BinaryPartitioned': {
|
|
421
|
-
const parts: PColumnDataEntry<BinaryChunk<Blob>>[] = Object.entries(dataInfo.parts).map(([keyStr, chunk]) => {
|
|
447
|
+
return { key, value: blob } as PColumnDataEntry<Blob>;
|
|
448
|
+
}),
|
|
449
|
+
};
|
|
450
|
+
case 'BinaryPartitioned': return {
|
|
451
|
+
type: 'BinaryPartitioned',
|
|
452
|
+
partitionKeyLength: dataInfo.partitionKeyLength,
|
|
453
|
+
parts: Object.entries(dataInfo.parts).map(([keyStr, chunk]) => {
|
|
422
454
|
const key = JSON.parse(keyStr) as PColumnKey;
|
|
423
|
-
return { key, value: chunk }
|
|
424
|
-
})
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
455
|
+
return { key, value: chunk } as PColumnDataEntry<BinaryChunk<Blob>>;
|
|
456
|
+
}),
|
|
457
|
+
};
|
|
458
|
+
case 'ParquetPartitioned': return {
|
|
459
|
+
type: 'ParquetPartitioned',
|
|
460
|
+
partitionKeyLength: dataInfo.partitionKeyLength,
|
|
461
|
+
parts: Object.entries(dataInfo.parts).map(([keyStr, blob]) => {
|
|
462
|
+
const key = JSON.parse(keyStr) as PColumnKey;
|
|
463
|
+
return { key, value: blob } as PColumnDataEntry<Blob>;
|
|
464
|
+
}),
|
|
465
|
+
};
|
|
466
|
+
default:
|
|
467
|
+
assertNever(dataInfo);
|
|
432
468
|
}
|
|
433
469
|
}
|
|
434
470
|
|
|
@@ -440,42 +476,36 @@ export function dataInfoToEntries<Blob>(dataInfo: DataInfo<Blob>): DataInfoEntri
|
|
|
440
476
|
*/
|
|
441
477
|
export function entriesToDataInfo<Blob>(dataInfoEntries: DataInfoEntries<Blob>): DataInfo<Blob> {
|
|
442
478
|
switch (dataInfoEntries.type) {
|
|
443
|
-
case 'Json': {
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
return {
|
|
474
|
-
type: 'BinaryPartitioned',
|
|
475
|
-
partitionKeyLength: dataInfoEntries.partitionKeyLength,
|
|
476
|
-
parts,
|
|
477
|
-
};
|
|
478
|
-
}
|
|
479
|
+
case 'Json': return {
|
|
480
|
+
type: 'Json',
|
|
481
|
+
keyLength: dataInfoEntries.keyLength,
|
|
482
|
+
data: Object.fromEntries(
|
|
483
|
+
dataInfoEntries.data.map(({ key, value }) => [JSON.stringify(key), value]),
|
|
484
|
+
),
|
|
485
|
+
};
|
|
486
|
+
case 'JsonPartitioned': return {
|
|
487
|
+
type: 'JsonPartitioned',
|
|
488
|
+
partitionKeyLength: dataInfoEntries.partitionKeyLength,
|
|
489
|
+
parts: Object.fromEntries(
|
|
490
|
+
dataInfoEntries.parts.map(({ key, value }) => [JSON.stringify(key), value]),
|
|
491
|
+
),
|
|
492
|
+
};
|
|
493
|
+
case 'BinaryPartitioned': return {
|
|
494
|
+
type: 'BinaryPartitioned',
|
|
495
|
+
partitionKeyLength: dataInfoEntries.partitionKeyLength,
|
|
496
|
+
parts: Object.fromEntries(
|
|
497
|
+
dataInfoEntries.parts.map(({ key, value }) => [JSON.stringify(key), value]),
|
|
498
|
+
),
|
|
499
|
+
};
|
|
500
|
+
case 'ParquetPartitioned': return {
|
|
501
|
+
type: 'ParquetPartitioned',
|
|
502
|
+
partitionKeyLength: dataInfoEntries.partitionKeyLength,
|
|
503
|
+
parts: Object.fromEntries(
|
|
504
|
+
dataInfoEntries.parts.map(({ key, value }) => [JSON.stringify(key), value]),
|
|
505
|
+
),
|
|
506
|
+
};
|
|
507
|
+
default:
|
|
508
|
+
assertNever(dataInfoEntries);
|
|
479
509
|
}
|
|
480
510
|
}
|
|
481
511
|
|
|
@@ -504,32 +534,29 @@ export function mapDataInfoEntries<B1, B2>(
|
|
|
504
534
|
case 'Json':
|
|
505
535
|
// Json type doesn't contain blobs, so return as is
|
|
506
536
|
return dataInfoEntries;
|
|
507
|
-
case 'JsonPartitioned': {
|
|
508
|
-
|
|
509
|
-
|
|
537
|
+
case 'JsonPartitioned': return {
|
|
538
|
+
...dataInfoEntries,
|
|
539
|
+
parts: dataInfoEntries.parts.map((entry) => ({
|
|
510
540
|
key: entry.key,
|
|
511
541
|
value: mapFn(entry.value),
|
|
512
|
-
}))
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
};
|
|
518
|
-
}
|
|
519
|
-
case 'BinaryPartitioned': {
|
|
520
|
-
// Map each index and values blob in parts
|
|
521
|
-
const newParts = dataInfoEntries.parts.map((entry) => ({
|
|
542
|
+
})),
|
|
543
|
+
};
|
|
544
|
+
case 'BinaryPartitioned': return {
|
|
545
|
+
...dataInfoEntries,
|
|
546
|
+
parts: dataInfoEntries.parts.map((entry) => ({
|
|
522
547
|
key: entry.key,
|
|
523
548
|
value: {
|
|
524
549
|
index: mapFn(entry.value.index),
|
|
525
550
|
values: mapFn(entry.value.values),
|
|
526
551
|
},
|
|
527
|
-
}))
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
552
|
+
})),
|
|
553
|
+
};
|
|
554
|
+
case 'ParquetPartitioned': return {
|
|
555
|
+
...dataInfoEntries,
|
|
556
|
+
parts: dataInfoEntries.parts.map((entry) => ({
|
|
557
|
+
key: entry.key,
|
|
558
|
+
value: mapFn(entry.value),
|
|
559
|
+
})),
|
|
560
|
+
};
|
|
534
561
|
}
|
|
535
562
|
}
|