@milaboratories/pl-model-common 1.19.2 → 1.19.4

This diff shows the publicly released contents of the two package versions as published to their registry. It is provided for informational purposes only.
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@milaboratories/pl-model-common",
-  "version": "1.19.2",
+  "version": "1.19.4",
   "description": "Platforma SDK Model",
   "types": "./dist/index.d.ts",
   "main": "./dist/index.js",
@@ -1,3 +1,5 @@
+import { assertNever } from '../../util';
+
 /**
  * Represents a JavaScript representation of a value in a PColumn. Can be null, a number, or a string.
  * These are the primitive types that can be stored directly in PColumns.
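
The newly imported `assertNever` backs the exhaustiveness checks added to `dataInfoToEntries` and `entriesToDataInfo` below. Its implementation in `../../util` is not part of this diff; a minimal sketch of the conventional helper it is assumed to be:

// Conventional exhaustiveness helper (assumed shape; the actual
// implementation in '../../util' is not shown in this diff).
export function assertNever(value: never): never {
  throw new Error(`Unexpected value: ${JSON.stringify(value)}`);
}

Because the parameter is typed `never`, any unhandled switch arm turns the call site into a compile-time error instead of a silent fall-through.
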
@@ -85,7 +87,7 @@ export type BinaryPartitionedDataInfo<Blob> = {
   parts: Record<string, BinaryChunk<Blob>>;
 };
 
-type ParquetPartitionInfoMappingAxis = {
+export type ParquetChunkMappingAxis = {
   /** Data type (matches PColumn axis types) */
   type: 'Int' | 'Long' | 'String';
 
@@ -93,7 +95,7 @@ type ParquetPartitionInfoMappingAxis = {
   id: string;
 };
 
-type ParquetPartitionInfoMappingColumn = {
+export type ParquetChunkMappingColumn = {
   /** Data type (matches PColumn value type) */
   type: 'Int' | 'Long' | 'Float' | 'Double' | 'String';
 
@@ -101,42 +103,38 @@ type ParquetPartitionInfoMappingColumn = {
   id: string;
 };
 
-type ParquetPartitionInfoMapping = {
+export type ParquetChunkMapping = {
   /** Axes mappings - Parquet file is sorted by these fields in this order */
-  axes: ParquetPartitionInfoMappingAxis[];
+  axes: ParquetChunkMappingAxis[];
 
   /** Column mapping */
-  column: ParquetPartitionInfoMappingColumn;
+  column: ParquetChunkMappingColumn;
 };
 
-type ParquetPartitionInfoData<Blob> = {
-  /** Parquet file (PTable) containing column data */
-  data: Blob;
+export type ParquetChunkStats = {
+  /** Number of rows in the chunk */
+  numberOfRows: number;
+  /** Byte size information for storage optimization and query planning */
+  size: {
+    /** Byte sizes for each axis column in the same order as axes mapping */
+    axes: number[];
+    /** Byte size for the data column */
+    column: number;
+  };
+};
 
-  /** Content hash calculated for the specific axes and data this partition represents */
-  dataDigest?: string;
+export type ParquetChunkMetadata = {
+  /** Content hash calculated for the specific axes and data this chunk represents */
+  dataDigest: string;
 
   /** Pre-computed statistics for optimization without blob download */
-  stats?: {
-    /** Number of rows in the column */
-    numberOfRows?: number;
-    /** Byte size information for storage optimization and query planning */
-    numberOfBytes?: {
-      /** Byte sizes for each axis column in the same order as axes mapping */
-      axes: number[];
-      /** Byte size for the data column */
-      column: number;
-    };
-  };
+  stats: Partial<ParquetChunkStats>;
 };
 
 export type ParquetChunk<Blob> = {
-  /** Mapping of column names to their data */
-  mapping: ParquetPartitionInfoMapping;
-
-  /** Data for this partition */
-  data: ParquetPartitionInfoData<Blob>;
-};
+  /** Parquet file (PTable) containing column data */
+  data: Blob;
+} & ParquetChunkMapping & Partial<ParquetChunkMetadata>;
 
 export type ParquetPartitionedDataInfo<Blob> = {
   /** Identifier for this data format ('ParquetPartitioned') */
@@ -146,7 +144,7 @@ export type ParquetPartitionedDataInfo<Blob> = {
   partitionKeyLength: number;
 
   /** Map of stringified partition keys to parquet files */
-  parts: Record<string, ParquetChunk<Blob>>;
+  parts: Record<string, Blob>;
 };
 
 /**
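
Net effect of the hunks above: `ParquetChunk` is flattened from a nested `{ mapping, data }` pair into a single object via an intersection type, and `ParquetPartitionedDataInfo.parts` now stores bare blobs. A minimal sketch of a chunk literal under the new shape, with a hypothetical `BlobHandle` standing in for the `Blob` type parameter:

import type { ParquetChunk } from '@milaboratories/pl-model-common';

// Hypothetical stand-in for a driver-specific blob reference.
type BlobHandle = { id: string };

const chunk: ParquetChunk<BlobHandle> = {
  // the parquet file itself
  data: { id: 'blob-42' },
  // fields inherited from ParquetChunkMapping:
  axes: [{ type: 'Long', id: 'readIndex' }],
  column: { type: 'String', id: 'sequence' },
  // fields inherited from Partial<ParquetChunkMetadata> are optional, and
  // stats itself is Partial<ParquetChunkStats>, so any subset is valid:
  stats: { numberOfRows: 1000 },
};
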
@@ -158,7 +156,8 @@ export type ParquetPartitionedDataInfo<Blob> = {
 export type DataInfo<Blob> =
   | JsonDataInfo
   | JsonPartitionedDataInfo<Blob>
-  | BinaryPartitionedDataInfo<Blob>;
+  | BinaryPartitionedDataInfo<Blob>
+  | ParquetPartitionedDataInfo<Blob>;
 
 /**
  * Type guard function that checks if the given value is a valid DataInfo.
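
With `ParquetPartitionedDataInfo` added to the `DataInfo` union, exhaustive switches in downstream code need a `'ParquetPartitioned'` arm. A sketch of the compile-time effect (`countParts` is a hypothetical helper):

import type { DataInfo } from '@milaboratories/pl-model-common';

function countParts<Blob>(info: DataInfo<Blob>): number {
  switch (info.type) {
    case 'Json':
      return Object.keys(info.data).length;
    case 'JsonPartitioned':
    case 'BinaryPartitioned':
    case 'ParquetPartitioned': // new arm; without it the switch is no longer exhaustive
      return Object.keys(info.parts).length;
  }
}
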
@@ -184,12 +183,8 @@ export function isDataInfo<Blob>(value: unknown): value is DataInfo<Blob> {
         && typeof data.data === 'object'
       );
     case 'JsonPartitioned':
-      return (
-        typeof data.partitionKeyLength === 'number'
-        && data.parts !== undefined
-        && typeof data.parts === 'object'
-      );
     case 'BinaryPartitioned':
+    case 'ParquetPartitioned':
       return (
         typeof data.partitionKeyLength === 'number'
         && data.parts !== undefined
@@ -250,6 +245,17 @@ export function mapDataInfo<B1, B2>(
         parts: newParts,
       };
     }
+    case 'ParquetPartitioned': {
+      // Map each blob in parts
+      const newParts: Record<string, B2> = {};
+      for (const [key, blob] of Object.entries(dataInfo.parts)) {
+        newParts[key] = mapFn(blob);
+      }
+      return {
+        ...dataInfo,
+        parts: newParts,
+      };
+    }
   }
 }
 
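`mapDataInfo` now rewrites parquet parts the same way it rewrites JSON parts: one blob per partition key. A usage sketch, assuming a hypothetical `resolveUrl` resolver:

import { mapDataInfo, type DataInfo } from '@milaboratories/pl-model-common';

type BlobHandle = { id: string }; // hypothetical blob reference
declare function resolveUrl(handle: BlobHandle): string; // hypothetical resolver
declare const info: DataInfo<BlobHandle>;

// The structure (type, partitionKeyLength, keys) is preserved; only the
// blob payloads change, now for 'ParquetPartitioned' as well.
const withUrls: DataInfo<string> = mapDataInfo(info, resolveUrl);
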
@@ -268,17 +274,20 @@ export function visitDataInfo<B>(
       break;
     case 'JsonPartitioned': {
       // Visit each blob in parts
-      for (const [_, blob] of Object.entries(dataInfo.parts)) {
-        cb(blob);
-      }
+      Object.values(dataInfo.parts).forEach(cb);
       break;
     }
     case 'BinaryPartitioned': {
       // Visit each index and values blob in parts
-      for (const [_, chunk] of Object.entries(dataInfo.parts)) {
+      Object.values(dataInfo.parts).forEach((chunk) => {
         cb(chunk.index);
         cb(chunk.values);
-      }
+      });
+      break;
+    }
+    case 'ParquetPartitioned': {
+      // Visit each blob in parts
+      Object.values(dataInfo.parts).forEach(cb);
       break;
     }
   }
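
`visitDataInfo` gains the matching traversal arm, so callers that enumerate blobs now see parquet partitions too. A collection sketch:

import { visitDataInfo, type DataInfo } from '@milaboratories/pl-model-common';

type BlobHandle = { id: string }; // hypothetical blob reference
declare const info: DataInfo<BlobHandle>;

// Gather every blob the DataInfo references: two per 'BinaryPartitioned'
// chunk (index and values), one per 'JsonPartitioned' or
// 'ParquetPartitioned' partition, none for plain 'Json'.
const blobs: BlobHandle[] = [];
visitDataInfo(info, (blob) => blobs.push(blob));
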
@@ -330,12 +339,21 @@ export interface BinaryPartitionedDataInfoEntries<Blob> {
   parts: PColumnDataEntry<BinaryChunk<Blob>>[];
 }
 
+/**
+ * Entry-based representation of ParquetPartitionedDataInfo
+ */
+export interface ParquetPartitionedDataInfoEntries<Blob> {
+  type: 'ParquetPartitioned';
+  partitionKeyLength: number;
+  parts: PColumnDataEntry<Blob>[];
+}
 /**
  * Union type representing all possible entry-based partitioned data storage formats
  */
 export type PartitionedDataInfoEntries<Blob> =
   | JsonPartitionedDataInfoEntries<Blob>
-  | BinaryPartitionedDataInfoEntries<Blob>;
+  | BinaryPartitionedDataInfoEntries<Blob>
+  | ParquetPartitionedDataInfoEntries<Blob>;
 
 /**
  * Union type representing all possible entry-based data storage formats
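
In the entry-based form each partition becomes an explicit `{ key, value }` pair instead of an entry in a stringified-key record. A sketch of the new interface (key values are illustrative):

import type { ParquetPartitionedDataInfoEntries } from '@milaboratories/pl-model-common';

type BlobHandle = { id: string }; // hypothetical blob reference

const entries: ParquetPartitionedDataInfoEntries<BlobHandle> = {
  type: 'ParquetPartitioned',
  partitionKeyLength: 1,
  parts: [
    { key: [0], value: { id: 'blob-0' } },
    { key: [1], value: { id: 'blob-1' } },
  ],
};
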
@@ -367,11 +385,8 @@ export function isDataInfoEntries<Blob>(value: unknown): value is DataInfoEntries<Blob> {
         && Array.isArray(data.data)
       );
     case 'JsonPartitioned':
-      return (
-        typeof data.partitionKeyLength === 'number'
-        && Array.isArray(data.parts)
-      );
     case 'BinaryPartitioned':
+    case 'ParquetPartitioned':
       return (
         typeof data.partitionKeyLength === 'number'
         && Array.isArray(data.parts)
@@ -390,7 +405,14 @@
  */
 export function isPartitionedDataInfoEntries<Blob>(value: unknown): value is PartitionedDataInfoEntries<Blob> {
   if (!isDataInfoEntries(value)) return false;
-  return value.type === 'JsonPartitioned' || value.type === 'BinaryPartitioned';
+  switch (value.type) {
+    case 'JsonPartitioned':
+    case 'BinaryPartitioned':
+    case 'ParquetPartitioned':
+      return true;
+    default:
+      return false;
+  }
 }
 
 /**
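
The guard rewrite trades the two-way boolean expression for a switch, so future variants are added in one obvious place. Usage is unchanged; a narrowing sketch:

import { isPartitionedDataInfoEntries } from '@milaboratories/pl-model-common';

type BlobHandle = { id: string }; // hypothetical blob reference
declare const input: unknown;

if (isPartitionedDataInfoEntries<BlobHandle>(input)) {
  // Narrowed: all three partitioned variants expose these fields.
  console.log(input.type, input.partitionKeyLength, input.parts.length);
}
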
@@ -401,42 +423,40 @@ export function isPartitionedDataInfoEntries<Blob>(value: unknown): value is PartitionedDataInfoEntries<Blob> {
  */
 export function dataInfoToEntries<Blob>(dataInfo: DataInfo<Blob>): DataInfoEntries<Blob> {
   switch (dataInfo.type) {
-    case 'Json': {
-      const entries: PColumnDataEntry<PColumnValue>[] = Object.entries(dataInfo.data).map(([keyStr, value]) => {
+    case 'Json': return {
+      type: 'Json',
+      keyLength: dataInfo.keyLength,
+      data: Object.entries(dataInfo.data).map(([keyStr, value]) => {
         const key = JSON.parse(keyStr) as PColumnKey;
-        return { key, value };
-      });
-
-      return {
-        type: 'Json',
-        keyLength: dataInfo.keyLength,
-        data: entries,
-      };
-    }
-    case 'JsonPartitioned': {
-      const parts: PColumnDataEntry<Blob>[] = Object.entries(dataInfo.parts).map(([keyStr, blob]) => {
+        return { key, value } as PColumnDataEntry<PColumnValue>;
+      }),
+    };
+    case 'JsonPartitioned': return {
+      type: 'JsonPartitioned',
+      partitionKeyLength: dataInfo.partitionKeyLength,
+      parts: Object.entries(dataInfo.parts).map(([keyStr, blob]) => {
         const key = JSON.parse(keyStr) as PColumnKey;
-        return { key, value: blob };
-      });
-
-      return {
-        type: 'JsonPartitioned',
-        partitionKeyLength: dataInfo.partitionKeyLength,
-        parts,
-      };
-    }
-    case 'BinaryPartitioned': {
-      const parts: PColumnDataEntry<BinaryChunk<Blob>>[] = Object.entries(dataInfo.parts).map(([keyStr, chunk]) => {
+        return { key, value: blob } as PColumnDataEntry<Blob>;
+      }),
+    };
+    case 'BinaryPartitioned': return {
+      type: 'BinaryPartitioned',
+      partitionKeyLength: dataInfo.partitionKeyLength,
+      parts: Object.entries(dataInfo.parts).map(([keyStr, chunk]) => {
        const key = JSON.parse(keyStr) as PColumnKey;
-        return { key, value: chunk };
-      });
-
-      return {
-        type: 'BinaryPartitioned',
-        partitionKeyLength: dataInfo.partitionKeyLength,
-        parts,
-      };
-    }
+        return { key, value: chunk } as PColumnDataEntry<BinaryChunk<Blob>>;
+      }),
+    };
+    case 'ParquetPartitioned': return {
+      type: 'ParquetPartitioned',
+      partitionKeyLength: dataInfo.partitionKeyLength,
+      parts: Object.entries(dataInfo.parts).map(([keyStr, blob]) => {
+        const key = JSON.parse(keyStr) as PColumnKey;
+        return { key, value: blob } as PColumnDataEntry<Blob>;
+      }),
+    };
+    default:
+      assertNever(dataInfo);
   }
 }
 
@@ -448,42 +468,36 @@ export function dataInfoToEntries<Blob>(dataInfo: DataInfo<Blob>): DataInfoEntries<Blob> {
  */
 export function entriesToDataInfo<Blob>(dataInfoEntries: DataInfoEntries<Blob>): DataInfo<Blob> {
   switch (dataInfoEntries.type) {
-    case 'Json': {
-      const data: Record<string, PColumnValue> = {};
-      for (const entry of dataInfoEntries.data) {
-        data[JSON.stringify(entry.key)] = entry.value;
-      }
-
-      return {
-        type: 'Json',
-        keyLength: dataInfoEntries.keyLength,
-        data,
-      };
-    }
-    case 'JsonPartitioned': {
-      const parts: Record<string, Blob> = {};
-      for (const entry of dataInfoEntries.parts) {
-        parts[JSON.stringify(entry.key)] = entry.value;
-      }
-
-      return {
-        type: 'JsonPartitioned',
-        partitionKeyLength: dataInfoEntries.partitionKeyLength,
-        parts,
-      };
-    }
-    case 'BinaryPartitioned': {
-      const parts: Record<string, BinaryChunk<Blob>> = {};
-      for (const entry of dataInfoEntries.parts) {
-        parts[JSON.stringify(entry.key)] = entry.value;
-      }
-
-      return {
-        type: 'BinaryPartitioned',
-        partitionKeyLength: dataInfoEntries.partitionKeyLength,
-        parts,
-      };
-    }
+    case 'Json': return {
+      type: 'Json',
+      keyLength: dataInfoEntries.keyLength,
+      data: Object.fromEntries(
+        dataInfoEntries.data.map(({ key, value }) => [JSON.stringify(key), value]),
+      ),
+    };
+    case 'JsonPartitioned': return {
+      type: 'JsonPartitioned',
+      partitionKeyLength: dataInfoEntries.partitionKeyLength,
+      parts: Object.fromEntries(
+        dataInfoEntries.parts.map(({ key, value }) => [JSON.stringify(key), value]),
+      ),
+    };
+    case 'BinaryPartitioned': return {
+      type: 'BinaryPartitioned',
+      partitionKeyLength: dataInfoEntries.partitionKeyLength,
+      parts: Object.fromEntries(
+        dataInfoEntries.parts.map(({ key, value }) => [JSON.stringify(key), value]),
+      ),
+    };
+    case 'ParquetPartitioned': return {
+      type: 'ParquetPartitioned',
+      partitionKeyLength: dataInfoEntries.partitionKeyLength,
+      parts: Object.fromEntries(
+        dataInfoEntries.parts.map(({ key, value }) => [JSON.stringify(key), value]),
+      ),
+    };
+    default:
+      assertNever(dataInfoEntries);
   }
 }
 
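`dataInfoToEntries` and `entriesToDataInfo` remain inverses across all four variants, and both now end in `assertNever`, so a fifth variant cannot be added without handling it in each function. A round-trip sketch:

import {
  dataInfoToEntries,
  entriesToDataInfo,
  type DataInfo,
} from '@milaboratories/pl-model-common';

type BlobHandle = { id: string }; // hypothetical blob reference
declare const info: DataInfo<BlobHandle>;

// Keys survive the trip: entries are built by JSON.parse-ing each record
// key, and the inverse re-stringifies them.
const roundTripped: DataInfo<BlobHandle> = entriesToDataInfo(dataInfoToEntries(info));
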
@@ -512,32 +526,29 @@ export function mapDataInfoEntries<B1, B2>(
     case 'Json':
       // Json type doesn't contain blobs, so return as is
      return dataInfoEntries;
-    case 'JsonPartitioned': {
-      // Map each blob in parts
-      const newParts = dataInfoEntries.parts.map((entry) => ({
+    case 'JsonPartitioned': return {
+      ...dataInfoEntries,
+      parts: dataInfoEntries.parts.map((entry) => ({
         key: entry.key,
         value: mapFn(entry.value),
-      }));
-
-      return {
-        ...dataInfoEntries,
-        parts: newParts,
-      };
-    }
-    case 'BinaryPartitioned': {
-      // Map each index and values blob in parts
-      const newParts = dataInfoEntries.parts.map((entry) => ({
+      })),
+    };
+    case 'BinaryPartitioned': return {
+      ...dataInfoEntries,
+      parts: dataInfoEntries.parts.map((entry) => ({
         key: entry.key,
         value: {
           index: mapFn(entry.value.index),
           values: mapFn(entry.value.values),
         },
-      }));
-
-      return {
-        ...dataInfoEntries,
-        parts: newParts,
-      };
-    }
+      })),
+    };
+    case 'ParquetPartitioned': return {
+      ...dataInfoEntries,
+      parts: dataInfoEntries.parts.map((entry) => ({
+        key: entry.key,
+        value: mapFn(entry.value),
+      })),
+    };
   }
 }
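
`mapDataInfoEntries` gets the same flattening; its parquet arm is structurally identical to the JSON arm, since both carry one blob per entry. A usage sketch mirroring the `mapDataInfo` example above:

import { mapDataInfoEntries, type DataInfoEntries } from '@milaboratories/pl-model-common';

type BlobHandle = { id: string }; // hypothetical blob reference
declare function resolveUrl(handle: BlobHandle): string; // hypothetical resolver
declare const entries: DataInfoEntries<BlobHandle>;

// Entry keys are untouched; only blob values are rewritten.
const remapped: DataInfoEntries<string> = mapDataInfoEntries(entries, resolveUrl);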