@milaboratories/pl-model-common 1.19.3 → 1.19.4

package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@milaboratories/pl-model-common",
-  "version": "1.19.3",
+  "version": "1.19.4",
   "description": "Platforma SDK Model",
   "types": "./dist/index.d.ts",
   "main": "./dist/index.js",
@@ -1,3 +1,5 @@
+import { assertNever } from '../../util';
+
 /**
  * Represents a JavaScript representation of a value in a PColumn. Can be null, a number, or a string.
  * These are the primitive types that can be stored directly in PColumns.
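The import added above backs the new exhaustiveness checks in dataInfoToEntries and entriesToDataInfo below. The helper's implementation is not part of this diff; a minimal sketch of the conventional pattern it presumably follows:

    // Hypothetical sketch of an assertNever exhaustiveness helper;
    // the actual code in '../../util' is not shown in this diff.
    export function assertNever(value: never): never {
      throw new Error(`Unexpected value: ${JSON.stringify(value)}`);
    }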
@@ -85,7 +87,7 @@ export type BinaryPartitionedDataInfo<Blob> = {
   parts: Record<string, BinaryChunk<Blob>>;
 };

-type ParquetChunkMappingAxis = {
+export type ParquetChunkMappingAxis = {
   /** Data type (matches PColumn axis types) */
   type: 'Int' | 'Long' | 'String';

@@ -93,7 +95,7 @@ type ParquetChunkMappingAxis = {
   id: string;
 };

-type ParquetChunkMappingColumn = {
+export type ParquetChunkMappingColumn = {
   /** Data type (matches PColumn value type) */
   type: 'Int' | 'Long' | 'Float' | 'Double' | 'String';

@@ -101,11 +103,19 @@ type ParquetChunkMappingColumn = {
   id: string;
 };

-type ParquetChunkStats = {
+export type ParquetChunkMapping = {
+  /** Axes mappings - Parquet file is sorted by these fields in this order */
+  axes: ParquetChunkMappingAxis[];
+
+  /** Column mapping */
+  column: ParquetChunkMappingColumn;
+};
+
+export type ParquetChunkStats = {
   /** Number of rows in the chunk */
-  numberOfRows?: number;
+  numberOfRows: number;
   /** Byte size information for storage optimization and query planning */
-  size?: {
+  size: {
     /** Byte sizes for each axis column in the same order as axes mapping */
     axes: number[];
     /** Byte size for the data column */
@@ -113,23 +123,19 @@ type ParquetChunkStats = {
   };
 };

-export type ParquetChunk<Blob> = {
-  /** Parquet file (PTable) containing column data */
-  data: Blob;
-
+export type ParquetChunkMetadata = {
   /** Content hash calculated for the specific axes and data this chunk represents */
-  dataDigest?: string;
-
-  /** Axes mappings - Parquet file is sorted by these fields in this order */
-  axes: ParquetChunkMappingAxis[];
-
-  /** Column mapping */
-  column: ParquetChunkMappingColumn;
+  dataDigest: string;

   /** Pre-computed statistics for optimization without blob download */
-  stats?: ParquetChunkStats;
+  stats: Partial<ParquetChunkStats>;
 };

+export type ParquetChunk<Blob> = {
+  /** Parquet file (PTable) containing column data */
+  data: Blob;
+} & ParquetChunkMapping & Partial<ParquetChunkMetadata>;
+
 export type ParquetPartitionedDataInfo<Blob> = {
   /** Identifier for this data format ('ParquetPartitioned') */
   type: 'ParquetPartitioned';
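After this restructuring, ParquetChunk<Blob> is the data blob plus a required ParquetChunkMapping and an optional ParquetChunkMetadata. A minimal sketch of a conforming value (ids, digest, and counts are made up for illustration):

    // Hypothetical example; axis/column ids, digest, and row count are illustrative.
    const chunk: ParquetChunk<string> = {
      data: 'blob-ref-123',                     // the Blob handle (a string here)
      axes: [{ type: 'Long', id: 'sampleId' }], // required via ParquetChunkMapping
      column: { type: 'Double', id: 'value' },
      dataDigest: 'sha256:...',                 // optional via Partial<ParquetChunkMetadata>
      stats: { numberOfRows: 1000 },            // stats itself is Partial<ParquetChunkStats>
    };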
@@ -138,7 +144,7 @@ export type ParquetPartitionedDataInfo<Blob> = {
   partitionKeyLength: number;

   /** Map of stringified partition keys to parquet files */
-  parts: Record<string, ParquetChunk<Blob>>;
+  parts: Record<string, Blob>;
 };

 /**
@@ -150,7 +156,8 @@ export type ParquetPartitionedDataInfo<Blob> = {
 export type DataInfo<Blob> =
   | JsonDataInfo
   | JsonPartitionedDataInfo<Blob>
-  | BinaryPartitionedDataInfo<Blob>;
+  | BinaryPartitionedDataInfo<Blob>
+  | ParquetPartitionedDataInfo<Blob>;

 /**
  * Type guard function that checks if the given value is a valid DataInfo.
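With the union extended, a ParquetPartitioned DataInfo keys plain blobs by stringified partition keys. A minimal sketch (keys and blob handles are made up):

    // Hypothetical example value, reused in the usage sketches below.
    const info: DataInfo<string> = {
      type: 'ParquetPartitioned',
      partitionKeyLength: 1,
      parts: {
        '[0]': 'blob-part-0', // key is JSON.stringify of a PColumnKey
        '[1]': 'blob-part-1',
      },
    };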
@@ -176,12 +183,8 @@ export function isDataInfo<Blob>(value: unknown): value is DataInfo<Blob> {
         && typeof data.data === 'object'
       );
     case 'JsonPartitioned':
-      return (
-        typeof data.partitionKeyLength === 'number'
-        && data.parts !== undefined
-        && typeof data.parts === 'object'
-      );
     case 'BinaryPartitioned':
+    case 'ParquetPartitioned':
       return (
         typeof data.partitionKeyLength === 'number'
         && data.parts !== undefined
@@ -242,6 +245,17 @@ export function mapDataInfo<B1, B2>(
         parts: newParts,
       };
     }
+    case 'ParquetPartitioned': {
+      // Map each blob in parts
+      const newParts: Record<string, B2> = {};
+      for (const [key, blob] of Object.entries(dataInfo.parts)) {
+        newParts[key] = mapFn(blob);
+      }
+      return {
+        ...dataInfo,
+        parts: newParts,
+      };
+    }
   }
 }

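The new case keeps mapDataInfo total over the extended union: it rewrites each blob while preserving partition keys. A usage sketch (toUrl is a hypothetical resolver, not part of this package):

    // Hypothetical usage: swap blob handles for download URLs.
    const withUrls = mapDataInfo(info, (blob) => toUrl(blob));
    // Same shape and keys as info; only the blob type changes.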
@@ -260,17 +274,20 @@ export function visitDataInfo<B>(
       break;
     case 'JsonPartitioned': {
       // Visit each blob in parts
-      for (const [_, blob] of Object.entries(dataInfo.parts)) {
-        cb(blob);
-      }
+      Object.values(dataInfo.parts).forEach(cb);
       break;
     }
     case 'BinaryPartitioned': {
       // Visit each index and values blob in parts
-      for (const [_, chunk] of Object.entries(dataInfo.parts)) {
+      Object.values(dataInfo.parts).forEach((chunk) => {
         cb(chunk.index);
         cb(chunk.values);
-      }
+      });
+      break;
+    }
+    case 'ParquetPartitioned': {
+      // Visit each blob in parts
+      Object.values(dataInfo.parts).forEach(cb);
       break;
     }
   }
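A typical use of visitDataInfo is collecting every blob a DataInfo references, e.g. for prefetching; a short sketch reusing the hypothetical info value from above:

    // Gather all referenced blobs; for ParquetPartitioned each part is one blob.
    const blobs: string[] = [];
    visitDataInfo(info, (blob) => blobs.push(blob));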
@@ -322,12 +339,21 @@ export interface BinaryPartitionedDataInfoEntries<Blob> {
   parts: PColumnDataEntry<BinaryChunk<Blob>>[];
 }

+/**
+ * Entry-based representation of ParquetPartitionedDataInfo
+ */
+export interface ParquetPartitionedDataInfoEntries<Blob> {
+  type: 'ParquetPartitioned';
+  partitionKeyLength: number;
+  parts: PColumnDataEntry<Blob>[];
+}
 /**
  * Union type representing all possible entry-based partitioned data storage formats
  */
 export type PartitionedDataInfoEntries<Blob> =
   | JsonPartitionedDataInfoEntries<Blob>
-  | BinaryPartitionedDataInfoEntries<Blob>;
+  | BinaryPartitionedDataInfoEntries<Blob>
+  | ParquetPartitionedDataInfoEntries<Blob>;

 /**
  * Union type representing all possible entry-based data storage formats
@@ -359,11 +385,8 @@ export function isDataInfoEntries<Blob>(value: unknown): value is DataInfoEntries<Blob> {
         && Array.isArray(data.data)
       );
     case 'JsonPartitioned':
-      return (
-        typeof data.partitionKeyLength === 'number'
-        && Array.isArray(data.parts)
-      );
     case 'BinaryPartitioned':
+    case 'ParquetPartitioned':
       return (
         typeof data.partitionKeyLength === 'number'
         && Array.isArray(data.parts)
@@ -382,7 +405,14 @@
  */
 export function isPartitionedDataInfoEntries<Blob>(value: unknown): value is PartitionedDataInfoEntries<Blob> {
   if (!isDataInfoEntries(value)) return false;
-  return value.type === 'JsonPartitioned' || value.type === 'BinaryPartitioned';
+  switch (value.type) {
+    case 'JsonPartitioned':
+    case 'BinaryPartitioned':
+    case 'ParquetPartitioned':
+      return true;
+    default:
+      return false;
+  }
 }

 /**
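A sketch of the reworked guard narrowing an unknown input before its parts are touched (payload is a made-up variable):

    declare const payload: unknown; // hypothetical input
    if (isPartitionedDataInfoEntries<string>(payload)) {
      console.log(`partitions: ${payload.parts.length}`);
    }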
@@ -393,42 +423,40 @@ export function isPartitionedDataInfoEntries<Blob>(value: unknown): value is PartitionedDataInfoEntries<Blob> {
  */
 export function dataInfoToEntries<Blob>(dataInfo: DataInfo<Blob>): DataInfoEntries<Blob> {
   switch (dataInfo.type) {
-    case 'Json': {
-      const entries: PColumnDataEntry<PColumnValue>[] = Object.entries(dataInfo.data).map(([keyStr, value]) => {
+    case 'Json': return {
+      type: 'Json',
+      keyLength: dataInfo.keyLength,
+      data: Object.entries(dataInfo.data).map(([keyStr, value]) => {
         const key = JSON.parse(keyStr) as PColumnKey;
-        return { key, value };
-      });
-
-      return {
-        type: 'Json',
-        keyLength: dataInfo.keyLength,
-        data: entries,
-      };
-    }
-    case 'JsonPartitioned': {
-      const parts: PColumnDataEntry<Blob>[] = Object.entries(dataInfo.parts).map(([keyStr, blob]) => {
+        return { key, value } as PColumnDataEntry<PColumnValue>;
+      }),
+    };
+    case 'JsonPartitioned': return {
+      type: 'JsonPartitioned',
+      partitionKeyLength: dataInfo.partitionKeyLength,
+      parts: Object.entries(dataInfo.parts).map(([keyStr, blob]) => {
         const key = JSON.parse(keyStr) as PColumnKey;
-        return { key, value: blob };
-      });
-
-      return {
-        type: 'JsonPartitioned',
-        partitionKeyLength: dataInfo.partitionKeyLength,
-        parts,
-      };
-    }
-    case 'BinaryPartitioned': {
-      const parts: PColumnDataEntry<BinaryChunk<Blob>>[] = Object.entries(dataInfo.parts).map(([keyStr, chunk]) => {
+        return { key, value: blob } as PColumnDataEntry<Blob>;
+      }),
+    };
+    case 'BinaryPartitioned': return {
+      type: 'BinaryPartitioned',
+      partitionKeyLength: dataInfo.partitionKeyLength,
+      parts: Object.entries(dataInfo.parts).map(([keyStr, chunk]) => {
         const key = JSON.parse(keyStr) as PColumnKey;
-        return { key, value: chunk };
-      });
-
-      return {
-        type: 'BinaryPartitioned',
-        partitionKeyLength: dataInfo.partitionKeyLength,
-        parts,
-      };
-    }
+        return { key, value: chunk } as PColumnDataEntry<BinaryChunk<Blob>>;
+      }),
+    };
+    case 'ParquetPartitioned': return {
+      type: 'ParquetPartitioned',
+      partitionKeyLength: dataInfo.partitionKeyLength,
+      parts: Object.entries(dataInfo.parts).map(([keyStr, blob]) => {
+        const key = JSON.parse(keyStr) as PColumnKey;
+        return { key, value: blob } as PColumnDataEntry<Blob>;
+      }),
+    };
+    default:
+      assertNever(dataInfo);
   }
 }

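A minimal round trip over the entry-based form, reusing the hypothetical info value from above:

    // Record form -> entry form -> record form.
    const entries = dataInfoToEntries(info); // parts become { key, value } pairs
    const restored = entriesToDataInfo(entries);
    // restored is structurally equal to info.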
@@ -440,42 +468,36 @@ export function dataInfoToEntries<Blob>(dataInfo: DataInfo<Blob>): DataInfoEntries<Blob> {
  */
 export function entriesToDataInfo<Blob>(dataInfoEntries: DataInfoEntries<Blob>): DataInfo<Blob> {
   switch (dataInfoEntries.type) {
-    case 'Json': {
-      const data: Record<string, PColumnValue> = {};
-      for (const entry of dataInfoEntries.data) {
-        data[JSON.stringify(entry.key)] = entry.value;
-      }
-
-      return {
-        type: 'Json',
-        keyLength: dataInfoEntries.keyLength,
-        data,
-      };
-    }
-    case 'JsonPartitioned': {
-      const parts: Record<string, Blob> = {};
-      for (const entry of dataInfoEntries.parts) {
-        parts[JSON.stringify(entry.key)] = entry.value;
-      }
-
-      return {
-        type: 'JsonPartitioned',
-        partitionKeyLength: dataInfoEntries.partitionKeyLength,
-        parts,
-      };
-    }
-    case 'BinaryPartitioned': {
-      const parts: Record<string, BinaryChunk<Blob>> = {};
-      for (const entry of dataInfoEntries.parts) {
-        parts[JSON.stringify(entry.key)] = entry.value;
-      }
-
-      return {
-        type: 'BinaryPartitioned',
-        partitionKeyLength: dataInfoEntries.partitionKeyLength,
-        parts,
-      };
-    }
+    case 'Json': return {
+      type: 'Json',
+      keyLength: dataInfoEntries.keyLength,
+      data: Object.fromEntries(
+        dataInfoEntries.data.map(({ key, value }) => [JSON.stringify(key), value]),
+      ),
+    };
+    case 'JsonPartitioned': return {
+      type: 'JsonPartitioned',
+      partitionKeyLength: dataInfoEntries.partitionKeyLength,
+      parts: Object.fromEntries(
+        dataInfoEntries.parts.map(({ key, value }) => [JSON.stringify(key), value]),
+      ),
+    };
+    case 'BinaryPartitioned': return {
+      type: 'BinaryPartitioned',
+      partitionKeyLength: dataInfoEntries.partitionKeyLength,
+      parts: Object.fromEntries(
+        dataInfoEntries.parts.map(({ key, value }) => [JSON.stringify(key), value]),
+      ),
+    };
+    case 'ParquetPartitioned': return {
+      type: 'ParquetPartitioned',
+      partitionKeyLength: dataInfoEntries.partitionKeyLength,
+      parts: Object.fromEntries(
+        dataInfoEntries.parts.map(({ key, value }) => [JSON.stringify(key), value]),
+      ),
+    };
+    default:
+      assertNever(dataInfoEntries);
   }
 }

@@ -504,32 +526,29 @@ export function mapDataInfoEntries<B1, B2>(
     case 'Json':
       // Json type doesn't contain blobs, so return as is
      return dataInfoEntries;
-    case 'JsonPartitioned': {
-      // Map each blob in parts
-      const newParts = dataInfoEntries.parts.map((entry) => ({
+    case 'JsonPartitioned': return {
+      ...dataInfoEntries,
+      parts: dataInfoEntries.parts.map((entry) => ({
         key: entry.key,
         value: mapFn(entry.value),
-      }));
-
-      return {
-        ...dataInfoEntries,
-        parts: newParts,
-      };
-    }
-    case 'BinaryPartitioned': {
-      // Map each index and values blob in parts
-      const newParts = dataInfoEntries.parts.map((entry) => ({
+      })),
+    };
+    case 'BinaryPartitioned': return {
+      ...dataInfoEntries,
+      parts: dataInfoEntries.parts.map((entry) => ({
         key: entry.key,
         value: {
           index: mapFn(entry.value.index),
           values: mapFn(entry.value.values),
         },
-      }));
-
-      return {
-        ...dataInfoEntries,
-        parts: newParts,
-      };
-    }
+      })),
+    };
+    case 'ParquetPartitioned': return {
+      ...dataInfoEntries,
+      parts: dataInfoEntries.parts.map((entry) => ({
+        key: entry.key,
+        value: mapFn(entry.value),
+      })),
+    };
   }
 }
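As with mapDataInfo, the entry-based mapper rewrites only the blob payloads, leaving keys and partitioning intact; a last sketch reusing the hypothetical entries and toUrl from above:

    // Hypothetical usage over the entry-based form.
    const entriesWithUrls = mapDataInfoEntries(entries, (blob) => toUrl(blob));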