@milaboratories/pl-model-common 1.19.3 → 1.19.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@milaboratories/pl-model-common",
-  "version": "1.19.3",
+  "version": "1.19.5",
   "description": "Platforma SDK Model",
   "types": "./dist/index.d.ts",
   "main": "./dist/index.js",
@@ -26,8 +26,8 @@
     "typescript": "~5.6.3",
     "vite": "^6.3.5",
     "vitest": "^2.1.9",
-    "@milaboratories/build-configs": "1.0.5",
-    "@platforma-sdk/eslint-config": "1.0.3"
+    "@platforma-sdk/eslint-config": "1.0.3",
+    "@milaboratories/build-configs": "1.0.5"
   },
   "scripts": {
     "type-check": "tsc --noEmit --composite false",
@@ -1,3 +1,5 @@
+import { assertNever } from '../../util';
+
 /**
  * Represents a JavaScript representation of a value in a PColumn. Can be null, a number, or a string.
  * These are the primitive types that can be stored directly in PColumns.
@@ -85,7 +87,7 @@ export type BinaryPartitionedDataInfo<Blob> = {
   parts: Record<string, BinaryChunk<Blob>>;
 };

-type ParquetChunkMappingAxis = {
+export type ParquetChunkMappingAxis = {
   /** Data type (matches PColumn axis types) */
   type: 'Int' | 'Long' | 'String';

@@ -93,7 +95,7 @@ type ParquetChunkMappingAxis = {
   id: string;
 };

-type ParquetChunkMappingColumn = {
+export type ParquetChunkMappingColumn = {
   /** Data type (matches PColumn value type) */
   type: 'Int' | 'Long' | 'Float' | 'Double' | 'String';

@@ -101,11 +103,19 @@ type ParquetChunkMappingColumn = {
   id: string;
 };

-type ParquetChunkStats = {
+export type ParquetChunkMapping = {
+  /** Axes mappings - Parquet file is sorted by these fields in this order */
+  axes: ParquetChunkMappingAxis[];
+
+  /** Column mapping */
+  column: ParquetChunkMappingColumn;
+};
+
+export type ParquetChunkStats = {
   /** Number of rows in the chunk */
-  numberOfRows?: number;
+  numberOfRows: number;
   /** Byte size information for storage optimization and query planning */
-  size?: {
+  size: {
     /** Byte sizes for each axis column in the same order as axes mapping */
     axes: number[];
     /** Byte size for the data column */
@@ -113,23 +123,19 @@ type ParquetChunkStats = {
   };
 };

-export type ParquetChunk<Blob> = {
-  /** Parquet file (PTable) containing column data */
-  data: Blob;
-
+export type ParquetChunkMetadata = {
   /** Content hash calculated for the specific axes and data this chunk represents */
-  dataDigest?: string;
-
-  /** Axes mappings - Parquet file is sorted by these fields in this order */
-  axes: ParquetChunkMappingAxis[];
-
-  /** Column mapping */
-  column: ParquetChunkMappingColumn;
+  dataDigest: string;

   /** Pre-computed statistics for optimization without blob download */
-  stats?: ParquetChunkStats;
+  stats: Partial<ParquetChunkStats>;
 };

+export type ParquetChunk<Blob> = {
+  /** Parquet file (PTable) containing column data */
+  data: Blob;
+} & ParquetChunkMapping & Partial<ParquetChunkMetadata>;
+
 export type ParquetPartitionedDataInfo<Blob> = {
   /** Identifier for this data format ('ParquetPartitioned') */
   type: 'ParquetPartitioned';
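The hunk above reshapes `ParquetChunk<Blob>` into the required `ParquetChunkMapping` intersected with an optional `Partial<ParquetChunkMetadata>`. A minimal sketch of a conforming value, assuming these types are re-exported from the package root; the blob handle, axis/column ids, digest and stats are invented for illustration:

```ts
import type { ParquetChunk } from '@milaboratories/pl-model-common';

// Illustrative only: blob handle, ids, digest and stats are made up.
const chunk: ParquetChunk<string> = {
  data: 'blob-handle-0', // Parquet file blob (required)
  axes: [{ type: 'Long', id: 'sampleId' }], // ParquetChunkMapping.axes
  column: { type: 'Double', id: 'value' }, // ParquetChunkMapping.column
  dataDigest: 'digest-0', // optional, via Partial<ParquetChunkMetadata>
  stats: { numberOfRows: 42 }, // Partial<ParquetChunkStats>; size omitted
};
```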
@@ -138,7 +144,7 @@ export type ParquetPartitionedDataInfo<Blob> = {
   partitionKeyLength: number;

   /** Map of stringified partition keys to parquet files */
-  parts: Record<string, ParquetChunk<Blob>>;
+  parts: Record<string, Blob>;
 };

 /**
@@ -150,7 +156,8 @@ export type ParquetPartitionedDataInfo<Blob> = {
 export type DataInfo<Blob> =
   | JsonDataInfo
   | JsonPartitionedDataInfo<Blob>
-  | BinaryPartitionedDataInfo<Blob>;
+  | BinaryPartitionedDataInfo<Blob>
+  | ParquetPartitionedDataInfo<Blob>;

 /**
  * Type guard function that checks if the given value is a valid DataInfo.
@@ -176,12 +183,8 @@ export function isDataInfo<Blob>(value: unknown): value is DataInfo<Blob> {
         && typeof data.data === 'object'
       );
     case 'JsonPartitioned':
-      return (
-        typeof data.partitionKeyLength === 'number'
-        && data.parts !== undefined
-        && typeof data.parts === 'object'
-      );
     case 'BinaryPartitioned':
+    case 'ParquetPartitioned':
       return (
         typeof data.partitionKeyLength === 'number'
         && data.parts !== undefined
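With 'ParquetPartitioned' folded into the guard above, the same structural check now accepts the new variant. A hedged usage sketch, assuming `isDataInfo` and `DataInfo` are exported from the package root; the candidate value is invented:

```ts
import { isDataInfo, type DataInfo } from '@milaboratories/pl-model-common';

// Illustrative only: a candidate of unknown origin (e.g. parsed JSON).
const candidate: unknown = {
  type: 'ParquetPartitioned',
  partitionKeyLength: 1,
  parts: { '[0]': { handle: 'blob-a' } },
};

if (isDataInfo<{ handle: string }>(candidate)) {
  const info: DataInfo<{ handle: string }> = candidate; // narrowed by the guard
  console.log(info.type); // 'ParquetPartitioned'
}
```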
@@ -201,6 +204,14 @@ export function isDataInfo<Blob>(value: unknown): value is DataInfo<Blob> {
  * @param mapFn - Function to transform blobs from type B1 to type B2
  * @returns A new DataInfo object with transformed blob references
  */
+export function mapDataInfo<B1, B2>(
+  dataInfo: ParquetPartitionedDataInfo<B1>,
+  mapFn: (blob: B1) => B2,
+): ParquetPartitionedDataInfo<B2>;
+export function mapDataInfo<B1, B2>(
+  dataInfo: Exclude<DataInfo<B1>, ParquetPartitionedDataInfo<B1>>,
+  mapFn: (blob: B1) => B2,
+): Exclude<DataInfo<B2>, ParquetPartitionedDataInfo<B2>>;
 export function mapDataInfo<B1, B2>(
   dataInfo: DataInfo<B1>,
   mapFn: (blob: B1) => B2,
@@ -242,6 +253,17 @@ export function mapDataInfo<B1, B2>(
         parts: newParts,
       };
     }
+    case 'ParquetPartitioned': {
+      // Map each blob in parts
+      const newParts: Record<string, B2> = {};
+      for (const [key, blob] of Object.entries(dataInfo.parts)) {
+        newParts[key] = mapFn(blob);
+      }
+      return {
+        ...dataInfo,
+        parts: newParts,
+      };
+    }
   }
 }

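Together with the overload signatures added earlier, the new branch above lets `mapDataInfo` preserve the concrete variant when mapping Parquet parts. A usage sketch, assuming `mapDataInfo` and `ParquetPartitionedDataInfo` are exported from the package root; partition keys and blob handles are invented:

```ts
import { mapDataInfo, type ParquetPartitionedDataInfo } from '@milaboratories/pl-model-common';

// Illustrative only.
const info: ParquetPartitionedDataInfo<string> = {
  type: 'ParquetPartitioned',
  partitionKeyLength: 1,
  parts: { '[0]': 'blob-a', '[1]': 'blob-b' },
};

// The ParquetPartitioned overload keeps the result typed as
// ParquetPartitionedDataInfo<number> instead of the wider DataInfo<number>.
const mapped = mapDataInfo(info, (blob) => blob.length);
```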
@@ -260,17 +282,20 @@ export function visitDataInfo<B>(
       break;
     case 'JsonPartitioned': {
       // Visit each blob in parts
-      for (const [_, blob] of Object.entries(dataInfo.parts)) {
-        cb(blob);
-      }
+      Object.values(dataInfo.parts).forEach(cb);
       break;
     }
     case 'BinaryPartitioned': {
       // Visit each index and values blob in parts
-      for (const [_, chunk] of Object.entries(dataInfo.parts)) {
+      Object.values(dataInfo.parts).forEach((chunk) => {
         cb(chunk.index);
         cb(chunk.values);
-      }
+      });
+      break;
+    }
+    case 'ParquetPartitioned': {
+      // Visit each blob in parts
+      Object.values(dataInfo.parts).forEach(cb);
       break;
     }
   }
@@ -322,12 +347,21 @@ export interface BinaryPartitionedDataInfoEntries<Blob> {
   parts: PColumnDataEntry<BinaryChunk<Blob>>[];
 }

+/**
+ * Entry-based representation of ParquetPartitionedDataInfo
+ */
+export interface ParquetPartitionedDataInfoEntries<Blob> {
+  type: 'ParquetPartitioned';
+  partitionKeyLength: number;
+  parts: PColumnDataEntry<Blob>[];
+}
 /**
  * Union type representing all possible entry-based partitioned data storage formats
  */
 export type PartitionedDataInfoEntries<Blob> =
   | JsonPartitionedDataInfoEntries<Blob>
-  | BinaryPartitionedDataInfoEntries<Blob>;
+  | BinaryPartitionedDataInfoEntries<Blob>
+  | ParquetPartitionedDataInfoEntries<Blob>;

 /**
  * Union type representing all possible entry-based data storage formats
@@ -359,11 +393,8 @@ export function isDataInfoEntries<Blob>(value: unknown): value is DataInfoEntrie
         && Array.isArray(data.data)
       );
     case 'JsonPartitioned':
-      return (
-        typeof data.partitionKeyLength === 'number'
-        && Array.isArray(data.parts)
-      );
     case 'BinaryPartitioned':
+    case 'ParquetPartitioned':
       return (
         typeof data.partitionKeyLength === 'number'
         && Array.isArray(data.parts)
@@ -382,7 +413,14 @@
  */
 export function isPartitionedDataInfoEntries<Blob>(value: unknown): value is PartitionedDataInfoEntries<Blob> {
   if (!isDataInfoEntries(value)) return false;
-  return value.type === 'JsonPartitioned' || value.type === 'BinaryPartitioned';
+  switch (value.type) {
+    case 'JsonPartitioned':
+    case 'BinaryPartitioned':
+    case 'ParquetPartitioned':
+      return true;
+    default:
+      return false;
+  }
 }

 /**
@@ -393,42 +431,40 @@ export function isPartitionedDataInfoEntries<Blob>(value: unknown): value is Par
  */
 export function dataInfoToEntries<Blob>(dataInfo: DataInfo<Blob>): DataInfoEntries<Blob> {
   switch (dataInfo.type) {
-    case 'Json': {
-      const entries: PColumnDataEntry<PColumnValue>[] = Object.entries(dataInfo.data).map(([keyStr, value]) => {
+    case 'Json': return {
+      type: 'Json',
+      keyLength: dataInfo.keyLength,
+      data: Object.entries(dataInfo.data).map(([keyStr, value]) => {
         const key = JSON.parse(keyStr) as PColumnKey;
-        return { key, value };
-      });
-
-      return {
-        type: 'Json',
-        keyLength: dataInfo.keyLength,
-        data: entries,
-      };
-    }
-    case 'JsonPartitioned': {
-      const parts: PColumnDataEntry<Blob>[] = Object.entries(dataInfo.parts).map(([keyStr, blob]) => {
+        return { key, value } as PColumnDataEntry<PColumnValue>;
+      }),
+    };
+    case 'JsonPartitioned': return {
+      type: 'JsonPartitioned',
+      partitionKeyLength: dataInfo.partitionKeyLength,
+      parts: Object.entries(dataInfo.parts).map(([keyStr, blob]) => {
         const key = JSON.parse(keyStr) as PColumnKey;
-        return { key, value: blob };
-      });
-
-      return {
-        type: 'JsonPartitioned',
-        partitionKeyLength: dataInfo.partitionKeyLength,
-        parts,
-      };
-    }
-    case 'BinaryPartitioned': {
-      const parts: PColumnDataEntry<BinaryChunk<Blob>>[] = Object.entries(dataInfo.parts).map(([keyStr, chunk]) => {
+        return { key, value: blob } as PColumnDataEntry<Blob>;
+      }),
+    };
+    case 'BinaryPartitioned': return {
+      type: 'BinaryPartitioned',
+      partitionKeyLength: dataInfo.partitionKeyLength,
+      parts: Object.entries(dataInfo.parts).map(([keyStr, chunk]) => {
         const key = JSON.parse(keyStr) as PColumnKey;
-        return { key, value: chunk };
-      });
-
-      return {
-        type: 'BinaryPartitioned',
-        partitionKeyLength: dataInfo.partitionKeyLength,
-        parts,
-      };
-    }
+        return { key, value: chunk } as PColumnDataEntry<BinaryChunk<Blob>>;
+      }),
+    };
+    case 'ParquetPartitioned': return {
+      type: 'ParquetPartitioned',
+      partitionKeyLength: dataInfo.partitionKeyLength,
+      parts: Object.entries(dataInfo.parts).map(([keyStr, blob]) => {
+        const key = JSON.parse(keyStr) as PColumnKey;
+        return { key, value: blob } as PColumnDataEntry<Blob>;
+      }),
+    };
+    default:
+      assertNever(dataInfo);
   }
 }

@@ -440,42 +476,36 @@ export function dataInfoToEntries<Blob>(dataInfo: DataInfo<Blob>): DataInfoEntri
  */
 export function entriesToDataInfo<Blob>(dataInfoEntries: DataInfoEntries<Blob>): DataInfo<Blob> {
   switch (dataInfoEntries.type) {
-    case 'Json': {
-      const data: Record<string, PColumnValue> = {};
-      for (const entry of dataInfoEntries.data) {
-        data[JSON.stringify(entry.key)] = entry.value;
-      }
-
-      return {
-        type: 'Json',
-        keyLength: dataInfoEntries.keyLength,
-        data,
-      };
-    }
-    case 'JsonPartitioned': {
-      const parts: Record<string, Blob> = {};
-      for (const entry of dataInfoEntries.parts) {
-        parts[JSON.stringify(entry.key)] = entry.value;
-      }
-
-      return {
-        type: 'JsonPartitioned',
-        partitionKeyLength: dataInfoEntries.partitionKeyLength,
-        parts,
-      };
-    }
-    case 'BinaryPartitioned': {
-      const parts: Record<string, BinaryChunk<Blob>> = {};
-      for (const entry of dataInfoEntries.parts) {
-        parts[JSON.stringify(entry.key)] = entry.value;
-      }
-
-      return {
-        type: 'BinaryPartitioned',
-        partitionKeyLength: dataInfoEntries.partitionKeyLength,
-        parts,
-      };
-    }
+    case 'Json': return {
+      type: 'Json',
+      keyLength: dataInfoEntries.keyLength,
+      data: Object.fromEntries(
+        dataInfoEntries.data.map(({ key, value }) => [JSON.stringify(key), value]),
+      ),
+    };
+    case 'JsonPartitioned': return {
+      type: 'JsonPartitioned',
+      partitionKeyLength: dataInfoEntries.partitionKeyLength,
+      parts: Object.fromEntries(
+        dataInfoEntries.parts.map(({ key, value }) => [JSON.stringify(key), value]),
+      ),
+    };
+    case 'BinaryPartitioned': return {
+      type: 'BinaryPartitioned',
+      partitionKeyLength: dataInfoEntries.partitionKeyLength,
+      parts: Object.fromEntries(
+        dataInfoEntries.parts.map(({ key, value }) => [JSON.stringify(key), value]),
+      ),
+    };
+    case 'ParquetPartitioned': return {
+      type: 'ParquetPartitioned',
+      partitionKeyLength: dataInfoEntries.partitionKeyLength,
+      parts: Object.fromEntries(
+        dataInfoEntries.parts.map(({ key, value }) => [JSON.stringify(key), value]),
+      ),
+    };
+    default:
+      assertNever(dataInfoEntries);
   }
 }

@@ -504,32 +534,29 @@ export function mapDataInfoEntries<B1, B2>(
     case 'Json':
       // Json type doesn't contain blobs, so return as is
       return dataInfoEntries;
-    case 'JsonPartitioned': {
-      // Map each blob in parts
-      const newParts = dataInfoEntries.parts.map((entry) => ({
+    case 'JsonPartitioned': return {
+      ...dataInfoEntries,
+      parts: dataInfoEntries.parts.map((entry) => ({
         key: entry.key,
         value: mapFn(entry.value),
-      }));
-
-      return {
-        ...dataInfoEntries,
-        parts: newParts,
-      };
-    }
-    case 'BinaryPartitioned': {
-      // Map each index and values blob in parts
-      const newParts = dataInfoEntries.parts.map((entry) => ({
+      })),
+    };
+    case 'BinaryPartitioned': return {
+      ...dataInfoEntries,
+      parts: dataInfoEntries.parts.map((entry) => ({
         key: entry.key,
         value: {
           index: mapFn(entry.value.index),
           values: mapFn(entry.value.values),
         },
-      }));
-
-      return {
-        ...dataInfoEntries,
-        parts: newParts,
-      };
-    }
+      })),
+    };
+    case 'ParquetPartitioned': return {
+      ...dataInfoEntries,
+      parts: dataInfoEntries.parts.map((entry) => ({
+        key: entry.key,
+        value: mapFn(entry.value),
+      })),
+    };
   }
 }
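Finally, with the 'ParquetPartitioned' branches added throughout, the record and entry representations convert both ways for the new variant as well. A sketch under the same root-export assumption, with invented values:

```ts
import { dataInfoToEntries, entriesToDataInfo, type ParquetPartitionedDataInfo } from '@milaboratories/pl-model-common';

// Illustrative only.
const info: ParquetPartitionedDataInfo<string> = {
  type: 'ParquetPartitioned',
  partitionKeyLength: 1,
  parts: { '[0]': 'blob-a' },
};

const entries = dataInfoToEntries(info); // parts: [{ key: [0], value: 'blob-a' }]
const roundTripped = entriesToDataInfo(entries); // structurally equal to `info`
```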