@platforma-sdk/model 1.25.0 → 1.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,120 @@
+ import type {
+   BinaryChunk,
+   DataInfoEntries,
+   PColumnDataEntry,
+   PColumnKey,
+   PColumnValue,
+   JsonDataInfoEntries,
+   JsonPartitionedDataInfoEntries,
+   BinaryPartitionedDataInfoEntries,
+ } from '@milaboratories/pl-model-common';
+ import type { AxisFilterByIdx } from '@milaboratories/pl-model-common';
+
+ /**
+  * Filters DataInfoEntries using axis filters, removing the specified axes from keys and
+  * keeping only the entries that match the filter values.
+  *
+  * @param dataInfoEntries - The data info object to filter
+  * @param axisFilters - Array of axis filters (index, value pairs)
+  * @throws Error if any filter axis lies outside the partitioning axes (or, for Json data, the data key axes)
+  */
+ export function filterDataInfoEntries<Blob>(
+   dataInfoEntries: BinaryPartitionedDataInfoEntries<Blob>,
+   axisFilters: AxisFilterByIdx[],
+ ): BinaryPartitionedDataInfoEntries<Blob>;
+ export function filterDataInfoEntries<Blob>(
+   dataInfoEntries: JsonPartitionedDataInfoEntries<Blob>,
+   axisFilters: AxisFilterByIdx[],
+ ): JsonPartitionedDataInfoEntries<Blob>;
+ export function filterDataInfoEntries<Blob>(
+   dataInfoEntries: BinaryPartitionedDataInfoEntries<Blob> | JsonPartitionedDataInfoEntries<Blob>,
+   axisFilters: AxisFilterByIdx[],
+ ): BinaryPartitionedDataInfoEntries<Blob> | JsonPartitionedDataInfoEntries<Blob>;
+ export function filterDataInfoEntries(
+   dataInfoEntries: JsonDataInfoEntries,
+   axisFilters: AxisFilterByIdx[],
+ ): JsonDataInfoEntries;
+ export function filterDataInfoEntries<Blob>(
+   dataInfoEntries: DataInfoEntries<Blob>,
+   axisFilters: AxisFilterByIdx[],
+ ): DataInfoEntries<Blob> {
+   // Sort filters by axis index in descending order to safely remove elements from arrays
+   const sortedFilters = [...axisFilters].sort((a, b) => b[0] - a[0]);
+
+   // Check for invalid filter axes
+   if (dataInfoEntries.type === 'JsonPartitioned' || dataInfoEntries.type === 'BinaryPartitioned') {
+     const { partitionKeyLength } = dataInfoEntries;
+     for (const [axisIdx] of axisFilters)
+       if (axisIdx >= partitionKeyLength)
+         throw new Error(`Can't filter on non-partitioned axis ${axisIdx}. Must be < ${partitionKeyLength}`);
+   } else if (dataInfoEntries.type === 'Json') {
+     const { keyLength } = dataInfoEntries;
+     for (const [axisIdx] of axisFilters)
+       if (axisIdx >= keyLength)
+         throw new Error(`Can't filter on non-data axis ${axisIdx}. Must be < ${keyLength}`);
+   }
+
+   const keyMatchesFilters = (key: PColumnKey): boolean => {
+     for (const [axisIdx, axisValue] of sortedFilters)
+       if (key[axisIdx] !== axisValue)
+         return false;
+     return true;
+   };
+
+   const removeFilteredAxes = (key: PColumnKey): PColumnKey => {
+     const newKey = [...key];
+
+     // Remove axes in descending order to maintain correct indices
+     for (const [axisIdx] of sortedFilters)
+       newKey.splice(axisIdx, 1);
+
+     return newKey;
+   };
+
+   switch (dataInfoEntries.type) {
+     case 'Json': {
+       const filteredData: PColumnDataEntry<PColumnValue>[] = dataInfoEntries.data
+         .filter((entry: PColumnDataEntry<PColumnValue>) => keyMatchesFilters(entry.key))
+         .map((entry: PColumnDataEntry<PColumnValue>) => ({
+           key: removeFilteredAxes(entry.key),
+           value: entry.value,
+         }));
+
+       return {
+         type: 'Json',
+         keyLength: dataInfoEntries.keyLength - axisFilters.length,
+         data: filteredData,
+       };
+     }
+
+     case 'JsonPartitioned': {
+       const filteredParts = dataInfoEntries.parts
+         .filter((entry: PColumnDataEntry<Blob>) => keyMatchesFilters(entry.key))
+         .map((entry: PColumnDataEntry<Blob>) => ({
+           key: removeFilteredAxes(entry.key),
+           value: entry.value,
+         }));
+
+       return {
+         type: 'JsonPartitioned',
+         partitionKeyLength: dataInfoEntries.partitionKeyLength - axisFilters.length,
+         parts: filteredParts,
+       };
+     }
+
+     case 'BinaryPartitioned': {
+       const filteredParts = dataInfoEntries.parts
+         .filter((entry: PColumnDataEntry<BinaryChunk<Blob>>) => keyMatchesFilters(entry.key))
+         .map((entry: PColumnDataEntry<BinaryChunk<Blob>>) => ({
+           key: removeFilteredAxes(entry.key),
+           value: entry.value,
+         }));
+
+       return {
+         type: 'BinaryPartitioned',
+         partitionKeyLength: dataInfoEntries.partitionKeyLength - axisFilters.length,
+         parts: filteredParts,
+       };
+     }
+   }
+ }
@@ -1,2 +1,3 @@
  export * from './pcolumn_data';
  export * from './label';
+ export * from './axis_filtering';
@@ -1,3 +1,12 @@
+ /* eslint-disable @typescript-eslint/no-unused-vars */
+ import {
+   type BinaryChunk,
+   type BinaryPartitionedDataInfoEntries,
+   type DataInfoEntries,
+   type JsonPartitionedDataInfoEntries,
+   type PColumnDataEntry,
+   type PColumnKey,
+ } from '@milaboratories/pl-model-common';
  import type { TreeNodeAccessor } from '../accessor';

  const PCD_PREFIX = 'PColumnData/';
@@ -12,8 +21,6 @@ const PCD_SUP_PREFIX = PCD_PREFIX + 'Partitioned/';
  export const RT_JSON_SUPER_PARTITIONED = PCD_SUP_PREFIX + 'JsonPartitioned';
  export const RT_BINARY_SUPER_PARTITIONED = PCD_SUP_PREFIX + 'BinaryPartitioned';

- export type PColumnKey = (string | number)[];
-
  export type PColumnResourceMapEntry<T> = {
    key: PColumnKey;
    value: T;
@@ -95,13 +102,13 @@ export type PColumnKeyList = {
    keyLength: number;
  };

- const removeIndexSuffix = (keyStr: string): string | undefined => {
+ const removeIndexSuffix = (keyStr: string): { baseKey: string; type: 'index' | 'values' } => {
    if (keyStr.endsWith('.index')) {
-     return undefined;
+     return { baseKey: keyStr.substring(0, keyStr.length - 6), type: 'index' };
    } else if (keyStr.endsWith('.values')) {
-     return keyStr.substring(0, keyStr.length - 7);
+     return { baseKey: keyStr.substring(0, keyStr.length - 7), type: 'values' };
    } else {
-     throw Error(`key must ends on .index/.values for binary p-column, got: ${keyStr}`);
+     throw new Error(`key must end with .index/.values for binary p-column, got: ${keyStr}`);
    }
  };

@@ -144,9 +151,7 @@ export function getPartitionKeysList(
      case RT_BINARY_PARTITIONED:
        for (let keyStr of acc.listInputFields()) {
          if (rt === RT_BINARY_PARTITIONED) {
-           const k = removeIndexSuffix(keyStr);
-           if (!k) continue;
-           else keyStr = k;
+           keyStr = removeIndexSuffix(keyStr).baseKey;
          }
          const key = [...JSON.parse(keyStr)] as PColumnKey;
          data.push(key);
@@ -164,9 +169,7 @@ export function getPartitionKeysList(
        if (value !== undefined) {
          for (let keyStr of value.listInputFields()) {
            if (rt === RT_BINARY_SUPER_PARTITIONED) {
-             const k = removeIndexSuffix(keyStr);
-             if (!k) continue;
-             else keyStr = k;
+             keyStr = removeIndexSuffix(keyStr).baseKey;
            }
            const key = [...keyPrefix, ...JSON.parse(keyStr)] as PColumnKey;
            data.push(key);
@@ -206,3 +209,179 @@ export function getUniquePartitionKeys(

    return result.map((s) => Array.from(s.values()));
  }
+
+ /**
+  * Parses the PColumn data from a TreeNodeAccessor into a DataInfoEntries structure.
+  * Returns undefined if any required data is missing.
+  * Throws an error on validation failures.
+  *
+  * @param acc - The TreeNodeAccessor containing PColumn data
+  * @param keyPrefix - Optional key prefix for recursive calls
+  * @returns DataInfoEntries representation of the PColumn data, or undefined if incomplete
+  */
+ export function parsePColumnData(
+   acc: TreeNodeAccessor | undefined,
+   keyPrefix: PColumnKey = [],
+ ): JsonPartitionedDataInfoEntries<TreeNodeAccessor> | BinaryPartitionedDataInfoEntries<TreeNodeAccessor> | undefined {
+   if (acc === undefined) return undefined;
+
+   const resourceType = acc.resourceType.name;
+   const meta = acc.getDataAsJson<Record<string, number>>();
+
+   // Prevent recursive super-partitioned resources
+   if (keyPrefix.length > 0
+     && (resourceType === RT_JSON_SUPER_PARTITIONED || resourceType === RT_BINARY_SUPER_PARTITIONED)) {
+     throw new Error(`Unexpected nested super-partitioned resource: ${resourceType}`);
+   }
+
+   switch (resourceType) {
+     case RT_RESOURCE_MAP:
+     case RT_RESOURCE_MAP_PARTITIONED:
+       throw new Error(`Only data columns are supported, got: ${resourceType}`);
+
+     case RT_JSON_PARTITIONED: {
+       if (typeof meta?.partitionKeyLength !== 'number') {
+         throw new Error(`Missing partitionKeyLength in metadata for ${resourceType}`);
+       }
+
+       const parts: PColumnDataEntry<TreeNodeAccessor>[] = [];
+       for (const keyStr of acc.listInputFields()) {
+         const value = acc.resolve({ field: keyStr, assertFieldType: 'Input' });
+         if (value === undefined) return undefined;
+
+         const key = [...keyPrefix, ...JSON.parse(keyStr)];
+         parts.push({ key, value });
+       }
+
+       return {
+         type: 'JsonPartitioned',
+         partitionKeyLength: meta.partitionKeyLength,
+         parts,
+       };
+     }
+
+     case RT_BINARY_PARTITIONED: {
+       if (typeof meta?.partitionKeyLength !== 'number') {
+         throw new Error(`Missing partitionKeyLength in metadata for ${resourceType}`);
+       }
+
+       const parts: PColumnDataEntry<BinaryChunk<TreeNodeAccessor>>[] = [];
+       const baseKeys = new Map<string, { index?: TreeNodeAccessor; values?: TreeNodeAccessor }>();
+
+       // Group fields by base key (without .index/.values suffix)
+       for (const keyStr of acc.listInputFields()) {
+         const suffix = removeIndexSuffix(keyStr);
+
+         const value = acc.resolve({ field: keyStr, assertFieldType: 'Input' });
+         if (value === undefined) return undefined;
+
+         let entry = baseKeys.get(suffix.baseKey);
+         if (!entry) {
+           entry = {};
+           baseKeys.set(suffix.baseKey, entry);
+         }
+
+         if (suffix.type === 'index') {
+           entry.index = value;
+         } else {
+           entry.values = value;
+         }
+       }
+
+       // Process complete binary chunks only
+       for (const [baseKeyStr, entry] of baseKeys.entries()) {
+         if (!entry.index || !entry.values) return undefined;
+
+         const key = [...keyPrefix, ...JSON.parse(baseKeyStr)];
+         parts.push({
+           key,
+           value: {
+             index: entry.index,
+             values: entry.values,
+           },
+         });
+       }
+
+       return {
+         type: 'BinaryPartitioned',
+         partitionKeyLength: meta.partitionKeyLength,
+         parts,
+       };
+     }
+
+     case RT_JSON_SUPER_PARTITIONED: {
+       if (typeof meta?.superPartitionKeyLength !== 'number'
+         || typeof meta?.partitionKeyLength !== 'number') {
+         throw new Error(`Missing superPartitionKeyLength or partitionKeyLength in metadata for ${resourceType}`);
+       }
+
+       const totalKeyLength = meta.superPartitionKeyLength + meta.partitionKeyLength;
+       const parts: PColumnDataEntry<TreeNodeAccessor>[] = [];
+
+       // Process all super partitions
+       for (const supKeyStr of acc.listInputFields()) {
+         const superPartition = acc.resolve({ field: supKeyStr, assertFieldType: 'Input' });
+         if (superPartition === undefined) return undefined;
+
+         // Validate inner type
+         if (superPartition.resourceType.name !== RT_JSON_PARTITIONED) {
+           throw new Error(`Expected ${RT_JSON_PARTITIONED} inside ${resourceType}, but got ${superPartition.resourceType.name}`);
+         }
+
+         const innerResult = parsePColumnData(superPartition, JSON.parse(supKeyStr) as PColumnKey);
+
+         if (innerResult === undefined) return undefined;
+
+         if (innerResult.type !== 'JsonPartitioned')
+           throw new Error(`Unexpected inner result type for ${resourceType}: ${innerResult.type}`);
+
+         parts.push(...innerResult.parts);
+       }
+
+       return {
+         type: 'JsonPartitioned',
+         partitionKeyLength: totalKeyLength,
+         parts,
+       };
+     }
+
+     case RT_BINARY_SUPER_PARTITIONED: {
+       if (typeof meta?.superPartitionKeyLength !== 'number'
+         || typeof meta?.partitionKeyLength !== 'number') {
+         throw new Error(`Missing superPartitionKeyLength or partitionKeyLength in metadata for ${resourceType}`);
+       }
+
+       const totalKeyLength = meta.superPartitionKeyLength + meta.partitionKeyLength;
+       const parts: PColumnDataEntry<BinaryChunk<TreeNodeAccessor>>[] = [];
+
+       // Process all super partitions
+       for (const supKeyStr of acc.listInputFields()) {
+         const superPartition = acc.resolve({ field: supKeyStr, assertFieldType: 'Input' });
+         if (superPartition === undefined) return undefined;
+
+         // Validate inner type
+         if (superPartition.resourceType.name !== RT_BINARY_PARTITIONED) {
+           throw new Error(`Expected ${RT_BINARY_PARTITIONED} inside ${resourceType}, but got ${superPartition.resourceType.name}`);
+         }
+
+         const innerResult = parsePColumnData(superPartition, JSON.parse(supKeyStr) as PColumnKey);
+
+         if (innerResult === undefined) return undefined;
+
+         if (innerResult.type !== 'BinaryPartitioned')
+           throw new Error(`Unexpected inner result type for ${resourceType}: ${innerResult.type}`);
+
+         parts.push(...innerResult.parts);
+       }
+
+       return {
+         type: 'BinaryPartitioned',
+         partitionKeyLength: totalKeyLength,
+         parts,
+       };
+     }
+
+     default:
+       throw new Error(`Unknown resource type: ${resourceType}`);
+   }
+ }
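The two additions can be chained. A rough sketch, assuming acc is a TreeNodeAccessor resolved to a (super-)partitioned PColumn data resource and that 'sampleA' is a hypothetical value on partition axis 0:

const entries = parsePColumnData(acc);
if (entries !== undefined) {
  // Fix the first partitioning axis to a concrete value and drop it from every part key.
  const filtered = filterDataInfoEntries(entries, [[0, 'sampleA']]);
  // filtered.partitionKeyLength === entries.partitionKeyLength - 1
}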