@platforma-open/milaboratories.software-ptabler.schema 1.8.0 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  import { DataType } from './common';
2
- export type Expression = ComparisonExpression | BinaryArithmeticExpression | UnaryArithmeticExpression | CastExpression | BooleanLogicExpression | NotExpression | NullCheckExpression | StringJoinExpression | HashExpression | ColumnReferenceExpression | ConstantValueExpression | RankExpression | CumsumExpression | ExtendedUnaryStringExpression | StringDistanceExpression | FuzzyStringFilterExpression | WhenThenOtherwiseExpression | SubstringExpression | StringReplaceExpression | StringContainsExpression | StringStartsWithExpression | StringEndsWithExpression | StringContainsAnyExpression | StringCountMatchesExpression | StringExtractExpression | MinMaxExpression | FillNaExpression | WindowExpression;
2
+ export type Expression = ComparisonExpression | BinaryArithmeticExpression | UnaryArithmeticExpression | CastExpression | BooleanLogicExpression | NotExpression | NullCheckExpression | StringJoinExpression | HashExpression | ColumnReferenceExpression | ConstantValueExpression | RankExpression | CumsumExpression | ExtendedUnaryStringExpression | StringDistanceExpression | FuzzyStringFilterExpression | WhenThenOtherwiseExpression | SubstringExpression | StringReplaceExpression | StringContainsExpression | StringStartsWithExpression | StringEndsWithExpression | StringContainsAnyExpression | StringCountMatchesExpression | StringExtractExpression | MinMaxExpression | FillNaExpression | WindowExpression | StructFieldExpression;
3
3
  /** Represents all possible expression types in the system. */
4
4
  export type ComparisonOperator = 'gt' | 'ge' | 'eq' | 'lt' | 'le' | 'neq';
5
5
  /** Defines a comparison operation between two expressions. */
@@ -406,3 +406,19 @@ export interface WindowExpression {
406
406
  /** List of expressions to partition the data by. The aggregation is performed independently within each partition. */
407
407
  partitionBy: Expression[];
408
408
  }
409
+ /**
410
+ * Represents a struct field access operation.
411
+ * This operation retrieves a single field from a struct (nested data structure).
412
+ * It corresponds to Polars' struct.field() functionality.
413
+ */
414
+ export interface StructFieldExpression {
415
+ /** The type of operation, always 'struct_field'. */
416
+ type: 'struct_field';
417
+ /** The struct expression to extract fields from. */
418
+ struct: Expression;
419
+ /**
420
+ * The field name to extract from the struct.
421
+ * Currently only supports single field extraction due to Polars behavior limitations.
422
+ */
423
+ fields: string;
424
+ }
package/dist/index.d.ts CHANGED
@@ -1,10 +1,12 @@
1
- import { ReadCsvStep, WriteCsvStep } from './io';
1
+ import { ReadCsvStep, ReadNdjsonStep, WriteCsvStep, WriteNdjsonStep, BaseFileReadStep, BaseFileWriteStep } from './io';
2
2
  import { AddColumnsStep, FilterStep, SelectStep, WithColumnsStep, WithoutColumnsStep } from './basic_steps';
3
3
  import { AggregateStep } from './aggregate';
4
4
  import { AnyJoinStep } from './join';
5
5
  import { ConcatenateStep } from './concatenate';
6
6
  import { SortStep } from './sort';
7
- export type PTablerStep = ReadCsvStep | WriteCsvStep | AddColumnsStep | FilterStep | AggregateStep | AnyJoinStep | ConcatenateStep | SortStep | SelectStep | WithColumnsStep | WithoutColumnsStep;
7
+ export type PTablerStep = ReadCsvStep | ReadNdjsonStep | WriteCsvStep | WriteNdjsonStep | AddColumnsStep | FilterStep | AggregateStep | AnyJoinStep | ConcatenateStep | SortStep | SelectStep | WithColumnsStep | WithoutColumnsStep;
8
8
  export type PTablerWorkflow = {
9
9
  workflow: PTablerStep[];
10
10
  };
11
+ export type { BaseFileReadStep, BaseFileWriteStep };
12
+ export type { Expression, StructFieldExpression } from './expressions';
package/dist/io.d.ts CHANGED
@@ -10,16 +10,15 @@ export interface ColumnSchema {
10
10
  /** Optional: A specific string to be interpreted as a null value for this column. */
11
11
  nullValue?: string;
12
12
  }
13
- /** Represents the configuration for a step that reads data from a CSV file into the tablespace. */
14
- export interface ReadCsvStep {
15
- /** The type of the step, which is always 'read_csv' for this operation. */
16
- type: 'read_csv';
17
- /** Path to the CSV file to be read. */
13
+ /**
14
+ * Base interface for file reading operations that contains common fields
15
+ * shared across different file format readers.
16
+ */
17
+ export interface BaseFileReadStep {
18
+ /** Path to the file to be read. */
18
19
  file: string;
19
20
  /** The name assigned to the loaded DataFrame in the tablespace. */
20
21
  name: string;
21
- /** Optional: The delimiter character used in the CSV file. */
22
- delimiter?: string;
23
22
  /**
24
23
  * Optional: Provides schema information for specific columns.
25
24
  * If `inferSchema` is `true` (default), these definitions act as overrides
@@ -30,39 +29,61 @@ export interface ReadCsvStep {
30
29
  */
31
30
  schema?: ColumnSchema[];
32
31
  /**
33
- * Optional: Whether to infer the schema from the CSV file using Polars'
32
+ * Optional: Whether to infer the schema from the file using Polars'
34
33
  * default inference mechanism (e.g., reading a certain number of rows).
35
34
  * Defaults to `true`. If set to `false`, type inference is disabled,
36
35
  * and types will rely on the `schema` field or Polars' defaults for
37
36
  * columns not specified in `schema`.
38
37
  */
39
- infer_schema?: boolean;
38
+ inferSchema?: boolean;
39
+ /**
40
+ * Optional: Return null if parsing fails because of schema mismatches.
41
+ * Defaults to `false`.
42
+ */
43
+ ignoreErrors?: boolean;
44
+ /**
45
+ * Optional: Stop reading after this many rows.
46
+ * If not specified, all rows will be read.
47
+ */
48
+ nRows?: number;
49
+ }
50
+ /** Represents the configuration for a step that reads data from a CSV file into the tablespace. */
51
+ export interface ReadCsvStep extends BaseFileReadStep {
52
+ /** The type of the step, which is always 'read_csv' for this operation. */
53
+ type: 'read_csv';
54
+ /** Optional: The delimiter character used in the CSV file. */
55
+ delimiter?: string;
56
+ }
57
+ /** Represents the configuration for a step that reads data from an NDJSON file into the tablespace. */
58
+ export interface ReadNdjsonStep extends BaseFileReadStep {
59
+ /** The type of the step, which is always 'read_ndjson' for this operation. */
60
+ type: 'read_ndjson';
40
61
  }
41
62
  /**
42
- * Represents the configuration for a step that writes a table from the tablespace to a CSV file.
63
+ * Base interface for file writing operations that contains common fields
64
+ * shared across different file format writers.
43
65
  */
44
- export interface WriteCsvStep {
45
- /** The type of the step, which is always 'write_csv' for this operation. */
46
- type: 'write_csv';
66
+ export interface BaseFileWriteStep {
47
67
  /** The name of the table in the tablespace to be written. */
48
68
  table: string;
49
- /** Path to the output CSV file. */
69
+ /** Path to the output file. */
50
70
  file: string;
51
- /** Optional: A list of column names to write to the CSV. If omitted, all columns are written. */
71
+ /** Optional: A list of column names to write to the file. If omitted, all columns are written. */
52
72
  columns?: string[];
73
+ }
74
+ /**
75
+ * Represents the configuration for a step that writes a table from the tablespace to a CSV file.
76
+ */
77
+ export interface WriteCsvStep extends BaseFileWriteStep {
78
+ /** The type of the step, which is always 'write_csv' for this operation. */
79
+ type: 'write_csv';
53
80
  /** Optional: The delimiter character to use in the output CSV file. */
54
81
  delimiter?: string;
55
82
  }
56
83
  /**
57
- * Represents the configuration for a step that writes a table from the tablespace to a JSON file.
84
+ * Represents the configuration for a step that writes a table from the tablespace to an NDJSON file.
58
85
  */
59
- export interface WriteJsonStep {
60
- /** The type of the step, which is always 'write_json' for this operation. */
61
- type: 'write_json';
62
- /** The name of the table in the tablespace to be written. */
63
- table: string;
64
- /** Path to the output JSON file. */
65
- file: string;
66
- /** Optional: A list of column names to write to the JSON. If omitted, all columns are written. */
67
- columns?: string[];
86
+ export interface WriteNdjsonStep extends BaseFileWriteStep {
87
+ /** The type of the step, which is always 'write_ndjson' for this operation. */
88
+ type: 'write_ndjson';
68
89
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@platforma-open/milaboratories.software-ptabler.schema",
3
- "version": "1.8.0",
3
+ "version": "1.10.0",
4
4
  "description": "Type definitions for PTabler",
5
5
  "types": "./dist/index.d.ts",
6
6
  "main": "./dist/index.js",
@@ -28,7 +28,8 @@ export type Expression =
28
28
  | StringExtractExpression
29
29
  | MinMaxExpression
30
30
  | FillNaExpression
31
- | WindowExpression;
31
+ | WindowExpression
32
+ | StructFieldExpression;
32
33
 
33
34
  /** Represents all possible expression types in the system. */
34
35
  export type ComparisonOperator = 'gt' | 'ge' | 'eq' | 'lt' | 'le' | 'neq';
@@ -514,3 +515,20 @@ export interface WindowExpression {
514
515
  /** List of expressions to partition the data by. The aggregation is performed independently within each partition. */
515
516
  partitionBy: Expression[];
516
517
  }
518
+
519
+ /**
520
+ * Represents a struct field access operation.
521
+ * This operation retrieves a single field from a struct (nested data structure).
522
+ * It corresponds to Polars' struct.field() functionality.
523
+ */
524
+ export interface StructFieldExpression {
525
+ /** The type of operation, always 'struct_field'. */
526
+ type: 'struct_field';
527
+ /** The struct expression to extract fields from. */
528
+ struct: Expression;
529
+ /**
530
+ * The field name to extract from the struct.
531
+ * Currently only supports single field extraction due to Polars behavior limitations.
532
+ */
533
+ fields: string;
534
+ }
package/src/index.ts CHANGED
@@ -1,4 +1,4 @@
1
- import type { ReadCsvStep, WriteCsvStep } from './io';
1
+ import type { ReadCsvStep, ReadNdjsonStep, WriteCsvStep, WriteNdjsonStep, BaseFileReadStep, BaseFileWriteStep } from './io';
2
2
  import type { AddColumnsStep, FilterStep, SelectStep, WithColumnsStep, WithoutColumnsStep } from './basic_steps';
3
3
  import type { AggregateStep } from './aggregate';
4
4
  import type { AnyJoinStep } from './join';
@@ -7,7 +7,9 @@ import type { SortStep } from './sort';
7
7
 
8
8
  export type PTablerStep =
9
9
  | ReadCsvStep
10
+ | ReadNdjsonStep
10
11
  | WriteCsvStep
12
+ | WriteNdjsonStep
11
13
  | AddColumnsStep
12
14
  | FilterStep
13
15
  | AggregateStep
@@ -21,3 +23,9 @@ export type PTablerStep =
21
23
  export type PTablerWorkflow = {
22
24
  workflow: PTablerStep[];
23
25
  };
26
+
27
+ // Re-export base interfaces for potential external use
28
+ export type { BaseFileReadStep, BaseFileWriteStep };
29
+
30
+ // Re-export expression types for external use
31
+ export type { Expression, StructFieldExpression } from './expressions';
package/src/io.ts CHANGED
@@ -12,16 +12,15 @@ export interface ColumnSchema {
12
12
  nullValue?: string;
13
13
  }
14
14
 
15
- /** Represents the configuration for a step that reads data from a CSV file into the tablespace. */
16
- export interface ReadCsvStep {
17
- /** The type of the step, which is always 'read_csv' for this operation. */
18
- type: 'read_csv';
19
- /** Path to the CSV file to be read. */
15
+ /**
16
+ * Base interface for file reading operations that contains common fields
17
+ * shared across different file format readers.
18
+ */
19
+ export interface BaseFileReadStep {
20
+ /** Path to the file to be read. */
20
21
  file: string;
21
22
  /** The name assigned to the loaded DataFrame in the tablespace. */
22
23
  name: string;
23
- /** Optional: The delimiter character used in the CSV file. */
24
- delimiter?: string;
25
24
  /**
26
25
  * Optional: Provides schema information for specific columns.
27
26
  * If `inferSchema` is `true` (default), these definitions act as overrides
@@ -32,41 +31,76 @@ export interface ReadCsvStep {
32
31
  */
33
32
  schema?: ColumnSchema[];
34
33
  /**
35
- * Optional: Whether to infer the schema from the CSV file using Polars'
34
+ * Optional: Whether to infer the schema from the file using Polars'
36
35
  * default inference mechanism (e.g., reading a certain number of rows).
37
36
  * Defaults to `true`. If set to `false`, type inference is disabled,
38
37
  * and types will rely on the `schema` field or Polars' defaults for
39
38
  * columns not specified in `schema`.
40
39
  */
41
- infer_schema?: boolean;
40
+ inferSchema?: boolean;
41
+ /**
42
+ * Optional: Return null if parsing fails because of schema mismatches.
43
+ * Defaults to `false`.
44
+ */
45
+ ignoreErrors?: boolean;
46
+ /**
47
+ * Optional: Stop reading after this many rows.
48
+ * If not specified, all rows will be read.
49
+ */
50
+ nRows?: number;
51
+ }
52
+
53
+ /** Represents the configuration for a step that reads data from a CSV file into the tablespace. */
54
+ export interface ReadCsvStep extends BaseFileReadStep {
55
+ /** The type of the step, which is always 'read_csv' for this operation. */
56
+ type: 'read_csv';
57
+ /** Optional: The delimiter character used in the CSV file. */
58
+ delimiter?: string;
59
+ }
60
+
61
+ /** Represents the configuration for a step that reads data from an NDJSON file into the tablespace. */
62
+ export interface ReadNdjsonStep extends BaseFileReadStep {
63
+ /** The type of the step, which is always 'read_ndjson' for this operation. */
64
+ type: 'read_ndjson';
42
65
  }
43
66
 
44
67
  /**
45
- * Represents the configuration for a step that writes a table from the tablespace to a CSV file.
68
+ * Base interface for file writing operations that contains common fields
69
+ * shared across different file format writers.
46
70
  */
47
- export interface WriteCsvStep {
48
- /** The type of the step, which is always 'write_csv' for this operation. */
49
- type: 'write_csv';
71
+ export interface BaseFileWriteStep {
50
72
  /** The name of the table in the tablespace to be written. */
51
73
  table: string;
52
- /** Path to the output CSV file. */
74
+ /** Path to the output file. */
53
75
  file: string;
54
- /** Optional: A list of column names to write to the CSV. If omitted, all columns are written. */
76
+ /** Optional: A list of column names to write to the file. If omitted, all columns are written. */
55
77
  columns?: string[];
78
+ }
79
+
80
+ /**
81
+ * Represents the configuration for a step that writes a table from the tablespace to a CSV file.
82
+ */
83
+ export interface WriteCsvStep extends BaseFileWriteStep {
84
+ /** The type of the step, which is always 'write_csv' for this operation. */
85
+ type: 'write_csv';
56
86
  /** Optional: The delimiter character to use in the output CSV file. */
57
87
  delimiter?: string;
58
88
  }
59
89
 
90
+ // Not yet supported: this should be a regular write_json step, but there is no lazy sink_json to back it.
91
+ // A workaround can be created if needed.
92
+ // /**
93
+ // * Represents the configuration for a step that writes a table from the tablespace to a JSON file.
94
+ // */
95
+ // export interface WriteJsonStep extends BaseFileWriteStep {
96
+ // /** The type of the step, which is always 'write_json' for this operation. */
97
+ // type: 'write_json';
98
+ // }
99
+
60
100
  /**
61
- * Represents the configuration for a step that writes a table from the tablespace to a JSON file.
101
+ * Represents the configuration for a step that writes a table from the tablespace to an NDJSON file.
62
102
  */
63
- export interface WriteJsonStep {
64
- /** The type of the step, which is always 'write_json' for this operation. */
65
- type: 'write_json';
66
- /** The name of the table in the tablespace to be written. */
67
- table: string;
68
- /** Path to the output JSON file. */
69
- file: string;
70
- /** Optional: A list of column names to write to the JSON. If omitted, all columns are written. */
71
- columns?: string[];
103
+ export interface WriteNdjsonStep extends BaseFileWriteStep {
104
+ /** The type of the step, which is always 'write_ndjson' for this operation. */
105
+ type: 'write_ndjson';
72
106
  }