@platforma-open/milaboratories.software-ptabler.schema 1.8.0 → 1.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/expressions.d.ts +17 -1
- package/dist/index.d.ts +4 -2
- package/dist/io.d.ts +46 -25
- package/package.json +1 -1
- package/src/expressions.ts +19 -1
- package/src/index.ts +9 -1
- package/src/io.ts +59 -25
package/dist/expressions.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { DataType } from './common';
|
|
2
|
-
export type Expression = ComparisonExpression | BinaryArithmeticExpression | UnaryArithmeticExpression | CastExpression | BooleanLogicExpression | NotExpression | NullCheckExpression | StringJoinExpression | HashExpression | ColumnReferenceExpression | ConstantValueExpression | RankExpression | CumsumExpression | ExtendedUnaryStringExpression | StringDistanceExpression | FuzzyStringFilterExpression | WhenThenOtherwiseExpression | SubstringExpression | StringReplaceExpression | StringContainsExpression | StringStartsWithExpression | StringEndsWithExpression | StringContainsAnyExpression | StringCountMatchesExpression | StringExtractExpression | MinMaxExpression | FillNaExpression | WindowExpression;
|
|
2
|
+
export type Expression = ComparisonExpression | BinaryArithmeticExpression | UnaryArithmeticExpression | CastExpression | BooleanLogicExpression | NotExpression | NullCheckExpression | StringJoinExpression | HashExpression | ColumnReferenceExpression | ConstantValueExpression | RankExpression | CumsumExpression | ExtendedUnaryStringExpression | StringDistanceExpression | FuzzyStringFilterExpression | WhenThenOtherwiseExpression | SubstringExpression | StringReplaceExpression | StringContainsExpression | StringStartsWithExpression | StringEndsWithExpression | StringContainsAnyExpression | StringCountMatchesExpression | StringExtractExpression | MinMaxExpression | FillNaExpression | WindowExpression | StructFieldExpression;
|
|
3
3
|
/** Represents all possible expression types in the system. */
|
|
4
4
|
export type ComparisonOperator = 'gt' | 'ge' | 'eq' | 'lt' | 'le' | 'neq';
|
|
5
5
|
/** Defines a comparison operation between two expressions. */
|
|
@@ -406,3 +406,19 @@ export interface WindowExpression {
|
|
|
406
406
|
/** List of expressions to partition the data by. The aggregation is performed independently within each partition. */
|
|
407
407
|
partitionBy: Expression[];
|
|
408
408
|
}
|
|
409
|
+
/**
|
|
410
|
+
* Represents a struct field access operation.
|
|
411
|
+
* This operation retrieves a single field from a struct (nested data structure).
|
|
412
|
+
* It corresponds to Polars' struct.field() functionality.
|
|
413
|
+
*/
|
|
414
|
+
export interface StructFieldExpression {
|
|
415
|
+
/** The type of operation, always 'struct_field'. */
|
|
416
|
+
type: 'struct_field';
|
|
417
|
+
/** The struct expression to extract fields from. */
|
|
418
|
+
struct: Expression;
|
|
419
|
+
/**
|
|
420
|
+
* The field name to extract from the struct.
|
|
421
|
+
* Currently only supports single field extraction due to Polars behavior limitations.
|
|
422
|
+
*/
|
|
423
|
+
fields: string;
|
|
424
|
+
}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
|
-
import { ReadCsvStep, WriteCsvStep } from './io';
|
|
1
|
+
import { ReadCsvStep, ReadNdjsonStep, WriteCsvStep, WriteNdjsonStep, BaseFileReadStep, BaseFileWriteStep } from './io';
|
|
2
2
|
import { AddColumnsStep, FilterStep, SelectStep, WithColumnsStep, WithoutColumnsStep } from './basic_steps';
|
|
3
3
|
import { AggregateStep } from './aggregate';
|
|
4
4
|
import { AnyJoinStep } from './join';
|
|
5
5
|
import { ConcatenateStep } from './concatenate';
|
|
6
6
|
import { SortStep } from './sort';
|
|
7
|
-
export type PTablerStep = ReadCsvStep | WriteCsvStep | AddColumnsStep | FilterStep | AggregateStep | AnyJoinStep | ConcatenateStep | SortStep | SelectStep | WithColumnsStep | WithoutColumnsStep;
|
|
7
|
+
export type PTablerStep = ReadCsvStep | ReadNdjsonStep | WriteCsvStep | WriteNdjsonStep | AddColumnsStep | FilterStep | AggregateStep | AnyJoinStep | ConcatenateStep | SortStep | SelectStep | WithColumnsStep | WithoutColumnsStep;
|
|
8
8
|
export type PTablerWorkflow = {
|
|
9
9
|
workflow: PTablerStep[];
|
|
10
10
|
};
|
|
11
|
+
export type { BaseFileReadStep, BaseFileWriteStep };
|
|
12
|
+
export type { Expression, StructFieldExpression } from './expressions';
|
package/dist/io.d.ts
CHANGED
|
@@ -10,16 +10,15 @@ export interface ColumnSchema {
|
|
|
10
10
|
/** Optional: A specific string to be interpreted as a null value for this column. */
|
|
11
11
|
nullValue?: string;
|
|
12
12
|
}
|
|
13
|
-
/**
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
13
|
+
/**
|
|
14
|
+
* Base interface for file reading operations that contains common fields
|
|
15
|
+
* shared across different file format readers.
|
|
16
|
+
*/
|
|
17
|
+
export interface BaseFileReadStep {
|
|
18
|
+
/** Path to the file to be read. */
|
|
18
19
|
file: string;
|
|
19
20
|
/** The name assigned to the loaded DataFrame in the tablespace. */
|
|
20
21
|
name: string;
|
|
21
|
-
/** Optional: The delimiter character used in the CSV file. */
|
|
22
|
-
delimiter?: string;
|
|
23
22
|
/**
|
|
24
23
|
* Optional: Provides schema information for specific columns.
|
|
25
24
|
* If `inferSchema` is `true` (default), these definitions act as overrides
|
|
@@ -30,39 +29,61 @@ export interface ReadCsvStep {
|
|
|
30
29
|
*/
|
|
31
30
|
schema?: ColumnSchema[];
|
|
32
31
|
/**
|
|
33
|
-
* Optional: Whether to infer the schema from the
|
|
32
|
+
* Optional: Whether to infer the schema from the file using Polars'
|
|
34
33
|
* default inference mechanism (e.g., reading a certain number of rows).
|
|
35
34
|
* Defaults to `true`. If set to `false`, type inference is disabled,
|
|
36
35
|
* and types will rely on the `schema` field or Polars' defaults for
|
|
37
36
|
* columns not specified in `schema`.
|
|
38
37
|
*/
|
|
39
|
-
|
|
38
|
+
inferSchema?: boolean;
|
|
39
|
+
/**
|
|
40
|
+
* Optional: Return null if parsing fails because of schema mismatches.
|
|
41
|
+
* Defaults to `false`.
|
|
42
|
+
*/
|
|
43
|
+
ignoreErrors?: boolean;
|
|
44
|
+
/**
|
|
45
|
+
* Optional: Stop reading after this many rows.
|
|
46
|
+
* If not specified, all rows will be read.
|
|
47
|
+
*/
|
|
48
|
+
nRows?: number;
|
|
49
|
+
}
|
|
50
|
+
/** Represents the configuration for a step that reads data from a CSV file into the tablespace. */
|
|
51
|
+
export interface ReadCsvStep extends BaseFileReadStep {
|
|
52
|
+
/** The type of the step, which is always 'read_csv' for this operation. */
|
|
53
|
+
type: 'read_csv';
|
|
54
|
+
/** Optional: The delimiter character used in the CSV file. */
|
|
55
|
+
delimiter?: string;
|
|
56
|
+
}
|
|
57
|
+
/** Represents the configuration for a step that reads data from an NDJSON file into the tablespace. */
|
|
58
|
+
export interface ReadNdjsonStep extends BaseFileReadStep {
|
|
59
|
+
/** The type of the step, which is always 'read_ndjson' for this operation. */
|
|
60
|
+
type: 'read_ndjson';
|
|
40
61
|
}
|
|
41
62
|
/**
|
|
42
|
-
*
|
|
63
|
+
* Base interface for file writing operations that contains common fields
|
|
64
|
+
* shared across different file format writers.
|
|
43
65
|
*/
|
|
44
|
-
export interface
|
|
45
|
-
/** The type of the step, which is always 'write_csv' for this operation. */
|
|
46
|
-
type: 'write_csv';
|
|
66
|
+
export interface BaseFileWriteStep {
|
|
47
67
|
/** The name of the table in the tablespace to be written. */
|
|
48
68
|
table: string;
|
|
49
|
-
/** Path to the output CSV file. */
|
|
69
|
+
/** Path to the output file. */
|
|
50
70
|
file: string;
|
|
51
|
-
/** Optional: A list of column names to write to the CSV. If omitted, all columns are written. */
|
|
71
|
+
/** Optional: A list of column names to write to the file. If omitted, all columns are written. */
|
|
52
72
|
columns?: string[];
|
|
73
|
+
}
|
|
74
|
+
/**
|
|
75
|
+
* Represents the configuration for a step that writes a table from the tablespace to a CSV file.
|
|
76
|
+
*/
|
|
77
|
+
export interface WriteCsvStep extends BaseFileWriteStep {
|
|
78
|
+
/** The type of the step, which is always 'write_csv' for this operation. */
|
|
79
|
+
type: 'write_csv';
|
|
53
80
|
/** Optional: The delimiter character to use in the output CSV file. */
|
|
54
81
|
delimiter?: string;
|
|
55
82
|
}
|
|
56
83
|
/**
|
|
57
|
-
* Represents the configuration for a step that writes a table from the tablespace to a JSON file.
|
|
84
|
+
* Represents the configuration for a step that writes a table from the tablespace to an NDJSON file.
|
|
58
85
|
*/
|
|
59
|
-
export interface
|
|
60
|
-
/** The type of the step, which is always 'write_json' for this operation. */
|
|
61
|
-
type: '
|
|
62
|
-
/** The name of the table in the tablespace to be written. */
|
|
63
|
-
table: string;
|
|
64
|
-
/** Path to the output JSON file. */
|
|
65
|
-
file: string;
|
|
66
|
-
/** Optional: A list of column names to write to the JSON. If omitted, all columns are written. */
|
|
67
|
-
columns?: string[];
|
|
86
|
+
export interface WriteNdjsonStep extends BaseFileWriteStep {
|
|
87
|
+
/** The type of the step, which is always 'write_ndjson' for this operation. */
|
|
88
|
+
type: 'write_ndjson';
|
|
68
89
|
}
|
package/package.json
CHANGED
package/src/expressions.ts
CHANGED
|
@@ -28,7 +28,8 @@ export type Expression =
|
|
|
28
28
|
| StringExtractExpression
|
|
29
29
|
| MinMaxExpression
|
|
30
30
|
| FillNaExpression
|
|
31
|
-
| WindowExpression
|
|
31
|
+
| WindowExpression
|
|
32
|
+
| StructFieldExpression;
|
|
32
33
|
|
|
33
34
|
/** Represents all possible expression types in the system. */
|
|
34
35
|
export type ComparisonOperator = 'gt' | 'ge' | 'eq' | 'lt' | 'le' | 'neq';
|
|
@@ -514,3 +515,20 @@ export interface WindowExpression {
|
|
|
514
515
|
/** List of expressions to partition the data by. The aggregation is performed independently within each partition. */
|
|
515
516
|
partitionBy: Expression[];
|
|
516
517
|
}
|
|
518
|
+
|
|
519
|
+
/**
|
|
520
|
+
* Represents a struct field access operation.
|
|
521
|
+
* This operation retrieves a single field from a struct (nested data structure).
|
|
522
|
+
* It corresponds to Polars' struct.field() functionality.
|
|
523
|
+
*/
|
|
524
|
+
export interface StructFieldExpression {
|
|
525
|
+
/** The type of operation, always 'struct_field'. */
|
|
526
|
+
type: 'struct_field';
|
|
527
|
+
/** The struct expression to extract fields from. */
|
|
528
|
+
struct: Expression;
|
|
529
|
+
/**
|
|
530
|
+
* The field name to extract from the struct.
|
|
531
|
+
* Currently only supports single field extraction due to Polars behavior limitations.
|
|
532
|
+
*/
|
|
533
|
+
fields: string;
|
|
534
|
+
}
|
package/src/index.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { ReadCsvStep, WriteCsvStep } from './io';
|
|
1
|
+
import type { ReadCsvStep, ReadNdjsonStep, WriteCsvStep, WriteNdjsonStep, BaseFileReadStep, BaseFileWriteStep } from './io';
|
|
2
2
|
import type { AddColumnsStep, FilterStep, SelectStep, WithColumnsStep, WithoutColumnsStep } from './basic_steps';
|
|
3
3
|
import type { AggregateStep } from './aggregate';
|
|
4
4
|
import type { AnyJoinStep } from './join';
|
|
@@ -7,7 +7,9 @@ import type { SortStep } from './sort';
|
|
|
7
7
|
|
|
8
8
|
export type PTablerStep =
|
|
9
9
|
| ReadCsvStep
|
|
10
|
+
| ReadNdjsonStep
|
|
10
11
|
| WriteCsvStep
|
|
12
|
+
| WriteNdjsonStep
|
|
11
13
|
| AddColumnsStep
|
|
12
14
|
| FilterStep
|
|
13
15
|
| AggregateStep
|
|
@@ -21,3 +23,9 @@ export type PTablerStep =
|
|
|
21
23
|
export type PTablerWorkflow = {
|
|
22
24
|
workflow: PTablerStep[];
|
|
23
25
|
};
|
|
26
|
+
|
|
27
|
+
// Re-export base interfaces for potential external use
|
|
28
|
+
export type { BaseFileReadStep, BaseFileWriteStep };
|
|
29
|
+
|
|
30
|
+
// Re-export expression types for external use
|
|
31
|
+
export type { Expression, StructFieldExpression } from './expressions';
|
package/src/io.ts
CHANGED
|
@@ -12,16 +12,15 @@ export interface ColumnSchema {
|
|
|
12
12
|
nullValue?: string;
|
|
13
13
|
}
|
|
14
14
|
|
|
15
|
-
/**
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
15
|
+
/**
|
|
16
|
+
* Base interface for file reading operations that contains common fields
|
|
17
|
+
* shared across different file format readers.
|
|
18
|
+
*/
|
|
19
|
+
export interface BaseFileReadStep {
|
|
20
|
+
/** Path to the file to be read. */
|
|
20
21
|
file: string;
|
|
21
22
|
/** The name assigned to the loaded DataFrame in the tablespace. */
|
|
22
23
|
name: string;
|
|
23
|
-
/** Optional: The delimiter character used in the CSV file. */
|
|
24
|
-
delimiter?: string;
|
|
25
24
|
/**
|
|
26
25
|
* Optional: Provides schema information for specific columns.
|
|
27
26
|
* If `inferSchema` is `true` (default), these definitions act as overrides
|
|
@@ -32,41 +31,76 @@ export interface ReadCsvStep {
|
|
|
32
31
|
*/
|
|
33
32
|
schema?: ColumnSchema[];
|
|
34
33
|
/**
|
|
35
|
-
* Optional: Whether to infer the schema from the
|
|
34
|
+
* Optional: Whether to infer the schema from the file using Polars'
|
|
36
35
|
* default inference mechanism (e.g., reading a certain number of rows).
|
|
37
36
|
* Defaults to `true`. If set to `false`, type inference is disabled,
|
|
38
37
|
* and types will rely on the `schema` field or Polars' defaults for
|
|
39
38
|
* columns not specified in `schema`.
|
|
40
39
|
*/
|
|
41
|
-
|
|
40
|
+
inferSchema?: boolean;
|
|
41
|
+
/**
|
|
42
|
+
* Optional: Return null if parsing fails because of schema mismatches.
|
|
43
|
+
* Defaults to `false`.
|
|
44
|
+
*/
|
|
45
|
+
ignoreErrors?: boolean;
|
|
46
|
+
/**
|
|
47
|
+
* Optional: Stop reading after this many rows.
|
|
48
|
+
* If not specified, all rows will be read.
|
|
49
|
+
*/
|
|
50
|
+
nRows?: number;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
/** Represents the configuration for a step that reads data from a CSV file into the tablespace. */
|
|
54
|
+
export interface ReadCsvStep extends BaseFileReadStep {
|
|
55
|
+
/** The type of the step, which is always 'read_csv' for this operation. */
|
|
56
|
+
type: 'read_csv';
|
|
57
|
+
/** Optional: The delimiter character used in the CSV file. */
|
|
58
|
+
delimiter?: string;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
/** Represents the configuration for a step that reads data from an NDJSON file into the tablespace. */
|
|
62
|
+
export interface ReadNdjsonStep extends BaseFileReadStep {
|
|
63
|
+
/** The type of the step, which is always 'read_ndjson' for this operation. */
|
|
64
|
+
type: 'read_ndjson';
|
|
42
65
|
}
|
|
43
66
|
|
|
44
67
|
/**
|
|
45
|
-
*
|
|
68
|
+
* Base interface for file writing operations that contains common fields
|
|
69
|
+
* shared across different file format writers.
|
|
46
70
|
*/
|
|
47
|
-
export interface
|
|
48
|
-
/** The type of the step, which is always 'write_csv' for this operation. */
|
|
49
|
-
type: 'write_csv';
|
|
71
|
+
export interface BaseFileWriteStep {
|
|
50
72
|
/** The name of the table in the tablespace to be written. */
|
|
51
73
|
table: string;
|
|
52
|
-
/** Path to the output CSV file. */
|
|
74
|
+
/** Path to the output file. */
|
|
53
75
|
file: string;
|
|
54
|
-
/** Optional: A list of column names to write to the CSV. If omitted, all columns are written. */
|
|
76
|
+
/** Optional: A list of column names to write to the file. If omitted, all columns are written. */
|
|
55
77
|
columns?: string[];
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
/**
|
|
81
|
+
* Represents the configuration for a step that writes a table from the tablespace to a CSV file.
|
|
82
|
+
*/
|
|
83
|
+
export interface WriteCsvStep extends BaseFileWriteStep {
|
|
84
|
+
/** The type of the step, which is always 'write_csv' for this operation. */
|
|
85
|
+
type: 'write_csv';
|
|
56
86
|
/** Optional: The delimiter character to use in the output CSV file. */
|
|
57
87
|
delimiter?: string;
|
|
58
88
|
}
|
|
59
89
|
|
|
90
|
+
// Not yet supported. This should be a normal write_json step, but we don't have a lazy sink_json; a workaround can be created
|
|
91
|
+
// if needed.
|
|
92
|
+
// /**
|
|
93
|
+
// * Represents the configuration for a step that writes a table from the tablespace to a JSON file.
|
|
94
|
+
// */
|
|
95
|
+
// export interface WriteJsonStep extends BaseFileWriteStep {
|
|
96
|
+
// /** The type of the step, which is always 'write_json' for this operation. */
|
|
97
|
+
// type: 'write_json';
|
|
98
|
+
// }
|
|
99
|
+
|
|
60
100
|
/**
|
|
61
|
-
* Represents the configuration for a step that writes a table from the tablespace to a JSON file.
|
|
101
|
+
* Represents the configuration for a step that writes a table from the tablespace to an NDJSON file.
|
|
62
102
|
*/
|
|
63
|
-
export interface
|
|
64
|
-
/** The type of the step, which is always 'write_json' for this operation. */
|
|
65
|
-
type: '
|
|
66
|
-
/** The name of the table in the tablespace to be written. */
|
|
67
|
-
table: string;
|
|
68
|
-
/** Path to the output JSON file. */
|
|
69
|
-
file: string;
|
|
70
|
-
/** Optional: A list of column names to write to the JSON. If omitted, all columns are written. */
|
|
71
|
-
columns?: string[];
|
|
103
|
+
export interface WriteNdjsonStep extends BaseFileWriteStep {
|
|
104
|
+
/** The type of the step, which is always 'write_ndjson' for this operation. */
|
|
105
|
+
type: 'write_ndjson';
|
|
72
106
|
}
|