@platforma-open/milaboratories.software-ptabler.schema 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,59 @@
1
+ import { Expression, AggregationType } from './expressions';
2
+ /**
3
+ * Defines standard aggregation functions that operate on a single expression.
4
+ */
5
+ export type StandardAggregationType = 'sum' | 'mean' | 'median' | 'min' | 'max' | 'std' | 'var' | 'count' | 'first' | 'last' | 'n_unique';
6
+ /**
7
+ * Defines aggregation functions that select a value from one expression based on the min/max of another expression.
8
+ */
9
+ export type ByClauseAggregationType = 'max_by' | 'min_by';
10
+ /**
11
+ * Represents a standard aggregation operation like sum, mean, count, etc.
12
+ */
13
+ export interface StandardAggregationOperation {
14
+ /** The name for the resulting column after aggregation. */
15
+ name: string;
16
+ /** The type of standard aggregation to perform. */
17
+ aggregation: AggregationType;
18
+ /**
19
+ * The primary expression to aggregate.
20
+ * For aggregations like 'sum', 'mean', 'min', 'max', this is the expression (e.g., a column) whose values are aggregated.
21
+ * For 'count', this expression might not be directly used by Polars if counting all rows, but can be used to count non-null values in a specific column.
22
+ * For 'first', 'last', 'n_unique', this is the expression on which the operation is performed.
23
+ */
24
+ expression: Expression;
25
+ }
26
+ /**
27
+ * Represents an aggregation operation that selects a value from one expression
28
+ * based on the minimum or maximum of one or more other expressions (listed in the `by` field).
29
+ */
30
+ export interface ByClauseAggregationOperation {
31
+ /** The name for the resulting column after aggregation. */
32
+ name: string;
33
+ /** The type of 'by-clause' aggregation to perform: 'max_by' or 'min_by'. */
34
+ aggregation: ByClauseAggregationType;
35
+ /** The expression whose value is selected. */
36
+ expression: Expression;
37
+ /**
38
+ * The list of expressions to order by to determine which value of `expression` is selected (always an array, even for a single expression).
39
+ * If the array holds several expressions, ordering is done sequentially by each expression.
40
+ */
41
+ by: Expression[];
42
+ }
43
+ /**
44
+ * Represents the configuration for an 'aggregate' step in the workflow.
45
+ * This step performs aggregation operations on a table, optionally grouping by certain columns,
46
+ * and outputs a new table with the aggregated results.
47
+ */
48
+ export interface AggregateStep {
49
+ /** Specifies the type of the step, which is 'aggregate'. */
50
+ type: 'aggregate';
51
+ /** The name of the input table from the tablespace on which to perform aggregation. */
52
+ inputTable: string;
53
+ /** The name to be assigned to the newly created aggregated table in the tablespace. */
54
+ outputTable: string;
55
+ /** The list of column names to group by before performing aggregations. This field is required by the type. NOTE(review): presumably an empty array means "no grouping" — confirm against the runtime. */
56
+ groupBy: string[];
57
+ /** An array of aggregation operations to apply to the input table; each entry is either a standard or a by-clause aggregation. */
58
+ aggregations: (StandardAggregationOperation | ByClauseAggregationOperation)[];
59
+ }
@@ -0,0 +1,135 @@
1
+ import { Expression } from './expressions';
2
+ /**
3
+ * Defines a step that adds one or more new columns to an existing table in the tablespace.
4
+ * This operation modifies the specified table in place.
5
+ */
6
+ export interface AddColumnsStep {
7
+ /**
8
+ * The type identifier for this step.
9
+ * Must be 'add_columns'.
10
+ */
11
+ type: 'add_columns';
12
+ /**
13
+ * The name of the target DataFrame in the tablespace to which columns will be added.
14
+ */
15
+ table: string;
16
+ /**
17
+ * An array defining the new columns to be added.
18
+ * Each object in the array specifies the name of a new column and the expression to compute its values.
19
+ */
20
+ columns: {
21
+ /**
22
+ * The name of the new column.
23
+ */
24
+ name: string;
25
+ /**
26
+ * An Expression object defining how to compute the column's values.
27
+ * The expression will be evaluated for each row to generate the values for the new column.
28
+ */
29
+ expression: Expression;
30
+ }[];
31
+ }
32
+ /**
33
+ * Defines a step that filters rows in a table based on a specified condition
34
+ * and outputs the result to a new table in the tablespace.
35
+ */
36
+ export interface FilterStep {
37
+ /**
38
+ * The type identifier for this step.
39
+ * Must be 'filter'.
40
+ */
41
+ type: 'filter';
42
+ /**
43
+ * The name of the input table in the tablespace from which rows will be filtered.
44
+ */
45
+ inputTable: string;
46
+ /**
47
+ * The name for the resulting filtered table that will be added to the tablespace.
48
+ * This new table will contain only the rows that satisfy the condition.
49
+ */
50
+ outputTable: string;
51
+ /**
52
+ * A boolean Expression object used as the filter condition.
53
+ * Rows for which this expression evaluates to true are kept in the outputTable.
54
+ * Rows for which it evaluates to false or null are excluded.
55
+ */
56
+ condition: Expression;
57
+ }
58
+ /**
59
+ * Defines a step that selects a specific set of columns from an input table,
60
+ * potentially applying transformations or creating new columns, and outputs
61
+ * the result to a new table in the tablespace. This operation is similar
62
+ * to Polars' `select` method.
63
+ */
64
+ export interface SelectStep {
65
+ /**
66
+ * The type identifier for this step.
67
+ * Must be 'select'.
68
+ */
69
+ type: 'select';
70
+ /**
71
+ * The name of the input table in the tablespace from which columns will be selected.
72
+ */
73
+ inputTable: string;
74
+ /**
75
+ * The name for the resulting table that will be added to the tablespace.
76
+ * This new table will contain only the columns defined in the 'columns' array.
77
+ */
78
+ outputTable: string;
79
+ /**
80
+ * An array defining the columns for the output table.
81
+ * Each object in the array specifies the name of a column in the output table
82
+ * and the expression to compute its values.
83
+ */
84
+ columns: {
85
+ /**
86
+ * The name of the column in the output table.
87
+ */
88
+ name: string;
89
+ /**
90
+ * An Expression object defining how to compute the column's values.
91
+ * This expression will be evaluated to generate the values for this column
92
+ * in the output table.
93
+ */
94
+ expression: Expression;
95
+ }[];
96
+ }
97
+ /**
98
+ * Defines a step that adds new columns to an input table (or replaces existing ones
99
+ * if names collide) and outputs the result to a new table in the tablespace.
100
+ * This operation is similar to Polars' `with_columns` method.
101
+ */
102
+ export interface WithColumnsStep {
103
+ /**
104
+ * The type identifier for this step.
105
+ * Must be 'with_columns'.
106
+ */
107
+ type: 'with_columns';
108
+ /**
109
+ * The name of the input table in the tablespace to which columns will be added.
110
+ */
111
+ inputTable: string;
112
+ /**
113
+ * The name for the resulting table that will be added to the tablespace.
114
+ * This new table will contain all original columns from the inputTable,
115
+ * plus the new columns defined here (or with existing columns replaced by
116
+ * new ones if names match).
117
+ */
118
+ outputTable: string;
119
+ /**
120
+ * An array defining the new or replacement columns.
121
+ * Each object in the array specifies the name of a column and the
122
+ * expression to compute its values.
123
+ */
124
+ columns: {
125
+ /**
126
+ * The name of the new or replacement column.
127
+ */
128
+ name: string;
129
+ /**
130
+ * An Expression object defining how to compute the column's values.
131
+ * The expression will be evaluated for each row to generate the values for the column.
132
+ */
133
+ expression: Expression;
134
+ }[];
135
+ }
@@ -0,0 +1,10 @@
1
+ /**
2
+ * Defines the supported Polars primitive data types for schema definition.
3
+ *
4
+ * The following are aliases for convenience:
5
+ * - 'Int': maps to 'Int32'
6
+ * - 'Long': maps to 'Int64'
7
+ * - 'Float': maps to 'Float32'
8
+ * - 'Double': maps to 'Float64'
9
+ */
10
+ export type DataType = 'Int8' | 'Int16' | 'Int32' | 'Int64' | 'UInt8' | 'UInt16' | 'UInt32' | 'UInt64' | 'Float32' | 'Float64' | 'Boolean' | 'String' | 'Date' | 'Datetime' | 'Time' | 'Int' | 'Long' | 'Float' | 'Double';
@@ -0,0 +1,22 @@
1
+ /**
2
+ * Defines a step that vertically concatenates multiple tables from the tablespace
3
+ * into a single output table. Columns are matched by name.
4
+ */
5
+ export interface ConcatenateStep {
6
+ /** The type identifier for this step. Must be 'concatenate'. */
7
+ type: 'concatenate';
8
+ /**
9
+ * An array of input table names from the tablespace.
10
+ * The tables are concatenated vertically in the order they appear in this array.
11
+ */
12
+ inputTables: string[];
13
+ /** The name to be assigned to the newly created concatenated table in the tablespace. */
14
+ outputTable: string;
15
+ /**
16
+ * Optional. A list of column names to select from all input tables.
17
+ * If omitted, all columns from all input tables are included, and columns are matched by name.
18
+ * If provided, only these specified columns will be included in the output table.
19
+ * All input tables must contain all specified columns for the operation to succeed.
20
+ */
21
+ columns?: string[];
22
+ }
@@ -0,0 +1,320 @@
1
+ import { DataType } from './common';
2
+ /** Represents all possible expression types in the system. */ export type Expression = ComparisonExpression | BinaryArithmeticExpression | UnaryArithmeticExpression | CastExpression | BooleanLogicExpression | NotExpression | NullCheckExpression | StringJoinExpression | HashExpression | ColumnReferenceExpression | ConstantValueExpression | RankExpression | CumsumExpression | ExtendedUnaryStringExpression | StringDistanceExpression | FuzzyStringFilterExpression | WhenThenOtherwiseExpression | SubstringExpression | StringReplaceExpression | MinMaxExpression | FillNaExpression | WindowExpression;
3
+ /** Defines the supported comparison operators. */
4
+ export type ComparisonOperator = 'gt' | 'ge' | 'eq' | 'lt' | 'le' | 'neq';
5
+ /** Defines a comparison operation between two expressions. */
6
+ export interface ComparisonExpression {
7
+ /** The type of comparison (e.g., 'gt', 'eq'). */
8
+ type: ComparisonOperator;
9
+ /** The left-hand side expression. */
10
+ lhs: Expression;
11
+ /** The right-hand side expression. */
12
+ rhs: Expression;
13
+ }
14
+ /** Defines the supported binary arithmetic operators. */
15
+ export type BinaryArithmeticOperator = 'plus' | 'minus' | 'multiply' | 'truediv' | 'floordiv';
16
+ /** Represents a binary arithmetic operation between two expressions. */
17
+ export interface BinaryArithmeticExpression {
18
+ /** The type of arithmetic operation (e.g., 'plus', 'minus'). */
19
+ type: BinaryArithmeticOperator;
20
+ /** The left-hand side expression. */
21
+ lhs: Expression;
22
+ /** The right-hand side expression. */
23
+ rhs: Expression;
24
+ }
25
+ /** Defines the supported unary arithmetic operators. */
26
+ export type UnaryArithmeticOperator = 'log10' | 'log' | 'log2' | 'abs' | 'sqrt' | 'negate' | 'floor' | 'round' | 'ceil';
27
+ /** Represents a unary arithmetic operation on a single expression. */
28
+ export interface UnaryArithmeticExpression {
29
+ /** The type of unary operation (e.g., 'log10', 'abs'). */
30
+ type: UnaryArithmeticOperator;
31
+ /** The expression to operate on. */
32
+ value: Expression;
33
+ }
34
+ /**
35
+ * Represents a type casting operation that converts the result of an expression to a specified data type.
36
+ */
37
+ export interface CastExpression {
38
+ /** The type of operation, always 'cast'. */
39
+ type: 'cast';
40
+ /** The expression whose result will be cast to the target data type. */
41
+ value: Expression;
42
+ /** The target data type to cast the expression result to. */
43
+ dtype: DataType;
44
+ /**
45
+ * Whether to use strict casting mode. If true, conversion errors and overflows will throw exceptions.
46
+ * If false or undefined, uses non-strict mode where failures result in null values. Defaults to false.
47
+ */
48
+ strict?: boolean;
49
+ }
50
+ /** Defines the supported boolean list operators. */
51
+ export type BooleanListOperator = 'and' | 'or';
52
+ /** Represents a boolean logic operation (AND, OR) on a list of expressions. */
53
+ export interface BooleanLogicExpression {
54
+ /** The type of boolean operation ('and', 'or'). */
55
+ type: BooleanListOperator;
56
+ /** An array of boolean expressions as operands. */
57
+ operands: Expression[];
58
+ }
59
+ /** Represents a logical NOT operation on a single boolean expression. */
60
+ export interface NotExpression {
61
+ /** The type of operation, always 'not'. */
62
+ type: 'not';
63
+ /** The boolean expression to negate. */
64
+ value: Expression;
65
+ }
66
+ /** Defines the supported null check operators. */
67
+ export type NullCheckOperator = 'is_na' | 'is_not_na';
68
+ /** Represents a null check operation (is NA, is not NA) on an expression. */
69
+ export interface NullCheckExpression {
70
+ /** The type of null check ('is_na', 'is_not_na'). */
71
+ type: NullCheckOperator;
72
+ /** The expression to check for nullity. */
73
+ value: Expression;
74
+ }
75
+ /** Represents a string join operation on an array of expressions. */
76
+ export interface StringJoinExpression {
77
+ /** The type of operation, always 'str_join'. */
78
+ type: 'str_join';
79
+ /** An array of expressions whose string representations will be joined. */
80
+ operands: Expression[];
81
+ /** An optional delimiter string to insert between joined elements. */
82
+ delimiter?: string;
83
+ }
84
+ /** Defines the supported hash types. Includes common cryptographic and non-cryptographic algorithms. */
85
+ export type HashType = 'sha256' | 'sha512' | 'md5' | 'blake3' | 'wyhash' | 'xxh3';
86
+ /**
87
+ * Defines the encoding for the hash output.
88
+ * - 'hex': Standard hexadecimal encoding.
89
+ * - 'base64': Standard base64 encoding.
90
+ * - 'base64_alphanumeric': Base64 encoding with non-alphanumeric characters (e.g., '+', '/') removed.
91
+ * - 'base64_alphanumeric_upper': Base64 encoding with non-alphanumeric characters removed and the result converted to uppercase.
92
+ */
93
+ export type HashEncoding = 'hex' | 'base64' | 'base64_alphanumeric' | 'base64_alphanumeric_upper';
94
+ /** Represents a hashing operation on an expression. */
95
+ export interface HashExpression {
96
+ /** The type of operation, always 'hash'. */
97
+ type: 'hash';
98
+ /** The hash algorithm to apply. */
99
+ hashType: HashType;
100
+ /** The encoding for the output hash string. */
101
+ encoding: HashEncoding;
102
+ /** The expression whose value will be hashed. */
103
+ value: Expression;
104
+ /** Optional. Minimal number of entropy bits required. Affects encoding, truncating the result to the shortest string with the requested entropy. No error if bits exceed what the hash offers. */
105
+ bits?: number;
106
+ }
107
+ /** Represents a reference to a column by its name. */
108
+ export interface ColumnReferenceExpression {
109
+ /** The type of operation, always 'col'. */
110
+ type: 'col';
111
+ /** The name of the column to reference. */
112
+ name: string;
113
+ }
114
+ /** Represents a constant literal value (string, number, boolean, or null). */
115
+ export interface ConstantValueExpression {
116
+ /** The type of operation, always 'const'. */
117
+ type: 'const';
118
+ /** The constant value. */
119
+ value: string | number | boolean | null;
120
+ }
121
+ /**
122
+ * Represents a rank function applied over a dataset partition.
123
+ * Calculates the rank of each row within its partition based on the specified ordering.
124
+ */
125
+ export interface RankExpression {
126
+ /** The type of operation, always 'rank'. */
127
+ type: 'rank';
128
+ /** List of expressions to partition the data by before ranking. The output of these expressions will be used for partitioning. */
129
+ partitionBy: Expression[];
130
+ /** Defines the ordering expressions within partitions to determine the rank. */
131
+ orderBy: Expression[];
132
+ /** Whether to sort in descending order. Defaults to false (ascending). */
133
+ descending?: boolean;
134
+ }
135
+ /**
136
+ * Represents a cumulative sum function applied over a dataset partition.
137
+ * Calculates the cumulative sum of the `value` expression within each partition,
138
+ * based on the specified ordering. Values are sorted by `value` and then by
139
+ * `additionalOrderBy` before summing.
140
+ */
141
+ export interface CumsumExpression {
142
+ /** The type of operation, always 'cumsum'. */
143
+ type: 'cumsum';
144
+ /** The expression whose values will be cumulatively summed. */
145
+ value: Expression;
146
+ /** Defines additional ordering within partitions for the cumulative sum calculation, in addition to the ordering of the values themselves. */
147
+ additionalOrderBy: Expression[];
148
+ /** List of expressions to partition the data by before calculating the cumulative sum. The output of these expressions will be used for partitioning. */
149
+ partitionBy: Expression[];
150
+ /** Whether to sort in descending order. Defaults to false (ascending). */
151
+ descending?: boolean;
152
+ }
153
+ /** Defines the supported unary string operators. */
154
+ export type UnaryStringOperator = 'to_upper' | 'to_lower';
155
+ /** Represents a unary string operation on a single expression. */
156
+ export interface ExtendedUnaryStringExpression {
157
+ /** The type of unary string operation (e.g., 'to_upper', 'to_lower', 'str_len'). */
158
+ type: UnaryStringOperator | 'str_len';
159
+ /** The string expression to operate on. */
160
+ value: Expression;
161
+ }
162
+ /** Defines the supported string distance metrics. */
163
+ export type StringDistanceMetric = 'levenshtein' | 'optimal_string_alignment' | 'jaro_winkler';
164
+ /**
165
+ * Represents a string distance/similarity calculation between two expressions.
166
+ * Computes metrics like Levenshtein, Optimal String Alignment, or Jaro-Winkler.
167
+ */
168
+ export interface StringDistanceExpression {
169
+ /** The type of operation, always 'string_distance'. */
170
+ type: 'string_distance';
171
+ /** The specific distance metric to use. */
172
+ metric: StringDistanceMetric;
173
+ /** The first string expression. */
174
+ string1: Expression;
175
+ /** The second string expression to compare against. */
176
+ string2: Expression;
177
+ /**
178
+ * If true, the expression returns a similarity score (typically normalized between 0 and 1).
179
+ * If false or undefined, it returns the raw edit distance (e.g., Levenshtein, OSA).
180
+ * Jaro-Winkler inherently returns a similarity score; this flag might be ignored or influence its normalization if applicable.
181
+ */
182
+ returnSimilarity?: boolean;
183
+ }
184
+ /** Defines the supported fuzzy string filter distance metrics. */
185
+ export type FuzzyFilterDistanceMetric = 'levenshtein' | 'hamming';
186
+ /**
187
+ * Represents a fuzzy string filter operation on an expression.
188
+ * This operation compares the string value of an expression (`value`)
189
+ * against the string value of another expression (`pattern`) using a specified
190
+ * distance metric (`levenshtein` or `hamming`), returning true if the distance is
191
+ * within the specified `bound`.
192
+ */
193
+ export interface FuzzyStringFilterExpression {
194
+ /** The type of operation, always 'fuzzy_string_filter'. */
195
+ type: 'fuzzy_string_filter';
196
+ /** The distance metric to use for the fuzzy comparison. */
197
+ metric: FuzzyFilterDistanceMetric;
198
+ /** The expression whose string value will be compared. */
199
+ value: Expression;
200
+ /** The expression representing the string pattern to compare against (use a 'const' expression for a fixed string). */
201
+ pattern: Expression;
202
+ /** The maximum allowed distance for a match (inclusive). */
203
+ bound: number;
204
+ }
205
+ /**
206
+ * Represents a single "when" condition and its corresponding "then" result expression.
207
+ * Used within the WhenThenOtherwiseExpression.
208
+ */
209
+ export interface WhenThenClause {
210
+ /** The condition expression. Should evaluate to a boolean. */
211
+ when: Expression;
212
+ /** The result expression if the 'when' condition is true. */
213
+ then: Expression;
214
+ }
215
+ /**
216
+ * Represents a conditional expression that evaluates a series of "when"
217
+ * conditions and returns the corresponding "then" expression's value.
218
+ * If no "when" condition is met, it returns the value of the "otherwise" expression.
219
+ * This mimics Polars' when/then/otherwise functionality.
220
+ */
221
+ export interface WhenThenOtherwiseExpression {
222
+ /** The type of operation, always 'when_then_otherwise'. */
223
+ type: 'when_then_otherwise';
224
+ /** An array of "when/then" clauses to be evaluated in order. */
225
+ conditions: WhenThenClause[];
226
+ /** The expression whose value is returned if none of the "when" conditions are met. */
227
+ otherwise: Expression;
228
+ }
229
+ /**
230
+ * Represents a substring extraction operation on an expression.
231
+ * Extracts a portion of the string value resulting from the 'value' expression.
232
+ * The substring starts at the 'start' index (0-based).
233
+ * - If 'length' is provided, it specifies the maximum length of the substring.
234
+ * - If 'end' is provided, it specifies the index *before* which the substring ends.
235
+ * - If neither 'length' nor 'end' is provided, the substring extends to the end of the string.
236
+ * - 'length' and 'end' are mutually exclusive.
237
+ * If the requested substring range extends beyond the actual string length,
238
+ * the extraction automatically stops at the end of the string.
239
+ */
240
+ export interface SubstringExpression {
241
+ /** The type of operation, always 'substring'. */
242
+ type: 'substring';
243
+ /** The expression whose string value will be used. */
244
+ value: Expression;
245
+ /** The starting position (0-indexed). */
246
+ start: number;
247
+ /** The length of the substring. Mutually exclusive with 'end'. */
248
+ length?: number;
249
+ /** The end position of the substring (exclusive). Mutually exclusive with 'length'. */
250
+ end?: number;
251
+ }
252
+ /**
253
+ * Represents a string replacement operation.
254
+ * Replaces occurrences of a pattern (regex or literal) in a string expression with a replacement string.
255
+ * The behavior is aligned with Polars' `replace` and `replace_all` functions.
256
+ *
257
+ * - If `literal` is true, the `pattern` is treated as a literal string. Otherwise, it's treated as a regular expression.
258
+ * - If `replaceAll` is true, all occurrences of the pattern are replaced. Otherwise, only the first occurrence is replaced.
259
+ *
260
+ * When using regular expressions (i.e., `literal` is false or undefined):
261
+ * - Positional capture groups can be referenced in the `replacement` string using `$n` or `${n}` (e.g., `$1` for the first group).
262
+ * - Named capture groups can be referenced using `${name}`.
263
+ * - To include a literal dollar sign (`$`) in the replacement, it must be escaped as `$$`.
264
+ */
265
+ export interface StringReplaceExpression {
266
+ /** The type of operation, always 'str_replace'. */
267
+ type: 'str_replace';
268
+ /** The input string expression to operate on. */
269
+ value: Expression;
270
+ /** The pattern (regex or literal string) to search for. Can be a string literal or an expression evaluating to a string. */
271
+ pattern: Expression | string;
272
+ /** The replacement string. Can be a string literal or an expression evaluating to a string. Can use $n or ${name} for captured groups if pattern is a regex. */
273
+ replacement: Expression | string;
274
+ /** If true, replace all occurrences of the pattern. If false or undefined, replace only the first. Defaults to false. */
275
+ replaceAll?: boolean;
276
+ /** If true, treat the pattern as a literal string. If false or undefined, treat it as a regex. Defaults to false. */
277
+ literal?: boolean;
278
+ }
279
+ /** Defines the supported min/max operators. */
280
+ export type MinMaxOperator = 'min' | 'max';
281
+ /** Represents a min or max operation on a list of expressions. */
282
+ export interface MinMaxExpression {
283
+ /** The type of operation ('min' or 'max'). */
284
+ type: MinMaxOperator;
285
+ /** An array of expressions to find the minimum or maximum value from. */
286
+ operands: Expression[];
287
+ }
288
+ /**
289
+ * Represents a fill NA (null) operation.
290
+ * If the 'input' expression evaluates to null, the 'fillValue' expression is used.
291
+ * Otherwise, the 'input' expression's value is used.
292
+ * This is a convenience shortcut for a common pattern often implemented with
293
+ * conditional expressions (e.g., when(is_na(input), fillValue).otherwise(input)).
294
+ */
295
+ export interface FillNaExpression {
296
+ /** The type of operation, always 'fill_na'. */
297
+ type: 'fill_na';
298
+ /** The primary expression to evaluate. */
299
+ input: Expression;
300
+ /** The expression whose value is used if 'input' is null. */
301
+ fillValue: Expression;
302
+ }
303
+ /**
304
+ * Defines standard aggregation functions that can be used in window expressions (the `aggregation` field of `WindowExpression`); the aggregate-step schema also imports this type for `StandardAggregationOperation.aggregation`.
305
+ */
306
+ export type AggregationType = 'sum' | 'mean' | 'median' | 'min' | 'max' | 'std' | 'var' | 'count' | 'first' | 'last' | 'n_unique';
307
+ /**
308
+ * Represents a window function call.
309
+ * This allows applying an aggregation function over a specific partition of the data.
310
+ */
311
+ export interface WindowExpression {
312
+ /** The type of operation, always 'aggregate'. Note: This might be confusing, consider 'window_aggregate' or similar if 'aggregate' is heavily used elsewhere for a different step type. */
313
+ type: 'aggregate';
314
+ /** The aggregation function to apply (e.g., 'sum', 'mean'). */
315
+ aggregation: AggregationType;
316
+ /** The expression to apply the aggregation function to. */
317
+ value: Expression;
318
+ /** List of expressions to partition the data by. The aggregation is performed independently within each partition. */
319
+ partitionBy: Expression[];
320
+ }
@@ -0,0 +1,10 @@
1
+ import { ReadCsvStep, WriteCsvStep } from './io';
2
+ import { AddColumnsStep, FilterStep } from './basic_steps';
3
+ import { AggregateStep } from './aggregate';
4
+ import { AnyJoinStep } from './join';
5
+ import { ConcatenateStep } from './concatenate';
6
+ import { SortStep } from './sort';
7
+ /** Union of the step configurations that may appear in a workflow. NOTE(review): `SelectStep`, `WithColumnsStep` and `WriteJsonStep` are declared in this package but are not members of this union — confirm whether that is intentional. */ export type PTablerStep = ReadCsvStep | WriteCsvStep | AddColumnsStep | FilterStep | AggregateStep | AnyJoinStep | ConcatenateStep | SortStep;
8
+ export type PTablerWorkflow = {
9
+ workflow: PTablerStep[];
10
+ };
package/dist/index.js ADDED
@@ -0,0 +1,2 @@
1
+ "use strict";
2
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sources":[],"sourcesContent":[],"names":[],"mappings":""}
package/dist/index.mjs ADDED
@@ -0,0 +1,2 @@
1
+
2
+ //# sourceMappingURL=index.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.mjs","sources":[],"sourcesContent":[],"names":[],"mappings":""}
package/dist/io.d.ts ADDED
@@ -0,0 +1,68 @@
1
+ import { DataType } from './common';
2
+ /**
3
+ * Represents the schema definition for a single column.
4
+ */
5
+ export interface ColumnSchema {
6
+ /** The name of the column. */
7
+ column: string;
8
+ /** Optional: The expected Polars data type for this column. */
9
+ type?: DataType;
10
+ /** Optional: A specific string to be interpreted as a null value for this column. */
11
+ nullValue?: string;
12
+ }
13
+ /** Represents the configuration for a step that reads data from a CSV file into the tablespace. */
14
+ export interface ReadCsvStep {
15
+ /** The type of the step, which is always 'read_csv' for this operation. */
16
+ type: 'read_csv';
17
+ /** Path to the CSV file to be read. */
18
+ file: string;
19
+ /** The name assigned to the loaded DataFrame in the tablespace. */
20
+ name: string;
21
+ /** Optional: The delimiter character used in the CSV file. */
22
+ delimiter?: string;
23
+ /**
24
+ * Optional: Provides schema information for specific columns.
25
+ * If `infer_schema` is `true` (default), these definitions act as overrides
26
+ * to the types inferred by Polars. Each `ColumnSchema` can specify a `type`
27
+ * and/or a `nullValue`. If `infer_schema` is `false`, these definitions are
28
+ * used directly; for columns not listed, Polars' default behavior when no
29
+ * type is specified (e.g., reading as string) will apply.
30
+ */
31
+ schema?: ColumnSchema[];
32
+ /**
33
+ * Optional: Whether to infer the schema from the CSV file using Polars'
34
+ * default inference mechanism (e.g., reading a certain number of rows).
35
+ * Defaults to `true`. If set to `false`, type inference is disabled,
36
+ * and types will rely on the `schema` field or Polars' defaults for
37
+ * columns not specified in `schema`.
38
+ */
39
+ infer_schema?: boolean;
40
+ }
41
+ /**
42
+ * Represents the configuration for a step that writes a table from the tablespace to a CSV file.
43
+ */
44
+ export interface WriteCsvStep {
45
+ /** The type of the step, which is always 'write_csv' for this operation. */
46
+ type: 'write_csv';
47
+ /** The name of the table in the tablespace to be written. */
48
+ table: string;
49
+ /** Path to the output CSV file. */
50
+ file: string;
51
+ /** Optional: A list of column names to write to the CSV. If omitted, all columns are written. */
52
+ columns?: string[];
53
+ /** Optional: The delimiter character to use in the output CSV file. */
54
+ delimiter?: string;
55
+ }
56
+ /**
57
+ * Represents the configuration for a step that writes a table from the tablespace to a JSON file.
58
+ */
59
+ export interface WriteJsonStep {
60
+ /** The type of the step, which is always 'write_json' for this operation. */
61
+ type: 'write_json';
62
+ /** The name of the table in the tablespace to be written. */
63
+ table: string;
64
+ /** Path to the output JSON file. */
65
+ file: string;
66
+ /** Optional: A list of column names to write to the JSON. If omitted, all columns are written. */
67
+ columns?: string[];
68
+ }