@platforma-open/milaboratories.software-ptabler.schema 1.12.0 → 1.12.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/aggregate.d.ts +2 -5
- package/dist/aggregate.d.ts.map +1 -0
- package/dist/basic_steps.d.ts +30 -35
- package/dist/basic_steps.d.ts.map +1 -0
- package/dist/common.d.ts +1 -0
- package/dist/common.d.ts.map +1 -0
- package/dist/concatenate.d.ts +1 -0
- package/dist/concatenate.d.ts.map +1 -0
- package/dist/expressions/base.d.ts +6 -0
- package/dist/expressions/base.d.ts.map +1 -0
- package/dist/expressions/basics.d.ts +131 -0
- package/dist/expressions/basics.d.ts.map +1 -0
- package/dist/expressions/conditional.d.ts +56 -0
- package/dist/expressions/conditional.d.ts.map +1 -0
- package/dist/expressions/fuzzy.d.ts +45 -0
- package/dist/expressions/fuzzy.d.ts.map +1 -0
- package/dist/expressions/hash.d.ts +25 -0
- package/dist/expressions/hash.d.ts.map +1 -0
- package/dist/expressions/index.d.ts +25 -0
- package/dist/expressions/index.d.ts.map +1 -0
- package/dist/expressions/pframes.d.ts +115 -0
- package/dist/expressions/pframes.d.ts.map +1 -0
- package/dist/expressions/selectors.d.ts +172 -0
- package/dist/expressions/selectors.d.ts.map +1 -0
- package/dist/expressions/string.d.ts +158 -0
- package/dist/expressions/string.d.ts.map +1 -0
- package/dist/expressions/struct.d.ts +37 -0
- package/dist/expressions/struct.d.ts.map +1 -0
- package/dist/expressions/window.d.ts +52 -0
- package/dist/expressions/window.d.ts.map +1 -0
- package/dist/index.cjs +3 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.ts +12 -9
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +1 -1
- package/dist/io.d.ts +16 -1
- package/dist/io.d.ts.map +1 -0
- package/dist/join.d.ts +1 -0
- package/dist/join.d.ts.map +1 -0
- package/dist/read_frame.d.ts +26 -0
- package/dist/read_frame.d.ts.map +1 -0
- package/dist/sort.d.ts +4 -9
- package/dist/sort.d.ts.map +1 -0
- package/dist/write_frame.d.ts +58 -0
- package/dist/write_frame.d.ts.map +1 -0
- package/package.json +11 -9
- package/src/aggregate.ts +0 -16
- package/src/basic_steps.ts +32 -37
- package/src/expressions/base.ts +5 -0
- package/src/expressions/basics.ts +163 -0
- package/src/expressions/conditional.ts +59 -0
- package/src/expressions/fuzzy.ts +51 -0
- package/src/expressions/hash.ts +37 -0
- package/src/expressions/index.ts +147 -0
- package/src/expressions/pframes.ts +118 -0
- package/src/expressions/selectors.ts +203 -0
- package/src/expressions/string.ts +168 -0
- package/src/expressions/struct.ts +37 -0
- package/src/expressions/window.ts +66 -0
- package/src/index.ts +35 -5
- package/src/io.ts +16 -0
- package/src/read_frame.ts +26 -0
- package/src/sort.ts +2 -9
- package/src/write_frame.ts +66 -0
- package/dist/expressions.d.ts +0 -439
- package/dist/index.mjs +0 -2
- package/dist/index.mjs.map +0 -1
- package/src/expressions.ts +0 -549
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import type { Expression } from './base';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Represents a regex matching operation using ECMAScript regular expressions.
|
|
5
|
+
* Takes a string expression as input and returns a boolean indicating if the input value
|
|
6
|
+
* matches the provided ECMAScript regular expression.
|
|
7
|
+
*/
|
|
8
|
+
export interface MatchesEcmaRegexExpression {
|
|
9
|
+
/** The type of operation, always 'matches_ecma_regex'. */
|
|
10
|
+
type: 'matches_ecma_regex';
|
|
11
|
+
/** The string expression whose value will be compared. */
|
|
12
|
+
value: Expression;
|
|
13
|
+
/** The ECMAScript regular expression to match against. */
|
|
14
|
+
ecma_regex: string;
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
/**
|
|
18
|
+
* Represents a fuzzy string matching operation.
|
|
19
|
+
* Takes a string expression as input and returns a boolean indicating if the input value
|
|
20
|
+
* contains a close match to the provided reference string.
|
|
21
|
+
*/
|
|
22
|
+
export interface ContainsFuzzyMatchExpression {
|
|
23
|
+
/** The type of operation, always 'contains_fuzzy_match'. */
|
|
24
|
+
type: 'contains_fuzzy_match';
|
|
25
|
+
/** The string expression whose value will be compared. */
|
|
26
|
+
value: Expression;
|
|
27
|
+
/** The string reference to compare against. */
|
|
28
|
+
reference: string;
|
|
29
|
+
/** The maximum number of edits allowed to be considered a match. */
|
|
30
|
+
max_edits: number;
|
|
31
|
+
/** The wildcard character to use. */
|
|
32
|
+
wildcard?: string;
|
|
33
|
+
/** If true, only substitutions are allowed (deletions and insertions are also allowed by default). */
|
|
34
|
+
substitutions_only?: boolean;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* Represents a regex replacement operation using ECMAScript regular expressions.
|
|
39
|
+
* Performs JavaScript String.prototype.replace() operation.
|
|
40
|
+
*
|
|
41
|
+
* @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace
|
|
42
|
+
*
|
|
43
|
+
* @example
|
|
44
|
+
* Input: "result.POIS_P001_W104_PBMC_2.clns"
|
|
45
|
+
* Pattern: "^.*(P\\d+)_(W\\d+).*$"
|
|
46
|
+
* Replacement: "$2-$1"
|
|
47
|
+
* Result: "W104-P001"
|
|
48
|
+
*/
|
|
49
|
+
export interface ReplaceEcmaRegexExpression {
|
|
50
|
+
/** The type of operation, always 'replace_ecma_regex'. */
|
|
51
|
+
type: 'replace_ecma_regex';
|
|
52
|
+
/** The string expression whose value will be replaced. */
|
|
53
|
+
value: Expression;
|
|
54
|
+
/**
|
|
55
|
+
* String representing ECMAScript RegEx with at least one capturing group.
|
|
56
|
+
* If you need to reorder capturing groups - use RegExp matching the whole string
|
|
57
|
+
* (must start with string begin anchor ^, end with string end anchor $).
|
|
58
|
+
* Use regex playground https://regexr.com/ to test your ideas.
|
|
59
|
+
*/
|
|
60
|
+
ecma_regex: string;
|
|
61
|
+
/**
|
|
62
|
+
* Replacement pattern used to construct result string from captured groups.
|
|
63
|
+
* @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace#specifying_a_string_as_the_replacement
|
|
64
|
+
* Empty string as result would become NA.
|
|
65
|
+
*/
|
|
66
|
+
replacement: string;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
/**
|
|
70
|
+
* Represents a regex extraction operation using ECMAScript regular expressions.
|
|
71
|
+
* Simplified 'replace_ecma_regex' with replacement set to $1.
|
|
72
|
+
* This means that string is replaced with first capture group value.
|
|
73
|
+
*
|
|
74
|
+
* RegEx must match the entire string, this would be enforced even when ^ and $ are skipped.
|
|
75
|
+
* If there are no matches - value would be replaced with empty string.
|
|
76
|
+
*
|
|
77
|
+
* @example
|
|
78
|
+
* // Example 1:
|
|
79
|
+
* Input: "123___abc.xlsx"
|
|
80
|
+
* Pattern: "\\d+___([a-z]+).xlsx"
|
|
81
|
+
* Result: "abc"
|
|
82
|
+
*
|
|
83
|
+
* @example
|
|
84
|
+
* // Example 2:
|
|
85
|
+
* Input: "123___abc.xlsx"
|
|
86
|
+
* Pattern: "(\\d+)___([a-z]+).xlsx"
|
|
87
|
+
* Result: "123"
|
|
88
|
+
*
|
|
89
|
+
* @example
|
|
90
|
+
* // Example 3:
|
|
91
|
+
* Input: "123___abc.xlsx"
|
|
92
|
+
* Pattern: "((\\d+)___([a-z]+)).xlsx"
|
|
93
|
+
* Result: "123___abc"
|
|
94
|
+
*
|
|
95
|
+
* @example
|
|
96
|
+
* // Wrong example (pattern doesn't match entire string):
|
|
97
|
+
* Input: "123___abc.xlsx"
|
|
98
|
+
* Pattern: "(\\d+___[a-z]+)"
|
|
99
|
+
* Result: "" (empty string, as .xlsx part is missing in pattern, so pattern was not matched)
|
|
100
|
+
*
|
|
101
|
+
* @example
|
|
102
|
+
* // Correct example:
|
|
103
|
+
* Input: "123___abc.xlsx"
|
|
104
|
+
* Pattern: "(\\d+___[a-z]+).xlsx"
|
|
105
|
+
* Result: "123___abc"
|
|
106
|
+
*/
|
|
107
|
+
export interface ExtractEcmaRegexExpression {
|
|
108
|
+
/** The type of operation, always 'extract_ecma_regex'. */
|
|
109
|
+
type: 'extract_ecma_regex';
|
|
110
|
+
/** The string expression whose value will be extracted. */
|
|
111
|
+
value: Expression;
|
|
112
|
+
/**
|
|
113
|
+
* String representing ECMAScript RegEx with at least one capturing group.
|
|
114
|
+
* RegEx must match the entire string, this would be enforced even when ^ and $ are skipped.
|
|
115
|
+
* If there are no matches - value would be replaced with empty string.
|
|
116
|
+
*/
|
|
117
|
+
ecma_regex: string;
|
|
118
|
+
}
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
import type { AxisSpec } from '@milaboratories/pl-model-common';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Represents a selector for all columns.
|
|
5
|
+
* Selects all columns in the DataFrame.
|
|
6
|
+
*/
|
|
7
|
+
export interface AllSelectorExpression {
|
|
8
|
+
/** The type of operation, always 'selector_all'. */
|
|
9
|
+
type: 'selector_all';
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
* Represents a selector for string columns.
|
|
14
|
+
* Selects all columns with string data types.
|
|
15
|
+
*/
|
|
16
|
+
export interface StringSelectorExpression {
|
|
17
|
+
/** The type of operation, always 'selector_string'. */
|
|
18
|
+
type: 'selector_string';
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
/**
|
|
22
|
+
* Represents a selector for numeric columns.
|
|
23
|
+
* Selects all columns with numeric data types (integers and floats).
|
|
24
|
+
*/
|
|
25
|
+
export interface NumericSelectorExpression {
|
|
26
|
+
/** The type of operation, always 'selector_numeric'. */
|
|
27
|
+
type: 'selector_numeric';
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
/**
|
|
31
|
+
* Represents a selector for integer columns.
|
|
32
|
+
* Selects all columns with integer data types.
|
|
33
|
+
*/
|
|
34
|
+
export interface IntegerSelectorExpression {
|
|
35
|
+
/** The type of operation, always 'selector_integer'. */
|
|
36
|
+
type: 'selector_integer';
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
/**
|
|
40
|
+
* Represents a selector for float columns.
|
|
41
|
+
* Selects all columns with floating-point data types.
|
|
42
|
+
*/
|
|
43
|
+
export interface FloatSelectorExpression {
|
|
44
|
+
/** The type of operation, always 'selector_float'. */
|
|
45
|
+
type: 'selector_float';
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* Represents a selector for columns that start with a specific prefix.
|
|
50
|
+
* Selects columns whose names start with the specified prefix string.
|
|
51
|
+
*/
|
|
52
|
+
export interface StartsWithSelectorExpression {
|
|
53
|
+
/** The type of operation, always 'selector_starts_with'. */
|
|
54
|
+
type: 'selector_starts_with';
|
|
55
|
+
/** The prefix to match column names against. */
|
|
56
|
+
prefix: string;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
/**
|
|
60
|
+
* Represents a selector for columns that end with a specific suffix.
|
|
61
|
+
* Selects columns whose names end with the specified suffix string.
|
|
62
|
+
*/
|
|
63
|
+
export interface EndsWithSelectorExpression {
|
|
64
|
+
/** The type of operation, always 'selector_ends_with'. */
|
|
65
|
+
type: 'selector_ends_with';
|
|
66
|
+
/** The suffix to match column names against. */
|
|
67
|
+
suffix: string;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
/**
|
|
71
|
+
* Represents a selector for columns that contain a specific substring.
|
|
72
|
+
* Selects columns whose names contain the specified substring.
|
|
73
|
+
*/
|
|
74
|
+
export interface ContainsSelectorExpression {
|
|
75
|
+
/** The type of operation, always 'selector_contains'. */
|
|
76
|
+
type: 'selector_contains';
|
|
77
|
+
/** The substring to match within column names. */
|
|
78
|
+
substring: string;
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
/**
|
|
82
|
+
* Represents a selector for columns that match a regex pattern.
|
|
83
|
+
* Selects columns whose names match the specified regular expression pattern.
|
|
84
|
+
*/
|
|
85
|
+
export interface MatchesSelectorExpression {
|
|
86
|
+
/** The type of operation, always 'selector_matches'. */
|
|
87
|
+
type: 'selector_matches';
|
|
88
|
+
/** The regex pattern to match column names against. */
|
|
89
|
+
pattern: string;
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
/**
|
|
93
|
+
* Represents a selector that excludes specific columns by name.
|
|
94
|
+
* Selects all columns except those explicitly listed.
|
|
95
|
+
*/
|
|
96
|
+
export interface ExcludeSelectorExpression {
|
|
97
|
+
/** The type of operation, always 'selector_exclude'. */
|
|
98
|
+
type: 'selector_exclude';
|
|
99
|
+
/** The list of column names to exclude from selection. */
|
|
100
|
+
columns: string[];
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
/**
|
|
104
|
+
* Represents a selector for columns by their exact names.
|
|
105
|
+
* Selects columns that match any of the specified names.
|
|
106
|
+
*/
|
|
107
|
+
export interface ByNameSelectorExpression {
|
|
108
|
+
/** The type of operation, always 'selector_by_name'. */
|
|
109
|
+
type: 'selector_by_name';
|
|
110
|
+
/** The list of column names to select. */
|
|
111
|
+
names: string[];
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
/**
|
|
115
|
+
* Represents a selector for axis.
|
|
116
|
+
* Selects axis with the given spec if it exists in the result.
|
|
117
|
+
*/
|
|
118
|
+
export interface AxisSelectorExpression {
|
|
119
|
+
/** The type of operation, always 'selector_axis'. */
|
|
120
|
+
type: 'selector_axis';
|
|
121
|
+
/** The axis to select. */
|
|
122
|
+
axis: AxisSpec;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
/**
|
|
126
|
+
* Represents a selector for nested columns.
|
|
127
|
+
* Selects columns that have nested/complex data types (e.g., structs, lists).
|
|
128
|
+
*/
|
|
129
|
+
export interface NestedSelectorExpression {
|
|
130
|
+
/** The type of operation, always 'selector_nested'. */
|
|
131
|
+
type: 'selector_nested';
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
/** Defines all available selector expression types. */
|
|
135
|
+
export type SelectorExpression =
|
|
136
|
+
| AllSelectorExpression
|
|
137
|
+
| StringSelectorExpression
|
|
138
|
+
| NumericSelectorExpression
|
|
139
|
+
| IntegerSelectorExpression
|
|
140
|
+
| FloatSelectorExpression
|
|
141
|
+
| StartsWithSelectorExpression
|
|
142
|
+
| EndsWithSelectorExpression
|
|
143
|
+
| ContainsSelectorExpression
|
|
144
|
+
| MatchesSelectorExpression
|
|
145
|
+
| ExcludeSelectorExpression
|
|
146
|
+
| ByNameSelectorExpression
|
|
147
|
+
| AxisSelectorExpression
|
|
148
|
+
| NestedSelectorExpression;
|
|
149
|
+
|
|
150
|
+
/**
|
|
151
|
+
* Represents the complement of a selector.
|
|
152
|
+
* Selects all columns that are NOT selected by the inner selector.
|
|
153
|
+
*/
|
|
154
|
+
export interface SelectorComplementExpression {
|
|
155
|
+
/** The type of operation, always 'selector_complement'. */
|
|
156
|
+
type: 'selector_complement';
|
|
157
|
+
/** The selector to complement. */
|
|
158
|
+
selector: SelectorExpression;
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
/**
|
|
162
|
+
* Represents the union of multiple selectors.
|
|
163
|
+
* Selects columns that match ANY of the provided selectors (logical OR).
|
|
164
|
+
*/
|
|
165
|
+
export interface SelectorUnionExpression {
|
|
166
|
+
/** The type of operation, always 'selector_union'. */
|
|
167
|
+
type: 'selector_union';
|
|
168
|
+
/** The list of selectors to union. */
|
|
169
|
+
selectors: SelectorExpression[];
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
/**
|
|
173
|
+
* Represents the intersection of multiple selectors.
|
|
174
|
+
* Selects columns that match ALL of the provided selectors (logical AND).
|
|
175
|
+
*/
|
|
176
|
+
export interface SelectorIntersectionExpression {
|
|
177
|
+
/** The type of operation, always 'selector_intersection'. */
|
|
178
|
+
type: 'selector_intersection';
|
|
179
|
+
/** The list of selectors to intersect. */
|
|
180
|
+
selectors: SelectorExpression[];
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
/**
|
|
184
|
+
* Represents the difference of multiple selectors.
|
|
185
|
+
* Selects columns from the first selector minus those selected by subsequent selectors.
|
|
186
|
+
*/
|
|
187
|
+
export interface SelectorDifferenceExpression {
|
|
188
|
+
/** The type of operation, always 'selector_difference'. */
|
|
189
|
+
type: 'selector_difference';
|
|
190
|
+
/** The list of selectors to apply difference operation. */
|
|
191
|
+
selectors: SelectorExpression[];
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
/**
|
|
195
|
+
* Represents the symmetric difference of multiple selectors.
|
|
196
|
+
* Selects columns that are in an odd number of the provided selectors (logical XOR).
|
|
197
|
+
*/
|
|
198
|
+
export interface SelectorSymmetricDifferenceExpression {
|
|
199
|
+
/** The type of operation, always 'selector_symmetric_difference'. */
|
|
200
|
+
type: 'selector_symmetric_difference';
|
|
201
|
+
/** The list of selectors to apply symmetric difference operation. */
|
|
202
|
+
selectors: SelectorExpression[];
|
|
203
|
+
}
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
import type { Expression } from './base';
|
|
2
|
+
|
|
3
|
+
/** Represents a string join operation on an array of expressions. */
|
|
4
|
+
export interface StringJoinExpression {
|
|
5
|
+
/** The type of operation, always 'str_join'. */
|
|
6
|
+
type: 'str_join';
|
|
7
|
+
/** An array of expressions whose string representations will be joined. */
|
|
8
|
+
operands: Expression[];
|
|
9
|
+
/** An optional delimiter string to insert between joined elements. */
|
|
10
|
+
delimiter?: string;
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
/** Defines the supported unary string operators. */
|
|
14
|
+
export type UnaryStringOperator = 'to_upper' | 'to_lower';
|
|
15
|
+
|
|
16
|
+
/** Represents a unary string operation on a single expression. */
|
|
17
|
+
export interface ExtendedUnaryStringExpression {
|
|
18
|
+
/** The type of unary string operation (e.g., 'to_upper', 'to_lower', 'str_len'). */
|
|
19
|
+
type: UnaryStringOperator | 'str_len';
|
|
20
|
+
/** The string expression to operate on. */
|
|
21
|
+
value: Expression;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* Represents a substring extraction operation on an expression.
|
|
26
|
+
* Extracts a portion of the string value resulting from the 'value' expression.
|
|
27
|
+
* The substring starts at the 'start' index (0-based).
|
|
28
|
+
* - If 'length' is provided, it specifies the maximum length of the substring.
|
|
29
|
+
* - If 'end' is provided, it specifies the index *before* which the substring ends.
|
|
30
|
+
* - If neither 'length' nor 'end' is provided, the substring extends to the end of the string.
|
|
31
|
+
* - 'length' and 'end' are mutually exclusive.
|
|
32
|
+
* If the requested substring range extends beyond the actual string length,
|
|
33
|
+
* the extraction automatically stops at the end of the string.
|
|
34
|
+
*/
|
|
35
|
+
export interface SubstringExpression {
|
|
36
|
+
/** The type of operation, always 'substring'. */
|
|
37
|
+
type: 'substring';
|
|
38
|
+
/** The expression whose string value will be used. */
|
|
39
|
+
value: Expression;
|
|
40
|
+
/** The starting position (0-indexed). Should evaluate to a number. */
|
|
41
|
+
start: Expression;
|
|
42
|
+
/** The length of the substring. Mutually exclusive with 'end'. Should evaluate to a number. */
|
|
43
|
+
length?: Expression;
|
|
44
|
+
/** The end position of the substring (exclusive). Mutually exclusive with 'length'. Should evaluate to a number. */
|
|
45
|
+
end?: Expression;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* Represents a string replacement operation.
|
|
50
|
+
* Replaces occurrences of a pattern (regex or literal) in a string expression with a replacement string.
|
|
51
|
+
* The behavior is aligned with Polars' `replace` and `replace_all` functions.
|
|
52
|
+
*
|
|
53
|
+
* - If `literal` is true, the `pattern` is treated as a literal string. Otherwise, it's treated as a regular expression.
|
|
54
|
+
* - If `replaceAll` is true, all occurrences of the pattern are replaced. Otherwise, only the first occurrence is replaced.
|
|
55
|
+
*
|
|
56
|
+
* When using regular expressions (i.e., `literal` is false or undefined):
|
|
57
|
+
* - Positional capture groups can be referenced in the `replacement` string using `$n` or `${n}` (e.g., `$1` for the first group).
|
|
58
|
+
* - Named capture groups can be referenced using `${name}`.
|
|
59
|
+
* - To include a literal dollar sign (`$`) in the replacement, it must be escaped as `$$`.
|
|
60
|
+
*/
|
|
61
|
+
export interface StringReplaceExpression {
|
|
62
|
+
/** The type of operation, always 'str_replace'. */
|
|
63
|
+
type: 'str_replace';
|
|
64
|
+
/** The input string expression to operate on. */
|
|
65
|
+
value: Expression;
|
|
66
|
+
/** The pattern (regex or literal string) to search for. Can be a string literal or an expression evaluating to a string. */
|
|
67
|
+
pattern: Expression | string;
|
|
68
|
+
/** The replacement string. Can be a string literal or an expression evaluating to a string. Can use $n or ${name} for captured groups if pattern is a regex. */
|
|
69
|
+
replacement: Expression | string;
|
|
70
|
+
/** If true, replace all occurrences of the pattern. If false or undefined, replace only the first. Defaults to false. */
|
|
71
|
+
replaceAll?: boolean;
|
|
72
|
+
/** If true, treat the pattern as a literal string. If false or undefined, treat it as a regex. Defaults to false. */
|
|
73
|
+
literal?: boolean;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
/**
|
|
77
|
+
* Represents a string contains operation.
|
|
78
|
+
* Checks if the string contains a substring that matches a pattern using regex or literal matching.
|
|
79
|
+
* Based on polars.Series.str.contains - supports both regex and literal pattern matching with optional case-insensitive flags.
|
|
80
|
+
*/
|
|
81
|
+
export interface StringContainsExpression {
|
|
82
|
+
/** The type of operation, always 'str_contains'. */
|
|
83
|
+
type: 'str_contains';
|
|
84
|
+
/** The input string expression to search in. */
|
|
85
|
+
value: Expression;
|
|
86
|
+
/** The pattern to search for. Can be a regex pattern (default) or literal string when literal=true. */
|
|
87
|
+
pattern: Expression | string;
|
|
88
|
+
/** If true, treat the pattern as a literal string. If false, treat it as a regex pattern. Defaults to false. */
|
|
89
|
+
literal?: boolean;
|
|
90
|
+
/** If true, raise an error if pattern is invalid regex. If false, return null for invalid patterns. Defaults to true. */
|
|
91
|
+
strict?: boolean;
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
/**
|
|
95
|
+
* Represents a string starts_with operation.
|
|
96
|
+
* Checks if the string starts with a specified prefix. Always uses literal matching (no regex support).
|
|
97
|
+
* Based on polars.Series.str.starts_with - only supports literal prefix matching.
|
|
98
|
+
*/
|
|
99
|
+
export interface StringStartsWithExpression {
|
|
100
|
+
/** The type of operation, always 'str_starts_with'. */
|
|
101
|
+
type: 'str_starts_with';
|
|
102
|
+
/** The input string expression to check. */
|
|
103
|
+
value: Expression;
|
|
104
|
+
/** The prefix to check for (always treated as literal string, no regex support). */
|
|
105
|
+
prefix: Expression | string;
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
/**
|
|
109
|
+
* Represents a string ends_with operation.
|
|
110
|
+
* Checks if the string ends with a specified suffix. Always uses literal matching (no regex support).
|
|
111
|
+
* Based on polars.Series.str.ends_with - only supports literal suffix matching.
|
|
112
|
+
*/
|
|
113
|
+
export interface StringEndsWithExpression {
|
|
114
|
+
/** The type of operation, always 'str_ends_with'. */
|
|
115
|
+
type: 'str_ends_with';
|
|
116
|
+
/** The input string expression to check. */
|
|
117
|
+
value: Expression;
|
|
118
|
+
/** The suffix to check for (always treated as literal string, no regex support). */
|
|
119
|
+
suffix: Expression | string;
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
/**
|
|
123
|
+
* Represents a string contains_any operation using the Aho-Corasick algorithm.
|
|
124
|
+
* Checks if the string contains any of the provided patterns using fast multi-pattern string matching.
|
|
125
|
+
* Based on polars.Series.str.contains_any - uses Aho-Corasick algorithm for efficient multi-pattern matching.
|
|
126
|
+
*/
|
|
127
|
+
export interface StringContainsAnyExpression {
|
|
128
|
+
/** The type of operation, always 'str_contains_any'. */
|
|
129
|
+
type: 'str_contains_any';
|
|
130
|
+
/** The input string expression to search in. */
|
|
131
|
+
value: Expression;
|
|
132
|
+
/** Array of literal string patterns to search for. Only immediate string values are supported, no expressions or regex patterns. */
|
|
133
|
+
patterns: string[];
|
|
134
|
+
/** Enable ASCII-aware case insensitive matching. When enabled, searching is performed without respect to case for ASCII letters (a-z and A-Z) only. Defaults to false. */
|
|
135
|
+
asciiCaseInsensitive?: boolean;
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
/**
|
|
139
|
+
* Represents a string count_matches operation.
|
|
140
|
+
* Counts the number of times a pattern occurs in the string using regex or literal matching.
|
|
141
|
+
* Based on polars.Series.str.count_matches - supports both regex and literal pattern matching.
|
|
142
|
+
*/
|
|
143
|
+
export interface StringCountMatchesExpression {
|
|
144
|
+
/** The type of operation, always 'str_count_matches'. */
|
|
145
|
+
type: 'str_count_matches';
|
|
146
|
+
/** The input string expression to count matches in. */
|
|
147
|
+
value: Expression;
|
|
148
|
+
/** The pattern to count occurrences of. Can be a regex pattern (default) or literal string when literal=true. */
|
|
149
|
+
pattern: Expression | string;
|
|
150
|
+
/** If true, treat the pattern as a literal string. If false, treat it as a regex pattern. Defaults to false. */
|
|
151
|
+
literal?: boolean;
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
/**
|
|
155
|
+
* Represents a string extract operation using regex patterns.
|
|
156
|
+
* Extracts the first match of a regex pattern from the string, optionally targeting specific capture groups.
|
|
157
|
+
* Based on polars.Series.str.extract - only supports regex patterns (no literal mode).
|
|
158
|
+
*/
|
|
159
|
+
export interface StringExtractExpression {
|
|
160
|
+
/** The type of operation, always 'str_extract'. */
|
|
161
|
+
type: 'str_extract';
|
|
162
|
+
/** The input string expression to extract from. */
|
|
163
|
+
value: Expression;
|
|
164
|
+
/** The regex pattern to extract. Must be a valid regex pattern - no literal string mode is supported. */
|
|
165
|
+
pattern: Expression | string;
|
|
166
|
+
/** The capture group index to extract. Group 0 is the entire match, group 1 is the first capture group, etc. Defaults to 0. */
|
|
167
|
+
groupIndex?: number;
|
|
168
|
+
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import type { DataType } from '../common';
|
|
2
|
+
import type { Expression } from './base';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Represents a struct field access operation.
|
|
6
|
+
* This operation retrieves a single field from a struct (nested data structure).
|
|
7
|
+
*
|
|
8
|
+
* Uses native Polars struct.field() functionality when possible for optimal performance,
|
|
9
|
+
* but falls back to Python UDF (map_elements) when dtype casting or default values
|
|
10
|
+
* are specified, trading performance for robust handling of missing fields and null structs.
|
|
11
|
+
*
|
|
12
|
+
* When fields is an array, the operation performs recursive field access,
|
|
13
|
+
* where each element in the array represents a level in the nested structure.
|
|
14
|
+
*/
|
|
15
|
+
export interface StructFieldExpression {
|
|
16
|
+
/** The type of operation, always 'struct_field'. */
|
|
17
|
+
type: 'struct_field';
|
|
18
|
+
/** The struct expression to extract fields from. */
|
|
19
|
+
struct: Expression;
|
|
20
|
+
/**
|
|
21
|
+
* The field name(s) to extract from the struct.
|
|
22
|
+
* - If a string, extracts a single field from the struct.
|
|
23
|
+
* - If an array, performs recursive field access where each element represents a level in the nested structure.
|
|
24
|
+
*/
|
|
25
|
+
fields: string | string[];
|
|
26
|
+
/**
|
|
27
|
+
* Optional expected data type for the returned value.
|
|
28
|
+
* This can be used for type validation or casting of the extracted field.
|
|
29
|
+
*/
|
|
30
|
+
dtype?: DataType;
|
|
31
|
+
/**
|
|
32
|
+
* Optional default value to return if the field is not found or is null.
|
|
33
|
+
* If not provided and the field is missing, the operation returns null.
|
|
34
|
+
* Only constant scalar values are supported.
|
|
35
|
+
*/
|
|
36
|
+
default?: string | number | boolean | null;
|
|
37
|
+
}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import type { Expression } from './base';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Represents a rank function applied over a dataset partition.
|
|
5
|
+
* Calculates the rank of each row within its partition based on the specified ordering.
|
|
6
|
+
*/
|
|
7
|
+
export interface RankExpression {
|
|
8
|
+
/** The type of operation, always 'rank'. */
|
|
9
|
+
type: 'rank';
|
|
10
|
+
/** List of expressions to partition the data by before ranking. The output of these expressions will be used for partitioning. */
|
|
11
|
+
partitionBy: Expression[];
|
|
12
|
+
/** Defines the ordering expressions within partitions to determine the rank. */
|
|
13
|
+
orderBy: Expression[];
|
|
14
|
+
/** Whether to sort in descending order. Defaults to false (ascending). */
|
|
15
|
+
descending?: boolean;
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
/**
|
|
19
|
+
* Represents a cumulative sum function applied over a dataset partition.
|
|
20
|
+
* Calculates the cumulative sum of the 'value' expression within each partition,
|
|
21
|
+
* based on the specified ordering. Values are sorted by value and then by
|
|
22
|
+
* additionalOrderBy before summing.
|
|
23
|
+
*/
|
|
24
|
+
export interface CumsumExpression {
|
|
25
|
+
/** The type of operation, always 'cumsum'. */
|
|
26
|
+
type: 'cumsum';
|
|
27
|
+
/** The expression whose values will be cumulatively summed. */
|
|
28
|
+
value: Expression;
|
|
29
|
+
/** Defines additional ordering within partitions for the cumulative sum calculation, in addition to the ordering of the values themselves. */
|
|
30
|
+
additionalOrderBy: Expression[];
|
|
31
|
+
/** List of expressions to partition the data by before calculating the cumulative sum. The output of these expressions will be used for partitioning. */
|
|
32
|
+
partitionBy: Expression[];
|
|
33
|
+
/** Whether to sort in descending order. Defaults to false (ascending). */
|
|
34
|
+
descending?: boolean;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* Defines standard aggregation functions that can be used in window expressions.
|
|
39
|
+
*/
|
|
40
|
+
export type AggregationType =
|
|
41
|
+
| 'sum'
|
|
42
|
+
| 'mean'
|
|
43
|
+
| 'median'
|
|
44
|
+
| 'min'
|
|
45
|
+
| 'max'
|
|
46
|
+
| 'std'
|
|
47
|
+
| 'var'
|
|
48
|
+
| 'count'
|
|
49
|
+
| 'first'
|
|
50
|
+
| 'last'
|
|
51
|
+
| 'n_unique';
|
|
52
|
+
|
|
53
|
+
/**
|
|
54
|
+
* Represents a window function call.
|
|
55
|
+
* This allows applying an aggregation function over a specific partition of the data.
|
|
56
|
+
*/
|
|
57
|
+
export interface WindowExpression {
|
|
58
|
+
/** The type of operation, always 'aggregate'. Note: This might be confusing, consider 'window_aggregate' or similar if 'aggregate' is heavily used elsewhere for a different step type. */
|
|
59
|
+
type: 'aggregate';
|
|
60
|
+
/** The aggregation function to apply (e.g., 'sum', 'mean'). */
|
|
61
|
+
aggregation: AggregationType;
|
|
62
|
+
/** The expression to apply the aggregation function to. */
|
|
63
|
+
value: Expression;
|
|
64
|
+
/** List of expressions to partition the data by. The aggregation is performed independently within each partition. */
|
|
65
|
+
partitionBy: Expression[];
|
|
66
|
+
}
|
package/src/index.ts
CHANGED
|
@@ -1,31 +1,61 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
|
|
1
|
+
import type {
|
|
2
|
+
ReadCsvStep,
|
|
3
|
+
ReadNdjsonStep,
|
|
4
|
+
WriteCsvStep,
|
|
5
|
+
WriteNdjsonStep,
|
|
6
|
+
BaseFileReadStep,
|
|
7
|
+
BaseFileWriteStep,
|
|
8
|
+
WriteParquetStep,
|
|
9
|
+
ReadParquetStep,
|
|
10
|
+
} from './io';
|
|
11
|
+
import type {
|
|
12
|
+
AddColumnsStep,
|
|
13
|
+
FilterStep,
|
|
14
|
+
LimitStep,
|
|
15
|
+
SelectStep,
|
|
16
|
+
WithColumnsStep,
|
|
17
|
+
WithoutColumnsStep,
|
|
18
|
+
} from './basic_steps';
|
|
3
19
|
import type { AggregateStep } from './aggregate';
|
|
4
20
|
import type { AnyJoinStep } from './join';
|
|
5
21
|
import type { ConcatenateStep } from './concatenate';
|
|
6
22
|
import type { SortStep } from './sort';
|
|
23
|
+
import type { WriteFrameStep } from './write_frame';
|
|
24
|
+
import type { ReadFrameStep } from './read_frame';
|
|
7
25
|
|
|
8
26
|
export type PTablerStep =
|
|
9
27
|
| ReadCsvStep
|
|
10
28
|
| ReadNdjsonStep
|
|
29
|
+
| ReadParquetStep
|
|
11
30
|
| WriteCsvStep
|
|
12
31
|
| WriteNdjsonStep
|
|
32
|
+
| WriteParquetStep
|
|
13
33
|
| AddColumnsStep
|
|
14
34
|
| FilterStep
|
|
35
|
+
| LimitStep
|
|
15
36
|
| AggregateStep
|
|
16
37
|
| AnyJoinStep
|
|
17
38
|
| ConcatenateStep
|
|
18
39
|
| SortStep
|
|
19
40
|
| SelectStep
|
|
20
41
|
| WithColumnsStep
|
|
21
|
-
| WithoutColumnsStep
|
|
42
|
+
| WithoutColumnsStep
|
|
43
|
+
| WriteFrameStep
|
|
44
|
+
| ReadFrameStep;
|
|
22
45
|
|
|
23
46
|
export type PTablerWorkflow = {
|
|
24
47
|
workflow: PTablerStep[];
|
|
25
48
|
};
|
|
26
49
|
|
|
27
50
|
// Re-export base interfaces for potential external use
|
|
28
|
-
export type {
|
|
51
|
+
export type {
|
|
52
|
+
AddColumnsStep, AggregateStep,
|
|
53
|
+
AnyJoinStep, BaseFileReadStep,
|
|
54
|
+
BaseFileWriteStep, ConcatenateStep, FilterStep, ReadCsvStep,
|
|
55
|
+
ReadNdjsonStep, SelectStep, SortStep, WithColumnsStep,
|
|
56
|
+
WithoutColumnsStep, WriteCsvStep,
|
|
57
|
+
WriteNdjsonStep,
|
|
58
|
+
};
|
|
29
59
|
|
|
30
60
|
// Re-export expression types for external use
|
|
31
|
-
export type
|
|
61
|
+
export type * from './expressions';
|