@platforma-open/milaboratories.software-ptabler.schema 1.7.0 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  import { DataType } from './common';
2
- export type Expression = ComparisonExpression | BinaryArithmeticExpression | UnaryArithmeticExpression | CastExpression | BooleanLogicExpression | NotExpression | NullCheckExpression | StringJoinExpression | HashExpression | ColumnReferenceExpression | ConstantValueExpression | RankExpression | CumsumExpression | ExtendedUnaryStringExpression | StringDistanceExpression | FuzzyStringFilterExpression | WhenThenOtherwiseExpression | SubstringExpression | StringReplaceExpression | MinMaxExpression | FillNaExpression | WindowExpression;
2
+ export type Expression = ComparisonExpression | BinaryArithmeticExpression | UnaryArithmeticExpression | CastExpression | BooleanLogicExpression | NotExpression | NullCheckExpression | StringJoinExpression | HashExpression | ColumnReferenceExpression | ConstantValueExpression | RankExpression | CumsumExpression | ExtendedUnaryStringExpression | StringDistanceExpression | FuzzyStringFilterExpression | WhenThenOtherwiseExpression | SubstringExpression | StringReplaceExpression | StringContainsExpression | StringStartsWithExpression | StringEndsWithExpression | StringContainsAnyExpression | StringCountMatchesExpression | StringExtractExpression | MinMaxExpression | FillNaExpression | WindowExpression;
3
3
  /** Represents all possible expression types in the system. */
4
4
  export type ComparisonOperator = 'gt' | 'ge' | 'eq' | 'lt' | 'le' | 'neq';
5
5
  /** Defines a comparison operation between two expressions. */
@@ -276,6 +276,94 @@ export interface StringReplaceExpression {
276
276
  /** If true, treat the pattern as a literal string. If false or undefined, treat it as a regex. Defaults to false. */
277
277
  literal?: boolean;
278
278
  }
279
+ /**
280
+ * Represents a string contains operation.
281
+ * Checks if the string contains a substring that matches a pattern using regex or literal matching.
282
+ * Based on polars.Series.str.contains - supports both regex and literal pattern matching with optional case-insensitive flags.
283
+ */
284
+ export interface StringContainsExpression {
285
+ /** The type of operation, always 'str_contains'. */
286
+ type: 'str_contains';
287
+ /** The input string expression to search in. */
288
+ value: Expression;
289
+ /** The pattern to search for. Can be a regex pattern (default) or literal string when literal=true. */
290
+ pattern: Expression | string;
291
+ /** If true, treat the pattern as a literal string. If false, treat it as a regex pattern. Defaults to false. */
292
+ literal?: boolean;
293
+ /** If true, raise an error if pattern is invalid regex. If false, return null for invalid patterns. Defaults to true. */
294
+ strict?: boolean;
295
+ }
296
+ /**
297
+ * Represents a string starts_with operation.
298
+ * Checks if the string starts with a specified prefix. Always uses literal matching (no regex support).
299
+ * Based on polars.Series.str.starts_with - only supports literal prefix matching.
300
+ */
301
+ export interface StringStartsWithExpression {
302
+ /** The type of operation, always 'str_starts_with'. */
303
+ type: 'str_starts_with';
304
+ /** The input string expression to check. */
305
+ value: Expression;
306
+ /** The prefix to check for (always treated as literal string, no regex support). */
307
+ prefix: Expression | string;
308
+ }
309
+ /**
310
+ * Represents a string ends_with operation.
311
+ * Checks if the string ends with a specified suffix. Always uses literal matching (no regex support).
312
+ * Based on polars.Series.str.ends_with - only supports literal suffix matching.
313
+ */
314
+ export interface StringEndsWithExpression {
315
+ /** The type of operation, always 'str_ends_with'. */
316
+ type: 'str_ends_with';
317
+ /** The input string expression to check. */
318
+ value: Expression;
319
+ /** The suffix to check for (always treated as literal string, no regex support). */
320
+ suffix: Expression | string;
321
+ }
322
+ /**
323
+ * Represents a string contains_any operation using the Aho-Corasick algorithm.
324
+ * Checks if the string contains any of the provided patterns using fast multi-pattern string matching.
325
+ * Based on polars.Series.str.contains_any - uses Aho-Corasick algorithm for efficient multi-pattern matching.
326
+ */
327
+ export interface StringContainsAnyExpression {
328
+ /** The type of operation, always 'str_contains_any'. */
329
+ type: 'str_contains_any';
330
+ /** The input string expression to search in. */
331
+ value: Expression;
332
+ /** Array of literal string patterns to search for. Only immediate string values are supported, no expressions or regex patterns. */
333
+ patterns: string[];
334
+ /** Enable ASCII-aware case insensitive matching. When enabled, searching is performed without respect to case for ASCII letters (a-z and A-Z) only. Defaults to false. */
335
+ asciiCaseInsensitive?: boolean;
336
+ }
337
+ /**
338
+ * Represents a string count_matches operation.
339
+ * Counts the number of times a pattern occurs in the string using regex or literal matching.
340
+ * Based on polars.Series.str.count_matches - supports both regex and literal pattern matching.
341
+ */
342
+ export interface StringCountMatchesExpression {
343
+ /** The type of operation, always 'str_count_matches'. */
344
+ type: 'str_count_matches';
345
+ /** The input string expression to count matches in. */
346
+ value: Expression;
347
+ /** The pattern to count occurrences of. Can be a regex pattern (default) or literal string when literal=true. */
348
+ pattern: Expression | string;
349
+ /** If true, treat the pattern as a literal string. If false, treat it as a regex pattern. Defaults to false. */
350
+ literal?: boolean;
351
+ }
352
+ /**
353
+ * Represents a string extract operation using regex patterns.
354
+ * Extracts the first match of a regex pattern from the string, optionally targeting specific capture groups.
355
+ * Based on polars.Series.str.extract - only supports regex patterns (no literal mode).
356
+ */
357
+ export interface StringExtractExpression {
358
+ /** The type of operation, always 'str_extract'. */
359
+ type: 'str_extract';
360
+ /** The input string expression to extract from. */
361
+ value: Expression;
362
+ /** The regex pattern to extract. Must be a valid regex pattern - no literal string mode is supported. */
363
+ pattern: Expression | string;
364
+ /** The capture group index to extract. Group 0 is the entire match, group 1 is the first capture group, etc. Defaults to 0. */
365
+ groupIndex?: number;
366
+ }
279
367
  /** Defines the supported min/max operators. */
280
368
  export type MinMaxOperator = 'min' | 'max';
281
369
  /** Represents a min or max operation on a list of expressions. */
package/dist/index.d.ts CHANGED
@@ -1,10 +1,11 @@
1
- import { ReadCsvStep, WriteCsvStep } from './io';
1
+ import { ReadCsvStep, ReadNdjsonStep, WriteCsvStep, WriteNdjsonStep, BaseFileReadStep, BaseFileWriteStep } from './io';
2
2
  import { AddColumnsStep, FilterStep, SelectStep, WithColumnsStep, WithoutColumnsStep } from './basic_steps';
3
3
  import { AggregateStep } from './aggregate';
4
4
  import { AnyJoinStep } from './join';
5
5
  import { ConcatenateStep } from './concatenate';
6
6
  import { SortStep } from './sort';
7
- export type PTablerStep = ReadCsvStep | WriteCsvStep | AddColumnsStep | FilterStep | AggregateStep | AnyJoinStep | ConcatenateStep | SortStep | SelectStep | WithColumnsStep | WithoutColumnsStep;
7
+ export type PTablerStep = ReadCsvStep | ReadNdjsonStep | WriteCsvStep | WriteNdjsonStep | AddColumnsStep | FilterStep | AggregateStep | AnyJoinStep | ConcatenateStep | SortStep | SelectStep | WithColumnsStep | WithoutColumnsStep;
8
8
  export type PTablerWorkflow = {
9
9
  workflow: PTablerStep[];
10
10
  };
11
+ export type { BaseFileReadStep, BaseFileWriteStep };
package/dist/io.d.ts CHANGED
@@ -10,16 +10,15 @@ export interface ColumnSchema {
10
10
  /** Optional: A specific string to be interpreted as a null value for this column. */
11
11
  nullValue?: string;
12
12
  }
13
- /** Represents the configuration for a step that reads data from a CSV file into the tablespace. */
14
- export interface ReadCsvStep {
15
- /** The type of the step, which is always 'read_csv' for this operation. */
16
- type: 'read_csv';
17
- /** Path to the CSV file to be read. */
13
+ /**
14
+ * Base interface for file reading operations that contains common fields
15
+ * shared across different file format readers.
16
+ */
17
+ export interface BaseFileReadStep {
18
+ /** Path to the file to be read. */
18
19
  file: string;
19
20
  /** The name assigned to the loaded DataFrame in the tablespace. */
20
21
  name: string;
21
- /** Optional: The delimiter character used in the CSV file. */
22
- delimiter?: string;
23
22
  /**
24
23
  * Optional: Provides schema information for specific columns.
25
24
  * If `inferSchema` is `true` (default), these definitions act as overrides
@@ -30,39 +29,61 @@ export interface ReadCsvStep {
30
29
  */
31
30
  schema?: ColumnSchema[];
32
31
  /**
33
- * Optional: Whether to infer the schema from the CSV file using Polars'
32
+ * Optional: Whether to infer the schema from the file using Polars'
34
33
  * default inference mechanism (e.g., reading a certain number of rows).
35
34
  * Defaults to `true`. If set to `false`, type inference is disabled,
36
35
  * and types will rely on the `schema` field or Polars' defaults for
37
36
  * columns not specified in `schema`.
38
37
  */
39
- infer_schema?: boolean;
38
+ inferSchema?: boolean;
39
+ /**
40
+ * Optional: Return null if parsing fails because of schema mismatches.
41
+ * Defaults to `false`.
42
+ */
43
+ ignoreErrors?: boolean;
44
+ /**
45
+ * Optional: Stop reading after this many rows.
46
+ * If not specified, all rows will be read.
47
+ */
48
+ nRows?: number;
49
+ }
50
+ /** Represents the configuration for a step that reads data from a CSV file into the tablespace. */
51
+ export interface ReadCsvStep extends BaseFileReadStep {
52
+ /** The type of the step, which is always 'read_csv' for this operation. */
53
+ type: 'read_csv';
54
+ /** Optional: The delimiter character used in the CSV file. */
55
+ delimiter?: string;
56
+ }
57
+ /** Represents the configuration for a step that reads data from an NDJSON file into the tablespace. */
58
+ export interface ReadNdjsonStep extends BaseFileReadStep {
59
+ /** The type of the step, which is always 'read_ndjson' for this operation. */
60
+ type: 'read_ndjson';
40
61
  }
41
62
  /**
42
- * Represents the configuration for a step that writes a table from the tablespace to a CSV file.
63
+ * Base interface for file writing operations that contains common fields
64
+ * shared across different file format writers.
43
65
  */
44
- export interface WriteCsvStep {
45
- /** The type of the step, which is always 'write_csv' for this operation. */
46
- type: 'write_csv';
66
+ export interface BaseFileWriteStep {
47
67
  /** The name of the table in the tablespace to be written. */
48
68
  table: string;
49
- /** Path to the output CSV file. */
69
+ /** Path to the output file. */
50
70
  file: string;
51
- /** Optional: A list of column names to write to the CSV. If omitted, all columns are written. */
71
+ /** Optional: A list of column names to write to the file. If omitted, all columns are written. */
52
72
  columns?: string[];
73
+ }
74
+ /**
75
+ * Represents the configuration for a step that writes a table from the tablespace to a CSV file.
76
+ */
77
+ export interface WriteCsvStep extends BaseFileWriteStep {
78
+ /** The type of the step, which is always 'write_csv' for this operation. */
79
+ type: 'write_csv';
53
80
  /** Optional: The delimiter character to use in the output CSV file. */
54
81
  delimiter?: string;
55
82
  }
56
83
  /**
57
- * Represents the configuration for a step that writes a table from the tablespace to a JSON file.
84
+ * Represents the configuration for a step that writes a table from the tablespace to an NDJSON file.
58
85
  */
59
- export interface WriteJsonStep {
60
- /** The type of the step, which is always 'write_json' for this operation. */
61
- type: 'write_json';
62
- /** The name of the table in the tablespace to be written. */
63
- table: string;
64
- /** Path to the output JSON file. */
65
- file: string;
66
- /** Optional: A list of column names to write to the JSON. If omitted, all columns are written. */
67
- columns?: string[];
86
+ export interface WriteNdjsonStep extends BaseFileWriteStep {
87
+ /** The type of the step, which is always 'write_ndjson' for this operation. */
88
+ type: 'write_ndjson';
68
89
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@platforma-open/milaboratories.software-ptabler.schema",
3
- "version": "1.7.0",
3
+ "version": "1.9.0",
4
4
  "description": "Type definitions for PTabler",
5
5
  "types": "./dist/index.d.ts",
6
6
  "main": "./dist/index.js",
@@ -20,6 +20,12 @@ export type Expression =
20
20
  | WhenThenOtherwiseExpression
21
21
  | SubstringExpression
22
22
  | StringReplaceExpression
23
+ | StringContainsExpression
24
+ | StringStartsWithExpression
25
+ | StringEndsWithExpression
26
+ | StringContainsAnyExpression
27
+ | StringCountMatchesExpression
28
+ | StringExtractExpression
23
29
  | MinMaxExpression
24
30
  | FillNaExpression
25
31
  | WindowExpression;
@@ -357,6 +363,100 @@ export interface StringReplaceExpression {
357
363
  literal?: boolean;
358
364
  }
359
365
 
366
+ /**
367
+ * Represents a string contains operation.
368
+ * Checks if the string contains a substring that matches a pattern using regex or literal matching.
369
+ * Based on polars.Series.str.contains - supports both regex and literal pattern matching with optional case-insensitive flags.
370
+ */
371
+ export interface StringContainsExpression {
372
+ /** The type of operation, always 'str_contains'. */
373
+ type: 'str_contains';
374
+ /** The input string expression to search in. */
375
+ value: Expression;
376
+ /** The pattern to search for. Can be a regex pattern (default) or literal string when literal=true. */
377
+ pattern: Expression | string;
378
+ /** If true, treat the pattern as a literal string. If false, treat it as a regex pattern. Defaults to false. */
379
+ literal?: boolean;
380
+ /** If true, raise an error if pattern is invalid regex. If false, return null for invalid patterns. Defaults to true. */
381
+ strict?: boolean;
382
+ }
383
+
384
+ /**
385
+ * Represents a string starts_with operation.
386
+ * Checks if the string starts with a specified prefix. Always uses literal matching (no regex support).
387
+ * Based on polars.Series.str.starts_with - only supports literal prefix matching.
388
+ */
389
+ export interface StringStartsWithExpression {
390
+ /** The type of operation, always 'str_starts_with'. */
391
+ type: 'str_starts_with';
392
+ /** The input string expression to check. */
393
+ value: Expression;
394
+ /** The prefix to check for (always treated as literal string, no regex support). */
395
+ prefix: Expression | string;
396
+ }
397
+
398
+ /**
399
+ * Represents a string ends_with operation.
400
+ * Checks if the string ends with a specified suffix. Always uses literal matching (no regex support).
401
+ * Based on polars.Series.str.ends_with - only supports literal suffix matching.
402
+ */
403
+ export interface StringEndsWithExpression {
404
+ /** The type of operation, always 'str_ends_with'. */
405
+ type: 'str_ends_with';
406
+ /** The input string expression to check. */
407
+ value: Expression;
408
+ /** The suffix to check for (always treated as literal string, no regex support). */
409
+ suffix: Expression | string;
410
+ }
411
+
412
+ /**
413
+ * Represents a string contains_any operation using the Aho-Corasick algorithm.
414
+ * Checks if the string contains any of the provided patterns using fast multi-pattern string matching.
415
+ * Based on polars.Series.str.contains_any - uses Aho-Corasick algorithm for efficient multi-pattern matching.
416
+ */
417
+ export interface StringContainsAnyExpression {
418
+ /** The type of operation, always 'str_contains_any'. */
419
+ type: 'str_contains_any';
420
+ /** The input string expression to search in. */
421
+ value: Expression;
422
+ /** Array of literal string patterns to search for. Only immediate string values are supported, no expressions or regex patterns. */
423
+ patterns: string[];
424
+ /** Enable ASCII-aware case insensitive matching. When enabled, searching is performed without respect to case for ASCII letters (a-z and A-Z) only. Defaults to false. */
425
+ asciiCaseInsensitive?: boolean;
426
+ }
427
+
428
+ /**
429
+ * Represents a string count_matches operation.
430
+ * Counts the number of times a pattern occurs in the string using regex or literal matching.
431
+ * Based on polars.Series.str.count_matches - supports both regex and literal pattern matching.
432
+ */
433
+ export interface StringCountMatchesExpression {
434
+ /** The type of operation, always 'str_count_matches'. */
435
+ type: 'str_count_matches';
436
+ /** The input string expression to count matches in. */
437
+ value: Expression;
438
+ /** The pattern to count occurrences of. Can be a regex pattern (default) or literal string when literal=true. */
439
+ pattern: Expression | string;
440
+ /** If true, treat the pattern as a literal string. If false, treat it as a regex pattern. Defaults to false. */
441
+ literal?: boolean;
442
+ }
443
+
444
+ /**
445
+ * Represents a string extract operation using regex patterns.
446
+ * Extracts the first match of a regex pattern from the string, optionally targeting specific capture groups.
447
+ * Based on polars.Series.str.extract - only supports regex patterns (no literal mode).
448
+ */
449
+ export interface StringExtractExpression {
450
+ /** The type of operation, always 'str_extract'. */
451
+ type: 'str_extract';
452
+ /** The input string expression to extract from. */
453
+ value: Expression;
454
+ /** The regex pattern to extract. Must be a valid regex pattern - no literal string mode is supported. */
455
+ pattern: Expression | string;
456
+ /** The capture group index to extract. Group 0 is the entire match, group 1 is the first capture group, etc. Defaults to 0. */
457
+ groupIndex?: number;
458
+ }
459
+
360
460
  /** Defines the supported min/max operators. */
361
461
  export type MinMaxOperator = 'min' | 'max';
362
462
 
package/src/index.ts CHANGED
@@ -1,4 +1,4 @@
1
- import type { ReadCsvStep, WriteCsvStep } from './io';
1
+ import type { ReadCsvStep, ReadNdjsonStep, WriteCsvStep, WriteJsonStep, WriteNdjsonStep, BaseFileReadStep, BaseFileWriteStep } from './io';
2
2
  import type { AddColumnsStep, FilterStep, SelectStep, WithColumnsStep, WithoutColumnsStep } from './basic_steps';
3
3
  import type { AggregateStep } from './aggregate';
4
4
  import type { AnyJoinStep } from './join';
@@ -7,7 +7,9 @@ import type { SortStep } from './sort';
7
7
 
8
8
  export type PTablerStep =
9
9
  | ReadCsvStep
10
+ | ReadNdjsonStep
10
11
  | WriteCsvStep
12
+ | WriteNdjsonStep
11
13
  | AddColumnsStep
12
14
  | FilterStep
13
15
  | AggregateStep
@@ -21,3 +23,6 @@ export type PTablerStep =
21
23
  export type PTablerWorkflow = {
22
24
  workflow: PTablerStep[];
23
25
  };
26
+
27
+ // Re-export base interfaces for potential external use
28
+ export type { BaseFileReadStep, BaseFileWriteStep };
package/src/io.ts CHANGED
@@ -12,16 +12,15 @@ export interface ColumnSchema {
12
12
  nullValue?: string;
13
13
  }
14
14
 
15
- /** Represents the configuration for a step that reads data from a CSV file into the tablespace. */
16
- export interface ReadCsvStep {
17
- /** The type of the step, which is always 'read_csv' for this operation. */
18
- type: 'read_csv';
19
- /** Path to the CSV file to be read. */
15
+ /**
16
+ * Base interface for file reading operations that contains common fields
17
+ * shared across different file format readers.
18
+ */
19
+ export interface BaseFileReadStep {
20
+ /** Path to the file to be read. */
20
21
  file: string;
21
22
  /** The name assigned to the loaded DataFrame in the tablespace. */
22
23
  name: string;
23
- /** Optional: The delimiter character used in the CSV file. */
24
- delimiter?: string;
25
24
  /**
26
25
  * Optional: Provides schema information for specific columns.
27
26
  * If `inferSchema` is `true` (default), these definitions act as overrides
@@ -32,41 +31,76 @@ export interface ReadCsvStep {
32
31
  */
33
32
  schema?: ColumnSchema[];
34
33
  /**
35
- * Optional: Whether to infer the schema from the CSV file using Polars'
34
+ * Optional: Whether to infer the schema from the file using Polars'
36
35
  * default inference mechanism (e.g., reading a certain number of rows).
37
36
  * Defaults to `true`. If set to `false`, type inference is disabled,
38
37
  * and types will rely on the `schema` field or Polars' defaults for
39
38
  * columns not specified in `schema`.
40
39
  */
41
- infer_schema?: boolean;
40
+ inferSchema?: boolean;
41
+ /**
42
+ * Optional: Return null if parsing fails because of schema mismatches.
43
+ * Defaults to `false`.
44
+ */
45
+ ignoreErrors?: boolean;
46
+ /**
47
+ * Optional: Stop reading after this many rows.
48
+ * If not specified, all rows will be read.
49
+ */
50
+ nRows?: number;
51
+ }
52
+
53
+ /** Represents the configuration for a step that reads data from a CSV file into the tablespace. */
54
+ export interface ReadCsvStep extends BaseFileReadStep {
55
+ /** The type of the step, which is always 'read_csv' for this operation. */
56
+ type: 'read_csv';
57
+ /** Optional: The delimiter character used in the CSV file. */
58
+ delimiter?: string;
59
+ }
60
+
61
+ /** Represents the configuration for a step that reads data from an NDJSON file into the tablespace. */
62
+ export interface ReadNdjsonStep extends BaseFileReadStep {
63
+ /** The type of the step, which is always 'read_ndjson' for this operation. */
64
+ type: 'read_ndjson';
42
65
  }
43
66
 
44
67
  /**
45
- * Represents the configuration for a step that writes a table from the tablespace to a CSV file.
68
+ * Base interface for file writing operations that contains common fields
69
+ * shared across different file format writers.
46
70
  */
47
- export interface WriteCsvStep {
48
- /** The type of the step, which is always 'write_csv' for this operation. */
49
- type: 'write_csv';
71
+ export interface BaseFileWriteStep {
50
72
  /** The name of the table in the tablespace to be written. */
51
73
  table: string;
52
- /** Path to the output CSV file. */
74
+ /** Path to the output file. */
53
75
  file: string;
54
- /** Optional: A list of column names to write to the CSV. If omitted, all columns are written. */
76
+ /** Optional: A list of column names to write to the file. If omitted, all columns are written. */
55
77
  columns?: string[];
78
+ }
79
+
80
+ /**
81
+ * Represents the configuration for a step that writes a table from the tablespace to a CSV file.
82
+ */
83
+ export interface WriteCsvStep extends BaseFileWriteStep {
84
+ /** The type of the step, which is always 'write_csv' for this operation. */
85
+ type: 'write_csv';
56
86
  /** Optional: The delimiter character to use in the output CSV file. */
57
87
  delimiter?: string;
58
88
  }
59
89
 
90
+ // Not yet supported: this should be a regular write_json step, but no lazy sink_json is available.
91
+ // A workaround can be created if needed.
92
+ // /**
93
+ // * Represents the configuration for a step that writes a table from the tablespace to a JSON file.
94
+ // */
95
+ // export interface WriteJsonStep extends BaseFileWriteStep {
96
+ // /** The type of the step, which is always 'write_json' for this operation. */
97
+ // type: 'write_json';
98
+ // }
99
+
60
100
  /**
61
- * Represents the configuration for a step that writes a table from the tablespace to a JSON file.
101
+ * Represents the configuration for a step that writes a table from the tablespace to an NDJSON file.
62
102
  */
63
- export interface WriteJsonStep {
64
- /** The type of the step, which is always 'write_json' for this operation. */
65
- type: 'write_json';
66
- /** The name of the table in the tablespace to be written. */
67
- table: string;
68
- /** Path to the output JSON file. */
69
- file: string;
70
- /** Optional: A list of column names to write to the JSON. If omitted, all columns are written. */
71
- columns?: string[];
103
+ export interface WriteNdjsonStep extends BaseFileWriteStep {
104
+ /** The type of the step, which is always 'write_ndjson' for this operation. */
105
+ type: 'write_ndjson';
72
106
  }