exdrf 0.0.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. exdrf/__init__.py +0 -0
  2. exdrf/__version__.py +24 -0
  3. exdrf/api.py +51 -0
  4. exdrf/constants.py +30 -0
  5. exdrf/dataset.py +197 -0
  6. exdrf/field.py +554 -0
  7. exdrf/field_types/__init__.py +0 -0
  8. exdrf/field_types/api.py +78 -0
  9. exdrf/field_types/blob_field.py +44 -0
  10. exdrf/field_types/bool_field.py +47 -0
  11. exdrf/field_types/date_field.py +49 -0
  12. exdrf/field_types/date_time.py +52 -0
  13. exdrf/field_types/dur_field.py +44 -0
  14. exdrf/field_types/enum_field.py +41 -0
  15. exdrf/field_types/filter_field.py +11 -0
  16. exdrf/field_types/float_field.py +85 -0
  17. exdrf/field_types/float_list.py +18 -0
  18. exdrf/field_types/formatted.py +39 -0
  19. exdrf/field_types/int_field.py +70 -0
  20. exdrf/field_types/int_list.py +18 -0
  21. exdrf/field_types/ref_base.py +105 -0
  22. exdrf/field_types/ref_m2m.py +39 -0
  23. exdrf/field_types/ref_m2o.py +23 -0
  24. exdrf/field_types/ref_o2m.py +36 -0
  25. exdrf/field_types/ref_o2o.py +32 -0
  26. exdrf/field_types/sort_field.py +18 -0
  27. exdrf/field_types/str_field.py +77 -0
  28. exdrf/field_types/str_list.py +18 -0
  29. exdrf/field_types/time_field.py +49 -0
  30. exdrf/filter.py +653 -0
  31. exdrf/filter_dsl.py +950 -0
  32. exdrf/filter_op_catalog.py +222 -0
  33. exdrf/label_dsl.py +691 -0
  34. exdrf/moment.py +496 -0
  35. exdrf/py.typed +0 -0
  36. exdrf/py_support.py +21 -0
  37. exdrf/resource.py +901 -0
  38. exdrf/sa_fi_item.py +69 -0
  39. exdrf/sa_filter_op.py +324 -0
  40. exdrf/utils.py +17 -0
  41. exdrf/validator.py +45 -0
  42. exdrf/var_bag.py +328 -0
  43. exdrf/visitor.py +58 -0
  44. exdrf-0.0.1.dev0.dist-info/METADATA +42 -0
  45. exdrf-0.0.1.dev0.dist-info/RECORD +57 -0
  46. exdrf-0.0.1.dev0.dist-info/WHEEL +5 -0
  47. exdrf-0.0.1.dev0.dist-info/top_level.txt +3 -0
  48. exdrf_tests/__init__.py +0 -0
  49. exdrf_tests/test_dataset.py +422 -0
  50. exdrf_tests/test_field.py +109 -0
  51. exdrf_tests/test_filter.py +425 -0
  52. exdrf_tests/test_filter_dsl.py +556 -0
  53. exdrf_tests/test_label_dsl.py +234 -0
  54. exdrf_tests/test_resource.py +107 -0
  55. exdrf_tests/test_utils.py +43 -0
  56. exdrf_tests/test_visitor.py +31 -0
  57. exdrf_tests/var_bag_test.py +502 -0
exdrf/filter_dsl.py ADDED
@@ -0,0 +1,950 @@
1
+ import re
2
+ from bisect import bisect_right
3
+ from collections import namedtuple
4
+ from enum import StrEnum
5
+ from typing import (
6
+ TYPE_CHECKING,
7
+ Any,
8
+ Generic,
9
+ List,
10
+ Optional,
11
+ TypeVar,
12
+ Union,
13
+ cast,
14
+ )
15
+
16
+ from attrs import define, field
17
+
18
+ from exdrf.filter import FieldFilter, FilterType
19
+ from exdrf.filter_op_catalog import (
20
+ ALL_CANONICAL_FILTER_OPS,
21
+ canonical_filter_ops_for_type,
22
+ filter_op_allowed_for_type,
23
+ normalize_filter_op,
24
+ )
25
+
26
+ try:
27
+ from exdrf_qt.context_use import QtUseContext
28
+ from exdrf_qt.models.model import DBM, QtModel # noqa: F401
29
+
30
+ _HAS_EXDRF_QT = True
31
+ except ImportError:
32
+ # Fallback when exdrf_qt is not available
33
+ class QtUseContext: # type: ignore
34
+ """Fallback base class when exdrf_qt is not available."""
35
+
36
+ DBM = TypeVar("DBM") # type: ignore
37
+ QtModel = Any # type: ignore
38
+ _HAS_EXDRF_QT = False
39
+
40
+ if TYPE_CHECKING:
41
+ try:
42
+ from exdrf_qt.context import QtContext # noqa: F401
43
+ except ImportError:
44
+ QtContext = Any # type: ignore
45
+
46
+
47
+ class FltErrCode(StrEnum):
48
+ UNTERMINATED_STRING = "unterminated_string"
49
+ UNMATCHED_BRACKETS = "unmatched_brackets"
50
+ UNEXPECTED_CHAR = "unexpected_char"
51
+ EXPECTED_TOKEN = "expected_token"
52
+ UNEXPECTED_END_OF_INPUT = "unexpected_end_of_input"
53
+ INVALID_INT_VALUE = "invalid_int_value"
54
+ INVALID_FLOAT_VALUE = "invalid_float_value"
55
+ UNKNOWN_FIELD = "unknown_field"
56
+ UNKNOWN_OPERATION = "unknown_operation"
57
+ INVALID_VALUE_TYPE = "invalid_value_type"
58
+
59
+
60
class FltSyntaxError(Exception):
    """A syntax error in the DSL.

    Attributes:
        code: The error code.
        source: The complete DSL text in which the error was found.
        text: Same object as ``source``; kept because the class has always
            advertised a ``text`` attribute.
        lineno: The line number of the error.
        column: The column number inside the line.
        offset: The 0-based offset from the start of the string.
        end_offset: The 0-based end offset. When the caller does not supply
            one, the error is taken to extend to the end of the text.
        value: The offending piece of text, when there is one.
        expected: A description of what would have been accepted instead.
    """

    code: "FltErrCode"
    source: str
    text: str
    lineno: int
    column: int
    offset: int
    end_offset: int
    value: Optional[str] = None
    expected: Optional[str] = None

    def __init__(
        self,
        msg: str,
        code: "FltErrCode",
        text: str,
        lineno: int,
        column: int,
        offset: int,
        end_offset: int = -1,
        value: Optional[str] = None,
        expected: Optional[str] = None,
    ):
        """Create the error.

        Args:
            msg: Human-readable message, forwarded to ``Exception``.
            code: The error code.
            text: The complete DSL text that was being processed.
            lineno: The line number of the error.
            column: The column number of the error.
            offset: The 0-based start offset of the error in ``text``.
            end_offset: The 0-based end offset; ``-1`` (the default) means
                "up to the end of ``text``".
            value: The offending piece of text, if any.
            expected: What would have been accepted instead, if known.
        """
        super().__init__(msg)
        self.code = code
        self.source = text
        # Alias for ``source`` so that the annotated ``text`` attribute
        # actually exists on instances.
        self.text = text
        self.lineno = lineno
        self.column = column
        self.offset = offset
        # The fallback must itself be an offset. The previous expression,
        # ``len(text) - offset``, produced a *length*, which could even be
        # smaller than ``offset``.
        self.end_offset = end_offset if end_offset != -1 else len(text)
        self.value = value
        self.expected = expected

    def as_dict(self) -> dict:
        """Return the error details as a plain dictionary.

        Returns:
            A dictionary with the keys ``code``, ``source``, ``line``,
            ``column``, ``offset``, ``end``, ``value`` and ``expected``.
        """
        return {
            "code": self.code,
            "source": self.source,
            "line": self.lineno,
            "column": self.column,
            "offset": self.offset,
            "end": self.end_offset,
            "value": self.value,
            "expected": self.expected,
        }
113
+
114
+
115
@define
class Token:
    """One lexical unit of the filter DSL together with its position.

    The text of a token is one of:
    - a logic operator (`AND`, `OR`, `NOT`),
    - a grouping character (`(`, `)`) or a comma,
    - a field name made of parts separated by dots,
    - a comparison operator (`==`, `!=`, `>`, `>=`, `<`, `<=`) or an
      operator spelled as a word,
    - a field value (quoted string, bracketed list or number).

    Attributes:
        value: The exact text of the token as it appears in the source.
        line: Line on which the token starts. The tokenizer in this module
            starts counting lines at 1.
        column: Column at which the token starts. The tokenizer in this
            module starts counting columns at 1.
        index: The 0-based index, in the original string, of the position
            just past the last character of the token.
    """

    value: str
    line: int
    column: int
    index: int

    @property
    def start_index(self) -> int:
        """The 0-based index of the first character of the token.

        Returns:
            ``index`` minus the length of ``value``.
        """
        token_length = len(self.value)
        return self.index - token_length
146
+
147
+
148
@define
class ParsedElement:
    """Marker base class for everything the DSL parser produces.

    Both field filters and logic operators derive from it; it declares no
    attributes of its own.
    """
154
+
155
+
156
@define
class ParsedFieldFilter(FieldFilter, ParsedElement):
    """A single `field operator value` condition plus its source tokens.

    Attributes:
        fld: The field to filter by (inherited from ``FieldFilter``).
        op: The operation to perform (inherited from ``FieldFilter``).
        vl: The value to compare against (inherited from ``FieldFilter``).
        tk_fld: The token the field name was read from.
        tk_op: The token the operation was read from.
        tk_val: The token the value was read from.
    """

    tk_fld: Token
    tk_op: Token
    tk_val: Token
172
+
173
+
174
@define
class ParsedLogic(ParsedElement):
    """Common base of the parsed `AND`, `OR` and `NOT` operators.

    Attributes:
        tk_op: The token holding the operator keyword as the user typed it.
    """

    tk_op: Token
183
+
184
+
185
@define
class ParsedLogicAnd(ParsedLogic):
    """A parsed `AND (...)` group.

    Attributes:
        tk_op: The token holding the `AND` keyword (inherited).
        items: The operands found between the parentheses. The annotation
            names field filters, but ``DSLParser.parse_logic`` stores
            whatever ``parse_expression`` returns, so nested logic
            operators can appear here as well.
    """

    items: List[ParsedFieldFilter]
195
+
196
+
197
@define
class ParsedLogicOr(ParsedLogic):
    """A parsed `OR (...)` group.

    Attributes:
        tk_op: The token holding the `OR` keyword (inherited).
        items: The operands found between the parentheses. The annotation
            names field filters, but ``DSLParser.parse_logic`` stores
            whatever ``parse_expression`` returns, so nested logic
            operators can appear here as well.
    """

    items: List[ParsedFieldFilter]
207
+
208
+
209
@define
class ParsedLogicNot(ParsedLogic):
    """A parsed `NOT (...)` wrapper around exactly one condition.

    Attributes:
        tk_op: The token holding the `NOT` keyword (inherited).
        item: The single field filter being negated.
    """

    item: ParsedFieldFilter
219
+
220
+
221
@define
class DSLTokenizer:
    """A tokenizer for the DSL.

    The tokenizer walks ``text`` from left to right and keeps track of the
    line and column of the current position so that tokens and errors can
    point back into the source.

    Attributes:
        text: The text to tokenize.
        pos: The current 0-based position in the text.
        line: The current line number (starts at 1).
        col: The current column number (starts at 1).
    """

    text: str
    pos: int = 0
    line: int = 1
    col: int = 1

    def _advance(self, count: int = 1):
        """Advance the position by the given count.

        Line and column are updated for every character that is skipped;
        advancing stops silently at the end of the text.

        Args:
            count: The number of characters to advance.
        """
        for _ in range(count):
            if self.pos < len(self.text):
                if self.text[self.pos] == "\n":
                    self.line += 1
                    self.col = 1
                else:
                    self.col += 1
                self.pos += 1

    def _match(self, pattern: str) -> Optional[re.Match]:
        """Match a pattern at the current position.

        The pattern is applied to the full text starting at ``pos`` instead
        of to a freshly sliced copy of the remaining text, so a call costs
        time proportional to the match rather than to the rest of the
        input. As a consequence the spans of the returned match are
        absolute positions in ``text``, and ``^`` only matches at the very
        start of ``text``; the patterns used by this class rely on neither.

        Args:
            pattern: The pattern to match.

        Returns:
            The match if found, otherwise None.
        """
        return re.compile(pattern).match(self.text, self.pos)

    def _skip_whitespace(self):
        """Skip whitespace at the current position.

        This is a helper method for the `next_token` method.
        """
        while self.pos < len(self.text) and self.text[self.pos].isspace():
            self._advance()

    def _take(self, length: int, line: int, col: int) -> Token:
        """Consume ``length`` characters and return them as a token.

        Args:
            length: Number of characters, starting at ``pos``, that make up
                the token.
            line: Line on which the token starts.
            col: Column at which the token starts.

        Returns:
            The token; its ``index`` is the position just past the token.
        """
        token_value = self.text[self.pos : self.pos + length]  # noqa: E203
        self._advance(len(token_value))
        return Token(token_value, line, col, self.pos)

    def next_token(self) -> Optional[Token]:
        """Get the next token from the text.

        Returns:
            The next token, or None if the end of the text is reached.

        Raises:
            FltSyntaxError: If a quoted string is not closed, if square
                brackets are not balanced, or if the current character
                cannot start any token.
        """
        self._skip_whitespace()

        # Nothing but whitespace was left.
        if self.pos >= len(self.text):
            return None

        ch = self.text[self.pos]
        start_line, start_col = self.line, self.col

        # Parentheses and commas are single-character tokens.
        if ch in "(),":
            return self._take(1, start_line, start_col)

        # A single quote starts a string token; the quotes are part of the
        # token text.
        if ch == "'":
            end_pos = self.pos + 1
            while end_pos < len(self.text) and self.text[end_pos] != "'":
                # A backslash protects the character that follows it, so an
                # escaped quote does not terminate the string.
                if self.text[end_pos] == "\\" and end_pos + 1 < len(self.text):
                    end_pos += 2
                else:
                    end_pos += 1
            # The scan stops either on the closing quote or past the end of
            # the text; only the latter is an error.
            if end_pos >= len(self.text):
                raise FltSyntaxError(
                    msg=(f"Unterminated string at line {start_line} col {start_col}"),
                    code=FltErrCode.UNTERMINATED_STRING,
                    text=self.text,
                    lineno=start_line,
                    column=start_col,
                    offset=self.pos,
                    end_offset=end_pos,
                    value=self.text[self.pos : end_pos + 1],  # noqa: E203
                )
            return self._take(end_pos + 1 - self.pos, start_line, start_col)

        # An opening bracket starts a list token that runs up to the
        # matching closing bracket; nested brackets are counted.
        if ch == "[":
            end_pos = self.pos
            bracket_level = 0
            while end_pos < len(self.text):
                if self.text[end_pos] == "[":
                    bracket_level += 1
                elif self.text[end_pos] == "]":
                    bracket_level -= 1
                    if bracket_level == 0:
                        break
                end_pos += 1
            if bracket_level != 0:
                raise FltSyntaxError(
                    msg=(f"Unmatched brackets at line {start_line} col {start_col}"),
                    code=FltErrCode.UNMATCHED_BRACKETS,
                    text=self.text,
                    lineno=start_line,
                    column=start_col,
                    offset=self.pos,
                    end_offset=end_pos,
                    value=self.text[self.pos : end_pos + 1],  # noqa: E203
                )
            return self._take(end_pos + 1 - self.pos, start_line, start_col)

        # The remaining token kinds are tried in this order:
        # 1. a word: a letter or underscore followed by letters, digits,
        #    underscores or dots (keywords, field names, word operators);
        # 2. an unsigned number with an optional fractional part;
        # 3. a symbolic comparison operator.
        for pattern in (
            r"[A-Za-z_][A-Za-z0-9_.]*",
            r"\d+(\.\d+)?",
            r"==|!=|>=|<=|>|<",
        ):
            match = self._match(pattern)
            if match:
                return self._take(len(match.group(0)), start_line, start_col)

        raise FltSyntaxError(
            msg=(f"Unexpected character '{ch}' at line {start_line} col {start_col}"),
            code=FltErrCode.UNEXPECTED_CHAR,
            text=self.text,
            lineno=start_line,
            column=start_col,
            offset=self.pos,
            value=ch,
        )

    def tokenize(self) -> List[Token]:
        """Tokenize the text.

        Returns:
            A list of tokens, in source order, from the current position to
            the end of the text.

        Raises:
            FltSyntaxError: If the text contains something that is not a
                valid token.
        """
        tokens = []
        while self.pos < len(self.text):
            token = self.next_token()
            # None is only returned when trailing whitespace was consumed.
            if token:
                tokens.append(token)
        return tokens
391
+
392
+
393
def infer_value_type(value: Any) -> str:
    """Classify a Python value into one of the DSL's coarse value kinds.

    Args:
        value: The value to classify.

    Returns:
        ``"string"`` for ``str``, ``"number"`` for ``int`` and ``float``,
        ``"list"`` for ``list`` and ``"unknown"`` for everything else,
        booleans included.
    """
    # bool is a subclass of int, so it has to be ruled out before the
    # numeric check would claim it.
    if isinstance(value, bool):
        return "unknown"

    kinds = (
        (str, "string"),
        ((int, float), "number"),
        (list, "list"),
    )
    for python_type, label in kinds:
        if isinstance(value, python_type):
            return label
    return "unknown"
411
+
412
+
413
@define
class FieldValidator(QtUseContext, Generic[DBM]):
    """Check parsed field filters against the filterable fields of a model.

    Attributes:
        ctx: The context object. It is not read by ``validate`` itself.
        qt_model: The model whose ``filter_fields`` define which field
            names, and which operations per field type, are acceptable.
    """

    ctx: "QtContext"
    qt_model: "QtModel[DBM]"

    def validate(
        self,
        parser: "DSLParser",
        field: ParsedFieldFilter,
        token: Optional[Token] = None,
    ):
        """Ensure the field exists and the operation is allowed for it.

        Only the first model field whose name equals ``field.fld`` is
        considered.

        Args:
            parser: Parser whose ``src_text`` is attached to raised errors.
            field: The parsed field filter to check.
            token: Token used to locate an unknown-field error; when not
                given, ``field.tk_fld`` is used.

        Raises:
            FltSyntaxError: With ``UNKNOWN_FIELD`` when no model field has
                the requested name, or with ``UNKNOWN_OPERATION`` when the
                operation is not recognized or not allowed for the type of
                the field.
        """
        name_token = token or field.tk_fld

        matched = next(
            (
                candidate
                for candidate in self.qt_model.filter_fields
                if candidate.name == field.fld
            ),
            None,
        )
        if matched is None:
            raise FltSyntaxError(
                msg=f"Unknown field: {field.fld}",
                code=FltErrCode.UNKNOWN_FIELD,
                text=parser.src_text,
                lineno=name_token.line,
                column=name_token.column,
                offset=name_token.start_index,
                end_offset=name_token.index,
                value=field.fld,
                expected=", ".join([f.name for f in self.qt_model.filter_fields]),
            )

        def operation_error(message, choices):
            # Both operation problems point at the operator token and list
            # the acceptable operations in sorted order.
            op_token = field.tk_op
            return FltSyntaxError(
                msg=message,
                code=FltErrCode.UNKNOWN_OPERATION,
                text=parser.src_text,
                lineno=op_token.line,
                column=op_token.column,
                offset=op_token.start_index,
                end_offset=op_token.index,
                value=field.op,
                expected=", ".join(sorted(choices)),
            )

        if normalize_filter_op(field.op) is None:
            raise operation_error(
                "Unknown filter operation: %s" % (field.op,),
                ALL_CANONICAL_FILTER_OPS,
            )

        if not filter_op_allowed_for_type(matched.type_name, field.op):
            permitted = canonical_filter_ops_for_type(matched.type_name)
            raise operation_error(
                "Operation %r is not allowed for field type %r"
                % (field.op, matched.type_name),
                permitted,
            )
483
+
484
+
485
@define
class DSLParser:
    """Parse the DSL.

    Attributes:
        src_text: The original DSL text; attached to every raised error.
        tokens: The tokens to parse.
        index: The index, in ``tokens``, of the next token to consume.
        last_error: The most recent syntax error raised by this parser, or
            None. It is reset at the start of ``parse``.
    """

    src_text: str
    tokens: List[Token]
    index: int
    last_error: Optional[FltSyntaxError] = field(default=None, init=False)

    @property
    def last_token(self) -> Optional[Token]:
        """Get the last token.

        Returns:
            The last token, or None if there are no tokens.
        """
        return self.tokens[-1] if self.tokens else None

    @property
    def last_line(self) -> int:
        """Get the last line number.

        Returns:
            The line of the last token, or 0 if there are no tokens.
        """
        return self.tokens[-1].line if self.tokens else 0

    def current(self) -> Optional[Token]:
        """Get the current token.

        Returns:
            The current token, or None if the end of the tokens is reached.
        """
        return self.tokens[self.index] if self.index < len(self.tokens) else None

    def _eof_error(
        self,
        msg: str,
        code: FltErrCode,
        expected: Optional[str] = None,
    ) -> FltSyntaxError:
        """Build (and record) an error located at the end of the input.

        The error points at the end of the last token, or at offset 0 when
        there are no tokens at all.

        Args:
            msg: The error message.
            code: The error code.
            expected: What the parser was looking for, if known.

        Returns:
            The error, ready to be raised by the caller.
        """
        last = self.last_token
        err = FltSyntaxError(
            msg=msg,
            code=code,
            text=self.src_text,
            lineno=self.last_line,
            column=last.column if last else 0,
            offset=last.index if last else 0,
            expected=expected,
        )
        self.last_error = err
        return err

    def _value_error(self, tok: Token, msg: str, code: FltErrCode) -> FltSyntaxError:
        """Build (and record) an error that covers a whole value token.

        Args:
            tok: The token that could not be converted.
            msg: The error message.
            code: The error code.

        Returns:
            The error, ready to be raised by the caller.
        """
        err = FltSyntaxError(
            msg=msg,
            code=code,
            text=self.src_text,
            lineno=tok.line,
            column=tok.column,
            # The error spans the token: ``index`` is the token's end, so
            # the start has to come from ``start_index``.
            offset=tok.start_index,
            end_offset=tok.index,
            value=tok.value,
        )
        self.last_error = err
        return err

    def match(self, expected: str) -> Token:
        """Consume the current token, which must equal ``expected``.

        The comparison ignores letter case.

        Args:
            expected: The expected token text.

        Returns:
            The matched token.

        Raises:
            FltSyntaxError: If the input is exhausted or the current token
                is something else.
        """
        tok = self.current()
        if not tok:
            raise self._eof_error(
                msg=(f"Expected '{expected}', but got end of input"),
                code=FltErrCode.EXPECTED_TOKEN,
                expected=expected,
            )
        if tok.value.lower() != expected.lower():
            err = FltSyntaxError(
                msg=(f"Expected '{expected}', but got '{tok.value}'"),
                code=FltErrCode.EXPECTED_TOKEN,
                text=self.src_text,
                lineno=tok.line,
                column=tok.column,
                offset=tok.start_index,
                end_offset=tok.index,
                # ``value`` is the text that was actually found; what was
                # wanted goes into ``expected``.
                value=tok.value,
                expected=expected,
            )
            self.last_error = err
            raise err
        self.index += 1
        return tok

    def match_any(self, expected: str) -> Token:
        """Consume the current token, whatever it is.

        Args:
            expected: A description of the kind of token the caller is
                looking for; only used in the error message.

        Returns:
            The consumed token.

        Raises:
            FltSyntaxError: If the input is exhausted.
        """
        tok = self.current()
        if tok is None:
            raise self._eof_error(
                msg=(f"Expected <{expected}>, but got end of input"),
                code=FltErrCode.EXPECTED_TOKEN,
            )
        self.index += 1
        return tok

    def parse(self) -> List[Union[ParsedFieldFilter, ParsedLogic]]:
        """Parse the DSL.

        Expressions are read one after the other until the tokens run out;
        a comma directly after an expression is skipped.

        Returns:
            The parsed filter.

        Raises:
            FltSyntaxError: If the tokens do not form valid expressions.
                The error is also stored in ``last_error``.
        """
        self.last_error = None  # Clear previous error
        result = []
        tok = self.current()
        while tok:
            try:
                result.extend(self.parse_expression())
            except FltSyntaxError as e:
                self.last_error = e
                raise
            tok = self.current()
            if tok and tok.value == ",":
                # ``tok`` intentionally keeps the comma, so a comma must be
                # followed by another expression.
                self.index += 1
        return result

    def parse_expression(self) -> List[Union[ParsedFieldFilter, ParsedLogic]]:
        """Parse an expression.

        The first token decides what is parsed: `AND`/`OR` start a logic
        group, `NOT` starts a negation, anything else starts a field
        expression. Keywords are recognized in any letter case.

        Returns:
            A list holding the single parsed element.

        Raises:
            FltSyntaxError: If the input is exhausted or the expression is
                malformed.
        """
        tok = self.current()
        if tok is None:
            raise self._eof_error(
                msg=("Unexpected end of input"),
                code=FltErrCode.UNEXPECTED_END_OF_INPUT,
            )
        value = tok.value.upper()
        if value in ("AND", "OR"):
            return [self.parse_logic(tok)]
        elif value == "NOT":
            return [self.parse_not()]
        else:
            return [self.parse_field_expr()]

    def parse_logic(self, op: Token) -> ParsedLogic:
        """Parse a logic expression of the form `AND ( ... )` / `OR ( ... )`.

        The operands may be separated by commas; a comma after the last
        operand is accepted.

        Args:
            op: The operator token, which must also be the current token.

        Returns:
            The parsed filter.

        Raises:
            FltSyntaxError: If the parentheses or the operands are
                malformed.
            ValueError: If ``op`` is neither `AND` nor `OR`.
        """
        op_str = op.value.upper()
        self.match(op_str)
        self.match("(")
        items = []
        while True:
            tok = self.current()
            # Running out of tokens is reported by match(")") below.
            if not tok:
                break
            if tok.value == ")":
                break
            items.extend(self.parse_expression())
            tok = self.current()
            if tok and tok.value == ",":
                self.match(",")
        self.match(")")
        if op_str == "AND":
            return ParsedLogicAnd(tk_op=op, items=cast(List[ParsedFieldFilter], items))
        elif op_str == "OR":
            return ParsedLogicOr(tk_op=op, items=cast(List[ParsedFieldFilter], items))
        else:
            raise ValueError(f"Unknown operator: {op}")

    def parse_not(self) -> ParsedLogic:
        """Parse a not expression of the form `NOT ( field op value )`.

        Returns:
            The parsed filter.

        Raises:
            FltSyntaxError: If the expression is malformed.
        """
        op = self.match("NOT")
        self.match("(")
        expr = self.parse_field_expr()
        self.match(")")
        return ParsedLogicNot(tk_op=op, item=expr)

    def parse_field_expr(self) -> ParsedFieldFilter:
        """Parse a field expression made of three tokens.

        The tokens are taken, in order, as the field name, the operation
        and the value; only the value is interpreted.

        Returns:
            The parsed filter.

        Raises:
            FltSyntaxError: If fewer than three tokens are left or the
                value cannot be converted.
        """
        fld_tok = self.match_any("identifier")
        op_tok = self.match_any("operator")
        val_tok = self.match_any("value")
        val = self.parse_value(val_tok)
        ff = ParsedFieldFilter(
            fld=fld_tok.value,
            op=op_tok.value,
            vl=val,
            tk_fld=fld_tok,
            tk_op=op_tok,
            tk_val=val_tok,
        )
        return ff

    def parse_value(self, tok: Token) -> Any:
        """Convert a value token into a Python value.

        Args:
            tok: The token holding the raw value.

        Returns:
            - for a quoted token, the text between the quotes, with any
              backslashes left in place;
            - for a bracketed token, the list of its comma-separated,
              non-empty items with whitespace and surrounding single
              quotes stripped (items stay strings);
            - for a token containing a dot, a ``float``;
            - otherwise an ``int``.

        Raises:
            FltSyntaxError: If the token is not a valid float or integer.
        """
        raw = tok.value
        if raw.startswith("'") and raw.endswith("'"):
            return raw[1:-1]
        elif raw.startswith("[") and raw.endswith("]"):
            items = raw[1:-1].split(",")
            return [item.strip().strip("'") for item in items if item.strip()]
        elif "." in raw:
            try:
                return float(raw)
            except ValueError:
                raise self._value_error(
                    tok,
                    msg=(
                        f"Invalid float value: {raw} (expected because "
                        "string has decimal point)"
                    ),
                    code=FltErrCode.INVALID_FLOAT_VALUE,
                )
        else:
            try:
                return int(raw)
            except ValueError:
                raise self._value_error(
                    tok,
                    msg=(
                        f"Invalid integer value: {raw} (expected because "
                        "this is the last valid choice)"
                    ),
                    code=FltErrCode.INVALID_INT_VALUE,
                )
745
+
746
+
747
@define
class DSLParserWithValidation(DSLParser):
    """Parse the DSL with validation.

    Attributes:
        validator: The validator every parsed field expression is checked
            with.
    """

    validator: FieldValidator

    def parse_field_expr(self) -> ParsedFieldFilter:
        """Parse a field expression and validate it.

        The parsing itself is delegated to ``DSLParser.parse_field_expr``
        so the two classes cannot drift apart.

        Returns:
            The parsed filter.

        Raises:
            FltSyntaxError: If the expression is malformed or rejected by
                the validator.
        """
        ff = super().parse_field_expr()
        self.validator.validate(self, ff)
        return ff
777
+
778
+
779
def serialize_filter(obj: Any) -> FilterType:
    """Serialize a filter.

    Parsed elements are converted into plain dictionaries and lists:
    - a field filter becomes ``{"fld": ..., "op": ..., "vl": ...}``;
    - a `NOT` becomes ``[keyword, serialized_item]``;
    - an `AND`/`OR` becomes ``[keyword, [serialized_items...]]``;
    - a list is serialized element by element.

    The keyword is emitted exactly as it was typed in the source.

    Args:
        obj: The filter to serialize.

    Returns:
        The serialized filter.

    Raises:
        ValueError: If ``obj`` is not a parsed element or a list.
    """
    if isinstance(obj, ParsedFieldFilter):
        return cast(
            FilterType,
            {"fld": obj.fld, "op": obj.op, "vl": obj.vl},
        )

    if isinstance(obj, ParsedLogic):
        # The parser accepts keywords in any letter case, so the negation
        # is recognized by its type (or, failing that, by a
        # case-insensitive look at the keyword) rather than by comparing
        # the token text with the upper-case spelling only.
        is_negation = (
            isinstance(obj, ParsedLogicNot) or obj.tk_op.value.upper() == "NOT"
        )
        if is_negation:
            negation = cast(ParsedLogicNot, obj)
            return cast(
                FilterType,
                [negation.tk_op.value, serialize_filter(negation.item)],
            )
        group = cast(ParsedLogicAnd, obj)
        return cast(
            FilterType,
            [group.tk_op.value, [serialize_filter(e) for e in group.items]],
        )

    if isinstance(obj, list):
        return cast(
            FilterType,
            [serialize_filter(e) for e in obj],
        )

    raise ValueError(f"Unknown object type: {type(obj)}")
816
+
817
+
818
def raw_filter_to_text(filter: "Union[FilterType, FieldFilter]") -> str:
    """Convert a raw filter to a text string.

    Every condition and every opening/closing line of a logic group is
    written on its own line, indented with one tab per nesting level. An
    outermost `AND` is dropped, so only its content is written.

    Args:
        filter: The filter to convert: a dictionary with the keys ``fld``,
            ``op`` and ``vl``, a ``FieldFilter``, a two-element list
            ``[operator, operand]`` or a list of such parts.

    Returns:
        The filter as a string; each line ends with a newline character.

    Raises:
        ValueError: If a logic operator list does not have exactly two
            elements, if the operand of `AND`/`OR` is not a list, if the
            operator is unknown or if a part has an unsupported type.
    """
    lines = []

    def quoted(value: Any) -> Any:
        # String values are wrapped in single quotes; embedded quotes are
        # written as they are.
        return f"'{value}'" if isinstance(value, str) else value

    def do_part(part: Any, indent=0) -> None:
        prefix = "\t" * indent
        if isinstance(part, list):
            if len(part) == 0:
                return

            # A list that starts with a string is a logic operator with its
            # operand; any other list is a plain sequence of parts.
            if isinstance(part[0], str):
                if len(part) != 2:
                    raise ValueError(
                        f"The logic operator list expects two elements. Got {part}"
                    )
                op_name = part[0].lower()
                if op_name not in ("and", "or", "not"):
                    raise ValueError(f"Invalid logic operator: {op_name}")
                # NOT can have dict or FieldFilter directly; it does not
                # need to be wrapped in a list.
                if op_name != "not" and not isinstance(part[1], list):
                    raise ValueError(
                        f"The logic operator list expects a list as the "
                        f"second element. Got {part}"
                    )
                lines.append(f"{prefix}{op_name.upper()} (")
                do_part(part[1], indent + 1)
                lines.append(f"{prefix})")
                return

            for item in part:
                do_part(item, indent)
        elif isinstance(part, dict):
            lines.append(f"{prefix}{part['fld']} {part['op']} {quoted(part['vl'])}")
        elif isinstance(part, FieldFilter):
            lines.append(f"{prefix}{part.fld} {part.op} {quoted(part.vl)}")
        else:
            raise ValueError(f"Invalid filter part: {part}")

    # Get rid of the outer AND layer if present.
    if (
        isinstance(filter, list)
        and len(filter) == 2
        and isinstance(filter[0], str)
        and filter[0].upper() == "AND"
    ):
        do_part(cast(list, filter)[1])
    else:
        do_part(filter)
    return "".join(f"{line}\n" for line in lines)
899
+
900
+
901
# One index record: the character span ``start``..``end`` of a token and the
# parsed element that token belongs to.
IndexEntry = namedtuple("IndexEntry", "start end element")
903
+
904
+
905
@define
class Index:
    """Map character positions in the DSL text back to parsed elements.

    Attributes:
        index: The entries, one per indexed token. ``find_element`` expects
            them to be ordered by start position, which ``create`` takes
            care of.
    """

    index: List[IndexEntry] = field(factory=list)

    def add_to_index(self, token: Token, element: ParsedElement):
        """Record that ``token`` belongs to ``element``.

        Args:
            token: The token whose span is recorded.
            element: The parsed element the token is part of.
        """
        entry = IndexEntry(token.start_index, token.index, element)
        self.index.append(entry)

    def build_index(self, element: Union[ParsedElement, List[ParsedElement]]):
        """Add the tokens of ``element`` and of everything nested in it.

        Field filters contribute their three tokens; logic operators
        contribute their keyword token and then their operands. Anything
        else is ignored.

        Args:
            element: A parsed element or a list of parsed elements.
        """
        if isinstance(element, list):
            for child in cast(list, element):
                self.build_index(child)
            return

        if isinstance(element, ParsedFieldFilter):
            for part in (element.tk_fld, element.tk_op, element.tk_val):
                self.add_to_index(part, element)
            return

        if isinstance(element, ParsedLogicNot):
            self.add_to_index(element.tk_op, element)
            self.build_index(element.item)
            return

        if isinstance(element, (ParsedLogicAnd, ParsedLogicOr)):
            self.add_to_index(element.tk_op, element)
            for operand in element.items:
                self.build_index(operand)

    def find_element(self, position):
        """Find the element whose token covers ``position``.

        Args:
            position: A 0-based offset in the DSL text.

        Returns:
            The element of the last entry that starts at or before
            ``position`` if that entry also ends after ``position``,
            otherwise None.
        """
        slot = bisect_right(self.index, position, key=lambda entry: entry.start) - 1
        if slot < 0:
            return None
        candidate = self.index[slot]
        if candidate.start <= position < candidate.end:
            return candidate.element
        return None

    @classmethod
    def create(cls, parsed: Union[ParsedElement, List[ParsedElement]]):
        """Build a ready-to-query index for a parse result.

        Args:
            parsed: A parsed element or a list of parsed elements.

        Returns:
            The index, with its entries sorted by start position.
        """
        instance = cls()
        instance.build_index(parsed)

        # The lookup bisects on the start position.
        instance.index.sort(key=lambda entry: entry.start)

        return instance