ransacklib 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ransack/parser.py ADDED
@@ -0,0 +1,174 @@
1
+ """
2
+ parser.py - Provides a `Parser` class for parsing input data using Lark.
3
+
4
+ This module defines a custom grammar for parsing various expressions such as
5
+ comparisons, arithmetic operations, logical operators, IP address parsing,
6
+ datetime formats, and strings. The `Parser` class utilizes the Lark parser
7
+ to build a parse tree for input expressions that conform to the grammar rules.
8
+
9
+ Classes:
10
+ - Parser: Encapsulates the Lark parser with the defined grammar to parse
11
+ input data into an abstract syntax tree (AST).
12
+ """
13
+
14
+ from typing import Any, no_type_check
15
+
16
+ from lark import Lark, Token, Tree
17
+ from lark.exceptions import UnexpectedInput, VisitError
18
+
19
+ from .exceptions import ParseError, ShapeError, add_caret_to_context
20
+ from .transformer import ExpressionTransformer
21
+
22
+
23
class Parser:
    """
    Parser for query expressions based on a custom grammar using Lark.

    This class accepts an optional `context` dictionary at initialization. The context
    provides user-defined variables that can be referenced in expressions and take
    precedence over variables from the input data source.

    To avoid ambiguity, variables prefixed with a dot (e.g., `.foo`) are always
    resolved from the data, while unprefixed variables (e.g., `foo`) are resolved
    from the context if present.

    Attributes:
        grammar: Lark grammar (LALR) describing the query language.
        parser: The `Lark` instance built from `grammar`.
        shaper: The `ExpressionTransformer` applied to parse trees by `parse`.
    """

    # Operator precedence, loosest to tightest binding:
    #   or -> and -> not -> comparison -> sum -> product -> range/concat
    #   -> exists ("??") -> atom
    # NOTE: the day marker in TIMEDELTA is written `[Dd]`; the former `[D|d]`
    # was a character class that also accepted a literal "|".
    grammar = r"""
    ?start: or_expr

    ?or_expr: and_expr
        | or_expr ("or" | "||") and_expr -> or_op

    ?and_expr: not_expr
        | and_expr ("and" | "&&") not_expr -> and_op

    ?not_expr: comparison
        | ("not" | "!") comparison -> not_op

    ?comparison: sum
        | sum ">" sum -> gt
        | sum ">=" sum -> gte
        | sum "<" sum -> lt
        | sum "<=" sum -> lte
        | sum "=" sum -> any_eq
        | sum "==" sum -> eq
        | sum ("like" | "LIKE") sum -> like_op
        | sum ("in" | "IN") sum -> in_op
        | sum ("contains" | "CONTAINS") sum -> contains_op

    ?sum: product
        | sum "+" product -> add
        | sum "-" product -> sub

    ?product: range
        | product "*" range -> mul
        | product "/" range -> div
        | product "%" range -> mod

    ?range: exists
        | exists ".." exists -> range_op
        | range "." exists -> concat_op

    ?exists: atom
        | VARIABLE "??" -> exists_op
        | VARIABLE "??" atom -> exists_with_default

    ?atom: NUMBER -> number
        | "-" atom -> neg
        | "(" or_expr ")"
        | ipv4_atom
        | ipv6_atom
        | datetime
        | TIMEDELTA -> timedelta
        | STRING -> string
        | VARIABLE -> variable
        | function
        | list

    ?ipv4_atom: IPV4 -> ipv4_single
        | IPV4_RANGE -> ipv4_range
        | IPV4_CIDR -> ipv4_cidr

    ?ipv6_atom: IPV6 -> ipv6_single
        | IPV6_RANGE -> ipv6_range
        | IPV6_CIDR -> ipv6_cidr

    datetime: DATE ("T" | "t")? TIME -> datetime_full
        | DATE -> datetime_only_date

    function: FUNCTION [args] ")"
    args: atom ("," atom)*

    list: "[" [atom ("," atom)*] "]"

    NUMBER: /\d+(\.\d+)?([eE][+-]?\d+)?/
    IPV4.2: /\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/
    IPV4_RANGE.2: /\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}-\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/
    IPV4_CIDR.2: /\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\/\d{1,2}/
    IPV6.2: /[:a-fA-F0-9]+:[:a-fA-F0-9]*/
    IPV6_RANGE.2: /[:a-fA-F0-9]+:[:a-fA-F0-9]*-[:a-fA-F0-9]+:[:a-fA-F0-9]*/
    IPV6_CIDR.2: /[:a-fA-F0-9]+:[:a-fA-F0-9]*\/\d{1,3}/
    DATE.2: /[0-9]{4}-[0-9]{2}-[0-9]{2}/
    TIME.2: /[0-9]{2}:[0-9]{2}:[0-9]{2}(?:\.[0-9]+)?(?:[Zz]|(?:[+-][0-9]{2}:[0-9]{2}))?/
    TIMEDELTA.2: /([0-9]+[Dd])?[0-9]{2}:[0-9]{2}:[0-9]{2}/
    STRING: /"([^"]+)"|\'([^\']+)\'/
    VARIABLE: /\.?[_a-zA-Z][-_a-zA-Z0-9]*(?:\.?[a-zA-Z][-_a-zA-Z0-9]*)*/
    FUNCTION.2: /[_a-zA-Z][_a-zA-Z0-9]*\(/

    %import common.WS
    %ignore WS
    """  # noqa

    # Number of input characters shown on each side of a failing span.
    _CONTEXT_CHARS = 40

    def __init__(self, context: dict[str, Any] | None = None) -> None:
        """
        Initialize the Parser.

        Args:
            context (dict[str, Any] | None): Optional dictionary of variables that can
                be referenced in queries. Context variables override data variables unless
                the data variable is explicitly accessed with a leading dot.
        """
        # propagate_positions=True makes Lark record source positions on every
        # tree node; `parse` relies on them to locate shaping errors.
        self.parser = Lark(self.grammar, parser="lalr", propagate_positions=True)
        self.shaper = ExpressionTransformer(context)

    @staticmethod
    def _locate(node: Any) -> Any:
        """
        Return the object carrying position attributes for `node`, or None.

        Tokens carry `start_pos`/`line`/... themselves, while tree nodes keep
        them on `.meta`. Anything else (for example the `None` placeholder of
        an unmatched optional) has no position.
        """
        holder = getattr(node, "meta", node)
        if getattr(holder, "start_pos", None) is None:
            return None
        return holder

    @classmethod
    def _error_span(cls, failed: Any) -> tuple[Any, Any] | None:
        """
        Return `(first, last)` position holders delimiting `failed`.

        The first and last children are preferred; when either has no position
        of its own, the span of `failed` itself is used instead. Returns None
        when no position information is available at all.
        """
        whole = cls._locate(failed)
        children = getattr(failed, "children", None) or ()

        first = cls._locate(children[0]) if children else None
        last = cls._locate(children[-1]) if children else None
        if first is None:
            first = whole
        if last is None:
            last = whole

        if first is None or last is None:
            return None
        return first, last

    @no_type_check
    def parse(self, data: str) -> Tree[Token]:
        """
        Parse `data` and run the result through the expression transformer.

        Args:
            data: The query expression to parse.

        Returns:
            The result of `self.shaper.transform` applied to the parse tree.

        Raises:
            ParseError: If `data` does not conform to the grammar.
            ShapeError: If the transformer fails on a node; carries the
                position of the offending input and a context excerpt.
        """
        parsed_tree = self.parse_only(data)
        try:
            return self.shaper.transform(parsed_tree)
        except VisitError as e:
            span = self._error_span(e.obj)
            if span is None:
                # No source position to anchor a ShapeError to; surface the
                # transformer failure unchanged rather than masking it.
                raise
            first, last = span

            # Extract context - input before and after the problematic span
            context_start_pos = max(first.start_pos - self._CONTEXT_CHARS, 0)
            context_end_pos = min(last.end_pos + self._CONTEXT_CHARS, len(data))
            raw_context = data[context_start_pos:context_end_pos]

            # Add '^' at the start of the problematic span
            context_with_caret = add_caret_to_context(
                context=raw_context,
                line=first.line,
                column=first.column,
                original_data=data,
                context_start_pos=context_start_pos,
            )

            raise ShapeError(
                str(e.orig_exc),
                line=first.line,
                column=first.column,
                context=context_with_caret,
                start_pos=first.start_pos,
                end_pos=last.end_pos,
                end_line=last.end_line,
                end_column=last.end_column,
            ) from None

    def parse_only(self, data: str) -> Tree[Token]:
        """
        Parse `data` into a raw Lark parse tree without transforming it.

        Args:
            data: The query expression to parse.

        Returns:
            The parse tree produced by the Lark parser.

        Raises:
            ParseError: If `data` does not conform to the grammar.
        """
        try:
            return self.parser.parse(data)
        except UnexpectedInput as e:
            raise ParseError(e.line, e.column, e.get_context(data)) from None
ransack/py.typed ADDED
File without changes