ransacklib 0.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ransack/__init__.py +15 -0
- ransack/exceptions.py +118 -0
- ransack/function.py +38 -0
- ransack/operator.py +500 -0
- ransack/parser.py +174 -0
- ransack/py.typed +0 -0
- ransack/transformer.py +846 -0
- ransacklib-0.1.10.dist-info/METADATA +60 -0
- ransacklib-0.1.10.dist-info/RECORD +12 -0
- ransacklib-0.1.10.dist-info/WHEEL +5 -0
- ransacklib-0.1.10.dist-info/licenses/LICENSE +21 -0
- ransacklib-0.1.10.dist-info/top_level.txt +1 -0
ransack/parser.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
"""
|
|
2
|
+
parser.py - Provides a `Parser` class for parsing input data using Lark.
|
|
3
|
+
|
|
4
|
+
This module defines a custom grammar for parsing various expressions such as
|
|
5
|
+
comparisons, arithmetic operations, logical operators, IP address parsing,
|
|
6
|
+
datetime formats, and strings. The `Parser` class utilizes the Lark parser
|
|
7
|
+
to build a parse tree for input expressions that conform to the grammar rules.
|
|
8
|
+
|
|
9
|
+
Classes:
|
|
10
|
+
- Parser: Encapsulates the Lark parser with the defined grammar to parse
|
|
11
|
+
input data into an abstract syntax tree (AST).
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from typing import Any, no_type_check
|
|
15
|
+
|
|
16
|
+
from lark import Lark, Token, Tree
|
|
17
|
+
from lark.exceptions import UnexpectedInput, VisitError
|
|
18
|
+
|
|
19
|
+
from .exceptions import ParseError, ShapeError, add_caret_to_context
|
|
20
|
+
from .transformer import ExpressionTransformer
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Parser:
    """
    Parser for query expressions based on a custom grammar using Lark.

    This class accepts an optional `context` dictionary at initialization. The context
    provides user-defined variables that can be referenced in expressions and take
    precedence over variables from the input data source.

    To avoid ambiguity, variables prefixed with a dot (e.g., `.foo`) are always
    resolved from the data, while unprefixed variables (e.g., `foo`) are resolved
    from the context if present.

    Attributes:
        grammar: Lark grammar describing the query language.
        parser: LALR `Lark` instance built from `grammar` with position
            propagation enabled.
        shaper: `ExpressionTransformer` applied to the parse tree by `parse`.
    """

    # NOTE(review): the character class `[D|d]` in TIMEDELTA also matches a
    # literal "|" (so "1|00:00:00" lexes as a timedelta); `[Dd]` is presumably
    # what was intended - confirm against the transformer before changing it.
    grammar = r"""
        ?start: or_expr

        ?or_expr: and_expr
            | or_expr ("or" | "||") and_expr -> or_op

        ?and_expr: not_expr
            | and_expr ("and" | "&&") not_expr -> and_op

        ?not_expr: comparison
            | ("not" | "!") comparison -> not_op

        ?comparison: sum
            | sum ">" sum -> gt
            | sum ">=" sum -> gte
            | sum "<" sum -> lt
            | sum "<=" sum -> lte
            | sum "=" sum -> any_eq
            | sum "==" sum -> eq
            | sum ("like" | "LIKE") sum -> like_op
            | sum ("in" | "IN") sum -> in_op
            | sum ("contains" | "CONTAINS") sum -> contains_op

        ?sum: product
            | sum "+" product -> add
            | sum "-" product -> sub

        ?product: range
            | product "*" range -> mul
            | product "/" range -> div
            | product "%" range -> mod

        ?range: exists
            | exists ".." exists -> range_op
            | range "." exists -> concat_op

        ?exists: atom
            | VARIABLE "??" -> exists_op
            | VARIABLE "??" atom -> exists_with_default

        ?atom: NUMBER -> number
            | "-" atom -> neg
            | "(" or_expr ")"
            | ipv4_atom
            | ipv6_atom
            | datetime
            | TIMEDELTA -> timedelta
            | STRING -> string
            | VARIABLE -> variable
            | function
            | list

        ?ipv4_atom: IPV4 -> ipv4_single
            | IPV4_RANGE -> ipv4_range
            | IPV4_CIDR -> ipv4_cidr

        ?ipv6_atom: IPV6 -> ipv6_single
            | IPV6_RANGE -> ipv6_range
            | IPV6_CIDR -> ipv6_cidr

        datetime: DATE ("T" | "t")? TIME -> datetime_full
            | DATE -> datetime_only_date

        function: FUNCTION [args] ")"
        args: atom ("," atom)*

        list: "[" [atom ("," atom)*] "]"

        NUMBER: /\d+(\.\d+)?([eE][+-]?\d+)?/
        IPV4.2: /\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/
        IPV4_RANGE.2: /\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}-\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/
        IPV4_CIDR.2: /\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\/\d{1,2}/
        IPV6.2: /[:a-fA-F0-9]+:[:a-fA-F0-9]*/
        IPV6_RANGE.2: /[:a-fA-F0-9]+:[:a-fA-F0-9]*-[:a-fA-F0-9]+:[:a-fA-F0-9]*/
        IPV6_CIDR.2: /[:a-fA-F0-9]+:[:a-fA-F0-9]*\/\d{1,3}/
        DATE.2: /[0-9]{4}-[0-9]{2}-[0-9]{2}/
        TIME.2: /[0-9]{2}:[0-9]{2}:[0-9]{2}(?:\.[0-9]+)?(?:[Zz]|(?:[+-][0-9]{2}:[0-9]{2}))?/
        TIMEDELTA.2: /([0-9]+[D|d])?[0-9]{2}:[0-9]{2}:[0-9]{2}/
        STRING: /"([^"]+)"|\'([^\']+)\'/
        VARIABLE: /\.?[_a-zA-Z][-_a-zA-Z0-9]*(?:\.?[a-zA-Z][-_a-zA-Z0-9]*)*/
        FUNCTION.2: /[_a-zA-Z][_a-zA-Z0-9]*\(/

        %import common.WS
        %ignore WS
    """  # noqa

    def __init__(self, context: dict[str, Any] | None = None) -> None:
        """
        Initialize the Parser.

        Args:
            context (dict[str, Any] | None): Optional dictionary of variables that can
                be referenced in queries. Context variables override data variables unless
                the data variable is explicitly accessed with a leading dot.
        """
        self.parser = Lark(self.grammar, parser="lalr", propagate_positions=True)
        self.shaper = ExpressionTransformer(context)

    @staticmethod
    def _error_span(failing: Any, data: str) -> dict[str, int]:
        """
        Work out which part of `data` a failed transformation refers to.

        The object attached to a `VisitError` is not always a tree whose first
        and last children are tokens: it may be a bare token (no `children`),
        a child may be a nested subtree (positions stored on `.meta`), or a
        child may be a `None` placeholder left by an unmatched optional such
        as `[args]`. Each boundary is therefore resolved from the child
        itself, then the child's `.meta`, then the failing node (or its
        `.meta`), and finally falls back to the boundaries of the whole input.

        Args:
            failing: The object the transformer was processing when it failed.
            data: The complete input string handed to the parser.

        Returns:
            Mapping with the keys `line`, `column`, `start_pos`, `end_pos`,
            `end_line` and `end_column`.
        """

        def locate(node: Any, attr: str) -> Any:
            # Tokens expose positions directly, subtrees expose them on `.meta`.
            for candidate in (node, getattr(node, "meta", None)):
                if getattr(candidate, attr, None) is not None:
                    return candidate
            return None

        children = getattr(failing, "children", None) or ()

        first = locate(children[0], "start_pos") if children else None
        if first is None:
            first = locate(failing, "start_pos")

        last = locate(children[-1], "end_pos") if children else None
        if last is None:
            last = locate(failing, "end_pos")

        if first is None:
            # No position information at all: point at the start of the input.
            span = {"line": 1, "column": 1, "start_pos": 0}
        else:
            span = {
                "line": first.line,
                "column": first.column,
                "start_pos": first.start_pos,
            }

        if last is None:
            # No position information at all: extend to the end of the input.
            last_line_start = data.rfind("\n") + 1
            span["end_pos"] = len(data)
            span["end_line"] = data.count("\n") + 1
            span["end_column"] = len(data) - last_line_start + 1
        else:
            span["end_pos"] = last.end_pos
            span["end_line"] = last.end_line
            span["end_column"] = last.end_column

        return span

    @no_type_check
    def parse(self, data: str) -> Tree[Token]:
        """
        Parse `data` and shape the resulting tree with the transformer.

        Args:
            data: Query expression to parse.

        Returns:
            Whatever `self.shaper.transform` produces for the parse tree.

        Raises:
            ParseError: If the input does not conform to the grammar.
            ShapeError: If the transformer fails while shaping the tree; the
                error carries the position of the offending part of `data`.
        """
        try:
            parsed_tree = self.parser.parse(data)
            return self.shaper.transform(parsed_tree)
        except UnexpectedInput as e:
            raise ParseError(e.line, e.column, e.get_context(data)) from None
        except VisitError as e:
            span = self._error_span(e.obj, data)

            # Extract context - up to 40 characters of input before and after
            # the problematic part of the expression
            context_start_pos = max(span["start_pos"] - 40, 0)
            context_end_pos = min(span["end_pos"] + 40, len(data))
            raw_context = data[context_start_pos:context_end_pos]

            # Add '^' at the start of the problematic token
            context_with_caret = add_caret_to_context(
                context=raw_context,
                line=span["line"],
                column=span["column"],
                original_data=data,
                context_start_pos=context_start_pos,
            )

            raise ShapeError(
                str(e.orig_exc),
                line=span["line"],
                column=span["column"],
                context=context_with_caret,
                start_pos=span["start_pos"],
                end_pos=span["end_pos"],
                end_line=span["end_line"],
                end_column=span["end_column"],
            ) from None

    def parse_only(self, data: str) -> Tree[Token]:
        """
        Parse `data` into a raw Lark tree without applying the transformer.

        Args:
            data: Query expression to parse.

        Returns:
            The parse tree produced by the Lark parser.

        Raises:
            ParseError: If the input does not conform to the grammar.
        """
        try:
            return self.parser.parse(data)
        except UnexpectedInput as e:
            raise ParseError(e.line, e.column, e.get_context(data)) from None
|
ransack/py.typed
ADDED
|
File without changes
|