pbi-parsers 0.7.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pbi_parsers/__init__.py +9 -0
- pbi_parsers/base/__init__.py +7 -0
- pbi_parsers/base/lexer.py +127 -0
- pbi_parsers/base/tokens.py +61 -0
- pbi_parsers/dax/__init__.py +22 -0
- pbi_parsers/dax/exprs/__init__.py +107 -0
- pbi_parsers/dax/exprs/_base.py +46 -0
- pbi_parsers/dax/exprs/_utils.py +45 -0
- pbi_parsers/dax/exprs/add_sub.py +73 -0
- pbi_parsers/dax/exprs/add_sub_unary.py +72 -0
- pbi_parsers/dax/exprs/array.py +75 -0
- pbi_parsers/dax/exprs/column.py +56 -0
- pbi_parsers/dax/exprs/comparison.py +76 -0
- pbi_parsers/dax/exprs/concatenation.py +73 -0
- pbi_parsers/dax/exprs/div_mul.py +75 -0
- pbi_parsers/dax/exprs/exponent.py +67 -0
- pbi_parsers/dax/exprs/function.py +102 -0
- pbi_parsers/dax/exprs/hierarchy.py +68 -0
- pbi_parsers/dax/exprs/identifier.py +46 -0
- pbi_parsers/dax/exprs/ins.py +67 -0
- pbi_parsers/dax/exprs/keyword.py +60 -0
- pbi_parsers/dax/exprs/literal_number.py +46 -0
- pbi_parsers/dax/exprs/literal_string.py +45 -0
- pbi_parsers/dax/exprs/logical.py +76 -0
- pbi_parsers/dax/exprs/measure.py +44 -0
- pbi_parsers/dax/exprs/none.py +30 -0
- pbi_parsers/dax/exprs/parens.py +61 -0
- pbi_parsers/dax/exprs/returns.py +76 -0
- pbi_parsers/dax/exprs/table.py +51 -0
- pbi_parsers/dax/exprs/variable.py +68 -0
- pbi_parsers/dax/formatter.py +215 -0
- pbi_parsers/dax/lexer.py +222 -0
- pbi_parsers/dax/main.py +63 -0
- pbi_parsers/dax/parser.py +66 -0
- pbi_parsers/dax/tokens.py +54 -0
- pbi_parsers/dax/utils.py +120 -0
- pbi_parsers/pq/__init__.py +17 -0
- pbi_parsers/pq/exprs/__init__.py +98 -0
- pbi_parsers/pq/exprs/_base.py +33 -0
- pbi_parsers/pq/exprs/_utils.py +31 -0
- pbi_parsers/pq/exprs/add_sub.py +59 -0
- pbi_parsers/pq/exprs/add_sub_unary.py +57 -0
- pbi_parsers/pq/exprs/and_or_expr.py +60 -0
- pbi_parsers/pq/exprs/array.py +53 -0
- pbi_parsers/pq/exprs/arrow.py +50 -0
- pbi_parsers/pq/exprs/column.py +42 -0
- pbi_parsers/pq/exprs/comparison.py +62 -0
- pbi_parsers/pq/exprs/concatenation.py +61 -0
- pbi_parsers/pq/exprs/div_mul.py +59 -0
- pbi_parsers/pq/exprs/each.py +41 -0
- pbi_parsers/pq/exprs/ellipsis_expr.py +28 -0
- pbi_parsers/pq/exprs/function.py +63 -0
- pbi_parsers/pq/exprs/identifier.py +77 -0
- pbi_parsers/pq/exprs/if_expr.py +70 -0
- pbi_parsers/pq/exprs/is_expr.py +54 -0
- pbi_parsers/pq/exprs/keyword.py +40 -0
- pbi_parsers/pq/exprs/literal_number.py +31 -0
- pbi_parsers/pq/exprs/literal_string.py +31 -0
- pbi_parsers/pq/exprs/meta.py +54 -0
- pbi_parsers/pq/exprs/negation.py +52 -0
- pbi_parsers/pq/exprs/none.py +22 -0
- pbi_parsers/pq/exprs/not_expr.py +39 -0
- pbi_parsers/pq/exprs/parens.py +43 -0
- pbi_parsers/pq/exprs/record.py +58 -0
- pbi_parsers/pq/exprs/row.py +54 -0
- pbi_parsers/pq/exprs/row_index.py +57 -0
- pbi_parsers/pq/exprs/statement.py +67 -0
- pbi_parsers/pq/exprs/try_expr.py +55 -0
- pbi_parsers/pq/exprs/type_expr.py +78 -0
- pbi_parsers/pq/exprs/variable.py +52 -0
- pbi_parsers/pq/formatter.py +13 -0
- pbi_parsers/pq/lexer.py +219 -0
- pbi_parsers/pq/main.py +63 -0
- pbi_parsers/pq/parser.py +65 -0
- pbi_parsers/pq/tokens.py +81 -0
- pbi_parsers-0.7.8.dist-info/METADATA +66 -0
- pbi_parsers-0.7.8.dist-info/RECORD +78 -0
- pbi_parsers-0.7.8.dist-info/WHEEL +4 -0
@@ -0,0 +1,215 @@
|
|
1
|
+
import string
|
2
|
+
import textwrap
|
3
|
+
from typing import Any
|
4
|
+
|
5
|
+
from .exprs import (
|
6
|
+
AddSubExpression,
|
7
|
+
AddSubUnaryExpression,
|
8
|
+
ArrayExpression,
|
9
|
+
ColumnExpression,
|
10
|
+
ComparisonExpression,
|
11
|
+
ConcatenationExpression,
|
12
|
+
DivMulExpression,
|
13
|
+
ExponentExpression,
|
14
|
+
Expression,
|
15
|
+
FunctionExpression,
|
16
|
+
HierarchyExpression,
|
17
|
+
IdentifierExpression,
|
18
|
+
InExpression,
|
19
|
+
KeywordExpression,
|
20
|
+
LiteralNumberExpression,
|
21
|
+
LiteralStringExpression,
|
22
|
+
LogicalExpression,
|
23
|
+
MeasureExpression,
|
24
|
+
NoneExpression,
|
25
|
+
ParenthesesExpression,
|
26
|
+
ReturnExpression,
|
27
|
+
TableExpression,
|
28
|
+
VariableExpression,
|
29
|
+
)
|
30
|
+
from .tokens import Token
|
31
|
+
|
32
|
+
MAX_ARGUMENT_LENGTH = 40 # Maximum length of arguments before formatting them on new lines
|
33
|
+
|
34
|
+
|
35
|
+
def format_comments(comments: list[Token], indent_chars: int) -> str:
    """Join comment texts into one newline-separated, uniformly indented string."""
    stripped_texts = [comment.text_slice.get_text().strip() for comment in comments]
    joined = "\n".join(stripped_texts)
    return textwrap.indent(joined, " " * indent_chars)
|
39
|
+
|
40
|
+
|
41
|
+
class Formatter:
    """Formats a DAX expression into a standardized format.

    Walks the AST produced by the DAX parser and renders each node back to
    text, dispatching on the concrete expression type. The exact whitespace
    emitted (including the multiline templates below) is the formatter's
    output contract.
    """

    def __init__(self, expression: "Expression") -> None:
        self.expression = expression

    def format(self) -> str:
        """Return the formatted text of the wrapped expression tree."""
        return self._format_helper(self.expression)

    @classmethod
    def _format_helper(cls, expr: Expression) -> str:
        """Dispatch ``expr`` to its type-specific formatter.

        Pre- and post-comments attached to the node are re-attached around
        the formatted output.

        Raises:
            TypeError: If the expression type has no registered formatter.

        """
        # NOTE(review): dispatch is by exact type (``type(expr) in mapper``),
        # so subclasses of these expression types would not match — confirm
        # the parser never produces subclasses.
        mapper: Any = {
            AddSubExpression: cls._format_add_sub,
            AddSubUnaryExpression: cls._format_add_sub_unary,
            ArrayExpression: cls._format_array,
            ComparisonExpression: cls._format_comparison,
            ColumnExpression: cls._format_column,
            ConcatenationExpression: cls._format_concatenation,
            DivMulExpression: cls._format_div_mul,
            ExponentExpression: cls._format_exponent,
            FunctionExpression: cls._format_function,
            HierarchyExpression: cls._format_hierarchy,
            IdentifierExpression: cls._format_identifier,
            InExpression: cls._format_in,
            KeywordExpression: cls._format_keyword,
            LiteralNumberExpression: cls._format_literal_number,
            LiteralStringExpression: cls._format_literal_string,
            LogicalExpression: cls._format_logical,
            MeasureExpression: cls._format_measure,
            NoneExpression: lambda _: "",  # a missing/empty node renders as nothing
            ParenthesesExpression: cls._format_parens,
            ReturnExpression: cls._format_return,
            TableExpression: cls._format_table,
            VariableExpression: cls._format_variable,
        }
        if type(expr) in mapper:
            base_format = mapper[type(expr)](expr)
            # Comments that preceded the node go on their own line(s) above it;
            # trailing comments are appended on the same line.
            if expr.pre_comments:
                base_format = f"{format_comments(expr.pre_comments, 0)}\n{base_format}"
            if expr.post_comments:
                base_format = f"{base_format} {format_comments(expr.post_comments, 0)}"
            return base_format

        msg = f"Unsupported expression type: {type(expr).__name__}"
        raise TypeError(msg)

    @classmethod
    def _format_add_sub(cls, expr: AddSubExpression) -> str:
        """Render a binary ``+``/``-`` expression as ``left op right``."""
        left = cls._format_helper(expr.left)
        right = cls._format_helper(expr.right)
        return f"""{left} {expr.operator.text} {right}"""

    @classmethod
    def _format_add_sub_unary(cls, expr: AddSubUnaryExpression) -> str:
        """Render a unary ``+``/``-`` directly prefixed to its operand."""
        return f"{expr.operator.text}{cls._format_helper(expr.number)}"

    @classmethod
    def _format_array(cls, expr: ArrayExpression) -> str:
        """Render a table constructor ``{ ... }`` with one element per line."""
        elements = ",\n".join(cls._format_helper(el) for el in expr.elements)
        # Indent every element line by 4, then strip the first line's indent
        # because the template below already supplies it literally.
        elements = textwrap.indent(elements, " " * 4)[4:]
        return f"""{{
    {elements}
}}
"""

    @classmethod
    def _format_comparison(cls, expr: ComparisonExpression) -> str:
        """Render a comparison as ``left op right``."""
        left = cls._format_helper(expr.left)
        right = cls._format_helper(expr.right)
        return f"""{left} {expr.operator.text} {right}"""

    @classmethod
    def _format_column(cls, expr: ColumnExpression) -> str:
        """Render ``Table[Column]``, unquoting simple table names."""
        table = expr.table.text
        # Drop surrounding single quotes when the name is purely
        # alphanumeric/underscore, e.g. 'Sales' -> Sales.
        if table.startswith("'") and all(c in string.ascii_letters + string.digits + "_" for c in table[1:-1]):
            table = table[1:-1]
        column = expr.column.text
        return f"{table}{column}"

    @classmethod
    def _format_concatenation(cls, expr: ConcatenationExpression) -> str:
        """Render a string concatenation as ``left & right``."""
        left = cls._format_helper(expr.left)
        right = cls._format_helper(expr.right)
        return f"""{left} {expr.operator.text} {right}"""

    @classmethod
    def _format_div_mul(cls, expr: DivMulExpression) -> str:
        """Render a ``*``/``/`` expression as ``left op right``."""
        left = cls._format_helper(expr.left)
        right = cls._format_helper(expr.right)
        return f"""{left} {expr.operator.text} {right}"""

    @classmethod
    def _format_exponent(cls, expr: ExponentExpression) -> str:
        """Render exponentiation as ``base^power`` (no surrounding spaces)."""
        base = cls._format_helper(expr.base)
        power = cls._format_helper(expr.power)
        return f"""{base}^{power}"""

    @classmethod
    def _format_function(cls, expr: FunctionExpression) -> str:
        """Render a function call, breaking arguments onto their own lines when long."""
        name = "".join(token.text for token in expr.name_parts)
        args = [cls._format_helper(arg) for arg in expr.args]
        # Short argument lists stay on one line.
        if sum(len(x) for x in args) < MAX_ARGUMENT_LENGTH:
            arg_str = ", ".join(args)
            return f"{name}({arg_str})"
        # Long argument lists: one argument per line, indented by 4; the
        # template supplies the first line's indent (hence the [4:] strip).
        arg_str = textwrap.indent(",\n".join(args), " " * 4)[4:]
        return f"""
{name}(
    {arg_str}
)""".strip()

    @classmethod
    def _format_hierarchy(cls, expr: HierarchyExpression) -> str:
        """Render ``Table[Column].Level``, unquoting simple table names."""
        table = expr.table.text
        if table.startswith("'") and all(c in string.ascii_letters + string.digits + "_" for c in table[1:-1]):
            table = table[1:-1]
        return f"{table}{expr.column.text}.{expr.level.text}"

    @classmethod
    def _format_identifier(cls, expr: IdentifierExpression) -> str:
        """Render an identifier verbatim."""
        return expr.name.text

    @classmethod
    def _format_in(cls, expr: InExpression) -> str:
        """Render membership tests as ``value IN array``."""
        value = cls._format_helper(expr.value)
        array = cls._format_helper(expr.array)
        return f"""{value} IN {array}"""

    @classmethod
    def _format_keyword(cls, expr: KeywordExpression) -> str:
        """Render a keyword (TRUE/FALSE/ASC/DESC) verbatim."""
        return expr.name.text

    @classmethod
    def _format_literal_string(cls, expr: LiteralStringExpression) -> str:
        """Render a string literal verbatim (quotes included in the token text)."""
        return expr.value.text

    @classmethod
    def _format_literal_number(cls, expr: LiteralNumberExpression) -> str:
        """Render a numeric literal verbatim."""
        return expr.value.text

    @classmethod
    def _format_logical(cls, expr: LogicalExpression) -> str:
        """Render a logical ``&&``/``||`` expression as ``left op right``."""
        left = cls._format_helper(expr.left)
        right = cls._format_helper(expr.right)
        return f"""{left} {expr.operator.text} {right}"""

    @classmethod
    def _format_measure(cls, expr: MeasureExpression) -> str:
        """Render a measure reference verbatim (brackets included in the token text)."""
        return expr.name.text

    @classmethod
    def _format_parens(cls, expr: ParenthesesExpression) -> str:
        """Render a parenthesized expression as ``(inner)``."""
        inner = cls._format_helper(expr.inner_statement)
        return f"({inner})"

    @classmethod
    def _format_return(cls, expr: ReturnExpression) -> str:
        """Render a VAR/RETURN block: variable definitions, then the RETURN line."""
        variable_strs = "\n".join(cls._format_helper(var) for var in expr.variable_statements)
        return_statement: str = cls._format_helper(expr.ret)
        return f"""
{variable_strs}
RETURN {return_statement}
""".strip()

    @classmethod
    def _format_table(cls, expr: TableExpression) -> str:
        """Render a table reference, unquoting simple table names."""
        table_name = expr.name.text
        if table_name.startswith("'") and all(
            c in string.ascii_letters + string.digits + "_" for c in table_name[1:-1]
        ):
            table_name = table_name[1:-1]
        return table_name

    @classmethod
    def _format_variable(cls, expr: VariableExpression) -> str:
        """Render a variable definition as ``name = statement``."""
        return f"{expr.var_name.text} = {cls._format_helper(expr.statement)}"
|
pbi_parsers/dax/lexer.py
ADDED
@@ -0,0 +1,222 @@
|
|
1
|
+
from pbi_parsers.base import BaseLexer
|
2
|
+
from pbi_parsers.base.tokens import TextSlice
|
3
|
+
|
4
|
+
from .tokens import KEYWORD_MAPPING, Token, TokenType
|
5
|
+
|
6
|
+
# Characters the lexer folds into a single WHITESPACE token.
WHITESPACE = ["\n", "\r", "\t", " ", "\f", "\v"]
|
7
|
+
|
8
|
+
|
9
|
+
class Lexer(BaseLexer):
    """Tokenizer for DAX expressions, built on the generic ``BaseLexer``.

    ``scan_helper`` tries a fixed, ordered sequence of ``_match_*`` methods;
    the first one that consumes input wins, so ordering between matchers is
    significant (e.g. ``.`` before number literals, numbers before
    identifiers).
    """

    def scan(self) -> tuple[Token, ...]:
        """Tokenize the whole source and return the token stream."""
        return super().scan()  # type: ignore[override]

    def create_token(self, tok_type: TokenType, start_pos: int) -> Token:
        """Create a new token with the given type and text."""
        # The token text is a slice from start_pos up to the lexer's current
        # position, i.e. everything consumed by the calling matcher.
        text_slice = TextSlice(
            full_text=self.source,
            start=start_pos,
            end=self.current_position,
        )
        return Token(tok_type=tok_type, text_slice=text_slice)

    def _match_in(self, start_pos: int) -> Token | None:
        """Match the ``in`` operator; requires a trailing space."""
        if self.match(
            "in ",
            case_insensitive=True,
        ):  # I have found no case where "in" is not followed by a space
            # this allows us to avoid matching with the "int" function
            self.advance(-1)  # leave the space to be consumed by whitespace handling
            return self.create_token(
                tok_type=TokenType.IN,
                start_pos=start_pos,
            )
        return None

    def _match_keyword(self, start_pos: int) -> Token | None:
        """Match one of the fixed keywords (TRUE/FALSE/ASC/DESC)."""
        # NOTE(review): this appears to match by prefix, so an identifier such
        # as "TRUEX" could lex as TRUE + "X" — confirm BaseLexer.match enforces
        # a word boundary, or that the parser tolerates this.
        for keyword, token_type in KEYWORD_MAPPING.items():
            if self.match(keyword, case_insensitive=True):
                return self.create_token(
                    tok_type=token_type,
                    start_pos=start_pos,
                )
        return None

    def _match_whitespace(self, start_pos: int) -> Token | None:
        """Collapse a run of whitespace characters into one token."""
        if self.match(lambda c: c in WHITESPACE):
            while self.match(lambda c: c in WHITESPACE):
                pass
            return self.create_token(
                tok_type=TokenType.WHITESPACE,
                start_pos=start_pos,
            )
        return None

    def _match_var(self, start_pos: int) -> Token | None:
        """Match the ``VAR`` keyword introducing a variable definition."""
        # NOTE(review): like _match_keyword, this looks prefix-based — an
        # identifier starting with "var" (e.g. "variance") may be split;
        # confirm BaseLexer.match semantics.
        if self.match("var", case_insensitive=True):
            return self.create_token(
                tok_type=TokenType.VARIABLE,
                start_pos=start_pos,
            )
        return None

    def _match_return(self, start_pos: int) -> Token | None:
        """Match the ``RETURN`` keyword of a VAR/RETURN block."""
        if self.match("return", case_insensitive=True):
            return self.create_token(
                tok_type=TokenType.RETURN,
                start_pos=start_pos,
            )
        return None

    def _match_period(self, start_pos: int) -> Token | None:
        """Match a bare ``.`` (hierarchy level separator)."""
        if self.match("."):
            # must come before number literal to avoid conflict
            return self.create_token(
                tok_type=TokenType.PERIOD,
                start_pos=start_pos,
            )
        return None

    def _match_number_literal(self, start_pos: int) -> Token | None:
        """Match a numeric literal (digits, ``.``, and scientific ``e``/``E``)."""
        # NOTE(review): this accepts malformed sequences such as "1.2.3" or
        # "1ee5" as a single NUMBER_LITERAL; validation is presumably deferred
        # to the parser/consumer.
        if self.match(
            lambda c: c.isdigit() or c == ".",
        ):  # must come before unquoted identifier to avoid conflict
            while self.match(lambda c: c.isdigit() or c in {".", "e", "E"}):
                pass
            return self.create_token(
                tok_type=TokenType.NUMBER_LITERAL,
                start_pos=start_pos,
            )
        return None

    def _match_unquoted_identifier(self, start_pos: int) -> Token | None:
        """Match a bare identifier (alphanumerics and underscores)."""
        if self.match(lambda c: c.isalnum() or c == "_"):
            while self.match(lambda c: c.isalnum() or c == "_"):
                pass
            return self.create_token(
                tok_type=TokenType.UNQUOTED_IDENTIFIER,
                start_pos=start_pos,
            )
        return None

    def _match_single_quoted_identifier(self, start_pos: int) -> Token | None:
        """Match a ``'quoted table name'`` identifier.

        Raises:
            ValueError: If the closing ``'`` is missing.

        """
        if self.match("'"):
            while self.match(lambda c: c != "'"):
                pass
            if self.match("'"):
                return self.create_token(
                    tok_type=TokenType.SINGLE_QUOTED_IDENTIFIER,
                    start_pos=start_pos,
                )
            msg = "Unterminated string literal"
            raise ValueError(msg)
        return None

    def _match_bracketed_identifier(self, start_pos: int) -> Token | None:
        """Match a ``[bracketed]`` column/measure identifier.

        Raises:
            ValueError: If the closing ``]`` is missing.

        """
        if self.match("["):
            while self.match(lambda c: c != "]"):
                pass
            if self.match("]"):
                return self.create_token(
                    tok_type=TokenType.BRACKETED_IDENTIFIER,
                    start_pos=start_pos,
                )
            msg = "Unterminated bracketed identifier"
            raise ValueError(msg)
        return None

    def _match_string_literal(self, start_pos: int) -> Token | None:
        """Match a ``"double-quoted"`` string literal.

        Raises:
            ValueError: If the closing ``"`` is missing.

        """
        if self.match('"'):
            # '""' is DAX's escape for an embedded double quote, so a doubled
            # quote keeps the literal open.
            while self.match(lambda c: c != '"') or self.match('""'):
                pass
            if self.match('"'):
                return self.create_token(
                    tok_type=TokenType.STRING_LITERAL,
                    start_pos=start_pos,
                )
            msg = "Unterminated string literal"
            raise ValueError(msg)
        return None

    def _match_single_line_comment(self, start_pos: int) -> Token | None:
        """Match a ``//`` or ``--`` comment running to end of line."""
        if self.match("//") or self.match("--"):
            while self.match(lambda c: c not in {"\n", ""}):
                pass
            return self.create_token(
                tok_type=TokenType.SINGLE_LINE_COMMENT,
                start_pos=start_pos,
            )
        return None

    def _match_multi_line_comment(self, start_pos: int) -> Token | None:
        """Match a ``/* ... */`` comment, which may span lines.

        Raises:
            ValueError: If the closing ``*/`` is missing.

        """
        if not self.match("/*"):
            return None

        while not self.at_end():
            # NOTE(review): chunk=2 presumably makes match consume both
            # characters of "*/" at once — confirm against BaseLexer.match.
            if self.match("*/", chunk=2):
                return self.create_token(
                    tok_type=TokenType.MULTI_LINE_COMMENT,
                    start_pos=start_pos,
                )
            self.advance()

        msg = "Unterminated multi-line comment"
        raise ValueError(msg)

    def _match_token(self, start_pos: int) -> Token | None:
        """Match fixed punctuation/operator tokens.

        Multi-character operators are listed before their single-character
        prefixes ("==" before "=", "<=" before "<", "&&" before "&") because
        dict order determines match priority.
        """
        fixed_character_mapping = {
            "(": TokenType.LEFT_PAREN,
            ")": TokenType.RIGHT_PAREN,
            ",": TokenType.COMMA,
            "==": TokenType.EQUAL_SIGN,
            "=": TokenType.EQUAL_SIGN,
            "{": TokenType.LEFT_CURLY_BRACE,
            "}": TokenType.RIGHT_CURLY_BRACE,
            "<>": TokenType.NOT_EQUAL_SIGN,
            "<=": TokenType.COMPARISON_OPERATOR,
            "<": TokenType.COMPARISON_OPERATOR,
            ">=": TokenType.COMPARISON_OPERATOR,
            ">": TokenType.COMPARISON_OPERATOR,
            "||": TokenType.DOUBLE_PIPE_OPERATOR,
            "&&": TokenType.DOUBLE_AMPERSAND_OPERATOR,
            "&": TokenType.AMPERSAND_OPERATOR,
            "+": TokenType.PLUS_SIGN,
            "-": TokenType.MINUS_SIGN,
            "^": TokenType.EXPONENTIATION_SIGN,
            "*": TokenType.MULTIPLY_SIGN,
            "%": TokenType.MODULUS_SIGN,
            "/": TokenType.DIVIDE_SIGN,
        }

        for char, token_type in fixed_character_mapping.items():
            if self.match(char):
                return self.create_token(tok_type=token_type, start_pos=start_pos)
        return None

    def scan_helper(self) -> Token:
        """Produce the next token at the current position.

        Returns a default (EOF) ``Token()`` at end of input.

        Raises:
            ValueError: If no matcher recognizes the next character.

        """
        start_pos: int = self.current_position

        if not self.peek():
            return Token()

        # Matcher order is significant; see class docstring.
        for candidate_func in (
            self._match_in,
            self._match_keyword,
            self._match_whitespace,
            self._match_var,
            self._match_return,
            self._match_period,
            self._match_number_literal,
            self._match_unquoted_identifier,
            self._match_single_quoted_identifier,
            self._match_bracketed_identifier,
            self._match_string_literal,
            self._match_single_line_comment,
            self._match_multi_line_comment,
            self._match_token,
        ):
            match_candidate = candidate_func(start_pos)
            if match_candidate:
                return match_candidate

        msg = f"Unexpected character: {self.peek()} at position {self.current_position}"
        raise ValueError(msg)
|
pbi_parsers/dax/main.py
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
from collections.abc import Iterable
|
2
|
+
|
3
|
+
from .exprs._base import Expression
|
4
|
+
from .formatter import Formatter
|
5
|
+
from .lexer import Lexer
|
6
|
+
from .parser import Parser
|
7
|
+
from .tokens import Token, TokenType
|
8
|
+
|
9
|
+
|
10
|
+
def remove_non_executing_tokens(tokens: Iterable[Token]) -> list[Token]:
    """Removes tokens that are not executed in the DAX expression.

    Args:
        tokens (Iterable[Token]): List of tokens to filter.

    Returns:
        list[Token]: Filtered list of tokens that are executed.

    """
    non_executing = {
        TokenType.WHITESPACE,
        TokenType.SINGLE_LINE_COMMENT,
        TokenType.MULTI_LINE_COMMENT,
    }
    return [token for token in tokens if token.tok_type not in non_executing]
|
31
|
+
|
32
|
+
|
33
|
+
def to_ast(text: str) -> Expression | None:
    """Converts a DAX expression string into an AST (Abstract Syntax Tree).

    Args:
        text (str): The DAX expression to parse.

    Returns:
        Expression | None: when matched, returns the root node of the AST representing the DAX expression.
            When not matched, returns None.

    """
    raw_tokens = Lexer(text).scan()
    executable_tokens = remove_non_executing_tokens(raw_tokens)
    return Parser(executable_tokens).to_ast()
|
48
|
+
|
49
|
+
|
50
|
+
def format_expression(text: str) -> str:
    """Formats a DAX expression string into a more readable format.

    Args:
        text (str): The DAX expression to format.

    Returns:
        str: The formatted DAX expression.

    """
    ast = to_ast(text)
    # Unparseable input is returned unchanged rather than raising.
    return text if ast is None else Formatter(ast).format()
|
@@ -0,0 +1,66 @@
|
|
1
|
+
from typing import TYPE_CHECKING, Any
|
2
|
+
|
3
|
+
from .tokens import Token, TokenType
|
4
|
+
|
5
|
+
if TYPE_CHECKING:
|
6
|
+
from .exprs import Expression
|
7
|
+
|
8
|
+
# Shared sentinel returned when peeking/consuming past the end of the stream;
# Token() defaults to tok_type=TokenType.EOF.
EOF_TOKEN = Token()
|
9
|
+
|
10
|
+
|
11
|
+
class Parser:
    """Cursor over a DAX token stream with an expression-level memo cache.

    Attributes:
        __tokens: The (already filtered) token stream being parsed.
        index: Current read position within the stream.
        cache: Memoization cache shared by the expression matchers.
    """

    __tokens: list[Token]
    index: int = 0
    cache: dict[Any, Any]

    def __init__(self, tokens: list[Token]) -> None:
        self.__tokens = tokens
        self.index = 0
        self.cache = {}

    def peek(self, forward: int = 0) -> Token:
        """Peek at the next token without advancing the index.

        Args:
            forward (int): How many tokens to look ahead. Defaults to 0.

        Returns:
            Token: The token at the current index + forward, or the EOF
                sentinel when looking past the end of the stream.

        """
        if self.index + forward >= len(self.__tokens):
            return EOF_TOKEN
        return self.__tokens[self.index + forward]

    def remaining(self) -> list[Token]:
        """Returns the remaining tokens from the current index.

        Returns:
            list[Token]: The list of tokens from the current index to the end.

        """
        return self.__tokens[self.index :]

    def to_ast(self) -> "Expression | None":
        """Parse the tokens and return the root expression.

        Raises:
            ValueError: If no valid expression is found in the token stream,
                or if unparsed tokens remain after the expression.

        """
        from .exprs import any_expression_match  # noqa: PLC0415

        ret = any_expression_match(self)
        if ret is None:
            msg = "No valid expression found in the token stream."
            raise ValueError(msg)
        # Previously an `assert`, which is silently stripped under `python -O`;
        # raise explicitly so trailing (unconsumed) tokens are always reported,
        # matching the ValueError contract documented above.
        if self.peek().tok_type != TokenType.EOF:
            msg = f"Unparsed tokens remain starting at index {self.index}."
            raise ValueError(msg)
        return ret

    def consume(self) -> Token:
        """Returns the next token and advances the index.

        Returns the EOF sentinel (without advancing) when the stream is
        exhausted.
        """
        if self.index >= len(self.__tokens):
            return EOF_TOKEN
        ret = self.__tokens[self.index]
        self.index += 1
        return ret
@@ -0,0 +1,54 @@
|
|
1
|
+
from dataclasses import dataclass
|
2
|
+
from enum import Enum, auto
|
3
|
+
|
4
|
+
from pbi_parsers.base import BaseToken
|
5
|
+
|
6
|
+
|
7
|
+
class TokenType(Enum):
    """Categories of tokens produced by the DAX lexer.

    Values are assigned by ``auto()``, so member order is part of the
    serialized identity of each type — do not reorder.
    """

    AMPERSAND_OPERATOR = auto()
    ASC = auto()
    BRACKETED_IDENTIFIER = auto()
    COMMA = auto()
    COMPARISON_OPERATOR = auto()
    DESC = auto()
    DIVIDE_SIGN = auto()
    DOUBLE_AMPERSAND_OPERATOR = auto()
    DOUBLE_PIPE_OPERATOR = auto()
    EOF = auto()
    EQUAL_SIGN = auto()
    EXPONENTIATION_SIGN = auto()
    FALSE = auto()
    IN = auto()
    LEFT_CURLY_BRACE = auto()
    LEFT_PAREN = auto()
    MINUS_SIGN = auto()
    MODULUS_SIGN = auto()
    MULTIPLY_SIGN = auto()
    MULTI_LINE_COMMENT = auto()
    NOT_EQUAL_SIGN = auto()
    NUMBER_LITERAL = auto()
    PERIOD = auto()
    PLUS_SIGN = auto()
    RETURN = auto()
    RIGHT_CURLY_BRACE = auto()
    RIGHT_PAREN = auto()
    SINGLE_LINE_COMMENT = auto()
    SINGLE_QUOTED_IDENTIFIER = auto()
    STRING_LITERAL = auto()
    TRUE = auto()
    UNQUOTED_IDENTIFIER = auto()
    VARIABLE = auto()
    WHITESPACE = auto()
|
42
|
+
|
43
|
+
|
44
|
+
# Keywords matched case-insensitively by the lexer, mapped to their token types.
KEYWORD_MAPPING = {
    "TRUE": TokenType.TRUE,
    "FALSE": TokenType.FALSE,
    "ASC": TokenType.ASC,
    "DESC": TokenType.DESC,
}
|
50
|
+
|
51
|
+
|
52
|
+
@dataclass
class Token(BaseToken):
    """A DAX token.

    Constructed with no arguments it acts as the EOF sentinel, since
    ``tok_type`` defaults to ``TokenType.EOF``.
    """

    tok_type: TokenType = TokenType.EOF
|