pbi-parsers 0.7.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. pbi_parsers/__init__.py +9 -0
  2. pbi_parsers/base/__init__.py +7 -0
  3. pbi_parsers/base/lexer.py +127 -0
  4. pbi_parsers/base/tokens.py +61 -0
  5. pbi_parsers/dax/__init__.py +22 -0
  6. pbi_parsers/dax/exprs/__init__.py +107 -0
  7. pbi_parsers/dax/exprs/_base.py +46 -0
  8. pbi_parsers/dax/exprs/_utils.py +45 -0
  9. pbi_parsers/dax/exprs/add_sub.py +73 -0
  10. pbi_parsers/dax/exprs/add_sub_unary.py +72 -0
  11. pbi_parsers/dax/exprs/array.py +75 -0
  12. pbi_parsers/dax/exprs/column.py +56 -0
  13. pbi_parsers/dax/exprs/comparison.py +76 -0
  14. pbi_parsers/dax/exprs/concatenation.py +73 -0
  15. pbi_parsers/dax/exprs/div_mul.py +75 -0
  16. pbi_parsers/dax/exprs/exponent.py +67 -0
  17. pbi_parsers/dax/exprs/function.py +102 -0
  18. pbi_parsers/dax/exprs/hierarchy.py +68 -0
  19. pbi_parsers/dax/exprs/identifier.py +46 -0
  20. pbi_parsers/dax/exprs/ins.py +67 -0
  21. pbi_parsers/dax/exprs/keyword.py +60 -0
  22. pbi_parsers/dax/exprs/literal_number.py +46 -0
  23. pbi_parsers/dax/exprs/literal_string.py +45 -0
  24. pbi_parsers/dax/exprs/logical.py +76 -0
  25. pbi_parsers/dax/exprs/measure.py +44 -0
  26. pbi_parsers/dax/exprs/none.py +30 -0
  27. pbi_parsers/dax/exprs/parens.py +61 -0
  28. pbi_parsers/dax/exprs/returns.py +76 -0
  29. pbi_parsers/dax/exprs/table.py +51 -0
  30. pbi_parsers/dax/exprs/variable.py +68 -0
  31. pbi_parsers/dax/formatter.py +215 -0
  32. pbi_parsers/dax/lexer.py +222 -0
  33. pbi_parsers/dax/main.py +63 -0
  34. pbi_parsers/dax/parser.py +66 -0
  35. pbi_parsers/dax/tokens.py +54 -0
  36. pbi_parsers/dax/utils.py +120 -0
  37. pbi_parsers/pq/__init__.py +17 -0
  38. pbi_parsers/pq/exprs/__init__.py +98 -0
  39. pbi_parsers/pq/exprs/_base.py +33 -0
  40. pbi_parsers/pq/exprs/_utils.py +31 -0
  41. pbi_parsers/pq/exprs/add_sub.py +59 -0
  42. pbi_parsers/pq/exprs/add_sub_unary.py +57 -0
  43. pbi_parsers/pq/exprs/and_or_expr.py +60 -0
  44. pbi_parsers/pq/exprs/array.py +53 -0
  45. pbi_parsers/pq/exprs/arrow.py +50 -0
  46. pbi_parsers/pq/exprs/column.py +42 -0
  47. pbi_parsers/pq/exprs/comparison.py +62 -0
  48. pbi_parsers/pq/exprs/concatenation.py +61 -0
  49. pbi_parsers/pq/exprs/div_mul.py +59 -0
  50. pbi_parsers/pq/exprs/each.py +41 -0
  51. pbi_parsers/pq/exprs/ellipsis_expr.py +28 -0
  52. pbi_parsers/pq/exprs/function.py +63 -0
  53. pbi_parsers/pq/exprs/identifier.py +77 -0
  54. pbi_parsers/pq/exprs/if_expr.py +70 -0
  55. pbi_parsers/pq/exprs/is_expr.py +54 -0
  56. pbi_parsers/pq/exprs/keyword.py +40 -0
  57. pbi_parsers/pq/exprs/literal_number.py +31 -0
  58. pbi_parsers/pq/exprs/literal_string.py +31 -0
  59. pbi_parsers/pq/exprs/meta.py +54 -0
  60. pbi_parsers/pq/exprs/negation.py +52 -0
  61. pbi_parsers/pq/exprs/none.py +22 -0
  62. pbi_parsers/pq/exprs/not_expr.py +39 -0
  63. pbi_parsers/pq/exprs/parens.py +43 -0
  64. pbi_parsers/pq/exprs/record.py +58 -0
  65. pbi_parsers/pq/exprs/row.py +54 -0
  66. pbi_parsers/pq/exprs/row_index.py +57 -0
  67. pbi_parsers/pq/exprs/statement.py +67 -0
  68. pbi_parsers/pq/exprs/try_expr.py +55 -0
  69. pbi_parsers/pq/exprs/type_expr.py +78 -0
  70. pbi_parsers/pq/exprs/variable.py +52 -0
  71. pbi_parsers/pq/formatter.py +13 -0
  72. pbi_parsers/pq/lexer.py +219 -0
  73. pbi_parsers/pq/main.py +63 -0
  74. pbi_parsers/pq/parser.py +65 -0
  75. pbi_parsers/pq/tokens.py +81 -0
  76. pbi_parsers-0.7.8.dist-info/METADATA +66 -0
  77. pbi_parsers-0.7.8.dist-info/RECORD +78 -0
  78. pbi_parsers-0.7.8.dist-info/WHEEL +4 -0
@@ -0,0 +1,215 @@
1
+ import string
2
+ import textwrap
3
+ from typing import Any
4
+
5
+ from .exprs import (
6
+ AddSubExpression,
7
+ AddSubUnaryExpression,
8
+ ArrayExpression,
9
+ ColumnExpression,
10
+ ComparisonExpression,
11
+ ConcatenationExpression,
12
+ DivMulExpression,
13
+ ExponentExpression,
14
+ Expression,
15
+ FunctionExpression,
16
+ HierarchyExpression,
17
+ IdentifierExpression,
18
+ InExpression,
19
+ KeywordExpression,
20
+ LiteralNumberExpression,
21
+ LiteralStringExpression,
22
+ LogicalExpression,
23
+ MeasureExpression,
24
+ NoneExpression,
25
+ ParenthesesExpression,
26
+ ReturnExpression,
27
+ TableExpression,
28
+ VariableExpression,
29
+ )
30
+ from .tokens import Token
31
+
32
MAX_ARGUMENT_LENGTH = 40  # Maximum length of arguments before formatting them on new lines


def format_comments(comments: list[Token], indent_chars: int) -> str:
    """Join comment tokens into one newline-separated, indented string.

    Args:
        comments: Comment tokens whose source text is concatenated.
        indent_chars: Number of spaces to indent every non-blank line.

    Returns:
        The stripped comment texts, one per line, indented by ``indent_chars``.

    """
    stripped_lines = [comment.text_slice.get_text().strip() for comment in comments]
    return textwrap.indent("\n".join(stripped_lines), indent_chars * " ")
39
+
40
+
41
+ class Formatter:
42
+ """Formats a DAX expression into a standardized format."""
43
+
44
+ def __init__(self, expression: "Expression") -> None:
45
+ self.expression = expression
46
+
47
+ def format(self) -> str:
48
+ return self._format_helper(self.expression)
49
+
50
+ @classmethod
51
+ def _format_helper(cls, expr: Expression) -> str:
52
+ mapper: Any = {
53
+ AddSubExpression: cls._format_add_sub,
54
+ AddSubUnaryExpression: cls._format_add_sub_unary,
55
+ ArrayExpression: cls._format_array,
56
+ ComparisonExpression: cls._format_comparison,
57
+ ColumnExpression: cls._format_column,
58
+ ConcatenationExpression: cls._format_concatenation,
59
+ DivMulExpression: cls._format_div_mul,
60
+ ExponentExpression: cls._format_exponent,
61
+ FunctionExpression: cls._format_function,
62
+ HierarchyExpression: cls._format_hierarchy,
63
+ IdentifierExpression: cls._format_identifier,
64
+ InExpression: cls._format_in,
65
+ KeywordExpression: cls._format_keyword,
66
+ LiteralNumberExpression: cls._format_literal_number,
67
+ LiteralStringExpression: cls._format_literal_string,
68
+ LogicalExpression: cls._format_logical,
69
+ MeasureExpression: cls._format_measure,
70
+ NoneExpression: lambda _: "",
71
+ ParenthesesExpression: cls._format_parens,
72
+ ReturnExpression: cls._format_return,
73
+ TableExpression: cls._format_table,
74
+ VariableExpression: cls._format_variable,
75
+ }
76
+ if type(expr) in mapper:
77
+ base_format = mapper[type(expr)](expr)
78
+ if expr.pre_comments:
79
+ base_format = f"{format_comments(expr.pre_comments, 0)}\n{base_format}"
80
+ if expr.post_comments:
81
+ base_format = f"{base_format} {format_comments(expr.post_comments, 0)}"
82
+ return base_format
83
+
84
+ msg = f"Unsupported expression type: {type(expr).__name__}"
85
+ raise TypeError(msg)
86
+
87
+ @classmethod
88
+ def _format_add_sub(cls, expr: AddSubExpression) -> str:
89
+ left = cls._format_helper(expr.left)
90
+ right = cls._format_helper(expr.right)
91
+ return f"""{left} {expr.operator.text} {right}"""
92
+
93
+ @classmethod
94
+ def _format_add_sub_unary(cls, expr: AddSubUnaryExpression) -> str:
95
+ return f"{expr.operator.text}{cls._format_helper(expr.number)}"
96
+
97
+ @classmethod
98
+ def _format_array(cls, expr: ArrayExpression) -> str:
99
+ elements = ",\n".join(cls._format_helper(el) for el in expr.elements)
100
+ elements = textwrap.indent(elements, " " * 4)[4:]
101
+ return f"""{{
102
+ {elements}
103
+ }}
104
+ """
105
+
106
+ @classmethod
107
+ def _format_comparison(cls, expr: ComparisonExpression) -> str:
108
+ left = cls._format_helper(expr.left)
109
+ right = cls._format_helper(expr.right)
110
+ return f"""{left} {expr.operator.text} {right}"""
111
+
112
+ @classmethod
113
+ def _format_column(cls, expr: ColumnExpression) -> str:
114
+ table = expr.table.text
115
+ if table.startswith("'") and all(c in string.ascii_letters + string.digits + "_" for c in table[1:-1]):
116
+ table = table[1:-1]
117
+ column = expr.column.text
118
+ return f"{table}{column}"
119
+
120
+ @classmethod
121
+ def _format_concatenation(cls, expr: ConcatenationExpression) -> str:
122
+ left = cls._format_helper(expr.left)
123
+ right = cls._format_helper(expr.right)
124
+ return f"""{left} {expr.operator.text} {right}"""
125
+
126
+ @classmethod
127
+ def _format_div_mul(cls, expr: DivMulExpression) -> str:
128
+ left = cls._format_helper(expr.left)
129
+ right = cls._format_helper(expr.right)
130
+ return f"""{left} {expr.operator.text} {right}"""
131
+
132
+ @classmethod
133
+ def _format_exponent(cls, expr: ExponentExpression) -> str:
134
+ base = cls._format_helper(expr.base)
135
+ power = cls._format_helper(expr.power)
136
+ return f"""{base}^{power}"""
137
+
138
+ @classmethod
139
+ def _format_function(cls, expr: FunctionExpression) -> str:
140
+ name = "".join(token.text for token in expr.name_parts)
141
+ args = [cls._format_helper(arg) for arg in expr.args]
142
+ if sum(len(x) for x in args) < MAX_ARGUMENT_LENGTH:
143
+ arg_str = ", ".join(args)
144
+ return f"{name}({arg_str})"
145
+ arg_str = textwrap.indent(",\n".join(args), " " * 4)[4:]
146
+ return f"""
147
+ {name}(
148
+ {arg_str}
149
+ )""".strip()
150
+
151
+ @classmethod
152
+ def _format_hierarchy(cls, expr: HierarchyExpression) -> str:
153
+ table = expr.table.text
154
+ if table.startswith("'") and all(c in string.ascii_letters + string.digits + "_" for c in table[1:-1]):
155
+ table = table[1:-1]
156
+ return f"{table}{expr.column.text}.{expr.level.text}"
157
+
158
+ @classmethod
159
+ def _format_identifier(cls, expr: IdentifierExpression) -> str:
160
+ return expr.name.text
161
+
162
+ @classmethod
163
+ def _format_in(cls, expr: InExpression) -> str:
164
+ value = cls._format_helper(expr.value)
165
+ array = cls._format_helper(expr.array)
166
+ return f"""{value} IN {array}"""
167
+
168
+ @classmethod
169
+ def _format_keyword(cls, expr: KeywordExpression) -> str:
170
+ return expr.name.text
171
+
172
+ @classmethod
173
+ def _format_literal_string(cls, expr: LiteralStringExpression) -> str:
174
+ return expr.value.text
175
+
176
+ @classmethod
177
+ def _format_literal_number(cls, expr: LiteralNumberExpression) -> str:
178
+ return expr.value.text
179
+
180
+ @classmethod
181
+ def _format_logical(cls, expr: LogicalExpression) -> str:
182
+ left = cls._format_helper(expr.left)
183
+ right = cls._format_helper(expr.right)
184
+ return f"""{left} {expr.operator.text} {right}"""
185
+
186
+ @classmethod
187
+ def _format_measure(cls, expr: MeasureExpression) -> str:
188
+ return expr.name.text
189
+
190
+ @classmethod
191
+ def _format_parens(cls, expr: ParenthesesExpression) -> str:
192
+ inner = cls._format_helper(expr.inner_statement)
193
+ return f"({inner})"
194
+
195
+ @classmethod
196
+ def _format_return(cls, expr: ReturnExpression) -> str:
197
+ variable_strs = "\n".join(cls._format_helper(var) for var in expr.variable_statements)
198
+ return_statement: str = cls._format_helper(expr.ret)
199
+ return f"""
200
+ {variable_strs}
201
+ RETURN {return_statement}
202
+ """.strip()
203
+
204
+ @classmethod
205
+ def _format_table(cls, expr: TableExpression) -> str:
206
+ table_name = expr.name.text
207
+ if table_name.startswith("'") and all(
208
+ c in string.ascii_letters + string.digits + "_" for c in table_name[1:-1]
209
+ ):
210
+ table_name = table_name[1:-1]
211
+ return table_name
212
+
213
+ @classmethod
214
+ def _format_variable(cls, expr: VariableExpression) -> str:
215
+ return f"{expr.var_name.text} = {cls._format_helper(expr.statement)}"
@@ -0,0 +1,222 @@
1
+ from pbi_parsers.base import BaseLexer
2
+ from pbi_parsers.base.tokens import TextSlice
3
+
4
+ from .tokens import KEYWORD_MAPPING, Token, TokenType
5
+
6
# Characters treated as whitespace between tokens. A frozenset gives O(1)
# membership tests in the lexer's hot `c in WHITESPACE` loops (a list is O(n)).
WHITESPACE = frozenset({"\n", "\r", "\t", " ", "\f", "\v"})
7
+
8
+
9
+ class Lexer(BaseLexer):
10
+ def scan(self) -> tuple[Token]:
11
+ return super().scan() # type: ignore[override]
12
+
13
+ def create_token(self, tok_type: TokenType, start_pos: int) -> Token:
14
+ """Create a new token with the given type and text."""
15
+ text_slice = TextSlice(
16
+ full_text=self.source,
17
+ start=start_pos,
18
+ end=self.current_position,
19
+ )
20
+ return Token(tok_type=tok_type, text_slice=text_slice)
21
+
22
+ def _match_in(self, start_pos: int) -> Token | None:
23
+ if self.match(
24
+ "in ",
25
+ case_insensitive=True,
26
+ ): # I have found no case where "in" is not followed by a space
27
+ # this allows us to avoid matching with the "int" function
28
+ self.advance(-1) # leave the space to be consumed by whitespace handling
29
+ return self.create_token(
30
+ tok_type=TokenType.IN,
31
+ start_pos=start_pos,
32
+ )
33
+ return None
34
+
35
+ def _match_keyword(self, start_pos: int) -> Token | None:
36
+ for keyword, token_type in KEYWORD_MAPPING.items():
37
+ if self.match(keyword, case_insensitive=True):
38
+ return self.create_token(
39
+ tok_type=token_type,
40
+ start_pos=start_pos,
41
+ )
42
+ return None
43
+
44
+ def _match_whitespace(self, start_pos: int) -> Token | None:
45
+ if self.match(lambda c: c in WHITESPACE):
46
+ while self.match(lambda c: c in WHITESPACE):
47
+ pass
48
+ return self.create_token(
49
+ tok_type=TokenType.WHITESPACE,
50
+ start_pos=start_pos,
51
+ )
52
+ return None
53
+
54
+ def _match_var(self, start_pos: int) -> Token | None:
55
+ if self.match("var", case_insensitive=True):
56
+ return self.create_token(
57
+ tok_type=TokenType.VARIABLE,
58
+ start_pos=start_pos,
59
+ )
60
+ return None
61
+
62
+ def _match_return(self, start_pos: int) -> Token | None:
63
+ if self.match("return", case_insensitive=True):
64
+ return self.create_token(
65
+ tok_type=TokenType.RETURN,
66
+ start_pos=start_pos,
67
+ )
68
+ return None
69
+
70
+ def _match_period(self, start_pos: int) -> Token | None:
71
+ if self.match("."):
72
+ # must come before number literal to avoid conflict
73
+ return self.create_token(
74
+ tok_type=TokenType.PERIOD,
75
+ start_pos=start_pos,
76
+ )
77
+ return None
78
+
79
+ def _match_number_literal(self, start_pos: int) -> Token | None:
80
+ if self.match(
81
+ lambda c: c.isdigit() or c == ".",
82
+ ): # must come before unquoted identifier to avoid conflict
83
+ while self.match(lambda c: c.isdigit() or c in {".", "e", "E"}):
84
+ pass
85
+ return self.create_token(
86
+ tok_type=TokenType.NUMBER_LITERAL,
87
+ start_pos=start_pos,
88
+ )
89
+ return None
90
+
91
+ def _match_unquoted_identifier(self, start_pos: int) -> Token | None:
92
+ if self.match(lambda c: c.isalnum() or c == "_"):
93
+ while self.match(lambda c: c.isalnum() or c == "_"):
94
+ pass
95
+ return self.create_token(
96
+ tok_type=TokenType.UNQUOTED_IDENTIFIER,
97
+ start_pos=start_pos,
98
+ )
99
+ return None
100
+
101
+ def _match_single_quoted_identifier(self, start_pos: int) -> Token | None:
102
+ if self.match("'"):
103
+ while self.match(lambda c: c != "'"):
104
+ pass
105
+ if self.match("'"):
106
+ return self.create_token(
107
+ tok_type=TokenType.SINGLE_QUOTED_IDENTIFIER,
108
+ start_pos=start_pos,
109
+ )
110
+ msg = "Unterminated string literal"
111
+ raise ValueError(msg)
112
+ return None
113
+
114
+ def _match_bracketed_identifier(self, start_pos: int) -> Token | None:
115
+ if self.match("["):
116
+ while self.match(lambda c: c != "]"):
117
+ pass
118
+ if self.match("]"):
119
+ return self.create_token(
120
+ tok_type=TokenType.BRACKETED_IDENTIFIER,
121
+ start_pos=start_pos,
122
+ )
123
+ msg = "Unterminated bracketed identifier"
124
+ raise ValueError(msg)
125
+ return None
126
+
127
+ def _match_string_literal(self, start_pos: int) -> Token | None:
128
+ if self.match('"'):
129
+ while self.match(lambda c: c != '"') or self.match('""'):
130
+ pass
131
+ if self.match('"'):
132
+ return self.create_token(
133
+ tok_type=TokenType.STRING_LITERAL,
134
+ start_pos=start_pos,
135
+ )
136
+ msg = "Unterminated string literal"
137
+ raise ValueError(msg)
138
+ return None
139
+
140
+ def _match_single_line_comment(self, start_pos: int) -> Token | None:
141
+ if self.match("//") or self.match("--"):
142
+ while self.match(lambda c: c not in {"\n", ""}):
143
+ pass
144
+ return self.create_token(
145
+ tok_type=TokenType.SINGLE_LINE_COMMENT,
146
+ start_pos=start_pos,
147
+ )
148
+ return None
149
+
150
+ def _match_multi_line_comment(self, start_pos: int) -> Token | None:
151
+ if not self.match("/*"):
152
+ return None
153
+
154
+ while not self.at_end():
155
+ if self.match("*/", chunk=2):
156
+ return self.create_token(
157
+ tok_type=TokenType.MULTI_LINE_COMMENT,
158
+ start_pos=start_pos,
159
+ )
160
+ self.advance()
161
+
162
+ msg = "Unterminated multi-line comment"
163
+ raise ValueError(msg)
164
+
165
+ def _match_token(self, start_pos: int) -> Token | None:
166
+ fixed_character_mapping = {
167
+ "(": TokenType.LEFT_PAREN,
168
+ ")": TokenType.RIGHT_PAREN,
169
+ ",": TokenType.COMMA,
170
+ "==": TokenType.EQUAL_SIGN,
171
+ "=": TokenType.EQUAL_SIGN,
172
+ "{": TokenType.LEFT_CURLY_BRACE,
173
+ "}": TokenType.RIGHT_CURLY_BRACE,
174
+ "<>": TokenType.NOT_EQUAL_SIGN,
175
+ "<=": TokenType.COMPARISON_OPERATOR,
176
+ "<": TokenType.COMPARISON_OPERATOR,
177
+ ">=": TokenType.COMPARISON_OPERATOR,
178
+ ">": TokenType.COMPARISON_OPERATOR,
179
+ "||": TokenType.DOUBLE_PIPE_OPERATOR,
180
+ "&&": TokenType.DOUBLE_AMPERSAND_OPERATOR,
181
+ "&": TokenType.AMPERSAND_OPERATOR,
182
+ "+": TokenType.PLUS_SIGN,
183
+ "-": TokenType.MINUS_SIGN,
184
+ "^": TokenType.EXPONENTIATION_SIGN,
185
+ "*": TokenType.MULTIPLY_SIGN,
186
+ "%": TokenType.MODULUS_SIGN,
187
+ "/": TokenType.DIVIDE_SIGN,
188
+ }
189
+
190
+ for char, token_type in fixed_character_mapping.items():
191
+ if self.match(char):
192
+ return self.create_token(tok_type=token_type, start_pos=start_pos)
193
+ return None
194
+
195
+ def scan_helper(self) -> Token:
196
+ start_pos: int = self.current_position
197
+
198
+ if not self.peek():
199
+ return Token()
200
+
201
+ for candidate_func in (
202
+ self._match_in,
203
+ self._match_keyword,
204
+ self._match_whitespace,
205
+ self._match_var,
206
+ self._match_return,
207
+ self._match_period,
208
+ self._match_number_literal,
209
+ self._match_unquoted_identifier,
210
+ self._match_single_quoted_identifier,
211
+ self._match_bracketed_identifier,
212
+ self._match_string_literal,
213
+ self._match_single_line_comment,
214
+ self._match_multi_line_comment,
215
+ self._match_token,
216
+ ):
217
+ match_candidate = candidate_func(start_pos)
218
+ if match_candidate:
219
+ return match_candidate
220
+
221
+ msg = f"Unexpected character: {self.peek()} at position {self.current_position}"
222
+ raise ValueError(msg)
@@ -0,0 +1,63 @@
1
+ from collections.abc import Iterable
2
+
3
+ from .exprs._base import Expression
4
+ from .formatter import Formatter
5
+ from .lexer import Lexer
6
+ from .parser import Parser
7
+ from .tokens import Token, TokenType
8
+
9
+
10
def remove_non_executing_tokens(tokens: Iterable[Token]) -> list[Token]:
    """Removes tokens that are not executed in the DAX expression.

    Whitespace and comment tokens carry no execution semantics and are
    dropped before parsing.

    Args:
        tokens (Iterable[Token]): List of tokens to filter.

    Returns:
        list[Token]: Filtered list of tokens that are executed.

    """
    non_executing = {
        TokenType.WHITESPACE,
        TokenType.SINGLE_LINE_COMMENT,
        TokenType.MULTI_LINE_COMMENT,
    }
    # A comprehension is clearer (and avoids the lambda) compared to
    # list(filter(...)).
    return [token for token in tokens if token.tok_type not in non_executing]
31
+
32
+
33
def to_ast(text: str) -> Expression | None:
    """Converts a DAX expression string into an AST (Abstract Syntax Tree).

    Args:
        text (str): The DAX expression to parse.

    Returns:
        Expression | None: when matched, returns the root node of the AST representing the DAX expression.
        When not matched, returns None.

    """
    # Lex, drop whitespace/comment tokens, then parse what remains.
    executable_tokens = remove_non_executing_tokens(Lexer(text).scan())
    return Parser(executable_tokens).to_ast()
48
+
49
+
50
def format_expression(text: str) -> str:
    """Formats a DAX expression string into a more readable format.

    Args:
        text (str): The DAX expression to format.

    Returns:
        str: The formatted DAX expression. The input is returned unchanged
        when it cannot be parsed.

    """
    ast = to_ast(text)
    return text if ast is None else Formatter(ast).format()
@@ -0,0 +1,66 @@
1
+ from typing import TYPE_CHECKING, Any
2
+
3
+ from .tokens import Token, TokenType
4
+
5
+ if TYPE_CHECKING:
6
+ from .exprs import Expression
7
+
8
EOF_TOKEN = Token()  # Sentinel returned when reading past the end of the stream (tok_type defaults to EOF).


class Parser:
    """Turns a list of DAX tokens into an expression AST."""

    __tokens: list[Token]
    index: int = 0  # Current read position in the token stream.
    cache: dict[Any, Any]  # Memoization store shared with the expression matchers.

    def __init__(self, tokens: list[Token]) -> None:
        self.__tokens = tokens
        self.index = 0
        self.cache = {}

    def peek(self, forward: int = 0) -> Token:
        """Peek at the next token without advancing the index.

        Args:
            forward (int): How many tokens to look ahead. Defaults to 0.

        Returns:
            Token: The token at the current index + forward, or ``EOF_TOKEN``
            when that position is past the end of the stream.

        """
        if self.index + forward >= len(self.__tokens):
            return EOF_TOKEN
        return self.__tokens[self.index + forward]

    def remaining(self) -> list[Token]:
        """Returns the remaining tokens from the current index.

        Returns:
            list[Token]: The list of tokens from the current index to the end.

        """
        return self.__tokens[self.index :]

    def to_ast(self) -> "Expression | None":
        """Parse the tokens and return the root expression.

        Raises:
            ValueError: If no valid expression is found in the token stream,
                or if unconsumed tokens remain after the expression.

        """
        from .exprs import any_expression_match  # noqa: PLC0415

        ret = any_expression_match(self)
        if ret is None:
            msg = "No valid expression found in the token stream."
            raise ValueError(msg)
        # This was previously an `assert`, which is stripped under `python -O`;
        # raise explicitly so trailing unparsed tokens are always reported.
        if self.peek().tok_type != TokenType.EOF:
            msg = f"Unparsed tokens remain starting at index {self.index}."
            raise ValueError(msg)
        return ret

    def consume(self) -> Token:
        """Returns the next token and advances the index.

        Returns ``EOF_TOKEN`` (without advancing) once the stream is exhausted.
        """
        if self.index >= len(self.__tokens):
            return EOF_TOKEN
        ret = self.__tokens[self.index]
        self.index += 1
        return ret
@@ -0,0 +1,54 @@
1
+ from dataclasses import dataclass
2
+ from enum import Enum, auto
3
+
4
+ from pbi_parsers.base import BaseToken
5
+
6
+
7
class TokenType(Enum):
    """Token categories produced by the DAX lexer.

    Values are assigned with ``auto()`` and are therefore positional:
    do not reorder members.
    """

    AMPERSAND_OPERATOR = auto()
    ASC = auto()
    BRACKETED_IDENTIFIER = auto()
    COMMA = auto()
    COMPARISON_OPERATOR = auto()
    DESC = auto()
    DIVIDE_SIGN = auto()
    DOUBLE_AMPERSAND_OPERATOR = auto()
    DOUBLE_PIPE_OPERATOR = auto()
    EOF = auto()
    EQUAL_SIGN = auto()
    EXPONENTIATION_SIGN = auto()
    FALSE = auto()
    IN = auto()
    LEFT_CURLY_BRACE = auto()
    LEFT_PAREN = auto()
    MINUS_SIGN = auto()
    MODULUS_SIGN = auto()
    MULTIPLY_SIGN = auto()
    MULTI_LINE_COMMENT = auto()
    NOT_EQUAL_SIGN = auto()
    NUMBER_LITERAL = auto()
    PERIOD = auto()
    PLUS_SIGN = auto()
    RETURN = auto()
    RIGHT_CURLY_BRACE = auto()
    RIGHT_PAREN = auto()
    SINGLE_LINE_COMMENT = auto()
    SINGLE_QUOTED_IDENTIFIER = auto()
    STRING_LITERAL = auto()
    TRUE = auto()
    UNQUOTED_IDENTIFIER = auto()
    VARIABLE = auto()
    WHITESPACE = auto()
42
+
43
+
44
# Keyword spellings (matched case-insensitively by the lexer) to their token
# types. Each keyword's spelling is exactly its TokenType member name, so the
# mapping is derived by name lookup; order (TRUE, FALSE, ASC, DESC) is kept.
KEYWORD_MAPPING = {name: TokenType[name] for name in ("TRUE", "FALSE", "ASC", "DESC")}
50
+
51
+
52
@dataclass
class Token(BaseToken):
    """A lexed DAX token.

    ``tok_type`` defaults to ``EOF`` so a bare ``Token()`` serves as the
    end-of-stream sentinel.
    """

    tok_type: TokenType = TokenType.EOF