dftly 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dftly/__init__.py ADDED
@@ -0,0 +1,13 @@
1
+ """dftly - DataFrame Transformation Language parser."""
2
+
3
+ from .nodes import Column, Expression, Literal
4
+ from .parser import Parser, from_yaml, parse
5
+
6
# Names re-exported as the package's public API: the node classes from
# ``.nodes`` followed by the parsing entry points from ``.parser``.
__all__ = ["Column", "Expression", "Literal", "Parser", "parse", "from_yaml"]
dftly/grammar.lark ADDED
@@ -0,0 +1,88 @@
1
// Whitespace is imported from Lark's common library and ignored between
// tokens.
%import common.WS
%ignore WS
// Single- or double-quoted string; a backslash escapes the character that
// follows it.
STRING: /'(?:[^'\\]|\\.)*'|"(?:[^"\\]|\\.)*"/

// Operator and keyword terminals. The /.../i and "..."i forms match
// case-insensitively.
PLUS: "+"
MINUS: "-"
AT: "@"
AS: /as/i
IF: /if/i
ELSE: /else/i
AND_SYM: "&&"
OR_SYM: "||"
NOT_SYM: "!"
// The ".2" suffix raises a terminal's priority above the default, so these
// are preferred where their text could also match a lower-priority terminal
// such as NAME or REGEX_TOKEN.
NOT_MATCH.2: /not\s+match/i
AND.2: "and"i
OR.2: "or"i
NOT.2: "not"i
// Identifier: a letter or underscore followed by letters, digits or
// underscores.
NAME: /[A-Za-z_][A-Za-z0-9_]*/
IN: /in/i
// Unsigned integer or decimal number.
NUMBER: /\d+(?:\.\d+)?/
// Unquoted regex operands: either a parenthesised run of non-whitespace
// characters, or a run containing neither whitespace nor parentheses.
REGEX_PAREN_TOKEN.2: /\([^\s]+\)/
REGEX_TOKEN: /[^\s()]+/
LPAR: "("
RPAR: ")"
// Keywords used by the regex_extract and regex_match rules.
EXTRACT.2: /extract/i
GROUP.2: /group/i
OF.2: /of/i
FROM.2: /from/i
MATCH.2: /match/i
AGAINST.2: /against/i
31
+
32
// Entry point: the whole input is one expression.
//
// Notation: a leading "?" inlines a rule that matched a single child, and
// "-> alias" names the tree node produced by that alternative.
// Binding strength, loosest to tightest, follows the rule nesting below:
// conditional, or, and, not, in, +/-, @, primary.
start: expr

?expr: conditional

// Ternary written as `<bool_expr> if <bool_expr> else <expr>`.
conditional: bool_expr IF bool_expr ELSE expr -> ifexpr
    | bool_expr

// Left-recursive boolean OR; accepts the keyword or the "||" symbol.
?bool_expr: bool_expr (OR|OR_SYM) bool_term -> or_expr
    | bool_term

// Left-recursive boolean AND; accepts the keyword or the "&&" symbol.
?bool_term: bool_term (AND|AND_SYM) bool_factor -> and_expr
    | bool_factor

// Prefix negation; accepts the keyword or the "!" symbol and may be stacked.
?bool_factor: (NOT|NOT_SYM) bool_factor -> not_expr
    | in_expr

// Membership test against a set literal or a range literal.
?in_expr: additive IN set_literal -> value_in_set
    | additive IN range_literal -> value_in_range
    | additive

// Left-recursive addition and subtraction. These alternatives carry no
// alias, so a multi-child match yields a tree named after the rule.
?additive: additive PLUS multiplicative
    | additive MINUS multiplicative
    | multiplicative

// The "@" operator binds tighter than + and -.
?multiplicative: multiplicative AT unary -> resolve_ts
    | unary

// Pass-through level: no unary operators are defined here.
?unary: primary

// Atoms. `NAME as STRING` and `NAME as NAME` are distinguished only by the
// kind of the token after AS.
primary: call_expr
    | regex_extract
    | regex_match
    | NAME AS STRING -> parse_as_format
    | NAME AS NAME -> cast
    | NUMBER -> number
    | STRING -> string
    | NAME -> name
    | group

// Parenthesised sub-expression.
group: "(" expr ")" -> paren_expr

// Function call with an optional comma-separated argument list.
call_expr: NAME "(" [args] ")" -> func
args: expr ("," expr)* -> arg_list

// Brace-delimited, possibly empty, set of expressions.
set_literal: "{" [args] "}" -> literal_set

// Two-element range with each of the four bracket combinations.
// NOTE(review): judging by the alias names, "[" / "]" presumably mark an
// inclusive bound and "(" / ")" an exclusive one - confirm in the transformer.
range_literal: "[" expr "," expr "]" -> range_inc
    | "[" expr "," expr ")" -> range_ie
    | "(" expr "," expr "]" -> range_ei
    | "(" expr "," expr ")" -> range_exc

// `extract [group N of] <regex> from <expr>`.
regex_extract: EXTRACT (GROUP NUMBER OF)? regex FROM expr
// `match <regex> against <expr>` and `not match <regex> against <expr>`.
// Both alternatives use the same alias, so they differ only in their first
// token (MATCH vs NOT_MATCH).
regex_match: MATCH regex AGAINST expr -> regex_match
    | NOT_MATCH regex AGAINST expr -> regex_match
// A regex operand may be either unquoted token form or a quoted STRING.
regex: REGEX_PAREN_TOKEN
    | REGEX_TOKEN
    | STRING
dftly/nodes.py ADDED
@@ -0,0 +1,147 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import (
5
+ Any,
6
+ ClassVar,
7
+ Dict,
8
+ List,
9
+ Mapping,
10
+ Optional,
11
+ TYPE_CHECKING,
12
+ Union,
13
+ )
14
+
15
+ if TYPE_CHECKING: # pragma: no cover - imported for type checking only
16
+ from .parser import Parser
17
+
18
+
19
class NodeBase:
    """Shared helpers for the node dataclasses.

    A subclass sets ``KEY`` to the single mapping key that identifies it and
    may override ``_validate_map`` to normalise the value stored under that
    key before it reaches the constructor.
    """

    KEY: ClassVar[str]

    @staticmethod
    def _validate_keys(
        mapping: Mapping[str, Any],
        allowed: set[str],
        *,
        label: str,
        required: Optional[set[str]] = None,
    ) -> None:
        """Check the keys of *mapping* against *allowed* and *required*.

        Args:
            mapping: Mapping whose keys are inspected.
            allowed: Every key that may appear.
            label: Name used for the mapping in error messages.
            required: Keys that must appear; ``None`` means no requirement.

        Raises:
            ValueError: If a key outside *allowed* is present, or (checked
                second) if a *required* key is absent.
        """
        present = set(mapping)
        unexpected = present - allowed
        if unexpected:
            raise ValueError(f"invalid {label} keys: {unexpected}")
        absent = (required or set()) - present
        if absent:
            raise ValueError(f"{label} missing required keys: {absent}")

    @classmethod
    def _validate_map(cls, value: Any, **kwargs: Any) -> Any:
        """Hook for subclasses to validate the value; returns it unchanged here."""
        return value

    @classmethod
    def from_mapping(cls, mapping: Mapping[str, Any], **kwargs: Any) -> "NodeBase":
        """Build a node from a one-key mapping of the form ``{cls.KEY: value}``.

        The value is passed through ``_validate_map`` together with *kwargs*.
        A mapping result is unpacked as constructor keyword arguments; any
        other result is passed as the single positional argument.

        Raises:
            ValueError: If *mapping* does not contain exactly the key
                ``cls.KEY``.
        """
        key = cls.KEY
        cls._validate_keys(mapping, {key}, label=f"{key} mapping", required={key})
        payload = cls._validate_map(mapping[key], **kwargs)
        return cls(**payload) if isinstance(payload, Mapping) else cls(payload)
57
+
58
+
59
@dataclass
class Literal(NodeBase):
    """A literal value.

    The value may be of any type, including a mapping; it is stored as-is in
    ``value``.
    """

    KEY: ClassVar[str] = "literal"

    value: Any

    def to_dict(self) -> Dict[str, Any]:
        """Return the ``{"literal": value}`` mapping form of this node."""
        return {"literal": self.value}

    @classmethod
    def _validate_map(cls, value: Any, **kwargs: Any) -> Mapping[str, Any]:
        """Wrap the raw value so it is always passed to the constructor as ``value``.

        ``NodeBase.from_mapping`` unpacks any mapping returned by this hook as
        keyword arguments. Returning a mapping-valued literal unchanged would
        therefore spread its keys into the constructor (raising ``TypeError``,
        or silently unwrapping ``{"value": x}``) and break the round trip with
        ``to_dict``. Wrapping keeps every literal, mapping or not, intact.

        Args:
            value: The raw literal taken from under the ``"literal"`` key.
            **kwargs: Accepted for signature compatibility with the base
                hook and ignored.

        Returns:
            ``{"value": value}``.
        """
        return {"value": value}
69
+
70
+
71
@dataclass
class Column(NodeBase):
    """A reference to a dataframe column, optionally annotated with a type."""

    KEY: ClassVar[str] = "column"

    name: str
    type: Optional[str] = None

    def __post_init__(self) -> None:
        """Reject a non-string name and a type that is neither ``None`` nor a string."""
        if not isinstance(self.name, str):
            raise TypeError("column name must be a string")
        if not (self.type is None or isinstance(self.type, str)):
            raise TypeError("column type must be a string")

    def to_dict(self) -> Dict[str, Any]:
        """Return ``{"column": {...}}``; ``"type"`` is included only when set."""
        if self.type is None:
            body = {"name": self.name}
        else:
            body = {"name": self.name, "type": self.type}
        return {"column": body}

    @classmethod
    def _validate_map(
        cls,
        value: Any,
        *,
        input_schema: Optional[Mapping[str, Optional[str]]] = None,
    ) -> Mapping[str, Any]:
        """Normalise a column spec into ``{"name": ..., "type": ...}``.

        Args:
            value: Either the column name as a string, or a mapping with a
                required ``"name"`` key and an optional ``"type"`` key.
            input_schema: Optional name-to-type mapping consulted when the
                spec itself supplies no type.

        Raises:
            TypeError: If *value* is neither a string nor a mapping.
            ValueError: If a mapping has unexpected keys or lacks ``"name"``.
        """
        if isinstance(value, str):
            col_name, col_type = value, None
        elif isinstance(value, Mapping):
            cls._validate_keys(
                value, {"name", "type"}, label="column", required={"name"}
            )
            col_name, col_type = value["name"], value.get("type")
        else:
            raise TypeError("column value must be a string or mapping")

        # Fall back to the schema only when the spec gave no explicit type.
        if col_type is None and input_schema is not None:
            col_type = input_schema.get(col_name)
        return {"name": col_name, "type": col_type}
112
+
113
+
114
+ @dataclass
115
+ class Expression(NodeBase):
116
+ """A parsed expression."""
117
+
118
+ KEY: ClassVar[str] = "expression"
119
+
120
+ type: str
121
+ arguments: Union[List[Any], Dict[str, Any]]
122
+
123
+ def __post_init__(self) -> None:
124
+ if not isinstance(self.type, str):
125
+ raise TypeError("expression type must be a string")
126
+ if not isinstance(self.arguments, (list, dict)):
127
+ raise TypeError("expression arguments must be list or dict")
128
+
129
+ def to_dict(self) -> Dict[str, Any]:
130
+ return {"expression": {"type": self.type, "arguments": self.arguments}}
131
+
132
+ @classmethod
133
+ def _validate_map(
134
+ cls,
135
+ value: Any,
136
+ *,
137
+ parser: "Parser",
138
+ ) -> Mapping[str, Any]:
139
+ if not isinstance(value, Mapping):
140
+ raise TypeError("expression value must be a mapping")
141
+ cls._validate_keys(
142
+ value, {"type", "arguments"}, label="expression", required={"type"}
143
+ )
144
+ expr_type = value["type"]
145
+ args = value.get("arguments", [])
146
+ parsed_args = parser._parse_arguments(args)
147
+ return {"type": expr_type, "arguments": parsed_args}