polars-runtime-compat 1.34.0b2__cp39-abi3-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of polars-runtime-compat might be problematic. Click here for more details.

Files changed (203) hide show
  1. _polars_runtime_compat/.gitkeep +0 -0
  2. _polars_runtime_compat/_polars_runtime_compat.pyd +0 -0
  3. polars/__init__.py +528 -0
  4. polars/_cpu_check.py +265 -0
  5. polars/_dependencies.py +355 -0
  6. polars/_plr.py +99 -0
  7. polars/_plr.pyi +2496 -0
  8. polars/_reexport.py +23 -0
  9. polars/_typing.py +478 -0
  10. polars/_utils/__init__.py +37 -0
  11. polars/_utils/async_.py +102 -0
  12. polars/_utils/cache.py +176 -0
  13. polars/_utils/cloud.py +40 -0
  14. polars/_utils/constants.py +29 -0
  15. polars/_utils/construction/__init__.py +46 -0
  16. polars/_utils/construction/dataframe.py +1397 -0
  17. polars/_utils/construction/other.py +72 -0
  18. polars/_utils/construction/series.py +560 -0
  19. polars/_utils/construction/utils.py +118 -0
  20. polars/_utils/convert.py +224 -0
  21. polars/_utils/deprecation.py +406 -0
  22. polars/_utils/getitem.py +457 -0
  23. polars/_utils/logging.py +11 -0
  24. polars/_utils/nest_asyncio.py +264 -0
  25. polars/_utils/parquet.py +15 -0
  26. polars/_utils/parse/__init__.py +12 -0
  27. polars/_utils/parse/expr.py +242 -0
  28. polars/_utils/polars_version.py +19 -0
  29. polars/_utils/pycapsule.py +53 -0
  30. polars/_utils/scan.py +27 -0
  31. polars/_utils/serde.py +63 -0
  32. polars/_utils/slice.py +215 -0
  33. polars/_utils/udfs.py +1251 -0
  34. polars/_utils/unstable.py +63 -0
  35. polars/_utils/various.py +782 -0
  36. polars/_utils/wrap.py +25 -0
  37. polars/api.py +370 -0
  38. polars/catalog/__init__.py +0 -0
  39. polars/catalog/unity/__init__.py +19 -0
  40. polars/catalog/unity/client.py +733 -0
  41. polars/catalog/unity/models.py +152 -0
  42. polars/config.py +1571 -0
  43. polars/convert/__init__.py +25 -0
  44. polars/convert/general.py +1046 -0
  45. polars/convert/normalize.py +261 -0
  46. polars/dataframe/__init__.py +5 -0
  47. polars/dataframe/_html.py +186 -0
  48. polars/dataframe/frame.py +12582 -0
  49. polars/dataframe/group_by.py +1067 -0
  50. polars/dataframe/plotting.py +257 -0
  51. polars/datatype_expr/__init__.py +5 -0
  52. polars/datatype_expr/array.py +56 -0
  53. polars/datatype_expr/datatype_expr.py +304 -0
  54. polars/datatype_expr/list.py +18 -0
  55. polars/datatype_expr/struct.py +69 -0
  56. polars/datatypes/__init__.py +122 -0
  57. polars/datatypes/_parse.py +195 -0
  58. polars/datatypes/_utils.py +48 -0
  59. polars/datatypes/classes.py +1213 -0
  60. polars/datatypes/constants.py +11 -0
  61. polars/datatypes/constructor.py +172 -0
  62. polars/datatypes/convert.py +366 -0
  63. polars/datatypes/group.py +130 -0
  64. polars/exceptions.py +230 -0
  65. polars/expr/__init__.py +7 -0
  66. polars/expr/array.py +964 -0
  67. polars/expr/binary.py +346 -0
  68. polars/expr/categorical.py +306 -0
  69. polars/expr/datetime.py +2620 -0
  70. polars/expr/expr.py +11272 -0
  71. polars/expr/list.py +1408 -0
  72. polars/expr/meta.py +444 -0
  73. polars/expr/name.py +321 -0
  74. polars/expr/string.py +3045 -0
  75. polars/expr/struct.py +357 -0
  76. polars/expr/whenthen.py +185 -0
  77. polars/functions/__init__.py +193 -0
  78. polars/functions/aggregation/__init__.py +33 -0
  79. polars/functions/aggregation/horizontal.py +298 -0
  80. polars/functions/aggregation/vertical.py +341 -0
  81. polars/functions/as_datatype.py +848 -0
  82. polars/functions/business.py +138 -0
  83. polars/functions/col.py +384 -0
  84. polars/functions/datatype.py +121 -0
  85. polars/functions/eager.py +524 -0
  86. polars/functions/escape_regex.py +29 -0
  87. polars/functions/lazy.py +2751 -0
  88. polars/functions/len.py +68 -0
  89. polars/functions/lit.py +210 -0
  90. polars/functions/random.py +22 -0
  91. polars/functions/range/__init__.py +19 -0
  92. polars/functions/range/_utils.py +15 -0
  93. polars/functions/range/date_range.py +303 -0
  94. polars/functions/range/datetime_range.py +370 -0
  95. polars/functions/range/int_range.py +348 -0
  96. polars/functions/range/linear_space.py +311 -0
  97. polars/functions/range/time_range.py +287 -0
  98. polars/functions/repeat.py +301 -0
  99. polars/functions/whenthen.py +353 -0
  100. polars/interchange/__init__.py +10 -0
  101. polars/interchange/buffer.py +77 -0
  102. polars/interchange/column.py +190 -0
  103. polars/interchange/dataframe.py +230 -0
  104. polars/interchange/from_dataframe.py +328 -0
  105. polars/interchange/protocol.py +303 -0
  106. polars/interchange/utils.py +170 -0
  107. polars/io/__init__.py +64 -0
  108. polars/io/_utils.py +317 -0
  109. polars/io/avro.py +49 -0
  110. polars/io/clipboard.py +36 -0
  111. polars/io/cloud/__init__.py +17 -0
  112. polars/io/cloud/_utils.py +80 -0
  113. polars/io/cloud/credential_provider/__init__.py +17 -0
  114. polars/io/cloud/credential_provider/_builder.py +520 -0
  115. polars/io/cloud/credential_provider/_providers.py +618 -0
  116. polars/io/csv/__init__.py +9 -0
  117. polars/io/csv/_utils.py +38 -0
  118. polars/io/csv/batched_reader.py +142 -0
  119. polars/io/csv/functions.py +1495 -0
  120. polars/io/database/__init__.py +6 -0
  121. polars/io/database/_arrow_registry.py +70 -0
  122. polars/io/database/_cursor_proxies.py +147 -0
  123. polars/io/database/_executor.py +578 -0
  124. polars/io/database/_inference.py +314 -0
  125. polars/io/database/_utils.py +144 -0
  126. polars/io/database/functions.py +516 -0
  127. polars/io/delta.py +499 -0
  128. polars/io/iceberg/__init__.py +3 -0
  129. polars/io/iceberg/_utils.py +697 -0
  130. polars/io/iceberg/dataset.py +556 -0
  131. polars/io/iceberg/functions.py +151 -0
  132. polars/io/ipc/__init__.py +8 -0
  133. polars/io/ipc/functions.py +514 -0
  134. polars/io/json/__init__.py +3 -0
  135. polars/io/json/read.py +101 -0
  136. polars/io/ndjson.py +332 -0
  137. polars/io/parquet/__init__.py +17 -0
  138. polars/io/parquet/field_overwrites.py +140 -0
  139. polars/io/parquet/functions.py +722 -0
  140. polars/io/partition.py +491 -0
  141. polars/io/plugins.py +187 -0
  142. polars/io/pyarrow_dataset/__init__.py +5 -0
  143. polars/io/pyarrow_dataset/anonymous_scan.py +109 -0
  144. polars/io/pyarrow_dataset/functions.py +79 -0
  145. polars/io/scan_options/__init__.py +5 -0
  146. polars/io/scan_options/_options.py +59 -0
  147. polars/io/scan_options/cast_options.py +126 -0
  148. polars/io/spreadsheet/__init__.py +6 -0
  149. polars/io/spreadsheet/_utils.py +52 -0
  150. polars/io/spreadsheet/_write_utils.py +647 -0
  151. polars/io/spreadsheet/functions.py +1323 -0
  152. polars/lazyframe/__init__.py +9 -0
  153. polars/lazyframe/engine_config.py +61 -0
  154. polars/lazyframe/frame.py +8564 -0
  155. polars/lazyframe/group_by.py +669 -0
  156. polars/lazyframe/in_process.py +42 -0
  157. polars/lazyframe/opt_flags.py +333 -0
  158. polars/meta/__init__.py +14 -0
  159. polars/meta/build.py +33 -0
  160. polars/meta/index_type.py +27 -0
  161. polars/meta/thread_pool.py +50 -0
  162. polars/meta/versions.py +120 -0
  163. polars/ml/__init__.py +0 -0
  164. polars/ml/torch.py +213 -0
  165. polars/ml/utilities.py +30 -0
  166. polars/plugins.py +155 -0
  167. polars/py.typed +0 -0
  168. polars/pyproject.toml +96 -0
  169. polars/schema.py +265 -0
  170. polars/selectors.py +3117 -0
  171. polars/series/__init__.py +5 -0
  172. polars/series/array.py +776 -0
  173. polars/series/binary.py +254 -0
  174. polars/series/categorical.py +246 -0
  175. polars/series/datetime.py +2275 -0
  176. polars/series/list.py +1087 -0
  177. polars/series/plotting.py +191 -0
  178. polars/series/series.py +9197 -0
  179. polars/series/string.py +2367 -0
  180. polars/series/struct.py +154 -0
  181. polars/series/utils.py +191 -0
  182. polars/sql/__init__.py +7 -0
  183. polars/sql/context.py +677 -0
  184. polars/sql/functions.py +139 -0
  185. polars/string_cache.py +185 -0
  186. polars/testing/__init__.py +13 -0
  187. polars/testing/asserts/__init__.py +9 -0
  188. polars/testing/asserts/frame.py +231 -0
  189. polars/testing/asserts/series.py +219 -0
  190. polars/testing/asserts/utils.py +12 -0
  191. polars/testing/parametric/__init__.py +33 -0
  192. polars/testing/parametric/profiles.py +107 -0
  193. polars/testing/parametric/strategies/__init__.py +22 -0
  194. polars/testing/parametric/strategies/_utils.py +14 -0
  195. polars/testing/parametric/strategies/core.py +615 -0
  196. polars/testing/parametric/strategies/data.py +452 -0
  197. polars/testing/parametric/strategies/dtype.py +436 -0
  198. polars/testing/parametric/strategies/legacy.py +169 -0
  199. polars/type_aliases.py +24 -0
  200. polars_runtime_compat-1.34.0b2.dist-info/METADATA +190 -0
  201. polars_runtime_compat-1.34.0b2.dist-info/RECORD +203 -0
  202. polars_runtime_compat-1.34.0b2.dist-info/WHEEL +4 -0
  203. polars_runtime_compat-1.34.0b2.dist-info/licenses/LICENSE +20 -0
polars/_utils/udfs.py ADDED
@@ -0,0 +1,1251 @@
1
+ """Utilities related to user defined functions (such as those passed to `apply`)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import datetime
6
+ import dis
7
+ import inspect
8
+ import re
9
+ import sys
10
+ import warnings
11
+ from bisect import bisect_left
12
+ from collections import defaultdict
13
+ from dis import get_instructions
14
+ from inspect import signature
15
+ from itertools import count, zip_longest
16
+ from pathlib import Path
17
+ from typing import (
18
+ TYPE_CHECKING,
19
+ Any,
20
+ Callable,
21
+ ClassVar,
22
+ Literal,
23
+ NamedTuple,
24
+ Union,
25
+ )
26
+
27
+ from polars._utils.cache import LRUCache
28
+ from polars._utils.various import no_default, re_escape
29
+
30
+ if TYPE_CHECKING:
31
+ from collections.abc import Iterator, MutableMapping
32
+ from collections.abc import Set as AbstractSet
33
+ from dis import Instruction
34
+
35
+ from polars._utils.various import NoDefault
36
+
37
+ if sys.version_info >= (3, 10):
38
+ from typing import TypeAlias
39
+ else:
40
+ from typing_extensions import TypeAlias
41
+
42
+
43
class StackValue(NamedTuple):
    """One node of the intermediate (natural-order) expression stack."""

    # operator symbol, or the name of the function/method being applied
    operator: str
    # number of operands the operator takes (1 means only `left_operand` is used)
    operator_arity: int
    # first operand
    left_operand: str
    # second operand (read when the arity is not 1)
    right_operand: str
    # name of the module the operator was loaded from, if any (eg: "math")
    from_module: str | None = None
49
+
50
+
51
# the kind of object a 'map' operation can be invoked against
MapTarget: TypeAlias = Literal["expr", "frame", "series"]
# an entry on the intermediate stack: a plain string or a nested StackValue
StackEntry: TypeAlias = Union[str, StackValue]

# interpreter version flags (each one implies the flags before it)
_MIN_PY311 = sys.version_info >= (3, 11)
_MIN_PY312 = sys.version_info >= (3, 12)
_MIN_PY314 = sys.version_info >= (3, 14)

# bounded cache of parsers, keyed on a (function, str) tuple
_BYTECODE_PARSER_CACHE_: MutableMapping[
    tuple[Callable[[Any], Any], str], BytecodeParser
] = LRUCache(32)
61
+
62
+
63
class OpNames:
    """Groupings of bytecode opnames used when matching/translating UDF bytecode."""

    # binary opname -> operator symbol
    BINARY: ClassVar[dict[str, str]] = {
        "BINARY_ADD": "+",
        "BINARY_AND": "&",
        "BINARY_FLOOR_DIVIDE": "//",
        "BINARY_LSHIFT": "<<",
        "BINARY_RSHIFT": ">>",
        "BINARY_MODULO": "%",
        "BINARY_MULTIPLY": "*",
        "BINARY_OR": "|",
        "BINARY_POWER": "**",
        "BINARY_SUBTRACT": "-",
        "BINARY_TRUE_DIVIDE": "/",
        "BINARY_XOR": "^",
    }

    # call opnames differ before/after Python 3.11
    CALL = frozenset({"CALL"} if _MIN_PY311 else {"CALL_FUNCTION", "CALL_METHOD"})

    # control-flow (jump) opname -> the logical operator it stands for
    CONTROL_FLOW: ClassVar[dict[str, str]]
    if _MIN_PY311 and not _MIN_PY312:
        CONTROL_FLOW = {
            "POP_JUMP_FORWARD_IF_FALSE": "&",
            "POP_JUMP_FORWARD_IF_TRUE": "|",
            "JUMP_IF_FALSE_OR_POP": "&",
            "JUMP_IF_TRUE_OR_POP": "|",
        }
    else:
        # note: 3.12 dropped POP_JUMP_FORWARD_IF_* opcodes
        CONTROL_FLOW = {
            "POP_JUMP_IF_FALSE": "&",
            "POP_JUMP_IF_TRUE": "|",
            "JUMP_IF_FALSE_OR_POP": "&",
            "JUMP_IF_TRUE_OR_POP": "|",
        }

    # opnames that load a value / an attribute (or method)
    LOAD_VALUES = frozenset({"LOAD_CONST", "LOAD_DEREF", "LOAD_FAST", "LOAD_GLOBAL"})
    LOAD_ATTR = frozenset({"LOAD_METHOD", "LOAD_ATTR"})
    LOAD = LOAD_VALUES | LOAD_ATTR

    # specialised opname -> the plain opname it is treated as
    SIMPLIFY_SPECIALIZED: ClassVar[dict[str, str]] = {
        "LOAD_FAST_BORROW": "LOAD_FAST",
        "LOAD_SMALL_INT": "LOAD_CONST",
    }

    # synthetic (non-CPython) opnames
    SYNTHETIC: ClassVar[dict[str, int]] = {
        "POLARS_EXPRESSION": 1,
    }

    # unary opname -> operator symbol
    UNARY: ClassVar[dict[str, str]] = {
        "UNARY_NEGATIVE": "-",
        "UNARY_POSITIVE": "+",
        "UNARY_NOT": "~",
    }

    # every opname that may appear in a (rewritten) parseable instruction stream
    PARSEABLE_OPS = frozenset(
        {
            "BINARY_OP",
            "BINARY_SUBSCR",
            "COMPARE_OP",
            "CONTAINS_OP",
            "IS_OP",
            *UNARY,
            *CONTROL_FLOW,
            *SYNTHETIC,
            *LOAD_VALUES,
        }
    )

    # every opname that instruction matching may consider (a plain, mutable set)
    MATCHABLE_OPS = {
        *SIMPLIFY_SPECIALIZED,
        *PARSEABLE_OPS,
        *BINARY,
        *LOAD_ATTR,
        *CALL,
    }

    # the operator symbols produced by unary ops
    UNARY_VALUES = frozenset(UNARY.values())
121
+
122
+
123
# math module funcs that we can map to native expressions
_MATH_FUNCTIONS = frozenset(
    {
        "acos",
        "acosh",
        "asin",
        "asinh",
        "atan",
        "atanh",
        "cbrt",
        "ceil",
        "cos",
        "cosh",
        "degrees",
        "exp",
        "floor",
        "log",
        "log10",
        "log1p",
        "pow",
        "radians",
        "sin",
        "sinh",
        "sqrt",
        "tan",
        "tanh",
    }
)

# numpy functions that we can map to native expressions
_NUMPY_MODULE_ALIASES = frozenset({"np", "numpy"})
_NUMPY_FUNCTIONS = frozenset(
    {
        # "abs",  # TODO: this one clashes with Python builtin abs
        "arccos",
        "arccosh",
        "arcsin",
        "arcsinh",
        "arctan",
        "arctanh",
        "cbrt",
        "ceil",
        "cos",
        "cosh",
        "degrees",
        "exp",
        "floor",
        "log",
        "log10",
        "log1p",
        "radians",
        "sign",
        "sin",
        "sinh",
        "sqrt",
        "tan",
        "tanh",
    }
)

# python attrs/funcs that map to native expressions
_PYTHON_ATTRS_MAP = {
    "date": "dt.date()",
    "day": "dt.day()",
    "hour": "dt.hour()",
    "microsecond": "dt.microsecond()",
    "minute": "dt.minute()",
    "month": "dt.month()",
    "second": "dt.second()",
    "year": "dt.year()",
}
# python builtin cast name -> polars dtype name
_PYTHON_CASTS_MAP = {"float": "Float64", "int": "Int64", "str": "String"}
# builtin names recognised: the casts above, plus `abs`
_PYTHON_BUILTINS = frozenset({*_PYTHON_CASTS_MAP, "abs"})
# python method name -> namespaced polars expression method
_PYTHON_METHODS_MAP = {
    # string
    "endswith": "str.ends_with",
    "lower": "str.to_lowercase",
    "lstrip": "str.strip_chars_start",
    "removeprefix": "str.strip_prefix",
    "removesuffix": "str.strip_suffix",
    "replace": "str.replace",
    "rstrip": "str.strip_chars_end",
    "startswith": "str.starts_with",
    "strip": "str.strip_chars",
    "title": "str.to_titlecase",
    "upper": "str.to_uppercase",
    "zfill": "str.zfill",
    # temporal
    "date": "dt.date",
    "day": "dt.day",
    "hour": "dt.hour",
    "isoweekday": "dt.weekday",
    "microsecond": "dt.microsecond",
    "month": "dt.month",
    "second": "dt.second",
    "strftime": "dt.strftime",
    "time": "dt.time",
    "year": "dt.year",
}
222
+
223
# Each entry describes one recognisable "module function call" shape; every key
# maps to a list holding the set of acceptable values for that slot (an empty
# list means the slot is absent from the pattern).
_MODULE_FUNCTIONS: list[dict[str, list[AbstractSet[str]]]] = [
    # lambda x: numpy.func(x)
    # lambda x: numpy.func(CONSTANT)
    {
        "argument_1_opname": [{"LOAD_FAST", "LOAD_CONST"}],
        "argument_2_opname": [],
        "module_opname": [OpNames.LOAD_ATTR],
        "attribute_opname": [],
        "module_name": [_NUMPY_MODULE_ALIASES],
        "attribute_name": [],
        "function_name": [_NUMPY_FUNCTIONS],
    },
    # lambda x: math.func(x)
    # lambda x: math.func(CONSTANT)
    {
        "argument_1_opname": [{"LOAD_FAST", "LOAD_CONST"}],
        "argument_2_opname": [],
        "module_opname": [OpNames.LOAD_ATTR],
        "attribute_opname": [],
        "module_name": [{"math"}],
        "attribute_name": [],
        "function_name": [_MATH_FUNCTIONS],
    },
    # lambda x: json.loads(x)
    {
        "argument_1_opname": [{"LOAD_FAST"}],
        "argument_2_opname": [],
        "module_opname": [OpNames.LOAD_ATTR],
        "attribute_opname": [],
        "module_name": [{"json"}],
        "attribute_name": [],
        "function_name": [{"loads"}],
    },
    # lambda x: datetime.strptime(x, CONSTANT)
    {
        "argument_1_opname": [{"LOAD_FAST"}],
        "argument_2_opname": [{"LOAD_CONST"}],
        "module_opname": [OpNames.LOAD_ATTR],
        "attribute_opname": [],
        "module_name": [{"datetime"}],
        "attribute_name": [],
        "function_name": [{"strptime"}],
        "check_load_global": False,  # type: ignore[dict-item]
    },
    # lambda x: module.attribute.func(x, CONSTANT)
    {
        "argument_1_opname": [{"LOAD_FAST"}],
        "argument_2_opname": [{"LOAD_CONST"}],
        "module_opname": [{"LOAD_ATTR"}],
        "attribute_opname": [OpNames.LOAD_ATTR],
        "module_name": [{"datetime", "dt"}],
        "attribute_name": [{"datetime"}],
        "function_name": [{"strptime"}],
        "check_load_global": False,  # type: ignore[dict-item]
    },
]
# In addition to `lambda x: func(x)`, also support cases when a unary operation
# has been applied to `x`, like `lambda x: func(-x)` or `lambda x: func(~x)`.
# (every pattern above is emitted twice: first with, then without, the unary slot)
_MODULE_FUNCTIONS = [
    {**pattern, "argument_1_unary_opname": unary_slot}  # type: ignore[dict-item]
    for pattern in _MODULE_FUNCTIONS
    for unary_slot in [[set(OpNames.UNARY)], []]
]
286
# Lookup for module functions that have different names as polars expressions
_MODULE_FUNC_TO_EXPR_NAME = {
    # math.acos -> arccos, math.acosh -> arccosh, ... (inverse trig/hyperbolic)
    **{
        f"math.a{fn}": f"arc{fn}"
        for fn in ("cos", "cosh", "sin", "sinh", "tan", "tanh")
    },
    "json.loads": "str.json_decode",
}

# `pl.col("x") & pl.col("x").<rest>`: the same column and-ed with itself
_RE_IMPLICIT_BOOL = re.compile(r'pl\.col\("([^"]*)"\) & pl\.col\("\1"\)\.(.+)')
# expression that starts with a conventional series variable name
_RE_SERIES_NAMES = re.compile(r"^(s|srs\d?|series)\.")
# an entire string wrapped in `bool(...)`
_RE_STRIP_BOOL = re.compile(r"^bool\((.+)\)$")
299
+
300
+
301
def _get_all_caller_variables() -> dict[str, Any]:
    """
    Get all local and global variables from caller's frame.

    The "caller" is the first frame, walking outwards from this function, whose
    source file does not live under this package's directory.

    Returns
    -------
    dict
        The caller's globals overlaid with its locals (so that a local name
        shadows a global of the same name, matching Python name resolution),
        or an empty dict if every frame on the stack belongs to the package.
    """
    pkg_dir = str(Path(__file__).parent.parent)

    # https://stackoverflow.com/questions/17407119/python-inspect-stack-is-slow
    frame = inspect.currentframe()
    try:
        # skip every frame that originates from inside the package
        while frame is not None and inspect.getfile(frame).startswith(pkg_dir):
            frame = frame.f_back

        if frame is None:
            return {}
        # locals must win on a name clash: that is what the caller's code sees
        return {**frame.f_globals, **frame.f_locals}
    finally:
        # https://docs.python.org/3/library/inspect.html
        # > Though the cycle detector will catch these, destruction of the frames
        # > (and local variables) can be made deterministic by removing the cycle
        # > in a finally clause.
        del frame
328
+
329
+
330
def _get_target_name(col: str, expression: str, map_target: str) -> str:
    """
    Return the name of the object that the 'map' is being invoked on.

    For an "expr" target this is the `pl.col(...)` reference itself; for a
    "series" target it is a variable name: either the one the expression already
    starts with, or the first conventional name not otherwise present in it.

    Raises
    ------
    NotImplementedError
        If `map_target` is neither "expr" nor "series".
    """
    col_expr = f'pl.col("{col}")'
    if map_target == "expr":
        return col_expr

    if map_target != "series":
        msg = f"TODO: map_target = {map_target!r}"
        raise NotImplementedError(msg)

    if _RE_SERIES_NAMES.match(expression):
        return expression.split(".", 1)[0]

    # note: handle overlapping name from global variables; fallback
    # through "s", "srs", "series" and (finally) srs0 -> srsN...
    search_expr = expression.replace(col_expr, "")

    def _is_free(candidate: str) -> bool:
        return re.search(rf"\b{candidate}\b", search_expr) is None

    for preferred in ("s", "srs", "series"):
        if _is_free(preferred):
            return preferred

    numbered = (f"srs{i}" for i in count())
    return next(candidate for candidate in numbered if _is_free(candidate))
353
+
354
+
355
class BytecodeParser:
    """Introspect UDF bytecode and determine if we can rewrite as native expression."""

    # name of the object the map is invoked on (reset by `to_expression`)
    _map_target_name: str | None = None
    # memoised result of `can_attempt_rewrite`
    _can_attempt_rewrite: bool | None = None
    # variables from the caller's frame (handed on to the instruction helpers)
    _caller_variables: dict[str, Any] | None = None
    # memoised `(column name, expression)`; `no_default` means "not computed yet"
    # and None means "no expression could be produced"
    _col_expression: tuple[str, str] | NoDefault | None = no_default

    def __init__(self, function: Callable[[Any], Any], map_target: MapTarget) -> None:
        """
        Initialize BytecodeParser instance and prepare to introspect UDFs.

        Parameters
        ----------
        function : callable
            The function/lambda to disassemble and introspect.
        map_target : {'expr','series','frame'}
            The underlying target object type of the map operation.
        """
        try:
            original_instructions = get_instructions(function)
        except TypeError:
            # in case we hit something that can't be disassembled (eg: code object
            # unavailable, like a bare numpy ufunc that isn't in a lambda/function)
            original_instructions = iter([])

        self._function = function
        self._map_target = map_target
        self._param_name = self._get_param_name(function)
        self._rewritten_instructions = RewrittenInstructions(
            instructions=original_instructions,
            caller_variables=self._caller_variables,
            function=function,
        )

    def _omit_implicit_bool(self, expr: str) -> str:
        """Drop extraneous/implied bool (eg: `pl.col("d") & pl.col("d").dt.date()`)."""
        # substitute repeatedly: one pass may expose another match
        while _RE_IMPLICIT_BOOL.search(expr):
            expr = _RE_IMPLICIT_BOOL.sub(repl=r'pl.col("\1").\2', string=expr)
        return expr

    @staticmethod
    def _get_param_name(function: Callable[[Any], Any]) -> str | None:
        """Return single function parameter name (None if not exactly one)."""
        try:
            # note: we do not parse/handle functions with > 1 params
            sig = signature(function)
        except ValueError:
            return None
        return (
            next(iter(parameters.keys()))
            if len(parameters := sig.parameters) == 1
            else None
        )

    def _inject_nesting(
        self,
        expression_blocks: dict[int, str],
        logical_instructions: list[Instruction],
    ) -> list[tuple[int, str]]:
        """
        Inject nesting boundaries into expression blocks (as parentheses).

        Parameters
        ----------
        expression_blocks
            Expression strings keyed by bytecode offset (modified in place).
        logical_instructions
            The control flow instructions that separate the blocks.

        Returns
        -------
        list of (offset, str)
            The blocks and their connecting operators, sorted by offset.
        """
        if logical_instructions:
            # reconstruct nesting for mixed 'and'/'or' ops by associating control flow
            # jump offsets with their target expression blocks and applying parens
            if len({inst.opname for inst in logical_instructions}) > 1:
                block_offsets: list[int] = list(expression_blocks.keys())
                prev_end = -1
                for inst in logical_instructions:
                    start = block_offsets[bisect_left(block_offsets, inst.offset) - 1]
                    end = block_offsets[bisect_left(block_offsets, inst.argval) - 1]
                    if not (start == 0 and end == block_offsets[-1]):
                        if prev_end not in (start, end):
                            expression_blocks[start] = "(" + expression_blocks[start]
                            expression_blocks[end] += ")"
                            prev_end = end

            for inst in logical_instructions:  # inject connecting "&" and "|" ops
                expression_blocks[inst.offset] = OpNames.CONTROL_FLOW[inst.opname]

        return sorted(expression_blocks.items())

    @property
    def map_target(self) -> MapTarget:
        """The map target, eg: one of 'expr', 'frame', or 'series'."""
        return self._map_target

    def can_attempt_rewrite(self) -> bool:
        """
        Determine if we may be able to offer a native polars expression instead.

        Note that `lambda x: x` is inefficient, but we ignore it because it is not
        guaranteed that using the equivalent bare constant value will return the
        same output. (Hopefully nobody is writing lambdas like that anyway...)
        """
        if self._can_attempt_rewrite is None:
            self._can_attempt_rewrite = (
                self._param_name is not None
                # check minimum number of ops, ensuring all are parseable
                and len(self._rewritten_instructions) >= 2
                and all(
                    inst.opname in OpNames.PARSEABLE_OPS
                    for inst in self._rewritten_instructions
                )
                # exclude constructs/functions with multiple RETURN_VALUE ops
                and sum(
                    1
                    for inst in self.original_instructions
                    if inst.opname == "RETURN_VALUE"
                )
                == 1
            )
        return self._can_attempt_rewrite

    def dis(self) -> None:
        """Print disassembled function bytecode."""
        dis.dis(self._function)

    @property
    def function(self) -> Callable[[Any], Any]:
        """The function being parsed."""
        return self._function

    @property
    def original_instructions(self) -> list[Instruction]:
        """The original bytecode instructions from the function we are parsing."""
        return list(self._rewritten_instructions._original_instructions)

    @property
    def param_name(self) -> str | None:
        """The parameter name of the function being parsed."""
        return self._param_name

    @property
    def rewritten_instructions(self) -> list[Instruction]:
        """The rewritten bytecode instructions from the function we are parsing."""
        return list(self._rewritten_instructions)

    def to_expression(self, col: str) -> str | None:
        """
        Translate postfix bytecode instructions to polars expression/string.

        Parameters
        ----------
        col
            Name of the column that the function parameter stands for.

        Returns
        -------
        str or None
            The expression string, or None if the function has no single
            parameter, cannot be translated, or does not reference the column.
        """
        if self._col_expression is not no_default and self._col_expression is not None:
            col_name, expr = self._col_expression
            if col != col_name:
                # re-target the memoised expression at the new column; this is a
                # purely literal substitution (a regex-escaped name must not be
                # used as the replacement, or the escapes end up in the output)
                expr = expr.replace(f'pl.col("{col_name}")', f'pl.col("{col}")')
                self._col_expression = (col, expr)
            return expr

        self._map_target_name = None
        if self._param_name is None:
            self._col_expression = None
            return None

        # decompose bytecode into logical 'and'/'or' expression blocks (if present)
        control_flow_blocks = defaultdict(list)
        logical_instructions = []
        jump_offset = 0
        for idx, inst in enumerate(self._rewritten_instructions):
            if inst.opname in OpNames.CONTROL_FLOW:
                # the next block is keyed on the offset of the following instruction
                jump_offset = self._rewritten_instructions[idx + 1].offset
                logical_instructions.append(inst)
            else:
                control_flow_blocks[jump_offset].append(inst)

        # convert each block to a polars expression string
        try:
            expression_strings = self._inject_nesting(
                {
                    offset: InstructionTranslator(
                        instructions=ops,
                        caller_variables=self._caller_variables,
                        map_target=self._map_target,
                        function=self._function,
                    ).to_expression(
                        col=col,
                        param_name=self._param_name,
                        depth=int(bool(logical_instructions)),
                    )
                    for offset, ops in control_flow_blocks.items()
                },
                logical_instructions,
            )
        except NotImplementedError:
            self._col_expression = None
            return None

        polars_expr = " ".join(expr for _offset, expr in expression_strings)

        # note: if no 'pl.col' in the expression, it likely represents a compound
        # constant value (e.g. `lambda x: CONST + 123`), so we don't want to warn
        if "pl.col(" not in polars_expr:
            self._col_expression = None
            return None

        polars_expr = self._omit_implicit_bool(polars_expr)
        if self._map_target == "series":
            if (target_name := self._map_target_name) is None:
                target_name = _get_target_name(col, polars_expr, self._map_target)
            polars_expr = polars_expr.replace(f'pl.col("{col}")', target_name)

        self._col_expression = (col, polars_expr)
        return polars_expr

    def warn(
        self,
        col: str,
        *,
        suggestion_override: str | None = None,
        udf_override: str | None = None,
    ) -> None:
        """
        Generate warning that suggests an equivalent native polars expression.

        Parameters
        ----------
        col
            Name of the column that the function is being mapped over.
        suggestion_override
            Expression string to suggest, instead of the one derived from bytecode.
        udf_override
            Text used to represent the function in the message, instead of its name.

        Notes
        -----
        Nothing is emitted when no suggestion is available.
        """
        # Import these here so that udfs can be imported without polars installed.

        from polars._utils.various import (
            find_stacklevel,
            in_terminal_that_supports_colour,
        )
        from polars.exceptions import PolarsInefficientMapWarning

        suggested_expression = suggestion_override or self.to_expression(col)
        if suggested_expression is None:
            return

        if (target_name := self._map_target_name) is None:
            target_name = _get_target_name(col, suggested_expression, self._map_target)

        # not every callable carries a `__name__`; fall back to a placeholder
        func_name = udf_override or getattr(self._function, "__name__", None) or "..."
        if func_name == "<lambda>":
            func_name = f"lambda {self._param_name}: ..."

        addendum = (
            'Note: in list.eval context, pl.col("") should be written as pl.element()'
            if 'pl.col("")' in suggested_expression
            else ""
        )
        apitype, clsname = (
            ("expressions", "Expr")
            if self._map_target == "expr"
            else ("series", "Series")
        )
        before, after = (
            (
                f" \033[31m- {target_name}.map_elements({func_name})\033[0m\n",
                f" \033[32m+ {suggested_expression}\033[0m\n{addendum}",
            )
            if in_terminal_that_supports_colour()
            else (
                f" - {target_name}.map_elements({func_name})\n",
                f" + {suggested_expression}\n{addendum}",
            )
        )
        warnings.warn(
            f"\n{clsname}.map_elements is significantly slower than the native {apitype} API.\n"
            "Only use if you absolutely CANNOT implement your logic otherwise.\n"
            "Replace this expression...\n"
            f"{before}"
            "with this one instead:\n"
            f"{after}",
            PolarsInefficientMapWarning,
            stacklevel=find_stacklevel(),
        )
618
+
619
+
620
+ class InstructionTranslator:
621
+ """Translates Instruction bytecode to a polars expression string."""
622
+
623
+ def __init__(
624
+ self,
625
+ instructions: list[Instruction],
626
+ caller_variables: dict[str, Any] | None,
627
+ function: Callable[[Any], Any],
628
+ map_target: MapTarget,
629
+ ) -> None:
630
+ self._stack = self._to_intermediate_stack(instructions, map_target)
631
+ self._caller_variables = caller_variables
632
+ self._function = function
633
+
634
+ def to_expression(self, col: str, param_name: str, depth: int) -> str:
635
+ """Convert intermediate stack to polars expression string."""
636
+ return self._expr(self._stack, col, param_name, depth)
637
+
638
+ @staticmethod
639
+ def op(inst: Instruction) -> str:
640
+ """Convert bytecode instruction to suitable intermediate op string."""
641
+ if (opname := inst.opname) in OpNames.CONTROL_FLOW:
642
+ return OpNames.CONTROL_FLOW[opname]
643
+ elif inst.argrepr:
644
+ return inst.argrepr
645
+ elif opname == "IS_OP":
646
+ return "is not" if inst.argval else "is"
647
+ elif opname == "CONTAINS_OP":
648
+ return "not in" if inst.argval else "in"
649
+ elif opname in OpNames.UNARY:
650
+ return OpNames.UNARY[opname]
651
+ elif opname == "BINARY_SUBSCR":
652
+ return "replace_strict"
653
+ else:
654
+ msg = (
655
+ f"unexpected or unrecognised op name ({opname})\n\n"
656
+ "Please report a bug to https://github.com/pola-rs/polars/issues "
657
+ "with the content of function you were passing to the `map` "
658
+ f"expression and the following instruction object:\n{inst!r}"
659
+ )
660
+ raise AssertionError(msg)
661
+
662
def _expr(self, value: StackEntry, col: str, param_name: str, depth: int) -> str:
    """
    Take stack entry value and convert to polars expression string.

    Parameters
    ----------
    value
        Either a `StackValue` (operator plus its operand(s)) or a plain
        string entry.
    col
        Column name; the function parameter is rendered as `pl.col("<col>")`.
    param_name
        Name of the function parameter being translated.
    depth
        Nesting depth; generic binary expressions are wrapped in
        parentheses when this is non-zero.

    Raises
    ------
    NotImplementedError
        If a subscript ("replace_strict") target is not a dict found in
        the caller's variables.
    """
    if isinstance(value, StackValue):
        op = _RE_STRIP_BOOL.sub(r"\1", value.operator)
        e1 = self._expr(value.left_operand, col, param_name, depth + 1)
        if value.operator_arity == 1:
            if op not in OpNames.UNARY_VALUES:
                if e1.startswith("pl.col("):
                    call = "" if op.endswith(")") else "()"
                    return f"{e1}.{op}{call}"
                if e1[0] in OpNames.UNARY_VALUES and e1[1:].startswith("pl.col("):
                    call = "" if op.endswith(")") else "()"
                    return f"({e1}).{op}{call}"

                # support use of consts as numpy/builtin params, eg:
                # "np.sin(3) + np.cos(x)", or "len('const_string') + len(x)"
                if (
                    value.from_module in _NUMPY_MODULE_ALIASES
                    and op in _NUMPY_FUNCTIONS
                ):
                    pfx = "np."
                elif (
                    value.from_module == "math"
                    and _MODULE_FUNC_TO_EXPR_NAME.get(f"math.{op}", op)
                    in _MATH_FUNCTIONS
                ):
                    pfx = "math."
                else:
                    pfx = ""
                return f"{pfx}{op}({e1})"
            return f"{op}{e1}"
        else:
            e2 = self._expr(value.right_operand, col, param_name, depth + 1)
            is_identity_op = op in ("is", "is not")
            if is_identity_op and value.right_operand == "None":
                # `x is None` / `x is not None`: the tested value is the LEFT
                # operand, so the null-check must be applied to e1
                not_ = "" if op == "is" else "not_"
                return f"{e1}.is_{not_}null()"
            elif is_identity_op and value.left_operand == "None":
                # reversed form (`None is x`): the tested value is on the right;
                # rendering e1 here would produce the invalid "None.is_null()"
                not_ = "" if op == "is" else "not_"
                return f"{e2}.is_{not_}null()"
            elif op in ("in", "not in"):
                not_ = "" if op == "in" else "~"
                return (
                    f"{not_}({e1}.is_in({e2}))"
                    if " " in e1
                    else f"{not_}{e1}.is_in({e2})"
                )
            elif op == "replace_strict":
                if not self._caller_variables:
                    self._caller_variables = _get_all_caller_variables()
                if not isinstance(self._caller_variables.get(e1, None), dict):
                    msg = "require dict mapping"
                    raise NotImplementedError(msg)
                return f"{e2}.{op}({e1})"
            elif op == "<<":
                # 2**e2 may be float if e2 was -ve, but if e1 << e2 was valid then
                # e2 must have been +ve. therefore 2**e2 can be safely cast to
                # i64, which may be necessary if chaining ops that assume i64.
                return f"({e1} * 2**{e2}).cast(pl.Int64)"
            elif op == ">>":
                # (motivation for the cast is same as the '<<' case above)
                return f"({e1} / 2**{e2}).cast(pl.Int64)"
            else:
                expr = f"{e1} {op} {e2}"
                return f"({expr})" if depth else expr

    elif value == param_name:
        return f'pl.col("{col}")'

    # any other plain entry (eg: a constant's repr) is emitted verbatim
    return value
728
+
729
+ def _to_intermediate_stack(
730
+ self, instructions: list[Instruction], map_target: MapTarget
731
+ ) -> StackEntry:
732
+ """Take postfix bytecode and convert to an intermediate natural-order stack."""
733
+ if map_target in ("expr", "series"):
734
+ stack: list[StackEntry] = []
735
+ for inst in instructions:
736
+ stack.append(
737
+ inst.argrepr
738
+ if inst.opname in OpNames.LOAD
739
+ else (
740
+ StackValue(
741
+ operator=self.op(inst),
742
+ operator_arity=1,
743
+ left_operand=stack.pop(), # type: ignore[arg-type]
744
+ right_operand=None, # type: ignore[arg-type]
745
+ from_module=getattr(inst, "_from_module", None),
746
+ )
747
+ if (
748
+ inst.opname in OpNames.UNARY
749
+ or OpNames.SYNTHETIC.get(inst.opname) == 1
750
+ )
751
+ else StackValue(
752
+ operator=self.op(inst),
753
+ operator_arity=2,
754
+ left_operand=stack.pop(-2), # type: ignore[arg-type]
755
+ right_operand=stack.pop(-1), # type: ignore[arg-type]
756
+ from_module=getattr(inst, "_from_module", None),
757
+ )
758
+ )
759
+ )
760
+ return stack[0]
761
+
762
+ # TODO: dataframe.map... ?
763
+ msg = f"TODO: {map_target!r} map target not yet supported."
764
+ raise NotImplementedError(msg)
765
+
766
+
767
class RewrittenInstructions:
    """
    Standalone class that applies Instruction rewrite/filtering rules.

    This significantly simplifies subsequent parsing by injecting
    synthetic POLARS_EXPRESSION ops into the Instruction stream for
    easy identification/translation, and separates the parsing logic
    from the identification of expression translation opportunities.
    """

    # instructions with these opnames are dropped before any matching is done
    _ignored_ops = frozenset(
        [
            "COPY",
            "COPY_FREE_VARS",
            "NOT_TAKEN",
            "POP_TOP",
            "PRECALL",
            "PUSH_NULL",
            "RESUME",
            "RETURN_VALUE",
            "TO_BOOL",
        ]
    )

    def __init__(
        self,
        instructions: Iterator[Instruction],
        function: Callable[[Any], Any],
        caller_variables: dict[str, Any] | None,
    ) -> None:
        """
        Normalise the given instructions and apply the rewrite rules.

        If any non-ignored instruction has an opname outside
        `OpNames.MATCHABLE_OPS` the rewritten stream is left empty.
        """
        self._function = function
        self._caller_variables = caller_variables
        self._original_instructions = list(instructions)

        normalised_instructions = []

        for inst in self._unpack_superinstructions(self._original_instructions):
            if inst.opname not in self._ignored_ops:
                if inst.opname not in OpNames.MATCHABLE_OPS:
                    # unsupported op encountered; give up on the whole stream
                    self._rewritten_instructions = []
                    return
                upgraded_inst = self._update_instruction(inst)
                normalised_instructions.append(upgraded_inst)

        self._rewritten_instructions = self._rewrite(normalised_instructions)

    def __len__(self) -> int:
        """Return the number of rewritten instructions."""
        return len(self._rewritten_instructions)

    def __iter__(self) -> Iterator[Instruction]:
        """Iterate over the rewritten instructions."""
        return iter(self._rewritten_instructions)

    def __getitem__(self, item: Any) -> Instruction:
        """Index into the rewritten instructions."""
        return self._rewritten_instructions[item]

    def _matches(
        self,
        idx: int,
        *,
        opnames: list[AbstractSet[str]],
        argvals: list[AbstractSet[Any] | dict[Any, Any] | None] | None,
        is_attr: bool = False,
    ) -> list[Instruction]:
        """
        Check if a sequence of Instructions matches the specified ops/argvals.

        Parameters
        ----------
        idx
            The index of the first instruction to check.
        opnames
            The full opname sequence that defines a match.
        argvals
            Associated argvals that must also match (in same position as opnames).
        is_attr
            Indicate if the match represents pure attribute access (cannot be called).

        Returns
        -------
        list
            The matched instructions, or an empty list if there is no match.
        """
        n_required_ops, argvals = len(opnames), argvals or []
        idx_offset = idx + n_required_ops
        if (
            is_attr
            and (trailing_inst := self._instructions[idx_offset : idx_offset + 1])
            and trailing_inst[0].opname in OpNames.CALL  # not pure attr if called
        ):
            return []

        instructions = self._instructions[idx:idx_offset]
        # argvals may be shorter than opnames; zip_longest pads it with None,
        # which means "no argval constraint at this position"
        if len(instructions) == n_required_ops and all(
            inst.opname in match_opnames
            and (match_argval is None or inst.argval in match_argval)
            for inst, match_opnames, match_argval in zip_longest(
                instructions, opnames, argvals
            )
        ):
            return instructions
        return []

    def _rewrite(self, instructions: list[Instruction]) -> list[Instruction]:
        """
        Apply rewrite rules, potentially injecting synthetic operations.

        Rules operate on the instruction stream and can examine/modify
        it as needed, pushing updates into "updated_instructions" and
        returning the number of source instructions they consumed (zero
        if the rule did not apply at the given index).
        """
        self._instructions = instructions
        updated_instructions: list[Instruction] = []
        idx = 0
        while idx < len(self._instructions):
            inst = self._instructions[idx]
            consumed = 0
            # rewrite rules are only attempted at a LOAD instruction
            if inst.opname in OpNames.LOAD:
                for map_rewrite in (
                    # add any other rewrite methods here
                    self._rewrite_functions,
                    self._rewrite_methods,
                    self._rewrite_builtins,
                    self._rewrite_attrs,
                ):
                    consumed = map_rewrite(idx, updated_instructions)
                    if consumed:
                        break
            if not consumed:
                # no rule applied; pass the instruction through unchanged
                updated_instructions.append(inst)
                consumed = 1
            idx += consumed
        return updated_instructions

    def _rewrite_attrs(self, idx: int, updated_instructions: list[Instruction]) -> int:
        """
        Replace python attribute lookup with synthetic POLARS_EXPRESSION op.

        Returns the number of instructions consumed (zero if no match).
        """
        if matching_instructions := self._matches(
            idx,
            opnames=[{"LOAD_FAST"}, {"LOAD_ATTR"}],
            argvals=[None, _PYTHON_ATTRS_MAP],
            is_attr=True,
        ):
            inst = matching_instructions[1]
            expr_name = _PYTHON_ATTRS_MAP[inst.argval]
            px = inst._replace(
                opname="POLARS_EXPRESSION", argval=expr_name, argrepr=expr_name
            )
            updated_instructions.extend([matching_instructions[0], px])

        return len(matching_instructions)

    def _rewrite_builtins(
        self, idx: int, updated_instructions: list[Instruction]
    ) -> int:
        """
        Replace builtin function calls with a synthetic POLARS_EXPRESSION op.

        Returns the number of instructions consumed (zero if no match).
        """
        if matching_instructions := self._matches(
            idx,
            opnames=[{"LOAD_GLOBAL"}, {"LOAD_FAST", "LOAD_CONST"}, OpNames.CALL],
            argvals=[_PYTHON_BUILTINS],
        ):
            inst1, inst2 = matching_instructions[:2]
            if (argval := inst1.argval) in _PYTHON_CASTS_MAP:
                dtype = _PYTHON_CASTS_MAP[argval]
                argval = f"cast(pl.{dtype})"

            px = inst1._replace(
                opname="POLARS_EXPRESSION",
                argval=argval,
                argrepr=argval,
                offset=inst2.offset,
            )
            # POLARS_EXPRESSION is mapped as a unary op, so switch instruction order
            operand = inst2._replace(offset=inst1.offset)
            updated_instructions.extend((operand, px))

        return len(matching_instructions)

    def _rewrite_functions(
        self, idx: int, updated_instructions: list[Instruction]
    ) -> int:
        """
        Replace function calls with a synthetic POLARS_EXPRESSION op.

        Each entry of `_MODULE_FUNCTIONS` is tried twice: first as a
        module-qualified call, then (`check_globals`) as a call to a bare
        global name that is resolved through the caller's variables.

        Returns the number of instructions consumed (zero if no match).
        """
        for check_globals in (False, True):
            for function_kind in _MODULE_FUNCTIONS:
                if check_globals and not function_kind.get("check_load_global", True):
                    return 0

                # the bare-global form has no module/attribute loads to match
                opnames: list[AbstractSet[str]] = (
                    [
                        {"LOAD_GLOBAL", "LOAD_DEREF"},
                        *function_kind["argument_1_opname"],
                        *function_kind["argument_1_unary_opname"],
                        *function_kind["argument_2_opname"],
                        OpNames.CALL,
                    ]
                    if check_globals
                    else [
                        {"LOAD_GLOBAL", "LOAD_DEREF"},
                        *function_kind["module_opname"],
                        *function_kind["attribute_opname"],
                        *function_kind["argument_1_opname"],
                        *function_kind["argument_1_unary_opname"],
                        *function_kind["argument_2_opname"],
                        OpNames.CALL,
                    ]
                )
                module_aliases = function_kind["module_name"]
                if matching_instructions := self._matches(
                    idx,
                    opnames=opnames,
                    argvals=[
                        *function_kind["function_name"],
                    ]
                    if check_globals
                    else [
                        *function_kind["module_name"],
                        *function_kind["attribute_name"],
                        *function_kind["function_name"],
                    ],
                ):
                    attribute_count = len(function_kind["attribute_name"])
                    inst1, inst2, inst3 = matching_instructions[
                        attribute_count : 3 + attribute_count
                    ]
                    if check_globals:
                        if not self._caller_variables:
                            self._caller_variables = _get_all_caller_variables()
                        if (expr_name := inst1.argval) not in self._caller_variables:
                            continue
                        else:
                            # only accept the global if the object it refers to
                            # comes from one of this kind's module aliases
                            module_name = self._caller_variables[expr_name].__module__
                            if not any((module_name in m) for m in module_aliases):
                                continue
                            expr_name = _MODULE_FUNC_TO_EXPR_NAME.get(
                                f"{module_name}.{expr_name}", expr_name
                            )
                    elif inst1.argval == "json":
                        expr_name = "str.json_decode"
                    elif inst1.argval == "datetime":
                        fmt = matching_instructions[attribute_count + 3].argval
                        expr_name = f'str.to_datetime(format="{fmt}")'
                        if not self._is_stdlib_datetime(
                            inst1.argval,
                            matching_instructions[0].argval,
                            attribute_count,
                        ):
                            # skip these instructions if not stdlib datetime function
                            return len(matching_instructions)
                    elif inst1.argval == "math":
                        expr_name = _MODULE_FUNC_TO_EXPR_NAME.get(
                            f"math.{inst2.argval}", inst2.argval
                        )
                    else:
                        expr_name = inst2.argval

                    # note: POLARS_EXPRESSION is mapped as unary op, so switch
                    # instruction order/offsets (for later RPE-type stack walk)
                    swap_inst = inst2 if check_globals else inst3
                    px = inst1._replace(
                        opname="POLARS_EXPRESSION",
                        argval=expr_name,
                        argrepr=expr_name,
                        offset=swap_inst.offset,
                    )
                    px._from_module = None if check_globals else (inst1.argval or None)  # type: ignore[attr-defined]
                    operand = swap_inst._replace(offset=inst1.offset)
                    updated_instructions.extend(
                        (
                            operand,
                            matching_instructions[3 + attribute_count],
                            px,
                        )
                        if function_kind["argument_1_unary_opname"]
                        else (operand, px)
                    )
                    return len(matching_instructions)

        return 0

    @staticmethod
    def _affix_tuple_to_contains(expr: str, param_value: tuple[str, ...]) -> str:
        """Build the `str.contains` call for a tuple-arg starts/ends-with check."""
        starts, ends = ("^", "") if "starts" in expr else ("", "$")
        rx = "|".join(re_escape(v) for v in param_value)
        # choose the quote char from the pattern text itself; a membership test
        # against the tuple would only detect an *element* equal to "'" and
        # miss a quote embedded inside one of the values
        q = '"' if "'" in rx else "'"
        return f"str.contains(r{q}{starts}({rx}){ends}{q})"

    def _rewrite_methods(
        self, idx: int, updated_instructions: list[Instruction]
    ) -> int:
        """
        Replace python method calls with synthetic POLARS_EXPRESSION op.

        Handles calls with zero to three constant arguments and returns the
        number of instructions consumed (zero if no match).
        """
        LOAD_METHOD = OpNames.LOAD_ATTR if _MIN_PY312 else {"LOAD_METHOD"}
        if matching_instructions := (
            # method call with one arg, eg: "s.endswith('!')"
            self._matches(
                idx,
                opnames=[LOAD_METHOD, {"LOAD_CONST"}, OpNames.CALL],
                argvals=[_PYTHON_METHODS_MAP],
            )
            or
            # method call with no arg, eg: "s.lower()"
            self._matches(
                idx,
                opnames=[LOAD_METHOD, OpNames.CALL],
                argvals=[_PYTHON_METHODS_MAP],
            )
        ):
            inst = matching_instructions[0]
            expr = _PYTHON_METHODS_MAP[inst.argval]

            if matching_instructions[1].opname == "LOAD_CONST":
                param_value = matching_instructions[1].argval
                if isinstance(param_value, tuple) and expr in (
                    "str.starts_with",
                    "str.ends_with",
                ):
                    expr = self._affix_tuple_to_contains(expr, param_value)
                else:
                    expr += f"({param_value!r})"

            px = inst._replace(opname="POLARS_EXPRESSION", argval=expr, argrepr=expr)
            updated_instructions.append(px)

        elif matching_instructions := (
            # method call with three args, eg: "s.replace('!','?',count=2)"
            self._matches(
                idx,
                opnames=[
                    LOAD_METHOD,
                    {"LOAD_CONST"},
                    {"LOAD_CONST"},
                    {"LOAD_CONST"},
                    OpNames.CALL,
                ],
                argvals=[_PYTHON_METHODS_MAP],
            )
            or
            # method call with two args, eg: "s.replace('!','?')"
            self._matches(
                idx,
                opnames=[LOAD_METHOD, {"LOAD_CONST"}, {"LOAD_CONST"}, OpNames.CALL],
                argvals=[_PYTHON_METHODS_MAP],
            )
        ):
            inst = matching_instructions[0]
            expr = _PYTHON_METHODS_MAP[inst.argval]

            # everything between the method load and the call is an argument
            param_values = [
                i.argval
                for i in matching_instructions[1 : len(matching_instructions) - 1]
            ]
            if expr == "str.replace":
                if len(param_values) == 3:
                    old, new, count = param_values
                    expr += f"({old!r},{new!r},n={count},literal=True)"
                else:
                    old, new = param_values
                    expr = f"str.replace_all({old!r},{new!r},literal=True)"
            else:
                expr += f"({','.join(repr(v) for v in param_values)})"

            px = inst._replace(opname="POLARS_EXPRESSION", argval=expr, argrepr=expr)
            updated_instructions.append(px)

        return len(matching_instructions)

    @staticmethod
    def _unpack_superinstructions(
        instructions: list[Instruction],
    ) -> Iterator[Instruction]:
        """Expand known 'superinstructions' into their component parts."""
        for inst in instructions:
            if inst.opname in (
                "LOAD_FAST_LOAD_FAST",
                "LOAD_FAST_BORROW_LOAD_FAST_BORROW",
            ):
                # argval holds both names; emit one plain LOAD_FAST for each
                for idx in (0, 1):
                    yield inst._replace(
                        opname="LOAD_FAST",
                        argval=inst.argval[idx],
                        argrepr=inst.argval[idx],
                    )
            else:
                yield inst

    @staticmethod
    def _update_instruction(inst: Instruction) -> Instruction:
        """Update/modify specific instructions to simplify multi-version parsing."""
        if not _MIN_PY311 and inst.opname in OpNames.BINARY:
            # update older binary opcodes using py >= 3.11 'BINARY_OP' instead
            inst = inst._replace(
                argrepr=OpNames.BINARY[inst.opname],
                opname="BINARY_OP",
            )
        elif _MIN_PY314:
            if (opname := inst.opname) in OpNames.SIMPLIFY_SPECIALIZED:
                # simplify specialised opcode variants to their more generic form
                # (eg: 'LOAD_FAST_BORROW' -> 'LOAD_FAST', etc)
                updated_params = {"opname": OpNames.SIMPLIFY_SPECIALIZED[inst.opname]}
                if opname == "LOAD_SMALL_INT":
                    updated_params["argrepr"] = str(inst.argval)
                inst = inst._replace(**updated_params)  # type: ignore[arg-type]

            elif opname == "BINARY_OP" and inst.argrepr == "[]":
                # special case for new 'BINARY_OP ([])'; revert to 'BINARY_SUBSCR'
                inst = inst._replace(opname="BINARY_SUBSCR", argrepr="")

        return inst

    def _is_stdlib_datetime(
        self, function_name: str, module_name: str, attribute_count: int
    ) -> bool:
        """
        Check that a matched name really refers to the stdlib `datetime`.

        With no attribute access the caller variable `function_name` must be
        the `datetime.datetime` class; with one attribute access the caller
        variable `module_name` must be the `datetime` module.
        """
        if not self._caller_variables:
            self._caller_variables = _get_all_caller_variables()
        caller_vars = self._caller_variables
        return (
            attribute_count == 0
            and caller_vars.get(function_name) is datetime.datetime
        ) or (attribute_count == 1 and caller_vars.get(module_name) is datetime)
1169
+
1170
+
1171
def _raw_function_meta(function: Callable[[Any], Any]) -> tuple[str, str]:
    """
    Identify translatable calls that aren't wrapped inside a lambda/function.

    Returns a `(module, suggestion)` pair, or a pair of empty strings when
    the callable is not recognised.
    """
    unrecognised = ("", "")
    try:
        func_module = function.__class__.__module__
        func_name = function.__name__
    except AttributeError:
        return unrecognised

    # numpy function calls
    if func_module == "numpy":
        if func_name in _NUMPY_FUNCTIONS:
            return "np", f"{func_name}()"
        return unrecognised

    if func_module != "builtins":
        return unrecognised

    # python function calls
    if func_name in _PYTHON_CASTS_MAP:
        return "builtins", f"cast(pl.{_PYTHON_CASTS_MAP[func_name]})"

    if func_name in _MATH_FUNCTIONS:
        import math

        # confirm identity: the name alone does not prove it is math's function
        if function is getattr(math, func_name):
            expr_name = _MODULE_FUNC_TO_EXPR_NAME.get(f"math.{func_name}", func_name)
            return "math", f"{expr_name}()"
    elif func_name == "loads":
        import json  # double-check since it is referenced via 'builtins'

        if function is json.loads:
            return "json", "str.json_decode()"

    return unrecognised
1202
+
1203
+
1204
def warn_on_inefficient_map(
    function: Callable[[Any], Any], columns: list[str], map_target: MapTarget
) -> None:
    """
    Generate `PolarsInefficientMapWarning` on poor usage of a `map` function.

    Parameters
    ----------
    function
        The function passed to `map`.
    columns
        The column name(s) of the original object; in the case of an `Expr` this
        will be a list of length 1, containing the expression's root name.
    map_target
        The target of the `map` call. One of `"expr"`, `"frame"`, or `"series"`.

    Raises
    ------
    NotImplementedError
        If `map_target` is `"frame"`.
    """
    if map_target == "frame":
        msg = "TODO: 'frame' map-function parsing"
        raise NotImplementedError(msg)

    # note: we only consider simple functions with a single col/param
    col: str = columns[0] if columns else columns  # type: ignore[assignment]
    # an empty-string column name is allowed through; other falsy values are not
    if not col and col != "":
        return None

    # the parser introspects function bytecode to determine if we can
    # rewrite as a (much) more optimal native polars expression instead
    cache_key = (function, map_target)
    parser = _BYTECODE_PARSER_CACHE_.get(cache_key)
    if parser is None:
        parser = BytecodeParser(function, map_target)
        _BYTECODE_PARSER_CACHE_[cache_key] = parser

    if parser.can_attempt_rewrite():
        parser.warn(col)
        return None

    # handle bare numpy/json functions
    module, suggestion = _raw_function_meta(function)
    if not (module and suggestion):
        return None

    target_name = _get_target_name(col, suggestion, map_target)
    parser._map_target_name = target_name
    fn = function.__name__
    udf_name = fn if module == "builtins" else f"{module}.{fn}"
    parser.warn(
        col,
        suggestion_override=f"{target_name}.{suggestion}",
        udf_override=udf_name,
    )
    return None
1249
+
1250
+
1251
# names exported by `from <module> import *`
__all__ = ["BytecodeParser", "warn_on_inefficient_map"]