chalkpy 2.89.22__py3-none-any.whl → 2.95.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268) hide show
  1. chalk/__init__.py +2 -1
  2. chalk/_gen/chalk/arrow/v1/arrow_pb2.py +7 -5
  3. chalk/_gen/chalk/arrow/v1/arrow_pb2.pyi +6 -0
  4. chalk/_gen/chalk/artifacts/v1/chart_pb2.py +36 -33
  5. chalk/_gen/chalk/artifacts/v1/chart_pb2.pyi +41 -1
  6. chalk/_gen/chalk/artifacts/v1/cron_query_pb2.py +8 -7
  7. chalk/_gen/chalk/artifacts/v1/cron_query_pb2.pyi +5 -0
  8. chalk/_gen/chalk/common/v1/offline_query_pb2.py +19 -13
  9. chalk/_gen/chalk/common/v1/offline_query_pb2.pyi +37 -0
  10. chalk/_gen/chalk/common/v1/online_query_pb2.py +54 -54
  11. chalk/_gen/chalk/common/v1/online_query_pb2.pyi +13 -1
  12. chalk/_gen/chalk/common/v1/script_task_pb2.py +13 -11
  13. chalk/_gen/chalk/common/v1/script_task_pb2.pyi +19 -1
  14. chalk/_gen/chalk/dataframe/__init__.py +0 -0
  15. chalk/_gen/chalk/dataframe/v1/__init__.py +0 -0
  16. chalk/_gen/chalk/dataframe/v1/dataframe_pb2.py +48 -0
  17. chalk/_gen/chalk/dataframe/v1/dataframe_pb2.pyi +123 -0
  18. chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.py +4 -0
  19. chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.pyi +4 -0
  20. chalk/_gen/chalk/graph/v1/graph_pb2.py +150 -149
  21. chalk/_gen/chalk/graph/v1/graph_pb2.pyi +25 -0
  22. chalk/_gen/chalk/graph/v1/sources_pb2.py +94 -84
  23. chalk/_gen/chalk/graph/v1/sources_pb2.pyi +56 -0
  24. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.py +79 -0
  25. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.pyi +377 -0
  26. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.py +4 -0
  27. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.pyi +4 -0
  28. chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.py +43 -7
  29. chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.pyi +252 -2
  30. chalk/_gen/chalk/protosql/v1/sql_service_pb2.py +54 -27
  31. chalk/_gen/chalk/protosql/v1/sql_service_pb2.pyi +131 -3
  32. chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.py +45 -0
  33. chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.pyi +14 -0
  34. chalk/_gen/chalk/python/v1/types_pb2.py +14 -14
  35. chalk/_gen/chalk/python/v1/types_pb2.pyi +8 -0
  36. chalk/_gen/chalk/server/v1/benchmark_pb2.py +76 -0
  37. chalk/_gen/chalk/server/v1/benchmark_pb2.pyi +156 -0
  38. chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.py +258 -0
  39. chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.pyi +84 -0
  40. chalk/_gen/chalk/server/v1/billing_pb2.py +40 -38
  41. chalk/_gen/chalk/server/v1/billing_pb2.pyi +17 -1
  42. chalk/_gen/chalk/server/v1/branches_pb2.py +45 -0
  43. chalk/_gen/chalk/server/v1/branches_pb2.pyi +80 -0
  44. chalk/_gen/chalk/server/v1/branches_pb2_grpc.pyi +36 -0
  45. chalk/_gen/chalk/server/v1/builder_pb2.py +372 -272
  46. chalk/_gen/chalk/server/v1/builder_pb2.pyi +479 -12
  47. chalk/_gen/chalk/server/v1/builder_pb2_grpc.py +360 -0
  48. chalk/_gen/chalk/server/v1/builder_pb2_grpc.pyi +96 -0
  49. chalk/_gen/chalk/server/v1/chart_pb2.py +10 -10
  50. chalk/_gen/chalk/server/v1/chart_pb2.pyi +18 -2
  51. chalk/_gen/chalk/server/v1/clickhouse_pb2.py +42 -0
  52. chalk/_gen/chalk/server/v1/clickhouse_pb2.pyi +17 -0
  53. chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.py +78 -0
  54. chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.pyi +38 -0
  55. chalk/_gen/chalk/server/v1/cloud_components_pb2.py +153 -107
  56. chalk/_gen/chalk/server/v1/cloud_components_pb2.pyi +146 -4
  57. chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.py +180 -0
  58. chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.pyi +48 -0
  59. chalk/_gen/chalk/server/v1/cloud_credentials_pb2.py +11 -3
  60. chalk/_gen/chalk/server/v1/cloud_credentials_pb2.pyi +20 -0
  61. chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.py +45 -0
  62. chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.pyi +12 -0
  63. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.py +59 -35
  64. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.pyi +127 -1
  65. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.py +135 -0
  66. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.pyi +36 -0
  67. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.py +90 -0
  68. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.pyi +264 -0
  69. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.py +170 -0
  70. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.pyi +62 -0
  71. chalk/_gen/chalk/server/v1/datasets_pb2.py +36 -24
  72. chalk/_gen/chalk/server/v1/datasets_pb2.pyi +71 -2
  73. chalk/_gen/chalk/server/v1/datasets_pb2_grpc.py +45 -0
  74. chalk/_gen/chalk/server/v1/datasets_pb2_grpc.pyi +12 -0
  75. chalk/_gen/chalk/server/v1/deploy_pb2.py +9 -3
  76. chalk/_gen/chalk/server/v1/deploy_pb2.pyi +12 -0
  77. chalk/_gen/chalk/server/v1/deploy_pb2_grpc.py +45 -0
  78. chalk/_gen/chalk/server/v1/deploy_pb2_grpc.pyi +12 -0
  79. chalk/_gen/chalk/server/v1/deployment_pb2.py +20 -15
  80. chalk/_gen/chalk/server/v1/deployment_pb2.pyi +25 -0
  81. chalk/_gen/chalk/server/v1/environment_pb2.py +25 -15
  82. chalk/_gen/chalk/server/v1/environment_pb2.pyi +93 -1
  83. chalk/_gen/chalk/server/v1/eventbus_pb2.py +44 -0
  84. chalk/_gen/chalk/server/v1/eventbus_pb2.pyi +64 -0
  85. chalk/_gen/chalk/server/v1/eventbus_pb2_grpc.py +4 -0
  86. chalk/_gen/chalk/server/v1/eventbus_pb2_grpc.pyi +4 -0
  87. chalk/_gen/chalk/server/v1/files_pb2.py +65 -0
  88. chalk/_gen/chalk/server/v1/files_pb2.pyi +167 -0
  89. chalk/_gen/chalk/server/v1/files_pb2_grpc.py +4 -0
  90. chalk/_gen/chalk/server/v1/files_pb2_grpc.pyi +4 -0
  91. chalk/_gen/chalk/server/v1/graph_pb2.py +41 -3
  92. chalk/_gen/chalk/server/v1/graph_pb2.pyi +191 -0
  93. chalk/_gen/chalk/server/v1/graph_pb2_grpc.py +92 -0
  94. chalk/_gen/chalk/server/v1/graph_pb2_grpc.pyi +32 -0
  95. chalk/_gen/chalk/server/v1/incident_pb2.py +57 -0
  96. chalk/_gen/chalk/server/v1/incident_pb2.pyi +165 -0
  97. chalk/_gen/chalk/server/v1/incident_pb2_grpc.py +4 -0
  98. chalk/_gen/chalk/server/v1/incident_pb2_grpc.pyi +4 -0
  99. chalk/_gen/chalk/server/v1/indexing_job_pb2.py +44 -0
  100. chalk/_gen/chalk/server/v1/indexing_job_pb2.pyi +38 -0
  101. chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.py +78 -0
  102. chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.pyi +38 -0
  103. chalk/_gen/chalk/server/v1/integrations_pb2.py +11 -9
  104. chalk/_gen/chalk/server/v1/integrations_pb2.pyi +34 -2
  105. chalk/_gen/chalk/server/v1/kube_pb2.py +29 -19
  106. chalk/_gen/chalk/server/v1/kube_pb2.pyi +28 -0
  107. chalk/_gen/chalk/server/v1/kube_pb2_grpc.py +45 -0
  108. chalk/_gen/chalk/server/v1/kube_pb2_grpc.pyi +12 -0
  109. chalk/_gen/chalk/server/v1/log_pb2.py +21 -3
  110. chalk/_gen/chalk/server/v1/log_pb2.pyi +68 -0
  111. chalk/_gen/chalk/server/v1/log_pb2_grpc.py +90 -0
  112. chalk/_gen/chalk/server/v1/log_pb2_grpc.pyi +24 -0
  113. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2.py +73 -0
  114. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2.pyi +212 -0
  115. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2_grpc.py +217 -0
  116. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2_grpc.pyi +74 -0
  117. chalk/_gen/chalk/server/v1/model_registry_pb2.py +10 -10
  118. chalk/_gen/chalk/server/v1/model_registry_pb2.pyi +4 -1
  119. chalk/_gen/chalk/server/v1/monitoring_pb2.py +84 -75
  120. chalk/_gen/chalk/server/v1/monitoring_pb2.pyi +1 -0
  121. chalk/_gen/chalk/server/v1/monitoring_pb2_grpc.py +136 -0
  122. chalk/_gen/chalk/server/v1/monitoring_pb2_grpc.pyi +38 -0
  123. chalk/_gen/chalk/server/v1/offline_queries_pb2.py +32 -10
  124. chalk/_gen/chalk/server/v1/offline_queries_pb2.pyi +73 -0
  125. chalk/_gen/chalk/server/v1/offline_queries_pb2_grpc.py +90 -0
  126. chalk/_gen/chalk/server/v1/offline_queries_pb2_grpc.pyi +24 -0
  127. chalk/_gen/chalk/server/v1/plandebug_pb2.py +53 -0
  128. chalk/_gen/chalk/server/v1/plandebug_pb2.pyi +86 -0
  129. chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.py +168 -0
  130. chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.pyi +60 -0
  131. chalk/_gen/chalk/server/v1/queries_pb2.py +76 -48
  132. chalk/_gen/chalk/server/v1/queries_pb2.pyi +155 -2
  133. chalk/_gen/chalk/server/v1/queries_pb2_grpc.py +180 -0
  134. chalk/_gen/chalk/server/v1/queries_pb2_grpc.pyi +48 -0
  135. chalk/_gen/chalk/server/v1/scheduled_query_pb2.py +4 -2
  136. chalk/_gen/chalk/server/v1/scheduled_query_pb2_grpc.py +45 -0
  137. chalk/_gen/chalk/server/v1/scheduled_query_pb2_grpc.pyi +12 -0
  138. chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.py +12 -6
  139. chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.pyi +75 -2
  140. chalk/_gen/chalk/server/v1/scheduler_pb2.py +24 -12
  141. chalk/_gen/chalk/server/v1/scheduler_pb2.pyi +61 -1
  142. chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.py +90 -0
  143. chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.pyi +24 -0
  144. chalk/_gen/chalk/server/v1/script_tasks_pb2.py +26 -14
  145. chalk/_gen/chalk/server/v1/script_tasks_pb2.pyi +33 -3
  146. chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.py +90 -0
  147. chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.pyi +24 -0
  148. chalk/_gen/chalk/server/v1/sql_interface_pb2.py +75 -0
  149. chalk/_gen/chalk/server/v1/sql_interface_pb2.pyi +142 -0
  150. chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.py +349 -0
  151. chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.pyi +114 -0
  152. chalk/_gen/chalk/server/v1/sql_queries_pb2.py +48 -0
  153. chalk/_gen/chalk/server/v1/sql_queries_pb2.pyi +150 -0
  154. chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.py +123 -0
  155. chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.pyi +52 -0
  156. chalk/_gen/chalk/server/v1/team_pb2.py +156 -137
  157. chalk/_gen/chalk/server/v1/team_pb2.pyi +56 -10
  158. chalk/_gen/chalk/server/v1/team_pb2_grpc.py +90 -0
  159. chalk/_gen/chalk/server/v1/team_pb2_grpc.pyi +24 -0
  160. chalk/_gen/chalk/server/v1/topic_pb2.py +5 -3
  161. chalk/_gen/chalk/server/v1/topic_pb2.pyi +10 -1
  162. chalk/_gen/chalk/server/v1/trace_pb2.py +50 -28
  163. chalk/_gen/chalk/server/v1/trace_pb2.pyi +121 -0
  164. chalk/_gen/chalk/server/v1/trace_pb2_grpc.py +135 -0
  165. chalk/_gen/chalk/server/v1/trace_pb2_grpc.pyi +42 -0
  166. chalk/_gen/chalk/server/v1/webhook_pb2.py +9 -3
  167. chalk/_gen/chalk/server/v1/webhook_pb2.pyi +18 -0
  168. chalk/_gen/chalk/server/v1/webhook_pb2_grpc.py +45 -0
  169. chalk/_gen/chalk/server/v1/webhook_pb2_grpc.pyi +12 -0
  170. chalk/_gen/chalk/streaming/v1/debug_service_pb2.py +62 -0
  171. chalk/_gen/chalk/streaming/v1/debug_service_pb2.pyi +75 -0
  172. chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.py +221 -0
  173. chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.pyi +88 -0
  174. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.py +19 -7
  175. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.pyi +96 -3
  176. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.py +48 -0
  177. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.pyi +20 -0
  178. chalk/_gen/chalk/utils/v1/field_change_pb2.py +32 -0
  179. chalk/_gen/chalk/utils/v1/field_change_pb2.pyi +42 -0
  180. chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.py +4 -0
  181. chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.pyi +4 -0
  182. chalk/_lsp/error_builder.py +11 -0
  183. chalk/_monitoring/Chart.py +1 -3
  184. chalk/_version.py +1 -1
  185. chalk/cli.py +5 -10
  186. chalk/client/client.py +178 -64
  187. chalk/client/client_async.py +154 -0
  188. chalk/client/client_async_impl.py +22 -0
  189. chalk/client/client_grpc.py +738 -112
  190. chalk/client/client_impl.py +541 -136
  191. chalk/client/dataset.py +27 -6
  192. chalk/client/models.py +99 -2
  193. chalk/client/serialization/model_serialization.py +126 -10
  194. chalk/config/project_config.py +1 -1
  195. chalk/df/LazyFramePlaceholder.py +1154 -0
  196. chalk/df/ast_parser.py +2 -10
  197. chalk/features/_class_property.py +7 -0
  198. chalk/features/_embedding/embedding.py +1 -0
  199. chalk/features/_embedding/sentence_transformer.py +1 -1
  200. chalk/features/_encoding/converter.py +83 -2
  201. chalk/features/_encoding/pyarrow.py +20 -4
  202. chalk/features/_encoding/rich.py +1 -3
  203. chalk/features/_tensor.py +1 -2
  204. chalk/features/dataframe/_filters.py +14 -5
  205. chalk/features/dataframe/_impl.py +91 -36
  206. chalk/features/dataframe/_validation.py +11 -7
  207. chalk/features/feature_field.py +40 -30
  208. chalk/features/feature_set.py +1 -2
  209. chalk/features/feature_set_decorator.py +1 -0
  210. chalk/features/feature_wrapper.py +42 -3
  211. chalk/features/hooks.py +81 -12
  212. chalk/features/inference.py +65 -10
  213. chalk/features/resolver.py +338 -56
  214. chalk/features/tag.py +1 -3
  215. chalk/features/underscore_features.py +2 -1
  216. chalk/functions/__init__.py +456 -21
  217. chalk/functions/holidays.py +1 -3
  218. chalk/gitignore/gitignore_parser.py +5 -1
  219. chalk/importer.py +186 -74
  220. chalk/ml/__init__.py +6 -2
  221. chalk/ml/model_hooks.py +368 -51
  222. chalk/ml/model_reference.py +68 -10
  223. chalk/ml/model_version.py +34 -21
  224. chalk/ml/utils.py +143 -40
  225. chalk/operators/_utils.py +14 -3
  226. chalk/parsed/_proto/export.py +22 -0
  227. chalk/parsed/duplicate_input_gql.py +4 -0
  228. chalk/parsed/expressions.py +1 -3
  229. chalk/parsed/json_conversions.py +21 -14
  230. chalk/parsed/to_proto.py +16 -4
  231. chalk/parsed/user_types_to_json.py +31 -10
  232. chalk/parsed/validation_from_registries.py +182 -0
  233. chalk/queries/named_query.py +16 -6
  234. chalk/queries/scheduled_query.py +13 -1
  235. chalk/serialization/parsed_annotation.py +25 -12
  236. chalk/sql/__init__.py +221 -0
  237. chalk/sql/_internal/integrations/athena.py +6 -1
  238. chalk/sql/_internal/integrations/bigquery.py +22 -2
  239. chalk/sql/_internal/integrations/databricks.py +61 -18
  240. chalk/sql/_internal/integrations/mssql.py +281 -0
  241. chalk/sql/_internal/integrations/postgres.py +11 -3
  242. chalk/sql/_internal/integrations/redshift.py +4 -0
  243. chalk/sql/_internal/integrations/snowflake.py +11 -2
  244. chalk/sql/_internal/integrations/util.py +2 -1
  245. chalk/sql/_internal/sql_file_resolver.py +55 -10
  246. chalk/sql/_internal/sql_source.py +36 -2
  247. chalk/streams/__init__.py +1 -3
  248. chalk/streams/_kafka_source.py +5 -1
  249. chalk/streams/_windows.py +16 -4
  250. chalk/streams/types.py +1 -2
  251. chalk/utils/__init__.py +1 -3
  252. chalk/utils/_otel_version.py +13 -0
  253. chalk/utils/async_helpers.py +14 -5
  254. chalk/utils/df_utils.py +2 -2
  255. chalk/utils/duration.py +1 -3
  256. chalk/utils/job_log_display.py +538 -0
  257. chalk/utils/missing_dependency.py +5 -4
  258. chalk/utils/notebook.py +255 -2
  259. chalk/utils/pl_helpers.py +190 -37
  260. chalk/utils/pydanticutil/pydantic_compat.py +1 -2
  261. chalk/utils/storage_client.py +246 -0
  262. chalk/utils/threading.py +1 -3
  263. chalk/utils/tracing.py +194 -86
  264. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/METADATA +53 -21
  265. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/RECORD +268 -198
  266. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/WHEEL +0 -0
  267. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/entry_points.txt +0 -0
  268. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/top_level.txt +0 -0
chalk/df/ast_parser.py CHANGED
@@ -93,9 +93,7 @@ def parse_dataframe_getitem():
93
93
  )
94
94
  assert isinstance(func_node, ast.Subscript)
95
95
  slc = func_node.slice
96
- if isinstance(slc, ast.Index):
97
- slc = slc.value # type: ignore
98
- assert isinstance(slc, ast.expr)
96
+ assert isinstance(slc, ast.expr)
99
97
  converted_slice = convert_slice(slc)
100
98
  return eval_converted_expr(converted_slice, glbs=func_frame.f_globals, lcls=func_frame.f_locals)
101
99
 
@@ -227,13 +225,7 @@ def _convert_maybe_tuple(slc: ast.expr):
227
225
  return _convert_ops(slc)
228
226
 
229
227
 
230
- def convert_slice(slc: Union[ast.expr, ast.Index]):
231
- if isinstance(slc, ast.Index):
232
- # Index is deprecated in Python 3.9+
233
- slc = slc.value # type: ignore
234
- assert isinstance(slc, ast.expr)
235
- slc = _convert_maybe_tuple(slc)
236
- return ast.Index(value=slc) # pyright: ignore[reportCallIssue]
228
+ def convert_slice(slc: ast.expr):
237
229
  return _convert_maybe_tuple(slc)
238
230
 
239
231
 
@@ -5,6 +5,8 @@ import functools
5
5
  from typing import Any, Callable, List, Type, TypeVar, cast
6
6
 
7
7
  from chalk._lsp.error_builder import FeatureClassErrorBuilder
8
+ from chalk.features.feature_wrapper import UnresolvedFeature
9
+ from chalk.utils.notebook import is_notebook
8
10
 
9
11
  T = TypeVar("T")
10
12
  V = TypeVar("V")
@@ -54,6 +56,11 @@ def classproperty_support(cls: Type[T]) -> Type[T]:
54
56
  if (res := self.__chalk_notebook_feature_expressions__.get(item)) is not None:
55
57
  return res
56
58
 
59
+ # If in notebook, fallback to constructing FQN string instead of raising error
60
+ if is_notebook():
61
+ fqn = f"{self.namespace}.{item}"
62
+ return UnresolvedFeature(fqn)
63
+
57
64
  builder: FeatureClassErrorBuilder = self.__chalk_error_builder__
58
65
  builder.invalid_attribute(
59
66
  root_feature_str=self.namespace,
@@ -25,6 +25,7 @@ from chalk.utils.collections import ensure_tuple
25
25
  SUPPORTED_LOCAL_MODELS = {
26
26
  "all-MiniLM-L6-v2", # https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
27
27
  "sample-bert", # For internal Chalk use
28
+ "sample-linear-nn", # For internal Chalk use
28
29
  }
29
30
 
30
31
  # This will eventually be included in SUPPORTED_LOCAL_MODELS
@@ -111,7 +111,7 @@ class SentenceTransformerProvider(EmbeddingProvider):
111
111
  raise ValueError(
112
112
  f"Expected to find an embedding for input at position {idx}, but the response data was exhausted."
113
113
  )
114
- yield create_fixedsize_with_nulls(response, self.dimensions)
114
+ yield create_fixedsize_with_nulls(values_with_nulls, self.dimensions)
115
115
 
116
116
  def get_vector_class(self) -> Type[Vector]:
117
117
  return Vector[self.dimensions]
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import io
3
4
  import json
4
5
  import types
5
6
  import typing
@@ -50,6 +51,7 @@ from chalk.features._encoding.pyarrow import (
50
51
  rich_to_pyarrow,
51
52
  )
52
53
  from chalk.features._encoding.rich import structure_primitive_to_rich, unstructure_rich_to_primitive
54
+ from chalk.features.feature_wrapper import UnresolvedFeature
53
55
  from chalk.utils.collections import unwrap_annotated_if_needed, unwrap_optional_and_annotated_if_needed
54
56
  from chalk.utils.df_utils import pa_array_to_pl_series
55
57
  from chalk.utils.json import JSON, TJSON, is_pyarrow_json_type, pyarrow_json_type
@@ -870,6 +872,79 @@ class PrimitiveFeatureConverter(Generic[_TPrim]):
870
872
  else:
871
873
  raise TypeError(f"Could not convert the pyarrow dtype {dtype} to a protobuf message")
872
874
 
875
+ @classmethod
876
+ def convert_pa_field_to_proto_field(cls, field: pa.Field) -> pb.Field:
877
+ """Convert a PyArrow Field to proto Field."""
878
+ field_proto = pb.Field(
879
+ name=field.name, arrow_type=cls.convert_pa_dtype_to_proto_dtype(field.type), nullable=field.nullable
880
+ )
881
+
882
+ if field.metadata:
883
+ # field.metadata is of types dict[bytes, bytes]
884
+ for k, v in field.metadata.items():
885
+ field_proto.metadata[k.decode("utf-8")] = v.decode("utf-8")
886
+
887
+ return field_proto
888
+
889
+ @classmethod
890
+ def convert_proto_field_to_pa_field(cls, proto_field: pb.Field) -> pa.Field:
891
+ """Convert a proto Field to PyArrow Field."""
892
+ arrow_type = cls.convert_proto_dtype_to_pa_dtype(proto_field.arrow_type)
893
+
894
+ # don't have to convert back to dict[bytes, bytes] as can initialize with dict[str, str]
895
+ metadata = dict(proto_field.metadata) if proto_field.metadata else None
896
+
897
+ return pa.field(
898
+ name=proto_field.name,
899
+ type=arrow_type,
900
+ nullable=proto_field.nullable,
901
+ metadata=metadata,
902
+ )
903
+
904
+ @classmethod
905
+ def convert_pa_schema_to_proto_schema(cls, schema: pa.Schema) -> pb.Schema:
906
+ schema_proto = pb.Schema(
907
+ columns=[cls.convert_pa_field_to_proto_field(field) for field in schema],
908
+ )
909
+
910
+ if schema.metadata:
911
+ # schema.metadata is of types dict[bytes, bytes]
912
+ for k, v in schema.metadata.items():
913
+ schema_proto.metadata[k.decode("utf-8")] = v.decode("utf-8")
914
+
915
+ return schema_proto
916
+
917
+ @classmethod
918
+ def convert_proto_schema_to_pa_schema(cls, proto_schema: pb.Schema) -> pa.Schema:
919
+ fields = [cls.convert_proto_field_to_pa_field(proto_field) for proto_field in proto_schema.columns]
920
+
921
+ # don't have to convert back to dict[bytes, bytes] as can initialize with dict[str, str]
922
+ metadata = dict(proto_schema.metadata) if proto_schema.metadata else None
923
+
924
+ return pa.schema(fields, metadata=metadata)
925
+
926
+ @staticmethod
927
+ def convert_arrow_table_to_proto(table: pa.Table | pa.RecordBatch) -> pb.TableParquetBytes:
928
+ if isinstance(table, pa.RecordBatch):
929
+ table = pa.Table.from_batches([table])
930
+ elif isinstance(table, pa.Table):
931
+ pass
932
+ else:
933
+ raise TypeError(f"expected pa.Table or pa.RecordBatch, got {type(table)!r}")
934
+
935
+ sink = io.BytesIO()
936
+ import pyarrow.parquet
937
+
938
+ pyarrow.parquet.write_table(table, sink)
939
+ return pb.TableParquetBytes(encoded_parquet_bytes=sink.getvalue())
940
+
941
+ @staticmethod
942
+ def convert_arrow_table_from_proto(proto: pb.TableParquetBytes) -> pa.Table:
943
+ import pyarrow.parquet
944
+
945
+ pf = pyarrow.parquet.ParquetFile(io.BytesIO(proto.encoded_parquet_bytes))
946
+ return pyarrow.parquet.read_table(pf)
947
+
873
948
  @staticmethod
874
949
  def _serialize_pa_decimal_to_pb(value: Union[pa.Decimal128Scalar, pa.Decimal256Scalar]) -> pb.ScalarValue:
875
950
  dec_val = value.as_py()
@@ -1183,8 +1258,14 @@ class FeatureConverter(PrimitiveFeatureConverter[_TPrim], Generic[_TPrim, _TRich
1183
1258
  # because it is also used for error handling inside of `from_rich_to_primitive`.
1184
1259
  self._name = name
1185
1260
  if rich_default != ...:
1186
- # The missing value strategy doesn't really matter because rich_default is not missing
1187
- primitive_default = self.from_rich_to_primitive(rich_default, missing_value_strategy="allow")
1261
+ # In notebook environments, UnresolvedFeature may be used as a placeholder
1262
+ # for features that can't be resolved due to a stale registry.
1263
+ # Treat these as missing defaults since they're not concrete values.
1264
+ if isinstance(rich_default, UnresolvedFeature):
1265
+ rich_default = ...
1266
+ else:
1267
+ # The missing value strategy doesn't really matter because rich_default is not missing
1268
+ primitive_default = self.from_rich_to_primitive(rich_default, missing_value_strategy="allow")
1188
1269
  super().__init__(
1189
1270
  name, is_nullable=is_nullable, pyarrow_dtype=pyarrow_dtype, primitive_default=primitive_default
1190
1271
  )
@@ -8,12 +8,28 @@ import ipaddress
8
8
  import typing
9
9
  import uuid
10
10
  from datetime import date, datetime, time, timedelta
11
- from typing import TYPE_CHECKING, Any, Dict, FrozenSet, List, Mapping, Set, Tuple, Type, cast
11
+ from typing import (
12
+ TYPE_CHECKING,
13
+ Annotated,
14
+ Any,
15
+ Dict,
16
+ FrozenSet,
17
+ List,
18
+ Literal,
19
+ Mapping,
20
+ Set,
21
+ Tuple,
22
+ Type,
23
+ TypeGuard,
24
+ cast,
25
+ get_args,
26
+ get_origin,
27
+ is_typeddict,
28
+ )
12
29
 
13
30
  import attrs
14
31
  import google.protobuf.message
15
32
  import pyarrow as pa
16
- from typing_extensions import Annotated, Literal, TypeGuard, get_args, get_origin, is_typeddict
17
33
 
18
34
  from chalk.features._encoding.http import HttpResponse, get_http_response_as_pyarrow
19
35
  from chalk.features._encoding.primitive import ChalkStructType, TPrimitive
@@ -24,7 +40,7 @@ from chalk.utils.collections import is_namedtuple, is_optional, unwrap_optional_
24
40
  from chalk.utils.enum import get_enum_value_type
25
41
  from chalk.utils.json import JSON, is_pyarrow_json_type
26
42
  from chalk.utils.missing_dependency import missing_dependency_exception
27
- from chalk.utils.pl_helpers import is_new_polars
43
+ from chalk.utils.pl_helpers import is_new_polars, pl_array
28
44
  from chalk.utils.pydanticutil.pydantic_compat import is_pydantic_basemodel
29
45
 
30
46
  if TYPE_CHECKING:
@@ -418,7 +434,7 @@ def pyarrow_to_polars(
418
434
  underlying = pa_type.value_type
419
435
  if is_new_polars and use_fixed_size_list:
420
436
  # pl.Array is only available in polars >=0.18
421
- return pl.Array(inner=pyarrow_to_polars(underlying, name=f"{name}[]"), width=pa_type.list_size)
437
+ return pl_array(inner=pyarrow_to_polars(underlying, name=f"{name}[]"), size=pa_type.list_size)
422
438
  else:
423
439
  return pl.List(pyarrow_to_polars(underlying, name=f"{name}[]"))
424
440
  if pa.types.is_struct(pa_type):
@@ -8,7 +8,7 @@ import enum
8
8
  import ipaddress
9
9
  import uuid
10
10
  from datetime import date, datetime, time, timedelta
11
- from typing import Any, FrozenSet, List, Set, Tuple, Type, TypeVar, Union, cast
11
+ from typing import Any, FrozenSet, List, Set, Tuple, Type, TypeVar, Union, cast, get_args, get_origin, is_typeddict
12
12
 
13
13
  import attrs
14
14
  import cattrs
@@ -23,8 +23,6 @@ try:
23
23
  except ImportError:
24
24
  V1BaseModel = None
25
25
 
26
- from typing_extensions import get_args, get_origin, is_typeddict
27
-
28
26
  from chalk.features._encoding.primitive import ChalkStructType, TPrimitive
29
27
  from chalk.utils.cached_type_hints import cached_get_type_hints
30
28
  from chalk.utils.collections import is_namedtuple, unwrap_optional_and_annotated_if_needed
chalk/features/_tensor.py CHANGED
@@ -1,11 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from enum import Enum
4
- from typing import Any, Tuple, Type, Union, overload
4
+ from typing import Any, Tuple, Type, TypeGuard, Union, overload
5
5
 
6
6
  import numpy as np
7
7
  import pyarrow as pa
8
- from typing_extensions import TypeGuard
9
8
 
10
9
  TensorDimension = Union[int, str]
11
10
 
@@ -4,16 +4,17 @@ import collections.abc
4
4
  import datetime
5
5
  import enum
6
6
  import functools
7
- from typing import TYPE_CHECKING, Any, List, Mapping, Optional, Protocol, Sequence, TypeVar, Union, cast
7
+ from typing import TYPE_CHECKING, Any, List, Mapping, Optional, Protocol, Sequence, TypeGuard, TypeVar, Union, cast
8
8
 
9
9
  import pyarrow as pa
10
- from typing_extensions import Self, TypeGuard
10
+ from typing_extensions import Self
11
11
 
12
12
  from chalk.features._encoding.converter import pyarrow_to_polars
13
13
  from chalk.features.feature_field import Feature
14
14
  from chalk.features.feature_wrapper import FeatureWrapper, unwrap_feature
15
15
  from chalk.features.filter import Filter, TimeDelta, get_filter_now
16
16
  from chalk.utils.collections import ensure_tuple
17
+ from chalk.utils.pl_helpers import polars_lazy_frame_collect_schema
17
18
 
18
19
  if TYPE_CHECKING:
19
20
  import polars as pl
@@ -442,7 +443,7 @@ class _PolarsStructAdapter(StructAdapter["pl.Expr"]):
442
443
 
443
444
  def filter_data_frame(
444
445
  item: Any,
445
- underlying: Union[pl.DataFrame, pl.LazyFrame],
446
+ underlying: pl.LazyFrame,
446
447
  namespace: Optional[str],
447
448
  ) -> Union[pl.DataFrame, pl.LazyFrame]:
448
449
 
@@ -463,7 +464,10 @@ def filter_data_frame(
463
464
  )
464
465
  now = get_filter_now()
465
466
  if len(projections) > 0:
466
- key_error_or_none = dataframe_missing_key_error(projections, underlying.columns)
467
+ key_error_or_none = dataframe_missing_key_error(
468
+ projections,
469
+ (underlying.collect_schema().names() if polars_lazy_frame_collect_schema else underlying.columns),
470
+ )
467
471
  if key_error_or_none is not None:
468
472
  raise key_error_or_none
469
473
  # now = datetime.datetime.now(tz=datetime.timezone.utc)
@@ -472,7 +476,12 @@ def filter_data_frame(
472
476
  timestamp_feature = (
473
477
  None if namespace is None else CURRENT_FEATURE_REGISTRY.get().get_feature_sets()[namespace].__chalk_ts__
474
478
  )
475
- pl_expr = convert_filters_to_pl_expr(filters, underlying.schema, timestamp_feature, now)
479
+ pl_expr = convert_filters_to_pl_expr(
480
+ filters,
481
+ (underlying.collect_schema() if polars_lazy_frame_collect_schema else underlying.schema),
482
+ timestamp_feature,
483
+ now,
484
+ )
476
485
  df = underlying
477
486
  if pl_expr is not None:
478
487
  df = df.filter(pl_expr)
@@ -32,7 +32,6 @@ from typing import (
32
32
  overload,
33
33
  )
34
34
 
35
- import packaging.version
36
35
  import pyarrow as pa
37
36
 
38
37
  from chalk.features._chalkop import Aggregation
@@ -56,6 +55,13 @@ from chalk.utils.df_utils import (
56
55
  )
57
56
  from chalk.utils.duration import Duration, parse_chalk_duration
58
57
  from chalk.utils.missing_dependency import missing_dependency_exception
58
+ from chalk.utils.pl_helpers import (
59
+ polars_group_by_instead_of_groupby,
60
+ polars_lazy_frame_collect_schema,
61
+ polars_name_dot_suffix_instead_of_suffix,
62
+ polars_uses_schema_overrides,
63
+ schema_compat,
64
+ )
59
65
  from chalk.utils.pydanticutil.pydantic_compat import is_pydantic_basemodel
60
66
 
61
67
  if TYPE_CHECKING:
@@ -473,7 +479,12 @@ class DataFrame(metaclass=DataFrameMeta):
473
479
  raise ValueError(f"Unable to convert data of type {type(data).__name__} into a DataFrame")
474
480
  # Rename / validate that all column names are root fqns
475
481
  if self._pydantic_model is None:
476
- self.columns = tuple(Feature.from_root_fqn(str(c)) for c in underlying.columns)
482
+ self.columns = tuple(
483
+ Feature.from_root_fqn(str(c))
484
+ for c in (
485
+ underlying.collect_schema().names() if polars_lazy_frame_collect_schema else underlying.columns
486
+ )
487
+ )
477
488
  else:
478
489
  self.columns = ()
479
490
 
@@ -505,7 +516,13 @@ class DataFrame(metaclass=DataFrameMeta):
505
516
  """
506
517
  import polars as pl
507
518
 
508
- rename_map = {x: Distance.fqn for x in underlying.columns if x in self._distance_feature_fqns}
519
+ rename_map = {
520
+ x: Distance.fqn
521
+ for x in (
522
+ underlying.collect_schema().names() if polars_lazy_frame_collect_schema else underlying.columns
523
+ ) # pyright: ignore
524
+ if x in self._distance_feature_fqns
525
+ }
509
526
 
510
527
  underlying = underlying.rename(rename_map)
511
528
  if len(rename_map) > 0:
@@ -620,7 +637,14 @@ class DataFrame(metaclass=DataFrameMeta):
620
637
  )
621
638
  elif all(isinstance(col, str) for col in ensure_tuple(item)):
622
639
  # Select the columns with `.select()` since they're by name.
623
- key_error_or_none = dataframe_missing_key_error(ensure_tuple(item), self._underlying.columns)
640
+ key_error_or_none = dataframe_missing_key_error(
641
+ ensure_tuple(item),
642
+ (
643
+ self._underlying.collect_schema().names()
644
+ if polars_lazy_frame_collect_schema
645
+ else self._underlying.columns
646
+ ),
647
+ )
624
648
  if key_error_or_none is not None:
625
649
  raise key_error_or_none
626
650
  materialized = self._materialize()
@@ -700,7 +724,7 @@ class DataFrame(metaclass=DataFrameMeta):
700
724
  if len(operation.filters) > 0:
701
725
  f = convert_filters_to_pl_expr(
702
726
  operation.filters,
703
- self._underlying.schema,
727
+ schema_compat(self._underlying),
704
728
  timestamp_feature,
705
729
  now,
706
730
  )
@@ -711,10 +735,10 @@ class DataFrame(metaclass=DataFrameMeta):
711
735
 
712
736
  data = self._underlying.lazy()
713
737
 
714
- if packaging.version.parse(pl.__version__) <= packaging.version.parse("0.19.0"):
715
- data = data.groupby(groupby)
716
- else:
738
+ if polars_group_by_instead_of_groupby:
717
739
  data = data.group_by(groupby)
740
+ else:
741
+ data = data.groupby(groupby) # pyright: ignore
718
742
 
719
743
  data = data.agg(cols).collect()
720
744
 
@@ -778,7 +802,7 @@ class DataFrame(metaclass=DataFrameMeta):
778
802
 
779
803
  col_str = str(column)
780
804
 
781
- col_dtype = self._underlying.schema[col_str]
805
+ col_dtype = schema_compat(self._underlying)[col_str]
782
806
  underlying = self._underlying
783
807
  if col_dtype != pl.Float64() and col_dtype != pl.Float32():
784
808
  underlying = underlying.select(pl.col(col_str).cast(pl.Float32))
@@ -965,7 +989,7 @@ class DataFrame(metaclass=DataFrameMeta):
965
989
  if len(operation.filters) > 0:
966
990
  f = convert_filters_to_pl_expr(
967
991
  operation.filters,
968
- self._underlying.schema,
992
+ schema_compat(self._underlying),
969
993
  timestamp_feature,
970
994
  now,
971
995
  )
@@ -974,22 +998,40 @@ class DataFrame(metaclass=DataFrameMeta):
974
998
 
975
999
  cols.append(operation.fn(c).alias(str(alias)))
976
1000
 
977
- return DataFrame(
978
- self._underlying.lazy()
979
- .sort(str(index), descending=False)
980
- .groupby_dynamic(
981
- index_column=str(index),
982
- by=groupby,
983
- offset=offset,
984
- every=every,
985
- period=period,
986
- start_by=start_by,
1001
+ if polars_group_by_instead_of_groupby:
1002
+ return DataFrame(
1003
+ self._underlying.lazy()
1004
+ .sort(str(index), descending=False)
1005
+ .group_by_dynamic(
1006
+ index_column=str(index),
1007
+ group_by=groupby,
1008
+ offset=offset,
1009
+ every=every,
1010
+ period=period,
1011
+ start_by=start_by,
1012
+ )
1013
+ .agg(cols)
1014
+ .collect(),
1015
+ convert_dtypes=self._convert_dtypes,
1016
+ pydantic_model=self._pydantic_model,
1017
+ )
1018
+ else:
1019
+ return DataFrame(
1020
+ self._underlying.lazy()
1021
+ .sort(str(index), descending=False)
1022
+ .groupby_dynamic( # pyright: ignore
1023
+ index_column=str(index),
1024
+ by=groupby,
1025
+ offset=offset,
1026
+ every=every,
1027
+ period=period,
1028
+ start_by=start_by,
1029
+ )
1030
+ .agg(cols)
1031
+ .collect(),
1032
+ convert_dtypes=self._convert_dtypes,
1033
+ pydantic_model=self._pydantic_model,
987
1034
  )
988
- .agg(cols)
989
- .collect(),
990
- convert_dtypes=self._convert_dtypes,
991
- pydantic_model=self._pydantic_model,
992
- )
993
1035
 
994
1036
  def join(
995
1037
  self,
@@ -1200,7 +1242,7 @@ class DataFrame(metaclass=DataFrameMeta):
1200
1242
  if len(operation.filters) > 0:
1201
1243
  f = convert_filters_to_pl_expr(
1202
1244
  operation.filters,
1203
- self._underlying.schema,
1245
+ schema_compat(self._underlying),
1204
1246
  timestamp_feature,
1205
1247
  now,
1206
1248
  )
@@ -1424,15 +1466,25 @@ class DataFrame(metaclass=DataFrameMeta):
1424
1466
  else:
1425
1467
  cols_to_select, dtypes, new_columns = cls._parse_columns(columns)
1426
1468
 
1427
- # 'dtypes' deprecated for 'schema_overrides' in polars 0.20+, but parameter renamed without breaking
1428
- data = pl.read_csv(
1429
- source=path,
1430
- has_header=has_header,
1431
- columns=cols_to_select,
1432
- dtypes=dtypes, # pyright: ignore[reportCallIssue]
1433
- new_columns=new_columns,
1434
- storage_options=DataFrame._get_storage_options(),
1435
- )
1469
+ # 'dtypes' deprecated for 'schema_overrides' in polars 0.20.31+
1470
+ if polars_uses_schema_overrides:
1471
+ data = pl.read_csv(
1472
+ source=path,
1473
+ has_header=has_header,
1474
+ columns=cols_to_select,
1475
+ schema_overrides=dtypes, # pyright: ignore[reportCallIssue]
1476
+ new_columns=new_columns,
1477
+ storage_options=DataFrame._get_storage_options(),
1478
+ )
1479
+ else:
1480
+ data = pl.read_csv(
1481
+ source=path,
1482
+ has_header=has_header,
1483
+ columns=cols_to_select,
1484
+ dtypes=dtypes, # pyright: ignore[reportCallIssue]
1485
+ new_columns=new_columns,
1486
+ storage_options=DataFrame._get_storage_options(),
1487
+ )
1436
1488
  return cls(data)
1437
1489
 
1438
1490
  @classmethod
@@ -1922,7 +1974,10 @@ class DataFrame(metaclass=DataFrameMeta):
1922
1974
  raise ValueError("DataFrame dimensions do not match")
1923
1975
 
1924
1976
  suffix = "__POLARS_CMP_OTHER"
1925
- other_renamed = other.select(pl.all().suffix(suffix))
1977
+ if polars_name_dot_suffix_instead_of_suffix:
1978
+ other_renamed = other.select(pl.all().name.suffix(suffix))
1979
+ else:
1980
+ other_renamed = other.select(pl.all().suffix(suffix)) # pyright: ignore
1926
1981
  combined = pl.concat([materialized, other_renamed], how="horizontal")
1927
1982
 
1928
1983
  if op == "eq":
@@ -8,6 +8,7 @@ import isodate
8
8
  from chalk.features._encoding.missing_value import MissingValueStrategy
9
9
  from chalk.features.feature_field import Feature, FeatureNotFoundException
10
10
  from chalk.utils.collections import get_unique_item
11
+ from chalk.utils.pl_helpers import apply_compat, schema_compat, str_json_decode_compat
11
12
 
12
13
  if TYPE_CHECKING:
13
14
  import polars as pl
@@ -67,7 +68,7 @@ def validate_df_schema(underlying: Union[pl.DataFrame, pl.LazyFrame]):
67
68
  # This is called from within DataFrame.__init__, which validates that polars is installed
68
69
  import polars as pl
69
70
 
70
- for root_fqn, actual_dtype in underlying.schema.items():
71
+ for root_fqn, actual_dtype in schema_compat(underlying).items():
71
72
  feature = Feature.from_root_fqn(root_fqn)
72
73
  if feature.is_has_one or feature.is_has_many:
73
74
  continue
@@ -87,7 +88,7 @@ def validate_df_schema(underlying: Union[pl.DataFrame, pl.LazyFrame]):
87
88
  isinstance(expected_dtype, pl.List)
88
89
  and actual_dtype == pl.Utf8 # pyright: ignore[reportUnnecessaryComparison]
89
90
  ):
90
- col = pl.col(root_fqn).str.json_extract(expected_dtype)
91
+ col = str_json_decode_compat(pl.col(root_fqn), expected_dtype)
91
92
  try:
92
93
  underlying = underlying.with_columns(col.cast(expected_dtype))
93
94
  except (Exception, pl.PolarsPanicError) as e:
@@ -123,21 +124,24 @@ def validate_df_schema(underlying: Union[pl.DataFrame, pl.LazyFrame]):
123
124
  if isinstance(expected_dtype, pl.Datetime):
124
125
  # tzinfo = None if expected_dtype.time_zone is None else zoneinfo.ZoneInfo(expected_dtype.time_zone)
125
126
  underlying = underlying.with_columns(pl.col(root_fqn).str.strptime(pl.Datetime).alias(root_fqn))
126
- if cast(pl.Datetime, underlying.schema[root_fqn]).time_zone is not None:
127
+ if cast(pl.Datetime, schema_compat(underlying)[root_fqn]).time_zone is not None:
127
128
  assert expected_dtype.time_zone is not None
128
129
  cast_expr = pl.col(root_fqn).dt.convert_time_zone(expected_dtype.time_zone)
129
130
  else:
130
131
  cast_expr = pl.col(root_fqn).dt.replace_time_zone(expected_dtype.time_zone)
131
132
  elif expected_dtype == pl.Date:
132
- cast_expr = pl.col(root_fqn).apply(
133
+ cast_expr = apply_compat(
134
+ pl.col(root_fqn),
133
135
  lambda x: None if x is None else isodate.parse_date(x),
134
136
  )
135
137
  elif expected_dtype == pl.Time:
136
- cast_expr = pl.col(root_fqn).apply(
138
+ cast_expr = apply_compat(
139
+ pl.col(root_fqn),
137
140
  lambda x: None if x is None else isodate.parse_time(x),
138
141
  )
139
142
  elif expected_dtype == pl.Duration:
140
- cast_expr = pl.col(root_fqn).apply(
143
+ cast_expr = apply_compat(
144
+ pl.col(root_fqn),
141
145
  lambda x: None if x is None else isodate.parse_duration(x),
142
146
  )
143
147
  else:
@@ -168,7 +172,7 @@ def validate_nulls(
168
172
 
169
173
  if isinstance(underlying, pl.LazyFrame):
170
174
  underlying = underlying.collect()
171
- schema = underlying.schema
175
+ schema = schema_compat(underlying)
172
176
  null_count_rows = underlying.null_count().to_dicts()
173
177
  if len(null_count_rows) == 0:
174
178
  return underlying # Empty dataframe