chalkpy 2.90.1__py3-none-any.whl → 2.95.3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
Files changed (202)
  1. chalk/__init__.py +2 -1
  2. chalk/_gen/chalk/arrow/v1/arrow_pb2.py +7 -5
  3. chalk/_gen/chalk/arrow/v1/arrow_pb2.pyi +6 -0
  4. chalk/_gen/chalk/artifacts/v1/chart_pb2.py +16 -16
  5. chalk/_gen/chalk/artifacts/v1/chart_pb2.pyi +4 -0
  6. chalk/_gen/chalk/artifacts/v1/cron_query_pb2.py +8 -7
  7. chalk/_gen/chalk/artifacts/v1/cron_query_pb2.pyi +5 -0
  8. chalk/_gen/chalk/common/v1/offline_query_pb2.py +17 -15
  9. chalk/_gen/chalk/common/v1/offline_query_pb2.pyi +25 -0
  10. chalk/_gen/chalk/common/v1/script_task_pb2.py +3 -3
  11. chalk/_gen/chalk/common/v1/script_task_pb2.pyi +2 -0
  12. chalk/_gen/chalk/dataframe/__init__.py +0 -0
  13. chalk/_gen/chalk/dataframe/v1/__init__.py +0 -0
  14. chalk/_gen/chalk/dataframe/v1/dataframe_pb2.py +48 -0
  15. chalk/_gen/chalk/dataframe/v1/dataframe_pb2.pyi +123 -0
  16. chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.py +4 -0
  17. chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.pyi +4 -0
  18. chalk/_gen/chalk/graph/v1/graph_pb2.py +150 -149
  19. chalk/_gen/chalk/graph/v1/graph_pb2.pyi +25 -0
  20. chalk/_gen/chalk/graph/v1/sources_pb2.py +94 -84
  21. chalk/_gen/chalk/graph/v1/sources_pb2.pyi +56 -0
  22. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.py +79 -0
  23. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.pyi +377 -0
  24. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.py +4 -0
  25. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.pyi +4 -0
  26. chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.py +43 -7
  27. chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.pyi +252 -2
  28. chalk/_gen/chalk/protosql/v1/sql_service_pb2.py +54 -27
  29. chalk/_gen/chalk/protosql/v1/sql_service_pb2.pyi +131 -3
  30. chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.py +45 -0
  31. chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.pyi +14 -0
  32. chalk/_gen/chalk/python/v1/types_pb2.py +14 -14
  33. chalk/_gen/chalk/python/v1/types_pb2.pyi +8 -0
  34. chalk/_gen/chalk/server/v1/benchmark_pb2.py +76 -0
  35. chalk/_gen/chalk/server/v1/benchmark_pb2.pyi +156 -0
  36. chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.py +258 -0
  37. chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.pyi +84 -0
  38. chalk/_gen/chalk/server/v1/billing_pb2.py +40 -38
  39. chalk/_gen/chalk/server/v1/billing_pb2.pyi +17 -1
  40. chalk/_gen/chalk/server/v1/branches_pb2.py +45 -0
  41. chalk/_gen/chalk/server/v1/branches_pb2.pyi +80 -0
  42. chalk/_gen/chalk/server/v1/branches_pb2_grpc.pyi +36 -0
  43. chalk/_gen/chalk/server/v1/builder_pb2.py +358 -288
  44. chalk/_gen/chalk/server/v1/builder_pb2.pyi +360 -10
  45. chalk/_gen/chalk/server/v1/builder_pb2_grpc.py +225 -0
  46. chalk/_gen/chalk/server/v1/builder_pb2_grpc.pyi +60 -0
  47. chalk/_gen/chalk/server/v1/chart_pb2.py +10 -10
  48. chalk/_gen/chalk/server/v1/chart_pb2.pyi +18 -2
  49. chalk/_gen/chalk/server/v1/clickhouse_pb2.py +42 -0
  50. chalk/_gen/chalk/server/v1/clickhouse_pb2.pyi +17 -0
  51. chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.py +78 -0
  52. chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.pyi +38 -0
  53. chalk/_gen/chalk/server/v1/cloud_components_pb2.py +141 -119
  54. chalk/_gen/chalk/server/v1/cloud_components_pb2.pyi +106 -4
  55. chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.py +45 -0
  56. chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.pyi +12 -0
  57. chalk/_gen/chalk/server/v1/cloud_credentials_pb2.py +11 -3
  58. chalk/_gen/chalk/server/v1/cloud_credentials_pb2.pyi +20 -0
  59. chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.py +45 -0
  60. chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.pyi +12 -0
  61. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.py +52 -38
  62. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.pyi +62 -1
  63. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.py +90 -0
  64. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.pyi +24 -0
  65. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.py +90 -0
  66. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.pyi +264 -0
  67. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.py +170 -0
  68. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.pyi +62 -0
  69. chalk/_gen/chalk/server/v1/deploy_pb2.py +9 -3
  70. chalk/_gen/chalk/server/v1/deploy_pb2.pyi +12 -0
  71. chalk/_gen/chalk/server/v1/deploy_pb2_grpc.py +45 -0
  72. chalk/_gen/chalk/server/v1/deploy_pb2_grpc.pyi +12 -0
  73. chalk/_gen/chalk/server/v1/deployment_pb2.py +6 -6
  74. chalk/_gen/chalk/server/v1/deployment_pb2.pyi +20 -0
  75. chalk/_gen/chalk/server/v1/environment_pb2.py +14 -12
  76. chalk/_gen/chalk/server/v1/environment_pb2.pyi +19 -0
  77. chalk/_gen/chalk/server/v1/eventbus_pb2.py +4 -2
  78. chalk/_gen/chalk/server/v1/files_pb2.py +65 -0
  79. chalk/_gen/chalk/server/v1/files_pb2.pyi +167 -0
  80. chalk/_gen/chalk/server/v1/files_pb2_grpc.py +4 -0
  81. chalk/_gen/chalk/server/v1/files_pb2_grpc.pyi +4 -0
  82. chalk/_gen/chalk/server/v1/graph_pb2.py +38 -26
  83. chalk/_gen/chalk/server/v1/graph_pb2.pyi +58 -0
  84. chalk/_gen/chalk/server/v1/graph_pb2_grpc.py +47 -0
  85. chalk/_gen/chalk/server/v1/graph_pb2_grpc.pyi +18 -0
  86. chalk/_gen/chalk/server/v1/incident_pb2.py +23 -21
  87. chalk/_gen/chalk/server/v1/incident_pb2.pyi +15 -1
  88. chalk/_gen/chalk/server/v1/indexing_job_pb2.py +44 -0
  89. chalk/_gen/chalk/server/v1/indexing_job_pb2.pyi +38 -0
  90. chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.py +78 -0
  91. chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.pyi +38 -0
  92. chalk/_gen/chalk/server/v1/integrations_pb2.py +11 -9
  93. chalk/_gen/chalk/server/v1/integrations_pb2.pyi +34 -2
  94. chalk/_gen/chalk/server/v1/kube_pb2.py +29 -19
  95. chalk/_gen/chalk/server/v1/kube_pb2.pyi +28 -0
  96. chalk/_gen/chalk/server/v1/kube_pb2_grpc.py +45 -0
  97. chalk/_gen/chalk/server/v1/kube_pb2_grpc.pyi +12 -0
  98. chalk/_gen/chalk/server/v1/log_pb2.py +21 -3
  99. chalk/_gen/chalk/server/v1/log_pb2.pyi +68 -0
  100. chalk/_gen/chalk/server/v1/log_pb2_grpc.py +90 -0
  101. chalk/_gen/chalk/server/v1/log_pb2_grpc.pyi +24 -0
  102. chalk/_gen/chalk/server/v1/model_registry_pb2.py +10 -10
  103. chalk/_gen/chalk/server/v1/model_registry_pb2.pyi +4 -1
  104. chalk/_gen/chalk/server/v1/plandebug_pb2.py +53 -0
  105. chalk/_gen/chalk/server/v1/plandebug_pb2.pyi +86 -0
  106. chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.py +168 -0
  107. chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.pyi +60 -0
  108. chalk/_gen/chalk/server/v1/queries_pb2.py +66 -66
  109. chalk/_gen/chalk/server/v1/queries_pb2.pyi +32 -2
  110. chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.py +12 -12
  111. chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.pyi +16 -3
  112. chalk/_gen/chalk/server/v1/scheduler_pb2.py +24 -12
  113. chalk/_gen/chalk/server/v1/scheduler_pb2.pyi +61 -1
  114. chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.py +90 -0
  115. chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.pyi +24 -0
  116. chalk/_gen/chalk/server/v1/script_tasks_pb2.py +15 -3
  117. chalk/_gen/chalk/server/v1/script_tasks_pb2.pyi +22 -0
  118. chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.py +90 -0
  119. chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.pyi +24 -0
  120. chalk/_gen/chalk/server/v1/sql_interface_pb2.py +75 -0
  121. chalk/_gen/chalk/server/v1/sql_interface_pb2.pyi +142 -0
  122. chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.py +349 -0
  123. chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.pyi +114 -0
  124. chalk/_gen/chalk/server/v1/sql_queries_pb2.py +48 -0
  125. chalk/_gen/chalk/server/v1/sql_queries_pb2.pyi +150 -0
  126. chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.py +123 -0
  127. chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.pyi +52 -0
  128. chalk/_gen/chalk/server/v1/team_pb2.py +154 -141
  129. chalk/_gen/chalk/server/v1/team_pb2.pyi +30 -2
  130. chalk/_gen/chalk/server/v1/team_pb2_grpc.py +45 -0
  131. chalk/_gen/chalk/server/v1/team_pb2_grpc.pyi +12 -0
  132. chalk/_gen/chalk/server/v1/topic_pb2.py +5 -3
  133. chalk/_gen/chalk/server/v1/topic_pb2.pyi +10 -1
  134. chalk/_gen/chalk/server/v1/trace_pb2.py +44 -40
  135. chalk/_gen/chalk/server/v1/trace_pb2.pyi +20 -0
  136. chalk/_gen/chalk/streaming/v1/debug_service_pb2.py +62 -0
  137. chalk/_gen/chalk/streaming/v1/debug_service_pb2.pyi +75 -0
  138. chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.py +221 -0
  139. chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.pyi +88 -0
  140. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.py +16 -10
  141. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.pyi +52 -1
  142. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.py +48 -0
  143. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.pyi +20 -0
  144. chalk/_gen/chalk/utils/v1/field_change_pb2.py +32 -0
  145. chalk/_gen/chalk/utils/v1/field_change_pb2.pyi +42 -0
  146. chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.py +4 -0
  147. chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.pyi +4 -0
  148. chalk/_lsp/error_builder.py +11 -0
  149. chalk/_version.py +1 -1
  150. chalk/client/client.py +128 -43
  151. chalk/client/client_async.py +149 -0
  152. chalk/client/client_async_impl.py +22 -0
  153. chalk/client/client_grpc.py +539 -104
  154. chalk/client/client_impl.py +449 -122
  155. chalk/client/dataset.py +7 -1
  156. chalk/client/models.py +98 -0
  157. chalk/client/serialization/model_serialization.py +92 -9
  158. chalk/df/LazyFramePlaceholder.py +1154 -0
  159. chalk/features/_class_property.py +7 -0
  160. chalk/features/_embedding/embedding.py +1 -0
  161. chalk/features/_encoding/converter.py +83 -2
  162. chalk/features/feature_field.py +40 -30
  163. chalk/features/feature_set_decorator.py +1 -0
  164. chalk/features/feature_wrapper.py +42 -3
  165. chalk/features/hooks.py +81 -10
  166. chalk/features/inference.py +33 -31
  167. chalk/features/resolver.py +224 -24
  168. chalk/functions/__init__.py +65 -3
  169. chalk/gitignore/gitignore_parser.py +5 -1
  170. chalk/importer.py +142 -68
  171. chalk/ml/__init__.py +2 -0
  172. chalk/ml/model_hooks.py +194 -26
  173. chalk/ml/model_reference.py +56 -8
  174. chalk/ml/model_version.py +24 -15
  175. chalk/ml/utils.py +20 -17
  176. chalk/operators/_utils.py +10 -3
  177. chalk/parsed/_proto/export.py +22 -0
  178. chalk/parsed/duplicate_input_gql.py +3 -0
  179. chalk/parsed/json_conversions.py +20 -14
  180. chalk/parsed/to_proto.py +16 -4
  181. chalk/parsed/user_types_to_json.py +31 -10
  182. chalk/parsed/validation_from_registries.py +182 -0
  183. chalk/queries/named_query.py +16 -6
  184. chalk/queries/scheduled_query.py +9 -1
  185. chalk/serialization/parsed_annotation.py +24 -11
  186. chalk/sql/__init__.py +18 -0
  187. chalk/sql/_internal/integrations/databricks.py +55 -17
  188. chalk/sql/_internal/integrations/mssql.py +127 -62
  189. chalk/sql/_internal/integrations/redshift.py +4 -0
  190. chalk/sql/_internal/sql_file_resolver.py +53 -9
  191. chalk/sql/_internal/sql_source.py +35 -2
  192. chalk/streams/_kafka_source.py +5 -1
  193. chalk/streams/_windows.py +15 -2
  194. chalk/utils/_otel_version.py +13 -0
  195. chalk/utils/async_helpers.py +2 -2
  196. chalk/utils/missing_dependency.py +5 -4
  197. chalk/utils/tracing.py +185 -95
  198. {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/METADATA +4 -6
  199. {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/RECORD +202 -146
  200. {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/WHEEL +0 -0
  201. {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/entry_points.txt +0 -0
  202. {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/top_level.txt +0 -0
chalk/features/_class_property.py CHANGED
@@ -5,6 +5,8 @@ import functools
 from typing import Any, Callable, List, Type, TypeVar, cast
 
 from chalk._lsp.error_builder import FeatureClassErrorBuilder
+from chalk.features.feature_wrapper import UnresolvedFeature
+from chalk.utils.notebook import is_notebook
 
 T = TypeVar("T")
 V = TypeVar("V")
@@ -54,6 +56,11 @@ def classproperty_support(cls: Type[T]) -> Type[T]:
         if (res := self.__chalk_notebook_feature_expressions__.get(item)) is not None:
             return res
 
+        # If in notebook, fallback to constructing FQN string instead of raising error
+        if is_notebook():
+            fqn = f"{self.namespace}.{item}"
+            return UnresolvedFeature(fqn)
+
         builder: FeatureClassErrorBuilder = self.__chalk_error_builder__
         builder.invalid_attribute(
             root_feature_str=self.namespace,
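With this change, attribute access on a feature class falls back to an UnresolvedFeature in notebooks instead of raising. A minimal sketch of the resulting behavior, assuming a hypothetical `User` feature class whose namespace is `user` (the class and attribute names are illustrative, not from the diff):

    from chalk.features import features

    @features
    class User:
        id: int

    # Inside a notebook (is_notebook() returns True), an attribute the locally
    # imported class doesn't define no longer raises; it becomes an
    # UnresolvedFeature wrapping the would-be FQN, and the server validates it
    # when the query actually runs.
    ref = User.favorite_color
    print(str(ref))   # "user.favorite_color"
    print(repr(ref))  # UnresolvedFeature('user.favorite_color')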
chalk/features/_embedding/embedding.py CHANGED
@@ -25,6 +25,7 @@ from chalk.utils.collections import ensure_tuple
 SUPPORTED_LOCAL_MODELS = {
     "all-MiniLM-L6-v2",  # https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
     "sample-bert",  # For internal Chalk use
+    "sample-linear-nn",  # For internal Chalk use
 }
 
 # This will eventually be included in SUPPORTED_LOCAL_MODELS
chalk/features/_encoding/converter.py CHANGED
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import io
 import json
 import types
 import typing
@@ -50,6 +51,7 @@ from chalk.features._encoding.pyarrow import (
     rich_to_pyarrow,
 )
 from chalk.features._encoding.rich import structure_primitive_to_rich, unstructure_rich_to_primitive
+from chalk.features.feature_wrapper import UnresolvedFeature
 from chalk.utils.collections import unwrap_annotated_if_needed, unwrap_optional_and_annotated_if_needed
 from chalk.utils.df_utils import pa_array_to_pl_series
 from chalk.utils.json import JSON, TJSON, is_pyarrow_json_type, pyarrow_json_type
@@ -870,6 +872,79 @@ class PrimitiveFeatureConverter(Generic[_TPrim]):
         else:
             raise TypeError(f"Could not convert the pyarrow dtype {dtype} to a protobuf message")
 
+    @classmethod
+    def convert_pa_field_to_proto_field(cls, field: pa.Field) -> pb.Field:
+        """Convert a PyArrow Field to proto Field."""
+        field_proto = pb.Field(
+            name=field.name, arrow_type=cls.convert_pa_dtype_to_proto_dtype(field.type), nullable=field.nullable
+        )
+
+        if field.metadata:
+            # field.metadata is of types dict[bytes, bytes]
+            for k, v in field.metadata.items():
+                field_proto.metadata[k.decode("utf-8")] = v.decode("utf-8")
+
+        return field_proto
+
+    @classmethod
+    def convert_proto_field_to_pa_field(cls, proto_field: pb.Field) -> pa.Field:
+        """Convert a proto Field to PyArrow Field."""
+        arrow_type = cls.convert_proto_dtype_to_pa_dtype(proto_field.arrow_type)
+
+        # don't have to convert back to dict[bytes, bytes] as can initialize with dict[str, str]
+        metadata = dict(proto_field.metadata) if proto_field.metadata else None
+
+        return pa.field(
+            name=proto_field.name,
+            type=arrow_type,
+            nullable=proto_field.nullable,
+            metadata=metadata,
+        )
+
+    @classmethod
+    def convert_pa_schema_to_proto_schema(cls, schema: pa.Schema) -> pb.Schema:
+        schema_proto = pb.Schema(
+            columns=[cls.convert_pa_field_to_proto_field(field) for field in schema],
+        )
+
+        if schema.metadata:
+            # schema.metadata is of types dict[bytes, bytes]
+            for k, v in schema.metadata.items():
+                schema_proto.metadata[k.decode("utf-8")] = v.decode("utf-8")
+
+        return schema_proto
+
+    @classmethod
+    def convert_proto_schema_to_pa_schema(cls, proto_schema: pb.Schema) -> pa.Schema:
+        fields = [cls.convert_proto_field_to_pa_field(proto_field) for proto_field in proto_schema.columns]
+
+        # don't have to convert back to dict[bytes, bytes] as can initialize with dict[str, str]
+        metadata = dict(proto_schema.metadata) if proto_schema.metadata else None
+
+        return pa.schema(fields, metadata=metadata)
+
+    @staticmethod
+    def convert_arrow_table_to_proto(table: pa.Table | pa.RecordBatch) -> pb.TableParquetBytes:
+        if isinstance(table, pa.RecordBatch):
+            table = pa.Table.from_batches([table])
+        elif isinstance(table, pa.Table):
+            pass
+        else:
+            raise TypeError(f"expected pa.Table or pa.RecordBatch, got {type(table)!r}")
+
+        sink = io.BytesIO()
+        import pyarrow.parquet
+
+        pyarrow.parquet.write_table(table, sink)
+        return pb.TableParquetBytes(encoded_parquet_bytes=sink.getvalue())
+
+    @staticmethod
+    def convert_arrow_table_from_proto(proto: pb.TableParquetBytes) -> pa.Table:
+        import pyarrow.parquet
+
+        pf = pyarrow.parquet.ParquetFile(io.BytesIO(proto.encoded_parquet_bytes))
+        return pyarrow.parquet.read_table(pf)
+
     @staticmethod
     def _serialize_pa_decimal_to_pb(value: Union[pa.Decimal128Scalar, pa.Decimal256Scalar]) -> pb.ScalarValue:
         dec_val = value.as_py()
@@ -1183,8 +1258,14 @@ class FeatureConverter(PrimitiveFeatureConverter[_TPrim], Generic[_TPrim, _TRich
         # because it is also used for error handling inside of `from_rich_to_primitive`.
         self._name = name
         if rich_default != ...:
-            # The missing value strategy doesn't really matter because rich_default is not missing
-            primitive_default = self.from_rich_to_primitive(rich_default, missing_value_strategy="allow")
+            # In notebook environments, UnresolvedFeature may be used as a placeholder
+            # for features that can't be resolved due to a stale registry.
+            # Treat these as missing defaults since they're not concrete values.
+            if isinstance(rich_default, UnresolvedFeature):
+                rich_default = ...
+            else:
+                # The missing value strategy doesn't really matter because rich_default is not missing
+                primitive_default = self.from_rich_to_primitive(rich_default, missing_value_strategy="allow")
         super().__init__(
             name, is_nullable=is_nullable, pyarrow_dtype=pyarrow_dtype, primitive_default=primitive_default
         )
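The new `TableParquetBytes` helpers round-trip an Arrow table through Parquet held in an in-memory buffer. The PyArrow half of that round trip is standard and can be sketched without the generated `pb.TableParquetBytes` wrapper (only `pyarrow` is assumed here):

    import io

    import pyarrow as pa
    import pyarrow.parquet

    # Write a table to Parquet bytes in memory, as convert_arrow_table_to_proto does;
    # these bytes are what pb.TableParquetBytes.encoded_parquet_bytes carries.
    table = pa.table({"id": [1, 2, 3], "name": ["a", "b", "c"]})
    sink = io.BytesIO()
    pyarrow.parquet.write_table(table, sink)
    encoded = sink.getvalue()

    # Read it back, as convert_arrow_table_from_proto does.
    roundtripped = pyarrow.parquet.read_table(io.BytesIO(encoded))
    assert roundtripped.equals(table)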
chalk/features/feature_field.py CHANGED
@@ -4,11 +4,12 @@ import copy
 import dataclasses
 import functools
 import inspect
+import itertools
 import os
 import re
 import weakref
 from collections.abc import Mapping, MutableMapping
-from datetime import timedelta, datetime
+from datetime import datetime, timedelta
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -28,7 +29,6 @@ from typing import (
     cast,
 )
 
-import itertools
 import numpy as np
 import pyarrow as pa
 
@@ -39,11 +39,11 @@ from chalk.features._encoding.converter import FeatureConverter, JSONCodec, TDec
 from chalk.features._encoding.primitive import TPrimitive
 from chalk.features.feature_set import CURRENT_FEATURE_REGISTRY, FeatureRegistryProtocol
 from chalk.features.feature_wrapper import FeatureWrapper, NearestNeighborException
-from chalk.features.filter import Filter, TimeDelta, ClauseJoinWithAndException
+from chalk.features.filter import ClauseJoinWithAndException, Filter, TimeDelta
 from chalk.features.tag import Tags
 from chalk.features.underscore import Underscore
 from chalk.serialization.parsed_annotation import ParsedAnnotation
-from chalk.utils.collections import ensure_tuple, get_unique_item, FrozenOrderedSet, OrderedSet
+from chalk.utils.collections import FrozenOrderedSet, OrderedSet, ensure_tuple, get_unique_item
 from chalk.utils.duration import CHALK_MAX_TIMEDELTA, Duration, parse_chalk_duration
 from chalk.utils.import_utils import get_type_checking_imports
 from chalk.utils.json import JSON, pyarrow_json_type
@@ -54,7 +54,7 @@ from chalk.utils.pydanticutil.pydantic_compat import (
     is_pydantic_basemodel_instance,
     parse_pydantic_model,
 )
-from chalk.utils.string import to_snake_case, oxford_comma_list
+from chalk.utils.string import oxford_comma_list, to_snake_case
 
 if TYPE_CHECKING:
     from google.protobuf.message import Message as ProtobufMessage
@@ -68,9 +68,9 @@ _TPrim = TypeVar("_TPrim", bound=TPrimitive)
 _logger = get_logger(__name__)
 
 from chalk.features.feature_cache_strategy import (
-    CacheStrategy,
-    CacheNullsType,
     CacheDefaultsType,
+    CacheNullsType,
+    CacheStrategy,
     get_cache_strategy_from_cache_settings,
 )
 
@@ -252,6 +252,7 @@ class Feature(Generic[_TPrim, _TRich]):
         "tags",
         "underlying",
         "underscore_expression",
+        "offline_underscore_expression",
         "version",
         "window_duration",
         "window_durations",
@@ -276,6 +277,7 @@ class Feature(Generic[_TPrim, _TRich]):
         primary: bool | None = None,
         default: _TRich | ellipsis = ...,
         underscore_expression: Underscore | None = None,
+        offline_underscore_expression: Underscore | None = None,
        max_staleness: Duration | None | ellipsis = ...,
         online_store_max_items: int | None = None,
         cache_strategy: CacheStrategy = CacheStrategy.ALL_WITH_BOTH_UNSET,
@@ -386,6 +388,7 @@ class Feature(Generic[_TPrim, _TRich]):
         self._primary = primary
         self._primary_feature: Optional[Feature] = None
         self.underscore_expression: Underscore | None = underscore_expression
+        self.offline_underscore_expression: Underscore | None = offline_underscore_expression
         self.is_distance_pseudofeature = is_distance_pseudofeature
 
         self._raw_max_staleness = max_staleness
@@ -503,7 +506,7 @@ class Feature(Generic[_TPrim, _TRich]):
 
     @property
     def converter(self) -> FeatureConverter:
-        from chalk.features import DataFrame, Vector, Tensor
+        from chalk.features import DataFrame, Tensor, Vector
 
         self._converter_entered += 1
 
@@ -923,7 +926,7 @@ class Feature(Generic[_TPrim, _TRich]):
 
     def __repr__(self):
         try:
-            root_fqn=self.root_fqn
+            root_fqn = self.root_fqn
         except:
             # self.root_fqn is a property, if it failed then just return the object repr
             return object.__repr__(self)
@@ -1367,7 +1370,7 @@ class Feature(Generic[_TPrim, _TRich]):
                 ),
                 label="invalid nearest neighbor join",
                 range=self.lsp_error_builder.property_value_range(self.attribute_name)
-                or self.lsp_error_builder.property_range(self.attribute_name),
+                or self.lsp_error_builder.property_range(self.attribute_name),
                 code="32",
                 raise_error=TypeError,
             )
@@ -1387,7 +1390,7 @@ class Feature(Generic[_TPrim, _TRich]):
                 ),
                 label="invalid join",
                 range=self.lsp_error_builder.property_value_range(self.attribute_name)
-                or self.lsp_error_builder.property_range(self.attribute_name),
+                or self.lsp_error_builder.property_range(self.attribute_name),
                 code="32",
                 raise_error=TypeError,
             )
@@ -1400,7 +1403,7 @@ class Feature(Generic[_TPrim, _TRich]):
                 ),
                 label="invalid join",
                 range=self.lsp_error_builder.property_value_range(self.attribute_name)
-                or self.lsp_error_builder.property_range(self.attribute_name),
+                or self.lsp_error_builder.property_range(self.attribute_name),
                 code="32",
                 raise_error=TypeError,
             )
@@ -1424,18 +1427,21 @@ class Feature(Generic[_TPrim, _TRich]):
             )
 
         if not self.is_has_many and not self.is_has_one:
-            assert self.features_cls is not None
-            self.lsp_error_builder.add_diagnostic(
-                message=(
-                    f"The attribute '{self.features_cls.__name__}.{self.attribute_name}' "
-                    f"has a join filter ({join}) but its type annotation '{self.typ}' is not a feature class or DataFrame."
-                ),
-                label="invalid join",
-                range=self.lsp_error_builder.property_value_range(self.attribute_name)
-                or self.lsp_error_builder.property_range(self.attribute_name),
-                code="37",
-                raise_error=TypeError,
-            )
+            # Check if user tried to use DataFrame (even if validation failed)
+            # Use is_dataframe_annotation() to detect DataFrame types without triggering validation errors
+            if not self.typ.is_dataframe_annotation():
+                assert self.features_cls is not None
+                self.lsp_error_builder.add_diagnostic(
+                    message=(
+                        f"The attribute '{self.features_cls.__name__}.{self.attribute_name}' "
+                        f"has a join filter ({join}) but its type annotation '{self.typ}' is not a feature class or DataFrame that links to another feature class."
+                    ),
+                    label="invalid join",
+                    range=self.lsp_error_builder.property_value_range(self.attribute_name)
+                    or self.lsp_error_builder.property_range(self.attribute_name),
+                    code="37",
+                    raise_error=TypeError,
+                )
         if self._join_type == "has_one":
             if self.is_has_many:
                 assert self.features_cls is not None
@@ -1748,6 +1754,7 @@ def feature(
     default: Union[_TRich, ellipsis] = ...,
     underscore: Optional[Underscore] = None,  # Deprecated. Prefer `expression`.
     expression: Optional[Underscore] = None,
+    offline_expression: Optional[Underscore] = None,
     offline_ttl: Optional[Union[ellipsis, Duration]] = ...,
     deprecated: bool = False,
     store_online: bool = True,
@@ -1811,6 +1818,8 @@ def feature(
     ...     total: int = feature(expression=_.subtotal + _.tax, default=0)
 
         See more at https://docs.chalk.ai/docs/expression
+    offline_expression
+        Defines an alternate expression to compute the feature during offline queries.
     dtype
         The backing `pyarrow.DataType` for the feature. This parameter can
         be used to control the storage format of data. For example, if you
@@ -2058,10 +2067,7 @@ def feature(
         if not isinstance(value, Feature):  # pyright: ignore[reportUnnecessaryIsInstance]
            raise ValueError(f"When `versions` is provided, the values must be features, but `{value}` was given.")
 
-    cache_strategy = get_cache_strategy_from_cache_settings(
-        cache_nulls=cache_nulls,
-        cache_defaults=cache_defaults
-    )
+    cache_strategy = get_cache_strategy_from_cache_settings(cache_nulls=cache_nulls, cache_defaults=cache_defaults)
 
     return cast(
         _TRich,
@@ -2108,6 +2114,7 @@ def feature(
             ),
             default=default,
             underscore_expression=expression if expression is not None else underscore,
+            offline_underscore_expression=offline_expression,
             offline_ttl=offline_ttl,
             is_deprecated=deprecated,
             store_online=store_online,
@@ -2173,7 +2180,7 @@ def has_one(f: Callable[[], Any]) -> Any:
 def has_many(
     f: Callable[[], Any],
     max_staleness: Union[Duration, None, ellipsis] = ...,
-    online_store_max_items: int | None = None
+    online_store_max_items: int | None = None,
 ) -> Any:
     """Specify a feature that represents a one-to-many relationship.
 
@@ -2192,6 +2199,7 @@ def has_many(
         The maximum number of items to cache for the joined feature. The
         items in the joined feature aggregate, storing the latest values
         of the joined feature for each primary key in the joined feature.
+
     Examples
     --------
     >>> from chalk.features import DataFrame, features, has_many
@@ -2207,7 +2215,9 @@ def has_many(
     ...     lambda: User.id == Card.user_id
     ... )
     """
-    return Feature(join=f, max_staleness=max_staleness,online_store_max_items=online_store_max_items, join_type="has_many")
+    return Feature(
+        join=f, max_staleness=max_staleness, online_store_max_items=online_store_max_items, join_type="has_many"
+    )
 
 
 __all__ = (
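`feature()` now forwards `offline_expression` into `Feature.offline_underscore_expression`, so a feature can be computed by one expression online and by a different one in offline queries. A sketch in the style of the docstring example above, assuming the underscore builder `_` is importable from `chalk.features` as in Chalk's expression docs (the feature class and expressions are illustrative):

    from chalk.features import _, feature, features

    @features
    class Receipt:
        id: int
        subtotal: int
        tax: int
        # Online queries evaluate `expression`; offline queries would use
        # `offline_expression` instead (here they happen to be the same).
        total: int = feature(
            expression=_.subtotal + _.tax,
            offline_expression=_.subtotal + _.tax,
        )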
chalk/features/feature_set_decorator.py CHANGED
@@ -141,6 +141,7 @@ def features(
     The `cache_nulls` and `cache_defaults` options can be used together on the same feature with the
     following exceptions: if `cache_nulls=False`, then `cache_defaults` cannot be `"evict_defaults"`, and if
     `cache_nulls="evict_defaults"`, then `cache_defaults` cannot be `False`.
+
     Other Parameters
     ----------------
     cls
chalk/features/feature_wrapper.py CHANGED
@@ -11,6 +11,7 @@ from chalk.features._chalkop import op, Aggregation
 from chalk.features.filter import Filter
 from chalk.serialization.parsed_annotation import ParsedAnnotation
 from chalk.utils.collections import ensure_tuple
+from chalk.utils.notebook import is_notebook
 
 if TYPE_CHECKING:
     from chalk.features.feature_field import Feature
@@ -22,16 +23,43 @@ class NearestNeighborException(ValueError):
     ...
 
 
+class UnresolvedFeature:
+    """Fallback for features that can't be resolved in notebook environments.
+
+    This allows notebooks to work even when the feature registry is stale or incomplete.
+    The server will validate the feature exists when the query is executed.
+    """
+    __slots__ = ("fqn",)
+
+    def __init__(self, fqn: str):
+        self.fqn = fqn
+        super().__init__()
+
+    def __str__(self):
+        return self.fqn
+
+    def __repr__(self):
+        return f"UnresolvedFeature({self.fqn!r})"
+
+    def __hash__(self):
+        return hash(self.fqn)
+
+    def __eq__(self, other: object):
+        if isinstance(other, UnresolvedFeature):
+            return self.fqn == other.fqn
+        return False
+
+
 class _MarkedUnderlyingFeature:
     __slots__ = ("_fn", "_source", "_debug_info")
 
-    def __init__(self, fn: Callable[[], Feature | Filter | type[DataFrame] | FeatureWrapper | Aggregation],
+    def __init__(self, fn: Callable[[], Feature | Filter | type[DataFrame] | FeatureWrapper | Aggregation | UnresolvedFeature],
                  debug_info: Any = None) -> None:
         super().__init__()
         self._fn = fn
         self._debug_info = debug_info
 
-    def __call__(self, *args: Any, **kwds: Any) -> Feature | Filter | type[DataFrame] | FeatureWrapper | Aggregation:
+    def __call__(self, *args: Any, **kwds: Any) -> Feature | Filter | type[DataFrame] | FeatureWrapper | Aggregation | UnresolvedFeature:
         return self._fn()
 
 
@@ -51,7 +79,7 @@ class FeatureWrapper:
         super().__init__()
         self._chalk_underlying = underlying
 
-    def _chalk_get_underlying(self) -> Feature | Aggregation | Filter | type[DataFrame]:
+    def _chalk_get_underlying(self) -> Feature | Aggregation | Filter | type[DataFrame] | UnresolvedFeature:
         if isinstance(self._chalk_underlying, _MarkedUnderlyingFeature):
             self._chalk_underlying = self._chalk_underlying()
         if isinstance(self._chalk_underlying, FeatureWrapper):
@@ -303,6 +331,12 @@ class FeatureWrapper:
             if f.attribute_name == item:
                 return FeatureWrapper(underlying.copy_with_path(f))
 
+        if is_notebook():
+            # Construct FQN by preserving the path from the underlying feature
+            # If underlying has a path, we need to include it in the FQN
+            fqn = f"{underlying.root_fqn}.{item}"
+            return UnresolvedFeature(fqn)
+
         assert underlying.features_cls is not None
         underlying.features_cls.__chalk_error_builder__.invalid_attribute(
             root_feature_str=joined_class.namespace,
@@ -314,6 +348,11 @@ class FeatureWrapper:
         )
         assert False, "unreachable"
 
+        # If in notebook, fallback to constructing FQN string instead of raising error
+        if is_notebook():
+            fqn = f"{underlying.fqn}.{item}"
+            return UnresolvedFeature(fqn)
+
         assert underlying.features_cls is not None
         underlying.features_cls.__chalk_error_builder__.invalid_attribute(
             root_feature_str=underlying.fqn,
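UnresolvedFeature is deliberately a thin value type: it stringifies to its FQN and compares and hashes by FQN, so instances can stand in wherever feature references are keyed by name. Its semantics follow directly from the class definition above (the FQN below is illustrative):

    a = UnresolvedFeature("user.favorite_color")
    b = UnresolvedFeature("user.favorite_color")

    assert str(a) == "user.favorite_color"
    assert repr(a) == "UnresolvedFeature('user.favorite_color')"
    assert a == b and hash(a) == hash(b)  # usable in sets and as dict keys
    assert a != "user.favorite_color"     # __eq__ matches only other UnresolvedFeatures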
chalk/features/hooks.py CHANGED
@@ -4,12 +4,15 @@ import asyncio
 import inspect
 import logging
 import time  # Added for measuring duration
-from typing import Any, Callable, Iterable, Optional, Set, Tuple, TypeAlias, Union, overload
+from typing import TYPE_CHECKING, Any, Callable, Iterable, Optional, Set, Tuple, TypeAlias, Union, overload
 
 from chalk.features.tag import Environments
 from chalk.utils.collections import ensure_tuple
 from chalk.utils.log_with_context import get_logger
 
+if TYPE_CHECKING:
+    from chalk.features.resolver import ResourceHint
+
 HookFn: TypeAlias = Callable[[], Any]
 
 
@@ -43,21 +46,31 @@ class Hook:
     venv: Optional[str]
     fn: HookFn
     filename: str
+    resource_hint: Optional["ResourceHint"]
+    resource_group: Optional[str]
 
     def __init__(
-        self, fn: HookFn, filename: str, environment: Optional[Environments] = None, venv: Optional[str] = None
+        self,
+        fn: HookFn,
+        filename: str,
+        environment: Optional[Environments] = None,
+        venv: Optional[str] = None,
+        resource_hint: Optional["ResourceHint"] = None,
+        resource_group: Optional[str] = None,
     ):
         super().__init__()
         self.fn = fn
         self.filename = filename
         self.environment = None if environment is None else ensure_tuple(environment)
         self.venv = venv
+        self.resource_hint = resource_hint
+        self.resource_group = resource_group
 
     def __call__(self):
         return self.fn()
 
     def __repr__(self):
-        return f'Hook(filename={self.filename}, fn={self.fn.__name__}", environment={str(self.environment)}, venv={self.venv})'
+        return f'Hook(filename={self.filename}, fn={self.fn.__name__}", environment={str(self.environment)}, venv={self.venv}, resource_hint={self.resource_hint}, resource_group={self.resource_group})'
 
     @classmethod
     async def async_run_all_before_all(cls, environment: str, venv: Optional[str] = None) -> None:
@@ -75,17 +88,46 @@ def before_all(fn: HookFn, /) -> Hook:
 
 @overload
 def before_all(
-    fn: None = None, /, environment: Optional[Environments] = None, venv: Optional[str] = None
+    fn: HookFn,
+    /,
+    environment: Optional[Environments] = None,
+    venv: Optional[str] = None,
+    resource_hint: Optional["ResourceHint"] = None,
+    resource_group: Optional[str] = None,
+) -> Hook:
+    ...
+
+
+@overload
+def before_all(
+    fn: None = None,
+    /,
+    environment: Optional[Environments] = None,
+    venv: Optional[str] = None,
+    resource_hint: Optional["ResourceHint"] = None,
+    resource_group: Optional[str] = None,
 ) -> Callable[[HookFn], Hook]:
     ...
 
 
 def before_all(
-    fn: Optional[HookFn] = None, /, environment: Optional[Environments] = None, venv: Optional[str] = None
+    fn: Optional[HookFn] = None,
+    /,
+    environment: Optional[Environments] = None,
+    venv: Optional[str] = None,
+    resource_hint: Optional["ResourceHint"] = None,
+    resource_group: Optional[str] = None,
 ) -> Union[Hook, Callable[[HookFn], Hook]]:
     def decorator(f: HookFn):
         caller_filename = inspect.getsourcefile(f) or "unknown_file"
-        hook = Hook(fn=f, filename=caller_filename, environment=environment, venv=venv)
+        hook = Hook(
+            fn=f,
+            filename=caller_filename,
+            environment=environment,
+            venv=venv,
+            resource_hint=resource_hint,
+            resource_group=resource_group,
+        )
         Hook.before_all.add(hook)
         return hook
 
@@ -93,23 +135,52 @@ def before_all(
 
 
 @overload
-def after_all(fn: HookFn, /, environment: Optional[Environments] = None, venv: Optional[str] = None) -> Hook:
+def after_all(fn: HookFn, /) -> Hook:
+    ...
+
+
+@overload
+def after_all(
+    fn: HookFn,
+    /,
+    environment: Optional[Environments] = None,
+    venv: Optional[str] = None,
+    resource_hint: Optional["ResourceHint"] = None,
+    resource_group: Optional[str] = None,
+) -> Hook:
     ...
 
 
 @overload
 def after_all(
-    fn: None = None, /, environment: Optional[Environments] = None, venv: Optional[str] = None
+    fn: None = None,
+    /,
+    environment: Optional[Environments] = None,
+    venv: Optional[str] = None,
+    resource_hint: Optional["ResourceHint"] = None,
+    resource_group: Optional[str] = None,
 ) -> Callable[[HookFn], Hook]:
     ...
 
 
 def after_all(
-    fn: Optional[HookFn] = None, /, environment: Optional[Environments] = None, venv: Optional[str] = None
+    fn: Optional[HookFn] = None,
+    /,
+    environment: Optional[Environments] = None,
+    venv: Optional[str] = None,
+    resource_hint: Optional["ResourceHint"] = None,
+    resource_group: Optional[str] = None,
) -> Union[Hook, Callable[[HookFn], Hook]]:
     def decorator(f: HookFn):
         caller_filename = inspect.getsourcefile(f) or "unknown_file"
-        hook = Hook(fn=f, filename=caller_filename, environment=environment, venv=venv)
+        hook = Hook(
+            fn=f,
+            filename=caller_filename,
+            environment=environment,
+            venv=venv,
+            resource_hint=resource_hint,
+            resource_group=resource_group,
+        )
         Hook.after_all.add(hook)
         return hook
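With these additions, before_all and after_all hooks accept the same resource_hint and resource_group scheduling hints that resolvers take. A usage sketch; the "cpu" hint and the group name are assumptions for illustration, since the allowed ResourceHint values are defined in chalk.features.resolver rather than shown in this diff:

    from chalk.features.hooks import before_all

    @before_all(resource_hint="cpu", resource_group="my-pool")  # hypothetical values
    def warm_caches():
        # Runs once before resolvers execute, carrying the scheduling hints above.
        ...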