chalkpy 2.89.22__py3-none-any.whl → 2.95.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chalk/__init__.py +2 -1
- chalk/_gen/chalk/arrow/v1/arrow_pb2.py +7 -5
- chalk/_gen/chalk/arrow/v1/arrow_pb2.pyi +6 -0
- chalk/_gen/chalk/artifacts/v1/chart_pb2.py +36 -33
- chalk/_gen/chalk/artifacts/v1/chart_pb2.pyi +41 -1
- chalk/_gen/chalk/artifacts/v1/cron_query_pb2.py +8 -7
- chalk/_gen/chalk/artifacts/v1/cron_query_pb2.pyi +5 -0
- chalk/_gen/chalk/common/v1/offline_query_pb2.py +19 -13
- chalk/_gen/chalk/common/v1/offline_query_pb2.pyi +37 -0
- chalk/_gen/chalk/common/v1/online_query_pb2.py +54 -54
- chalk/_gen/chalk/common/v1/online_query_pb2.pyi +13 -1
- chalk/_gen/chalk/common/v1/script_task_pb2.py +13 -11
- chalk/_gen/chalk/common/v1/script_task_pb2.pyi +19 -1
- chalk/_gen/chalk/dataframe/__init__.py +0 -0
- chalk/_gen/chalk/dataframe/v1/__init__.py +0 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2.py +48 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2.pyi +123 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.py +4 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/graph/v1/graph_pb2.py +150 -149
- chalk/_gen/chalk/graph/v1/graph_pb2.pyi +25 -0
- chalk/_gen/chalk/graph/v1/sources_pb2.py +94 -84
- chalk/_gen/chalk/graph/v1/sources_pb2.pyi +56 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.py +79 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.pyi +377 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.py +4 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.py +43 -7
- chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.pyi +252 -2
- chalk/_gen/chalk/protosql/v1/sql_service_pb2.py +54 -27
- chalk/_gen/chalk/protosql/v1/sql_service_pb2.pyi +131 -3
- chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.py +45 -0
- chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.pyi +14 -0
- chalk/_gen/chalk/python/v1/types_pb2.py +14 -14
- chalk/_gen/chalk/python/v1/types_pb2.pyi +8 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2.py +76 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2.pyi +156 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.py +258 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.pyi +84 -0
- chalk/_gen/chalk/server/v1/billing_pb2.py +40 -38
- chalk/_gen/chalk/server/v1/billing_pb2.pyi +17 -1
- chalk/_gen/chalk/server/v1/branches_pb2.py +45 -0
- chalk/_gen/chalk/server/v1/branches_pb2.pyi +80 -0
- chalk/_gen/chalk/server/v1/branches_pb2_grpc.pyi +36 -0
- chalk/_gen/chalk/server/v1/builder_pb2.py +372 -272
- chalk/_gen/chalk/server/v1/builder_pb2.pyi +479 -12
- chalk/_gen/chalk/server/v1/builder_pb2_grpc.py +360 -0
- chalk/_gen/chalk/server/v1/builder_pb2_grpc.pyi +96 -0
- chalk/_gen/chalk/server/v1/chart_pb2.py +10 -10
- chalk/_gen/chalk/server/v1/chart_pb2.pyi +18 -2
- chalk/_gen/chalk/server/v1/clickhouse_pb2.py +42 -0
- chalk/_gen/chalk/server/v1/clickhouse_pb2.pyi +17 -0
- chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.py +78 -0
- chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.pyi +38 -0
- chalk/_gen/chalk/server/v1/cloud_components_pb2.py +153 -107
- chalk/_gen/chalk/server/v1/cloud_components_pb2.pyi +146 -4
- chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.py +180 -0
- chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.pyi +48 -0
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2.py +11 -3
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2.pyi +20 -0
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.py +59 -35
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.pyi +127 -1
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.py +135 -0
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.pyi +36 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.py +90 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.pyi +264 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.py +170 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.pyi +62 -0
- chalk/_gen/chalk/server/v1/datasets_pb2.py +36 -24
- chalk/_gen/chalk/server/v1/datasets_pb2.pyi +71 -2
- chalk/_gen/chalk/server/v1/datasets_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/datasets_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/deploy_pb2.py +9 -3
- chalk/_gen/chalk/server/v1/deploy_pb2.pyi +12 -0
- chalk/_gen/chalk/server/v1/deploy_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/deploy_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/deployment_pb2.py +20 -15
- chalk/_gen/chalk/server/v1/deployment_pb2.pyi +25 -0
- chalk/_gen/chalk/server/v1/environment_pb2.py +25 -15
- chalk/_gen/chalk/server/v1/environment_pb2.pyi +93 -1
- chalk/_gen/chalk/server/v1/eventbus_pb2.py +44 -0
- chalk/_gen/chalk/server/v1/eventbus_pb2.pyi +64 -0
- chalk/_gen/chalk/server/v1/eventbus_pb2_grpc.py +4 -0
- chalk/_gen/chalk/server/v1/eventbus_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/server/v1/files_pb2.py +65 -0
- chalk/_gen/chalk/server/v1/files_pb2.pyi +167 -0
- chalk/_gen/chalk/server/v1/files_pb2_grpc.py +4 -0
- chalk/_gen/chalk/server/v1/files_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/server/v1/graph_pb2.py +41 -3
- chalk/_gen/chalk/server/v1/graph_pb2.pyi +191 -0
- chalk/_gen/chalk/server/v1/graph_pb2_grpc.py +92 -0
- chalk/_gen/chalk/server/v1/graph_pb2_grpc.pyi +32 -0
- chalk/_gen/chalk/server/v1/incident_pb2.py +57 -0
- chalk/_gen/chalk/server/v1/incident_pb2.pyi +165 -0
- chalk/_gen/chalk/server/v1/incident_pb2_grpc.py +4 -0
- chalk/_gen/chalk/server/v1/incident_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2.py +44 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2.pyi +38 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.py +78 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.pyi +38 -0
- chalk/_gen/chalk/server/v1/integrations_pb2.py +11 -9
- chalk/_gen/chalk/server/v1/integrations_pb2.pyi +34 -2
- chalk/_gen/chalk/server/v1/kube_pb2.py +29 -19
- chalk/_gen/chalk/server/v1/kube_pb2.pyi +28 -0
- chalk/_gen/chalk/server/v1/kube_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/kube_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/log_pb2.py +21 -3
- chalk/_gen/chalk/server/v1/log_pb2.pyi +68 -0
- chalk/_gen/chalk/server/v1/log_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/log_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2.py +73 -0
- chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2.pyi +212 -0
- chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2_grpc.py +217 -0
- chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2_grpc.pyi +74 -0
- chalk/_gen/chalk/server/v1/model_registry_pb2.py +10 -10
- chalk/_gen/chalk/server/v1/model_registry_pb2.pyi +4 -1
- chalk/_gen/chalk/server/v1/monitoring_pb2.py +84 -75
- chalk/_gen/chalk/server/v1/monitoring_pb2.pyi +1 -0
- chalk/_gen/chalk/server/v1/monitoring_pb2_grpc.py +136 -0
- chalk/_gen/chalk/server/v1/monitoring_pb2_grpc.pyi +38 -0
- chalk/_gen/chalk/server/v1/offline_queries_pb2.py +32 -10
- chalk/_gen/chalk/server/v1/offline_queries_pb2.pyi +73 -0
- chalk/_gen/chalk/server/v1/offline_queries_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/offline_queries_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2.py +53 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2.pyi +86 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.py +168 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.pyi +60 -0
- chalk/_gen/chalk/server/v1/queries_pb2.py +76 -48
- chalk/_gen/chalk/server/v1/queries_pb2.pyi +155 -2
- chalk/_gen/chalk/server/v1/queries_pb2_grpc.py +180 -0
- chalk/_gen/chalk/server/v1/queries_pb2_grpc.pyi +48 -0
- chalk/_gen/chalk/server/v1/scheduled_query_pb2.py +4 -2
- chalk/_gen/chalk/server/v1/scheduled_query_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/scheduled_query_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.py +12 -6
- chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.pyi +75 -2
- chalk/_gen/chalk/server/v1/scheduler_pb2.py +24 -12
- chalk/_gen/chalk/server/v1/scheduler_pb2.pyi +61 -1
- chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/script_tasks_pb2.py +26 -14
- chalk/_gen/chalk/server/v1/script_tasks_pb2.pyi +33 -3
- chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2.py +75 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2.pyi +142 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.py +349 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.pyi +114 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2.py +48 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2.pyi +150 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.py +123 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.pyi +52 -0
- chalk/_gen/chalk/server/v1/team_pb2.py +156 -137
- chalk/_gen/chalk/server/v1/team_pb2.pyi +56 -10
- chalk/_gen/chalk/server/v1/team_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/team_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/topic_pb2.py +5 -3
- chalk/_gen/chalk/server/v1/topic_pb2.pyi +10 -1
- chalk/_gen/chalk/server/v1/trace_pb2.py +50 -28
- chalk/_gen/chalk/server/v1/trace_pb2.pyi +121 -0
- chalk/_gen/chalk/server/v1/trace_pb2_grpc.py +135 -0
- chalk/_gen/chalk/server/v1/trace_pb2_grpc.pyi +42 -0
- chalk/_gen/chalk/server/v1/webhook_pb2.py +9 -3
- chalk/_gen/chalk/server/v1/webhook_pb2.pyi +18 -0
- chalk/_gen/chalk/server/v1/webhook_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/webhook_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2.py +62 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2.pyi +75 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.py +221 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.pyi +88 -0
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.py +19 -7
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.pyi +96 -3
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.py +48 -0
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.pyi +20 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2.py +32 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2.pyi +42 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.py +4 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.pyi +4 -0
- chalk/_lsp/error_builder.py +11 -0
- chalk/_monitoring/Chart.py +1 -3
- chalk/_version.py +1 -1
- chalk/cli.py +5 -10
- chalk/client/client.py +178 -64
- chalk/client/client_async.py +154 -0
- chalk/client/client_async_impl.py +22 -0
- chalk/client/client_grpc.py +738 -112
- chalk/client/client_impl.py +541 -136
- chalk/client/dataset.py +27 -6
- chalk/client/models.py +99 -2
- chalk/client/serialization/model_serialization.py +126 -10
- chalk/config/project_config.py +1 -1
- chalk/df/LazyFramePlaceholder.py +1154 -0
- chalk/df/ast_parser.py +2 -10
- chalk/features/_class_property.py +7 -0
- chalk/features/_embedding/embedding.py +1 -0
- chalk/features/_embedding/sentence_transformer.py +1 -1
- chalk/features/_encoding/converter.py +83 -2
- chalk/features/_encoding/pyarrow.py +20 -4
- chalk/features/_encoding/rich.py +1 -3
- chalk/features/_tensor.py +1 -2
- chalk/features/dataframe/_filters.py +14 -5
- chalk/features/dataframe/_impl.py +91 -36
- chalk/features/dataframe/_validation.py +11 -7
- chalk/features/feature_field.py +40 -30
- chalk/features/feature_set.py +1 -2
- chalk/features/feature_set_decorator.py +1 -0
- chalk/features/feature_wrapper.py +42 -3
- chalk/features/hooks.py +81 -12
- chalk/features/inference.py +65 -10
- chalk/features/resolver.py +338 -56
- chalk/features/tag.py +1 -3
- chalk/features/underscore_features.py +2 -1
- chalk/functions/__init__.py +456 -21
- chalk/functions/holidays.py +1 -3
- chalk/gitignore/gitignore_parser.py +5 -1
- chalk/importer.py +186 -74
- chalk/ml/__init__.py +6 -2
- chalk/ml/model_hooks.py +368 -51
- chalk/ml/model_reference.py +68 -10
- chalk/ml/model_version.py +34 -21
- chalk/ml/utils.py +143 -40
- chalk/operators/_utils.py +14 -3
- chalk/parsed/_proto/export.py +22 -0
- chalk/parsed/duplicate_input_gql.py +4 -0
- chalk/parsed/expressions.py +1 -3
- chalk/parsed/json_conversions.py +21 -14
- chalk/parsed/to_proto.py +16 -4
- chalk/parsed/user_types_to_json.py +31 -10
- chalk/parsed/validation_from_registries.py +182 -0
- chalk/queries/named_query.py +16 -6
- chalk/queries/scheduled_query.py +13 -1
- chalk/serialization/parsed_annotation.py +25 -12
- chalk/sql/__init__.py +221 -0
- chalk/sql/_internal/integrations/athena.py +6 -1
- chalk/sql/_internal/integrations/bigquery.py +22 -2
- chalk/sql/_internal/integrations/databricks.py +61 -18
- chalk/sql/_internal/integrations/mssql.py +281 -0
- chalk/sql/_internal/integrations/postgres.py +11 -3
- chalk/sql/_internal/integrations/redshift.py +4 -0
- chalk/sql/_internal/integrations/snowflake.py +11 -2
- chalk/sql/_internal/integrations/util.py +2 -1
- chalk/sql/_internal/sql_file_resolver.py +55 -10
- chalk/sql/_internal/sql_source.py +36 -2
- chalk/streams/__init__.py +1 -3
- chalk/streams/_kafka_source.py +5 -1
- chalk/streams/_windows.py +16 -4
- chalk/streams/types.py +1 -2
- chalk/utils/__init__.py +1 -3
- chalk/utils/_otel_version.py +13 -0
- chalk/utils/async_helpers.py +14 -5
- chalk/utils/df_utils.py +2 -2
- chalk/utils/duration.py +1 -3
- chalk/utils/job_log_display.py +538 -0
- chalk/utils/missing_dependency.py +5 -4
- chalk/utils/notebook.py +255 -2
- chalk/utils/pl_helpers.py +190 -37
- chalk/utils/pydanticutil/pydantic_compat.py +1 -2
- chalk/utils/storage_client.py +246 -0
- chalk/utils/threading.py +1 -3
- chalk/utils/tracing.py +194 -86
- {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/METADATA +53 -21
- {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/RECORD +268 -198
- {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/WHEEL +0 -0
- {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/entry_points.txt +0 -0
- {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/top_level.txt +0 -0
chalk/features/resolver.py
CHANGED
|
@@ -21,6 +21,7 @@ import random
|
|
|
21
21
|
import re
|
|
22
22
|
import statistics
|
|
23
23
|
import types
|
|
24
|
+
import typing
|
|
24
25
|
from dataclasses import dataclass, is_dataclass
|
|
25
26
|
from datetime import datetime
|
|
26
27
|
from enum import Enum, IntEnum
|
|
@@ -43,13 +44,18 @@ from typing import (
|
|
|
43
44
|
Literal,
|
|
44
45
|
Mapping,
|
|
45
46
|
Optional,
|
|
47
|
+
ParamSpec,
|
|
46
48
|
Protocol,
|
|
47
49
|
Sequence,
|
|
48
50
|
Set,
|
|
49
51
|
Type,
|
|
52
|
+
TypeAlias,
|
|
50
53
|
TypeVar,
|
|
51
54
|
Union,
|
|
52
55
|
cast,
|
|
56
|
+
final,
|
|
57
|
+
get_args,
|
|
58
|
+
get_origin,
|
|
53
59
|
overload,
|
|
54
60
|
)
|
|
55
61
|
|
|
@@ -61,10 +67,10 @@ from google.protobuf import message_factory
|
|
|
61
67
|
from google.protobuf.descriptor import Descriptor
|
|
62
68
|
from google.protobuf.internal.python_message import GeneratedProtocolMessageType
|
|
63
69
|
from pydantic import BaseModel
|
|
64
|
-
from typing_extensions import ParamSpec, TypeAlias, final, get_args, get_origin
|
|
65
70
|
|
|
66
71
|
from chalk._lsp._class_finder import get_function_caller_info
|
|
67
72
|
from chalk._lsp.error_builder import FunctionCallErrorBuilder, ResolverErrorBuilder, get_resolver_error_builder
|
|
73
|
+
from chalk.df.LazyFramePlaceholder import LazyFramePlaceholder
|
|
68
74
|
from chalk.features._encoding.protobuf import (
|
|
69
75
|
convert_proto_message_type_to_pyarrow_type,
|
|
70
76
|
serialize_message_file_descriptor,
|
|
@@ -109,6 +115,7 @@ if TYPE_CHECKING:
|
|
|
109
115
|
|
|
110
116
|
from chalk.features import Underscore
|
|
111
117
|
from chalk.features.underscore import UnderscoreAttr, UnderscoreCall, UnderscoreCast, UnderscoreFunction
|
|
118
|
+
from chalk.ml.model_version import ModelVersion
|
|
112
119
|
from chalk.sql import BaseSQLSourceProtocol, SQLSourceGroup
|
|
113
120
|
from chalk.sql._internal.sql_settings import SQLResolverSettings
|
|
114
121
|
from chalk.sql._internal.sql_source import BaseSQLSource
|
|
@@ -550,28 +557,18 @@ class ResolverRegistry:
|
|
|
550
557
|
short_name = resolver.name
|
|
551
558
|
if short_name in self._short_name_to_resolver:
|
|
552
559
|
if not override and not notebook.is_notebook():
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
message=(
|
|
566
|
-
f"Another resolver with the same function name '{resolver.name}' in module "
|
|
567
|
-
f"'{self._short_name_to_resolver[short_name].__module__}' exists. "
|
|
568
|
-
f"Resolver function names must be unique. Please rename this resolver in module '{resolver.__module__}'."
|
|
569
|
-
),
|
|
570
|
-
label="duplicate resolver shortname",
|
|
571
|
-
code="71",
|
|
572
|
-
range=resolver.lsp_builder.function_name(),
|
|
573
|
-
raise_error=None,
|
|
574
|
-
)
|
|
560
|
+
# Same short name was reused
|
|
561
|
+
resolver.lsp_builder.add_diagnostic(
|
|
562
|
+
message=(
|
|
563
|
+
f"Another resolver with the same function name '{resolver.name}' in module "
|
|
564
|
+
f"'{self._short_name_to_resolver[short_name].__module__}' exists. "
|
|
565
|
+
f"Resolver function names must be unique. Please rename this resolver in module '{resolver.__module__}'."
|
|
566
|
+
),
|
|
567
|
+
label="duplicate resolver shortname",
|
|
568
|
+
code="71",
|
|
569
|
+
range=resolver.lsp_builder.function_name(),
|
|
570
|
+
raise_error=None,
|
|
571
|
+
)
|
|
575
572
|
return
|
|
576
573
|
existing_resolver = self._short_name_to_resolver[short_name]
|
|
577
574
|
# Need to remove the resolver from the typed registry
|
|
@@ -644,6 +641,7 @@ class Resolver(ResolverProtocol[P, T], abc.ABC):
|
|
|
644
641
|
output_row_order: Literal["one-to-one"] | None = None,
|
|
645
642
|
venv: str | None = None,
|
|
646
643
|
name: None = None, # deprecated
|
|
644
|
+
postprocessing: Underscore | None = None,
|
|
647
645
|
):
|
|
648
646
|
self._function_definition = ... if function_definition is None else function_definition
|
|
649
647
|
self._function_captured_globals = ... if function_captured_globals is None else function_captured_globals
|
|
@@ -687,6 +685,7 @@ class Resolver(ResolverProtocol[P, T], abc.ABC):
|
|
|
687
685
|
self._data_lineage = data_lineage
|
|
688
686
|
self._sql_settings = sql_settings
|
|
689
687
|
self.output_row_order = output_row_order
|
|
688
|
+
self.postprocessing = postprocessing
|
|
690
689
|
super().__init__()
|
|
691
690
|
|
|
692
691
|
@property
|
|
@@ -844,14 +843,20 @@ class Resolver(ResolverProtocol[P, T], abc.ABC):
|
|
|
844
843
|
annotation = None
|
|
845
844
|
|
|
846
845
|
if annotation is not None:
|
|
847
|
-
if
|
|
846
|
+
if self.static and type(val).__name__ == "DataFrame" and type(val).__module__ == "chalkdf.dataframe":
|
|
847
|
+
# No need to wrap this class in DataFrame.
|
|
848
|
+
pass
|
|
849
|
+
elif self.static and isinstance(val, LazyFramePlaceholder):
|
|
850
|
+
# No need to wrap this class in DataFrame.
|
|
851
|
+
pass
|
|
852
|
+
elif not isinstance(val, DataFrame):
|
|
848
853
|
val = DataFrame(val)
|
|
849
854
|
|
|
850
855
|
if time_is_frozen():
|
|
851
856
|
frozen_filter = Filter(lhs=CHALK_TS_FEATURE, operation="<=", rhs=TimeDelta(hours_ago=0))
|
|
852
857
|
annotation.filters = (frozen_filter, *annotation.filters)
|
|
853
858
|
|
|
854
|
-
if annotation.filters and len(annotation.filters) > 0:
|
|
859
|
+
if annotation.filters and len(annotation.filters) > 0 and not isinstance(val, LazyFramePlaceholder):
|
|
855
860
|
try:
|
|
856
861
|
val = val[annotation.filters]
|
|
857
862
|
val._materialize() # pyright: ignore[reportPrivateUsage]
|
|
@@ -2824,6 +2829,8 @@ class StreamResolver(Resolver[P, T]):
|
|
|
2824
2829
|
sql_settings: SQLResolverSettings | None,
|
|
2825
2830
|
feature_expressions: dict[Feature, Underscore] | None,
|
|
2826
2831
|
message_producer_parsed: StreamResolverMessageProducerParsed | None,
|
|
2832
|
+
skip_online: bool = False,
|
|
2833
|
+
skip_offline: bool = False,
|
|
2827
2834
|
):
|
|
2828
2835
|
super().__init__(
|
|
2829
2836
|
function_definition=function_definition,
|
|
@@ -2892,6 +2899,8 @@ class StreamResolver(Resolver[P, T]):
|
|
|
2892
2899
|
|
|
2893
2900
|
self.feature_expressions: dict[Feature, Underscore] | None = feature_expressions
|
|
2894
2901
|
self.message_producer_parsed: StreamResolverMessageProducerParsed | None = message_producer_parsed
|
|
2902
|
+
self.skip_online = skip_online
|
|
2903
|
+
self.skip_offline = skip_offline
|
|
2895
2904
|
|
|
2896
2905
|
@property
|
|
2897
2906
|
def output_features(self) -> Sequence[Feature]:
|
|
@@ -3175,6 +3184,18 @@ def _parse_stream_resolver_output_features(
|
|
|
3175
3184
|
return output_features
|
|
3176
3185
|
|
|
3177
3186
|
|
|
3187
|
+
def _is_valid_stream_message_type(typ: Type) -> bool:
    """Report whether ``typ`` is accepted as a stream message type.

    A type is accepted when any one of the three checks below succeeds; they
    are evaluated in order and stop at the first success.

    Parameters
    ----------
    typ
        The candidate type (for example a parse function's return annotation).

    Returns
    -------
    bool
        ``True`` if ``typ`` passes ``is_pydantic_basemodel``, ``is_dataclass``
        or ``_is_protobuf_message``; ``False`` otherwise.
    """
    # Imported inside the function, as in the original, rather than at module level.
    from chalk.functions.proto import _is_protobuf_message

    accepted = is_pydantic_basemodel(typ) or is_dataclass(typ) or _is_protobuf_message(typ)
    return True if accepted else False
|
|
3197
|
+
|
|
3198
|
+
|
|
3178
3199
|
@dataclass(frozen=True)
|
|
3179
3200
|
class ParseInfo(Generic[T, V]):
|
|
3180
3201
|
fn: Callable[[T], V]
|
|
@@ -3186,18 +3207,16 @@ class ParseInfo(Generic[T, V]):
|
|
|
3186
3207
|
|
|
3187
3208
|
|
|
3188
3209
|
def _validate_parse_function(
|
|
3189
|
-
|
|
3210
|
+
stream_fqn: str,
|
|
3190
3211
|
parse_fn: Callable[[T], Any],
|
|
3191
3212
|
globals: dict[str, Any] | None,
|
|
3192
3213
|
locals: dict[str, Any] | None,
|
|
3193
|
-
|
|
3194
|
-
resolver_error_builder: ResolverErrorBuilder,
|
|
3214
|
+
stream_fn_input_type: Type[Any],
|
|
3195
3215
|
name: str | None,
|
|
3196
3216
|
) -> ParseInfo:
|
|
3197
3217
|
parse_error_builder = get_resolver_error_builder(parse_fn)
|
|
3198
3218
|
"""We need separate error builders for resolver and parse fn: different AST nodes"""
|
|
3199
3219
|
|
|
3200
|
-
stream_fqn = get_resolver_fqn(function=stream_fn, name=name)
|
|
3201
3220
|
parse_fqn = get_resolver_fqn(function=parse_fn, name=name)
|
|
3202
3221
|
sig = inspect.signature(parse_fn)
|
|
3203
3222
|
annotation_parser = ResolverAnnotationParser(parse_fn, globals, locals, parse_error_builder)
|
|
@@ -3237,18 +3256,14 @@ def _validate_parse_function(
|
|
|
3237
3256
|
raise
|
|
3238
3257
|
else:
|
|
3239
3258
|
parse_output = return_annotation
|
|
3240
|
-
if not
|
|
3259
|
+
if not _is_valid_stream_message_type(parse_output):
|
|
3241
3260
|
parse_error_builder.add_diagnostic(
|
|
3242
|
-
message=f"Parse function '{parse_fqn}' return annotation must be
|
|
3261
|
+
message=f"Parse function '{parse_fqn}' return annotation must be either a pydantic BaseModel, decorated with @dataclass, or a protobuf Message type",
|
|
3243
3262
|
code="101",
|
|
3244
3263
|
label="invalid parse function return annotation",
|
|
3245
3264
|
range=parse_error_builder.function_return_annotation(),
|
|
3246
3265
|
raise_error=TypeError,
|
|
3247
3266
|
)
|
|
3248
|
-
stream_fn_input_arg = next(
|
|
3249
|
-
param for param in params if isinstance(param, (StreamResolverParamMessage, StreamResolverParamMessageWindow))
|
|
3250
|
-
)
|
|
3251
|
-
stream_fn_input_type = _get_stream_resolver_input_type(stream_fn_input_arg, stream_fqn, resolver_error_builder)
|
|
3252
3267
|
if parse_output != stream_fn_input_type:
|
|
3253
3268
|
parse_error_builder.add_diagnostic(
|
|
3254
3269
|
message=(
|
|
@@ -3655,13 +3670,19 @@ def parse_and_register_stream_resolver(
|
|
|
3655
3670
|
)
|
|
3656
3671
|
parse_info = None
|
|
3657
3672
|
if parse:
|
|
3673
|
+
stream_fqn = get_resolver_fqn(function=fn, name=name)
|
|
3674
|
+
stream_fn_input_arg = next(
|
|
3675
|
+
param
|
|
3676
|
+
for param in params
|
|
3677
|
+
if isinstance(param, (StreamResolverParamMessage, StreamResolverParamMessageWindow))
|
|
3678
|
+
)
|
|
3679
|
+
stream_fn_input_type = _get_stream_resolver_input_type(stream_fn_input_arg, stream_fqn, error_builder)
|
|
3658
3680
|
parse_info = _validate_parse_function(
|
|
3659
|
-
|
|
3681
|
+
stream_fqn=stream_fqn,
|
|
3660
3682
|
parse_fn=parse,
|
|
3661
3683
|
globals=caller_globals,
|
|
3662
3684
|
locals=caller_locals,
|
|
3663
|
-
|
|
3664
|
-
resolver_error_builder=error_builder,
|
|
3685
|
+
stream_fn_input_type=stream_fn_input_type,
|
|
3665
3686
|
name=name,
|
|
3666
3687
|
)
|
|
3667
3688
|
if keys is not None:
|
|
@@ -3744,18 +3765,55 @@ def parse_and_register_stream_resolver(
|
|
|
3744
3765
|
return resolver
|
|
3745
3766
|
|
|
3746
3767
|
|
|
3768
|
+
def _validate_message_type(message_type: Type[Any], allow_lists: bool = True) -> str | None:
|
|
3769
|
+
if hasattr(message_type, "__origin__"):
|
|
3770
|
+
assert hasattr(message_type, "__args__")
|
|
3771
|
+
if message_type.__origin__ not in (list, typing.List, typing.Sequence):
|
|
3772
|
+
return "The only generic type supported is list|List|Sequence."
|
|
3773
|
+
if len(message_type.__args__) != 1:
|
|
3774
|
+
return f"Found {len(message_type.__args__)} type parameters for generic type, only one supported."
|
|
3775
|
+
if not allow_lists:
|
|
3776
|
+
return f"Nested lists (e.g. List[List[MessageType]]) not supported."
|
|
3777
|
+
sub_type = message_type.__args__[0]
|
|
3778
|
+
sub_res = _validate_message_type(sub_type, allow_lists=False)
|
|
3779
|
+
if sub_res is not None:
|
|
3780
|
+
sub_res = f"Found type List[T] with invalid T: {sub_res}"
|
|
3781
|
+
return sub_res
|
|
3782
|
+
|
|
3783
|
+
if message_type in (str, bytes):
|
|
3784
|
+
return None
|
|
3785
|
+
|
|
3786
|
+
if inspect.isclass(message_type): # pyright: ignore[reportUnnecessaryIsInstance]
|
|
3787
|
+
if issubclass(message_type, BaseModel):
|
|
3788
|
+
return None
|
|
3789
|
+
elif issubclass(message_type, google.protobuf.message.Message):
|
|
3790
|
+
return None
|
|
3791
|
+
elif is_dataclass(message_type):
|
|
3792
|
+
return None
|
|
3793
|
+
else:
|
|
3794
|
+
return "Unsupported type (expected str/bytes, a struct type, or a list[struct])"
|
|
3795
|
+
else:
|
|
3796
|
+
return "message type should be a type"
|
|
3797
|
+
|
|
3798
|
+
|
|
3799
|
+
def _is_list_message_type(message_type: Type[Any]):
|
|
3800
|
+
return getattr(message_type, "__origin__", None) in (list, typing.List, typing.Sequence)
|
|
3801
|
+
|
|
3802
|
+
|
|
3747
3803
|
def make_stream_resolver(
|
|
3748
3804
|
*,
|
|
3749
3805
|
name: str,
|
|
3750
3806
|
source: StreamSource,
|
|
3751
3807
|
message_type: Type[BaseModel | google.protobuf.message.Message | AnyDataclass | str | bytes],
|
|
3752
3808
|
output_features: "Mapping[FeatureWrapper, Underscore]",
|
|
3753
|
-
parse:
|
|
3809
|
+
parse: Underscore | Callable[[str | bytes], Any] | None = None,
|
|
3754
3810
|
environment: Optional[Environments] = None,
|
|
3755
3811
|
machine_type: Optional[MachineType] = None,
|
|
3756
3812
|
owner: Optional[str] = None,
|
|
3757
3813
|
doc: str | None = None,
|
|
3758
3814
|
sink: Sink | None = None,
|
|
3815
|
+
skip_online: bool = False,
|
|
3816
|
+
skip_offline: bool = False,
|
|
3759
3817
|
) -> StreamResolver:
|
|
3760
3818
|
"""Constructs a streaming resolver that, instead of a Python function,
|
|
3761
3819
|
defines its output features as column projections on an input message.
|
|
@@ -3788,12 +3846,21 @@ def make_stream_resolver(
|
|
|
3788
3846
|
sink
|
|
3789
3847
|
An optional message producer configuration that specifies where to send messages.
|
|
3790
3848
|
Read more at https://docs.chalk.ai/api-docs#Sink
|
|
3849
|
+
skip_online
|
|
3850
|
+
If True, skip online persistence (no writes to Redis/DynamoDB/etc).
|
|
3851
|
+
Results will still be processed but not stored in online stores.
|
|
3852
|
+
Note: Only applies to native streaming. Default: False
|
|
3853
|
+
skip_offline
|
|
3854
|
+
If True, skip offline persistence (no result bus publishing for offline storage).
|
|
3855
|
+
Results will still be processed but not stored in offline stores (S3/BigQuery/etc).
|
|
3856
|
+
Note: Only applies to native streaming. Default: False
|
|
3791
3857
|
|
|
3792
3858
|
Returns
|
|
3793
3859
|
-------
|
|
3794
3860
|
StreamResolver
|
|
3795
3861
|
A configured stream resolver.
|
|
3796
3862
|
"""
|
|
3863
|
+
from chalk.features.underscore import Underscore
|
|
3797
3864
|
|
|
3798
3865
|
# The function "definition" will be the source code of the invocation, for error reporting / LSP highlighting.
|
|
3799
3866
|
caller_info = get_function_caller_info(frame_offset=1)
|
|
@@ -3802,6 +3869,15 @@ def make_stream_resolver(
|
|
|
3802
3869
|
caller_lineno = caller_info.lineno
|
|
3803
3870
|
error_builder = FunctionCallErrorBuilder(caller_info)
|
|
3804
3871
|
|
|
3872
|
+
# TODO unify this with the above
|
|
3873
|
+
frame = inspect.currentframe()
|
|
3874
|
+
assert frame is not None
|
|
3875
|
+
caller_frame = frame.f_back
|
|
3876
|
+
assert caller_frame is not None
|
|
3877
|
+
caller_globals = caller_frame.f_globals
|
|
3878
|
+
caller_locals = caller_frame.f_locals
|
|
3879
|
+
del frame
|
|
3880
|
+
|
|
3805
3881
|
if not isinstance(source, StreamSource): # pyright: ignore[reportUnnecessaryIsInstance]
|
|
3806
3882
|
error_builder.add_diagnostic(
|
|
3807
3883
|
message=(
|
|
@@ -3841,27 +3917,26 @@ def make_stream_resolver(
|
|
|
3841
3917
|
)
|
|
3842
3918
|
|
|
3843
3919
|
# Validate message_type is one of the allowed types
|
|
3844
|
-
|
|
3845
|
-
if
|
|
3846
|
-
if issubclass(message_type, BaseModel):
|
|
3847
|
-
valid_message_type = True
|
|
3848
|
-
elif issubclass(message_type, google.protobuf.message.Message):
|
|
3849
|
-
valid_message_type = True
|
|
3850
|
-
elif is_dataclass(message_type):
|
|
3851
|
-
valid_message_type = True
|
|
3852
|
-
|
|
3853
|
-
# Also allow str and bytes types
|
|
3854
|
-
if message_type in (str, bytes):
|
|
3855
|
-
valid_message_type = True
|
|
3856
|
-
|
|
3857
|
-
if not valid_message_type: # pyright: ignore[reportUnnecessaryIsInstance]
|
|
3920
|
+
message_type_validation_msg = _validate_message_type(message_type)
|
|
3921
|
+
if message_type_validation_msg is not None:
|
|
3858
3922
|
error_builder.add_diagnostic(
|
|
3859
|
-
message=f"Invalid message_type for stream resolver '{name}':
|
|
3923
|
+
message=f"Invalid message_type for stream resolver '{name}' ({message_type}): {message_type_validation_msg}. Supported message types includes 'str', 'bytes', some struct type (a pydantic BaseModel or dataclass), or a list[T] where T is a string/bytes/struct)",
|
|
3860
3924
|
code="195",
|
|
3861
3925
|
label="Invalid message type",
|
|
3862
3926
|
range=error_builder.function_arg_range_by_name("message_type"),
|
|
3863
3927
|
)
|
|
3864
3928
|
|
|
3929
|
+
if _is_list_message_type(message_type) and parse is None:
|
|
3930
|
+
error_builder.add_diagnostic(
|
|
3931
|
+
message=(
|
|
3932
|
+
f"Found list message_type without a parse function for stream resolver '{name}' ({message_type}): List message types are only supported if a custom parse function is provided. "
|
|
3933
|
+
f"Otherwise, only struct or bytes/string messages are supported."
|
|
3934
|
+
),
|
|
3935
|
+
code="196",
|
|
3936
|
+
label="List message type without parse function",
|
|
3937
|
+
range=error_builder.function_arg_range_by_name("message_type"),
|
|
3938
|
+
)
|
|
3939
|
+
|
|
3865
3940
|
from chalk import Features
|
|
3866
3941
|
|
|
3867
3942
|
unwrapped_features: list[Feature] = []
|
|
@@ -3891,8 +3966,10 @@ def make_stream_resolver(
|
|
|
3891
3966
|
f"Stream resolver '{name}' has expression-based parse function so it can't be called directly."
|
|
3892
3967
|
)
|
|
3893
3968
|
|
|
3969
|
+
params = [StreamResolverParamMessage(typ=message_type, name="message")]
|
|
3970
|
+
|
|
3894
3971
|
parse_info: Optional[ParseInfo] = None
|
|
3895
|
-
if parse
|
|
3972
|
+
if isinstance(parse, Underscore):
|
|
3896
3973
|
parse_info = ParseInfo(
|
|
3897
3974
|
fn=_dummy_parse_fn,
|
|
3898
3975
|
input_type=bytes,
|
|
@@ -3901,6 +3978,19 @@ def make_stream_resolver(
|
|
|
3901
3978
|
parse_function_captured_globals=None,
|
|
3902
3979
|
parse_expression=parse,
|
|
3903
3980
|
)
|
|
3981
|
+
elif callable(parse):
|
|
3982
|
+
parse_info = _validate_parse_function(
|
|
3983
|
+
name,
|
|
3984
|
+
parse_fn=parse,
|
|
3985
|
+
globals=caller_globals,
|
|
3986
|
+
locals=caller_locals,
|
|
3987
|
+
stream_fn_input_type=message_type,
|
|
3988
|
+
name=name,
|
|
3989
|
+
)
|
|
3990
|
+
if parse_info.input_type != bytes:
|
|
3991
|
+
raise ValueError(
|
|
3992
|
+
f"Native streaming resolvers only support python parse functions with input bytes 'bytes'. Function {parse} has input type {parse_info.input_type}"
|
|
3993
|
+
)
|
|
3904
3994
|
|
|
3905
3995
|
# Validate and parse sink before creating StreamResolver
|
|
3906
3996
|
message_producer_parsed: StreamResolverMessageProducerParsed | None = None
|
|
@@ -3925,7 +4015,7 @@ def make_stream_resolver(
|
|
|
3925
4015
|
message=message_type,
|
|
3926
4016
|
output=output_type,
|
|
3927
4017
|
signature=StreamResolverSignature(
|
|
3928
|
-
params=
|
|
4018
|
+
params=params,
|
|
3929
4019
|
output_feature_fqns={str(x) for x in output_features.keys()},
|
|
3930
4020
|
),
|
|
3931
4021
|
state=None,
|
|
@@ -3941,6 +4031,8 @@ def make_stream_resolver(
|
|
|
3941
4031
|
sql_settings=None,
|
|
3942
4032
|
feature_expressions={unwrap_feature(x): u for x, u in output_features.items()},
|
|
3943
4033
|
message_producer_parsed=message_producer_parsed,
|
|
4034
|
+
skip_online=skip_online,
|
|
4035
|
+
skip_offline=skip_offline,
|
|
3944
4036
|
)
|
|
3945
4037
|
resolver.add_to_registry(override=False)
|
|
3946
4038
|
return resolver
|
|
@@ -4146,6 +4238,10 @@ def is_structured_type(
|
|
|
4146
4238
|
if not inspect.isclass(message_type): # pyright: ignore[reportUnnecessaryIsInstance]
|
|
4147
4239
|
return False
|
|
4148
4240
|
|
|
4241
|
+
if hasattr(message_type, "__origin__"):
|
|
4242
|
+
# It's a generic type like List[T]
|
|
4243
|
+
return False
|
|
4244
|
+
|
|
4149
4245
|
# Check if it's a Pydantic BaseModel
|
|
4150
4246
|
if issubclass(message_type, BaseModel):
|
|
4151
4247
|
return True
|
|
@@ -4227,6 +4323,9 @@ def validate_field_chain(
|
|
|
4227
4323
|
|
|
4228
4324
|
# Base case: if parent is UnderscoreRoot (_), validate field against current_type
|
|
4229
4325
|
if isinstance(underscore_attr._chalk__parent, UnderscoreRoot):
|
|
4326
|
+
if underscore_attr._chalk__attr == "chalk_now":
|
|
4327
|
+
return datetime
|
|
4328
|
+
|
|
4230
4329
|
# Check if current_type allows field access
|
|
4231
4330
|
if current_type in (str, bytes):
|
|
4232
4331
|
error_builder.add_diagnostic(
|
|
@@ -4392,5 +4491,188 @@ def validate_message_attributes(
|
|
|
4392
4491
|
name: str,
|
|
4393
4492
|
) -> None:
|
|
4394
4493
|
"""Validate that all underscore expressions use valid field names for the message_type."""
|
|
4494
|
+
if _is_list_message_type(message_type):
|
|
4495
|
+
message_type = message_type.__args__[0] # pyright: ignore[reportAttributeAccessIssue]
|
|
4395
4496
|
for expression in expressions:
|
|
4396
4497
|
validate_underscore_expression(expression, message_type, error_builder, name)
|
|
4498
|
+
|
|
4499
|
+
|
|
4500
|
+
def make_model_resolver(
    name: str,
    model: "ModelVersion",
    inputs: Dict[Feature, str] | List[Feature],
    output: Feature | List[Feature] | Dict[Feature, str],
    feature_class: Optional[type[Features]] = None,
    resource_group: Optional[str] = None,
    resource_hint: Optional[ResourceHint] = None,
) -> OnlineResolver:
    """
    Create an online resolver that runs inference on a model.

    This function provides an imperative API for creating model inference resolvers,
    as an alternative to using F.inference in feature definitions. It uses the same
    underlying implementation as F.inference but allows you to create resolvers
    programmatically.

    Parameters
    ----------
    name
        The name of the resolver
    model
        A ModelVersion reference to a deployed model
    inputs
        Either a dict mapping Feature objects to model input names (strings), or a list of
        Feature objects. If a dict, the values represent the model's expected input names
        (for future use). If a list, the features will be passed as a single DataFrame to
        the model.
    output
        The output feature(s) that will contain the predictions.
        Can be a single Feature, a list of Features, or a dict mapping Feature objects to
        model output names (strings) for future use with multi-output models.
        Must contain at least one feature.
    feature_class
        Optional feature class to use. If not provided, will be inferred from the inputs.
    resource_group
        Optional resource group for the resolver
    resource_hint
        Optional resource hint for execution (e.g., CPU/GPU preferences). When omitted,
        ``model.resource_hint`` is used instead.

    Returns
    -------
    OnlineResolver
        The created resolver. It has already been added to the resolver registry
        (with ``override=False``) by the time it is returned.

    Raises
    ------
    ValueError
        If ``output`` is an empty list or dict, if ``feature_class`` is omitted and
        cannot be inferred from the first input feature, or if the feature class has
        no primary key.

    Examples
    --------
    >>> from chalk.features import features, feature
    >>> from chalk.features.resolver import make_model_resolver
    >>> from chalk.ml import ModelVersion
    >>>
    >>> @features
    ... class User:
    ...     id: str = feature(primary=True)
    ...     age: float
    ...     income: float
    ...     risk_score: float
    ...     credit_score: float
    >>>
    >>> # Create a model version reference
    >>> model = ModelVersion(
    ...     name="risk_model",
    ...     version=1,
    ...     model_type="sklearn",
    ...     model_encoding="pickle",
    ...     filename="model.pkl"
    ... )
    >>>
    >>> # Create resolver with single output
    >>> resolver = make_model_resolver(
    ...     name="risk_model",
    ...     model=model,
    ...     inputs=[User.age, User.income],
    ...     output=User.risk_score,
    ... )
    >>>
    >>> # Create resolver with multiple outputs (list)
    >>> resolver = make_model_resolver(
    ...     name="multi_output_model",
    ...     model=model,
    ...     inputs=[User.age, User.income],
    ...     output=[User.risk_score, User.credit_score],
    ... )
    >>>
    >>> # Create resolver with named inputs and outputs (dict)
    >>> resolver = make_model_resolver(
    ...     name="named_model",
    ...     model=model,
    ...     inputs={User.age: "age_input", User.income: "income_input"},
    ...     output={User.risk_score: "risk_output", User.credit_score: "credit_output"},
    ... )
    """
    from chalk.features.inference import build_inference_function

    # Iterating a dict yields its keys, so dict and list inputs unwrap identically.
    # The dict values (model input names) are not consumed here.
    input_features = [unwrap_feature(f) for f in inputs]

    # Normalise the three accepted shapes of `output` to a list of unwrapped features.
    if isinstance(output, (dict, list)):
        output_features = [unwrap_feature(f) for f in output]
    else:
        output_features = [unwrap_feature(output)]

    # Fail with a descriptive error instead of an IndexError further down when the
    # caller passes an empty list/dict of outputs.
    if not output_features:
        raise ValueError(
            f"Model resolver '{name}' requires at least one output feature, but 'output' is empty"
        )

    # If feature_class is not provided, try to infer it from the first input feature
    if feature_class is None:
        if not input_features:
            raise ValueError("Cannot infer feature class: no input features provided and feature_class not specified")

        feature_class = getattr(input_features[0], "features_cls", None)
        if feature_class is None:
            raise ValueError(
                "Cannot infer feature class from inputs. Please provide feature_class parameter explicitly."
            )

    pkey = feature_class.__chalk_primary__
    if pkey is None:
        raise ValueError(f"Feature class {feature_class} does not have a primary key defined")

    # The namespace of the first output is used in the resolver's fqn; fall back to the
    # lower-cased feature class name when the output carries no namespace.
    output_namespace = getattr(output_features[0], "namespace", None) or feature_class.__name__.lower()

    # Use the same underlying inference function as F.inference
    # Pass list of outputs if multiple, single if only one
    output_for_inference = output_features if len(output_features) > 1 else output_features[0]
    inference_fn = build_inference_function(model, pkey, output_for_inference)

    # With a single output the join is just that output's name.
    output_names = "_".join(f.name for f in output_features)

    # NOTE(review): `resource_group` is accepted by this function but is not forwarded
    # to OnlineResolver below -- confirm whether it should be.
    resolver = OnlineResolver(
        function_definition="",
        filename="",
        fqn=f"{name}__{output_namespace}_{output_names}",
        doc=None,
        inputs=[DataFrame[[pkey, *ensure_tuple(input_features)]]],
        state=None,
        output=Features[DataFrame[(*output_features, pkey)]],  # type: ignore[misc]
        fn=inference_fn,
        environment=None,
        machine_type=None,
        default_args=[None],
        timeout=None,
        cron=None,
        when=None,
        tags=None,
        owner=None,
        resource_hint=resource_hint or model.resource_hint,
        data_sources=None,
        is_sql_file_resolver=False,
        source_line=None,
        lsp_builder=get_resolver_error_builder(inference_fn),
        parse=None,
        static=False,
        total=False,
        autogenerated=False,
        unique_on=None,
        partitioned_by=None,
        data_lineage=None,
        sql_settings=None,
    )

    # Register the resolver
    RESOLVER_REGISTRY.add_to_registry(resolver, override=False)

    return resolver
|
chalk/features/tag.py
CHANGED
|
@@ -24,6 +24,7 @@ from chalk.features.underscore import (
|
|
|
24
24
|
UnderscoreRoot,
|
|
25
25
|
)
|
|
26
26
|
from chalk.utils.missing_dependency import missing_dependency_exception
|
|
27
|
+
from chalk.utils.pl_helpers import schema_compat
|
|
27
28
|
|
|
28
29
|
if typing_extensions.TYPE_CHECKING:
|
|
29
30
|
from chalk import Features
|
|
@@ -66,7 +67,7 @@ def _parse_underscore_in_context(exp: Any, context: Any, is_pydantic: bool) -> A
|
|
|
66
67
|
from chalk.features.dataframe import DataFrame
|
|
67
68
|
|
|
68
69
|
if isinstance(parent_context, DataFrame) and is_pydantic:
|
|
69
|
-
if attr not in parent_context._underlying
|
|
70
|
+
if attr not in schema_compat(parent_context._underlying):
|
|
70
71
|
warnings.warn(
|
|
71
72
|
f"Attribute {attr} not found in dataframe schema. Returning None. Found expression {exp}."
|
|
72
73
|
)
|