chalkpy 2.89.22__py3-none-any.whl → 2.95.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chalk/__init__.py +2 -1
- chalk/_gen/chalk/arrow/v1/arrow_pb2.py +7 -5
- chalk/_gen/chalk/arrow/v1/arrow_pb2.pyi +6 -0
- chalk/_gen/chalk/artifacts/v1/chart_pb2.py +36 -33
- chalk/_gen/chalk/artifacts/v1/chart_pb2.pyi +41 -1
- chalk/_gen/chalk/artifacts/v1/cron_query_pb2.py +8 -7
- chalk/_gen/chalk/artifacts/v1/cron_query_pb2.pyi +5 -0
- chalk/_gen/chalk/common/v1/offline_query_pb2.py +19 -13
- chalk/_gen/chalk/common/v1/offline_query_pb2.pyi +37 -0
- chalk/_gen/chalk/common/v1/online_query_pb2.py +54 -54
- chalk/_gen/chalk/common/v1/online_query_pb2.pyi +13 -1
- chalk/_gen/chalk/common/v1/script_task_pb2.py +13 -11
- chalk/_gen/chalk/common/v1/script_task_pb2.pyi +19 -1
- chalk/_gen/chalk/dataframe/__init__.py +0 -0
- chalk/_gen/chalk/dataframe/v1/__init__.py +0 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2.py +48 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2.pyi +123 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.py +4 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/graph/v1/graph_pb2.py +150 -149
- chalk/_gen/chalk/graph/v1/graph_pb2.pyi +25 -0
- chalk/_gen/chalk/graph/v1/sources_pb2.py +94 -84
- chalk/_gen/chalk/graph/v1/sources_pb2.pyi +56 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.py +79 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.pyi +377 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.py +4 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.py +43 -7
- chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.pyi +252 -2
- chalk/_gen/chalk/protosql/v1/sql_service_pb2.py +54 -27
- chalk/_gen/chalk/protosql/v1/sql_service_pb2.pyi +131 -3
- chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.py +45 -0
- chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.pyi +14 -0
- chalk/_gen/chalk/python/v1/types_pb2.py +14 -14
- chalk/_gen/chalk/python/v1/types_pb2.pyi +8 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2.py +76 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2.pyi +156 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.py +258 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.pyi +84 -0
- chalk/_gen/chalk/server/v1/billing_pb2.py +40 -38
- chalk/_gen/chalk/server/v1/billing_pb2.pyi +17 -1
- chalk/_gen/chalk/server/v1/branches_pb2.py +45 -0
- chalk/_gen/chalk/server/v1/branches_pb2.pyi +80 -0
- chalk/_gen/chalk/server/v1/branches_pb2_grpc.pyi +36 -0
- chalk/_gen/chalk/server/v1/builder_pb2.py +372 -272
- chalk/_gen/chalk/server/v1/builder_pb2.pyi +479 -12
- chalk/_gen/chalk/server/v1/builder_pb2_grpc.py +360 -0
- chalk/_gen/chalk/server/v1/builder_pb2_grpc.pyi +96 -0
- chalk/_gen/chalk/server/v1/chart_pb2.py +10 -10
- chalk/_gen/chalk/server/v1/chart_pb2.pyi +18 -2
- chalk/_gen/chalk/server/v1/clickhouse_pb2.py +42 -0
- chalk/_gen/chalk/server/v1/clickhouse_pb2.pyi +17 -0
- chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.py +78 -0
- chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.pyi +38 -0
- chalk/_gen/chalk/server/v1/cloud_components_pb2.py +153 -107
- chalk/_gen/chalk/server/v1/cloud_components_pb2.pyi +146 -4
- chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.py +180 -0
- chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.pyi +48 -0
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2.py +11 -3
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2.pyi +20 -0
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.py +59 -35
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.pyi +127 -1
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.py +135 -0
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.pyi +36 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.py +90 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.pyi +264 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.py +170 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.pyi +62 -0
- chalk/_gen/chalk/server/v1/datasets_pb2.py +36 -24
- chalk/_gen/chalk/server/v1/datasets_pb2.pyi +71 -2
- chalk/_gen/chalk/server/v1/datasets_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/datasets_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/deploy_pb2.py +9 -3
- chalk/_gen/chalk/server/v1/deploy_pb2.pyi +12 -0
- chalk/_gen/chalk/server/v1/deploy_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/deploy_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/deployment_pb2.py +20 -15
- chalk/_gen/chalk/server/v1/deployment_pb2.pyi +25 -0
- chalk/_gen/chalk/server/v1/environment_pb2.py +25 -15
- chalk/_gen/chalk/server/v1/environment_pb2.pyi +93 -1
- chalk/_gen/chalk/server/v1/eventbus_pb2.py +44 -0
- chalk/_gen/chalk/server/v1/eventbus_pb2.pyi +64 -0
- chalk/_gen/chalk/server/v1/eventbus_pb2_grpc.py +4 -0
- chalk/_gen/chalk/server/v1/eventbus_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/server/v1/files_pb2.py +65 -0
- chalk/_gen/chalk/server/v1/files_pb2.pyi +167 -0
- chalk/_gen/chalk/server/v1/files_pb2_grpc.py +4 -0
- chalk/_gen/chalk/server/v1/files_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/server/v1/graph_pb2.py +41 -3
- chalk/_gen/chalk/server/v1/graph_pb2.pyi +191 -0
- chalk/_gen/chalk/server/v1/graph_pb2_grpc.py +92 -0
- chalk/_gen/chalk/server/v1/graph_pb2_grpc.pyi +32 -0
- chalk/_gen/chalk/server/v1/incident_pb2.py +57 -0
- chalk/_gen/chalk/server/v1/incident_pb2.pyi +165 -0
- chalk/_gen/chalk/server/v1/incident_pb2_grpc.py +4 -0
- chalk/_gen/chalk/server/v1/incident_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2.py +44 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2.pyi +38 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.py +78 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.pyi +38 -0
- chalk/_gen/chalk/server/v1/integrations_pb2.py +11 -9
- chalk/_gen/chalk/server/v1/integrations_pb2.pyi +34 -2
- chalk/_gen/chalk/server/v1/kube_pb2.py +29 -19
- chalk/_gen/chalk/server/v1/kube_pb2.pyi +28 -0
- chalk/_gen/chalk/server/v1/kube_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/kube_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/log_pb2.py +21 -3
- chalk/_gen/chalk/server/v1/log_pb2.pyi +68 -0
- chalk/_gen/chalk/server/v1/log_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/log_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2.py +73 -0
- chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2.pyi +212 -0
- chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2_grpc.py +217 -0
- chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2_grpc.pyi +74 -0
- chalk/_gen/chalk/server/v1/model_registry_pb2.py +10 -10
- chalk/_gen/chalk/server/v1/model_registry_pb2.pyi +4 -1
- chalk/_gen/chalk/server/v1/monitoring_pb2.py +84 -75
- chalk/_gen/chalk/server/v1/monitoring_pb2.pyi +1 -0
- chalk/_gen/chalk/server/v1/monitoring_pb2_grpc.py +136 -0
- chalk/_gen/chalk/server/v1/monitoring_pb2_grpc.pyi +38 -0
- chalk/_gen/chalk/server/v1/offline_queries_pb2.py +32 -10
- chalk/_gen/chalk/server/v1/offline_queries_pb2.pyi +73 -0
- chalk/_gen/chalk/server/v1/offline_queries_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/offline_queries_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2.py +53 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2.pyi +86 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.py +168 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.pyi +60 -0
- chalk/_gen/chalk/server/v1/queries_pb2.py +76 -48
- chalk/_gen/chalk/server/v1/queries_pb2.pyi +155 -2
- chalk/_gen/chalk/server/v1/queries_pb2_grpc.py +180 -0
- chalk/_gen/chalk/server/v1/queries_pb2_grpc.pyi +48 -0
- chalk/_gen/chalk/server/v1/scheduled_query_pb2.py +4 -2
- chalk/_gen/chalk/server/v1/scheduled_query_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/scheduled_query_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.py +12 -6
- chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.pyi +75 -2
- chalk/_gen/chalk/server/v1/scheduler_pb2.py +24 -12
- chalk/_gen/chalk/server/v1/scheduler_pb2.pyi +61 -1
- chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/script_tasks_pb2.py +26 -14
- chalk/_gen/chalk/server/v1/script_tasks_pb2.pyi +33 -3
- chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2.py +75 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2.pyi +142 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.py +349 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.pyi +114 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2.py +48 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2.pyi +150 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.py +123 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.pyi +52 -0
- chalk/_gen/chalk/server/v1/team_pb2.py +156 -137
- chalk/_gen/chalk/server/v1/team_pb2.pyi +56 -10
- chalk/_gen/chalk/server/v1/team_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/team_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/topic_pb2.py +5 -3
- chalk/_gen/chalk/server/v1/topic_pb2.pyi +10 -1
- chalk/_gen/chalk/server/v1/trace_pb2.py +50 -28
- chalk/_gen/chalk/server/v1/trace_pb2.pyi +121 -0
- chalk/_gen/chalk/server/v1/trace_pb2_grpc.py +135 -0
- chalk/_gen/chalk/server/v1/trace_pb2_grpc.pyi +42 -0
- chalk/_gen/chalk/server/v1/webhook_pb2.py +9 -3
- chalk/_gen/chalk/server/v1/webhook_pb2.pyi +18 -0
- chalk/_gen/chalk/server/v1/webhook_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/webhook_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2.py +62 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2.pyi +75 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.py +221 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.pyi +88 -0
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.py +19 -7
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.pyi +96 -3
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.py +48 -0
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.pyi +20 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2.py +32 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2.pyi +42 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.py +4 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.pyi +4 -0
- chalk/_lsp/error_builder.py +11 -0
- chalk/_monitoring/Chart.py +1 -3
- chalk/_version.py +1 -1
- chalk/cli.py +5 -10
- chalk/client/client.py +178 -64
- chalk/client/client_async.py +154 -0
- chalk/client/client_async_impl.py +22 -0
- chalk/client/client_grpc.py +738 -112
- chalk/client/client_impl.py +541 -136
- chalk/client/dataset.py +27 -6
- chalk/client/models.py +99 -2
- chalk/client/serialization/model_serialization.py +126 -10
- chalk/config/project_config.py +1 -1
- chalk/df/LazyFramePlaceholder.py +1154 -0
- chalk/df/ast_parser.py +2 -10
- chalk/features/_class_property.py +7 -0
- chalk/features/_embedding/embedding.py +1 -0
- chalk/features/_embedding/sentence_transformer.py +1 -1
- chalk/features/_encoding/converter.py +83 -2
- chalk/features/_encoding/pyarrow.py +20 -4
- chalk/features/_encoding/rich.py +1 -3
- chalk/features/_tensor.py +1 -2
- chalk/features/dataframe/_filters.py +14 -5
- chalk/features/dataframe/_impl.py +91 -36
- chalk/features/dataframe/_validation.py +11 -7
- chalk/features/feature_field.py +40 -30
- chalk/features/feature_set.py +1 -2
- chalk/features/feature_set_decorator.py +1 -0
- chalk/features/feature_wrapper.py +42 -3
- chalk/features/hooks.py +81 -12
- chalk/features/inference.py +65 -10
- chalk/features/resolver.py +338 -56
- chalk/features/tag.py +1 -3
- chalk/features/underscore_features.py +2 -1
- chalk/functions/__init__.py +456 -21
- chalk/functions/holidays.py +1 -3
- chalk/gitignore/gitignore_parser.py +5 -1
- chalk/importer.py +186 -74
- chalk/ml/__init__.py +6 -2
- chalk/ml/model_hooks.py +368 -51
- chalk/ml/model_reference.py +68 -10
- chalk/ml/model_version.py +34 -21
- chalk/ml/utils.py +143 -40
- chalk/operators/_utils.py +14 -3
- chalk/parsed/_proto/export.py +22 -0
- chalk/parsed/duplicate_input_gql.py +4 -0
- chalk/parsed/expressions.py +1 -3
- chalk/parsed/json_conversions.py +21 -14
- chalk/parsed/to_proto.py +16 -4
- chalk/parsed/user_types_to_json.py +31 -10
- chalk/parsed/validation_from_registries.py +182 -0
- chalk/queries/named_query.py +16 -6
- chalk/queries/scheduled_query.py +13 -1
- chalk/serialization/parsed_annotation.py +25 -12
- chalk/sql/__init__.py +221 -0
- chalk/sql/_internal/integrations/athena.py +6 -1
- chalk/sql/_internal/integrations/bigquery.py +22 -2
- chalk/sql/_internal/integrations/databricks.py +61 -18
- chalk/sql/_internal/integrations/mssql.py +281 -0
- chalk/sql/_internal/integrations/postgres.py +11 -3
- chalk/sql/_internal/integrations/redshift.py +4 -0
- chalk/sql/_internal/integrations/snowflake.py +11 -2
- chalk/sql/_internal/integrations/util.py +2 -1
- chalk/sql/_internal/sql_file_resolver.py +55 -10
- chalk/sql/_internal/sql_source.py +36 -2
- chalk/streams/__init__.py +1 -3
- chalk/streams/_kafka_source.py +5 -1
- chalk/streams/_windows.py +16 -4
- chalk/streams/types.py +1 -2
- chalk/utils/__init__.py +1 -3
- chalk/utils/_otel_version.py +13 -0
- chalk/utils/async_helpers.py +14 -5
- chalk/utils/df_utils.py +2 -2
- chalk/utils/duration.py +1 -3
- chalk/utils/job_log_display.py +538 -0
- chalk/utils/missing_dependency.py +5 -4
- chalk/utils/notebook.py +255 -2
- chalk/utils/pl_helpers.py +190 -37
- chalk/utils/pydanticutil/pydantic_compat.py +1 -2
- chalk/utils/storage_client.py +246 -0
- chalk/utils/threading.py +1 -3
- chalk/utils/tracing.py +194 -86
- {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/METADATA +53 -21
- {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/RECORD +268 -198
- {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/WHEEL +0 -0
- {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/entry_points.txt +0 -0
- {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/top_level.txt +0 -0
chalk/client/client_impl.py
CHANGED
|
@@ -99,10 +99,12 @@ from chalk.client.models import (
|
|
|
99
99
|
GetRegisteredModelResponse,
|
|
100
100
|
GetRegisteredModelVersionResponse,
|
|
101
101
|
IngestDatasetRequest,
|
|
102
|
+
ManualTriggerScheduledQueryResponse,
|
|
102
103
|
MultiUploadFeaturesRequest,
|
|
103
104
|
MultiUploadFeaturesResponse,
|
|
104
105
|
OfflineQueryContext,
|
|
105
106
|
OfflineQueryInput,
|
|
107
|
+
OfflineQueryInputSql,
|
|
106
108
|
OfflineQueryInputUri,
|
|
107
109
|
OfflineQueryParquetUploadURLResponse,
|
|
108
110
|
OnlineQuery,
|
|
@@ -124,6 +126,7 @@ from chalk.client.models import (
|
|
|
124
126
|
ResolverReplayResponse,
|
|
125
127
|
ResolverRunResponse,
|
|
126
128
|
ResourceRequests,
|
|
129
|
+
ScheduledQueryRun,
|
|
127
130
|
SetDatasetRevisionMetadataRequest,
|
|
128
131
|
SetDatasetRevisionMetadataResponse,
|
|
129
132
|
SetIncrementalProgressRequest,
|
|
@@ -171,7 +174,9 @@ from chalk.utils.duration import parse_chalk_duration, timedelta_to_duration
|
|
|
171
174
|
from chalk.utils.environment_parsing import env_var_bool
|
|
172
175
|
from chalk.utils.log_with_context import get_logger
|
|
173
176
|
from chalk.utils.missing_dependency import missing_dependency_exception
|
|
177
|
+
from chalk.utils.notebook import parse_notebook_into_script
|
|
174
178
|
from chalk.utils.string import s
|
|
179
|
+
from chalk.utils.tracing import add_trace_headers, safe_trace
|
|
175
180
|
|
|
176
181
|
if TYPE_CHECKING:
|
|
177
182
|
import ssl
|
|
@@ -433,7 +438,7 @@ def _offline_query_inputs_should_be_uploaded(
|
|
|
433
438
|
|
|
434
439
|
for single_input in inputs_as_list:
|
|
435
440
|
if isinstance(single_input, collections.abc.Mapping):
|
|
436
|
-
num_rows = max(len(v) if
|
|
441
|
+
num_rows = max(len(v) if hasattr(v, "__len__") else 1 for v in single_input.values())
|
|
437
442
|
elif isinstance(single_input, pl.DataFrame):
|
|
438
443
|
num_rows = single_input.height
|
|
439
444
|
elif isinstance(single_input, pd.DataFrame):
|
|
@@ -741,6 +746,8 @@ class OnlineQueryResponseImpl(OnlineQueryResult):
|
|
|
741
746
|
self.warnings = warnings
|
|
742
747
|
self.meta = meta
|
|
743
748
|
|
|
749
|
+
print(self.data)
|
|
750
|
+
|
|
744
751
|
for d in self.data:
|
|
745
752
|
if d.value is not None:
|
|
746
753
|
try:
|
|
@@ -1867,78 +1874,85 @@ https://docs.chalk.ai/cli/apply
|
|
|
1867
1874
|
connect_timeout: float | ellipsis | None = ...,
|
|
1868
1875
|
headers: Mapping[str, str] | None = None,
|
|
1869
1876
|
query_context: Mapping[str, JsonValue] | str | None = None,
|
|
1877
|
+
trace: bool = False,
|
|
1870
1878
|
value_metrics_tag_by_features: Sequence[FeatureReference] = (),
|
|
1871
1879
|
) -> OnlineQueryResponseImpl:
|
|
1872
|
-
|
|
1873
|
-
|
|
1874
|
-
|
|
1875
|
-
|
|
1876
|
-
|
|
1877
|
-
|
|
1878
|
-
|
|
1879
|
-
|
|
1880
|
-
if
|
|
1881
|
-
|
|
1882
|
-
|
|
1883
|
-
|
|
1884
|
-
|
|
1885
|
-
|
|
1886
|
-
|
|
1887
|
-
|
|
1888
|
-
|
|
1889
|
-
|
|
1890
|
-
|
|
1891
|
-
|
|
1892
|
-
|
|
1893
|
-
|
|
1894
|
-
|
|
1895
|
-
|
|
1896
|
-
|
|
1897
|
-
|
|
1898
|
-
|
|
1899
|
-
|
|
1900
|
-
|
|
1901
|
-
|
|
1902
|
-
|
|
1903
|
-
|
|
1904
|
-
required_resolver_tags=required_resolver_tags,
|
|
1905
|
-
),
|
|
1906
|
-
deployment_id=preview_deployment_id,
|
|
1907
|
-
branch_id=branch,
|
|
1908
|
-
correlation_id=correlation_id,
|
|
1909
|
-
query_name=query_name,
|
|
1910
|
-
query_name_version=query_name_version,
|
|
1911
|
-
meta=meta,
|
|
1912
|
-
explain=explain,
|
|
1913
|
-
include_meta=bool(include_meta or explain),
|
|
1914
|
-
store_plan_stages=store_plan_stages,
|
|
1915
|
-
encoding_options=encoding_options or FeatureEncodingOptions(),
|
|
1916
|
-
planner_options=planner_options,
|
|
1917
|
-
value_metrics_tag_by_features=tuple(encoded_value_metrics_tag_by_features),
|
|
1918
|
-
query_context=_validate_context_dict(query_context),
|
|
1919
|
-
overlay_graph=_get_overlay_graph_b64(),
|
|
1920
|
-
)
|
|
1880
|
+
with safe_trace("query"):
|
|
1881
|
+
if branch is ...:
|
|
1882
|
+
branch = self._branch
|
|
1883
|
+
extra_headers = {"X-Chalk-Deployment-Type": "branch" if branch else "engine"}
|
|
1884
|
+
if query_name is not None:
|
|
1885
|
+
extra_headers["X-Chalk-Query-Name"] = query_name
|
|
1886
|
+
if trace:
|
|
1887
|
+
extra_headers = add_trace_headers(extra_headers)
|
|
1888
|
+
if headers:
|
|
1889
|
+
extra_headers.update(headers)
|
|
1890
|
+
|
|
1891
|
+
encoded_inputs, all_warnings = recursive_encode_inputs(input)
|
|
1892
|
+
encoded_outputs = encode_outputs(output)
|
|
1893
|
+
outputs = encoded_outputs.string_outputs
|
|
1894
|
+
encoded_value_metrics_tag_by_features = encode_outputs(value_metrics_tag_by_features).string_outputs
|
|
1895
|
+
|
|
1896
|
+
now_str = None
|
|
1897
|
+
if now is not None:
|
|
1898
|
+
if now.tzinfo is None:
|
|
1899
|
+
now = now.astimezone(tz=timezone.utc)
|
|
1900
|
+
now_str = now.isoformat()
|
|
1901
|
+
|
|
1902
|
+
staleness_encoded = {}
|
|
1903
|
+
if staleness is not None:
|
|
1904
|
+
for k, v in staleness.items():
|
|
1905
|
+
if isinstance(k, str):
|
|
1906
|
+
# It's a feature set
|
|
1907
|
+
staleness_encoded[k] = v
|
|
1908
|
+
elif is_feature_set_class(k):
|
|
1909
|
+
staleness_encoded[k.namespace] = v
|
|
1910
|
+
else:
|
|
1911
|
+
staleness_encoded[ensure_feature(k).root_fqn] = v
|
|
1921
1912
|
|
|
1922
|
-
|
|
1923
|
-
|
|
1924
|
-
|
|
1925
|
-
|
|
1926
|
-
|
|
1913
|
+
request = OnlineQueryRequest(
|
|
1914
|
+
inputs=encoded_inputs,
|
|
1915
|
+
outputs=outputs,
|
|
1916
|
+
expression_outputs=encoded_outputs.feature_expressions_base64,
|
|
1917
|
+
now=now_str,
|
|
1918
|
+
staleness=staleness_encoded,
|
|
1919
|
+
context=OnlineQueryContext(
|
|
1920
|
+
environment=environment,
|
|
1921
|
+
tags=tags,
|
|
1922
|
+
required_resolver_tags=required_resolver_tags,
|
|
1923
|
+
),
|
|
1924
|
+
deployment_id=preview_deployment_id,
|
|
1925
|
+
branch_id=branch,
|
|
1926
|
+
correlation_id=correlation_id,
|
|
1927
|
+
query_name=query_name,
|
|
1928
|
+
query_name_version=query_name_version,
|
|
1929
|
+
meta=meta,
|
|
1930
|
+
explain=explain,
|
|
1931
|
+
include_meta=bool(include_meta or explain),
|
|
1932
|
+
store_plan_stages=store_plan_stages,
|
|
1933
|
+
encoding_options=encoding_options or FeatureEncodingOptions(),
|
|
1934
|
+
planner_options=planner_options,
|
|
1935
|
+
value_metrics_tag_by_features=tuple(encoded_value_metrics_tag_by_features),
|
|
1936
|
+
query_context=_validate_context_dict(query_context),
|
|
1937
|
+
overlay_graph=_get_overlay_graph_b64(),
|
|
1938
|
+
)
|
|
1927
1939
|
|
|
1928
|
-
|
|
1929
|
-
|
|
1930
|
-
|
|
1931
|
-
|
|
1932
|
-
|
|
1933
|
-
|
|
1934
|
-
|
|
1935
|
-
|
|
1936
|
-
|
|
1937
|
-
|
|
1938
|
-
|
|
1939
|
-
|
|
1940
|
-
|
|
1941
|
-
|
|
1940
|
+
resp = self._request(
|
|
1941
|
+
method="POST",
|
|
1942
|
+
uri="/v1/query/online",
|
|
1943
|
+
json=request,
|
|
1944
|
+
response=OnlineQueryResponse,
|
|
1945
|
+
environment_override=environment,
|
|
1946
|
+
preview_deployment_id=preview_deployment_id,
|
|
1947
|
+
branch=branch,
|
|
1948
|
+
metadata_request=False,
|
|
1949
|
+
extra_headers=extra_headers,
|
|
1950
|
+
timeout=request_timeout,
|
|
1951
|
+
connect_timeout=connect_timeout,
|
|
1952
|
+
)
|
|
1953
|
+
return OnlineQueryResponseImpl(
|
|
1954
|
+
data=resp.data, errors=resp.errors or [], warnings=all_warnings, meta=resp.meta
|
|
1955
|
+
)
|
|
1942
1956
|
|
|
1943
1957
|
def multi_query(
|
|
1944
1958
|
self,
|
|
@@ -1954,13 +1968,15 @@ https://docs.chalk.ai/cli/apply
|
|
|
1954
1968
|
use_feather: Optional[bool] = True, # deprecated
|
|
1955
1969
|
compression: Optional[str] = "uncompressed",
|
|
1956
1970
|
) -> BulkOnlineQueryResponse:
|
|
1957
|
-
|
|
1971
|
+
if branch is ...:
|
|
1972
|
+
branch = self._branch
|
|
1973
|
+
extra_headers = {"X-Chalk-Deployment-Type": "branch" if branch else "engine"}
|
|
1958
1974
|
if query_name is not None:
|
|
1959
1975
|
extra_headers["X-Chalk-Query-Name"] = query_name
|
|
1976
|
+
|
|
1960
1977
|
buffer = BytesIO()
|
|
1961
1978
|
buffer.write(MULTI_QUERY_MAGIC_STR)
|
|
1962
|
-
|
|
1963
|
-
branch = self._branch
|
|
1979
|
+
|
|
1964
1980
|
for query in queries:
|
|
1965
1981
|
tags = query.tags
|
|
1966
1982
|
encoded_inputs = {str(k): v for k, v in query.input.items()}
|
|
@@ -2063,13 +2079,13 @@ https://docs.chalk.ai/cli/apply
|
|
|
2063
2079
|
headers: Mapping[str, str] | None = None,
|
|
2064
2080
|
value_metrics_tag_by_features: Sequence[FeatureReference] = (),
|
|
2065
2081
|
) -> BulkOnlineQueryResponse:
|
|
2066
|
-
|
|
2082
|
+
if branch is ...:
|
|
2083
|
+
branch = self._branch
|
|
2084
|
+
extra_headers = {"X-Chalk-Deployment-Type": "branch" if branch else "engine"}
|
|
2067
2085
|
if query_name is not None:
|
|
2068
2086
|
extra_headers["X-Chalk-Query-Name"] = query_name
|
|
2069
2087
|
if headers:
|
|
2070
2088
|
extra_headers.update(headers)
|
|
2071
|
-
if branch is ...:
|
|
2072
|
-
branch = self._branch
|
|
2073
2089
|
|
|
2074
2090
|
now_str = None
|
|
2075
2091
|
if now is not None:
|
|
@@ -2225,6 +2241,8 @@ https://docs.chalk.ai/cli/apply
|
|
|
2225
2241
|
override_target_image_tag: Optional[str] = None,
|
|
2226
2242
|
feature_for_lower_upper_bound: Optional[FeatureReference] = None,
|
|
2227
2243
|
use_job_queue: bool = False,
|
|
2244
|
+
*,
|
|
2245
|
+
input_sql: str | None = None,
|
|
2228
2246
|
) -> DatasetImpl:
|
|
2229
2247
|
run_asynchronously = (
|
|
2230
2248
|
use_multiple_computers
|
|
@@ -2267,48 +2285,70 @@ https://docs.chalk.ai/cli/apply
|
|
|
2267
2285
|
|
|
2268
2286
|
context = OfflineQueryContext(environment=environment)
|
|
2269
2287
|
|
|
2270
|
-
|
|
2271
|
-
|
|
2272
|
-
|
|
2273
|
-
|
|
2274
|
-
|
|
2275
|
-
|
|
2276
|
-
|
|
2277
|
-
|
|
2278
|
-
|
|
2279
|
-
|
|
2280
|
-
|
|
2281
|
-
# by this point, should be
|
|
2282
|
-
# Union[QueryInput, List[QueryInput], Tuple[QueryInput, ...]]
|
|
2283
|
-
if isinstance(input, (list, tuple)):
|
|
2284
|
-
input_times_tuple: Sequence[QueryInputTime] = (
|
|
2285
|
-
[None] * len(input)
|
|
2286
|
-
if input_times is None
|
|
2287
|
-
else [input_times for _ in input]
|
|
2288
|
-
if isinstance(input_times, datetime)
|
|
2289
|
-
else input_times
|
|
2288
|
+
_check_exclusive_options(
|
|
2289
|
+
{
|
|
2290
|
+
"input": input,
|
|
2291
|
+
"input_sql": input_sql,
|
|
2292
|
+
"max_samples": max_samples,
|
|
2293
|
+
}
|
|
2294
|
+
)
|
|
2295
|
+
if input_sql is not None:
|
|
2296
|
+
if input_times is not None:
|
|
2297
|
+
raise ValueError(
|
|
2298
|
+
f"Cannot specify `input_sql` and `input_times` together. Instead, the ChalkSQL query may output a `{TS_COL_NAME}` column"
|
|
2290
2299
|
)
|
|
2291
|
-
|
|
2292
|
-
|
|
2293
|
-
|
|
2294
|
-
|
|
2295
|
-
multi_input = [(input, cast(None, input_times))]
|
|
2300
|
+
if num_shards is not None:
|
|
2301
|
+
raise ValueError("Cannot specify `input_sql` and `num_shards` together.")
|
|
2302
|
+
if num_workers is not None:
|
|
2303
|
+
raise ValueError("Cannot specify `input_sql` and `num_workers` together.")
|
|
2296
2304
|
|
|
2297
|
-
|
|
2298
|
-
|
|
2299
|
-
|
|
2300
|
-
|
|
2301
|
-
|
|
2302
|
-
|
|
2303
|
-
|
|
2304
|
-
|
|
2305
|
-
|
|
2306
|
-
|
|
2307
|
-
|
|
2308
|
-
query_input = tuple(_to_offline_query_input(x, t) for x, t in multi_input)
|
|
2305
|
+
# Set query_input
|
|
2306
|
+
if input is not None:
|
|
2307
|
+
# Set query_input from input
|
|
2308
|
+
if isinstance(input, OfflineQueryInputUri):
|
|
2309
|
+
query_input = input
|
|
2310
|
+
elif isinstance(input, str):
|
|
2311
|
+
query_input = OfflineQueryInputUri(
|
|
2312
|
+
parquet_uri=input,
|
|
2313
|
+
start_row=None,
|
|
2314
|
+
end_row=None,
|
|
2315
|
+
)
|
|
2309
2316
|
else:
|
|
2310
|
-
|
|
2311
|
-
|
|
2317
|
+
# by this point, should be
|
|
2318
|
+
# Union[QueryInput, List[QueryInput], Tuple[QueryInput, ...]]
|
|
2319
|
+
if isinstance(input, (list, tuple)):
|
|
2320
|
+
input_times_tuple: Sequence[QueryInputTime] = (
|
|
2321
|
+
[None] * len(input)
|
|
2322
|
+
if input_times is None
|
|
2323
|
+
else [input_times for _ in input]
|
|
2324
|
+
if isinstance(input_times, datetime)
|
|
2325
|
+
else input_times
|
|
2326
|
+
)
|
|
2327
|
+
run_asynchronously = True
|
|
2328
|
+
multi_input = list(zip(input, input_times_tuple))
|
|
2329
|
+
else:
|
|
2330
|
+
# Just a QueryInput
|
|
2331
|
+
multi_input = [(input, cast(None, input_times))]
|
|
2332
|
+
|
|
2333
|
+
# defaulting to uploading input as table if inputs are large
|
|
2334
|
+
if upload_input_as_table or _offline_query_inputs_should_be_uploaded(input) or num_shards:
|
|
2335
|
+
with ThreadPoolExecutor(thread_name_prefix="offline_query_upload_input") as upload_input_executor:
|
|
2336
|
+
query_input = self._upload_offline_query_input(
|
|
2337
|
+
multi_input,
|
|
2338
|
+
context=context,
|
|
2339
|
+
branch=branch,
|
|
2340
|
+
executor=upload_input_executor,
|
|
2341
|
+
num_shards=num_shards,
|
|
2342
|
+
)
|
|
2343
|
+
elif run_asynchronously:
|
|
2344
|
+
query_input = tuple(_to_offline_query_input(x, t) for x, t in multi_input)
|
|
2345
|
+
else:
|
|
2346
|
+
assert len(multi_input) == 1, "We should default to running asynchronously if inputs is partitioned"
|
|
2347
|
+
query_input = _to_offline_query_input(*multi_input[0])
|
|
2348
|
+
elif input_sql is not None:
|
|
2349
|
+
query_input = OfflineQueryInputSql(input_sql=input_sql)
|
|
2350
|
+
else:
|
|
2351
|
+
query_input = None
|
|
2312
2352
|
|
|
2313
2353
|
response = self._create_dataset_job(
|
|
2314
2354
|
optional_output=optional_output_root_fqns,
|
|
@@ -2339,9 +2379,9 @@ https://docs.chalk.ai/cli/apply
|
|
|
2339
2379
|
override_target_image_tag=override_target_image_tag,
|
|
2340
2380
|
num_shards=num_shards,
|
|
2341
2381
|
num_workers=num_workers,
|
|
2342
|
-
feature_for_lower_upper_bound=
|
|
2343
|
-
|
|
2344
|
-
|
|
2382
|
+
feature_for_lower_upper_bound=(
|
|
2383
|
+
str(feature_for_lower_upper_bound) if feature_for_lower_upper_bound is not None else None
|
|
2384
|
+
),
|
|
2345
2385
|
completion_deadline=completion_deadline,
|
|
2346
2386
|
max_retries=max_retries,
|
|
2347
2387
|
optional_output_expressions=optional_output_expressions,
|
|
@@ -2371,6 +2411,111 @@ https://docs.chalk.ai/cli/apply
|
|
|
2371
2411
|
initialized_dataset.is_finished = True
|
|
2372
2412
|
return initialized_dataset
|
|
2373
2413
|
|
|
2414
|
+
def run_scheduled_query(
|
|
2415
|
+
self,
|
|
2416
|
+
name: str,
|
|
2417
|
+
planner_options: Optional[Mapping[str, Any]] = None,
|
|
2418
|
+
incremental_resolvers: Optional[Sequence[str]] = None,
|
|
2419
|
+
max_samples: Optional[int] = None,
|
|
2420
|
+
env_overrides: Optional[Mapping[str, str]] = None,
|
|
2421
|
+
) -> ManualTriggerScheduledQueryResponse:
|
|
2422
|
+
"""
|
|
2423
|
+
Manually trigger a scheduled query request.
|
|
2424
|
+
|
|
2425
|
+
Parameters
|
|
2426
|
+
----------
|
|
2427
|
+
name
|
|
2428
|
+
The name of the scheduled query to be triggered.
|
|
2429
|
+
incremental_resolvers
|
|
2430
|
+
If set to None, Chalk will incrementalize resolvers in the query's root namespaces.
|
|
2431
|
+
If set to a list of resolvers, this set will be used for incrementalization.
|
|
2432
|
+
Incremental resolvers must return a feature time in its output, and must return a `DataFrame`.
|
|
2433
|
+
Most commonly, this will be the name of a SQL file resolver. Chalk will ingest all new data
|
|
2434
|
+
from these resolvers and propagate changes to values in the root namespace.
|
|
2435
|
+
max_samples
|
|
2436
|
+
The maximum number of samples to compute.
|
|
2437
|
+
env_overrides:
|
|
2438
|
+
A dictionary of environment values to override during this specific triggered query.
|
|
2439
|
+
|
|
2440
|
+
Other Parameters
|
|
2441
|
+
----------------
|
|
2442
|
+
planner_options
|
|
2443
|
+
A dictionary of options to pass to the planner.
|
|
2444
|
+
These are typically provided by Chalk Support for specific use cases.
|
|
2445
|
+
|
|
2446
|
+
Returns
|
|
2447
|
+
-------
|
|
2448
|
+
ManualTriggerScheduledQueryResponse
|
|
2449
|
+
A response message containing metadata around the triggered run.
|
|
2450
|
+
|
|
2451
|
+
Examples
|
|
2452
|
+
--------
|
|
2453
|
+
>>> from chalk.client.client_grpc import ChalkGRPCClient
|
|
2454
|
+
>>> ChalkGRPCClient().run_scheduled_query(
|
|
2455
|
+
... name="my_scheduled_query",
|
|
2456
|
+
... )
|
|
2457
|
+
"""
|
|
2458
|
+
from chalk.client.client_grpc import ChalkGRPCClient
|
|
2459
|
+
|
|
2460
|
+
client_grpc = ChalkGRPCClient(
|
|
2461
|
+
client_id=self._client_id,
|
|
2462
|
+
client_secret=self._client_secret,
|
|
2463
|
+
environment=self._primary_environment,
|
|
2464
|
+
api_server=self._api_server,
|
|
2465
|
+
)
|
|
2466
|
+
|
|
2467
|
+
resp = client_grpc.run_scheduled_query(
|
|
2468
|
+
name=name,
|
|
2469
|
+
planner_options=planner_options,
|
|
2470
|
+
incremental_resolvers=incremental_resolvers,
|
|
2471
|
+
max_samples=max_samples,
|
|
2472
|
+
env_overrides=env_overrides,
|
|
2473
|
+
)
|
|
2474
|
+
|
|
2475
|
+
return resp
|
|
2476
|
+
|
|
2477
|
+
def get_scheduled_query_run_history(
    self,
    name: str,
    limit: int = 10,
) -> List[ScheduledQueryRun]:
    """
    Fetch the most recent runs of a scheduled query.

    The request is forwarded to a ``ChalkGRPCClient`` that is constructed
    from this client's credentials, primary environment and API server.

    Parameters
    ----------
    name
        The name of the scheduled query.
    limit
        The maximum number of runs to return. Defaults to 10.

    Returns
    -------
    list[ScheduledQueryRun]
        The run history reported by the gRPC client for the named query.

    Examples
    --------
    >>> from chalk.client import ChalkClient
    >>> ChalkClient().get_scheduled_query_run_history(
    ...     name="my_scheduled_query",
    ...     limit=20,
    ... )
    """
    # Imported lazily, inside the method, exactly as the other gRPC-backed
    # helpers in this class do.
    from chalk.client.client_grpc import ChalkGRPCClient

    grpc_settings = {
        "client_id": self._client_id,
        "client_secret": self._client_secret,
        "environment": self._primary_environment,
        "api_server": self._api_server,
    }
    run_history = ChalkGRPCClient(**grpc_settings).get_scheduled_query_run_history(
        name=name,
        limit=limit,
    )
    return run_history
|
|
2518
|
+
|
|
2374
2519
|
def prompt_evaluation(
|
|
2375
2520
|
self,
|
|
2376
2521
|
prompts: list[Prompt | str],
|
|
@@ -3432,6 +3577,7 @@ https://docs.chalk.ai/cli/apply
|
|
|
3432
3577
|
Optional[OfflineQueryInput],
|
|
3433
3578
|
UploadedParquetShardedOfflineQueryInput,
|
|
3434
3579
|
OfflineQueryInputUri,
|
|
3580
|
+
OfflineQueryInputSql,
|
|
3435
3581
|
],
|
|
3436
3582
|
max_samples: Optional[int],
|
|
3437
3583
|
dataset_name: Optional[str],
|
|
@@ -4345,6 +4491,187 @@ https://docs.chalk.ai/cli/apply
|
|
|
4345
4491
|
)
|
|
4346
4492
|
return resp
|
|
4347
4493
|
|
|
4494
|
+
def _run_serialized_query(
    self,
    serialized_plan_bytes: bytes,
    input: Union[Mapping[FeatureReference, Sequence[Any]], pa.Table],
    output: Sequence[FeatureReference] = (),
    staleness: Optional[Mapping[FeatureReference, str]] = None,
    context: Optional[OnlineQueryContext] = None,
    query_name: Optional[str] = None,
    query_name_version: Optional[str] = None,
    correlation_id: Optional[str] = None,
    include_meta: bool = False,
    explain: bool = False,
    store_plan_stages: bool = False,
    meta: Optional[Mapping[str, str]] = None,
    headers: Mapping[str, str] | None = None,
) -> BulkOnlineQueryResult:
    """Execute a pre-serialized query plan via ``POST /v1/query/run``.

    This is a protected method for internal use and testing.

    The request body is a sequence of three length-prefixed sections (each
    prefix is an 8-byte big-endian integer): the serialized plan, a UTF-8
    JSON header, and the feather-encoded input table.

    Parameters
    ----------
    serialized_plan_bytes
        The serialized BatchPlan protobuf bytes.
    input
        The input data, either as a mapping of features to values or as a
        PyArrow table. Mapping values that are not ``list`` instances are
        wrapped in a one-element list.
    output
        The output features to compute.
    staleness
        Maximum staleness overrides for features. A feature-set class key
        applies its value to every feature in that class.
    context
        Query context including environment and tags. A default
        ``OnlineQueryContext`` is used when omitted.
    query_name
        The name of the query.
    query_name_version
        The version of the query.
    correlation_id
        Correlation ID for logging.
    include_meta
        Whether to include metadata in the response.
    explain
        Whether to include explain output.
    store_plan_stages
        Whether to store plan stages.
    meta
        Customer metadata tags.
    headers
        Additional headers to provide with the request.

    Returns
    -------
    BulkOnlineQueryResult
        The scalar results as a dataframe (``None`` when the response has no
        scalar data), ``groups_dfs`` always ``None``, the parsed errors
        (``None`` when there are none) and the parsed query metadata
        (``None`` when absent or unparseable).

    Raises
    ------
    TypeError
        If the underlying request does not yield a ``requests.Response``.
    RuntimeError
        If the server responds with a status code other than 200.
    """
    try:
        import pyarrow as pa
        import pyarrow.feather as feather
    except ImportError:
        raise missing_dependency_exception("chalkpy[runtime]")

    # --- Input: normalise to a PyArrow table -------------------------------
    if isinstance(input, Mapping):
        # NOTE(review): only `list` values are passed through untouched; any
        # other value (including tuples) becomes a single-element column.
        columns = {
            str(ref): (column if isinstance(column, list) else [column])
            for ref, column in input.items()
        }
        arrow_input = pa.Table.from_pydict(columns)
    else:
        arrow_input = input

    # --- Outputs ------------------------------------------------------------
    requested_outputs = encode_outputs(output).string_outputs if output else []

    # --- Staleness: key every override by the feature's root FQN ------------
    stale_by_fqn = {}
    if staleness is not None:
        for ref, max_age in staleness.items():
            targets = ref.features if is_feature_set_class(ref) else (ensure_feature(ref),)
            for feat in targets:
                stale_by_fqn[feat.root_fqn] = max_age

    # --- JSON header (FeatherRequestHeader) ---------------------------------
    from chalk.client.models import OnlineQueryContext as OQC

    header_json = json.dumps(
        {
            "outputs": requested_outputs,
            "expression_outputs": [],
            "staleness": stale_by_fqn or None,
            "context": (context or OQC()).dict(),
            "include_meta": include_meta,
            "explain": explain,
            "correlation_id": correlation_id,
            "query_name": query_name,
            "query_name_version": query_name_version,
            "meta": meta,
            "store_plan_stages": store_plan_stages,
        }
    ).encode("utf-8")

    # --- Feather-encode the input table -------------------------------------
    feather_sink = BytesIO()
    feather.write_feather(arrow_input, feather_sink)
    feather_bytes = feather_sink.getvalue()

    # --- Request body layout --------------------------------------------------
    #   [8-byte big-endian length][serialized BatchPlan protobuf]
    #   [8-byte big-endian length][UTF-8 JSON header]
    #   [8-byte big-endian length][feather-encoded input data]
    payload = b"".join(
        (
            len(serialized_plan_bytes).to_bytes(8, byteorder="big"),
            serialized_plan_bytes,
            len(header_json).to_bytes(8, byteorder="big"),
            header_json,
            len(feather_bytes).to_bytes(8, byteorder="big"),
            feather_bytes,
        )
    )

    # --- HTTP round trip ------------------------------------------------------
    http_response = self._request(
        method="POST",
        uri="/v1/query/run",
        response=None,  # the raw response is decoded manually below
        json=None,
        data=payload,
        environment_override=None,
        preview_deployment_id=None,
        branch=None,
        metadata_request=False,
        extra_headers=headers,
    )

    if not isinstance(http_response, requests.Response):  # pyright: ignore[reportUnnecessaryIsInstance]
        raise TypeError("Expected requests.Response")

    if http_response.status_code != 200:
        raise RuntimeError(f"Request failed with status {http_response.status_code}: {http_response.text}")

    decoded = OnlineQueryResultFeather.deserialize(http_response.content)

    # --- Scalars: feather bytes -> dataframe ---------------------------------
    scalars_df = None
    if decoded.scalar_data:
        scalars_df = pa_table_to_pl_df(feather.read_table(BytesIO(decoded.scalar_data)))

    # --- Errors: JSON strings -> ChalkError objects ---------------------------
    parsed_errors = []
    for raw_error in decoded.errors or []:
        try:
            parsed_errors.append(ChalkError(**json.loads(raw_error)))
        except Exception:
            # Anything that cannot be parsed is surfaced as a generic error
            # carrying the raw payload as its message.
            parsed_errors.append(ChalkError.create(code=ErrorCode.PARSE_FAILED, message=str(raw_error)))

    # --- Meta: best effort ----------------------------------------------------
    query_meta = None
    if decoded.meta:
        try:
            query_meta = QueryMeta(**json.loads(decoded.meta))
        except Exception:
            # Unparseable metadata is dropped rather than failing the query.
            pass

    return BulkOnlineQueryResult(
        scalars_df=scalars_df,
        groups_dfs=None,
        errors=parsed_errors or None,
        meta=query_meta,
    )
|
|
4674
|
+
|
|
4348
4675
|
def _to_value(self, x: FeatureResult):
|
|
4349
4676
|
f: Feature = Feature.from_root_fqn(x.field)
|
|
4350
4677
|
|
|
@@ -4551,20 +4878,49 @@ https://docs.chalk.ai/cli/apply
|
|
|
4551
4878
|
# This shouldn't happen, but satisfies type checker
|
|
4552
4879
|
raise RuntimeError("All retries exhausted but no exception recorded")
|
|
4553
4880
|
|
|
4554
|
-
def _canonicalize_error(x: ChalkError):
|
|
4555
|
-
|
|
4556
|
-
|
|
4557
|
-
|
|
4558
|
-
|
|
4559
|
-
|
|
4560
|
-
|
|
4561
|
-
|
|
4562
|
-
|
|
4563
|
-
}
|
|
4564
|
-
|
|
4565
|
-
|
|
4566
|
-
|
|
4567
|
-
|
|
4881
|
+
def _canonicalize_error(x: ChalkError, expected: Optional[ChalkError] = None):
|
|
4882
|
+
"""
|
|
4883
|
+
Canonicalize error for comparison. If expected is provided, only compare
|
|
4884
|
+
fields that are non-None in the expected error.
|
|
4885
|
+
"""
|
|
4886
|
+
update = {}
|
|
4887
|
+
|
|
4888
|
+
# Always normalize exception stacktraces if exception exists
|
|
4889
|
+
if x.exception is not None:
|
|
4890
|
+
update["exception"] = x.exception.copy(update={"stacktrace": "", "internal_stacktrace": None})
|
|
4891
|
+
|
|
4892
|
+
# If expected is provided, clear fields that are None in expected (meaning we don't care about them)
|
|
4893
|
+
if expected is not None:
|
|
4894
|
+
if expected.feature is None:
|
|
4895
|
+
update["feature"] = None
|
|
4896
|
+
if expected.resolver is None:
|
|
4897
|
+
update["resolver"] = None
|
|
4898
|
+
if expected.display_primary_key is None:
|
|
4899
|
+
update["display_primary_key"] = None
|
|
4900
|
+
if expected.display_primary_key_fqn is None:
|
|
4901
|
+
update["display_primary_key_fqn"] = None
|
|
4902
|
+
if expected.exception is None:
|
|
4903
|
+
update["exception"] = None
|
|
4904
|
+
|
|
4905
|
+
return x.copy(update=update) if update else x
|
|
4906
|
+
|
|
4907
|
+
# Canonicalize expected errors first (without reference)
|
|
4908
|
+
query_errors_list = [_canonicalize_error(x) for x in (query_errors or [])]
|
|
4909
|
+
|
|
4910
|
+
# Canonicalize actual errors with reference to expected ones
|
|
4911
|
+
# For each actual error, find matching expected error and canonicalize accordingly
|
|
4912
|
+
actual_errors_list = []
|
|
4913
|
+
for actual in resp_errors or []:
|
|
4914
|
+
# Find the best matching expected error (by code and message)
|
|
4915
|
+
matching_expected = None
|
|
4916
|
+
for query_error in query_errors_list:
|
|
4917
|
+
if actual.code == query_error.code and actual.message == query_error.message:
|
|
4918
|
+
matching_expected = query_error
|
|
4919
|
+
break
|
|
4920
|
+
actual_errors_list.append(_canonicalize_error(actual, matching_expected))
|
|
4921
|
+
|
|
4922
|
+
query_errors = FrozenOrderedSet(query_errors_list)
|
|
4923
|
+
actual_errors = FrozenOrderedSet(actual_errors_list)
|
|
4568
4924
|
|
|
4569
4925
|
if not _do_query_errors_match(actual_errors, query_errors):
|
|
4570
4926
|
errors_expected = len(query_errors) > 0
|
|
@@ -4641,6 +4997,9 @@ https://docs.chalk.ai/cli/apply
|
|
|
4641
4997
|
_fail_test("errors differed -- see output table above")
|
|
4642
4998
|
|
|
4643
4999
|
if resp_data is not None:
|
|
5000
|
+
# set of features that were asserted on
|
|
5001
|
+
expected_features = {e.fqn for e in expected}
|
|
5002
|
+
|
|
4644
5003
|
actuals = [
|
|
4645
5004
|
Result(
|
|
4646
5005
|
x.field,
|
|
@@ -4649,6 +5008,7 @@ https://docs.chalk.ai/cli/apply
|
|
|
4649
5008
|
x.error,
|
|
4650
5009
|
)
|
|
4651
5010
|
for x in resp_data
|
|
5011
|
+
if x.field in expected_features # Filter to only asserted features
|
|
4652
5012
|
]
|
|
4653
5013
|
|
|
4654
5014
|
feature_mismatch = not _do_resultsets_match(actuals, expected)
|
|
@@ -4899,6 +5259,7 @@ https://docs.chalk.ai/cli/apply
|
|
|
4899
5259
|
name: str,
|
|
4900
5260
|
model_artifact_id: Optional[str] = None,
|
|
4901
5261
|
run_id: Optional[str] = None,
|
|
5262
|
+
run_name: Optional[str] = None,
|
|
4902
5263
|
criterion: Optional[ModelRunCriterion] = None,
|
|
4903
5264
|
aliases: Optional[List[str]] = None,
|
|
4904
5265
|
) -> RegisterModelVersionResponse:
|
|
@@ -4915,6 +5276,7 @@ https://docs.chalk.ai/cli/apply
|
|
|
4915
5276
|
name=name,
|
|
4916
5277
|
model_artifact_id=model_artifact_id,
|
|
4917
5278
|
run_id=run_id,
|
|
5279
|
+
run_name=run_name,
|
|
4918
5280
|
criterion=criterion,
|
|
4919
5281
|
aliases=aliases,
|
|
4920
5282
|
)
|
|
@@ -4923,14 +5285,40 @@ https://docs.chalk.ai/cli/apply
|
|
|
4923
5285
|
|
|
4924
5286
|
def train_model(
|
|
4925
5287
|
self,
|
|
4926
|
-
|
|
4927
|
-
|
|
4928
|
-
dataset_name: str,
|
|
5288
|
+
experiment_name: str,
|
|
5289
|
+
train_fn: Callable[[], None],
|
|
4929
5290
|
config: Optional[Mapping[str, Any]] = None,
|
|
5291
|
+
branch: Optional[Union[BranchId, ellipsis]] = ...,
|
|
4930
5292
|
resources: Optional[ResourceRequests] = None,
|
|
5293
|
+
env_overrides: Optional[Mapping[str, str]] = None,
|
|
5294
|
+
enable_profiling: bool = False,
|
|
5295
|
+
max_retries: int = 0,
|
|
4931
5296
|
) -> CreateModelTrainingJobResponse:
|
|
4932
5297
|
from chalk.client.client_grpc import ChalkGRPCClient
|
|
4933
5298
|
|
|
5299
|
+
if branch is ...:
|
|
5300
|
+
branch = self._branch
|
|
5301
|
+
|
|
5302
|
+
if not callable(train_fn):
|
|
5303
|
+
raise ValueError("train_fn must be a callable function.")
|
|
5304
|
+
|
|
5305
|
+
nargs = len(inspect.signature(train_fn).parameters)
|
|
5306
|
+
|
|
5307
|
+
if nargs == 0:
|
|
5308
|
+
if config is not None:
|
|
5309
|
+
raise ValueError("train_fn must accept a 'config' parameter to use the provided config.")
|
|
5310
|
+
config_str = None
|
|
5311
|
+
|
|
5312
|
+
if nargs == 1:
|
|
5313
|
+
if config is None:
|
|
5314
|
+
raise ValueError("train_fn must not accept a 'config' parameter when no config is provided.")
|
|
5315
|
+
try:
|
|
5316
|
+
config_str = json.dumps({"kwargs": {"config": config}})
|
|
5317
|
+
except TypeError as e:
|
|
5318
|
+
raise ValueError("config must be JSON serializable.") from e
|
|
5319
|
+
|
|
5320
|
+
script = parse_notebook_into_script(train_fn, config is not None)
|
|
5321
|
+
|
|
4934
5322
|
client_grpc = ChalkGRPCClient(
|
|
4935
5323
|
client_id=self._client_id,
|
|
4936
5324
|
client_secret=self._client_secret,
|
|
@@ -4938,8 +5326,25 @@ https://docs.chalk.ai/cli/apply
|
|
|
4938
5326
|
api_server=self._api_server,
|
|
4939
5327
|
)
|
|
4940
5328
|
|
|
4941
|
-
client_grpc.create_model_training_job(
|
|
4942
|
-
|
|
5329
|
+
task_response = client_grpc.create_model_training_job(
|
|
5330
|
+
script=script,
|
|
5331
|
+
function_name=train_fn.__name__,
|
|
5332
|
+
experiment_name=experiment_name,
|
|
5333
|
+
config=config_str,
|
|
5334
|
+
branch=branch,
|
|
5335
|
+
resources=resources,
|
|
5336
|
+
env_overrides=env_overrides,
|
|
5337
|
+
enable_profiling=enable_profiling,
|
|
4943
5338
|
)
|
|
4944
5339
|
|
|
5340
|
+
client_grpc.follow_model_training_job(operation_id=task_response.task_id)
|
|
5341
|
+
|
|
4945
5342
|
return CreateModelTrainingJobResponse(success=True)
|
|
5343
|
+
|
|
5344
|
+
|
|
5345
|
+
def _check_exclusive_options(options: dict[str, Any | None]):
|
|
5346
|
+
filled_options = {k: v for k, v in options.items() if v is not None}
|
|
5347
|
+
if len(filled_options) > 1:
|
|
5348
|
+
raise ValueError(
|
|
5349
|
+
f"Only one of the options: {', '.join(filled_options.keys())} can be specified (they are mutually exclusive options)."
|
|
5350
|
+
)
|