chalkpy 2.89.22__py3-none-any.whl → 2.95.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chalk/__init__.py +2 -1
- chalk/_gen/chalk/arrow/v1/arrow_pb2.py +7 -5
- chalk/_gen/chalk/arrow/v1/arrow_pb2.pyi +6 -0
- chalk/_gen/chalk/artifacts/v1/chart_pb2.py +36 -33
- chalk/_gen/chalk/artifacts/v1/chart_pb2.pyi +41 -1
- chalk/_gen/chalk/artifacts/v1/cron_query_pb2.py +8 -7
- chalk/_gen/chalk/artifacts/v1/cron_query_pb2.pyi +5 -0
- chalk/_gen/chalk/common/v1/offline_query_pb2.py +19 -13
- chalk/_gen/chalk/common/v1/offline_query_pb2.pyi +37 -0
- chalk/_gen/chalk/common/v1/online_query_pb2.py +54 -54
- chalk/_gen/chalk/common/v1/online_query_pb2.pyi +13 -1
- chalk/_gen/chalk/common/v1/script_task_pb2.py +13 -11
- chalk/_gen/chalk/common/v1/script_task_pb2.pyi +19 -1
- chalk/_gen/chalk/dataframe/__init__.py +0 -0
- chalk/_gen/chalk/dataframe/v1/__init__.py +0 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2.py +48 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2.pyi +123 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.py +4 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/graph/v1/graph_pb2.py +150 -149
- chalk/_gen/chalk/graph/v1/graph_pb2.pyi +25 -0
- chalk/_gen/chalk/graph/v1/sources_pb2.py +94 -84
- chalk/_gen/chalk/graph/v1/sources_pb2.pyi +56 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.py +79 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.pyi +377 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.py +4 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.py +43 -7
- chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.pyi +252 -2
- chalk/_gen/chalk/protosql/v1/sql_service_pb2.py +54 -27
- chalk/_gen/chalk/protosql/v1/sql_service_pb2.pyi +131 -3
- chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.py +45 -0
- chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.pyi +14 -0
- chalk/_gen/chalk/python/v1/types_pb2.py +14 -14
- chalk/_gen/chalk/python/v1/types_pb2.pyi +8 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2.py +76 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2.pyi +156 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.py +258 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.pyi +84 -0
- chalk/_gen/chalk/server/v1/billing_pb2.py +40 -38
- chalk/_gen/chalk/server/v1/billing_pb2.pyi +17 -1
- chalk/_gen/chalk/server/v1/branches_pb2.py +45 -0
- chalk/_gen/chalk/server/v1/branches_pb2.pyi +80 -0
- chalk/_gen/chalk/server/v1/branches_pb2_grpc.pyi +36 -0
- chalk/_gen/chalk/server/v1/builder_pb2.py +372 -272
- chalk/_gen/chalk/server/v1/builder_pb2.pyi +479 -12
- chalk/_gen/chalk/server/v1/builder_pb2_grpc.py +360 -0
- chalk/_gen/chalk/server/v1/builder_pb2_grpc.pyi +96 -0
- chalk/_gen/chalk/server/v1/chart_pb2.py +10 -10
- chalk/_gen/chalk/server/v1/chart_pb2.pyi +18 -2
- chalk/_gen/chalk/server/v1/clickhouse_pb2.py +42 -0
- chalk/_gen/chalk/server/v1/clickhouse_pb2.pyi +17 -0
- chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.py +78 -0
- chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.pyi +38 -0
- chalk/_gen/chalk/server/v1/cloud_components_pb2.py +153 -107
- chalk/_gen/chalk/server/v1/cloud_components_pb2.pyi +146 -4
- chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.py +180 -0
- chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.pyi +48 -0
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2.py +11 -3
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2.pyi +20 -0
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.py +59 -35
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.pyi +127 -1
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.py +135 -0
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.pyi +36 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.py +90 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.pyi +264 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.py +170 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.pyi +62 -0
- chalk/_gen/chalk/server/v1/datasets_pb2.py +36 -24
- chalk/_gen/chalk/server/v1/datasets_pb2.pyi +71 -2
- chalk/_gen/chalk/server/v1/datasets_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/datasets_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/deploy_pb2.py +9 -3
- chalk/_gen/chalk/server/v1/deploy_pb2.pyi +12 -0
- chalk/_gen/chalk/server/v1/deploy_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/deploy_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/deployment_pb2.py +20 -15
- chalk/_gen/chalk/server/v1/deployment_pb2.pyi +25 -0
- chalk/_gen/chalk/server/v1/environment_pb2.py +25 -15
- chalk/_gen/chalk/server/v1/environment_pb2.pyi +93 -1
- chalk/_gen/chalk/server/v1/eventbus_pb2.py +44 -0
- chalk/_gen/chalk/server/v1/eventbus_pb2.pyi +64 -0
- chalk/_gen/chalk/server/v1/eventbus_pb2_grpc.py +4 -0
- chalk/_gen/chalk/server/v1/eventbus_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/server/v1/files_pb2.py +65 -0
- chalk/_gen/chalk/server/v1/files_pb2.pyi +167 -0
- chalk/_gen/chalk/server/v1/files_pb2_grpc.py +4 -0
- chalk/_gen/chalk/server/v1/files_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/server/v1/graph_pb2.py +41 -3
- chalk/_gen/chalk/server/v1/graph_pb2.pyi +191 -0
- chalk/_gen/chalk/server/v1/graph_pb2_grpc.py +92 -0
- chalk/_gen/chalk/server/v1/graph_pb2_grpc.pyi +32 -0
- chalk/_gen/chalk/server/v1/incident_pb2.py +57 -0
- chalk/_gen/chalk/server/v1/incident_pb2.pyi +165 -0
- chalk/_gen/chalk/server/v1/incident_pb2_grpc.py +4 -0
- chalk/_gen/chalk/server/v1/incident_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2.py +44 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2.pyi +38 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.py +78 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.pyi +38 -0
- chalk/_gen/chalk/server/v1/integrations_pb2.py +11 -9
- chalk/_gen/chalk/server/v1/integrations_pb2.pyi +34 -2
- chalk/_gen/chalk/server/v1/kube_pb2.py +29 -19
- chalk/_gen/chalk/server/v1/kube_pb2.pyi +28 -0
- chalk/_gen/chalk/server/v1/kube_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/kube_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/log_pb2.py +21 -3
- chalk/_gen/chalk/server/v1/log_pb2.pyi +68 -0
- chalk/_gen/chalk/server/v1/log_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/log_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2.py +73 -0
- chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2.pyi +212 -0
- chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2_grpc.py +217 -0
- chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2_grpc.pyi +74 -0
- chalk/_gen/chalk/server/v1/model_registry_pb2.py +10 -10
- chalk/_gen/chalk/server/v1/model_registry_pb2.pyi +4 -1
- chalk/_gen/chalk/server/v1/monitoring_pb2.py +84 -75
- chalk/_gen/chalk/server/v1/monitoring_pb2.pyi +1 -0
- chalk/_gen/chalk/server/v1/monitoring_pb2_grpc.py +136 -0
- chalk/_gen/chalk/server/v1/monitoring_pb2_grpc.pyi +38 -0
- chalk/_gen/chalk/server/v1/offline_queries_pb2.py +32 -10
- chalk/_gen/chalk/server/v1/offline_queries_pb2.pyi +73 -0
- chalk/_gen/chalk/server/v1/offline_queries_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/offline_queries_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2.py +53 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2.pyi +86 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.py +168 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.pyi +60 -0
- chalk/_gen/chalk/server/v1/queries_pb2.py +76 -48
- chalk/_gen/chalk/server/v1/queries_pb2.pyi +155 -2
- chalk/_gen/chalk/server/v1/queries_pb2_grpc.py +180 -0
- chalk/_gen/chalk/server/v1/queries_pb2_grpc.pyi +48 -0
- chalk/_gen/chalk/server/v1/scheduled_query_pb2.py +4 -2
- chalk/_gen/chalk/server/v1/scheduled_query_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/scheduled_query_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.py +12 -6
- chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.pyi +75 -2
- chalk/_gen/chalk/server/v1/scheduler_pb2.py +24 -12
- chalk/_gen/chalk/server/v1/scheduler_pb2.pyi +61 -1
- chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/script_tasks_pb2.py +26 -14
- chalk/_gen/chalk/server/v1/script_tasks_pb2.pyi +33 -3
- chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2.py +75 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2.pyi +142 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.py +349 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.pyi +114 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2.py +48 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2.pyi +150 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.py +123 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.pyi +52 -0
- chalk/_gen/chalk/server/v1/team_pb2.py +156 -137
- chalk/_gen/chalk/server/v1/team_pb2.pyi +56 -10
- chalk/_gen/chalk/server/v1/team_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/team_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/topic_pb2.py +5 -3
- chalk/_gen/chalk/server/v1/topic_pb2.pyi +10 -1
- chalk/_gen/chalk/server/v1/trace_pb2.py +50 -28
- chalk/_gen/chalk/server/v1/trace_pb2.pyi +121 -0
- chalk/_gen/chalk/server/v1/trace_pb2_grpc.py +135 -0
- chalk/_gen/chalk/server/v1/trace_pb2_grpc.pyi +42 -0
- chalk/_gen/chalk/server/v1/webhook_pb2.py +9 -3
- chalk/_gen/chalk/server/v1/webhook_pb2.pyi +18 -0
- chalk/_gen/chalk/server/v1/webhook_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/webhook_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2.py +62 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2.pyi +75 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.py +221 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.pyi +88 -0
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.py +19 -7
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.pyi +96 -3
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.py +48 -0
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.pyi +20 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2.py +32 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2.pyi +42 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.py +4 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.pyi +4 -0
- chalk/_lsp/error_builder.py +11 -0
- chalk/_monitoring/Chart.py +1 -3
- chalk/_version.py +1 -1
- chalk/cli.py +5 -10
- chalk/client/client.py +178 -64
- chalk/client/client_async.py +154 -0
- chalk/client/client_async_impl.py +22 -0
- chalk/client/client_grpc.py +738 -112
- chalk/client/client_impl.py +541 -136
- chalk/client/dataset.py +27 -6
- chalk/client/models.py +99 -2
- chalk/client/serialization/model_serialization.py +126 -10
- chalk/config/project_config.py +1 -1
- chalk/df/LazyFramePlaceholder.py +1154 -0
- chalk/df/ast_parser.py +2 -10
- chalk/features/_class_property.py +7 -0
- chalk/features/_embedding/embedding.py +1 -0
- chalk/features/_embedding/sentence_transformer.py +1 -1
- chalk/features/_encoding/converter.py +83 -2
- chalk/features/_encoding/pyarrow.py +20 -4
- chalk/features/_encoding/rich.py +1 -3
- chalk/features/_tensor.py +1 -2
- chalk/features/dataframe/_filters.py +14 -5
- chalk/features/dataframe/_impl.py +91 -36
- chalk/features/dataframe/_validation.py +11 -7
- chalk/features/feature_field.py +40 -30
- chalk/features/feature_set.py +1 -2
- chalk/features/feature_set_decorator.py +1 -0
- chalk/features/feature_wrapper.py +42 -3
- chalk/features/hooks.py +81 -12
- chalk/features/inference.py +65 -10
- chalk/features/resolver.py +338 -56
- chalk/features/tag.py +1 -3
- chalk/features/underscore_features.py +2 -1
- chalk/functions/__init__.py +456 -21
- chalk/functions/holidays.py +1 -3
- chalk/gitignore/gitignore_parser.py +5 -1
- chalk/importer.py +186 -74
- chalk/ml/__init__.py +6 -2
- chalk/ml/model_hooks.py +368 -51
- chalk/ml/model_reference.py +68 -10
- chalk/ml/model_version.py +34 -21
- chalk/ml/utils.py +143 -40
- chalk/operators/_utils.py +14 -3
- chalk/parsed/_proto/export.py +22 -0
- chalk/parsed/duplicate_input_gql.py +4 -0
- chalk/parsed/expressions.py +1 -3
- chalk/parsed/json_conversions.py +21 -14
- chalk/parsed/to_proto.py +16 -4
- chalk/parsed/user_types_to_json.py +31 -10
- chalk/parsed/validation_from_registries.py +182 -0
- chalk/queries/named_query.py +16 -6
- chalk/queries/scheduled_query.py +13 -1
- chalk/serialization/parsed_annotation.py +25 -12
- chalk/sql/__init__.py +221 -0
- chalk/sql/_internal/integrations/athena.py +6 -1
- chalk/sql/_internal/integrations/bigquery.py +22 -2
- chalk/sql/_internal/integrations/databricks.py +61 -18
- chalk/sql/_internal/integrations/mssql.py +281 -0
- chalk/sql/_internal/integrations/postgres.py +11 -3
- chalk/sql/_internal/integrations/redshift.py +4 -0
- chalk/sql/_internal/integrations/snowflake.py +11 -2
- chalk/sql/_internal/integrations/util.py +2 -1
- chalk/sql/_internal/sql_file_resolver.py +55 -10
- chalk/sql/_internal/sql_source.py +36 -2
- chalk/streams/__init__.py +1 -3
- chalk/streams/_kafka_source.py +5 -1
- chalk/streams/_windows.py +16 -4
- chalk/streams/types.py +1 -2
- chalk/utils/__init__.py +1 -3
- chalk/utils/_otel_version.py +13 -0
- chalk/utils/async_helpers.py +14 -5
- chalk/utils/df_utils.py +2 -2
- chalk/utils/duration.py +1 -3
- chalk/utils/job_log_display.py +538 -0
- chalk/utils/missing_dependency.py +5 -4
- chalk/utils/notebook.py +255 -2
- chalk/utils/pl_helpers.py +190 -37
- chalk/utils/pydanticutil/pydantic_compat.py +1 -2
- chalk/utils/storage_client.py +246 -0
- chalk/utils/threading.py +1 -3
- chalk/utils/tracing.py +194 -86
- {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/METADATA +53 -21
- {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/RECORD +268 -198
- {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/WHEEL +0 -0
- {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/entry_points.txt +0 -0
- {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/top_level.txt +0 -0
chalk/client/client_grpc.py
CHANGED
|
@@ -18,8 +18,9 @@ from google.protobuf import empty_pb2, timestamp_pb2
|
|
|
18
18
|
from chalk import DataFrame, EnvironmentId, chalk_logger
|
|
19
19
|
from chalk._gen.chalk.auth.v1.agent_pb2 import CustomClaim
|
|
20
20
|
from chalk._gen.chalk.auth.v1.permissions_pb2 import Permission
|
|
21
|
-
from chalk._gen.chalk.common.v1 import
|
|
21
|
+
from chalk._gen.chalk.common.v1 import online_query_pb2, resources_pb2, upload_features_pb2
|
|
22
22
|
from chalk._gen.chalk.common.v1.online_query_pb2 import GenericSingleQuery, UploadFeaturesBulkRequest
|
|
23
|
+
from chalk._gen.chalk.common.v1.script_task_pb2 import ScriptTaskKind, ScriptTaskRequest, TrainingRunArgs
|
|
23
24
|
from chalk._gen.chalk.common.v2.execute_plan_pb2 import ExecutePlanRequest, ExecutePlanResponse
|
|
24
25
|
from chalk._gen.chalk.engine.v1 import query_server_pb2
|
|
25
26
|
from chalk._gen.chalk.engine.v1.query_server_pb2_grpc import QueryServiceStub
|
|
@@ -36,6 +37,11 @@ from chalk._gen.chalk.protosql.v1.sql_service_pb2 import (
|
|
|
36
37
|
)
|
|
37
38
|
from chalk._gen.chalk.protosql.v1.sql_service_pb2_grpc import SqlServiceStub
|
|
38
39
|
from chalk._gen.chalk.server.v1.auth_pb2_grpc import AuthServiceStub
|
|
40
|
+
from chalk._gen.chalk.server.v1.dataplanejobqueue_pb2 import (
|
|
41
|
+
GetJobQueueOperationSummaryRequest,
|
|
42
|
+
GetJobQueueOperationSummaryResponse,
|
|
43
|
+
)
|
|
44
|
+
from chalk._gen.chalk.server.v1.dataplanejobqueue_pb2_grpc import DataPlaneJobQueueServiceStub
|
|
39
45
|
from chalk._gen.chalk.server.v1.deploy_pb2 import (
|
|
40
46
|
CreateBranchFromSourceDeploymentRequest,
|
|
41
47
|
CreateBranchFromSourceDeploymentResponse,
|
|
@@ -49,6 +55,7 @@ from chalk._gen.chalk.server.v1.graph_pb2 import (
|
|
|
49
55
|
PythonVersion,
|
|
50
56
|
)
|
|
51
57
|
from chalk._gen.chalk.server.v1.graph_pb2_grpc import GraphServiceStub
|
|
58
|
+
from chalk._gen.chalk.server.v1.log_pb2_grpc import LogSearchServiceStub
|
|
52
59
|
from chalk._gen.chalk.server.v1.model_registry_pb2 import (
|
|
53
60
|
CreateModelArtifactRequest,
|
|
54
61
|
CreateModelArtifactResponse,
|
|
@@ -66,8 +73,13 @@ from chalk._gen.chalk.server.v1.model_registry_pb2 import (
|
|
|
66
73
|
GetModelVersionResponse,
|
|
67
74
|
)
|
|
68
75
|
from chalk._gen.chalk.server.v1.model_registry_pb2_grpc import ModelRegistryServiceStub
|
|
69
|
-
from chalk._gen.chalk.server.v1.offline_queries_pb2 import CreateModelTrainingJobRequest, CreateModelTrainingJobResponse
|
|
70
76
|
from chalk._gen.chalk.server.v1.offline_queries_pb2_grpc import OfflineQueryMetadataServiceStub
|
|
77
|
+
from chalk._gen.chalk.server.v1.scheduled_query_pb2_grpc import ScheduledQueryServiceStub
|
|
78
|
+
from chalk._gen.chalk.server.v1.scheduled_query_run_pb2 import GetScheduledQueryRunsRequest
|
|
79
|
+
from chalk._gen.chalk.server.v1.scheduler_pb2 import ManualTriggerScheduledQueryRequest
|
|
80
|
+
from chalk._gen.chalk.server.v1.scheduler_pb2_grpc import SchedulerServiceStub
|
|
81
|
+
from chalk._gen.chalk.server.v1.script_tasks_pb2 import CreateScriptTaskRequest, CreateScriptTaskResponse
|
|
82
|
+
from chalk._gen.chalk.server.v1.script_tasks_pb2_grpc import ScriptTaskServiceStub
|
|
71
83
|
from chalk._gen.chalk.server.v1.team_pb2 import (
|
|
72
84
|
CreateServiceTokenRequest,
|
|
73
85
|
CreateServiceTokenResponse,
|
|
@@ -75,6 +87,7 @@ from chalk._gen.chalk.server.v1.team_pb2 import (
|
|
|
75
87
|
ListServiceTokensResponse,
|
|
76
88
|
)
|
|
77
89
|
from chalk._gen.chalk.server.v1.team_pb2_grpc import TeamServiceStub
|
|
90
|
+
from chalk._gen.chalk.streaming.v1.simple_streaming_service_pb2_grpc import SimpleStreamingServiceStub
|
|
78
91
|
from chalk.client import ChalkAuthException, FeatureReference
|
|
79
92
|
from chalk.client.client_impl import _validate_context_dict # pyright: ignore[reportPrivateUsage]
|
|
80
93
|
from chalk.client.models import (
|
|
@@ -84,6 +97,9 @@ from chalk.client.models import (
|
|
|
84
97
|
CreateBranchResponse,
|
|
85
98
|
GetRegisteredModelResponse,
|
|
86
99
|
GetRegisteredModelVersionResponse,
|
|
100
|
+
)
|
|
101
|
+
from chalk.client.models import ManualTriggerScheduledQueryResponse as ManualTriggerScheduledQueryResponseDataclass
|
|
102
|
+
from chalk.client.models import (
|
|
87
103
|
ModelUploadUrlResponse,
|
|
88
104
|
OnlineQuery,
|
|
89
105
|
OnlineQueryResponse,
|
|
@@ -91,6 +107,9 @@ from chalk.client.models import (
|
|
|
91
107
|
RegisterModelResponse,
|
|
92
108
|
RegisterModelVersionResponse,
|
|
93
109
|
ResourceRequests,
|
|
110
|
+
ScheduledQueryRun,
|
|
111
|
+
StreamResolverTestResponse,
|
|
112
|
+
StreamResolverTestStatus,
|
|
94
113
|
UploadFeaturesResponse,
|
|
95
114
|
)
|
|
96
115
|
from chalk.client.serialization.model_serialization import ModelSerializer
|
|
@@ -101,20 +120,25 @@ from chalk.features._encoding.inputs import GRPC_ENCODE_OPTIONS, InputEncodeOpti
|
|
|
101
120
|
from chalk.features._encoding.json import FeatureEncodingOptions
|
|
102
121
|
from chalk.features._encoding.outputs import encode_outputs
|
|
103
122
|
from chalk.features.feature_set import is_feature_set_class
|
|
123
|
+
from chalk.features.resolver import Resolver
|
|
104
124
|
from chalk.features.tag import DeploymentId
|
|
105
125
|
from chalk.importer import CHALK_IMPORT_FLAG
|
|
106
126
|
from chalk.ml import LocalSourceConfig, ModelEncoding, ModelRunCriterion, ModelType, SourceConfig
|
|
107
127
|
from chalk.ml.model_file_transfer import ModelFileUploader
|
|
128
|
+
from chalk.ml.utils import ModelClass
|
|
108
129
|
from chalk.parsed._proto.utils import datetime_to_proto_timestamp, value_to_proto
|
|
109
130
|
from chalk.utils import df_utils
|
|
110
131
|
from chalk.utils.df_utils import record_batch_to_arrow_ipc
|
|
111
132
|
from chalk.utils.grpc import AuthenticatedChalkClientInterceptor, TokenRefresher, UnauthenticatedChalkClientInterceptor
|
|
133
|
+
from chalk.utils.tracing import add_trace_headers, safe_trace
|
|
112
134
|
|
|
113
135
|
if TYPE_CHECKING:
|
|
114
136
|
from pyarrow import RecordBatch, Table
|
|
137
|
+
from pydantic import BaseModel
|
|
115
138
|
|
|
116
139
|
from chalk._gen.chalk.server.v1.builder_pb2 import StartBranchResponse
|
|
117
140
|
from chalk._gen.chalk.server.v1.builder_pb2_grpc import BuilderServiceStub
|
|
141
|
+
from chalk.client import ChalkError
|
|
118
142
|
|
|
119
143
|
CHALK_GRPC_TRACE_ID_HEADER: str = "x-chalk-trace-id"
|
|
120
144
|
|
|
@@ -135,6 +159,20 @@ def get_trace_id_from_response(call: grpc.Call) -> Optional[str]:
|
|
|
135
159
|
return None
|
|
136
160
|
|
|
137
161
|
|
|
162
|
+
def _merge_headers(
|
|
163
|
+
headers: None | Sequence[tuple[str, str | bytes]] | Mapping[str, str | bytes],
|
|
164
|
+
extra_headers: None | Sequence[tuple[str, str | bytes]] | Mapping[str, str | bytes],
|
|
165
|
+
) -> tuple[tuple[str, str | bytes], ...]:
|
|
166
|
+
headers = _canonicalize_headers(headers)
|
|
167
|
+
extra_headers = _canonicalize_headers(extra_headers)
|
|
168
|
+
all_headers: list[tuple[str, str | bytes]] = []
|
|
169
|
+
for h in headers:
|
|
170
|
+
all_headers.append(h)
|
|
171
|
+
for h in extra_headers:
|
|
172
|
+
all_headers.append(h)
|
|
173
|
+
return tuple(all_headers)
|
|
174
|
+
|
|
175
|
+
|
|
138
176
|
def _canonicalize_headers(
|
|
139
177
|
headers: None | Sequence[tuple[str, str | bytes]] | Mapping[str, str | bytes],
|
|
140
178
|
) -> tuple[tuple[str, str | bytes], ...]:
|
|
@@ -188,29 +226,26 @@ def _parse_uri_for_engine(query_server_uri: str) -> ParsedUri:
|
|
|
188
226
|
return ParsedUri(uri_without_scheme=uri_without_scheme, use_tls=use_tls)
|
|
189
227
|
|
|
190
228
|
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
229
|
+
default_channel_options: Dict[str, str | int] = {
|
|
230
|
+
"grpc.max_send_message_length": 1024 * 1024 * 100, # 100MB
|
|
231
|
+
"grpc.max_receive_message_length": 1024 * 1024 * 100, # 100MB
|
|
194
232
|
# https://grpc.io/docs/guides/performance/#python
|
|
195
|
-
|
|
196
|
-
(
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
]
|
|
210
|
-
}
|
|
211
|
-
),
|
|
233
|
+
grpc.experimental.ChannelOptions.SingleThreadedUnaryStream: 1,
|
|
234
|
+
"grpc.service_config": json.dumps(
|
|
235
|
+
{
|
|
236
|
+
"methodConfig": [
|
|
237
|
+
{
|
|
238
|
+
"name": [{}],
|
|
239
|
+
"maxAttempts": 5,
|
|
240
|
+
"initialBackoff": "0.1s",
|
|
241
|
+
"maxBackoff": "1s",
|
|
242
|
+
"backoffMultiplier": 2,
|
|
243
|
+
"retryableStatusCodes": ["UNAVAILABLE"],
|
|
244
|
+
}
|
|
245
|
+
]
|
|
246
|
+
}
|
|
212
247
|
),
|
|
213
|
-
|
|
248
|
+
}
|
|
214
249
|
|
|
215
250
|
|
|
216
251
|
T = TypeVar("T")
|
|
@@ -258,6 +293,22 @@ class StubProvider:
|
|
|
258
293
|
)
|
|
259
294
|
return OfflineQueryMetadataServiceStub(self._server_channel)
|
|
260
295
|
|
|
296
|
+
@cached_property
|
|
297
|
+
def scheduled_query_stub(self) -> SchedulerServiceStub:
|
|
298
|
+
if self._server_channel is None:
|
|
299
|
+
raise ValueError(
|
|
300
|
+
"The GRPC engine service is not available. If you would like to set up a GRPC service, please contact Chalk."
|
|
301
|
+
)
|
|
302
|
+
return SchedulerServiceStub(self._server_channel)
|
|
303
|
+
|
|
304
|
+
@cached_property
|
|
305
|
+
def scheduled_query_run_stub(self) -> ScheduledQueryServiceStub:
|
|
306
|
+
if self._server_channel is None:
|
|
307
|
+
raise ValueError(
|
|
308
|
+
"The GRPC engine service is not available. If you would like to set up a GRPC service, please contact Chalk."
|
|
309
|
+
)
|
|
310
|
+
return ScheduledQueryServiceStub(self._server_channel)
|
|
311
|
+
|
|
261
312
|
@cached_property
|
|
262
313
|
def sql_stub(self) -> SqlServiceStub:
|
|
263
314
|
if self._engine_channel is None:
|
|
@@ -274,12 +325,26 @@ class StubProvider:
|
|
|
274
325
|
)
|
|
275
326
|
return DataFrameServiceStub(self._engine_channel)
|
|
276
327
|
|
|
328
|
+
@cached_property
|
|
329
|
+
def streaming_stub(self) -> SimpleStreamingServiceStub:
|
|
330
|
+
if self._engine_channel is None:
|
|
331
|
+
raise ValueError(
|
|
332
|
+
"The GRPC engine service is not available. If you would like to set up a GRPC service, please contact Chalk."
|
|
333
|
+
)
|
|
334
|
+
return SimpleStreamingServiceStub(self._engine_channel)
|
|
335
|
+
|
|
277
336
|
@cached_property
|
|
278
337
|
def model_stub(self) -> ModelRegistryServiceStub:
|
|
279
338
|
if self._server_channel is None:
|
|
280
339
|
raise RuntimeError("Unable to connect to API server.")
|
|
281
340
|
return ModelRegistryServiceStub(self._server_channel)
|
|
282
341
|
|
|
342
|
+
@cached_property
|
|
343
|
+
def task_stub(self) -> ScriptTaskServiceStub:
|
|
344
|
+
if self._server_channel is None:
|
|
345
|
+
raise RuntimeError("Unable to connect to API server.")
|
|
346
|
+
return ScriptTaskServiceStub(self._server_channel)
|
|
347
|
+
|
|
283
348
|
@cached_property
|
|
284
349
|
def builder_stub(self) -> "BuilderServiceStub":
|
|
285
350
|
from chalk._gen.chalk.server.v1.builder_pb2_grpc import BuilderServiceStub
|
|
@@ -288,6 +353,18 @@ class StubProvider:
|
|
|
288
353
|
raise RuntimeError("Unable to connect to API server.")
|
|
289
354
|
return BuilderServiceStub(self._server_channel)
|
|
290
355
|
|
|
356
|
+
@cached_property
|
|
357
|
+
def log_stub(self) -> LogSearchServiceStub:
|
|
358
|
+
if self._server_channel is None:
|
|
359
|
+
raise RuntimeError("Unable to connect to API server.")
|
|
360
|
+
return LogSearchServiceStub(self._server_channel)
|
|
361
|
+
|
|
362
|
+
@cached_property
|
|
363
|
+
def job_queue_stub(self) -> DataPlaneJobQueueServiceStub:
|
|
364
|
+
if self._server_channel is None:
|
|
365
|
+
raise RuntimeError("Unable to connect to API server.")
|
|
366
|
+
return DataPlaneJobQueueServiceStub(self._server_channel)
|
|
367
|
+
|
|
291
368
|
def __init__(
|
|
292
369
|
self,
|
|
293
370
|
token_config: TokenConfig,
|
|
@@ -295,17 +372,25 @@ class StubProvider:
|
|
|
295
372
|
deployment_tag: str | None = None,
|
|
296
373
|
skip_api_server: bool = False,
|
|
297
374
|
additional_headers: List[tuple[str, str]] | None = None,
|
|
375
|
+
channel_options: List[tuple[str, str | int]] | None = None,
|
|
298
376
|
):
|
|
299
377
|
super().__init__()
|
|
300
378
|
additional_headers_nonempty: List[tuple[str, str]] = [] if additional_headers is None else additional_headers
|
|
301
379
|
token_refresher: TokenRefresher | None = None
|
|
380
|
+
channel_options_merged: Dict[str, str | int] = default_channel_options.copy()
|
|
381
|
+
if channel_options:
|
|
382
|
+
channel_options_merged.update(dict(channel_options))
|
|
302
383
|
if skip_api_server:
|
|
303
384
|
# Omits the auth handshake with the API server. Primarily for internal use/testing -- if used in production,
|
|
304
385
|
# this client will simply fail to connect. If True then query_server must be provided & point to
|
|
305
386
|
# `localhost/127.0.0.1`.
|
|
306
387
|
if query_server is None:
|
|
307
388
|
raise ValueError("If skipping API server auth, query_server URI must be provided.")
|
|
308
|
-
|
|
389
|
+
parsed_uri = _parse_uri_for_engine(query_server)
|
|
390
|
+
if not (
|
|
391
|
+
parsed_uri.uri_without_scheme.startswith("localhost")
|
|
392
|
+
or parsed_uri.uri_without_scheme.startswith("127.0.0.1")
|
|
393
|
+
):
|
|
309
394
|
warnings.warn(
|
|
310
395
|
"Skipping API server auth should only be enabled if query_server URI is localhost. It will fail to authenticate against a production engine."
|
|
311
396
|
)
|
|
@@ -321,13 +406,13 @@ class StubProvider:
|
|
|
321
406
|
_unauthenticated_server_channel: grpc.Channel = (
|
|
322
407
|
grpc.insecure_channel(
|
|
323
408
|
target=server_host,
|
|
324
|
-
options=
|
|
409
|
+
options=list(channel_options_merged.items()),
|
|
325
410
|
)
|
|
326
411
|
if server_host.startswith("localhost") or server_host.startswith("127.0.0.1")
|
|
327
412
|
else grpc.secure_channel(
|
|
328
413
|
target=server_host,
|
|
329
414
|
credentials=grpc.ssl_channel_credentials(),
|
|
330
|
-
options=
|
|
415
|
+
options=list(channel_options_merged.items()),
|
|
331
416
|
)
|
|
332
417
|
)
|
|
333
418
|
|
|
@@ -401,12 +486,12 @@ class StubProvider:
|
|
|
401
486
|
grpc.secure_channel(
|
|
402
487
|
target=parsed_uri.uri_without_scheme,
|
|
403
488
|
credentials=grpc.ssl_channel_credentials(),
|
|
404
|
-
options=
|
|
489
|
+
options=list(channel_options_merged.items()),
|
|
405
490
|
)
|
|
406
491
|
if parsed_uri.use_tls
|
|
407
492
|
else grpc.insecure_channel(
|
|
408
493
|
target=parsed_uri.uri_without_scheme,
|
|
409
|
-
options=
|
|
494
|
+
options=list(channel_options_merged.items()),
|
|
410
495
|
)
|
|
411
496
|
),
|
|
412
497
|
*interceptors,
|
|
@@ -421,6 +506,7 @@ class StubRefresher:
|
|
|
421
506
|
deployment_tag: str | None = None,
|
|
422
507
|
skip_api_server: bool = False,
|
|
423
508
|
additional_headers: List[tuple[str, str]] | None = None,
|
|
509
|
+
channel_options: List[tuple[str, str | int]] | None = None,
|
|
424
510
|
):
|
|
425
511
|
super().__init__()
|
|
426
512
|
self._token_config = token_config
|
|
@@ -428,6 +514,7 @@ class StubRefresher:
|
|
|
428
514
|
self._deployment_tag = deployment_tag
|
|
429
515
|
self._skip_api_server = skip_api_server
|
|
430
516
|
self._additional_headers = additional_headers
|
|
517
|
+
self._channel_options = channel_options
|
|
431
518
|
self._stub = self._refresh_stub()
|
|
432
519
|
|
|
433
520
|
def _refresh_stub(self) -> StubProvider:
|
|
@@ -437,6 +524,7 @@ class StubRefresher:
|
|
|
437
524
|
deployment_tag=self._deployment_tag,
|
|
438
525
|
skip_api_server=self._skip_api_server,
|
|
439
526
|
additional_headers=self._additional_headers,
|
|
527
|
+
channel_options=self._channel_options,
|
|
440
528
|
)
|
|
441
529
|
return self._stub
|
|
442
530
|
|
|
@@ -473,6 +561,12 @@ class StubRefresher:
|
|
|
473
561
|
def call_offline_query_stub(self, fn: Callable[[OfflineQueryMetadataServiceStub], T]) -> T:
|
|
474
562
|
return self._retry_callable(fn, lambda: self._stub.offline_query_stub)
|
|
475
563
|
|
|
564
|
+
def call_scheduled_query_stub(self, fn: Callable[[SchedulerServiceStub], T]) -> T:
|
|
565
|
+
return self._retry_callable(fn, lambda: self._stub.scheduled_query_stub)
|
|
566
|
+
|
|
567
|
+
def call_scheduled_query_run_stub(self, fn: Callable[[ScheduledQueryServiceStub], T]) -> T:
|
|
568
|
+
return self._retry_callable(fn, lambda: self._stub.scheduled_query_run_stub)
|
|
569
|
+
|
|
476
570
|
def call_sql_stub(self, fn: Callable[[SqlServiceStub], T]) -> T:
|
|
477
571
|
return self._retry_callable(fn, lambda: self._stub.sql_stub)
|
|
478
572
|
|
|
@@ -482,9 +576,25 @@ class StubRefresher:
|
|
|
482
576
|
def call_model_stub(self, fn: Callable[[ModelRegistryServiceStub], T]) -> T:
|
|
483
577
|
return self._retry_callable(fn, lambda: self._stub.model_stub)
|
|
484
578
|
|
|
579
|
+
def call_task_stub(self, fn: Callable[[ScriptTaskServiceStub], T]) -> T:
|
|
580
|
+
return self._retry_callable(fn, lambda: self._stub.task_stub)
|
|
581
|
+
|
|
485
582
|
def call_builder_stub(self, fn: Callable[["BuilderServiceStub"], T]) -> T:
|
|
486
583
|
return self._retry_callable(fn, lambda: self._stub.builder_stub)
|
|
487
584
|
|
|
585
|
+
def call_log_stub(self, fn: Callable[[LogSearchServiceStub], T]) -> T:
|
|
586
|
+
return self._retry_callable(fn, lambda: self._stub.log_stub)
|
|
587
|
+
|
|
588
|
+
def call_job_queue_stub(self, fn: Callable[[DataPlaneJobQueueServiceStub], T]) -> T:
|
|
589
|
+
return self._retry_callable(fn, lambda: self._stub.job_queue_stub)
|
|
590
|
+
|
|
591
|
+
def call_streaming_stub(self, fn: Callable[[SimpleStreamingServiceStub], T]) -> T:
|
|
592
|
+
return self._retry_callable(fn, lambda: self._stub.streaming_stub)
|
|
593
|
+
|
|
594
|
+
@property
|
|
595
|
+
def log_stub(self) -> LogSearchServiceStub:
|
|
596
|
+
return self._stub.log_stub
|
|
597
|
+
|
|
488
598
|
@property
|
|
489
599
|
def environment_id(self) -> str | None:
|
|
490
600
|
return self._stub.environment_id
|
|
@@ -506,6 +616,7 @@ class ChalkGRPCClient:
|
|
|
506
616
|
additional_headers: List[tuple[str, str]] | None = None,
|
|
507
617
|
query_server: str | None = None,
|
|
508
618
|
input_compression: typing.Literal["lz4", "zstd", "uncompressed"] = "lz4",
|
|
619
|
+
channel_options: List[Tuple[str, str | int]] | None = None,
|
|
509
620
|
**kwargs: Any,
|
|
510
621
|
):
|
|
511
622
|
"""Create a `ChalkGRPCClient` with the given credentials.
|
|
@@ -561,6 +672,7 @@ class ChalkGRPCClient:
|
|
|
561
672
|
deployment_tag=deployment_tag,
|
|
562
673
|
additional_headers=additional_headers,
|
|
563
674
|
skip_api_server=kwargs.get("_skip_api_server", False),
|
|
675
|
+
channel_options=channel_options,
|
|
564
676
|
)
|
|
565
677
|
|
|
566
678
|
_INPUT_ENCODE_OPTIONS = GRPC_ENCODE_OPTIONS
|
|
@@ -618,6 +730,7 @@ class ChalkGRPCClient:
|
|
|
618
730
|
request_timeout: Optional[float] = None,
|
|
619
731
|
headers: Mapping[str, str] | Sequence[tuple[str, str | bytes]] | None = None,
|
|
620
732
|
query_context: Mapping[str, Union[str, int, float, bool, None]] | str | None = None,
|
|
733
|
+
trace: bool = False,
|
|
621
734
|
) -> OnlineQueryResponse:
|
|
622
735
|
"""Compute features values using online resolvers.
|
|
623
736
|
|
|
@@ -740,6 +853,7 @@ class ChalkGRPCClient:
|
|
|
740
853
|
request_timeout=request_timeout,
|
|
741
854
|
headers=headers,
|
|
742
855
|
query_context=_validate_context_dict(query_context),
|
|
856
|
+
trace=trace,
|
|
743
857
|
)
|
|
744
858
|
return OnlineQueryConverter.online_query_bulk_response_decode_to_single(bulk_response)
|
|
745
859
|
|
|
@@ -765,37 +879,44 @@ class ChalkGRPCClient:
|
|
|
765
879
|
request_timeout: Optional[float] = None,
|
|
766
880
|
headers: Mapping[str, str] | Sequence[tuple[str, str | bytes]] | None = None,
|
|
767
881
|
query_context: Mapping[str, Union[str, int, float, bool, None]] | None = None,
|
|
882
|
+
trace: bool = False,
|
|
768
883
|
) -> online_query_pb2.OnlineQueryBulkResponse:
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
884
|
+
with safe_trace("_online_query_grpc_request"):
|
|
885
|
+
request = self._make_query_bulk_request(
|
|
886
|
+
input={k: [v] for k, v in input.items()},
|
|
887
|
+
output=output,
|
|
888
|
+
now=[now] if now is not None else [],
|
|
889
|
+
staleness=staleness or {},
|
|
890
|
+
tags=tags or (),
|
|
891
|
+
correlation_id=correlation_id,
|
|
892
|
+
query_name=query_name,
|
|
893
|
+
query_name_version=query_name_version,
|
|
894
|
+
include_meta=include_meta,
|
|
895
|
+
meta=meta or {},
|
|
896
|
+
explain=explain,
|
|
897
|
+
store_plan_stages=store_plan_stages,
|
|
898
|
+
value_metrics_tag_by_features=value_metrics_tag_by_features,
|
|
899
|
+
encoding_options=encoding_options,
|
|
900
|
+
required_resolver_tags=required_resolver_tags or (),
|
|
901
|
+
planner_options=planner_options or {},
|
|
902
|
+
query_context=query_context,
|
|
903
|
+
)
|
|
904
|
+
if trace:
|
|
905
|
+
extra_headers: dict[str, str] = {}
|
|
906
|
+
extra_headers = add_trace_headers(extra_headers)
|
|
907
|
+
headers = _merge_headers(extra_headers, headers)
|
|
908
|
+
metadata = _canonicalize_headers(headers)
|
|
909
|
+
return self._stub_refresher.call_query_stub(
|
|
910
|
+
lambda x: x.OnlineQueryBulk(
|
|
911
|
+
request,
|
|
912
|
+
timeout=request_timeout,
|
|
913
|
+
metadata=metadata,
|
|
914
|
+
)
|
|
793
915
|
)
|
|
794
|
-
)
|
|
795
916
|
|
|
796
917
|
def online_query_bulk(
|
|
797
918
|
self,
|
|
798
|
-
input: Union[Mapping[FeatureReference, Sequence[Any]], DataFrame],
|
|
919
|
+
input: Union[Mapping[FeatureReference, Sequence[Any]], DataFrame, None] = None,
|
|
799
920
|
output: Sequence[FeatureReference] = (),
|
|
800
921
|
now: Optional[Sequence[dt.datetime]] = None,
|
|
801
922
|
staleness: Optional[Mapping[FeatureReference, str]] = None,
|
|
@@ -814,9 +935,17 @@ class ChalkGRPCClient:
|
|
|
814
935
|
request_timeout: Optional[float] = None,
|
|
815
936
|
headers: Mapping[str, str | bytes] | Sequence[tuple[str, str | bytes]] | None = None,
|
|
816
937
|
query_context: Mapping[str, Union[str, int, float, bool, None]] | str | None = None,
|
|
938
|
+
*,
|
|
939
|
+
input_sql: str | None = None,
|
|
817
940
|
) -> BulkOnlineQueryResult:
|
|
941
|
+
if input is None and input_sql is None:
|
|
942
|
+
raise TypeError("One of `input` or `input_sql` is required")
|
|
943
|
+
if input is not None and input_sql is not None:
|
|
944
|
+
raise TypeError("`input` and `input_sql` are mutually exclusive")
|
|
945
|
+
|
|
818
946
|
response, call = self._online_query_bulk_grpc_request(
|
|
819
947
|
input=input,
|
|
948
|
+
input_sql=input_sql,
|
|
820
949
|
output=output,
|
|
821
950
|
now=now,
|
|
822
951
|
staleness=staleness,
|
|
@@ -843,7 +972,8 @@ class ChalkGRPCClient:
|
|
|
843
972
|
def _online_query_bulk_grpc_request(
|
|
844
973
|
self,
|
|
845
974
|
*,
|
|
846
|
-
input: Union[Mapping[FeatureReference, Sequence[Any]], DataFrame],
|
|
975
|
+
input: Union[Mapping[FeatureReference, Sequence[Any]], DataFrame, None] = None,
|
|
976
|
+
input_sql: str | None = None,
|
|
847
977
|
output: Sequence[FeatureReference] = (),
|
|
848
978
|
now: Optional[Sequence[dt.datetime]] = None,
|
|
849
979
|
staleness: Optional[Mapping[FeatureReference, str]] = None,
|
|
@@ -864,8 +994,10 @@ class ChalkGRPCClient:
|
|
|
864
994
|
query_context: Mapping[str, Union[str, int, float, bool, None]] | None = None,
|
|
865
995
|
) -> Tuple[online_query_pb2.OnlineQueryBulkResponse, grpc.Call]:
|
|
866
996
|
"""Returns the raw GRPC response and metadata"""
|
|
997
|
+
|
|
867
998
|
request = self._make_query_bulk_request(
|
|
868
999
|
input=input,
|
|
1000
|
+
input_sql=input_sql,
|
|
869
1001
|
output=output,
|
|
870
1002
|
now=now or (),
|
|
871
1003
|
staleness=staleness or {},
|
|
@@ -1054,7 +1186,9 @@ class ChalkGRPCClient:
|
|
|
1054
1186
|
|
|
1055
1187
|
def _make_query_bulk_request(
|
|
1056
1188
|
self,
|
|
1057
|
-
|
|
1189
|
+
*,
|
|
1190
|
+
input: Mapping[FeatureReference, Sequence[Any]] | DataFrame | None = None,
|
|
1191
|
+
input_sql: str | None = None,
|
|
1058
1192
|
output: Sequence[FeatureReference],
|
|
1059
1193
|
now: Sequence[dt.datetime],
|
|
1060
1194
|
staleness: Mapping[FeatureReference, str],
|
|
@@ -1072,9 +1206,19 @@ class ChalkGRPCClient:
|
|
|
1072
1206
|
planner_options: Mapping[str, str | int | bool],
|
|
1073
1207
|
query_context: Mapping[str, Union[str, int, float, bool, None]] | str | None,
|
|
1074
1208
|
) -> online_query_pb2.OnlineQueryBulkRequest:
|
|
1075
|
-
|
|
1076
|
-
input
|
|
1077
|
-
|
|
1209
|
+
if input is None and input_sql is None:
|
|
1210
|
+
raise TypeError("One of `input` or `input_sql` is required")
|
|
1211
|
+
if input is not None and input_sql is not None:
|
|
1212
|
+
raise TypeError("`input` and `input_sql` are mutually exclusive")
|
|
1213
|
+
|
|
1214
|
+
inputs_feather: bytes | None
|
|
1215
|
+
if input is None:
|
|
1216
|
+
inputs_feather = None
|
|
1217
|
+
else:
|
|
1218
|
+
inputs_feather = get_features_feather_bytes(
|
|
1219
|
+
input, self._INPUT_ENCODE_OPTIONS, compression=self._input_compression
|
|
1220
|
+
)
|
|
1221
|
+
|
|
1078
1222
|
encoded_outputs = encode_outputs(output)
|
|
1079
1223
|
outputs = encoded_outputs.string_outputs
|
|
1080
1224
|
# Currently assume every feature tag is just a fqn instead of a more complex expr.
|
|
@@ -1103,7 +1247,8 @@ class ChalkGRPCClient:
|
|
|
1103
1247
|
query_context = _validate_context_dict(query_context)
|
|
1104
1248
|
query_context_proto = {k: value_to_proto(v) for k, v in query_context.items()} if query_context else None
|
|
1105
1249
|
return online_query_pb2.OnlineQueryBulkRequest(
|
|
1106
|
-
inputs_feather=
|
|
1250
|
+
inputs_feather=inputs_feather,
|
|
1251
|
+
inputs_sql=input_sql,
|
|
1107
1252
|
outputs=[online_query_pb2.OutputExpr(feature_fqn=o) for o in outputs]
|
|
1108
1253
|
+ [online_query_pb2.OutputExpr(feature_expression=o) for o in encoded_outputs.feature_expressions_proto],
|
|
1109
1254
|
now=now_proto,
|
|
@@ -1131,6 +1276,101 @@ class ChalkGRPCClient:
|
|
|
1131
1276
|
body_type=online_query_pb2.FEATHER_BODY_TYPE_RECORD_BATCHES,
|
|
1132
1277
|
)
|
|
1133
1278
|
|
|
1279
|
+
def run_scheduled_query(
|
|
1280
|
+
self,
|
|
1281
|
+
name: str,
|
|
1282
|
+
planner_options: Optional[Mapping[str, Any]],
|
|
1283
|
+
incremental_resolvers: Optional[Sequence[str]],
|
|
1284
|
+
max_samples: Optional[int],
|
|
1285
|
+
env_overrides: Optional[Mapping[str, str]],
|
|
1286
|
+
) -> ManualTriggerScheduledQueryResponseDataclass:
|
|
1287
|
+
"""
|
|
1288
|
+
Manually trigger a scheduled query request.
|
|
1289
|
+
|
|
1290
|
+
Parameters
|
|
1291
|
+
----------
|
|
1292
|
+
name
|
|
1293
|
+
The name of the scheduled query to be triggered.
|
|
1294
|
+
incremental_resolvers
|
|
1295
|
+
If set to None, Chalk will incrementalize resolvers in the query's root namespaces.
|
|
1296
|
+
If set to a list of resolvers, this set will be used for incrementalization.
|
|
1297
|
+
Incremental resolvers must return a feature time in its output, and must return a `DataFrame`.
|
|
1298
|
+
Most commonly, this will be the name of a SQL file resolver. Chalk will ingest all new data
|
|
1299
|
+
from these resolvers and propagate changes to values in the root namespace.
|
|
1300
|
+
max_samples
|
|
1301
|
+
The maximum number of samples to compute.
|
|
1302
|
+
env_overrides:
|
|
1303
|
+
A dictionary of environment values to override during this specific triggered query.
|
|
1304
|
+
|
|
1305
|
+
Other Parameters
|
|
1306
|
+
----------------
|
|
1307
|
+
planner_options
|
|
1308
|
+
A dictionary of options to pass to the planner.
|
|
1309
|
+
These are typically provided by Chalk Support for specific use cases.
|
|
1310
|
+
|
|
1311
|
+
Returns
|
|
1312
|
+
-------
|
|
1313
|
+
ManualTriggerScheduledQueryResponse
|
|
1314
|
+
A response message containing metadata around the triggered run.
|
|
1315
|
+
|
|
1316
|
+
Examples
|
|
1317
|
+
--------
|
|
1318
|
+
>>> from chalk.client.client_grpc import ChalkGRPCClient
|
|
1319
|
+
>>> ChalkGRPCClient().run_scheduled_query(
|
|
1320
|
+
... name="my_scheduled_query",
|
|
1321
|
+
... )
|
|
1322
|
+
"""
|
|
1323
|
+
proto_resp = self._stub_refresher.call_scheduled_query_stub(
|
|
1324
|
+
lambda x: x.ManualTriggerScheduledQuery(
|
|
1325
|
+
request=ManualTriggerScheduledQueryRequest(
|
|
1326
|
+
cron_query_name=name,
|
|
1327
|
+
planner_options=planner_options or {},
|
|
1328
|
+
incremental_resolvers=incremental_resolvers or (),
|
|
1329
|
+
max_samples=max_samples,
|
|
1330
|
+
env_overrides=env_overrides or {},
|
|
1331
|
+
),
|
|
1332
|
+
)
|
|
1333
|
+
)
|
|
1334
|
+
return ManualTriggerScheduledQueryResponseDataclass.from_proto(proto_resp)
|
|
1335
|
+
|
|
1336
|
+
def get_scheduled_query_run_history(
|
|
1337
|
+
self,
|
|
1338
|
+
name: str,
|
|
1339
|
+
limit: int = 10,
|
|
1340
|
+
) -> List[ScheduledQueryRun]:
|
|
1341
|
+
"""
|
|
1342
|
+
Get the run history for a scheduled query.
|
|
1343
|
+
|
|
1344
|
+
Parameters
|
|
1345
|
+
----------
|
|
1346
|
+
name
|
|
1347
|
+
The name of the scheduled query.
|
|
1348
|
+
limit
|
|
1349
|
+
The maximum number of runs to return. Defaults to 10.
|
|
1350
|
+
|
|
1351
|
+
Returns
|
|
1352
|
+
-------
|
|
1353
|
+
list[ScheduledQueryRun]
|
|
1354
|
+
A response message containing the list of scheduled query runs.
|
|
1355
|
+
|
|
1356
|
+
Examples
|
|
1357
|
+
--------
|
|
1358
|
+
>>> from chalk.client.client_grpc import ChalkGRPCClient
|
|
1359
|
+
>>> ChalkGRPCClient().get_scheduled_query_run_history(
|
|
1360
|
+
... name="my_scheduled_query",
|
|
1361
|
+
... limit=20,
|
|
1362
|
+
... )
|
|
1363
|
+
"""
|
|
1364
|
+
proto_resp = self._stub_refresher.call_scheduled_query_run_stub(
|
|
1365
|
+
lambda x: x.GetScheduledQueryRuns(
|
|
1366
|
+
GetScheduledQueryRunsRequest(
|
|
1367
|
+
cron_name=name,
|
|
1368
|
+
limit=limit,
|
|
1369
|
+
)
|
|
1370
|
+
)
|
|
1371
|
+
)
|
|
1372
|
+
return [ScheduledQueryRun.from_proto(run) for run in proto_resp.runs]
|
|
1373
|
+
|
|
1134
1374
|
def get_graph(self, deployment: DeploymentId | None = None) -> Graph:
|
|
1135
1375
|
"""Get the graph for a given deployment.
|
|
1136
1376
|
|
|
@@ -1410,6 +1650,7 @@ class ChalkGRPCClient:
|
|
|
1410
1650
|
self,
|
|
1411
1651
|
name: str,
|
|
1412
1652
|
model_type: Optional[ModelType] = None,
|
|
1653
|
+
model_class: Optional[ModelClass] = None,
|
|
1413
1654
|
model_encoding: Optional[ModelEncoding] = None,
|
|
1414
1655
|
aliases: Optional[List[str]] = None,
|
|
1415
1656
|
model: Optional[Any] = None,
|
|
@@ -1423,53 +1664,53 @@ class ChalkGRPCClient:
|
|
|
1423
1664
|
source_config: Optional[SourceConfig] = None,
|
|
1424
1665
|
dependencies: Optional[List[str]] = None,
|
|
1425
1666
|
) -> RegisterModelVersionResponse:
|
|
1426
|
-
"""
|
|
1427
|
-
Register a model in the Chalk model registry.
|
|
1667
|
+
"""Register a model in the Chalk model registry.
|
|
1428
1668
|
|
|
1429
1669
|
Parameters
|
|
1430
1670
|
----------
|
|
1431
|
-
name
|
|
1432
|
-
|
|
1433
|
-
aliases
|
|
1434
|
-
|
|
1435
|
-
model
|
|
1436
|
-
|
|
1437
|
-
model_paths
|
|
1438
|
-
|
|
1439
|
-
additional_files
|
|
1440
|
-
|
|
1441
|
-
model_type
|
|
1442
|
-
|
|
1443
|
-
model_encoding
|
|
1444
|
-
|
|
1445
|
-
input_schema
|
|
1446
|
-
|
|
1447
|
-
|
|
1448
|
-
|
|
1449
|
-
output_schema
|
|
1450
|
-
|
|
1451
|
-
|
|
1452
|
-
|
|
1453
|
-
metadata
|
|
1454
|
-
|
|
1455
|
-
|
|
1456
|
-
input_features
|
|
1671
|
+
name
|
|
1672
|
+
Unique name for the model.
|
|
1673
|
+
aliases
|
|
1674
|
+
List of version aliases (e.g., `["v1.0", "latest"]`).
|
|
1675
|
+
model
|
|
1676
|
+
Python model object (for object-based registration).
|
|
1677
|
+
model_paths
|
|
1678
|
+
Paths to model files (for file-based registration).
|
|
1679
|
+
additional_files
|
|
1680
|
+
Additional files needed for inference (tokenizers, configs, etc.)
|
|
1681
|
+
model_type
|
|
1682
|
+
Type of model framework.
|
|
1683
|
+
model_encoding
|
|
1684
|
+
Serialization format.
|
|
1685
|
+
input_schema
|
|
1686
|
+
Definition of the input schema. Can be:
|
|
1687
|
+
- `dict`: Dictionary mapping column names to dtypes for tabular data
|
|
1688
|
+
- `list`: List of `(shape, dtype)` tuples for tensor data
|
|
1689
|
+
output_schema
|
|
1690
|
+
Definition of the output schema. Can be:
|
|
1691
|
+
- `dict`: Dictionary mapping column names to dtypes for tabular data
|
|
1692
|
+
- `list`: List of `(shape, dtype)` tuples for tensor data
|
|
1693
|
+
metadata
|
|
1694
|
+
Additional metadata dictionary containing framework info,
|
|
1695
|
+
training details, performance metrics, etc.
|
|
1696
|
+
input_features
|
|
1457
1697
|
The features to be used as inputs to the model.
|
|
1458
1698
|
For example, `[User.message]`. Features can also be expressed as snakecased strings,
|
|
1459
|
-
e.g. `["user.message"]
|
|
1460
|
-
output_features
|
|
1699
|
+
e.g. `["user.message"]`.
|
|
1700
|
+
output_features
|
|
1461
1701
|
The features to be used as outputs to the model.
|
|
1462
1702
|
For example, `[User.is_spam]`. Features can also be expressed as snakecased strings,
|
|
1463
|
-
e.g. `["user.is_spam"]
|
|
1464
|
-
source_config
|
|
1703
|
+
e.g. `["user.is_spam"]`.
|
|
1704
|
+
source_config
|
|
1465
1705
|
Config to pass credentials to access files from a remote source.
|
|
1466
|
-
dependencies
|
|
1706
|
+
dependencies
|
|
1467
1707
|
List of package dependencies needed to run this model.
|
|
1468
|
-
e.g. ["torch==2.7.1", "numpy==1.26.4"]
|
|
1708
|
+
e.g. `["torch==2.7.1", "numpy==1.26.4"]`.
|
|
1709
|
+
|
|
1469
1710
|
Returns
|
|
1470
1711
|
-------
|
|
1471
1712
|
ModelVersion
|
|
1472
|
-
|
|
1713
|
+
The registered model version object.
|
|
1473
1714
|
|
|
1474
1715
|
Examples
|
|
1475
1716
|
--------
|
|
@@ -1507,6 +1748,9 @@ class ChalkGRPCClient:
|
|
|
1507
1748
|
if model_type is None:
|
|
1508
1749
|
model_type = model_serializer.model_type
|
|
1509
1750
|
|
|
1751
|
+
if model_class is None:
|
|
1752
|
+
model_class = model_serializer.model_class
|
|
1753
|
+
|
|
1510
1754
|
if model is not None:
|
|
1511
1755
|
inferred_input_schema, inferred_output_schema = model_serializer.infer_input_output_schemas(
|
|
1512
1756
|
model, model_type
|
|
@@ -1547,9 +1791,29 @@ class ChalkGRPCClient:
|
|
|
1547
1791
|
"Failed to register model. Please specify a model encoding if using model_paths."
|
|
1548
1792
|
)
|
|
1549
1793
|
|
|
1794
|
+
# Auto-convert ONNX list schemas to dict format if needed
|
|
1795
|
+
if model_type == ModelType.ONNX:
|
|
1796
|
+
input_schema = model_serializer.convert_onnx_list_schema_to_dict(input_schema, model, is_input=True)
|
|
1797
|
+
output_schema = model_serializer.convert_onnx_list_schema_to_dict(
|
|
1798
|
+
output_schema, model, is_input=False
|
|
1799
|
+
)
|
|
1800
|
+
|
|
1550
1801
|
input_model_schema = model_serializer.convert_schema(input_schema)
|
|
1551
1802
|
output_model_schema = model_serializer.convert_schema(output_schema)
|
|
1552
1803
|
|
|
1804
|
+
# Final validation: ONNX models must use tabular schemas
|
|
1805
|
+
if model_type == ModelType.ONNX:
|
|
1806
|
+
if input_model_schema is not None and not input_model_schema.HasField("tabular"):
|
|
1807
|
+
raise ValueError(
|
|
1808
|
+
"ONNX models must be registered with tabular input schema (dict format). "
|
|
1809
|
+
+ "Use dict format like {'input': Tensor[...]} instead of list format."
|
|
1810
|
+
)
|
|
1811
|
+
if output_model_schema is not None and not output_model_schema.HasField("tabular"):
|
|
1812
|
+
raise ValueError(
|
|
1813
|
+
"ONNX models must be registered with tabular output schema (dict format). "
|
|
1814
|
+
+ "Use dict format like {'output': Vector[...]} instead of list format."
|
|
1815
|
+
)
|
|
1816
|
+
|
|
1553
1817
|
all_files_to_process, model_file_names = model_file_uploader.prepare_file_mapping(
|
|
1554
1818
|
model_paths, additional_files
|
|
1555
1819
|
)
|
|
@@ -1579,6 +1843,7 @@ class ChalkGRPCClient:
|
|
|
1579
1843
|
for file in additional_files_upload_paths
|
|
1580
1844
|
],
|
|
1581
1845
|
model_type=model_type,
|
|
1846
|
+
model_class=model_class,
|
|
1582
1847
|
model_encoding=model_encoding,
|
|
1583
1848
|
model_signature=_model_artifact_pb2.ModelSignature(
|
|
1584
1849
|
inputs=input_model_schema,
|
|
@@ -1691,6 +1956,7 @@ class ChalkGRPCClient:
|
|
|
1691
1956
|
name: str,
|
|
1692
1957
|
model_artifact_id: Optional[str] = None,
|
|
1693
1958
|
run_id: Optional[str] = None,
|
|
1959
|
+
run_name: Optional[str] = None,
|
|
1694
1960
|
criterion: Optional[ModelRunCriterion] = None,
|
|
1695
1961
|
aliases: Optional[List[str]] = None,
|
|
1696
1962
|
) -> RegisterModelVersionResponse:
|
|
@@ -1705,6 +1971,8 @@ class ChalkGRPCClient:
|
|
|
1705
1971
|
Artifact UUID to promote to a model version.
|
|
1706
1972
|
run_id: str, optional
|
|
1707
1973
|
run id that produce the artifact to promote.
|
|
1974
|
+
run_name: str, optional
|
|
1975
|
+
run name used in the checkpointer for artifact to promote.
|
|
1708
1976
|
criterion: ModelRunCriterion, optional
|
|
1709
1977
|
criterion on which to select the artifact from the training run.
|
|
1710
1978
|
If none provided, the latest artifact in the run will be selected.
|
|
@@ -1722,11 +1990,15 @@ class ChalkGRPCClient:
|
|
|
1722
1990
|
... )
|
|
1723
1991
|
"""
|
|
1724
1992
|
if model_artifact_id is not None:
|
|
1725
|
-
if run_id is not None or criterion is not None:
|
|
1726
|
-
raise ValueError(
|
|
1993
|
+
if run_id is not None or criterion is not None or run_name is not None:
|
|
1994
|
+
raise ValueError(
|
|
1995
|
+
"Please specify only one of 'model_artifact_id', (run_id, run criterion), (run_name, run criterion)"
|
|
1996
|
+
)
|
|
1727
1997
|
else:
|
|
1728
|
-
if
|
|
1729
|
-
raise ValueError(
|
|
1998
|
+
if run_name is None and run_id is None:
|
|
1999
|
+
raise ValueError(
|
|
2000
|
+
"Please specify only one of 'model_artifact_id', (run_id, run criterion), (run_name, run criterion)"
|
|
2001
|
+
)
|
|
1730
2002
|
|
|
1731
2003
|
try:
|
|
1732
2004
|
resp: CreateModelVersionFromArtifactResponse = self._stub_refresher.call_model_stub(
|
|
@@ -1735,7 +2007,8 @@ class ChalkGRPCClient:
|
|
|
1735
2007
|
model_name=name,
|
|
1736
2008
|
model_artifact_id=model_artifact_id,
|
|
1737
2009
|
training_run=ModelSerializer.convert_run_criterion_to_proto(
|
|
1738
|
-
|
|
2010
|
+
run_id=run_id,
|
|
2011
|
+
run_name=run_name,
|
|
1739
2012
|
criterion=criterion,
|
|
1740
2013
|
),
|
|
1741
2014
|
aliases=aliases,
|
|
@@ -1756,19 +2029,46 @@ class ChalkGRPCClient:
|
|
|
1756
2029
|
|
|
1757
2030
|
def create_model_training_job(
|
|
1758
2031
|
self,
|
|
1759
|
-
|
|
1760
|
-
|
|
1761
|
-
|
|
1762
|
-
|
|
2032
|
+
script: str,
|
|
2033
|
+
function_name: str,
|
|
2034
|
+
experiment_name: str,
|
|
2035
|
+
branch: Optional[str] = None,
|
|
2036
|
+
config: str | None = None,
|
|
1763
2037
|
resources: Optional[ResourceRequests] = None,
|
|
1764
|
-
|
|
1765
|
-
|
|
1766
|
-
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
|
|
1770
|
-
|
|
1771
|
-
|
|
2038
|
+
env_overrides: Optional[Mapping[str, str]] = None,
|
|
2039
|
+
enable_profiling: bool = False,
|
|
2040
|
+
max_retries: int = 0,
|
|
2041
|
+
) -> CreateScriptTaskResponse:
|
|
2042
|
+
resources_request = {}
|
|
2043
|
+
if resources is not None:
|
|
2044
|
+
if resources.cpu is not None:
|
|
2045
|
+
resources_request["cpu"] = resources.cpu
|
|
2046
|
+
if resources.memory is not None:
|
|
2047
|
+
resources_request["memory"] = resources.memory
|
|
2048
|
+
|
|
2049
|
+
return self._stub_refresher.call_task_stub(
|
|
2050
|
+
lambda x: x.CreateScriptTask(
|
|
2051
|
+
CreateScriptTaskRequest(
|
|
2052
|
+
request=ScriptTaskRequest(
|
|
2053
|
+
function_reference_type="file",
|
|
2054
|
+
# Hardcoded script name
|
|
2055
|
+
function_reference=f"train.py::{function_name}",
|
|
2056
|
+
kind=ScriptTaskKind.SCRIPT_TASK_KIND_TRAINING_RUN,
|
|
2057
|
+
training_run=TrainingRunArgs(
|
|
2058
|
+
experiment_name=experiment_name,
|
|
2059
|
+
),
|
|
2060
|
+
arguments_json=config,
|
|
2061
|
+
branch=branch,
|
|
2062
|
+
resource_requests=resources_pb2.ResourceRequirements(
|
|
2063
|
+
requests=resources_request,
|
|
2064
|
+
),
|
|
2065
|
+
resource_group=resources.resource_group if resources is not None else None,
|
|
2066
|
+
env_overrides=env_overrides,
|
|
2067
|
+
enable_profiling=enable_profiling,
|
|
2068
|
+
max_retries=max_retries,
|
|
2069
|
+
),
|
|
2070
|
+
source_file=script.encode("utf-8"),
|
|
2071
|
+
),
|
|
1772
2072
|
)
|
|
1773
2073
|
)
|
|
1774
2074
|
|
|
@@ -1820,3 +2120,329 @@ class ChalkGRPCClient:
|
|
|
1820
2120
|
f"Branch server did not start within {timeout_seconds} seconds. Last state: {BranchScalingState.Name(response.state)}"
|
|
1821
2121
|
)
|
|
1822
2122
|
time.sleep(poll_interval_seconds)
|
|
2123
|
+
|
|
2124
|
+
def get_job_queue_operation_summary(
|
|
2125
|
+
self,
|
|
2126
|
+
operation_id: str,
|
|
2127
|
+
environment_id: str | None = None,
|
|
2128
|
+
limit: int | None = None,
|
|
2129
|
+
offset: int | None = None,
|
|
2130
|
+
) -> GetJobQueueOperationSummaryResponse:
|
|
2131
|
+
"""Get summary information for a job queue operation.
|
|
2132
|
+
|
|
2133
|
+
Parameters
|
|
2134
|
+
----------
|
|
2135
|
+
operation_id
|
|
2136
|
+
The ID of the operation to get summary for
|
|
2137
|
+
environment_id
|
|
2138
|
+
The environment ID. If None, uses the client's environment.
|
|
2139
|
+
limit
|
|
2140
|
+
Maximum number of job rows to return. Defaults to 10000.
|
|
2141
|
+
offset
|
|
2142
|
+
Offset for pagination. Defaults to 0.
|
|
2143
|
+
|
|
2144
|
+
Returns
|
|
2145
|
+
-------
|
|
2146
|
+
GetJobQueueOperationSummaryResponse
|
|
2147
|
+
The operation summary response containing job queue information.
|
|
2148
|
+
|
|
2149
|
+
Examples
|
|
2150
|
+
--------
|
|
2151
|
+
>>> from chalk.client.client_grpc import ChalkGRPCClient
|
|
2152
|
+
>>> client = ChalkGRPCClient()
|
|
2153
|
+
>>> response = client.get_job_queue_operation_summary(operation_id="op_123")
|
|
2154
|
+
"""
|
|
2155
|
+
env_id = environment_id or self._stub_refresher.environment_id
|
|
2156
|
+
if not env_id:
|
|
2157
|
+
raise ValueError("No environment specified")
|
|
2158
|
+
|
|
2159
|
+
request = GetJobQueueOperationSummaryRequest(
|
|
2160
|
+
operation_id=operation_id,
|
|
2161
|
+
environment_id=env_id,
|
|
2162
|
+
)
|
|
2163
|
+
|
|
2164
|
+
if limit is not None:
|
|
2165
|
+
request.limit = limit
|
|
2166
|
+
if offset is not None:
|
|
2167
|
+
request.offset = offset
|
|
2168
|
+
|
|
2169
|
+
return self._stub_refresher.call_job_queue_stub(lambda x: x.GetJobQueueOperationSummary(request))
|
|
2170
|
+
|
|
2171
|
+
def follow_model_training_job(
|
|
2172
|
+
self,
|
|
2173
|
+
operation_id: str,
|
|
2174
|
+
poll_interval: float = 2.0,
|
|
2175
|
+
output_callback: Optional[Callable[[str, str], None]] = None,
|
|
2176
|
+
) -> None:
|
|
2177
|
+
"""Follow a model training job, displaying both status and logs.
|
|
2178
|
+
|
|
2179
|
+
This method polls the job queue for status updates while also following logs
|
|
2180
|
+
in real-time. It continues until the job reaches a terminal state (completed,
|
|
2181
|
+
failed, or canceled).
|
|
2182
|
+
|
|
2183
|
+
Parameters
|
|
2184
|
+
----------
|
|
2185
|
+
operation_id
|
|
2186
|
+
The operation ID of the model training job
|
|
2187
|
+
poll_interval
|
|
2188
|
+
Time in seconds between polling for status and logs. Defaults to 2.0 seconds.
|
|
2189
|
+
output_callback
|
|
2190
|
+
Optional callback function that receives (timestamp, message) for each log entry.
|
|
2191
|
+
If None, logs are displayed using Rich live display.
|
|
2192
|
+
|
|
2193
|
+
Examples
|
|
2194
|
+
--------
|
|
2195
|
+
>>> from chalk.client.client_grpc import ChalkGRPCClient
|
|
2196
|
+
>>> client = ChalkGRPCClient()
|
|
2197
|
+
>>> client.follow_model_training_job(operation_id="op_123")
|
|
2198
|
+
"""
|
|
2199
|
+
from chalk.utils.job_log_display import JobLogDisplay
|
|
2200
|
+
|
|
2201
|
+
# Create display manager
|
|
2202
|
+
display = JobLogDisplay(title="Model Training Jobs")
|
|
2203
|
+
|
|
2204
|
+
# Define callback for status polling
|
|
2205
|
+
def get_status_callback():
|
|
2206
|
+
return self.get_job_queue_operation_summary(operation_id=operation_id)
|
|
2207
|
+
|
|
2208
|
+
# Get log stub and construct log query
|
|
2209
|
+
log_query = f'operation_id:"{operation_id}"'
|
|
2210
|
+
log_stub = self._stub_refresher.log_stub
|
|
2211
|
+
|
|
2212
|
+
# Delegate to the display manager to handle all threading and coordination
|
|
2213
|
+
display.follow_job(
|
|
2214
|
+
get_status_callback=get_status_callback,
|
|
2215
|
+
log_stub=log_stub,
|
|
2216
|
+
log_query=log_query,
|
|
2217
|
+
poll_interval=poll_interval,
|
|
2218
|
+
output_callback=output_callback,
|
|
2219
|
+
)
|
|
2220
|
+
|
|
2221
|
+
def test_streaming_resolver(
|
|
2222
|
+
self,
|
|
2223
|
+
resolver: str | Resolver,
|
|
2224
|
+
message_bodies: "list[str | bytes | BaseModel] | None" = None,
|
|
2225
|
+
message_keys: list[str | None] | None = None,
|
|
2226
|
+
message_timestamps: list[str | dt.datetime] | None = None,
|
|
2227
|
+
message_filepath: str | None = None,
|
|
2228
|
+
request_timeout: Optional[float] = None,
|
|
2229
|
+
) -> "StreamResolverTestResponse":
|
|
2230
|
+
"""Test a streaming resolver with supplied messages.
|
|
2231
|
+
|
|
2232
|
+
This method tests streaming resolvers using the gRPC TestStreamingResolver endpoint.
|
|
2233
|
+
It supports both deployed resolvers (by FQN) and static/undeployed resolvers
|
|
2234
|
+
(automatically serialized from Resolver objects).
|
|
2235
|
+
|
|
2236
|
+
Parameters
|
|
2237
|
+
----------
|
|
2238
|
+
resolver : str | Resolver
|
|
2239
|
+
The streaming resolver or its string name. If a StreamResolver object with
|
|
2240
|
+
feature_expressions is provided, it will be automatically serialized for testing.
|
|
2241
|
+
message_bodies : list[str | bytes | BaseModel], optional
|
|
2242
|
+
The message bodies to process. Can be JSON strings, raw bytes,
|
|
2243
|
+
or Pydantic models (will be serialized to JSON).
|
|
2244
|
+
Either message_bodies or message_filepath must be provided.
|
|
2245
|
+
message_keys : list[str | None], optional
|
|
2246
|
+
Optional keys for each message. If not provided, all keys will be None.
|
|
2247
|
+
Must match length of message_bodies if provided.
|
|
2248
|
+
message_timestamps : list[str | datetime], optional
|
|
2249
|
+
Optional timestamps for each message. If not provided, current time
|
|
2250
|
+
will be used. Must match length of message_bodies if provided.
|
|
2251
|
+
message_filepath : str, optional
|
|
2252
|
+
A filepath from which test messages will be ingested.
|
|
2253
|
+
This file should be newline delimited JSON with format:
|
|
2254
|
+
{"message_key": "my-key", "message_body": {"field1": "value1"}}
|
|
2255
|
+
Each line may optionally contain a "message_timestamp" field.
|
|
2256
|
+
Either message_bodies or message_filepath must be provided.
|
|
2257
|
+
request_timeout : float, optional
|
|
2258
|
+
Request timeout in seconds.
|
|
2259
|
+
|
|
2260
|
+
Returns
|
|
2261
|
+
-------
|
|
2262
|
+
StreamResolverTestResponse
|
|
2263
|
+
Response containing:
|
|
2264
|
+
- status: SUCCESS or FAILURE
|
|
2265
|
+
- data_uri: Optional signed URL to parquet file with results
|
|
2266
|
+
- errors: List of ChalkError objects
|
|
2267
|
+
- message: Human-readable message
|
|
2268
|
+
|
|
2269
|
+
Examples
|
|
2270
|
+
--------
|
|
2271
|
+
>>> from chalk.client.client_grpc import ChalkGRPCClient
|
|
2272
|
+
>>> client = ChalkGRPCClient()
|
|
2273
|
+
>>> response = client.test_streaming_resolver(
|
|
2274
|
+
... resolver="my_module.my_stream_resolver",
|
|
2275
|
+
... message_bodies=[
|
|
2276
|
+
... '{"user_id": 1, "event": "login"}',
|
|
2277
|
+
... '{"user_id": 2, "event": "logout"}',
|
|
2278
|
+
... ],
|
|
2279
|
+
... message_keys=["user_1", "user_2"],
|
|
2280
|
+
... )
|
|
2281
|
+
>>> print(f"Status: {response.status}")
|
|
2282
|
+
>>> if response.data_uri:
|
|
2283
|
+
... print(f"Results at: {response.data_uri}")
|
|
2284
|
+
"""
|
|
2285
|
+
import base64
|
|
2286
|
+
import json
|
|
2287
|
+
from uuid import uuid4
|
|
2288
|
+
|
|
2289
|
+
import pyarrow as pa
|
|
2290
|
+
|
|
2291
|
+
from chalk._gen.chalk.streaming.v1.simple_streaming_service_pb2 import TestStreamingResolverRequest
|
|
2292
|
+
from chalk.utils.pydanticutil.pydantic_compat import get_pydantic_model_json, is_pydantic_basemodel_instance
|
|
2293
|
+
|
|
2294
|
+
# Determine if resolver is static and needs serialization
|
|
2295
|
+
resolver_fqn: str | None = None
|
|
2296
|
+
static_stream_resolver_b64: str | None = None
|
|
2297
|
+
|
|
2298
|
+
if isinstance(resolver, str):
|
|
2299
|
+
resolver_fqn = resolver
|
|
2300
|
+
else:
|
|
2301
|
+
from chalk.features.resolver import StreamResolver
|
|
2302
|
+
|
|
2303
|
+
resolver_fqn = resolver.fqn
|
|
2304
|
+
|
|
2305
|
+
if isinstance(resolver, StreamResolver) and resolver.feature_expressions:
|
|
2306
|
+
from chalk.parsed.to_proto import ToProtoConverter
|
|
2307
|
+
|
|
2308
|
+
proto_resolver = ToProtoConverter.convert_stream_resolver(resolver)
|
|
2309
|
+
static_stream_resolver_b64 = base64.b64encode(
|
|
2310
|
+
proto_resolver.SerializeToString(deterministic=True)
|
|
2311
|
+
).decode("utf-8")
|
|
2312
|
+
|
|
2313
|
+
# Load from file if provided
|
|
2314
|
+
if message_filepath is not None:
|
|
2315
|
+
if message_bodies is not None:
|
|
2316
|
+
raise ValueError("Cannot provide both message_filepath and message_bodies")
|
|
2317
|
+
|
|
2318
|
+
loaded_bodies: list[Any] = []
|
|
2319
|
+
loaded_keys: list[str | None] = []
|
|
2320
|
+
loaded_timestamps: list[str | None] = []
|
|
2321
|
+
|
|
2322
|
+
with open(message_filepath, "r") as f:
|
|
2323
|
+
for line in f:
|
|
2324
|
+
line = line.strip()
|
|
2325
|
+
if not line:
|
|
2326
|
+
continue
|
|
2327
|
+
msg = json.loads(line)
|
|
2328
|
+
loaded_bodies.append(msg.get("message_body", msg))
|
|
2329
|
+
loaded_keys.append(msg.get("message_key"))
|
|
2330
|
+
loaded_timestamps.append(msg.get("message_timestamp"))
|
|
2331
|
+
|
|
2332
|
+
message_bodies = loaded_bodies
|
|
2333
|
+
if message_keys is None and any(k is not None for k in loaded_keys):
|
|
2334
|
+
message_keys = loaded_keys
|
|
2335
|
+
if message_timestamps is None and any(t is not None for t in loaded_timestamps):
|
|
2336
|
+
# Cast needed: loaded_timestamps is list[str | None] from JSON,
|
|
2337
|
+
# but message_timestamps is list[str | datetime] - strings will be parsed later
|
|
2338
|
+
message_timestamps = typing.cast(list[str | dt.datetime], loaded_timestamps)
|
|
2339
|
+
|
|
2340
|
+
# Validate inputs
|
|
2341
|
+
if message_bodies is None:
|
|
2342
|
+
raise ValueError("Either message_bodies or message_filepath must be provided")
|
|
2343
|
+
|
|
2344
|
+
num_messages = len(message_bodies)
|
|
2345
|
+
if num_messages == 0:
|
|
2346
|
+
raise ValueError("message_bodies cannot be empty")
|
|
2347
|
+
|
|
2348
|
+
if message_keys is not None and len(message_keys) != num_messages:
|
|
2349
|
+
raise ValueError(
|
|
2350
|
+
f"message_keys length ({len(message_keys)}) must match message_bodies length ({num_messages})"
|
|
2351
|
+
)
|
|
2352
|
+
|
|
2353
|
+
if message_timestamps is not None and len(message_timestamps) != num_messages:
|
|
2354
|
+
raise ValueError(
|
|
2355
|
+
f"message_timestamps length ({len(message_timestamps)}) must match message_bodies length ({num_messages})"
|
|
2356
|
+
)
|
|
2357
|
+
|
|
2358
|
+
# Generate defaults
|
|
2359
|
+
message_ids = [str(uuid4()) for _ in range(num_messages)]
|
|
2360
|
+
|
|
2361
|
+
if message_keys is None:
|
|
2362
|
+
message_keys = typing.cast(list[str | None], [None] * num_messages)
|
|
2363
|
+
|
|
2364
|
+
if message_timestamps is None:
|
|
2365
|
+
message_timestamps = typing.cast(list[str | dt.datetime], [dt.datetime.now()] * num_messages)
|
|
2366
|
+
|
|
2367
|
+
# Convert message bodies to bytes
|
|
2368
|
+
processed_bodies: list[bytes] = []
|
|
2369
|
+
for body in message_bodies:
|
|
2370
|
+
if isinstance(body, bytes):
|
|
2371
|
+
processed_bodies.append(body)
|
|
2372
|
+
elif isinstance(body, str):
|
|
2373
|
+
processed_bodies.append(body.encode("utf-8"))
|
|
2374
|
+
elif is_pydantic_basemodel_instance(body):
|
|
2375
|
+
# Use utility function that handles both Pydantic v1 and v2
|
|
2376
|
+
processed_bodies.append(get_pydantic_model_json(body).encode("utf-8"))
|
|
2377
|
+
else:
|
|
2378
|
+
# Try JSON serialization for dict-like objects
|
|
2379
|
+
processed_bodies.append(json.dumps(body).encode("utf-8"))
|
|
2380
|
+
|
|
2381
|
+
# Convert timestamps to unix timestamps in milliseconds (int64)
|
|
2382
|
+
# At this point message_timestamps is guaranteed to be non-None due to the default assignment above
|
|
2383
|
+
assert message_timestamps is not None
|
|
2384
|
+
processed_timestamps: list[int] = []
|
|
2385
|
+
for ts in message_timestamps:
|
|
2386
|
+
if isinstance(ts, str):
|
|
2387
|
+
# Parse ISO format string
|
|
2388
|
+
parsed = dt.datetime.fromisoformat(ts.replace("Z", "+00:00"))
|
|
2389
|
+
processed_timestamps.append(int(parsed.timestamp() * 1000)) # milliseconds
|
|
2390
|
+
else:
|
|
2391
|
+
# Type narrowing: ts must be dt.datetime here
|
|
2392
|
+
processed_timestamps.append(int(ts.timestamp() * 1000)) # milliseconds
|
|
2393
|
+
|
|
2394
|
+
# Create Arrow table
|
|
2395
|
+
table = pa.table(
|
|
2396
|
+
{
|
|
2397
|
+
"message_id": message_ids,
|
|
2398
|
+
"message_key": message_keys,
|
|
2399
|
+
"message_data": processed_bodies,
|
|
2400
|
+
"publish_timestamp": processed_timestamps,
|
|
2401
|
+
}
|
|
2402
|
+
)
|
|
2403
|
+
|
|
2404
|
+
# Serialize to Arrow IPC format
|
|
2405
|
+
sink = pa.BufferOutputStream()
|
|
2406
|
+
with pa.ipc.new_stream(sink, table.schema) as writer:
|
|
2407
|
+
writer.write_table(table)
|
|
2408
|
+
input_data = sink.getvalue().to_pybytes()
|
|
2409
|
+
|
|
2410
|
+
# Create gRPC request
|
|
2411
|
+
request = TestStreamingResolverRequest(
|
|
2412
|
+
resolver_fqn=resolver_fqn or "",
|
|
2413
|
+
input_data=input_data,
|
|
2414
|
+
operation_id=None,
|
|
2415
|
+
debug=True,
|
|
2416
|
+
)
|
|
2417
|
+
|
|
2418
|
+
if static_stream_resolver_b64:
|
|
2419
|
+
request.static_stream_resolver_b64 = static_stream_resolver_b64
|
|
2420
|
+
|
|
2421
|
+
# Call new TestStreamingResolver endpoint
|
|
2422
|
+
proto_response = self._stub_refresher.call_streaming_stub(
|
|
2423
|
+
lambda x: x.TestStreamingResolver(
|
|
2424
|
+
request,
|
|
2425
|
+
timeout=request_timeout,
|
|
2426
|
+
)
|
|
2427
|
+
)
|
|
2428
|
+
|
|
2429
|
+
# Convert proto response to StreamResolverTestResponse
|
|
2430
|
+
from chalk._gen.chalk.streaming.v1.simple_streaming_service_pb2 import TEST_STREAM_RESOLVER_STATUS_SUCCESS
|
|
2431
|
+
|
|
2432
|
+
status = (
|
|
2433
|
+
StreamResolverTestStatus.SUCCESS
|
|
2434
|
+
if proto_response.status == TEST_STREAM_RESOLVER_STATUS_SUCCESS
|
|
2435
|
+
else StreamResolverTestStatus.FAILURE
|
|
2436
|
+
)
|
|
2437
|
+
|
|
2438
|
+
# Convert proto errors to ChalkError objects
|
|
2439
|
+
errors_list: list[ChalkError] = []
|
|
2440
|
+
if proto_response.errors:
|
|
2441
|
+
errors_list = [ChalkErrorConverter.chalk_error_decode(err) for err in proto_response.errors]
|
|
2442
|
+
|
|
2443
|
+
return StreamResolverTestResponse(
|
|
2444
|
+
status=status,
|
|
2445
|
+
data_uri=proto_response.data_uri if proto_response.HasField("data_uri") else None,
|
|
2446
|
+
errors=errors_list if errors_list else None,
|
|
2447
|
+
message=proto_response.message if proto_response.message else None,
|
|
2448
|
+
)
|