chalkpy 2.90.1__py3-none-any.whl → 2.95.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chalk/__init__.py +2 -1
- chalk/_gen/chalk/arrow/v1/arrow_pb2.py +7 -5
- chalk/_gen/chalk/arrow/v1/arrow_pb2.pyi +6 -0
- chalk/_gen/chalk/artifacts/v1/chart_pb2.py +16 -16
- chalk/_gen/chalk/artifacts/v1/chart_pb2.pyi +4 -0
- chalk/_gen/chalk/artifacts/v1/cron_query_pb2.py +8 -7
- chalk/_gen/chalk/artifacts/v1/cron_query_pb2.pyi +5 -0
- chalk/_gen/chalk/common/v1/offline_query_pb2.py +17 -15
- chalk/_gen/chalk/common/v1/offline_query_pb2.pyi +25 -0
- chalk/_gen/chalk/common/v1/script_task_pb2.py +3 -3
- chalk/_gen/chalk/common/v1/script_task_pb2.pyi +2 -0
- chalk/_gen/chalk/dataframe/__init__.py +0 -0
- chalk/_gen/chalk/dataframe/v1/__init__.py +0 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2.py +48 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2.pyi +123 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.py +4 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/graph/v1/graph_pb2.py +150 -149
- chalk/_gen/chalk/graph/v1/graph_pb2.pyi +25 -0
- chalk/_gen/chalk/graph/v1/sources_pb2.py +94 -84
- chalk/_gen/chalk/graph/v1/sources_pb2.pyi +56 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.py +79 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.pyi +377 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.py +4 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.py +43 -7
- chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.pyi +252 -2
- chalk/_gen/chalk/protosql/v1/sql_service_pb2.py +54 -27
- chalk/_gen/chalk/protosql/v1/sql_service_pb2.pyi +131 -3
- chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.py +45 -0
- chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.pyi +14 -0
- chalk/_gen/chalk/python/v1/types_pb2.py +14 -14
- chalk/_gen/chalk/python/v1/types_pb2.pyi +8 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2.py +76 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2.pyi +156 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.py +258 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.pyi +84 -0
- chalk/_gen/chalk/server/v1/billing_pb2.py +40 -38
- chalk/_gen/chalk/server/v1/billing_pb2.pyi +17 -1
- chalk/_gen/chalk/server/v1/branches_pb2.py +45 -0
- chalk/_gen/chalk/server/v1/branches_pb2.pyi +80 -0
- chalk/_gen/chalk/server/v1/branches_pb2_grpc.pyi +36 -0
- chalk/_gen/chalk/server/v1/builder_pb2.py +358 -288
- chalk/_gen/chalk/server/v1/builder_pb2.pyi +360 -10
- chalk/_gen/chalk/server/v1/builder_pb2_grpc.py +225 -0
- chalk/_gen/chalk/server/v1/builder_pb2_grpc.pyi +60 -0
- chalk/_gen/chalk/server/v1/chart_pb2.py +10 -10
- chalk/_gen/chalk/server/v1/chart_pb2.pyi +18 -2
- chalk/_gen/chalk/server/v1/clickhouse_pb2.py +42 -0
- chalk/_gen/chalk/server/v1/clickhouse_pb2.pyi +17 -0
- chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.py +78 -0
- chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.pyi +38 -0
- chalk/_gen/chalk/server/v1/cloud_components_pb2.py +141 -119
- chalk/_gen/chalk/server/v1/cloud_components_pb2.pyi +106 -4
- chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2.py +11 -3
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2.pyi +20 -0
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.py +52 -38
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.pyi +62 -1
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.py +90 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.pyi +264 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.py +170 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.pyi +62 -0
- chalk/_gen/chalk/server/v1/deploy_pb2.py +9 -3
- chalk/_gen/chalk/server/v1/deploy_pb2.pyi +12 -0
- chalk/_gen/chalk/server/v1/deploy_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/deploy_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/deployment_pb2.py +6 -6
- chalk/_gen/chalk/server/v1/deployment_pb2.pyi +20 -0
- chalk/_gen/chalk/server/v1/environment_pb2.py +14 -12
- chalk/_gen/chalk/server/v1/environment_pb2.pyi +19 -0
- chalk/_gen/chalk/server/v1/eventbus_pb2.py +4 -2
- chalk/_gen/chalk/server/v1/files_pb2.py +65 -0
- chalk/_gen/chalk/server/v1/files_pb2.pyi +167 -0
- chalk/_gen/chalk/server/v1/files_pb2_grpc.py +4 -0
- chalk/_gen/chalk/server/v1/files_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/server/v1/graph_pb2.py +38 -26
- chalk/_gen/chalk/server/v1/graph_pb2.pyi +58 -0
- chalk/_gen/chalk/server/v1/graph_pb2_grpc.py +47 -0
- chalk/_gen/chalk/server/v1/graph_pb2_grpc.pyi +18 -0
- chalk/_gen/chalk/server/v1/incident_pb2.py +23 -21
- chalk/_gen/chalk/server/v1/incident_pb2.pyi +15 -1
- chalk/_gen/chalk/server/v1/indexing_job_pb2.py +44 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2.pyi +38 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.py +78 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.pyi +38 -0
- chalk/_gen/chalk/server/v1/integrations_pb2.py +11 -9
- chalk/_gen/chalk/server/v1/integrations_pb2.pyi +34 -2
- chalk/_gen/chalk/server/v1/kube_pb2.py +29 -19
- chalk/_gen/chalk/server/v1/kube_pb2.pyi +28 -0
- chalk/_gen/chalk/server/v1/kube_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/kube_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/log_pb2.py +21 -3
- chalk/_gen/chalk/server/v1/log_pb2.pyi +68 -0
- chalk/_gen/chalk/server/v1/log_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/log_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/model_registry_pb2.py +10 -10
- chalk/_gen/chalk/server/v1/model_registry_pb2.pyi +4 -1
- chalk/_gen/chalk/server/v1/plandebug_pb2.py +53 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2.pyi +86 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.py +168 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.pyi +60 -0
- chalk/_gen/chalk/server/v1/queries_pb2.py +66 -66
- chalk/_gen/chalk/server/v1/queries_pb2.pyi +32 -2
- chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.py +12 -12
- chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.pyi +16 -3
- chalk/_gen/chalk/server/v1/scheduler_pb2.py +24 -12
- chalk/_gen/chalk/server/v1/scheduler_pb2.pyi +61 -1
- chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/script_tasks_pb2.py +15 -3
- chalk/_gen/chalk/server/v1/script_tasks_pb2.pyi +22 -0
- chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2.py +75 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2.pyi +142 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.py +349 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.pyi +114 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2.py +48 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2.pyi +150 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.py +123 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.pyi +52 -0
- chalk/_gen/chalk/server/v1/team_pb2.py +154 -141
- chalk/_gen/chalk/server/v1/team_pb2.pyi +30 -2
- chalk/_gen/chalk/server/v1/team_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/team_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/topic_pb2.py +5 -3
- chalk/_gen/chalk/server/v1/topic_pb2.pyi +10 -1
- chalk/_gen/chalk/server/v1/trace_pb2.py +44 -40
- chalk/_gen/chalk/server/v1/trace_pb2.pyi +20 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2.py +62 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2.pyi +75 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.py +221 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.pyi +88 -0
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.py +16 -10
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.pyi +52 -1
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.py +48 -0
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.pyi +20 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2.py +32 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2.pyi +42 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.py +4 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.pyi +4 -0
- chalk/_lsp/error_builder.py +11 -0
- chalk/_version.py +1 -1
- chalk/client/client.py +128 -43
- chalk/client/client_async.py +149 -0
- chalk/client/client_async_impl.py +22 -0
- chalk/client/client_grpc.py +539 -104
- chalk/client/client_impl.py +449 -122
- chalk/client/dataset.py +7 -1
- chalk/client/models.py +98 -0
- chalk/client/serialization/model_serialization.py +92 -9
- chalk/df/LazyFramePlaceholder.py +1154 -0
- chalk/features/_class_property.py +7 -0
- chalk/features/_embedding/embedding.py +1 -0
- chalk/features/_encoding/converter.py +83 -2
- chalk/features/feature_field.py +40 -30
- chalk/features/feature_set_decorator.py +1 -0
- chalk/features/feature_wrapper.py +42 -3
- chalk/features/hooks.py +81 -10
- chalk/features/inference.py +33 -31
- chalk/features/resolver.py +224 -24
- chalk/functions/__init__.py +65 -3
- chalk/gitignore/gitignore_parser.py +5 -1
- chalk/importer.py +142 -68
- chalk/ml/__init__.py +2 -0
- chalk/ml/model_hooks.py +194 -26
- chalk/ml/model_reference.py +56 -8
- chalk/ml/model_version.py +24 -15
- chalk/ml/utils.py +20 -17
- chalk/operators/_utils.py +10 -3
- chalk/parsed/_proto/export.py +22 -0
- chalk/parsed/duplicate_input_gql.py +3 -0
- chalk/parsed/json_conversions.py +20 -14
- chalk/parsed/to_proto.py +16 -4
- chalk/parsed/user_types_to_json.py +31 -10
- chalk/parsed/validation_from_registries.py +182 -0
- chalk/queries/named_query.py +16 -6
- chalk/queries/scheduled_query.py +9 -1
- chalk/serialization/parsed_annotation.py +24 -11
- chalk/sql/__init__.py +18 -0
- chalk/sql/_internal/integrations/databricks.py +55 -17
- chalk/sql/_internal/integrations/mssql.py +127 -62
- chalk/sql/_internal/integrations/redshift.py +4 -0
- chalk/sql/_internal/sql_file_resolver.py +53 -9
- chalk/sql/_internal/sql_source.py +35 -2
- chalk/streams/_kafka_source.py +5 -1
- chalk/streams/_windows.py +15 -2
- chalk/utils/_otel_version.py +13 -0
- chalk/utils/async_helpers.py +2 -2
- chalk/utils/missing_dependency.py +5 -4
- chalk/utils/tracing.py +185 -95
- {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/METADATA +4 -6
- {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/RECORD +202 -146
- {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/WHEEL +0 -0
- {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/entry_points.txt +0 -0
- {chalkpy-2.90.1.dist-info → chalkpy-2.95.3.dist-info}/top_level.txt +0 -0
chalk/client/client_grpc.py
CHANGED
@@ -74,6 +74,10 @@ from chalk._gen.chalk.server.v1.model_registry_pb2 import (
 )
 from chalk._gen.chalk.server.v1.model_registry_pb2_grpc import ModelRegistryServiceStub
 from chalk._gen.chalk.server.v1.offline_queries_pb2_grpc import OfflineQueryMetadataServiceStub
+from chalk._gen.chalk.server.v1.scheduled_query_pb2_grpc import ScheduledQueryServiceStub
+from chalk._gen.chalk.server.v1.scheduled_query_run_pb2 import GetScheduledQueryRunsRequest
+from chalk._gen.chalk.server.v1.scheduler_pb2 import ManualTriggerScheduledQueryRequest
+from chalk._gen.chalk.server.v1.scheduler_pb2_grpc import SchedulerServiceStub
 from chalk._gen.chalk.server.v1.script_tasks_pb2 import CreateScriptTaskRequest, CreateScriptTaskResponse
 from chalk._gen.chalk.server.v1.script_tasks_pb2_grpc import ScriptTaskServiceStub
 from chalk._gen.chalk.server.v1.team_pb2 import (
@@ -83,6 +87,7 @@ from chalk._gen.chalk.server.v1.team_pb2 import (
     ListServiceTokensResponse,
 )
 from chalk._gen.chalk.server.v1.team_pb2_grpc import TeamServiceStub
+from chalk._gen.chalk.streaming.v1.simple_streaming_service_pb2_grpc import SimpleStreamingServiceStub
 from chalk.client import ChalkAuthException, FeatureReference
 from chalk.client.client_impl import _validate_context_dict  # pyright: ignore[reportPrivateUsage]
 from chalk.client.models import (
@@ -92,6 +97,9 @@ from chalk.client.models import (
     CreateBranchResponse,
     GetRegisteredModelResponse,
     GetRegisteredModelVersionResponse,
+)
+from chalk.client.models import ManualTriggerScheduledQueryResponse as ManualTriggerScheduledQueryResponseDataclass
+from chalk.client.models import (
     ModelUploadUrlResponse,
     OnlineQuery,
     OnlineQueryResponse,
@@ -99,6 +107,9 @@ from chalk.client.models import (
     RegisterModelResponse,
     RegisterModelVersionResponse,
     ResourceRequests,
+    ScheduledQueryRun,
+    StreamResolverTestResponse,
+    StreamResolverTestStatus,
     UploadFeaturesResponse,
 )
 from chalk.client.serialization.model_serialization import ModelSerializer
@@ -109,6 +120,7 @@ from chalk.features._encoding.inputs import GRPC_ENCODE_OPTIONS, InputEncodeOptions
 from chalk.features._encoding.json import FeatureEncodingOptions
 from chalk.features._encoding.outputs import encode_outputs
 from chalk.features.feature_set import is_feature_set_class
+from chalk.features.resolver import Resolver
 from chalk.features.tag import DeploymentId
 from chalk.importer import CHALK_IMPORT_FLAG
 from chalk.ml import LocalSourceConfig, ModelEncoding, ModelRunCriterion, ModelType, SourceConfig
@@ -118,13 +130,15 @@ from chalk.parsed._proto.utils import datetime_to_proto_timestamp, value_to_proto
 from chalk.utils import df_utils
 from chalk.utils.df_utils import record_batch_to_arrow_ipc
 from chalk.utils.grpc import AuthenticatedChalkClientInterceptor, TokenRefresher, UnauthenticatedChalkClientInterceptor
-from chalk.utils.tracing import add_trace_headers
+from chalk.utils.tracing import add_trace_headers, safe_trace

 if TYPE_CHECKING:
     from pyarrow import RecordBatch, Table
+    from pydantic import BaseModel

     from chalk._gen.chalk.server.v1.builder_pb2 import StartBranchResponse
     from chalk._gen.chalk.server.v1.builder_pb2_grpc import BuilderServiceStub
+    from chalk.client import ChalkError

 CHALK_GRPC_TRACE_ID_HEADER: str = "x-chalk-trace-id"

@@ -212,29 +226,26 @@ def _parse_uri_for_engine(query_server_uri: str) -> ParsedUri:
     return ParsedUri(uri_without_scheme=uri_without_scheme, use_tls=use_tls)


- [3 removed lines not rendered]
+default_channel_options: Dict[str, str | int] = {
+    "grpc.max_send_message_length": 1024 * 1024 * 100,  # 100MB
+    "grpc.max_receive_message_length": 1024 * 1024 * 100,  # 100MB
     # https://grpc.io/docs/guides/performance/#python
- [1 removed line not rendered]
-(
- [12 removed lines not rendered]
-]
-}
-),
+    grpc.experimental.ChannelOptions.SingleThreadedUnaryStream: 1,
+    "grpc.service_config": json.dumps(
+        {
+            "methodConfig": [
+                {
+                    "name": [{}],
+                    "maxAttempts": 5,
+                    "initialBackoff": "0.1s",
+                    "maxBackoff": "1s",
+                    "backoffMultiplier": 2,
+                    "retryableStatusCodes": ["UNAVAILABLE"],
+                }
+            ]
+        }
     ),
- [1 removed line not rendered]
+}


 T = TypeVar("T")
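The new `default_channel_options` dict replaces the old inline list of channel-option tuples. Before a channel is built, the dict is converted back into the `list[tuple[str, str | int]]` form that `grpc.secure_channel`/`grpc.insecure_channel` expect, which is what makes the caller-supplied overrides added later in this diff possible. A minimal standalone sketch of the merge-and-convert behavior (the `SingleThreadedUnaryStream` experimental option is omitted here so the snippet has no grpc dependency):

```python
import json

# Mirrors the defaults added in 2.95.3: 100MB message limits plus a
# client-side retry policy for UNAVAILABLE, retried up to 5 times with
# exponential backoff (0.1s initial, 2x multiplier, capped at 1s).
default_channel_options: dict[str, str | int] = {
    "grpc.max_send_message_length": 1024 * 1024 * 100,
    "grpc.max_receive_message_length": 1024 * 1024 * 100,
    "grpc.service_config": json.dumps(
        {
            "methodConfig": [
                {
                    "name": [{}],  # applies to every method
                    "maxAttempts": 5,
                    "initialBackoff": "0.1s",
                    "maxBackoff": "1s",
                    "backoffMultiplier": 2,
                    "retryableStatusCodes": ["UNAVAILABLE"],
                }
            ]
        }
    ),
}


def merge_channel_options(
    overrides: list[tuple[str, str | int]] | None,
) -> list[tuple[str, str | int]]:
    # Same shape as the client's merge: caller-supplied tuples win over defaults,
    # and grpc channels consume the result as a list of (key, value) tuples.
    merged = default_channel_options.copy()
    if overrides:
        merged.update(dict(overrides))
    return list(merged.items())
```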
@@ -282,6 +293,22 @@ class StubProvider:
         )
         return OfflineQueryMetadataServiceStub(self._server_channel)

+    @cached_property
+    def scheduled_query_stub(self) -> SchedulerServiceStub:
+        if self._server_channel is None:
+            raise ValueError(
+                "The GRPC engine service is not available. If you would like to set up a GRPC service, please contact Chalk."
+            )
+        return SchedulerServiceStub(self._server_channel)
+
+    @cached_property
+    def scheduled_query_run_stub(self) -> ScheduledQueryServiceStub:
+        if self._server_channel is None:
+            raise ValueError(
+                "The GRPC engine service is not available. If you would like to set up a GRPC service, please contact Chalk."
+            )
+        return ScheduledQueryServiceStub(self._server_channel)
+
     @cached_property
     def sql_stub(self) -> SqlServiceStub:
         if self._engine_channel is None:
@@ -298,6 +325,14 @@ class StubProvider:
         )
         return DataFrameServiceStub(self._engine_channel)

+    @cached_property
+    def streaming_stub(self) -> SimpleStreamingServiceStub:
+        if self._engine_channel is None:
+            raise ValueError(
+                "The GRPC engine service is not available. If you would like to set up a GRPC service, please contact Chalk."
+            )
+        return SimpleStreamingServiceStub(self._engine_channel)
+
     @cached_property
     def model_stub(self) -> ModelRegistryServiceStub:
         if self._server_channel is None:
@@ -337,17 +372,25 @@ class StubProvider:
         deployment_tag: str | None = None,
         skip_api_server: bool = False,
         additional_headers: List[tuple[str, str]] | None = None,
+        channel_options: List[tuple[str, str | int]] | None = None,
     ):
         super().__init__()
         additional_headers_nonempty: List[tuple[str, str]] = [] if additional_headers is None else additional_headers
         token_refresher: TokenRefresher | None = None
+        channel_options_merged: Dict[str, str | int] = default_channel_options.copy()
+        if channel_options:
+            channel_options_merged.update(dict(channel_options))
         if skip_api_server:
             # Omits the auth handshake with the API server. Primarily for internal use/testing -- if used in production,
             # this client will simply fail to connect. If True then query_server must be provided & point to
             # `localhost/127.0.0.1`.
             if query_server is None:
                 raise ValueError("If skipping API server auth, query_server URI must be provided.")
- [1 removed line not rendered]
+            parsed_uri = _parse_uri_for_engine(query_server)
+            if not (
+                parsed_uri.uri_without_scheme.startswith("localhost")
+                or parsed_uri.uri_without_scheme.startswith("127.0.0.1")
+            ):
                 warnings.warn(
                     "Skipping API server auth should only be enabled if query_server URI is localhost. It will fail to authenticate against a production engine."
                 )
@@ -363,13 +406,13 @@ class StubProvider:
         _unauthenticated_server_channel: grpc.Channel = (
             grpc.insecure_channel(
                 target=server_host,
-                options=…
+                options=list(channel_options_merged.items()),
             )
             if server_host.startswith("localhost") or server_host.startswith("127.0.0.1")
             else grpc.secure_channel(
                 target=server_host,
                 credentials=grpc.ssl_channel_credentials(),
-                options=…
+                options=list(channel_options_merged.items()),
             )
         )

@@ -443,12 +486,12 @@ class StubProvider:
             grpc.secure_channel(
                 target=parsed_uri.uri_without_scheme,
                 credentials=grpc.ssl_channel_credentials(),
-                options=…
+                options=list(channel_options_merged.items()),
             )
             if parsed_uri.use_tls
             else grpc.insecure_channel(
                 target=parsed_uri.uri_without_scheme,
-                options=…
+                options=list(channel_options_merged.items()),
             )
         ),
         *interceptors,
@@ -463,6 +506,7 @@ class StubRefresher:
         deployment_tag: str | None = None,
         skip_api_server: bool = False,
         additional_headers: List[tuple[str, str]] | None = None,
+        channel_options: List[tuple[str, str | int]] | None = None,
     ):
         super().__init__()
         self._token_config = token_config
@@ -470,6 +514,7 @@ class StubRefresher:
         self._deployment_tag = deployment_tag
         self._skip_api_server = skip_api_server
         self._additional_headers = additional_headers
+        self._channel_options = channel_options
         self._stub = self._refresh_stub()

     def _refresh_stub(self) -> StubProvider:
@@ -479,6 +524,7 @@ class StubRefresher:
             deployment_tag=self._deployment_tag,
             skip_api_server=self._skip_api_server,
             additional_headers=self._additional_headers,
+            channel_options=self._channel_options,
         )
         return self._stub

@@ -515,6 +561,12 @@ class StubRefresher:
     def call_offline_query_stub(self, fn: Callable[[OfflineQueryMetadataServiceStub], T]) -> T:
         return self._retry_callable(fn, lambda: self._stub.offline_query_stub)

+    def call_scheduled_query_stub(self, fn: Callable[[SchedulerServiceStub], T]) -> T:
+        return self._retry_callable(fn, lambda: self._stub.scheduled_query_stub)
+
+    def call_scheduled_query_run_stub(self, fn: Callable[[ScheduledQueryServiceStub], T]) -> T:
+        return self._retry_callable(fn, lambda: self._stub.scheduled_query_run_stub)
+
     def call_sql_stub(self, fn: Callable[[SqlServiceStub], T]) -> T:
         return self._retry_callable(fn, lambda: self._stub.sql_stub)

@@ -536,6 +588,9 @@ class StubRefresher:
     def call_job_queue_stub(self, fn: Callable[[DataPlaneJobQueueServiceStub], T]) -> T:
         return self._retry_callable(fn, lambda: self._stub.job_queue_stub)

+    def call_streaming_stub(self, fn: Callable[[SimpleStreamingServiceStub], T]) -> T:
+        return self._retry_callable(fn, lambda: self._stub.streaming_stub)
+
     @property
     def log_stub(self) -> LogSearchServiceStub:
         return self._stub.log_stub
@@ -561,6 +616,7 @@ class ChalkGRPCClient:
         additional_headers: List[tuple[str, str]] | None = None,
         query_server: str | None = None,
         input_compression: typing.Literal["lz4", "zstd", "uncompressed"] = "lz4",
+        channel_options: List[Tuple[str, str | int]] | None = None,
         **kwargs: Any,
     ):
         """Create a `ChalkGRPCClient` with the given credentials.
@@ -616,6 +672,7 @@ class ChalkGRPCClient:
             deployment_tag=deployment_tag,
             additional_headers=additional_headers,
             skip_api_server=kwargs.get("_skip_api_server", False),
+            channel_options=channel_options,
         )

     _INPUT_ENCODE_OPTIONS = GRPC_ENCODE_OPTIONS
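`ChalkGRPCClient` now threads `channel_options` end to end (constructor → `StubRefresher` → `StubProvider`), so one override list applies to both the server and engine channels, including channels rebuilt on token refresh. A hedged usage sketch (credential arguments elided; any omitted key keeps its value from `default_channel_options`):

```python
from chalk.client.client_grpc import ChalkGRPCClient

# Raise the receive limit to 200MB while keeping the default send limit
# and the default UNAVAILABLE retry policy.
client = ChalkGRPCClient(
    channel_options=[("grpc.max_receive_message_length", 1024 * 1024 * 200)],
)
```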
@@ -796,6 +853,7 @@ class ChalkGRPCClient:
             request_timeout=request_timeout,
             headers=headers,
             query_context=_validate_context_dict(query_context),
+            trace=trace,
         )
         return OnlineQueryConverter.online_query_bulk_response_decode_to_single(bulk_response)

@@ -823,41 +881,42 @@ class ChalkGRPCClient:
         query_context: Mapping[str, Union[str, int, float, bool, None]] | None = None,
         trace: bool = False,
     ) -> online_query_pb2.OnlineQueryBulkResponse:
- [29 removed lines not rendered]
+        with safe_trace("_online_query_grpc_request"):
+            request = self._make_query_bulk_request(
+                input={k: [v] for k, v in input.items()},
+                output=output,
+                now=[now] if now is not None else [],
+                staleness=staleness or {},
+                tags=tags or (),
+                correlation_id=correlation_id,
+                query_name=query_name,
+                query_name_version=query_name_version,
+                include_meta=include_meta,
+                meta=meta or {},
+                explain=explain,
+                store_plan_stages=store_plan_stages,
+                value_metrics_tag_by_features=value_metrics_tag_by_features,
+                encoding_options=encoding_options,
+                required_resolver_tags=required_resolver_tags or (),
+                planner_options=planner_options or {},
+                query_context=query_context,
+            )
+            if trace:
+                extra_headers: dict[str, str] = {}
+                extra_headers = add_trace_headers(extra_headers)
+                headers = _merge_headers(extra_headers, headers)
+            metadata = _canonicalize_headers(headers)
+            return self._stub_refresher.call_query_stub(
+                lambda x: x.OnlineQueryBulk(
+                    request,
+                    timeout=request_timeout,
+                    metadata=metadata,
+                )
             )
-        )

     def online_query_bulk(
         self,
-        input: Union[Mapping[FeatureReference, Sequence[Any]], DataFrame],
+        input: Union[Mapping[FeatureReference, Sequence[Any]], DataFrame, None] = None,
         output: Sequence[FeatureReference] = (),
         now: Optional[Sequence[dt.datetime]] = None,
         staleness: Optional[Mapping[FeatureReference, str]] = None,
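The single-query path now forwards its `trace` flag into `_online_query_grpc_request`, where the whole request build-and-send is wrapped in `safe_trace(...)` and, when `trace=True`, `add_trace_headers` injects trace metadata (surfaced via the `x-chalk-trace-id` header defined at module scope) before the call. A hedged caller-side sketch, assuming the public `online_query` method exposes the same `trace` flag (its full signature lies outside these hunks):

```python
# Hypothetical invocation: `trace` is plumbed through per the hunk above.
response = client.online_query(
    input={"user.id": 1},
    output=["user.message"],
    trace=True,  # attaches trace headers so the query can be correlated server-side
)
```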
@@ -876,9 +935,17 @@ class ChalkGRPCClient:
         request_timeout: Optional[float] = None,
         headers: Mapping[str, str | bytes] | Sequence[tuple[str, str | bytes]] | None = None,
         query_context: Mapping[str, Union[str, int, float, bool, None]] | str | None = None,
+        *,
+        input_sql: str | None = None,
     ) -> BulkOnlineQueryResult:
+        if input is None and input_sql is None:
+            raise TypeError("One of `input` or `input_sql` is required")
+        if input is not None and input_sql is not None:
+            raise TypeError("`input` and `input_sql` are mutually exclusive")
+
         response, call = self._online_query_bulk_grpc_request(
             input=input,
+            input_sql=input_sql,
             output=output,
             now=now,
             staleness=staleness,
@@ -905,7 +972,8 @@ class ChalkGRPCClient:
     def _online_query_bulk_grpc_request(
         self,
         *,
-        input: Union[Mapping[FeatureReference, Sequence[Any]], DataFrame],
+        input: Union[Mapping[FeatureReference, Sequence[Any]], DataFrame, None] = None,
+        input_sql: str | None = None,
         output: Sequence[FeatureReference] = (),
         now: Optional[Sequence[dt.datetime]] = None,
         staleness: Optional[Mapping[FeatureReference, str]] = None,
@@ -926,8 +994,10 @@
         query_context: Mapping[str, Union[str, int, float, bool, None]] | None = None,
     ) -> Tuple[online_query_pb2.OnlineQueryBulkResponse, grpc.Call]:
         """Returns the raw GRPC response and metadata"""
+
         request = self._make_query_bulk_request(
             input=input,
+            input_sql=input_sql,
             output=output,
             now=now or (),
             staleness=staleness or {},
@@ -1116,7 +1186,9 @@ class ChalkGRPCClient:

     def _make_query_bulk_request(
         self,
- [1 removed line not rendered]
+        *,
+        input: Mapping[FeatureReference, Sequence[Any]] | DataFrame | None = None,
+        input_sql: str | None = None,
         output: Sequence[FeatureReference],
         now: Sequence[dt.datetime],
         staleness: Mapping[FeatureReference, str],
@@ -1134,9 +1206,19 @@
         planner_options: Mapping[str, str | int | bool],
         query_context: Mapping[str, Union[str, int, float, bool, None]] | str | None,
     ) -> online_query_pb2.OnlineQueryBulkRequest:
- [1 removed line not rendered]
-        input…
- [1 removed line not rendered]
+        if input is None and input_sql is None:
+            raise TypeError("One of `input` or `input_sql` is required")
+        if input is not None and input_sql is not None:
+            raise TypeError("`input` and `input_sql` are mutually exclusive")
+
+        inputs_feather: bytes | None
+        if input is None:
+            inputs_feather = None
+        else:
+            inputs_feather = get_features_feather_bytes(
+                input, self._INPUT_ENCODE_OPTIONS, compression=self._input_compression
+            )
+
         encoded_outputs = encode_outputs(output)
         outputs = encoded_outputs.string_outputs
         # Currently assume every feature tag is just a fqn instead of a more complex expr.
@@ -1165,7 +1247,8 @@
         query_context = _validate_context_dict(query_context)
         query_context_proto = {k: value_to_proto(v) for k, v in query_context.items()} if query_context else None
         return online_query_pb2.OnlineQueryBulkRequest(
-            inputs_feather=…
+            inputs_feather=inputs_feather,
+            inputs_sql=input_sql,
             outputs=[online_query_pb2.OutputExpr(feature_fqn=o) for o in outputs]
             + [online_query_pb2.OutputExpr(feature_expression=o) for o in encoded_outputs.feature_expressions_proto],
             now=now_proto,
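`online_query_bulk` now accepts a keyword-only `input_sql` as an alternative to `input`: exactly one of the two must be provided, and when `input_sql` is used no Feather payload is built (`inputs_feather=None`) — the SQL text rides along as `inputs_sql` on the `OnlineQueryBulkRequest` proto instead. A hedged usage sketch (the SQL dialect and the source it runs against are not specified anywhere in this diff):

```python
# Feather-encoded inputs and SQL-defined inputs are mutually exclusive.
result = client.online_query_bulk(
    input_sql='SELECT id AS "user.id" FROM users LIMIT 100',  # hypothetical query
    output=["user.message"],
)

# Passing both raises TypeError("`input` and `input_sql` are mutually exclusive");
# passing neither raises TypeError("One of `input` or `input_sql` is required").
```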
@@ -1193,6 +1276,101 @@
             body_type=online_query_pb2.FEATHER_BODY_TYPE_RECORD_BATCHES,
         )

+    def run_scheduled_query(
+        self,
+        name: str,
+        planner_options: Optional[Mapping[str, Any]],
+        incremental_resolvers: Optional[Sequence[str]],
+        max_samples: Optional[int],
+        env_overrides: Optional[Mapping[str, str]],
+    ) -> ManualTriggerScheduledQueryResponseDataclass:
+        """
+        Manually trigger a scheduled query request.
+
+        Parameters
+        ----------
+        name
+            The name of the scheduled query to be triggered.
+        incremental_resolvers
+            If set to None, Chalk will incrementalize resolvers in the query's root namespaces.
+            If set to a list of resolvers, this set will be used for incrementalization.
+            Incremental resolvers must return a feature time in its output, and must return a `DataFrame`.
+            Most commonly, this will be the name of a SQL file resolver. Chalk will ingest all new data
+            from these resolvers and propagate changes to values in the root namespace.
+        max_samples
+            The maximum number of samples to compute.
+        env_overrides
+            A dictionary of environment values to override during this specific triggered query.
+
+        Other Parameters
+        ----------------
+        planner_options
+            A dictionary of options to pass to the planner.
+            These are typically provided by Chalk Support for specific use cases.
+
+        Returns
+        -------
+        ManualTriggerScheduledQueryResponse
+            A response message containing metadata around the triggered run.
+
+        Examples
+        --------
+        >>> from chalk.client.client_grpc import ChalkGRPCClient
+        >>> ChalkGRPCClient().run_scheduled_query(
+        ...     name="my_scheduled_query",
+        ... )
+        """
+        proto_resp = self._stub_refresher.call_scheduled_query_stub(
+            lambda x: x.ManualTriggerScheduledQuery(
+                request=ManualTriggerScheduledQueryRequest(
+                    cron_query_name=name,
+                    planner_options=planner_options or {},
+                    incremental_resolvers=incremental_resolvers or (),
+                    max_samples=max_samples,
+                    env_overrides=env_overrides or {},
+                ),
+            )
+        )
+        return ManualTriggerScheduledQueryResponseDataclass.from_proto(proto_resp)
+
+    def get_scheduled_query_run_history(
+        self,
+        name: str,
+        limit: int = 10,
+    ) -> List[ScheduledQueryRun]:
+        """
+        Get the run history for a scheduled query.
+
+        Parameters
+        ----------
+        name
+            The name of the scheduled query.
+        limit
+            The maximum number of runs to return. Defaults to 10.
+
+        Returns
+        -------
+        list[ScheduledQueryRun]
+            A response message containing the list of scheduled query runs.
+
+        Examples
+        --------
+        >>> from chalk.client.client_grpc import ChalkGRPCClient
+        >>> ChalkGRPCClient().get_scheduled_query_run_history(
+        ...     name="my_scheduled_query",
+        ...     limit=20,
+        ... )
+        """
+        proto_resp = self._stub_refresher.call_scheduled_query_run_stub(
+            lambda x: x.GetScheduledQueryRuns(
+                GetScheduledQueryRunsRequest(
+                    cron_name=name,
+                    limit=limit,
+                )
+            )
+        )
+        return [ScheduledQueryRun.from_proto(run) for run in proto_resp.runs]
+
     def get_graph(self, deployment: DeploymentId | None = None) -> Graph:
         """Get the graph for a given deployment.

@@ -1486,53 +1664,53 @@
         source_config: Optional[SourceConfig] = None,
         dependencies: Optional[List[str]] = None,
     ) -> RegisterModelVersionResponse:
-        """
-        Register a model in the Chalk model registry.
+        """Register a model in the Chalk model registry.

         Parameters
         ----------
-        name
- [1 removed line not rendered]
-        aliases
- [1 removed line not rendered]
-        model
- [1 removed line not rendered]
-        model_paths
- [1 removed line not rendered]
-        additional_files
- [1 removed line not rendered]
-        model_type
- [1 removed line not rendered]
-        model_encoding
- [1 removed line not rendered]
-        input_schema
- [3 removed lines not rendered]
-        output_schema
- [3 removed lines not rendered]
-        metadata
- [2 removed lines not rendered]
-        input_features
+        name
+            Unique name for the model.
+        aliases
+            List of version aliases (e.g., `["v1.0", "latest"]`).
+        model
+            Python model object (for object-based registration).
+        model_paths
+            Paths to model files (for file-based registration).
+        additional_files
+            Additional files needed for inference (tokenizers, configs, etc.)
+        model_type
+            Type of model framework.
+        model_encoding
+            Serialization format.
+        input_schema
+            Definition of the input schema. Can be:
+            - `dict`: Dictionary mapping column names to dtypes for tabular data
+            - `list`: List of `(shape, dtype)` tuples for tensor data
+        output_schema
+            Definition of the output schema. Can be:
+            - `dict`: Dictionary mapping column names to dtypes for tabular data
+            - `list`: List of `(shape, dtype)` tuples for tensor data
+        metadata
+            Additional metadata dictionary containing framework info,
+            training details, performance metrics, etc.
+        input_features
             The features to be used as inputs to the model.
             For example, `[User.message]`. Features can also be expressed as snakecased strings,
-            e.g. `["user.message"]
-        output_features
+            e.g. `["user.message"]`.
+        output_features
             The features to be used as outputs to the model.
             For example, `[User.is_spam]`. Features can also be expressed as snakecased strings,
-            e.g. `["user.is_spam"]
-        source_config
+            e.g. `["user.is_spam"]`.
+        source_config
             Config to pass credentials to access files from a remote source.
-        dependencies
+        dependencies
             List of package dependencies needed to run this model.
-            e.g. ["torch==2.7.1", "numpy==1.26.4"]
+            e.g. `["torch==2.7.1", "numpy==1.26.4"]`.
+
         Returns
         -------
         ModelVersion
- [1 removed line not rendered]
+            The registered model version object.

         Examples
         --------
@@ -1613,9 +1791,29 @@
                 "Failed to register model. Please specify a model encoding if using model_paths."
             )

+        # Auto-convert ONNX list schemas to dict format if needed
+        if model_type == ModelType.ONNX:
+            input_schema = model_serializer.convert_onnx_list_schema_to_dict(input_schema, model, is_input=True)
+            output_schema = model_serializer.convert_onnx_list_schema_to_dict(
+                output_schema, model, is_input=False
+            )
+
         input_model_schema = model_serializer.convert_schema(input_schema)
         output_model_schema = model_serializer.convert_schema(output_schema)

+        # Final validation: ONNX models must use tabular schemas
+        if model_type == ModelType.ONNX:
+            if input_model_schema is not None and not input_model_schema.HasField("tabular"):
+                raise ValueError(
+                    "ONNX models must be registered with tabular input schema (dict format). "
+                    + "Use dict format like {'input': Tensor[...]} instead of list format."
+                )
+            if output_model_schema is not None and not output_model_schema.HasField("tabular"):
+                raise ValueError(
+                    "ONNX models must be registered with tabular output schema (dict format). "
+                    + "Use dict format like {'output': Vector[...]} instead of list format."
+                )
+
         all_files_to_process, model_file_names = model_file_uploader.prepare_file_mapping(
             model_paths, additional_files
         )
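For ONNX models, list-style schemas are now auto-converted via `convert_onnx_list_schema_to_dict`, and registration fails unless the converted schema is tabular. A sketch contrasting the two shapes the docstring describes (the dtype spellings below are illustrative placeholders, not Chalk's exact type objects):

```python
# Dict ("tabular") schema: column name -> dtype. This is the only shape the
# ONNX validation above accepts; the Tensor[...] / Vector[...] dtypes in the
# error messages come from Chalk's feature typing, spelled here schematically.
input_schema = {"input": "Tensor[...]"}
output_schema = {"output": "Vector[...]"}

# List ("tensor") schema: (shape, dtype) tuples. For ONNX this form is now
# auto-converted to the dict form, and rejected if the conversion cannot
# produce a tabular schema.
list_input_schema = [((1, 3), "float32")]  # hypothetical dtype spelling
```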
@@ -1758,6 +1956,7 @@
         name: str,
         model_artifact_id: Optional[str] = None,
         run_id: Optional[str] = None,
+        run_name: Optional[str] = None,
         criterion: Optional[ModelRunCriterion] = None,
         aliases: Optional[List[str]] = None,
     ) -> RegisterModelVersionResponse:
@@ -1772,6 +1971,8 @@
             Artifact UUID to promote to a model version.
         run_id: str, optional
             run id that produce the artifact to promote.
+        run_name: str, optional
+            run name used in the checkpointer for artifact to promote.
         criterion: ModelRunCriterion, optional
             criterion on which to select the artifact from the training run.
             If none provided, the latest artifact in the run will be selected.
@@ -1789,11 +1990,15 @@
         ... )
         """
         if model_artifact_id is not None:
-            if run_id is not None or criterion is not None:
-                raise ValueError(…
+            if run_id is not None or criterion is not None or run_name is not None:
+                raise ValueError(
+                    "Please specify only one of 'model_artifact_id', (run_id, run criterion), (run_name, run criterion)"
+                )
         else:
-            if …
-                raise ValueError(…
+            if run_name is None and run_id is None:
+                raise ValueError(
+                    "Please specify only one of 'model_artifact_id', (run_id, run criterion), (run_name, run criterion)"
+                )

         try:
             resp: CreateModelVersionFromArtifactResponse = self._stub_refresher.call_model_stub(
@@ -1802,7 +2007,8 @@
                     model_name=name,
                     model_artifact_id=model_artifact_id,
                     training_run=ModelSerializer.convert_run_criterion_to_proto(
- [1 removed line not rendered]
+                        run_id=run_id,
+                        run_name=run_name,
                         criterion=criterion,
                     ),
                     aliases=aliases,
@@ -2011,3 +2217,232 @@
             poll_interval=poll_interval,
             output_callback=output_callback,
         )
+
+    def test_streaming_resolver(
+        self,
+        resolver: str | Resolver,
+        message_bodies: "list[str | bytes | BaseModel] | None" = None,
+        message_keys: list[str | None] | None = None,
+        message_timestamps: list[str | dt.datetime] | None = None,
+        message_filepath: str | None = None,
+        request_timeout: Optional[float] = None,
+    ) -> "StreamResolverTestResponse":
+        """Test a streaming resolver with supplied messages.
+
+        This method tests streaming resolvers using the gRPC TestStreamingResolver endpoint.
+        It supports both deployed resolvers (by FQN) and static/undeployed resolvers
+        (automatically serialized from Resolver objects).
+
+        Parameters
+        ----------
+        resolver : str | Resolver
+            The streaming resolver or its string name. If a StreamResolver object with
+            feature_expressions is provided, it will be automatically serialized for testing.
+        message_bodies : list[str | bytes | BaseModel], optional
+            The message bodies to process. Can be JSON strings, raw bytes,
+            or Pydantic models (will be serialized to JSON).
+            Either message_bodies or message_filepath must be provided.
+        message_keys : list[str | None], optional
+            Optional keys for each message. If not provided, all keys will be None.
+            Must match length of message_bodies if provided.
+        message_timestamps : list[str | datetime], optional
+            Optional timestamps for each message. If not provided, current time
+            will be used. Must match length of message_bodies if provided.
+        message_filepath : str, optional
+            A filepath from which test messages will be ingested.
+            This file should be newline delimited JSON with format:
+            {"message_key": "my-key", "message_body": {"field1": "value1"}}
+            Each line may optionally contain a "message_timestamp" field.
+            Either message_bodies or message_filepath must be provided.
+        request_timeout : float, optional
+            Request timeout in seconds.
+
+        Returns
+        -------
+        StreamResolverTestResponse
+            Response containing:
+            - status: SUCCESS or FAILURE
+            - data_uri: Optional signed URL to parquet file with results
+            - errors: List of ChalkError objects
+            - message: Human-readable message
+
+        Examples
+        --------
+        >>> from chalk.client.client_grpc import ChalkGRPCClient
+        >>> client = ChalkGRPCClient()
+        >>> response = client.test_streaming_resolver(
+        ...     resolver="my_module.my_stream_resolver",
+        ...     message_bodies=[
+        ...         '{"user_id": 1, "event": "login"}',
+        ...         '{"user_id": 2, "event": "logout"}',
+        ...     ],
+        ...     message_keys=["user_1", "user_2"],
+        ... )
+        >>> print(f"Status: {response.status}")
+        >>> if response.data_uri:
+        ...     print(f"Results at: {response.data_uri}")
+        """
+        import base64
+        import json
+        from uuid import uuid4
+
+        import pyarrow as pa
+
+        from chalk._gen.chalk.streaming.v1.simple_streaming_service_pb2 import TestStreamingResolverRequest
+        from chalk.utils.pydanticutil.pydantic_compat import get_pydantic_model_json, is_pydantic_basemodel_instance
+
+        # Determine if resolver is static and needs serialization
+        resolver_fqn: str | None = None
+        static_stream_resolver_b64: str | None = None
+
+        if isinstance(resolver, str):
+            resolver_fqn = resolver
+        else:
+            from chalk.features.resolver import StreamResolver
+
+            resolver_fqn = resolver.fqn
+
+            if isinstance(resolver, StreamResolver) and resolver.feature_expressions:
+                from chalk.parsed.to_proto import ToProtoConverter
+
+                proto_resolver = ToProtoConverter.convert_stream_resolver(resolver)
+                static_stream_resolver_b64 = base64.b64encode(
+                    proto_resolver.SerializeToString(deterministic=True)
+                ).decode("utf-8")
+
+        # Load from file if provided
+        if message_filepath is not None:
+            if message_bodies is not None:
+                raise ValueError("Cannot provide both message_filepath and message_bodies")
+
+            loaded_bodies: list[Any] = []
+            loaded_keys: list[str | None] = []
+            loaded_timestamps: list[str | None] = []
+
+            with open(message_filepath, "r") as f:
+                for line in f:
+                    line = line.strip()
+                    if not line:
+                        continue
+                    msg = json.loads(line)
+                    loaded_bodies.append(msg.get("message_body", msg))
+                    loaded_keys.append(msg.get("message_key"))
+                    loaded_timestamps.append(msg.get("message_timestamp"))
+
+            message_bodies = loaded_bodies
+            if message_keys is None and any(k is not None for k in loaded_keys):
+                message_keys = loaded_keys
+            if message_timestamps is None and any(t is not None for t in loaded_timestamps):
+                # Cast needed: loaded_timestamps is list[str | None] from JSON,
+                # but message_timestamps is list[str | datetime] - strings will be parsed later
+                message_timestamps = typing.cast(list[str | dt.datetime], loaded_timestamps)
+
+        # Validate inputs
+        if message_bodies is None:
+            raise ValueError("Either message_bodies or message_filepath must be provided")
+
+        num_messages = len(message_bodies)
+        if num_messages == 0:
+            raise ValueError("message_bodies cannot be empty")
+
+        if message_keys is not None and len(message_keys) != num_messages:
+            raise ValueError(
+                f"message_keys length ({len(message_keys)}) must match message_bodies length ({num_messages})"
+            )
+
+        if message_timestamps is not None and len(message_timestamps) != num_messages:
+            raise ValueError(
+                f"message_timestamps length ({len(message_timestamps)}) must match message_bodies length ({num_messages})"
+            )
+
+        # Generate defaults
+        message_ids = [str(uuid4()) for _ in range(num_messages)]
+
+        if message_keys is None:
+            message_keys = typing.cast(list[str | None], [None] * num_messages)
+
+        if message_timestamps is None:
+            message_timestamps = typing.cast(list[str | dt.datetime], [dt.datetime.now()] * num_messages)
+
+        # Convert message bodies to bytes
+        processed_bodies: list[bytes] = []
+        for body in message_bodies:
+            if isinstance(body, bytes):
+                processed_bodies.append(body)
+            elif isinstance(body, str):
+                processed_bodies.append(body.encode("utf-8"))
+            elif is_pydantic_basemodel_instance(body):
+                # Use utility function that handles both Pydantic v1 and v2
+                processed_bodies.append(get_pydantic_model_json(body).encode("utf-8"))
+            else:
+                # Try JSON serialization for dict-like objects
+                processed_bodies.append(json.dumps(body).encode("utf-8"))
+
+        # Convert timestamps to unix timestamps in milliseconds (int64)
+        # At this point message_timestamps is guaranteed to be non-None due to the default assignment above
+        assert message_timestamps is not None
+        processed_timestamps: list[int] = []
+        for ts in message_timestamps:
+            if isinstance(ts, str):
+                # Parse ISO format string
+                parsed = dt.datetime.fromisoformat(ts.replace("Z", "+00:00"))
+                processed_timestamps.append(int(parsed.timestamp() * 1000))  # milliseconds
+            else:
+                # Type narrowing: ts must be dt.datetime here
+                processed_timestamps.append(int(ts.timestamp() * 1000))  # milliseconds
+
+        # Create Arrow table
+        table = pa.table(
+            {
+                "message_id": message_ids,
+                "message_key": message_keys,
+                "message_data": processed_bodies,
+                "publish_timestamp": processed_timestamps,
+            }
+        )
+
+        # Serialize to Arrow IPC format
+        sink = pa.BufferOutputStream()
+        with pa.ipc.new_stream(sink, table.schema) as writer:
+            writer.write_table(table)
+        input_data = sink.getvalue().to_pybytes()
+
+        # Create gRPC request
+        request = TestStreamingResolverRequest(
+            resolver_fqn=resolver_fqn or "",
+            input_data=input_data,
+            operation_id=None,
+            debug=True,
+        )
+
+        if static_stream_resolver_b64:
+            request.static_stream_resolver_b64 = static_stream_resolver_b64
+
+        # Call new TestStreamingResolver endpoint
+        proto_response = self._stub_refresher.call_streaming_stub(
+            lambda x: x.TestStreamingResolver(
+                request,
+                timeout=request_timeout,
+            )
+        )
+
+        # Convert proto response to StreamResolverTestResponse
+        from chalk._gen.chalk.streaming.v1.simple_streaming_service_pb2 import TEST_STREAM_RESOLVER_STATUS_SUCCESS
+
+        status = (
+            StreamResolverTestStatus.SUCCESS
+            if proto_response.status == TEST_STREAM_RESOLVER_STATUS_SUCCESS
+            else StreamResolverTestStatus.FAILURE
+        )
+
+        # Convert proto errors to ChalkError objects
+        errors_list: list[ChalkError] = []
+        if proto_response.errors:
+            errors_list = [ChalkErrorConverter.chalk_error_decode(err) for err in proto_response.errors]
+
+        return StreamResolverTestResponse(
+            status=status,
+            data_uri=proto_response.data_uri if proto_response.HasField("data_uri") else None,
+            errors=errors_list if errors_list else None,
+            message=proto_response.message if proto_response.message else None,
+        )
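To exercise `test_streaming_resolver` from a file instead of inline bodies, the NDJSON format from the docstring applies: one JSON object per line with `message_key`, `message_body`, and an optional `message_timestamp`. A hedged end-to-end sketch:

```python
# messages.ndjson (one test message per line):
# {"message_key": "user_1", "message_body": {"user_id": 1, "event": "login"}}
# {"message_key": "user_2", "message_body": {"user_id": 2, "event": "logout"}, "message_timestamp": "2024-01-01T00:00:00Z"}

response = client.test_streaming_resolver(
    resolver="my_module.my_stream_resolver",
    message_filepath="messages.ndjson",
)
if response.data_uri:  # signed URL to a parquet file of results
    print(response.data_uri)
for err in response.errors or []:
    print(err)
```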