chalkpy 2.89.22__py3-none-any.whl → 2.95.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chalk/__init__.py +2 -1
- chalk/_gen/chalk/arrow/v1/arrow_pb2.py +7 -5
- chalk/_gen/chalk/arrow/v1/arrow_pb2.pyi +6 -0
- chalk/_gen/chalk/artifacts/v1/chart_pb2.py +36 -33
- chalk/_gen/chalk/artifacts/v1/chart_pb2.pyi +41 -1
- chalk/_gen/chalk/artifacts/v1/cron_query_pb2.py +8 -7
- chalk/_gen/chalk/artifacts/v1/cron_query_pb2.pyi +5 -0
- chalk/_gen/chalk/common/v1/offline_query_pb2.py +19 -13
- chalk/_gen/chalk/common/v1/offline_query_pb2.pyi +37 -0
- chalk/_gen/chalk/common/v1/online_query_pb2.py +54 -54
- chalk/_gen/chalk/common/v1/online_query_pb2.pyi +13 -1
- chalk/_gen/chalk/common/v1/script_task_pb2.py +13 -11
- chalk/_gen/chalk/common/v1/script_task_pb2.pyi +19 -1
- chalk/_gen/chalk/dataframe/__init__.py +0 -0
- chalk/_gen/chalk/dataframe/v1/__init__.py +0 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2.py +48 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2.pyi +123 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.py +4 -0
- chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/graph/v1/graph_pb2.py +150 -149
- chalk/_gen/chalk/graph/v1/graph_pb2.pyi +25 -0
- chalk/_gen/chalk/graph/v1/sources_pb2.py +94 -84
- chalk/_gen/chalk/graph/v1/sources_pb2.pyi +56 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.py +79 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.pyi +377 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.py +4 -0
- chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.py +43 -7
- chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.pyi +252 -2
- chalk/_gen/chalk/protosql/v1/sql_service_pb2.py +54 -27
- chalk/_gen/chalk/protosql/v1/sql_service_pb2.pyi +131 -3
- chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.py +45 -0
- chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.pyi +14 -0
- chalk/_gen/chalk/python/v1/types_pb2.py +14 -14
- chalk/_gen/chalk/python/v1/types_pb2.pyi +8 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2.py +76 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2.pyi +156 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.py +258 -0
- chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.pyi +84 -0
- chalk/_gen/chalk/server/v1/billing_pb2.py +40 -38
- chalk/_gen/chalk/server/v1/billing_pb2.pyi +17 -1
- chalk/_gen/chalk/server/v1/branches_pb2.py +45 -0
- chalk/_gen/chalk/server/v1/branches_pb2.pyi +80 -0
- chalk/_gen/chalk/server/v1/branches_pb2_grpc.pyi +36 -0
- chalk/_gen/chalk/server/v1/builder_pb2.py +372 -272
- chalk/_gen/chalk/server/v1/builder_pb2.pyi +479 -12
- chalk/_gen/chalk/server/v1/builder_pb2_grpc.py +360 -0
- chalk/_gen/chalk/server/v1/builder_pb2_grpc.pyi +96 -0
- chalk/_gen/chalk/server/v1/chart_pb2.py +10 -10
- chalk/_gen/chalk/server/v1/chart_pb2.pyi +18 -2
- chalk/_gen/chalk/server/v1/clickhouse_pb2.py +42 -0
- chalk/_gen/chalk/server/v1/clickhouse_pb2.pyi +17 -0
- chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.py +78 -0
- chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.pyi +38 -0
- chalk/_gen/chalk/server/v1/cloud_components_pb2.py +153 -107
- chalk/_gen/chalk/server/v1/cloud_components_pb2.pyi +146 -4
- chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.py +180 -0
- chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.pyi +48 -0
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2.py +11 -3
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2.pyi +20 -0
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.py +59 -35
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.pyi +127 -1
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.py +135 -0
- chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.pyi +36 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.py +90 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.pyi +264 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.py +170 -0
- chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.pyi +62 -0
- chalk/_gen/chalk/server/v1/datasets_pb2.py +36 -24
- chalk/_gen/chalk/server/v1/datasets_pb2.pyi +71 -2
- chalk/_gen/chalk/server/v1/datasets_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/datasets_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/deploy_pb2.py +9 -3
- chalk/_gen/chalk/server/v1/deploy_pb2.pyi +12 -0
- chalk/_gen/chalk/server/v1/deploy_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/deploy_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/deployment_pb2.py +20 -15
- chalk/_gen/chalk/server/v1/deployment_pb2.pyi +25 -0
- chalk/_gen/chalk/server/v1/environment_pb2.py +25 -15
- chalk/_gen/chalk/server/v1/environment_pb2.pyi +93 -1
- chalk/_gen/chalk/server/v1/eventbus_pb2.py +44 -0
- chalk/_gen/chalk/server/v1/eventbus_pb2.pyi +64 -0
- chalk/_gen/chalk/server/v1/eventbus_pb2_grpc.py +4 -0
- chalk/_gen/chalk/server/v1/eventbus_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/server/v1/files_pb2.py +65 -0
- chalk/_gen/chalk/server/v1/files_pb2.pyi +167 -0
- chalk/_gen/chalk/server/v1/files_pb2_grpc.py +4 -0
- chalk/_gen/chalk/server/v1/files_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/server/v1/graph_pb2.py +41 -3
- chalk/_gen/chalk/server/v1/graph_pb2.pyi +191 -0
- chalk/_gen/chalk/server/v1/graph_pb2_grpc.py +92 -0
- chalk/_gen/chalk/server/v1/graph_pb2_grpc.pyi +32 -0
- chalk/_gen/chalk/server/v1/incident_pb2.py +57 -0
- chalk/_gen/chalk/server/v1/incident_pb2.pyi +165 -0
- chalk/_gen/chalk/server/v1/incident_pb2_grpc.py +4 -0
- chalk/_gen/chalk/server/v1/incident_pb2_grpc.pyi +4 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2.py +44 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2.pyi +38 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.py +78 -0
- chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.pyi +38 -0
- chalk/_gen/chalk/server/v1/integrations_pb2.py +11 -9
- chalk/_gen/chalk/server/v1/integrations_pb2.pyi +34 -2
- chalk/_gen/chalk/server/v1/kube_pb2.py +29 -19
- chalk/_gen/chalk/server/v1/kube_pb2.pyi +28 -0
- chalk/_gen/chalk/server/v1/kube_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/kube_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/log_pb2.py +21 -3
- chalk/_gen/chalk/server/v1/log_pb2.pyi +68 -0
- chalk/_gen/chalk/server/v1/log_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/log_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2.py +73 -0
- chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2.pyi +212 -0
- chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2_grpc.py +217 -0
- chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2_grpc.pyi +74 -0
- chalk/_gen/chalk/server/v1/model_registry_pb2.py +10 -10
- chalk/_gen/chalk/server/v1/model_registry_pb2.pyi +4 -1
- chalk/_gen/chalk/server/v1/monitoring_pb2.py +84 -75
- chalk/_gen/chalk/server/v1/monitoring_pb2.pyi +1 -0
- chalk/_gen/chalk/server/v1/monitoring_pb2_grpc.py +136 -0
- chalk/_gen/chalk/server/v1/monitoring_pb2_grpc.pyi +38 -0
- chalk/_gen/chalk/server/v1/offline_queries_pb2.py +32 -10
- chalk/_gen/chalk/server/v1/offline_queries_pb2.pyi +73 -0
- chalk/_gen/chalk/server/v1/offline_queries_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/offline_queries_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2.py +53 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2.pyi +86 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.py +168 -0
- chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.pyi +60 -0
- chalk/_gen/chalk/server/v1/queries_pb2.py +76 -48
- chalk/_gen/chalk/server/v1/queries_pb2.pyi +155 -2
- chalk/_gen/chalk/server/v1/queries_pb2_grpc.py +180 -0
- chalk/_gen/chalk/server/v1/queries_pb2_grpc.pyi +48 -0
- chalk/_gen/chalk/server/v1/scheduled_query_pb2.py +4 -2
- chalk/_gen/chalk/server/v1/scheduled_query_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/scheduled_query_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.py +12 -6
- chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.pyi +75 -2
- chalk/_gen/chalk/server/v1/scheduler_pb2.py +24 -12
- chalk/_gen/chalk/server/v1/scheduler_pb2.pyi +61 -1
- chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/script_tasks_pb2.py +26 -14
- chalk/_gen/chalk/server/v1/script_tasks_pb2.pyi +33 -3
- chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2.py +75 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2.pyi +142 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.py +349 -0
- chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.pyi +114 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2.py +48 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2.pyi +150 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.py +123 -0
- chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.pyi +52 -0
- chalk/_gen/chalk/server/v1/team_pb2.py +156 -137
- chalk/_gen/chalk/server/v1/team_pb2.pyi +56 -10
- chalk/_gen/chalk/server/v1/team_pb2_grpc.py +90 -0
- chalk/_gen/chalk/server/v1/team_pb2_grpc.pyi +24 -0
- chalk/_gen/chalk/server/v1/topic_pb2.py +5 -3
- chalk/_gen/chalk/server/v1/topic_pb2.pyi +10 -1
- chalk/_gen/chalk/server/v1/trace_pb2.py +50 -28
- chalk/_gen/chalk/server/v1/trace_pb2.pyi +121 -0
- chalk/_gen/chalk/server/v1/trace_pb2_grpc.py +135 -0
- chalk/_gen/chalk/server/v1/trace_pb2_grpc.pyi +42 -0
- chalk/_gen/chalk/server/v1/webhook_pb2.py +9 -3
- chalk/_gen/chalk/server/v1/webhook_pb2.pyi +18 -0
- chalk/_gen/chalk/server/v1/webhook_pb2_grpc.py +45 -0
- chalk/_gen/chalk/server/v1/webhook_pb2_grpc.pyi +12 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2.py +62 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2.pyi +75 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.py +221 -0
- chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.pyi +88 -0
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.py +19 -7
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.pyi +96 -3
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.py +48 -0
- chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.pyi +20 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2.py +32 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2.pyi +42 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.py +4 -0
- chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.pyi +4 -0
- chalk/_lsp/error_builder.py +11 -0
- chalk/_monitoring/Chart.py +1 -3
- chalk/_version.py +1 -1
- chalk/cli.py +5 -10
- chalk/client/client.py +178 -64
- chalk/client/client_async.py +154 -0
- chalk/client/client_async_impl.py +22 -0
- chalk/client/client_grpc.py +738 -112
- chalk/client/client_impl.py +541 -136
- chalk/client/dataset.py +27 -6
- chalk/client/models.py +99 -2
- chalk/client/serialization/model_serialization.py +126 -10
- chalk/config/project_config.py +1 -1
- chalk/df/LazyFramePlaceholder.py +1154 -0
- chalk/df/ast_parser.py +2 -10
- chalk/features/_class_property.py +7 -0
- chalk/features/_embedding/embedding.py +1 -0
- chalk/features/_embedding/sentence_transformer.py +1 -1
- chalk/features/_encoding/converter.py +83 -2
- chalk/features/_encoding/pyarrow.py +20 -4
- chalk/features/_encoding/rich.py +1 -3
- chalk/features/_tensor.py +1 -2
- chalk/features/dataframe/_filters.py +14 -5
- chalk/features/dataframe/_impl.py +91 -36
- chalk/features/dataframe/_validation.py +11 -7
- chalk/features/feature_field.py +40 -30
- chalk/features/feature_set.py +1 -2
- chalk/features/feature_set_decorator.py +1 -0
- chalk/features/feature_wrapper.py +42 -3
- chalk/features/hooks.py +81 -12
- chalk/features/inference.py +65 -10
- chalk/features/resolver.py +338 -56
- chalk/features/tag.py +1 -3
- chalk/features/underscore_features.py +2 -1
- chalk/functions/__init__.py +456 -21
- chalk/functions/holidays.py +1 -3
- chalk/gitignore/gitignore_parser.py +5 -1
- chalk/importer.py +186 -74
- chalk/ml/__init__.py +6 -2
- chalk/ml/model_hooks.py +368 -51
- chalk/ml/model_reference.py +68 -10
- chalk/ml/model_version.py +34 -21
- chalk/ml/utils.py +143 -40
- chalk/operators/_utils.py +14 -3
- chalk/parsed/_proto/export.py +22 -0
- chalk/parsed/duplicate_input_gql.py +4 -0
- chalk/parsed/expressions.py +1 -3
- chalk/parsed/json_conversions.py +21 -14
- chalk/parsed/to_proto.py +16 -4
- chalk/parsed/user_types_to_json.py +31 -10
- chalk/parsed/validation_from_registries.py +182 -0
- chalk/queries/named_query.py +16 -6
- chalk/queries/scheduled_query.py +13 -1
- chalk/serialization/parsed_annotation.py +25 -12
- chalk/sql/__init__.py +221 -0
- chalk/sql/_internal/integrations/athena.py +6 -1
- chalk/sql/_internal/integrations/bigquery.py +22 -2
- chalk/sql/_internal/integrations/databricks.py +61 -18
- chalk/sql/_internal/integrations/mssql.py +281 -0
- chalk/sql/_internal/integrations/postgres.py +11 -3
- chalk/sql/_internal/integrations/redshift.py +4 -0
- chalk/sql/_internal/integrations/snowflake.py +11 -2
- chalk/sql/_internal/integrations/util.py +2 -1
- chalk/sql/_internal/sql_file_resolver.py +55 -10
- chalk/sql/_internal/sql_source.py +36 -2
- chalk/streams/__init__.py +1 -3
- chalk/streams/_kafka_source.py +5 -1
- chalk/streams/_windows.py +16 -4
- chalk/streams/types.py +1 -2
- chalk/utils/__init__.py +1 -3
- chalk/utils/_otel_version.py +13 -0
- chalk/utils/async_helpers.py +14 -5
- chalk/utils/df_utils.py +2 -2
- chalk/utils/duration.py +1 -3
- chalk/utils/job_log_display.py +538 -0
- chalk/utils/missing_dependency.py +5 -4
- chalk/utils/notebook.py +255 -2
- chalk/utils/pl_helpers.py +190 -37
- chalk/utils/pydanticutil/pydantic_compat.py +1 -2
- chalk/utils/storage_client.py +246 -0
- chalk/utils/threading.py +1 -3
- chalk/utils/tracing.py +194 -86
- {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/METADATA +53 -21
- {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/RECORD +268 -198
- {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/WHEEL +0 -0
- {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/entry_points.txt +0 -0
- {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/top_level.txt +0 -0
chalk/importer.py
CHANGED
|
@@ -17,8 +17,9 @@ from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Ty
|
|
|
17
17
|
|
|
18
18
|
import pyarrow as pa
|
|
19
19
|
|
|
20
|
+
import chalk.functions as F
|
|
20
21
|
from chalk._lsp.error_builder import DiagnosticBuilder, LSPErrorBuilder
|
|
21
|
-
from chalk.features import Feature, Features, FeatureSetBase, Filter, unwrap_feature
|
|
22
|
+
from chalk.features import Feature, Features, FeatureSetBase, Filter, Vector, unwrap_feature
|
|
22
23
|
from chalk.features.feature_field import WindowConfigResolved
|
|
23
24
|
from chalk.features.pseudofeatures import Now
|
|
24
25
|
|
|
@@ -101,6 +102,8 @@ supported_aggs = (
|
|
|
101
102
|
"sum",
|
|
102
103
|
"var",
|
|
103
104
|
"var_sample",
|
|
105
|
+
"vector_sum",
|
|
106
|
+
"vector_mean",
|
|
104
107
|
)
|
|
105
108
|
|
|
106
109
|
|
|
@@ -169,7 +172,16 @@ def _check_types(
|
|
|
169
172
|
return
|
|
170
173
|
|
|
171
174
|
joined_annotation = joined_feature.typ.parsed_annotation
|
|
172
|
-
if aggregation not in {
|
|
175
|
+
if aggregation not in {
|
|
176
|
+
"count",
|
|
177
|
+
"approx_count_distinct",
|
|
178
|
+
"approx_top_k",
|
|
179
|
+
"min_by_n",
|
|
180
|
+
"max_by_n",
|
|
181
|
+
"array_agg",
|
|
182
|
+
"vector_sum",
|
|
183
|
+
"vector_mean",
|
|
184
|
+
}:
|
|
173
185
|
_validate_types(
|
|
174
186
|
annotation=joined_annotation,
|
|
175
187
|
permitted_types=(int, float),
|
|
@@ -195,7 +207,6 @@ def _check_types(
|
|
|
195
207
|
joined=False,
|
|
196
208
|
feature_name=feature_name,
|
|
197
209
|
)
|
|
198
|
-
|
|
199
210
|
elif aggregation == "min" or aggregation == "max":
|
|
200
211
|
if _get_underlying_type(this_annotation, feature_name) != _get_underlying_type(
|
|
201
212
|
joined_annotation, joined_feature.name
|
|
@@ -293,6 +304,19 @@ def _parse_agg_function_call(expr: Underscore | None) -> Tuple[str, Underscore,
|
|
|
293
304
|
f"expecting 'int' type argument for 'k', but received arg of type '{type(call_expr._chalk__kwargs.get('k'))}'"
|
|
294
305
|
)
|
|
295
306
|
opts = FrozenOrderedSet(call_expr._chalk__kwargs.items())
|
|
307
|
+
elif aggregation == "approx_percentile":
|
|
308
|
+
if len(call_expr._chalk__args) > 0:
|
|
309
|
+
raise ChalkParseError("should not have any positional arguments")
|
|
310
|
+
elif {"quantile"} != call_expr._chalk__kwargs.keys():
|
|
311
|
+
raise ChalkParseError("expecting exactly one required keyword argument 'quantile'")
|
|
312
|
+
elif not isinstance(call_expr._chalk__kwargs.get("quantile"), float):
|
|
313
|
+
raise ChalkParseError(
|
|
314
|
+
f"expecting 'float' type argument for 'quantile', but received arg of type '{type(call_expr._chalk__kwargs.get('quantile'))}'"
|
|
315
|
+
)
|
|
316
|
+
# TODO: expand proto definition to accept kwargs that are not necessarily `k`
|
|
317
|
+
quantile = call_expr._chalk__kwargs["quantile"]
|
|
318
|
+
nano_quantile = int(round(quantile * 1_000_000_000))
|
|
319
|
+
opts = FrozenOrderedSet([("k", nano_quantile)])
|
|
296
320
|
elif aggregation in ("min_by_n", "max_by_n"):
|
|
297
321
|
if len(call_expr._chalk__kwargs) > 0:
|
|
298
322
|
raise ChalkParseError("should not have any keyword arguments")
|
|
@@ -422,8 +446,6 @@ def run_post_import_fixups():
|
|
|
422
446
|
# "1m", "2m", materialization={...},
|
|
423
447
|
# expression=_.transactions[_.amount].sum(),
|
|
424
448
|
# )
|
|
425
|
-
assert f.underscore_expression is not None
|
|
426
|
-
assert f.window_materialization is not None
|
|
427
449
|
|
|
428
450
|
try:
|
|
429
451
|
f.window_materialization_parsed = parse_windowed_materialization(f=f)
|
|
@@ -561,39 +583,51 @@ def parse_grouped_window(f: Feature) -> WindowConfigResolved:
|
|
|
561
583
|
aggregation_kwargs=aggregation_kwargs,
|
|
562
584
|
pyarrow_dtype=pyarrow_dtype,
|
|
563
585
|
filters=parsed_filters,
|
|
564
|
-
backfill_resolver=
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
)
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
586
|
+
backfill_resolver=(
|
|
587
|
+
_try_parse_resolver_fqn(
|
|
588
|
+
"backfill_resolver",
|
|
589
|
+
f.window_materialization.get("backfill_resolver", None),
|
|
590
|
+
)
|
|
591
|
+
if isinstance(f.window_materialization, dict)
|
|
592
|
+
else None
|
|
593
|
+
),
|
|
594
|
+
backfill_schedule=(
|
|
595
|
+
f.window_materialization.get("backfill_schedule", None)
|
|
596
|
+
if isinstance(f.window_materialization, dict)
|
|
597
|
+
else None
|
|
598
|
+
),
|
|
599
|
+
backfill_lookback_duration_seconds=(
|
|
600
|
+
_try_parse_duration(
|
|
601
|
+
"backfill_lookback_duration",
|
|
602
|
+
f.window_materialization.get("backfill_lookback_duration", None),
|
|
603
|
+
)
|
|
604
|
+
if isinstance(f.window_materialization, dict)
|
|
605
|
+
else None
|
|
606
|
+
),
|
|
607
|
+
backfill_start_time=(
|
|
608
|
+
_try_parse_datetime(
|
|
609
|
+
"backfill_start_time",
|
|
610
|
+
f.window_materialization.get("backfill_start_time", None),
|
|
611
|
+
)
|
|
612
|
+
if isinstance(f.window_materialization, dict)
|
|
613
|
+
else None
|
|
614
|
+
),
|
|
615
|
+
continuous_resolver=(
|
|
616
|
+
_try_parse_resolver_fqn(
|
|
617
|
+
"continuous_resolver",
|
|
618
|
+
f.window_materialization.get("continuous_resolver", None),
|
|
619
|
+
)
|
|
620
|
+
if isinstance(f.window_materialization, dict)
|
|
621
|
+
else None
|
|
622
|
+
),
|
|
623
|
+
continuous_buffer_duration_seconds=(
|
|
624
|
+
_try_parse_duration(
|
|
625
|
+
"continuous_buffer_duration",
|
|
626
|
+
f.window_materialization.get("continuous_buffer_duration", None),
|
|
627
|
+
)
|
|
628
|
+
if isinstance(f.window_materialization, dict)
|
|
629
|
+
else None
|
|
630
|
+
),
|
|
597
631
|
)
|
|
598
632
|
|
|
599
633
|
return cfg
|
|
@@ -710,6 +744,14 @@ def parse_windowed_materialization(f: Feature) -> WindowConfigResolved | None:
|
|
|
710
744
|
aggregated_feature_name=aggregated_value,
|
|
711
745
|
)
|
|
712
746
|
|
|
747
|
+
if aggregation == "sum" or aggregation == "mean":
|
|
748
|
+
try:
|
|
749
|
+
if issubclass(f.typ.parsed_annotation, Vector):
|
|
750
|
+
aggregation = f"vector_{aggregation}"
|
|
751
|
+
except TypeError:
|
|
752
|
+
# Not a class so not a Vector, skip
|
|
753
|
+
pass
|
|
754
|
+
|
|
713
755
|
_check_types(
|
|
714
756
|
feature_name=f.window_stem,
|
|
715
757
|
aggregation=aggregation,
|
|
@@ -781,39 +823,51 @@ def parse_windowed_materialization(f: Feature) -> WindowConfigResolved | None:
|
|
|
781
823
|
aggregation_kwargs=aggregation_kwargs,
|
|
782
824
|
pyarrow_dtype=f.converter.pyarrow_dtype,
|
|
783
825
|
filters=parsed_filters,
|
|
784
|
-
backfill_resolver=
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
)
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
826
|
+
backfill_resolver=(
|
|
827
|
+
_try_parse_resolver_fqn(
|
|
828
|
+
"backfill_resolver",
|
|
829
|
+
f.window_materialization.get("backfill_resolver", None),
|
|
830
|
+
)
|
|
831
|
+
if isinstance(f.window_materialization, dict)
|
|
832
|
+
else None
|
|
833
|
+
),
|
|
834
|
+
backfill_schedule=(
|
|
835
|
+
f.window_materialization.get("backfill_schedule", None)
|
|
836
|
+
if isinstance(f.window_materialization, dict)
|
|
837
|
+
else None
|
|
838
|
+
),
|
|
839
|
+
backfill_lookback_duration_seconds=(
|
|
840
|
+
_try_parse_duration(
|
|
841
|
+
"backfill_lookback_duration",
|
|
842
|
+
f.window_materialization.get("backfill_lookback_duration", None),
|
|
843
|
+
)
|
|
844
|
+
if isinstance(f.window_materialization, dict)
|
|
845
|
+
else None
|
|
846
|
+
),
|
|
847
|
+
backfill_start_time=(
|
|
848
|
+
_try_parse_datetime(
|
|
849
|
+
"backfill_start_time",
|
|
850
|
+
f.window_materialization.get("backfill_start_time", None),
|
|
851
|
+
)
|
|
852
|
+
if isinstance(f.window_materialization, dict)
|
|
853
|
+
else None
|
|
854
|
+
),
|
|
855
|
+
continuous_resolver=(
|
|
856
|
+
_try_parse_resolver_fqn(
|
|
857
|
+
"continuous_resolver",
|
|
858
|
+
f.window_materialization.get("continuous_resolver", None),
|
|
859
|
+
)
|
|
860
|
+
if isinstance(f.window_materialization, dict)
|
|
861
|
+
else None
|
|
862
|
+
),
|
|
863
|
+
continuous_buffer_duration_seconds=(
|
|
864
|
+
_try_parse_duration(
|
|
865
|
+
"continuous_buffer_duration",
|
|
866
|
+
f.window_materialization.get("continuous_buffer_duration", None),
|
|
867
|
+
)
|
|
868
|
+
if isinstance(f.window_materialization, dict)
|
|
869
|
+
else None
|
|
870
|
+
),
|
|
817
871
|
)
|
|
818
872
|
|
|
819
873
|
|
|
@@ -991,6 +1045,33 @@ class _UnderscoreValidationError(ValueError):
|
|
|
991
1045
|
...
|
|
992
1046
|
|
|
993
1047
|
|
|
1048
|
+
def _has_group_by_in_parent_chain(underscore: Underscore) -> bool:
|
|
1049
|
+
"""
|
|
1050
|
+
Traverse parent chain to check if .group_by() exists before .agg().
|
|
1051
|
+
|
|
1052
|
+
For valid group_by_windowed: _.x.group_by(_.y).agg(_.z.sum())
|
|
1053
|
+
- Looks for: UnderscoreCall -> UnderscoreAttr("group_by")
|
|
1054
|
+
|
|
1055
|
+
Returns True if .group_by() found, False otherwise.
|
|
1056
|
+
"""
|
|
1057
|
+
current: Optional[Any] = underscore
|
|
1058
|
+
|
|
1059
|
+
while current is not None:
|
|
1060
|
+
# Check if current is a .group_by() call
|
|
1061
|
+
if isinstance(current, UnderscoreCall):
|
|
1062
|
+
parent = current._chalk__parent
|
|
1063
|
+
if isinstance(parent, UnderscoreAttr) and parent._chalk__attr == "group_by":
|
|
1064
|
+
return True
|
|
1065
|
+
|
|
1066
|
+
# Move to parent
|
|
1067
|
+
if hasattr(current, "_chalk__parent"):
|
|
1068
|
+
current = current._chalk__parent
|
|
1069
|
+
else:
|
|
1070
|
+
break
|
|
1071
|
+
|
|
1072
|
+
return False
|
|
1073
|
+
|
|
1074
|
+
|
|
994
1075
|
class ChalkImporter:
|
|
995
1076
|
def __init__(self):
|
|
996
1077
|
super().__init__()
|
|
@@ -1092,6 +1173,9 @@ class ChalkImporter:
|
|
|
1092
1173
|
for feature_class in FeatureSetBase.registry.values():
|
|
1093
1174
|
# Iterate through every class, to find every underscore definition.
|
|
1094
1175
|
for f in feature_class.features:
|
|
1176
|
+
if f.is_windowed_pseudofeature is True:
|
|
1177
|
+
# need one LSP just for the base
|
|
1178
|
+
continue
|
|
1095
1179
|
if f.underscore_expression is not None:
|
|
1096
1180
|
# Validate that the underscore expression is well-formed.
|
|
1097
1181
|
# If it is not well-formed, then an `_UnderscoreValidationError` will
|
|
@@ -1424,15 +1508,26 @@ def _supplemental_validate_underscore_expression(
|
|
|
1424
1508
|
|
|
1425
1509
|
# TODO: Dominic - impl for UnderscoreCall args (we need some special casing for aggregate functions that take in UnderscoreItems)
|
|
1426
1510
|
if isinstance(underscore, UnderscoreCall):
|
|
1511
|
+
if not isinstance(underscore._chalk__parent, UnderscoreAttr):
|
|
1512
|
+
# we only support calls on attrs, ie _.a.some_attr(*args, **kwargs)
|
|
1513
|
+
raise _UnderscoreValidationError(f"Cannot call non-attribute {underscore._chalk__parent}.")
|
|
1427
1514
|
caller = underscore._chalk__parent._chalk__parent
|
|
1515
|
+
op_name = underscore._chalk__parent._chalk__attr
|
|
1516
|
+
|
|
1517
|
+
if (op := getattr(F, op_name, None)) is not None:
|
|
1518
|
+
if getattr(op, "_chalk__method_chaining_predicate", lambda _: True)(underscore):
|
|
1519
|
+
return _supplemental_validate_underscore_expression(
|
|
1520
|
+
state,
|
|
1521
|
+
class_namespace=class_namespace,
|
|
1522
|
+
underscore=op(caller, *underscore._chalk__args, **underscore._chalk__kwargs),
|
|
1523
|
+
)
|
|
1524
|
+
|
|
1428
1525
|
maybe_parent_result = _supplemental_validate_underscore_expression(
|
|
1429
1526
|
state=state,
|
|
1430
1527
|
class_namespace=class_namespace,
|
|
1431
1528
|
underscore=caller,
|
|
1432
1529
|
)
|
|
1433
|
-
if
|
|
1434
|
-
return None # TODO: Dominic - is this ever valid?
|
|
1435
|
-
if underscore._chalk__parent._chalk__attr == "where":
|
|
1530
|
+
if op_name == "where":
|
|
1436
1531
|
if maybe_parent_result is None:
|
|
1437
1532
|
return None
|
|
1438
1533
|
if not isinstance(maybe_parent_result, _HasManyNamespaceExpr) or not isinstance(caller, UnderscoreItem):
|
|
@@ -1457,9 +1552,26 @@ def _supplemental_validate_underscore_expression(
|
|
|
1457
1552
|
raise _UnderscoreValidationError(
|
|
1458
1553
|
f"the input '{arg!r}' is a feature namespace '{expr.namespace}' which cannot be used as a scalar value"
|
|
1459
1554
|
)
|
|
1555
|
+
return None
|
|
1556
|
+
|
|
1557
|
+
# Validate .agg() usage (addressing TODO at line 1522)
|
|
1558
|
+
if op_name == "agg":
|
|
1559
|
+
if not _has_group_by_in_parent_chain(caller):
|
|
1560
|
+
raise _UnderscoreValidationError(
|
|
1561
|
+
"'.agg()' can only be used with '.group_by()' for group_by_windowed features. "
|
|
1562
|
+
+ "For windowed features, use direct aggregation methods instead. "
|
|
1563
|
+
+ "For example, instead of using '.agg(_.field.method())', use '.field.method()' directly on the filtered DataFrame"
|
|
1564
|
+
)
|
|
1460
1565
|
|
|
1461
1566
|
return None
|
|
1462
1567
|
|
|
1568
|
+
# TODO: check that op_name is a supported agg or .agg/.group_by/etc
|
|
1569
|
+
# if op_name in supported_aggs:
|
|
1570
|
+
# # TODO: typechecking for agg fns
|
|
1571
|
+
# return None
|
|
1572
|
+
#
|
|
1573
|
+
# raise _UnderscoreValidationError(f"unrecognized function '{op_name}' in expression '{underscore}'")
|
|
1574
|
+
|
|
1463
1575
|
if isinstance(underscore, UnderscoreItem):
|
|
1464
1576
|
parent_result = _supplemental_validate_underscore_expression(
|
|
1465
1577
|
state=state,
|
chalk/ml/__init__.py
CHANGED
|
@@ -1,16 +1,20 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from chalk.ml.model_file_transfer import HFSourceConfig, LocalSourceConfig, S3SourceConfig, SourceConfig
|
|
3
|
+
from chalk.ml.model_file_transfer import FileInfo, HFSourceConfig, LocalSourceConfig, S3SourceConfig, SourceConfig
|
|
4
4
|
from chalk.ml.model_reference import ModelReference
|
|
5
|
-
from chalk.ml.
|
|
5
|
+
from chalk.ml.model_version import ModelVersion
|
|
6
|
+
from chalk.ml.utils import ModelClass, ModelEncoding, ModelRunCriterion, ModelType
|
|
6
7
|
|
|
7
8
|
__all__ = (
|
|
8
9
|
"ModelType",
|
|
10
|
+
"ModelClass",
|
|
9
11
|
"ModelEncoding",
|
|
10
12
|
"ModelReference",
|
|
13
|
+
"ModelVersion",
|
|
11
14
|
"SourceConfig",
|
|
12
15
|
"LocalSourceConfig",
|
|
13
16
|
"S3SourceConfig",
|
|
14
17
|
"HFSourceConfig",
|
|
15
18
|
"ModelRunCriterion",
|
|
19
|
+
"FileInfo",
|
|
16
20
|
)
|