chalkpy 2.96.6__py3-none-any.whl → 2.96.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
chalk/_version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "2.96.6"
1
+ __version__ = "2.96.8"
chalk/client/client.py CHANGED
@@ -33,6 +33,7 @@ from chalk.client.models import (
33
33
  GetRegisteredModelResponse,
34
34
  GetRegisteredModelVersionResponse,
35
35
  ManualTriggerScheduledQueryResponse,
36
+ OfflineQueryDeadlineOptions,
36
37
  OfflineQueryInputUri,
37
38
  OnlineQuery,
38
39
  OnlineQueryContext,
@@ -526,6 +527,7 @@ class ChalkClient:
526
527
  explain: bool = False,
527
528
  num_input_rows: Optional[int] = None,
528
529
  headers: Mapping[str, str] | None = None,
530
+ planner_options: Mapping[str, str | int | bool] | None = None,
529
531
  ) -> PlanQueryResponse:
530
532
  """Plan a query without executing it.
531
533
 
@@ -575,6 +577,10 @@ class ChalkClient:
575
577
  The number of input rows that this plan will be run with. If unknown, specify `None`.
576
578
  headers
577
579
  Additional headers to provide with the request
580
+ planner_options
581
+ Dictionary of additional options to pass to the Chalk query engine.
582
+ Values may be provided as part of conversations with Chalk support
583
+ to enable or disable specific functionality.
578
584
 
579
585
  Returns
580
586
  -------
@@ -922,7 +928,7 @@ class ChalkClient:
922
928
  store_offline: bool = False,
923
929
  num_shards: int | None = None,
924
930
  num_workers: int | None = None,
925
- completion_deadline: timedelta | None = None,
931
+ completion_deadline: Union[timedelta, OfflineQueryDeadlineOptions, None] = None,
926
932
  max_retries: int | None = None,
927
933
  query_name: str | None = None,
928
934
  query_name_version: str | None = None,
@@ -1038,8 +1044,8 @@ class ChalkClient:
1038
1044
  If specified, the query will be run asynchronously across a maximum `num_workers` pod workers at any time.
1039
1045
  This parameter is useful if you have a large number of shards and would like to limit the number of pods running at once.
1040
1046
  completion_deadline
1041
- If specified, shards must complete within 'completion_deadline' duration, or they will be terminated.
1042
- Terminated shards can be tried.
1047
+ If specified as a timedelta, applies a completion deadline to each shard; each shard's query will fail (allowing retries) if it does not complete within the duration.
1048
+ If specified as an OfflineQueryDeadlineOptions, allows more fine-grained control of shard- or query-level deadlines, with options to retry on failure or not.
1043
1049
  max_retries
1044
1050
  If specified, failed offline query shards will be retried. The retry budget is shared across all shards.
1045
1051
  By default, max_retries=num_shards/
@@ -12,6 +12,7 @@ from chalk.client.models import (
12
12
  BulkOnlineQueryResult,
13
13
  FeatureReference,
14
14
  FeatureStatisticsResponse,
15
+ OfflineQueryDeadlineOptions,
15
16
  OfflineQueryInputUri,
16
17
  OnlineQuery,
17
18
  OnlineQueryContext,
@@ -486,6 +487,7 @@ class AsyncChalkClient:
486
487
  explain: bool = False,
487
488
  num_input_rows: Optional[int] = None,
488
489
  headers: Mapping[str, str] | None = None,
490
+ planner_options: Mapping[str, str | int | bool] | None = None,
489
491
  ) -> PlanQueryResponse:
490
492
  """Plan a query without executing it.
491
493
 
@@ -535,6 +537,10 @@ class AsyncChalkClient:
535
537
  The number of input rows that this plan will be run with. If unknown, specify `None`.
536
538
  headers
537
539
  Additional headers to provide with the request
540
+ planner_options
541
+ Dictionary of additional options to pass to the Chalk query engine.
542
+ Values may be provided as part of conversations with Chalk support
543
+ to enable or disable specific functionality.
538
544
 
539
545
  Returns
540
546
  -------
@@ -642,7 +648,7 @@ class AsyncChalkClient:
642
648
  store_offline: bool = False,
643
649
  num_shards: int | None = None,
644
650
  num_workers: int | None = None,
645
- completion_deadline: timedelta | None = None,
651
+ completion_deadline: Union[timedelta, OfflineQueryDeadlineOptions, None] = None,
646
652
  max_retries: int | None = None,
647
653
  query_name: str | None = None,
648
654
  query_name_version: str | None = None,
@@ -745,6 +751,9 @@ class AsyncChalkClient:
745
751
  num_workers
746
752
  If specified, the query will be run asynchronously across a maximum `num_workers` pod workers at any time.
747
753
  This parameter is useful if you have a large number of shards and would like to limit the number of pods running at once.
754
+ completion_deadline
755
+ If specified as a timedelta, applies a completion deadline to each shard; each shard's query will fail (allowing retries) if it does not complete within the duration.
756
+ If specified as an OfflineQueryDeadlineOptions, allows more fine-grained control of shard- or query-level deadlines, with options to retry on failure or not.
748
757
  query_name
749
758
  The name of the query to execute. If provided, will create a new named query or fill in missing parameters from a preexisting execution.
750
759
  query_name_version
@@ -103,6 +103,7 @@ from chalk.client.models import (
103
103
  MultiUploadFeaturesRequest,
104
104
  MultiUploadFeaturesResponse,
105
105
  OfflineQueryContext,
106
+ OfflineQueryDeadlineOptions,
106
107
  OfflineQueryInput,
107
108
  OfflineQueryInputSql,
108
109
  OfflineQueryInputUri,
@@ -2229,7 +2230,7 @@ https://docs.chalk.ai/cli/apply
2229
2230
  store_offline: bool = False,
2230
2231
  num_shards: int | None = None,
2231
2232
  num_workers: int | None = None,
2232
- completion_deadline: timedelta | None = None,
2233
+ completion_deadline: Union[timedelta, OfflineQueryDeadlineOptions, None] = None,
2233
2234
  max_retries: int | None = None,
2234
2235
  query_name: str | None = None,
2235
2236
  query_name_version: str | None = None,
@@ -3607,7 +3608,7 @@ https://docs.chalk.ai/cli/apply
3607
3608
  num_shards: int | None = None,
3608
3609
  num_workers: int | None = None,
3609
3610
  feature_for_lower_upper_bound: Optional[str] = None,
3610
- completion_deadline: timedelta | None = None,
3611
+ completion_deadline: Union[timedelta, OfflineQueryDeadlineOptions, None] = None,
3611
3612
  max_retries: int | None = None,
3612
3613
  optional_output_expressions: Optional[List[str]] = None,
3613
3614
  required_output_expressions: Optional[List[str]] = None,
@@ -3651,6 +3652,13 @@ https://docs.chalk.ai/cli/apply
3651
3652
  upper_bound_str = process_bound(upper_bound)
3652
3653
  if branch is ...:
3653
3654
  branch = self._branch
3655
+
3656
+ retyped_completion_deadline: Union[None, str, OfflineQueryDeadlineOptions] = None
3657
+ if isinstance(completion_deadline, OfflineQueryDeadlineOptions):
3658
+ retyped_completion_deadline = completion_deadline.with_chalk_durations()
3659
+ elif isinstance(completion_deadline, timedelta):
3660
+ retyped_completion_deadline = timedelta_to_duration(completion_deadline)
3661
+
3654
3662
  req = CreateOfflineQueryJobRequest(
3655
3663
  output=optional_output,
3656
3664
  output_expressions=optional_output_expressions or [],
@@ -3683,7 +3691,7 @@ https://docs.chalk.ai/cli/apply
3683
3691
  num_shards=num_shards,
3684
3692
  num_workers=num_workers,
3685
3693
  feature_for_lower_upper_bound=feature_for_lower_upper_bound,
3686
- completion_deadline=timedelta_to_duration(completion_deadline) if completion_deadline is not None else None,
3694
+ completion_deadline=retyped_completion_deadline,
3687
3695
  max_retries=max_retries,
3688
3696
  use_job_queue=use_job_queue,
3689
3697
  overlay_graph=_get_overlay_graph_b64(),
@@ -4438,6 +4446,7 @@ https://docs.chalk.ai/cli/apply
4438
4446
  explain: bool = False,
4439
4447
  num_input_rows: Optional[int] = None,
4440
4448
  headers: Mapping[str, str] | None = None,
4449
+ planner_options: Mapping[str, str | int | bool] | None = None,
4441
4450
  ) -> PlanQueryResponse:
4442
4451
  encoded_inputs = encode_outputs(input).string_outputs
4443
4452
  outputs = encode_outputs(output).string_outputs
@@ -4472,6 +4481,7 @@ https://docs.chalk.ai/cli/apply
4472
4481
  store_plan_stages=store_plan_stages,
4473
4482
  explain=explain,
4474
4483
  num_input_rows=num_input_rows,
4484
+ planner_options=planner_options,
4475
4485
  )
4476
4486
 
4477
4487
  extra_headers: dict[str, str] = {}
chalk/client/models.py CHANGED
@@ -20,6 +20,7 @@ from chalk.features.tag import EnvironmentId
20
20
  from chalk.prompts import Prompt
21
21
  from chalk.queries.query_context import ContextJsonDict
22
22
  from chalk.utils.df_utils import read_parquet
23
+ from chalk.utils.duration import timedelta_to_duration
23
24
  from chalk.utils.missing_dependency import missing_dependency_exception
24
25
 
25
26
  if TYPE_CHECKING:
@@ -820,6 +821,48 @@ class ResourceRequests(BaseModel):
820
821
  """Resource group to use for this job. If not specified, the default resource group will be used."""
821
822
 
822
823
 
824
+ class OfflineQueryDeadlineOptions(BaseModel):
825
+ """
826
+ Specification for setting deadlines for shards of the query or the entire query itself.
827
+ """
828
+
829
+ shard_deadline: Union[timedelta, str, None] = None
830
+ """
831
+ Maximum amount of time a query shard can work before being failed.
832
+ """
833
+
834
+ retry_on_shard_deadline: Optional[bool] = None
835
+ """
836
+ Whether to retry when the per-shard deadline is triggered. Will default to true.
837
+ """
838
+
839
+ query_deadline: Union[timedelta, str, None] = None
840
+ """
841
+ Maximum amount of time that the entire query can work before being failed.
842
+ """
843
+
844
+ retry_on_query_deadline: Optional[bool] = None
845
+ """
846
+ Whether to retry when the entire query's deadline is triggered. Will default to false.
847
+ """
848
+
849
+ def with_chalk_durations(self) -> OfflineQueryDeadlineOptions:
850
+ return OfflineQueryDeadlineOptions(
851
+ shard_deadline=(
852
+ timedelta_to_duration(self.shard_deadline)
853
+ if isinstance(self.shard_deadline, timedelta)
854
+ else self.shard_deadline
855
+ ),
856
+ retry_on_shard_deadline=self.retry_on_shard_deadline,
857
+ query_deadline=(
858
+ timedelta_to_duration(self.query_deadline)
859
+ if isinstance(self.query_deadline, timedelta)
860
+ else self.query_deadline
861
+ ),
862
+ retry_on_query_deadline=self.retry_on_query_deadline,
863
+ )
864
+
865
+
823
866
  class CreateOfflineQueryJobRequest(BaseModel):
824
867
  output: List[str]
825
868
  """A list of output feature root fqns to query"""
@@ -902,7 +945,7 @@ class CreateOfflineQueryJobRequest(BaseModel):
902
945
  num_workers: Optional[int] = None
903
946
  feature_for_lower_upper_bound: Optional[str] = None
904
947
 
905
- completion_deadline: Optional[str] = None
948
+ completion_deadline: Union[None, str, OfflineQueryDeadlineOptions] = None
906
949
  max_retries: Optional[int] = None
907
950
 
908
951
  use_job_queue: bool = False
@@ -915,7 +958,7 @@ class CreateOfflineQueryJobRequest(BaseModel):
915
958
  @root_validator
916
959
  def _validate_multiple_computers(cls, values: Dict[str, Any]):
917
960
  if values["input"] is None or isinstance(
918
- values["input"], (UploadedParquetShardedOfflineQueryInput, OfflineQueryInputUri)
961
+ values["input"], (UploadedParquetShardedOfflineQueryInput, OfflineQueryInputUri, OfflineQueryInputSql)
919
962
  ):
920
963
  return values
921
964
  expected_use_multiple_computers = isinstance(values["input"], tuple)
@@ -1653,6 +1696,7 @@ class PlanQueryRequest(BaseModel):
1653
1696
  explain: bool = False
1654
1697
  store_plan_stages: bool = False
1655
1698
  encoding_options: FeatureEncodingOptions = FeatureEncodingOptions()
1699
+ planner_options: Mapping[str, str | int | bool | float] | None = None
1656
1700
 
1657
1701
 
1658
1702
  class FeatureSchema(BaseModel):
@@ -101,6 +101,11 @@ class LazyFramePlaceholder:
101
101
 
102
102
  __str__ = __repr__
103
103
 
104
+ def _is_equal(self, other: LazyFramePlaceholder) -> bool:
105
+ # proto equality is janky but it's hard to write a good eq method here given
106
+ # we have dicts and the proto round trip is slightly lossy on tuples vs lists
107
+ return self._to_proto() == other._to_proto()
108
+
104
109
  def _to_proto(self) -> dataframe_pb2.DataFramePlan:
105
110
  """
106
111
  Convert this proto plan to a dataframe.
@@ -940,10 +940,9 @@ class PrimitiveFeatureConverter(Generic[_TPrim]):
940
940
 
941
941
  @staticmethod
942
942
  def convert_arrow_table_from_proto(proto: pb.TableParquetBytes) -> pa.Table:
943
- import pyarrow.parquet
943
+ import pyarrow.parquet as pq
944
944
 
945
- pf = pyarrow.parquet.ParquetFile(io.BytesIO(proto.encoded_parquet_bytes))
946
- return pyarrow.parquet.read_table(pf)
945
+ return pq.read_table(io.BytesIO(proto.encoded_parquet_bytes))
947
946
 
948
947
  @staticmethod
949
948
  def _serialize_pa_decimal_to_pb(value: Union[pa.Decimal128Scalar, pa.Decimal256Scalar]) -> pb.ScalarValue:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chalkpy
3
- Version: 2.96.6
3
+ Version: 2.96.8
4
4
  Summary: Python SDK for Chalk
5
5
  Author: Chalk AI, Inc.
6
6
  Project-URL: Homepage, https://chalk.ai
@@ -1,5 +1,5 @@
1
1
  chalk/__init__.py,sha256=vKsx9-cl5kImlVWGHVRYO6bweBm79NAzGs3l36u71wM,2657
2
- chalk/_version.py,sha256=Y-Kl04ucuWNOt_WOW6dvhcamZanMwDrSw7NrZ8zKeSs,23
2
+ chalk/_version.py,sha256=XQBn2m_TF6iSSjpgoPtUzJ93WpjTyMNEkfPlAWxDliQ,23
3
3
  chalk/cli.py,sha256=ckqqfOI-A2mT23-rnZzDMmblYj-2x1VBX8ebHlIEn9A,5873
4
4
  chalk/importer.py,sha256=m4lMn1lSYj_euDq8CS7LYTBnek9JOcjGJf9-82dJHbA,64441
5
5
  chalk/prompts.py,sha256=2H9UomLAamdfRTNUdKs9i3VTpiossuyRhntqsAXUhhg,16117
@@ -612,14 +612,14 @@ chalk/_validation/validation.py,sha256=9cCMfZa9-1wxkXLme_ylmD5vIA1qExJD6aqbYvbmK
612
612
  chalk/byte_transmit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
613
613
  chalk/byte_transmit/model.py,sha256=LFX8pj9X_CWXeap7fDnMl9YmXsYTgq7jBAbEWkxoYoE,13048
614
614
  chalk/client/__init__.py,sha256=wu3WQVzE5gRj6noQQDOdYJUgDaz_9QtbjXH4KuoIlXQ,1782
615
- chalk/client/client.py,sha256=fqw75x4yiAk3CXryGbuF_qg09WDIlluhYeFdQ-BfTG8,103337
616
- chalk/client/client_async.py,sha256=nFFTWJbdBlb7zksyjOMBY566tZTAyNXQhCnq06LHWl0,50803
615
+ chalk/client/client.py,sha256=59UYbIq7KHwNm1ZFw5TmCV2JvbJxn45Ar_AjfSkuzQ8,103907
616
+ chalk/client/client_async.py,sha256=wD38RIkwVLaKYKj7K1bWDXnVZkLFYHsbY_5VhtflMvo,51559
617
617
  chalk/client/client_async_impl.py,sha256=ZphhgTB49JBWHCGXe-dI0wWWKc9zPcOczy02q_gFy50,6925
618
618
  chalk/client/client_grpc.py,sha256=skpSHYCyE8hg0T3Bcl5R8MMi92QjrFtxQRu_NxCSVlw,106952
619
- chalk/client/client_impl.py,sha256=rvT02yriRo7_WFqbUsHXRjm6DbRyXzw2QjcLwKaW10E,211844
619
+ chalk/client/client_impl.py,sha256=3NDl1Vo7YJ0grraxJeN6vjFj1hBpU7YT_ikolK31D6k,212395
620
620
  chalk/client/dataset.py,sha256=LneWwaAOHCjtj7gaJjsSeVNruj-QJ51hjRi62zrFNVE,77561
621
621
  chalk/client/exc.py,sha256=kZJ80YbSeSRDmTLTh240j_eRdJFZBa7IaDsNSRoDroU,4145
622
- chalk/client/models.py,sha256=FDgEqIeBJq25T9AH38jdWkeyRLiAXfLXfXHpomFJM08,63923
622
+ chalk/client/models.py,sha256=YwXywN6OSPOda1DKJuPVe6hgsHzf8ewvX2dYOBOZk7I,65534
623
623
  chalk/client/response.py,sha256=m8sQCOj7YVv3mZSZMIC1rIMzFMQ9rfMdBRLg5NRmOOE,53257
624
624
  chalk/client/_internal_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
625
625
  chalk/client/_internal_models/check.py,sha256=3Xfo4Ws4rvwjeVg0-5-kejfRfRBJeqHmnRhW-WEz784,917
@@ -636,7 +636,7 @@ chalk/config/_validator.py,sha256=QC1y52m704_bV_TYjq0sdZJ-km8iSkDX1V4sHgw4RJk,13
636
636
  chalk/config/auth_config.py,sha256=HAALkQrvDD0i7gaZK5iufS8vDDVbzLIpHLOpcJO1kmw,4498
637
637
  chalk/config/project_config.py,sha256=YHB3upvtBJu-tWfNOchBRSc9xGebDbrIpCVmKbBzJ8Q,7217
638
638
  chalk/df/ChalkDataFrameImpl.py,sha256=BRwnjQcie3gxaKhKau8HG17NWzS1zdr8SnNVurxF8QY,133
639
- chalk/df/LazyFramePlaceholder.py,sha256=uTinB2buEFBT8djBCqiPN2hKmr_nU7n4i_DcCLNLuWs,39111
639
+ chalk/df/LazyFramePlaceholder.py,sha256=D4oYEGPfoxpJMmyiDJq3G9Wfl1jF3XSXD71q-GxOrmQ,39398
640
640
  chalk/df/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
641
641
  chalk/df/ast_parser.py,sha256=t-DwUxd2hN8LECRSBx85DIv9FtILgMiHyGyCTINfgQw,11199
642
642
  chalk/features/__init__.py,sha256=5doD7bFwRthzwdmizbRaPVHiCABiNpiOiAJVFlwqNnA,6943
@@ -673,7 +673,7 @@ chalk/features/_embedding/sentence_transformer.py,sha256=hNYuT9D-16C49lkhIVK_KXZ
673
673
  chalk/features/_embedding/utils.py,sha256=gUYUJCt-9XLeioC3QYg42yc5f69ywAr_w_Q-xoF3o54,877
674
674
  chalk/features/_embedding/vertexai.py,sha256=PUaMmd7qbK9vXrlce_8BQHg_OM9L982k6LuyM_PU-5w,5362
675
675
  chalk/features/_encoding/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
676
- chalk/features/_encoding/converter.py,sha256=YMIFZLAiN7z8JhDj4hsrH4qQTZ0pFBIwaJN6ULVsjd8,66221
676
+ chalk/features/_encoding/converter.py,sha256=YLNX4hxoT5GlP_n_cL5anQlIzfMW0TfBuuK1TBccaiI,66169
677
677
  chalk/features/_encoding/http.py,sha256=MVmkgv90O4sOnImc-FrRPVwYR2U7zOGduNkHO4umoe4,2227
678
678
  chalk/features/_encoding/inputs.py,sha256=4K4kOCAl0MuobAC_lVJE70toQVrPt2ZlzrlwYALuLHE,10151
679
679
  chalk/features/_encoding/json.py,sha256=lHSBWIjNKMDmt_AmiFtf4ZsW5QPIEC6r7NvDW0IdqPI,13804
@@ -827,8 +827,8 @@ chalk/utils/tracing.py,sha256=NiiM-9dbuJhSCv6R1npR1uYNKWlkqTR6Ygm0Voi2NrY,13078
827
827
  chalk/utils/weak_set_by_identity.py,sha256=VmikA_laYwFeOphCwXJIuyOIkrdlQe0bSzaXq7onoQw,953
828
828
  chalk/utils/pydanticutil/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
829
829
  chalk/utils/pydanticutil/pydantic_compat.py,sha256=O575lLYJ5GvZC4HMzR9yATxf9XwjC6NrDUXbNwZidlE,3031
830
- chalkpy-2.96.6.dist-info/METADATA,sha256=5dqjMynKBZ6H6Y6tbYNazvRRwyCLZblyJFsi6E_PA1E,27754
831
- chalkpy-2.96.6.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
832
- chalkpy-2.96.6.dist-info/entry_points.txt,sha256=Vg23sd8icwq-morJrljVFr-kQnMbm95rZfZj5wsZGis,42
833
- chalkpy-2.96.6.dist-info/top_level.txt,sha256=1Q6_19IGYfNxSw50W8tYKEJ2t5HKQ3W9Wiw4ia5yg2c,6
834
- chalkpy-2.96.6.dist-info/RECORD,,
830
+ chalkpy-2.96.8.dist-info/METADATA,sha256=wLMi2G7i8tczLoHoAAYiMWG9g2oujf4DoUmq3hqkP98,27754
831
+ chalkpy-2.96.8.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
832
+ chalkpy-2.96.8.dist-info/entry_points.txt,sha256=Vg23sd8icwq-morJrljVFr-kQnMbm95rZfZj5wsZGis,42
833
+ chalkpy-2.96.8.dist-info/top_level.txt,sha256=1Q6_19IGYfNxSw50W8tYKEJ2t5HKQ3W9Wiw4ia5yg2c,6
834
+ chalkpy-2.96.8.dist-info/RECORD,,