chalkpy 2.89.22__py3-none-any.whl → 2.95.3__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their public registry and is provided for informational purposes only.
Files changed (268)
  1. chalk/__init__.py +2 -1
  2. chalk/_gen/chalk/arrow/v1/arrow_pb2.py +7 -5
  3. chalk/_gen/chalk/arrow/v1/arrow_pb2.pyi +6 -0
  4. chalk/_gen/chalk/artifacts/v1/chart_pb2.py +36 -33
  5. chalk/_gen/chalk/artifacts/v1/chart_pb2.pyi +41 -1
  6. chalk/_gen/chalk/artifacts/v1/cron_query_pb2.py +8 -7
  7. chalk/_gen/chalk/artifacts/v1/cron_query_pb2.pyi +5 -0
  8. chalk/_gen/chalk/common/v1/offline_query_pb2.py +19 -13
  9. chalk/_gen/chalk/common/v1/offline_query_pb2.pyi +37 -0
  10. chalk/_gen/chalk/common/v1/online_query_pb2.py +54 -54
  11. chalk/_gen/chalk/common/v1/online_query_pb2.pyi +13 -1
  12. chalk/_gen/chalk/common/v1/script_task_pb2.py +13 -11
  13. chalk/_gen/chalk/common/v1/script_task_pb2.pyi +19 -1
  14. chalk/_gen/chalk/dataframe/__init__.py +0 -0
  15. chalk/_gen/chalk/dataframe/v1/__init__.py +0 -0
  16. chalk/_gen/chalk/dataframe/v1/dataframe_pb2.py +48 -0
  17. chalk/_gen/chalk/dataframe/v1/dataframe_pb2.pyi +123 -0
  18. chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.py +4 -0
  19. chalk/_gen/chalk/dataframe/v1/dataframe_pb2_grpc.pyi +4 -0
  20. chalk/_gen/chalk/graph/v1/graph_pb2.py +150 -149
  21. chalk/_gen/chalk/graph/v1/graph_pb2.pyi +25 -0
  22. chalk/_gen/chalk/graph/v1/sources_pb2.py +94 -84
  23. chalk/_gen/chalk/graph/v1/sources_pb2.pyi +56 -0
  24. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.py +79 -0
  25. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2.pyi +377 -0
  26. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.py +4 -0
  27. chalk/_gen/chalk/kubernetes/v1/horizontalpodautoscaler_pb2_grpc.pyi +4 -0
  28. chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.py +43 -7
  29. chalk/_gen/chalk/kubernetes/v1/scaledobject_pb2.pyi +252 -2
  30. chalk/_gen/chalk/protosql/v1/sql_service_pb2.py +54 -27
  31. chalk/_gen/chalk/protosql/v1/sql_service_pb2.pyi +131 -3
  32. chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.py +45 -0
  33. chalk/_gen/chalk/protosql/v1/sql_service_pb2_grpc.pyi +14 -0
  34. chalk/_gen/chalk/python/v1/types_pb2.py +14 -14
  35. chalk/_gen/chalk/python/v1/types_pb2.pyi +8 -0
  36. chalk/_gen/chalk/server/v1/benchmark_pb2.py +76 -0
  37. chalk/_gen/chalk/server/v1/benchmark_pb2.pyi +156 -0
  38. chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.py +258 -0
  39. chalk/_gen/chalk/server/v1/benchmark_pb2_grpc.pyi +84 -0
  40. chalk/_gen/chalk/server/v1/billing_pb2.py +40 -38
  41. chalk/_gen/chalk/server/v1/billing_pb2.pyi +17 -1
  42. chalk/_gen/chalk/server/v1/branches_pb2.py +45 -0
  43. chalk/_gen/chalk/server/v1/branches_pb2.pyi +80 -0
  44. chalk/_gen/chalk/server/v1/branches_pb2_grpc.pyi +36 -0
  45. chalk/_gen/chalk/server/v1/builder_pb2.py +372 -272
  46. chalk/_gen/chalk/server/v1/builder_pb2.pyi +479 -12
  47. chalk/_gen/chalk/server/v1/builder_pb2_grpc.py +360 -0
  48. chalk/_gen/chalk/server/v1/builder_pb2_grpc.pyi +96 -0
  49. chalk/_gen/chalk/server/v1/chart_pb2.py +10 -10
  50. chalk/_gen/chalk/server/v1/chart_pb2.pyi +18 -2
  51. chalk/_gen/chalk/server/v1/clickhouse_pb2.py +42 -0
  52. chalk/_gen/chalk/server/v1/clickhouse_pb2.pyi +17 -0
  53. chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.py +78 -0
  54. chalk/_gen/chalk/server/v1/clickhouse_pb2_grpc.pyi +38 -0
  55. chalk/_gen/chalk/server/v1/cloud_components_pb2.py +153 -107
  56. chalk/_gen/chalk/server/v1/cloud_components_pb2.pyi +146 -4
  57. chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.py +180 -0
  58. chalk/_gen/chalk/server/v1/cloud_components_pb2_grpc.pyi +48 -0
  59. chalk/_gen/chalk/server/v1/cloud_credentials_pb2.py +11 -3
  60. chalk/_gen/chalk/server/v1/cloud_credentials_pb2.pyi +20 -0
  61. chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.py +45 -0
  62. chalk/_gen/chalk/server/v1/cloud_credentials_pb2_grpc.pyi +12 -0
  63. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.py +59 -35
  64. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2.pyi +127 -1
  65. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.py +135 -0
  66. chalk/_gen/chalk/server/v1/dataplanejobqueue_pb2_grpc.pyi +36 -0
  67. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.py +90 -0
  68. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2.pyi +264 -0
  69. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.py +170 -0
  70. chalk/_gen/chalk/server/v1/dataplaneworkflows_pb2_grpc.pyi +62 -0
  71. chalk/_gen/chalk/server/v1/datasets_pb2.py +36 -24
  72. chalk/_gen/chalk/server/v1/datasets_pb2.pyi +71 -2
  73. chalk/_gen/chalk/server/v1/datasets_pb2_grpc.py +45 -0
  74. chalk/_gen/chalk/server/v1/datasets_pb2_grpc.pyi +12 -0
  75. chalk/_gen/chalk/server/v1/deploy_pb2.py +9 -3
  76. chalk/_gen/chalk/server/v1/deploy_pb2.pyi +12 -0
  77. chalk/_gen/chalk/server/v1/deploy_pb2_grpc.py +45 -0
  78. chalk/_gen/chalk/server/v1/deploy_pb2_grpc.pyi +12 -0
  79. chalk/_gen/chalk/server/v1/deployment_pb2.py +20 -15
  80. chalk/_gen/chalk/server/v1/deployment_pb2.pyi +25 -0
  81. chalk/_gen/chalk/server/v1/environment_pb2.py +25 -15
  82. chalk/_gen/chalk/server/v1/environment_pb2.pyi +93 -1
  83. chalk/_gen/chalk/server/v1/eventbus_pb2.py +44 -0
  84. chalk/_gen/chalk/server/v1/eventbus_pb2.pyi +64 -0
  85. chalk/_gen/chalk/server/v1/eventbus_pb2_grpc.py +4 -0
  86. chalk/_gen/chalk/server/v1/eventbus_pb2_grpc.pyi +4 -0
  87. chalk/_gen/chalk/server/v1/files_pb2.py +65 -0
  88. chalk/_gen/chalk/server/v1/files_pb2.pyi +167 -0
  89. chalk/_gen/chalk/server/v1/files_pb2_grpc.py +4 -0
  90. chalk/_gen/chalk/server/v1/files_pb2_grpc.pyi +4 -0
  91. chalk/_gen/chalk/server/v1/graph_pb2.py +41 -3
  92. chalk/_gen/chalk/server/v1/graph_pb2.pyi +191 -0
  93. chalk/_gen/chalk/server/v1/graph_pb2_grpc.py +92 -0
  94. chalk/_gen/chalk/server/v1/graph_pb2_grpc.pyi +32 -0
  95. chalk/_gen/chalk/server/v1/incident_pb2.py +57 -0
  96. chalk/_gen/chalk/server/v1/incident_pb2.pyi +165 -0
  97. chalk/_gen/chalk/server/v1/incident_pb2_grpc.py +4 -0
  98. chalk/_gen/chalk/server/v1/incident_pb2_grpc.pyi +4 -0
  99. chalk/_gen/chalk/server/v1/indexing_job_pb2.py +44 -0
  100. chalk/_gen/chalk/server/v1/indexing_job_pb2.pyi +38 -0
  101. chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.py +78 -0
  102. chalk/_gen/chalk/server/v1/indexing_job_pb2_grpc.pyi +38 -0
  103. chalk/_gen/chalk/server/v1/integrations_pb2.py +11 -9
  104. chalk/_gen/chalk/server/v1/integrations_pb2.pyi +34 -2
  105. chalk/_gen/chalk/server/v1/kube_pb2.py +29 -19
  106. chalk/_gen/chalk/server/v1/kube_pb2.pyi +28 -0
  107. chalk/_gen/chalk/server/v1/kube_pb2_grpc.py +45 -0
  108. chalk/_gen/chalk/server/v1/kube_pb2_grpc.pyi +12 -0
  109. chalk/_gen/chalk/server/v1/log_pb2.py +21 -3
  110. chalk/_gen/chalk/server/v1/log_pb2.pyi +68 -0
  111. chalk/_gen/chalk/server/v1/log_pb2_grpc.py +90 -0
  112. chalk/_gen/chalk/server/v1/log_pb2_grpc.pyi +24 -0
  113. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2.py +73 -0
  114. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2.pyi +212 -0
  115. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2_grpc.py +217 -0
  116. chalk/_gen/chalk/server/v1/metadataplanejobqueue_pb2_grpc.pyi +74 -0
  117. chalk/_gen/chalk/server/v1/model_registry_pb2.py +10 -10
  118. chalk/_gen/chalk/server/v1/model_registry_pb2.pyi +4 -1
  119. chalk/_gen/chalk/server/v1/monitoring_pb2.py +84 -75
  120. chalk/_gen/chalk/server/v1/monitoring_pb2.pyi +1 -0
  121. chalk/_gen/chalk/server/v1/monitoring_pb2_grpc.py +136 -0
  122. chalk/_gen/chalk/server/v1/monitoring_pb2_grpc.pyi +38 -0
  123. chalk/_gen/chalk/server/v1/offline_queries_pb2.py +32 -10
  124. chalk/_gen/chalk/server/v1/offline_queries_pb2.pyi +73 -0
  125. chalk/_gen/chalk/server/v1/offline_queries_pb2_grpc.py +90 -0
  126. chalk/_gen/chalk/server/v1/offline_queries_pb2_grpc.pyi +24 -0
  127. chalk/_gen/chalk/server/v1/plandebug_pb2.py +53 -0
  128. chalk/_gen/chalk/server/v1/plandebug_pb2.pyi +86 -0
  129. chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.py +168 -0
  130. chalk/_gen/chalk/server/v1/plandebug_pb2_grpc.pyi +60 -0
  131. chalk/_gen/chalk/server/v1/queries_pb2.py +76 -48
  132. chalk/_gen/chalk/server/v1/queries_pb2.pyi +155 -2
  133. chalk/_gen/chalk/server/v1/queries_pb2_grpc.py +180 -0
  134. chalk/_gen/chalk/server/v1/queries_pb2_grpc.pyi +48 -0
  135. chalk/_gen/chalk/server/v1/scheduled_query_pb2.py +4 -2
  136. chalk/_gen/chalk/server/v1/scheduled_query_pb2_grpc.py +45 -0
  137. chalk/_gen/chalk/server/v1/scheduled_query_pb2_grpc.pyi +12 -0
  138. chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.py +12 -6
  139. chalk/_gen/chalk/server/v1/scheduled_query_run_pb2.pyi +75 -2
  140. chalk/_gen/chalk/server/v1/scheduler_pb2.py +24 -12
  141. chalk/_gen/chalk/server/v1/scheduler_pb2.pyi +61 -1
  142. chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.py +90 -0
  143. chalk/_gen/chalk/server/v1/scheduler_pb2_grpc.pyi +24 -0
  144. chalk/_gen/chalk/server/v1/script_tasks_pb2.py +26 -14
  145. chalk/_gen/chalk/server/v1/script_tasks_pb2.pyi +33 -3
  146. chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.py +90 -0
  147. chalk/_gen/chalk/server/v1/script_tasks_pb2_grpc.pyi +24 -0
  148. chalk/_gen/chalk/server/v1/sql_interface_pb2.py +75 -0
  149. chalk/_gen/chalk/server/v1/sql_interface_pb2.pyi +142 -0
  150. chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.py +349 -0
  151. chalk/_gen/chalk/server/v1/sql_interface_pb2_grpc.pyi +114 -0
  152. chalk/_gen/chalk/server/v1/sql_queries_pb2.py +48 -0
  153. chalk/_gen/chalk/server/v1/sql_queries_pb2.pyi +150 -0
  154. chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.py +123 -0
  155. chalk/_gen/chalk/server/v1/sql_queries_pb2_grpc.pyi +52 -0
  156. chalk/_gen/chalk/server/v1/team_pb2.py +156 -137
  157. chalk/_gen/chalk/server/v1/team_pb2.pyi +56 -10
  158. chalk/_gen/chalk/server/v1/team_pb2_grpc.py +90 -0
  159. chalk/_gen/chalk/server/v1/team_pb2_grpc.pyi +24 -0
  160. chalk/_gen/chalk/server/v1/topic_pb2.py +5 -3
  161. chalk/_gen/chalk/server/v1/topic_pb2.pyi +10 -1
  162. chalk/_gen/chalk/server/v1/trace_pb2.py +50 -28
  163. chalk/_gen/chalk/server/v1/trace_pb2.pyi +121 -0
  164. chalk/_gen/chalk/server/v1/trace_pb2_grpc.py +135 -0
  165. chalk/_gen/chalk/server/v1/trace_pb2_grpc.pyi +42 -0
  166. chalk/_gen/chalk/server/v1/webhook_pb2.py +9 -3
  167. chalk/_gen/chalk/server/v1/webhook_pb2.pyi +18 -0
  168. chalk/_gen/chalk/server/v1/webhook_pb2_grpc.py +45 -0
  169. chalk/_gen/chalk/server/v1/webhook_pb2_grpc.pyi +12 -0
  170. chalk/_gen/chalk/streaming/v1/debug_service_pb2.py +62 -0
  171. chalk/_gen/chalk/streaming/v1/debug_service_pb2.pyi +75 -0
  172. chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.py +221 -0
  173. chalk/_gen/chalk/streaming/v1/debug_service_pb2_grpc.pyi +88 -0
  174. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.py +19 -7
  175. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2.pyi +96 -3
  176. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.py +48 -0
  177. chalk/_gen/chalk/streaming/v1/simple_streaming_service_pb2_grpc.pyi +20 -0
  178. chalk/_gen/chalk/utils/v1/field_change_pb2.py +32 -0
  179. chalk/_gen/chalk/utils/v1/field_change_pb2.pyi +42 -0
  180. chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.py +4 -0
  181. chalk/_gen/chalk/utils/v1/field_change_pb2_grpc.pyi +4 -0
  182. chalk/_lsp/error_builder.py +11 -0
  183. chalk/_monitoring/Chart.py +1 -3
  184. chalk/_version.py +1 -1
  185. chalk/cli.py +5 -10
  186. chalk/client/client.py +178 -64
  187. chalk/client/client_async.py +154 -0
  188. chalk/client/client_async_impl.py +22 -0
  189. chalk/client/client_grpc.py +738 -112
  190. chalk/client/client_impl.py +541 -136
  191. chalk/client/dataset.py +27 -6
  192. chalk/client/models.py +99 -2
  193. chalk/client/serialization/model_serialization.py +126 -10
  194. chalk/config/project_config.py +1 -1
  195. chalk/df/LazyFramePlaceholder.py +1154 -0
  196. chalk/df/ast_parser.py +2 -10
  197. chalk/features/_class_property.py +7 -0
  198. chalk/features/_embedding/embedding.py +1 -0
  199. chalk/features/_embedding/sentence_transformer.py +1 -1
  200. chalk/features/_encoding/converter.py +83 -2
  201. chalk/features/_encoding/pyarrow.py +20 -4
  202. chalk/features/_encoding/rich.py +1 -3
  203. chalk/features/_tensor.py +1 -2
  204. chalk/features/dataframe/_filters.py +14 -5
  205. chalk/features/dataframe/_impl.py +91 -36
  206. chalk/features/dataframe/_validation.py +11 -7
  207. chalk/features/feature_field.py +40 -30
  208. chalk/features/feature_set.py +1 -2
  209. chalk/features/feature_set_decorator.py +1 -0
  210. chalk/features/feature_wrapper.py +42 -3
  211. chalk/features/hooks.py +81 -12
  212. chalk/features/inference.py +65 -10
  213. chalk/features/resolver.py +338 -56
  214. chalk/features/tag.py +1 -3
  215. chalk/features/underscore_features.py +2 -1
  216. chalk/functions/__init__.py +456 -21
  217. chalk/functions/holidays.py +1 -3
  218. chalk/gitignore/gitignore_parser.py +5 -1
  219. chalk/importer.py +186 -74
  220. chalk/ml/__init__.py +6 -2
  221. chalk/ml/model_hooks.py +368 -51
  222. chalk/ml/model_reference.py +68 -10
  223. chalk/ml/model_version.py +34 -21
  224. chalk/ml/utils.py +143 -40
  225. chalk/operators/_utils.py +14 -3
  226. chalk/parsed/_proto/export.py +22 -0
  227. chalk/parsed/duplicate_input_gql.py +4 -0
  228. chalk/parsed/expressions.py +1 -3
  229. chalk/parsed/json_conversions.py +21 -14
  230. chalk/parsed/to_proto.py +16 -4
  231. chalk/parsed/user_types_to_json.py +31 -10
  232. chalk/parsed/validation_from_registries.py +182 -0
  233. chalk/queries/named_query.py +16 -6
  234. chalk/queries/scheduled_query.py +13 -1
  235. chalk/serialization/parsed_annotation.py +25 -12
  236. chalk/sql/__init__.py +221 -0
  237. chalk/sql/_internal/integrations/athena.py +6 -1
  238. chalk/sql/_internal/integrations/bigquery.py +22 -2
  239. chalk/sql/_internal/integrations/databricks.py +61 -18
  240. chalk/sql/_internal/integrations/mssql.py +281 -0
  241. chalk/sql/_internal/integrations/postgres.py +11 -3
  242. chalk/sql/_internal/integrations/redshift.py +4 -0
  243. chalk/sql/_internal/integrations/snowflake.py +11 -2
  244. chalk/sql/_internal/integrations/util.py +2 -1
  245. chalk/sql/_internal/sql_file_resolver.py +55 -10
  246. chalk/sql/_internal/sql_source.py +36 -2
  247. chalk/streams/__init__.py +1 -3
  248. chalk/streams/_kafka_source.py +5 -1
  249. chalk/streams/_windows.py +16 -4
  250. chalk/streams/types.py +1 -2
  251. chalk/utils/__init__.py +1 -3
  252. chalk/utils/_otel_version.py +13 -0
  253. chalk/utils/async_helpers.py +14 -5
  254. chalk/utils/df_utils.py +2 -2
  255. chalk/utils/duration.py +1 -3
  256. chalk/utils/job_log_display.py +538 -0
  257. chalk/utils/missing_dependency.py +5 -4
  258. chalk/utils/notebook.py +255 -2
  259. chalk/utils/pl_helpers.py +190 -37
  260. chalk/utils/pydanticutil/pydantic_compat.py +1 -2
  261. chalk/utils/storage_client.py +246 -0
  262. chalk/utils/threading.py +1 -3
  263. chalk/utils/tracing.py +194 -86
  264. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/METADATA +53 -21
  265. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/RECORD +268 -198
  266. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/WHEEL +0 -0
  267. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/entry_points.txt +0 -0
  268. {chalkpy-2.89.22.dist-info → chalkpy-2.95.3.dist-info}/top_level.txt +0 -0
chalk/utils/storage_client.py CHANGED
@@ -33,8 +33,11 @@ from chalk.utils.missing_dependency import missing_dependency_exception
 from chalk.utils.tracing import safe_distribution, safe_incr, safe_trace
 
 if TYPE_CHECKING:
+    import azure.storage.blob
     import google.cloud.storage
     import pyarrow as pa
+    from azure.core.credentials import TokenCredential
+    from azure.core.credentials_async import AsyncTokenCredential
     from fsspec import AbstractFileSystem
     from mypy_boto3_s3.client import S3Client
 
@@ -622,6 +625,249 @@ class S3StorageClient(StorageClient):
         return obj["ContentLength"]
 
 
+class AzureBlobStorageClient(StorageClient):
+    protocol = "abfs"
+
+    def __init__(
+        self,
+        blob_service_client: azure.storage.blob.BlobServiceClient,
+        account_name: str,
+        container_name: str,
+        executor: ThreadPoolExecutor,
+        credential: TokenCredential | AsyncTokenCredential | None = None,
+    ):
+        super().__init__()
+        try:
+            import adlfs
+        except ImportError:
+            raise missing_dependency_exception("chalkpy[runtime]")
+        self.bucket = container_name  # interface requirement
+        self._blob_service_client = blob_service_client
+        self._container_name = container_name
+        self._container_client = blob_service_client.get_container_client(container_name)
+        self._account_name = account_name
+        self._executor = executor
+        if credential is None:
+            credential = blob_service_client.credential
+        self.fs = adlfs.AzureBlobFileSystem(
+            account_name=account_name,
+            credential=credential,  # pyright: ignore[reportArgumentType]
+        )
+
+    @override
+    def get_uri(self, filename: str) -> str:
+        """Return a URI for a filename"""
+        return f"{self.protocol}://{self._container_name}@{self._account_name}.dfs.core.windows.net/{filename}"
+
+    def get_https_uri(self, filename: str) -> str:
+        """Return an HTTPS URI for signed URLs and direct access"""
+        return f"https://{self._account_name}.blob.core.windows.net/{self._container_name}/{filename}"
+
+    def _normalize_path(self, path: str) -> str:
+        if path.startswith(self._container_name):
+            return path[len(self._container_name) + 1 :]
+        return path
+
+    @override
+    def upload_object(
+        self,
+        filename: str,
+        content_type: str,
+        data: bytes | BinaryIO,
+        metadata: Mapping[str, str] | None = None,
+    ):
+        from azure.storage.blob import ContentSettings
+
+        if not isinstance(data, bytes):
+            data.seek(0)
+            data = data.read()
+        assert isinstance(data, bytes)
+
+        if metadata is None:
+            metadata = {}
+        else:
+            metadata = dict(metadata)
+
+        # Not using the actual filename in the tag to avoid tag blowup
+        safe_distribution(
+            "chalk.engine.storage_client.files_uploaded",
+            1,
+            tags=[f"bucket:{self.bucket}", f"protocol:{self.protocol}"],
+        )
+        safe_distribution(
+            "chalk.engine.storage_client.bytes_uploaded",
+            len(data),
+            tags=[f"bucket:{self.bucket}", f"protocol:{self.protocol}"],
+        )
+
+        blob_client = self._container_client.get_blob_client(self._normalize_path(filename))
+
+        # Optimize based on size
+        if len(data) < 256 * 1024 * 1024:  # < 256 MB
+            blob_client.upload_blob(
+                data,
+                overwrite=True,
+                content_settings=ContentSettings(content_type=content_type),
+                metadata=metadata,
+            )
+        else:
+            blob_client.upload_blob(
+                data,
+                overwrite=True,
+                max_block_size=100 * 1024 * 1024,
+                max_concurrency=8,
+                content_settings=ContentSettings(content_type=content_type),
+                metadata=metadata,
+            )
+
+    async def async_upload_object(
+        self,
+        filename: str,
+        content_type: str,
+        data: bytes | BinaryIO,
+        metadata: Mapping[str, str] | None = None,
+    ):
+        return await asyncio.get_running_loop().run_in_executor(
+            self._executor,
+            self.upload_object,
+            filename,
+            content_type,
+            data,
+            metadata,
+        )
+
+    @overload
+    @override
+    def download_object(self, filename: str) -> bytes:
+        ...
+
+    @overload
+    @override
+    def download_object(self, filename: str, destination: BinaryIO | str) -> None:
+        ...
+
+    @override
+    def download_object(self, filename: str, destination: str | BinaryIO | None = None) -> bytes | None:
+        blob_client = self._container_client.get_blob_client(self._normalize_path(filename))
+        if destination is None:
+            downloader = blob_client.download_blob()
+            return downloader.readall()
+        elif isinstance(destination, str):
+            with open(destination, "wb") as f:
+                downloader = blob_client.download_blob()
+                downloader.readinto(f)
+        else:
+            downloader = blob_client.download_blob()
+            downloader.readinto(destination)
+
+    @overload
+    @override
+    async def async_download_object(self, filename: str) -> bytes:
+        ...
+
+    @overload
+    @override
+    async def async_download_object(self, filename: str, destination: BinaryIO | str) -> None:
+        ...
+
+    @override
+    async def async_download_object(self, filename: str, destination: str | BinaryIO | None = None) -> bytes | None:
+        if destination is None:
+            return cast(
+                None,
+                await asyncio.get_running_loop().run_in_executor(self._executor, self.download_object, filename),
+            )
+        else:
+            return await asyncio.get_running_loop().run_in_executor(
+                self._executor, self.download_object, filename, destination
+            )
+
+    def sign_url(
+        self,
+        filename: str,
+        expiration: datetime,
+        mode: SignedUrlMode,
+        response_disposition: str | None = None,
+    ) -> str:
+        from datetime import timedelta
+
+        from azure.storage.blob import BlobSasPermissions, UserDelegationKey, generate_blob_sas
+
+        if expiration <= datetime.now(timezone.utc):
+            raise ValueError("Expiration time is in the past")
+
+        if mode == SignedUrlMode.DOWNLOAD:
+            permissions = BlobSasPermissions(read=True)
+        elif mode == SignedUrlMode.UPLOAD:
+            permissions = BlobSasPermissions(read=True, write=True, create=True, add=True)
+        else:
+            assert_never(mode)
+
+        filename = self._normalize_path(filename)
+
+        start_time = datetime.now(timezone.utc) - timedelta(minutes=10)
+        try:
+            user_delegation_key: UserDelegationKey = self._blob_service_client.get_user_delegation_key(
+                key_start_time=start_time,
+                key_expiry_time=expiration,
+            )
+        except Exception as e:
+            _logger.error(
+                f"Failed to get user delegation key for '{filename}'; falling back to storage client URL",
+                exc_info=e,
+            )
+            return self.get_https_uri(filename)
+
+        sas_token = generate_blob_sas(
+            account_name=self._account_name,
+            container_name=self._container_name,
+            blob_name=filename,
+            user_delegation_key=user_delegation_key,
+            permission=permissions,
+            expiry=expiration,
+            start=start_time,
+            protocol="https",
+            content_disposition=response_disposition,
+        )
+        return f"{self.get_https_uri(filename)}?{sas_token}"
+
+    def list_files(self, prefix: str, delimiter: Optional[str] = None) -> Iterable[str]:
+        try:
+            prefix = self._normalize_path(prefix)
+            if delimiter is None:
+                # Flat listing
+                blob_list = self._container_client.list_blobs(name_starts_with=prefix)
+                for blob in blob_list:
+                    yield blob.name
+            else:
+                # Hierarchical listing
+                blob_list = self._container_client.walk_blobs(name_starts_with=prefix, delimiter=delimiter)
+                for item in blob_list:
+                    # walk_blobs returns both BlobProperties and BlobPrefix
+                    if hasattr(item, "name"):
+                        yield item.name
+        except Exception:
+            _logger.error(f"Got exception while listing files for {prefix=}", exc_info=True)
+            raise
+
+    async def async_list_files(self, prefix: str, delimiter: Optional[str] = None) -> AsyncIterator[str]:
+        iterable = await asyncio.get_running_loop().run_in_executor(self._executor, self.list_files, prefix, delimiter)
+        async for filename in to_async_iterable(iterable, self._executor):
+            yield filename
+
+    def copy(self, source_filename: str, dest_filename: str) -> None:
+        source_blob_url = self.get_https_uri(self._normalize_path(source_filename))
+        dest_blob_client = self._container_client.get_blob_client(self._normalize_path(dest_filename))
+        dest_blob_client.start_copy_from_url(source_blob_url)
+
+    def get_file_size(self, filename: str) -> int:
+        blob_client = self._container_client.get_blob_client(self._normalize_path(filename))
+        properties = blob_client.get_blob_properties()
+        size = properties.size
+        assert size is not None
+        return size
+
+
 class LocalStorageClient(StorageClient):
     protocol = "file"
 
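The new AzureBlobStorageClient mirrors the existing S3 client: it wraps an azure.storage.blob.BlobServiceClient, exposes abfs:// and https:// URIs, signs URLs with a user delegation SAS, and offloads blocking calls to a thread pool for the async variants. Below is a minimal usage sketch, assuming the class and SignedUrlMode are importable from chalk.utils.storage_client; the account and container names are hypothetical, and only the constructor and method signatures are taken from the diff above.

from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timedelta, timezone

from azure.identity import DefaultAzureCredential
from azure.storage.blob import BlobServiceClient

# Import path and names follow the diff; account/container values are illustrative.
from chalk.utils.storage_client import AzureBlobStorageClient, SignedUrlMode

service = BlobServiceClient(
    account_url="https://myaccount.blob.core.windows.net",
    credential=DefaultAzureCredential(),
)
client = AzureBlobStorageClient(
    blob_service_client=service,
    account_name="myaccount",
    container_name="datasets",
    executor=ThreadPoolExecutor(max_workers=8),
)

# Upload bytes, then mint a short-lived SAS download link for the same blob.
client.upload_object("exports/run-1.parquet", "application/octet-stream", b"...")
signed = client.sign_url(
    "exports/run-1.parquet",
    expiration=datetime.now(timezone.utc) + timedelta(hours=1),
    mode=SignedUrlMode.DOWNLOAD,
)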
chalk/utils/threading.py CHANGED
@@ -7,9 +7,7 @@ import os
 import threading
 import weakref
 from concurrent.futures import Future, ThreadPoolExecutor
-from typing import Any, Callable, TypeVar
-
-from typing_extensions import Final, ParamSpec
+from typing import Any, Callable, Final, ParamSpec, TypeVar
 
 DESIRED_CPU_PARALLELISM: int = int(os.getenv("OMP_NUM_THREADS", max(8, os.cpu_count() or 8)))
 """
chalk/utils/tracing.py CHANGED
@@ -8,36 +8,85 @@ import types
 from typing import TYPE_CHECKING, Any, Mapping, Union, cast
 
 from chalk.utils._ddtrace_version import can_use_datadog_statsd, can_use_ddtrace
+from chalk.utils._otel_version import can_use_otel_trace
 from chalk.utils.environment_parsing import env_var_bool
+from chalk.utils.log_with_context import get_logger
 
 if TYPE_CHECKING:
     import ddtrace.context
+    from opentelemetry import trace as otel_trace
 
-if can_use_ddtrace and can_use_datadog_statsd:
-    import ddtrace
-    from datadog.dogstatsd.base import statsd
+_logger = get_logger(__name__)
 
-    def safe_set_gauge(gauge: str, value: int | float):
-        statsd.gauge(gauge, value)
+if can_use_otel_trace:
+    from opentelemetry import context as otel_context
+    from opentelemetry import trace as otel_trace
+    from opentelemetry.propagate import inject as otel_inject
 
-    def safe_incr(counter: str, value: int | float, tags: list[str] | None = None):
-        statsd.increment(counter, value, tags)
+    _logger.debug("OTEL trace packages installed, otel tracing is available")
 
-    def safe_distribution(counter: str, value: int | float, tags: list[str] | None = None):
-        statsd.distribution(counter, value, tags)
+    @contextlib.contextmanager
+    def safe_trace(span_id: str, attributes: Mapping[str, str] | None = None):  # pyright: ignore[reportRedeclaration]
+        if attributes is None:
+            attributes = {}
+        attributes = dict(attributes)
+        attributes["thread_id"] = str(threading.get_native_id())
+        with otel_trace.get_tracer("chalk").start_as_current_span(span_id) as span:
+            span.set_attributes(attributes)
+            yield span
+
+    def safe_add_metrics(metrics: Mapping[str, Union[int, float]]):  # pyright: ignore[reportRedeclaration]
+        current_span = otel_trace.get_current_span()
+        current_span.set_attributes(dict(metrics))
+
+    def safe_add_tags(tags: Mapping[str, str]):
+        current_span = otel_trace.get_current_span()
+        current_span.set_attributes(dict(tags))
+
+    def safe_current_trace_context() -> ddtrace.context.Context | otel_trace.SpanContext | None:  # pyright: ignore[reportRedeclaration]
+        return otel_trace.get_current_span().get_span_context()
 
     @contextlib.contextmanager
-    def safe_trace(span_id: str, attributes: Mapping[str, str] | None = None):
-        if not ddtrace.tracer.enabled:
+    def safe_activate_trace_context(  # pyright: ignore[reportRedeclaration]
+        ctx: ddtrace.context.Context
+        | ddtrace.Span
+        | otel_trace.SpanContext
+        | None,  # pyright: ignore[reportPrivateImportUsage]
+    ):
+        if isinstance(ctx, otel_trace.SpanContext):
+            new_span = otel_trace.NonRecordingSpan(ctx)
+            new_context = otel_trace.set_span_in_context(new_span)
+            token = otel_context.attach(new_context)
             yield
-            return
-        if (current_ctx := ddtrace.tracer.current_trace_context()) is None:
+            otel_context.detach(token)
+        else:
             yield
-            return
-        if (priority := current_ctx.sampling_priority) is not None and priority <= 0:
-            # If a priority is negative, then it won't be sampled
-            # See https://github.com/DataDog/dd-trace-py/blob/09edef713bf9f0ab30f554bf7765d7a7c2ed6f30/ddtrace/constants.py#L74
-            # Not sure what a priority=None means
+
+    def add_trace_headers(  # pyright: ignore[reportRedeclaration]
+        input_headers: None | dict[str, str]
+    ) -> dict[str, str]:
+        current_span_ctx = otel_trace.get_current_span().get_span_context()
+        new_span_ctx = otel_trace.SpanContext(
+            trace_id=current_span_ctx.trace_id,
+            span_id=current_span_ctx.span_id,
+            is_remote=current_span_ctx.is_remote,
+            trace_flags=otel_trace.TraceFlags(otel_trace.TraceFlags.SAMPLED),
+            trace_state=current_span_ctx.trace_state,
+        )
+        ctx = otel_trace.set_span_in_context(otel_trace.NonRecordingSpan(new_span_ctx))
+        headers: dict[str, str] = dict(input_headers if input_headers is not None else {})
+        otel_inject(headers, context=ctx)
+        return headers
+
+elif can_use_ddtrace:
+    import ddtrace
+    from ddtrace.propagation.http import HTTPPropagator
+
+    _logger.debug("ddtrace installed and available, using it to trace")
+
+    @contextlib.contextmanager
+    def safe_trace(span_id: str, attributes: Mapping[str, str] | None = None):  # pyright: ignore[reportRedeclaration]
+        if not ddtrace.tracer.enabled:
             yield
             return
         if attributes is None:
@@ -56,48 +105,91 @@ if can_use_ddtrace and can_use_datadog_statsd:
         span.set_tags(cast(Any, attributes))
         yield
 
-    def safe_add_metrics(metrics: Mapping[str, Union[int, float]]):
+    def safe_add_metrics(metrics: Mapping[str, Union[int, float]]):  # pyright: ignore[reportRedeclaration]
         span = ddtrace.tracer.current_span()
         if span:
             span.set_metrics(cast(Any, metrics))
 
-    def safe_add_tags(tags: Mapping[str, str]):
+    def safe_add_tags(tags: Mapping[str, str]):  # pyright: ignore[reportRedeclaration]
         span = ddtrace.tracer.current_span()
         if span:
             span.set_tags(cast(Any, tags))
 
-    def safe_current_trace_context():  # pyright: ignore[reportRedeclaration]
+    def safe_current_trace_context() -> ddtrace.context.Context | otel_trace.SpanContext | None:  # pyright: ignore[reportRedeclaration]
         return ddtrace.tracer.current_trace_context()
 
-    def safe_activate_trace_context(
-        ctx: ddtrace.context.Context | ddtrace.Span | None,  # pyright: ignore[reportPrivateImportUsage]
-    ) -> None:
-        ddtrace.tracer.context_provider.activate(ctx)
+    @contextlib.contextmanager
+    def safe_activate_trace_context(  # pyright: ignore[reportRedeclaration]
+        ctx: ddtrace.context.Context
+        | ddtrace.Span
+        | otel_trace.SpanContext
+        | None,  # pyright: ignore[reportPrivateImportUsage]
+    ):
+        if isinstance(ctx, ddtrace.context.Context) or isinstance(ctx, ddtrace.Span):
+            ddtrace.tracer.context_provider.activate(ctx)
+        yield
+
+    def add_trace_headers(  # pyright: ignore[reportRedeclaration]
+        input_headers: None | dict[str, str]
+    ) -> dict[str, str]:
+        headers: dict[str, str] = dict(input_headers if input_headers is not None else {})
+        span = ddtrace.tracer.current_span()
+        if span:
+            span.context.sampling_priority = 2
+            span.set_tags({ddtrace.constants.SAMPLING_PRIORITY_KEY: 2})  # Ensure that sampling is enabled
+            HTTPPropagator.inject(span.context, headers)
+        return headers
 
 else:
+    _logger.debug("no trace packages found, tracing will not work")
 
-    def safe_set_gauge(gauge: str, value: int | float):
+    @contextlib.contextmanager
+    def safe_trace(span_id: str, attributes: Mapping[str, str] | None = None):  # pyright: ignore[reportRedeclaration]
+        yield
+
+    def safe_add_metrics(metrics: Mapping[str, Union[int, float]]):  # pyright: ignore[reportRedeclaration]
         pass
 
-    def safe_incr(counter: str, value: int | float, tags: list[str] | None = None):
+    def safe_add_tags(tags: Mapping[str, str]):  # pyright: ignore[reportRedeclaration]
         pass
 
+    def safe_current_trace_context() -> ddtrace.context.Context | otel_trace.SpanContext | None:  # pyright: ignore[reportRedeclaration]
+        return
+
     @contextlib.contextmanager
-    def safe_trace(span_id: str, attributes: Mapping[str, str] | None = None):
+    def safe_activate_trace_context(  # pyright: ignore[reportRedeclaration]
+        ctx: ddtrace.context.Context
+        | ddtrace.Span
+        | otel_trace.Context
+        | otel_trace.SpanContext
+        | None,  # pyright: ignore[reportPrivateImportUsage]
+    ):
         yield
 
-    def safe_add_metrics(metrics: Mapping[str, Union[int, float]]):
-        pass
+    def add_trace_headers(headers: None | dict[str, str]) -> dict[str, str]:  # pyright: ignore[reportRedeclaration]
+        if headers is None:
+            return {}
+        return headers
 
-    def safe_add_tags(tags: Mapping[str, str]):
-        pass
 
-    def safe_current_trace_context():
-        return
+if can_use_datadog_statsd:
+    from datadog.dogstatsd.base import statsd
+
+    def safe_set_gauge(gauge: str, value: int | float):
+        statsd.gauge(gauge, value)
+
+    def safe_incr(counter: str, value: int | float, tags: list[str] | None = None):
+        statsd.increment(counter, value, tags)
 
-    def safe_activate_trace_context(
-        ctx: ddtrace.context.Context | ddtrace.Span | None,  # pyright: ignore[reportPrivateImportUsage]
-    ) -> None:
+    def safe_distribution(counter: str, value: int | float, tags: list[str] | None = None):
+        statsd.distribution(counter, value, tags)
+
+else:
+
+    def safe_set_gauge(gauge: str, value: int | float):
+        pass
+
+    def safe_incr(counter: str, value: int | float, tags: list[str] | None = None):
         pass
 
     def safe_distribution(counter: str, value: int | float, tags: list[str] | None = None):
@@ -135,54 +227,70 @@ def configure_tracing(default_service_name: str):
 
     _logger = get_logger(__name__)
 
-    if not can_use_ddtrace:
-        _logger.warning("ddtrace is not installed")
-        return
-
-    import ddtrace
-    from ddtrace.filters import FilterRequestsOnUrl
-
-    if ddtrace.config.service is None:
-        ddtrace.config.service = default_service_name
-    # Re-configuring the global tracer to capture any setting changes from environs from a .dotenv file
-    # which might be loaded after the first ddtrace import
-
-    ddtrace.tracer.configure(
-        enabled=None if "DD_TRACE_ENABLED" not in os.environ else env_var_bool("DD_TRACE_ENABLED"),
-        hostname=os.getenv("DD_AGENT_HOST") or os.getenv("DD_TRACE_AGENT_URL"),
-        uds_path=os.getenv("DD_TRACE_AGENT_URL"),
-        dogstatsd_url=os.getenv("DD_DOGSTATSD_URL"),
-        api_version=os.getenv("DD_TRACE_API_VERSION"),
-        compute_stats_enabled=env_var_bool("DD_TRACE_COMPUTE_STATS"),
-        iast_enabled=None if "DD_IAST_ENABLED" not in os.environ else env_var_bool("DD_IAST_ENABLED"),
-        # exclude healthcheck url from apm trace collection
-        settings={
-            "FILTERS": [
-                FilterRequestsOnUrl(
-                    [
-                        r"^http://.*/healthcheck$",
-                        r"^http://.*/ready$",
-                        r"^http://[^/]*/$",  # exclude "/"
-                    ]
-                )
-            ]
-        },
-    )
-    if ddtrace.tracer.enabled:
-        ddtrace.patch(
-            asyncio=True,
-            databricks=False,
-            fastapi=True,
-            futures=True,
-            httplib=True,
-            httpx=True,
-            psycopg=True,
-            redis=True,
-            requests=True,
-            sqlalchemy=False,
-            urllib3=True,
+    if can_use_otel_trace:
+        from opentelemetry import trace as otel_trace
+        from opentelemetry.sdk.resources import Resource
+        from opentelemetry.sdk.trace import TracerProvider
+
+        provider = TracerProvider(
+            resource=Resource.create(
+                {
+                    "service.name": default_service_name,
+                }
+            ),
+        )
+        otel_trace.set_tracer_provider(provider)
+
+    elif can_use_ddtrace:
+        import ddtrace
+        from ddtrace.filters import FilterRequestsOnUrl
+
+        if ddtrace.config.service is None:
+            ddtrace.config.service = default_service_name
+        # Re-configuring the global tracer to capture any setting changes from environs from a .dotenv file
+        # which might be loaded after the first ddtrace import
+
+        ddtrace.tracer.configure(
+            enabled=None if "DD_TRACE_ENABLED" not in os.environ else env_var_bool("DD_TRACE_ENABLED"),
+            hostname=os.getenv("DD_AGENT_HOST") or os.getenv("DD_TRACE_AGENT_URL"),
+            uds_path=os.getenv("DD_TRACE_AGENT_URL"),
+            dogstatsd_url=os.getenv("DD_DOGSTATSD_URL"),
+            api_version=os.getenv("DD_TRACE_API_VERSION"),
+            compute_stats_enabled=env_var_bool("DD_TRACE_COMPUTE_STATS"),
+            iast_enabled=None if "DD_IAST_ENABLED" not in os.environ else env_var_bool("DD_IAST_ENABLED"),
+            # exclude healthcheck url from apm trace collection
+            settings={
+                "FILTERS": [
+                    FilterRequestsOnUrl(
+                        [
+                            r"^http://.*/healthcheck$",
+                            r"^http://.*/ready$",
+                            r"^http://[^/]*/$",  # exclude "/"
+                        ]
+                    )
+                ]
+            },
         )
+        if ddtrace.tracer.enabled:
+            ddtrace.patch(
+                asyncio=True,
+                databricks=False,
+                fastapi=True,
+                futures=True,
+                httplib=True,
+                httpx=True,
+                psycopg=True,
+                redis=True,
+                requests=True,
+                sqlalchemy=False,
+                urllib3=True,
+            )
+
+        _logger.info(
+            f"Configuring DDtrace tracing: enabled={ddtrace.tracer.enabled}, service={ddtrace.config.service}, env={ddtrace.config.env}, trace_agent_url: {ddtrace.config._trace_agent_url}, effective trace agent: {ddtrace.tracer._agent_url}"  # pyright: ignore [reportAttributeAccessIssue, reportPrivateUsage]
+        )
+    else:
+        _logger.warning("neither opentelemetry nor ddtrace are installed")
+
 
-    _logger.info(
-        f"Configuring DDtrace tracing: enabled={ddtrace.tracer.enabled}, service={ddtrace.config.service}, env={ddtrace.config.env}, trace_agent_url: {ddtrace.config._trace_agent_url}, effective trace agent: {ddtrace.tracer._agent_url}"  # pyright: ignore [reportAttributeAccessIssue, reportPrivateUsage]
-    )
+configure_tracing("chalkpy")
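
With this change the tracing helpers prefer OpenTelemetry when it is importable, fall back to ddtrace, and otherwise degrade to no-ops, while the statsd gauges and counters are gated solely on can_use_datadog_statsd. Below is a small sketch of how the helpers compose, assuming opentelemetry is installed; the function names and signatures come from the diff, while the service name, span name, and attribute values are illustrative.

from chalk.utils.tracing import (
    add_trace_headers,
    configure_tracing,
    safe_add_tags,
    safe_trace,
)

# Installs an OTel TracerProvider if opentelemetry is importable, reconfigures
# ddtrace if not, and only logs a warning when neither package is present.
configure_tracing("my-service")  # illustrative; the module itself calls configure_tracing("chalkpy")

with safe_trace("my.operation", {"tenant": "acme"}):
    safe_add_tags({"stage": "resolve"})
    # Inject the active trace context into outbound HTTP headers.
    headers = add_trace_headers({"content-type": "application/json"})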