cosmotech-acceleration-library 1.1.0__py3-none-any.whl → 2.1.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. cosmotech/coal/__init__.py +1 -1
  2. cosmotech/coal/aws/__init__.py +1 -9
  3. cosmotech/coal/aws/s3.py +181 -214
  4. cosmotech/coal/azure/__init__.py +5 -5
  5. cosmotech/coal/azure/adx/__init__.py +24 -10
  6. cosmotech/coal/azure/adx/auth.py +2 -2
  7. cosmotech/coal/azure/adx/ingestion.py +10 -14
  8. cosmotech/coal/azure/adx/query.py +1 -1
  9. cosmotech/coal/azure/adx/runner.py +13 -14
  10. cosmotech/coal/azure/adx/store.py +5 -86
  11. cosmotech/coal/azure/adx/tables.py +2 -2
  12. cosmotech/coal/azure/adx/utils.py +2 -2
  13. cosmotech/coal/azure/blob.py +20 -26
  14. cosmotech/coal/azure/storage.py +3 -3
  15. cosmotech/coal/cosmotech_api/__init__.py +0 -28
  16. cosmotech/coal/cosmotech_api/apis/__init__.py +14 -0
  17. cosmotech/coal/cosmotech_api/apis/dataset.py +222 -0
  18. cosmotech/coal/cosmotech_api/apis/meta.py +25 -0
  19. cosmotech/coal/cosmotech_api/apis/organization.py +24 -0
  20. cosmotech/coal/cosmotech_api/apis/run.py +38 -0
  21. cosmotech/coal/cosmotech_api/apis/runner.py +75 -0
  22. cosmotech/coal/cosmotech_api/apis/solution.py +23 -0
  23. cosmotech/coal/cosmotech_api/apis/workspace.py +108 -0
  24. cosmotech/coal/cosmotech_api/objects/__init__.py +9 -0
  25. cosmotech/coal/cosmotech_api/objects/connection.py +125 -0
  26. cosmotech/coal/cosmotech_api/objects/parameters.py +127 -0
  27. cosmotech/coal/postgresql/runner.py +58 -41
  28. cosmotech/coal/postgresql/store.py +56 -15
  29. cosmotech/coal/postgresql/utils.py +255 -0
  30. cosmotech/coal/singlestore/store.py +3 -2
  31. cosmotech/coal/store/__init__.py +16 -13
  32. cosmotech/coal/store/output/__init__.py +0 -0
  33. cosmotech/coal/store/output/aws_channel.py +74 -0
  34. cosmotech/coal/store/output/az_storage_channel.py +33 -0
  35. cosmotech/coal/store/output/channel_interface.py +38 -0
  36. cosmotech/coal/store/output/channel_spliter.py +61 -0
  37. cosmotech/coal/store/output/postgres_channel.py +37 -0
  38. cosmotech/coal/store/pandas.py +1 -1
  39. cosmotech/coal/store/pyarrow.py +2 -2
  40. cosmotech/coal/store/store.py +4 -7
  41. cosmotech/coal/utils/configuration.py +197 -0
  42. cosmotech/coal/utils/decorator.py +4 -7
  43. cosmotech/csm_data/commands/adx_send_data.py +1 -1
  44. cosmotech/csm_data/commands/adx_send_runnerdata.py +3 -2
  45. cosmotech/csm_data/commands/api/api.py +6 -19
  46. cosmotech/csm_data/commands/api/postgres_send_runner_metadata.py +20 -16
  47. cosmotech/csm_data/commands/api/run_load_data.py +15 -52
  48. cosmotech/csm_data/commands/api/wsf_load_file.py +13 -16
  49. cosmotech/csm_data/commands/api/wsf_send_file.py +11 -14
  50. cosmotech/csm_data/commands/az_storage_upload.py +3 -2
  51. cosmotech/csm_data/commands/s3_bucket_delete.py +16 -15
  52. cosmotech/csm_data/commands/s3_bucket_download.py +16 -16
  53. cosmotech/csm_data/commands/s3_bucket_upload.py +16 -14
  54. cosmotech/csm_data/commands/store/dump_to_azure.py +3 -2
  55. cosmotech/csm_data/commands/store/dump_to_postgresql.py +3 -2
  56. cosmotech/csm_data/commands/store/dump_to_s3.py +18 -16
  57. cosmotech/csm_data/commands/store/list_tables.py +3 -2
  58. cosmotech/csm_data/commands/store/load_csv_folder.py +10 -4
  59. cosmotech/csm_data/commands/store/load_from_singlestore.py +3 -2
  60. cosmotech/csm_data/commands/store/output.py +35 -0
  61. cosmotech/csm_data/commands/store/reset.py +8 -3
  62. cosmotech/csm_data/commands/store/store.py +3 -3
  63. cosmotech/csm_data/main.py +4 -4
  64. cosmotech/csm_data/utils/decorators.py +4 -3
  65. cosmotech/translation/coal/en-US/coal/cosmotech_api/initialization.yml +8 -0
  66. cosmotech/translation/coal/en-US/coal/services/dataset.yml +10 -14
  67. cosmotech/translation/coal/en-US/coal/store/output/data_interface.yml +1 -0
  68. cosmotech/translation/coal/en-US/coal/store/output/split.yml +6 -0
  69. cosmotech/translation/coal/en-US/coal/utils/configuration.yml +2 -0
  70. cosmotech/translation/csm_data/en-US/csm_data/commands/store/output.yml +7 -0
  71. {cosmotech_acceleration_library-1.1.0.dist-info → cosmotech_acceleration_library-2.1.0rc1.dist-info}/METADATA +29 -33
  72. cosmotech_acceleration_library-2.1.0rc1.dist-info/RECORD +153 -0
  73. {cosmotech_acceleration_library-1.1.0.dist-info → cosmotech_acceleration_library-2.1.0rc1.dist-info}/WHEEL +1 -1
  74. cosmotech/coal/azure/functions.py +0 -72
  75. cosmotech/coal/cosmotech_api/connection.py +0 -96
  76. cosmotech/coal/cosmotech_api/dataset/__init__.py +0 -26
  77. cosmotech/coal/cosmotech_api/dataset/converters.py +0 -164
  78. cosmotech/coal/cosmotech_api/dataset/download/__init__.py +0 -19
  79. cosmotech/coal/cosmotech_api/dataset/download/adt.py +0 -119
  80. cosmotech/coal/cosmotech_api/dataset/download/common.py +0 -140
  81. cosmotech/coal/cosmotech_api/dataset/download/file.py +0 -229
  82. cosmotech/coal/cosmotech_api/dataset/download/twingraph.py +0 -185
  83. cosmotech/coal/cosmotech_api/dataset/upload.py +0 -41
  84. cosmotech/coal/cosmotech_api/dataset/utils.py +0 -132
  85. cosmotech/coal/cosmotech_api/parameters.py +0 -48
  86. cosmotech/coal/cosmotech_api/run.py +0 -25
  87. cosmotech/coal/cosmotech_api/run_data.py +0 -173
  88. cosmotech/coal/cosmotech_api/run_template.py +0 -108
  89. cosmotech/coal/cosmotech_api/runner/__init__.py +0 -28
  90. cosmotech/coal/cosmotech_api/runner/data.py +0 -38
  91. cosmotech/coal/cosmotech_api/runner/datasets.py +0 -416
  92. cosmotech/coal/cosmotech_api/runner/download.py +0 -135
  93. cosmotech/coal/cosmotech_api/runner/metadata.py +0 -42
  94. cosmotech/coal/cosmotech_api/runner/parameters.py +0 -157
  95. cosmotech/coal/cosmotech_api/twin_data_layer.py +0 -512
  96. cosmotech/coal/cosmotech_api/workspace.py +0 -127
  97. cosmotech/coal/utils/postgresql.py +0 -236
  98. cosmotech/coal/utils/semver.py +0 -6
  99. cosmotech/csm_data/commands/api/rds_load_csv.py +0 -90
  100. cosmotech/csm_data/commands/api/rds_send_csv.py +0 -74
  101. cosmotech/csm_data/commands/api/rds_send_store.py +0 -74
  102. cosmotech/csm_data/commands/api/runtemplate_load_handler.py +0 -66
  103. cosmotech/csm_data/commands/api/tdl_load_files.py +0 -76
  104. cosmotech/csm_data/commands/api/tdl_send_files.py +0 -82
  105. cosmotech/orchestrator_plugins/csm-data/templates/api/rds_load_csv.json +0 -27
  106. cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_csv.json +0 -27
  107. cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_store.json +0 -27
  108. cosmotech/orchestrator_plugins/csm-data/templates/api/runtemplate_load_handler.json +0 -27
  109. cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_load_files.json +0 -32
  110. cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_send_files.json +0 -27
  111. cosmotech/translation/coal/en-US/coal/cosmotech_api/run_data.yml +0 -2
  112. cosmotech/translation/csm_data/en-US/csm_data/commands/api/rds_load_csv.yml +0 -13
  113. cosmotech/translation/csm_data/en-US/csm_data/commands/api/rds_send_csv.yml +0 -12
  114. cosmotech/translation/csm_data/en-US/csm_data/commands/api/rds_send_store.yml +0 -12
  115. cosmotech/translation/csm_data/en-US/csm_data/commands/api/tdl_load_files.yml +0 -14
  116. cosmotech/translation/csm_data/en-US/csm_data/commands/api/tdl_send_files.yml +0 -18
  117. cosmotech_acceleration_library-1.1.0.dist-info/RECORD +0 -171
  118. {cosmotech_acceleration_library-1.1.0.dist-info → cosmotech_acceleration_library-2.1.0rc1.dist-info}/entry_points.txt +0 -0
  119. {cosmotech_acceleration_library-1.1.0.dist-info → cosmotech_acceleration_library-2.1.0rc1.dist-info}/licenses/LICENSE +0 -0
  120. {cosmotech_acceleration_library-1.1.0.dist-info → cosmotech_acceleration_library-2.1.0rc1.dist-info}/top_level.txt +0 -0
@@ -5,4 +5,4 @@
5
5
  # etc., to any person is prohibited unless it has been previously and
6
6
  # specifically authorized by written means by Cosmo Tech.
7
7
 
8
- __version__ = "1.1.0"
8
+ __version__ = "2.1.0-rc1"
@@ -12,12 +12,4 @@ This module provides functions for interacting with AWS services like S3.
12
12
  """
13
13
 
14
14
  # Re-export S3 functions for easier importing
15
- from cosmotech.coal.aws.s3 import (
16
- create_s3_client,
17
- create_s3_resource,
18
- upload_file,
19
- upload_folder,
20
- download_files,
21
- upload_data_stream,
22
- delete_objects,
23
- )
15
+ from cosmotech.coal.aws.s3 import S3
cosmotech/coal/aws/s3.py CHANGED
@@ -6,230 +6,197 @@
6
6
  # specifically authorized by written means by Cosmo Tech.
7
7
 
8
8
  """
9
- S3 bucket operations module.
9
+ s3 bucket operations module.
10
10
 
11
- This module provides functions for interacting with S3 buckets, including
11
+ this module provides functions for interacting with S3 buckets, including
12
12
  uploading, downloading, and deleting files.
13
13
  """
14
14
 
15
15
  import pathlib
16
16
  from io import BytesIO
17
- from typing import Optional, Dict, Any, List, Iterator
18
17
 
19
18
  import boto3
19
+ from cosmotech.orchestrator.utils.translate import T
20
20
 
21
+ from cosmotech.coal.utils.configuration import Configuration
21
22
  from cosmotech.coal.utils.logger import LOGGER
22
- from cosmotech.orchestrator.utils.translate import T
23
23
 
24
24
 
25
- def create_s3_client(
26
- endpoint_url: str,
27
- access_id: str,
28
- secret_key: str,
29
- use_ssl: bool = True,
30
- ssl_cert_bundle: Optional[str] = None,
31
- ) -> boto3.client:
32
- """
33
- Create an S3 client with the given credentials and configuration.
34
-
35
- Args:
36
- endpoint_url: The S3 endpoint URL
37
- access_id: The AWS access key ID
38
- secret_key: The AWS secret access key
39
- use_ssl: Whether to use SSL for the connection
40
- ssl_cert_bundle: Path to the SSL certificate bundle
41
-
42
- Returns:
43
- An S3 client object
44
- """
45
- boto3_parameters = {
46
- "use_ssl": use_ssl,
47
- "endpoint_url": endpoint_url,
48
- "aws_access_key_id": access_id,
49
- "aws_secret_access_key": secret_key,
50
- }
51
- if ssl_cert_bundle:
52
- boto3_parameters["verify"] = ssl_cert_bundle
53
-
54
- return boto3.client("s3", **boto3_parameters)
55
-
56
-
57
- def create_s3_resource(
58
- endpoint_url: str,
59
- access_id: str,
60
- secret_key: str,
61
- use_ssl: bool = True,
62
- ssl_cert_bundle: Optional[str] = None,
63
- ) -> boto3.resource:
64
- """
65
- Create an S3 resource with the given credentials and configuration.
66
-
67
- Args:
68
- endpoint_url: The S3 endpoint URL
69
- access_id: The AWS access key ID
70
- secret_key: The AWS secret access key
71
- use_ssl: Whether to use SSL for the connection
72
- ssl_cert_bundle: Path to the SSL certificate bundle
73
-
74
- Returns:
75
- An S3 resource object
76
- """
77
- boto3_parameters = {
78
- "use_ssl": use_ssl,
79
- "endpoint_url": endpoint_url,
80
- "aws_access_key_id": access_id,
81
- "aws_secret_access_key": secret_key,
82
- }
83
- if ssl_cert_bundle:
84
- boto3_parameters["verify"] = ssl_cert_bundle
85
-
86
- return boto3.resource("s3", **boto3_parameters)
87
-
88
-
89
- def upload_file(
90
- file_path: pathlib.Path,
91
- bucket_name: str,
92
- s3_resource: boto3.resource,
93
- file_prefix: str = "",
94
- ) -> None:
95
- """
96
- Upload a single file to an S3 bucket.
97
-
98
- Args:
99
- file_path: Path to the file to upload
100
- bucket_name: Name of the S3 bucket
101
- s3_resource: S3 resource object
102
- file_prefix: Prefix to add to the file name in the bucket
103
- """
104
- uploaded_file_name = file_prefix + file_path.name
105
- LOGGER.info(T("coal.common.data_transfer.file_sent").format(file_path=file_path, uploaded_name=uploaded_file_name))
106
- s3_resource.Bucket(bucket_name).upload_file(str(file_path), uploaded_file_name)
107
-
108
-
109
- def upload_folder(
110
- source_folder: str,
111
- bucket_name: str,
112
- s3_resource: boto3.resource,
113
- file_prefix: str = "",
114
- recursive: bool = False,
115
- ) -> None:
116
- """
117
- Upload files from a folder to an S3 bucket.
118
-
119
- Args:
120
- source_folder: Path to the folder containing files to upload
121
- bucket_name: Name of the S3 bucket
122
- s3_resource: S3 resource object
123
- file_prefix: Prefix to add to the file names in the bucket
124
- recursive: Whether to recursively upload files from subdirectories
125
- """
126
- source_path = pathlib.Path(source_folder)
127
- if not source_path.exists():
128
- LOGGER.error(T("coal.common.file_operations.not_found").format(source_folder=source_folder))
129
- raise FileNotFoundError(T("coal.common.file_operations.not_found").format(source_folder=source_folder))
130
-
131
- if source_path.is_dir():
132
- _source_name = str(source_path)
133
- for _file_path in source_path.glob("**/*" if recursive else "*"):
134
- if _file_path.is_file():
135
- _file_name = str(_file_path).removeprefix(_source_name).removeprefix("/")
136
- uploaded_file_name = file_prefix + _file_name
137
- LOGGER.info(
138
- T("coal.common.data_transfer.file_sent").format(
139
- file_path=_file_path, uploaded_name=uploaded_file_name
25
+ class S3:
26
+
27
+ def __init__(self, configuration: Configuration):
28
+ self._configuration = configuration.s3
29
+
30
+ @property
31
+ def file_prefix(self):
32
+ if "bucket_prefix" in self._configuration:
33
+ return self._configuration.bucket_prefix
34
+ return ""
35
+
36
+ @property
37
+ def use_ssl(self):
38
+ if "use_ssl" in self._configuration:
39
+ return self._configuration.use_ssl
40
+ return True
41
+
42
+ @property
43
+ def ssl_cert_bundle(self):
44
+ if "ssl_cert_bundle" in self._configuration:
45
+ return self._configuration.ssl_cert_bundle
46
+ return None
47
+
48
+ @property
49
+ def access_key_id(self):
50
+ return self._configuration.access_key_id
51
+
52
+ @property
53
+ def endpoint_url(self):
54
+ return self._configuration.endpoint_url
55
+
56
+ @property
57
+ def bucket_name(self):
58
+ return self._configuration.bucket_name
59
+
60
+ @property
61
+ def secret_access_key(self):
62
+ return self._configuration.secret_access_key
63
+
64
+ @property
65
+ def output_type(self):
66
+ if "output_type" in self._configuration:
67
+ return self._configuration.output_type
68
+ return "csv"
69
+
70
+ @property
71
+ def client(self) -> boto3.client:
72
+ boto3_parameters = {
73
+ "use_ssl": self.use_ssl,
74
+ "endpoint_url": self.endpoint_url,
75
+ "aws_access_key_id": self.access_key_id,
76
+ "aws_secret_access_key": self.secret_access_key,
77
+ }
78
+ if self.ssl_cert_bundle:
79
+ boto3_parameters["verify"] = self.ssl_cert_bundle
80
+
81
+ return boto3.client("s3", **boto3_parameters)
82
+
83
+ @property
84
+ def resource(self) -> boto3.resource:
85
+ boto3_parameters = {
86
+ "use_ssl": self.use_ssl,
87
+ "endpoint_url": self.endpoint_url,
88
+ "aws_access_key_id": self.access_key_id,
89
+ "aws_secret_access_key": self.secret_access_key,
90
+ }
91
+ if self.ssl_cert_bundle:
92
+ boto3_parameters["verify"] = self.ssl_cert_bundle
93
+
94
+ return boto3.resource("s3", **boto3_parameters)
95
+
96
+ def upload_file(self, file_path: pathlib.Path) -> None:
97
+ """
98
+ Upload a single file to an S3 bucket.
99
+
100
+ Args:
101
+ file_path: Path to the file to upload
102
+ """
103
+ uploaded_file_name = self.file_prefix + file_path.name
104
+ LOGGER.info(
105
+ T("coal.common.data_transfer.file_sent").format(file_path=file_path, uploaded_name=uploaded_file_name)
106
+ )
107
+ self.resource.Bucket(self.bucket_name).upload_file(str(file_path), uploaded_file_name)
108
+
109
+ def upload_folder(self, source_folder: str, recursive: bool = False) -> None:
110
+ """
111
+ Upload files from a folder to an S3 bucket.
112
+
113
+ Args:
114
+ source_folder: Path to the folder containing files to upload
115
+ recursive: Whether to recursively upload files from subdirectories
116
+ """
117
+ source_path = pathlib.Path(source_folder)
118
+ if not source_path.exists():
119
+ LOGGER.error(T("coal.common.file_operations.not_found").format(source_folder=source_folder))
120
+ raise FileNotFoundError(T("coal.common.file_operations.not_found").format(source_folder=source_folder))
121
+
122
+ if source_path.is_dir():
123
+ _source_name = str(source_path)
124
+ for _file_path in source_path.glob("**/*" if recursive else "*"):
125
+ if _file_path.is_file():
126
+ _file_name = str(_file_path).removeprefix(_source_name).removeprefix("/")
127
+ uploaded_file_name = self.file_prefix + _file_name
128
+ LOGGER.info(
129
+ T("coal.common.data_transfer.file_sent").format(
130
+ file_path=_file_path, uploaded_name=uploaded_file_name
131
+ )
140
132
  )
141
- )
142
- s3_resource.Bucket(bucket_name).upload_file(str(_file_path), uploaded_file_name)
143
- else:
144
- upload_file(source_path, bucket_name, s3_resource, file_prefix)
145
-
146
-
147
- def download_files(
148
- target_folder: str,
149
- bucket_name: str,
150
- s3_resource: boto3.resource,
151
- file_prefix: Optional[str] = None,
152
- ) -> None:
153
- """
154
- Download files from an S3 bucket to a local folder.
155
-
156
- Args:
157
- target_folder: Local folder to download files to
158
- bucket_name: Name of the S3 bucket
159
- s3_resource: S3 resource object
160
- file_prefix: Optional prefix to filter objects to download
161
- """
162
- bucket = s3_resource.Bucket(bucket_name)
163
-
164
- pathlib.Path(target_folder).mkdir(parents=True, exist_ok=True)
165
- remove_prefix = False
166
- if file_prefix:
167
- bucket_files = bucket.objects.filter(Prefix=file_prefix)
168
- if file_prefix.endswith("/"):
169
- remove_prefix = True
170
- else:
171
- bucket_files = bucket.objects.all()
172
- for _file in bucket_files:
173
- if not (path_name := str(_file.key)).endswith("/"):
174
- target_file = path_name
175
- if remove_prefix:
176
- target_file = target_file.removeprefix(file_prefix)
177
- output_file = f"{target_folder}/{target_file}"
178
- pathlib.Path(output_file).parent.mkdir(parents=True, exist_ok=True)
179
- LOGGER.info(T("coal.services.azure_storage.downloading").format(path=path_name, output=output_file))
180
- bucket.download_file(_file.key, output_file)
181
-
182
-
183
- def upload_data_stream(
184
- data_stream: BytesIO,
185
- bucket_name: str,
186
- s3_client: boto3.client,
187
- file_name: str,
188
- file_prefix: str = "",
189
- ) -> None:
190
- """
191
- Upload a data stream to an S3 bucket.
192
-
193
- Args:
194
- data_stream: BytesIO stream containing the data to upload
195
- bucket_name: Name of the S3 bucket
196
- s3_client: S3 client object
197
- file_name: Name of the file to create in the bucket
198
- file_prefix: Prefix to add to the file name in the bucket
199
- """
200
- uploaded_file_name = file_prefix + file_name
201
- data_stream.seek(0)
202
- size = len(data_stream.read())
203
- data_stream.seek(0)
204
-
205
- LOGGER.info(T("coal.common.data_transfer.sending_data").format(size=size))
206
- s3_client.upload_fileobj(data_stream, bucket_name, uploaded_file_name)
207
-
208
-
209
- def delete_objects(
210
- bucket_name: str,
211
- s3_resource: boto3.resource,
212
- file_prefix: Optional[str] = None,
213
- ) -> None:
214
- """
215
- Delete objects from an S3 bucket, optionally filtered by prefix.
216
-
217
- Args:
218
- bucket_name: Name of the S3 bucket
219
- s3_resource: S3 resource object
220
- file_prefix: Optional prefix to filter objects to delete
221
- """
222
- bucket = s3_resource.Bucket(bucket_name)
223
-
224
- if file_prefix:
225
- bucket_files = bucket.objects.filter(Prefix=file_prefix)
226
- else:
227
- bucket_files = bucket.objects.all()
228
-
229
- boto_objects = [{"Key": _file.key} for _file in bucket_files if _file.key != file_prefix]
230
- if boto_objects:
231
- LOGGER.info(T("coal.services.azure_storage.deleting_objects").format(objects=boto_objects))
232
- boto_delete_request = {"Objects": boto_objects}
233
- bucket.delete_objects(Delete=boto_delete_request)
234
- else:
235
- LOGGER.info(T("coal.services.azure_storage.no_objects"))
133
+ self.resource.Bucket(self.bucket_name).upload_file(str(_file_path), uploaded_file_name)
134
+ else:
135
+ self.upload_file(source_path)
136
+
137
+ def download_files(self, destination_folder: str) -> None:
138
+ """
139
+ Download files from an S3 bucket to a local folder.
140
+
141
+ Args:
142
+ destination_folder: Local folder to download files to
143
+ """
144
+ bucket = self.resource.Bucket(self.bucket_name)
145
+
146
+ pathlib.Path(destination_folder).mkdir(parents=True, exist_ok=True)
147
+ remove_prefix = False
148
+ if self.file_prefix:
149
+ bucket_files = bucket.objects.filter(Prefix=self.file_prefix)
150
+ if self.file_prefix.endswith("/"):
151
+ remove_prefix = True
152
+ else:
153
+ bucket_files = bucket.objects.all()
154
+ for _file in bucket_files:
155
+ if not (path_name := str(_file.key)).endswith("/"):
156
+ target_file = path_name
157
+ if remove_prefix:
158
+ target_file = target_file.removeprefix(self.file_prefix)
159
+ output_file = f"{destination_folder}/{target_file}"
160
+ pathlib.Path(output_file).parent.mkdir(parents=True, exist_ok=True)
161
+ LOGGER.info(T("coal.services.azure_storage.downloading").format(path=path_name, output=output_file))
162
+ bucket.download_file(_file.key, output_file)
163
+
164
+ def upload_data_stream(self, data_stream: BytesIO, file_name: str) -> None:
165
+ """
166
+ Upload a data stream to an S3 bucket.
167
+
168
+ Args:
169
+ data_stream: BytesIO stream containing the data to upload
170
+ file_name: Name of the file to create in the bucket
171
+ """
172
+ uploaded_file_name = self.file_prefix + file_name
173
+ data_stream.seek(0)
174
+ size = len(data_stream.read())
175
+ data_stream.seek(0)
176
+
177
+ LOGGER.info(T("coal.common.data_transfer.sending_data").format(size=size))
178
+ self.client.upload_fileobj(data_stream, self.bucket_name, uploaded_file_name)
179
+
180
+ def delete_objects(self) -> None:
181
+ """
182
+ Delete objects from an S3 bucket, optionally filtered by prefix.
183
+
184
+ Args:
185
+ bucket_name: Name of the S3 bucket
186
+ s3_resource: S3 resource object
187
+ file_prefix: Optional prefix to filter objects to delete
188
+ """
189
+ bucket = self.resource.Bucket(self.bucket_name)
190
+
191
+ if self.file_prefix:
192
+ bucket_files = bucket.objects.filter(Prefix=self.file_prefix)
193
+ else:
194
+ bucket_files = bucket.objects.all()
195
+
196
+ boto_objects = [{"Key": _file.key} for _file in bucket_files if _file.key != self.file_prefix]
197
+ if boto_objects:
198
+ LOGGER.info(T("coal.services.azure_storage.deleting_objects").format(objects=boto_objects))
199
+ boto_delete_request = {"Objects": boto_objects}
200
+ bucket.delete_objects(Delete=boto_delete_request)
201
+ else:
202
+ LOGGER.info(T("coal.services.azure_storage.no_objects"))
@@ -11,13 +11,13 @@ Azure services integration module.
11
11
  This module provides functions for interacting with Azure services like Storage and ADX.
12
12
  """
13
13
 
14
+ # Re-export blob functions for easier importing
15
+ from cosmotech.coal.azure.blob import (
16
+ dump_store_to_azure,
17
+ )
18
+
14
19
  # Re-export storage functions for easier importing
15
20
  from cosmotech.coal.azure.storage import (
16
21
  upload_file,
17
22
  upload_folder,
18
23
  )
19
-
20
- # Re-export blob functions for easier importing
21
- from cosmotech.coal.azure.blob import (
22
- dump_store_to_azure,
23
- )
@@ -5,22 +5,36 @@
5
5
  # etc., to any person is prohibited unless it has been previously and
6
6
  # specifically authorized by written means by Cosmo Tech.
7
7
 
8
- from cosmotech.coal.azure.adx.auth import create_kusto_client, create_ingest_client, initialize_clients
9
- from cosmotech.coal.azure.adx.query import run_query, run_command_query
8
+ from cosmotech.coal.azure.adx.auth import (
9
+ create_ingest_client,
10
+ create_kusto_client,
11
+ initialize_clients,
12
+ )
10
13
  from cosmotech.coal.azure.adx.ingestion import (
11
- ingest_dataframe,
12
- send_to_adx,
14
+ IngestionStatus,
13
15
  check_ingestion_status,
14
- monitor_ingestion,
15
16
  handle_failures,
16
- IngestionStatus,
17
+ ingest_dataframe,
18
+ monitor_ingestion,
19
+ send_to_adx,
17
20
  )
18
- from cosmotech.coal.azure.adx.tables import table_exists, create_table, check_and_create_table, _drop_by_tag
19
- from cosmotech.coal.azure.adx.utils import type_mapping, create_column_mapping
20
- from cosmotech.coal.azure.adx.store import send_pyarrow_table_to_adx, send_table_data, process_tables, send_store_to_adx
21
+ from cosmotech.coal.azure.adx.query import run_command_query, run_query
21
22
  from cosmotech.coal.azure.adx.runner import (
22
- prepare_csv_content,
23
23
  construct_create_query,
24
24
  insert_csv_files,
25
+ prepare_csv_content,
25
26
  send_runner_data,
26
27
  )
28
+ from cosmotech.coal.azure.adx.store import (
29
+ process_tables,
30
+ send_pyarrow_table_to_adx,
31
+ send_store_to_adx,
32
+ send_table_data,
33
+ )
34
+ from cosmotech.coal.azure.adx.tables import (
35
+ _drop_by_tag,
36
+ check_and_create_table,
37
+ create_table,
38
+ table_exists,
39
+ )
40
+ from cosmotech.coal.azure.adx.utils import create_column_mapping, type_mapping
@@ -6,13 +6,13 @@
6
6
  # specifically authorized by written means by Cosmo Tech.
7
7
 
8
8
  import os
9
- from typing import Union, Optional, Tuple
9
+ from typing import Optional, Tuple
10
10
 
11
11
  from azure.kusto.data import KustoClient, KustoConnectionStringBuilder
12
12
  from azure.kusto.ingest import QueuedIngestClient
13
+ from cosmotech.orchestrator.utils.translate import T
13
14
 
14
15
  from cosmotech.coal.utils.logger import LOGGER
15
- from cosmotech.orchestrator.utils.translate import T
16
16
 
17
17
 
18
18
  def create_kusto_client(
@@ -5,28 +5,24 @@
5
5
  # etc., to any person is prohibited unless it has been previously and
6
6
  # specifically authorized by written means by Cosmo Tech.
7
7
 
8
+ import os
9
+ import time
8
10
  from enum import Enum
9
- from typing import Dict
10
- from typing import Iterator
11
- from typing import List
12
- from typing import Optional
13
- from typing import Tuple
11
+ from typing import Dict, Iterator, List, Optional, Tuple
14
12
 
15
- import os
16
13
  import pandas as pd
17
- import time
18
14
  import tqdm
19
15
  from azure.kusto.data import KustoClient
20
16
  from azure.kusto.data.data_format import DataFormat
21
- from azure.kusto.ingest import IngestionProperties
22
- from azure.kusto.ingest import QueuedIngestClient
23
- from azure.kusto.ingest import ReportLevel
24
- from azure.kusto.ingest.status import FailureMessage
25
- from azure.kusto.ingest.status import KustoIngestStatusQueues
26
- from azure.kusto.ingest.status import SuccessMessage
17
+ from azure.kusto.ingest import IngestionProperties, QueuedIngestClient, ReportLevel
18
+ from azure.kusto.ingest.status import (
19
+ FailureMessage,
20
+ KustoIngestStatusQueues,
21
+ SuccessMessage,
22
+ )
27
23
  from cosmotech.orchestrator.utils.translate import T
28
24
 
29
- from cosmotech.coal.azure.adx.tables import create_table, _drop_by_tag
25
+ from cosmotech.coal.azure.adx.tables import _drop_by_tag, create_table
30
26
  from cosmotech.coal.azure.adx.utils import type_mapping
31
27
  from cosmotech.coal.utils.logger import LOGGER
32
28
 
@@ -7,9 +7,9 @@
7
7
 
8
8
  from azure.kusto.data import KustoClient
9
9
  from azure.kusto.data.response import KustoResponseDataSet
10
+ from cosmotech.orchestrator.utils.translate import T
10
11
 
11
12
  from cosmotech.coal.utils.logger import LOGGER
12
- from cosmotech.orchestrator.utils.translate import T
13
13
 
14
14
 
15
15
  def run_query(client: KustoClient, database: str, query: str) -> KustoResponseDataSet:
@@ -13,25 +13,24 @@ This module provides functions for ingesting runner data into Azure Data Explore
13
13
 
14
14
  import pathlib
15
15
  import time
16
- from collections import defaultdict
17
- from typing import Dict, Any, List, Tuple, Optional
16
+ from typing import Any, Dict
18
17
 
19
18
  from azure.kusto.data.response import KustoResponseDataSet
20
- from azure.kusto.ingest import ColumnMapping
21
- from azure.kusto.ingest import FileDescriptor
22
- from azure.kusto.ingest import IngestionMappingKind
23
- from azure.kusto.ingest import IngestionProperties
24
- from azure.kusto.ingest import IngestionResult
25
- from azure.kusto.ingest import ReportLevel
26
-
27
- from azure.kusto.data import KustoClient
28
- from azure.kusto.ingest import QueuedIngestClient
19
+ from azure.kusto.ingest import (
20
+ ColumnMapping,
21
+ FileDescriptor,
22
+ IngestionMappingKind,
23
+ IngestionProperties,
24
+ IngestionResult,
25
+ QueuedIngestClient,
26
+ ReportLevel,
27
+ )
28
+ from cosmotech.orchestrator.utils.translate import T
29
29
 
30
30
  from cosmotech.coal.azure.adx.auth import initialize_clients
31
- from cosmotech.coal.azure.adx.query import run_query, run_command_query
32
- from cosmotech.coal.azure.adx.ingestion import check_ingestion_status, IngestionStatus
31
+ from cosmotech.coal.azure.adx.ingestion import IngestionStatus, check_ingestion_status
32
+ from cosmotech.coal.azure.adx.query import run_query
33
33
  from cosmotech.coal.utils.logger import LOGGER
34
- from cosmotech.orchestrator.utils.translate import T
35
34
 
36
35
 
37
36
  def prepare_csv_content(folder_path: str) -> Dict[str, Dict[str, Any]]: