cosmotech-acceleration-library 1.1.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. cosmotech/coal/__init__.py +1 -1
  2. cosmotech/coal/aws/__init__.py +1 -9
  3. cosmotech/coal/aws/s3.py +181 -214
  4. cosmotech/coal/azure/adx/auth.py +2 -2
  5. cosmotech/coal/azure/adx/runner.py +13 -14
  6. cosmotech/coal/azure/adx/store.py +5 -86
  7. cosmotech/coal/azure/adx/tables.py +2 -2
  8. cosmotech/coal/azure/blob.py +6 -6
  9. cosmotech/coal/azure/storage.py +3 -3
  10. cosmotech/coal/cosmotech_api/__init__.py +0 -28
  11. cosmotech/coal/cosmotech_api/apis/__init__.py +14 -0
  12. cosmotech/coal/cosmotech_api/apis/dataset.py +103 -0
  13. cosmotech/coal/cosmotech_api/apis/meta.py +25 -0
  14. cosmotech/coal/cosmotech_api/apis/organization.py +24 -0
  15. cosmotech/coal/cosmotech_api/apis/run.py +38 -0
  16. cosmotech/coal/cosmotech_api/apis/runner.py +71 -0
  17. cosmotech/coal/cosmotech_api/apis/solution.py +23 -0
  18. cosmotech/coal/cosmotech_api/apis/workspace.py +108 -0
  19. cosmotech/coal/cosmotech_api/objects/__init__.py +9 -0
  20. cosmotech/coal/cosmotech_api/objects/connection.py +125 -0
  21. cosmotech/coal/cosmotech_api/objects/parameters.py +127 -0
  22. cosmotech/coal/postgresql/runner.py +56 -36
  23. cosmotech/coal/postgresql/store.py +60 -14
  24. cosmotech/coal/postgresql/utils.py +254 -0
  25. cosmotech/coal/store/output/__init__.py +0 -0
  26. cosmotech/coal/store/output/aws_channel.py +73 -0
  27. cosmotech/coal/store/output/az_storage_channel.py +42 -0
  28. cosmotech/coal/store/output/channel_interface.py +23 -0
  29. cosmotech/coal/store/output/channel_spliter.py +55 -0
  30. cosmotech/coal/store/output/postgres_channel.py +40 -0
  31. cosmotech/coal/utils/configuration.py +169 -0
  32. cosmotech/coal/utils/decorator.py +4 -7
  33. cosmotech/csm_data/commands/api/api.py +6 -19
  34. cosmotech/csm_data/commands/api/postgres_send_runner_metadata.py +20 -16
  35. cosmotech/csm_data/commands/api/run_load_data.py +7 -46
  36. cosmotech/csm_data/commands/api/wsf_load_file.py +13 -16
  37. cosmotech/csm_data/commands/api/wsf_send_file.py +11 -14
  38. cosmotech/csm_data/commands/s3_bucket_delete.py +16 -15
  39. cosmotech/csm_data/commands/s3_bucket_download.py +16 -16
  40. cosmotech/csm_data/commands/s3_bucket_upload.py +16 -14
  41. cosmotech/csm_data/commands/store/dump_to_s3.py +18 -16
  42. cosmotech/csm_data/commands/store/output.py +35 -0
  43. cosmotech/csm_data/commands/store/store.py +3 -3
  44. cosmotech/translation/coal/en-US/coal/cosmotech_api/initialization.yml +8 -0
  45. cosmotech/translation/coal/en-US/coal/services/dataset.yml +4 -14
  46. cosmotech/translation/coal/en-US/coal/store/output/data_interface.yml +1 -0
  47. cosmotech/translation/coal/en-US/coal/store/output/split.yml +6 -0
  48. cosmotech/translation/coal/en-US/coal/utils/configuration.yml +2 -0
  49. cosmotech/translation/csm_data/en-US/csm_data/commands/store/output.yml +7 -0
  50. {cosmotech_acceleration_library-1.1.0.dist-info → cosmotech_acceleration_library-2.0.0.dist-info}/METADATA +5 -8
  51. {cosmotech_acceleration_library-1.1.0.dist-info → cosmotech_acceleration_library-2.0.0.dist-info}/RECORD +55 -73
  52. cosmotech/coal/azure/functions.py +0 -72
  53. cosmotech/coal/cosmotech_api/connection.py +0 -96
  54. cosmotech/coal/cosmotech_api/dataset/__init__.py +0 -26
  55. cosmotech/coal/cosmotech_api/dataset/converters.py +0 -164
  56. cosmotech/coal/cosmotech_api/dataset/download/__init__.py +0 -19
  57. cosmotech/coal/cosmotech_api/dataset/download/adt.py +0 -119
  58. cosmotech/coal/cosmotech_api/dataset/download/common.py +0 -140
  59. cosmotech/coal/cosmotech_api/dataset/download/file.py +0 -229
  60. cosmotech/coal/cosmotech_api/dataset/download/twingraph.py +0 -185
  61. cosmotech/coal/cosmotech_api/dataset/upload.py +0 -41
  62. cosmotech/coal/cosmotech_api/dataset/utils.py +0 -132
  63. cosmotech/coal/cosmotech_api/parameters.py +0 -48
  64. cosmotech/coal/cosmotech_api/run.py +0 -25
  65. cosmotech/coal/cosmotech_api/run_data.py +0 -173
  66. cosmotech/coal/cosmotech_api/run_template.py +0 -108
  67. cosmotech/coal/cosmotech_api/runner/__init__.py +0 -28
  68. cosmotech/coal/cosmotech_api/runner/data.py +0 -38
  69. cosmotech/coal/cosmotech_api/runner/datasets.py +0 -416
  70. cosmotech/coal/cosmotech_api/runner/download.py +0 -135
  71. cosmotech/coal/cosmotech_api/runner/metadata.py +0 -42
  72. cosmotech/coal/cosmotech_api/runner/parameters.py +0 -157
  73. cosmotech/coal/cosmotech_api/twin_data_layer.py +0 -512
  74. cosmotech/coal/cosmotech_api/workspace.py +0 -127
  75. cosmotech/coal/utils/postgresql.py +0 -236
  76. cosmotech/coal/utils/semver.py +0 -6
  77. cosmotech/csm_data/commands/api/rds_load_csv.py +0 -90
  78. cosmotech/csm_data/commands/api/rds_send_csv.py +0 -74
  79. cosmotech/csm_data/commands/api/rds_send_store.py +0 -74
  80. cosmotech/csm_data/commands/api/runtemplate_load_handler.py +0 -66
  81. cosmotech/csm_data/commands/api/tdl_load_files.py +0 -76
  82. cosmotech/csm_data/commands/api/tdl_send_files.py +0 -82
  83. cosmotech/orchestrator_plugins/csm-data/templates/api/rds_load_csv.json +0 -27
  84. cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_csv.json +0 -27
  85. cosmotech/orchestrator_plugins/csm-data/templates/api/rds_send_store.json +0 -27
  86. cosmotech/orchestrator_plugins/csm-data/templates/api/runtemplate_load_handler.json +0 -27
  87. cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_load_files.json +0 -32
  88. cosmotech/orchestrator_plugins/csm-data/templates/api/tdl_send_files.json +0 -27
  89. cosmotech/translation/coal/en-US/coal/cosmotech_api/run_data.yml +0 -2
  90. cosmotech/translation/csm_data/en-US/csm_data/commands/api/rds_load_csv.yml +0 -13
  91. cosmotech/translation/csm_data/en-US/csm_data/commands/api/rds_send_csv.yml +0 -12
  92. cosmotech/translation/csm_data/en-US/csm_data/commands/api/rds_send_store.yml +0 -12
  93. cosmotech/translation/csm_data/en-US/csm_data/commands/api/tdl_load_files.yml +0 -14
  94. cosmotech/translation/csm_data/en-US/csm_data/commands/api/tdl_send_files.yml +0 -18
  95. {cosmotech_acceleration_library-1.1.0.dist-info → cosmotech_acceleration_library-2.0.0.dist-info}/WHEEL +0 -0
  96. {cosmotech_acceleration_library-1.1.0.dist-info → cosmotech_acceleration_library-2.0.0.dist-info}/entry_points.txt +0 -0
  97. {cosmotech_acceleration_library-1.1.0.dist-info → cosmotech_acceleration_library-2.0.0.dist-info}/licenses/LICENSE +0 -0
  98. {cosmotech_acceleration_library-1.1.0.dist-info → cosmotech_acceleration_library-2.0.0.dist-info}/top_level.txt +0 -0
cosmotech/coal/__init__.py CHANGED
@@ -5,4 +5,4 @@
 # etc., to any person is prohibited unless it has been previously and
 # specifically authorized by written means by Cosmo Tech.
 
-__version__ = "1.1.0"
+__version__ = "2.0.0"
cosmotech/coal/aws/__init__.py CHANGED
@@ -12,12 +12,4 @@ This module provides functions for interacting with AWS services like S3.
 """
 
 # Re-export S3 functions for easier importing
-from cosmotech.coal.aws.s3 import (
-    create_s3_client,
-    create_s3_resource,
-    upload_file,
-    upload_folder,
-    download_files,
-    upload_data_stream,
-    delete_objects,
-)
+from cosmotech.coal.aws.s3 import S3
cosmotech/coal/aws/s3.py CHANGED
@@ -6,230 +6,197 @@
 # specifically authorized by written means by Cosmo Tech.
 
 """
-S3 bucket operations module.
+s3 bucket operations module.
 
-This module provides functions for interacting with S3 buckets, including
+this module provides functions for interacting with S3 buckets, including
 uploading, downloading, and deleting files.
 """
 
 import pathlib
 from io import BytesIO
-from typing import Optional, Dict, Any, List, Iterator
 
 import boto3
+from cosmotech.orchestrator.utils.translate import T
 
+from cosmotech.coal.utils.configuration import Configuration
 from cosmotech.coal.utils.logger import LOGGER
-from cosmotech.orchestrator.utils.translate import T
 
 
-def create_s3_client(
-    endpoint_url: str,
-    access_id: str,
-    secret_key: str,
-    use_ssl: bool = True,
-    ssl_cert_bundle: Optional[str] = None,
-) -> boto3.client:
-    """
-    Create an S3 client with the given credentials and configuration.
-
-    Args:
-        endpoint_url: The S3 endpoint URL
-        access_id: The AWS access key ID
-        secret_key: The AWS secret access key
-        use_ssl: Whether to use SSL for the connection
-        ssl_cert_bundle: Path to the SSL certificate bundle
-
-    Returns:
-        An S3 client object
-    """
-    boto3_parameters = {
-        "use_ssl": use_ssl,
-        "endpoint_url": endpoint_url,
-        "aws_access_key_id": access_id,
-        "aws_secret_access_key": secret_key,
-    }
-    if ssl_cert_bundle:
-        boto3_parameters["verify"] = ssl_cert_bundle
-
-    return boto3.client("s3", **boto3_parameters)
-
-
-def create_s3_resource(
-    endpoint_url: str,
-    access_id: str,
-    secret_key: str,
-    use_ssl: bool = True,
-    ssl_cert_bundle: Optional[str] = None,
-) -> boto3.resource:
-    """
-    Create an S3 resource with the given credentials and configuration.
-
-    Args:
-        endpoint_url: The S3 endpoint URL
-        access_id: The AWS access key ID
-        secret_key: The AWS secret access key
-        use_ssl: Whether to use SSL for the connection
-        ssl_cert_bundle: Path to the SSL certificate bundle
-
-    Returns:
-        An S3 resource object
-    """
-    boto3_parameters = {
-        "use_ssl": use_ssl,
-        "endpoint_url": endpoint_url,
-        "aws_access_key_id": access_id,
-        "aws_secret_access_key": secret_key,
-    }
-    if ssl_cert_bundle:
-        boto3_parameters["verify"] = ssl_cert_bundle
-
-    return boto3.resource("s3", **boto3_parameters)
-
-
-def upload_file(
-    file_path: pathlib.Path,
-    bucket_name: str,
-    s3_resource: boto3.resource,
-    file_prefix: str = "",
-) -> None:
-    """
-    Upload a single file to an S3 bucket.
-
-    Args:
-        file_path: Path to the file to upload
-        bucket_name: Name of the S3 bucket
-        s3_resource: S3 resource object
-        file_prefix: Prefix to add to the file name in the bucket
-    """
-    uploaded_file_name = file_prefix + file_path.name
-    LOGGER.info(T("coal.common.data_transfer.file_sent").format(file_path=file_path, uploaded_name=uploaded_file_name))
-    s3_resource.Bucket(bucket_name).upload_file(str(file_path), uploaded_file_name)
-
-
-def upload_folder(
-    source_folder: str,
-    bucket_name: str,
-    s3_resource: boto3.resource,
-    file_prefix: str = "",
-    recursive: bool = False,
-) -> None:
-    """
-    Upload files from a folder to an S3 bucket.
-
-    Args:
-        source_folder: Path to the folder containing files to upload
-        bucket_name: Name of the S3 bucket
-        s3_resource: S3 resource object
-        file_prefix: Prefix to add to the file names in the bucket
-        recursive: Whether to recursively upload files from subdirectories
-    """
-    source_path = pathlib.Path(source_folder)
-    if not source_path.exists():
-        LOGGER.error(T("coal.common.file_operations.not_found").format(source_folder=source_folder))
-        raise FileNotFoundError(T("coal.common.file_operations.not_found").format(source_folder=source_folder))
-
-    if source_path.is_dir():
-        _source_name = str(source_path)
-        for _file_path in source_path.glob("**/*" if recursive else "*"):
-            if _file_path.is_file():
-                _file_name = str(_file_path).removeprefix(_source_name).removeprefix("/")
-                uploaded_file_name = file_prefix + _file_name
-                LOGGER.info(
-                    T("coal.common.data_transfer.file_sent").format(
-                        file_path=_file_path, uploaded_name=uploaded_file_name
+class S3:
+
+    def __init__(self, configuration: Configuration):
+        self._configuration = configuration.s3
+
+    @property
+    def file_prefix(self):
+        if "bucket_prefix" in self._configuration:
+            return self._configuration.bucket_prefix
+        return ""
+
+    @property
+    def use_ssl(self):
+        if "use_ssl" in self._configuration:
+            return self._configuration.use_ssl
+        return True
+
+    @property
+    def ssl_cert_bundle(self):
+        if "ssl_cert_bundle" in self._configuration:
+            return self._configuration.ssl_cert_bundle
+        return None
+
+    @property
+    def access_key_id(self):
+        return self._configuration.access_key_id
+
+    @property
+    def endpoint_url(self):
+        return self._configuration.endpoint_url
+
+    @property
+    def bucket_name(self):
+        return self._configuration.bucket_name
+
+    @property
+    def secret_access_key(self):
+        return self._configuration.secret_access_key
+
+    @property
+    def output_type(self):
+        if "output_type" in self._configuration:
+            return self._configuration.output_type
+        return "csv"
+
+    @property
+    def client(self) -> boto3.client:
+        boto3_parameters = {
+            "use_ssl": self.use_ssl,
+            "endpoint_url": self.endpoint_url,
+            "aws_access_key_id": self.access_key_id,
+            "aws_secret_access_key": self.secret_access_key,
+        }
+        if self.ssl_cert_bundle:
+            boto3_parameters["verify"] = self.ssl_cert_bundle
+
+        return boto3.client("s3", **boto3_parameters)
+
+    @property
+    def resource(self) -> boto3.resource:
+        boto3_parameters = {
+            "use_ssl": self.use_ssl,
+            "endpoint_url": self.endpoint_url,
+            "aws_access_key_id": self.access_key_id,
+            "aws_secret_access_key": self.secret_access_key,
+        }
+        if self.ssl_cert_bundle:
+            boto3_parameters["verify"] = self.ssl_cert_bundle
+
+        return boto3.resource("s3", **boto3_parameters)
+
+    def upload_file(self, file_path: pathlib.Path) -> None:
+        """
+        Upload a single file to an S3 bucket.
+
+        Args:
+            file_path: Path to the file to upload
+        """
+        uploaded_file_name = self.file_prefix + file_path.name
+        LOGGER.info(
+            T("coal.common.data_transfer.file_sent").format(file_path=file_path, uploaded_name=uploaded_file_name)
+        )
+        self.resource.Bucket(self.bucket_name).upload_file(str(file_path), uploaded_file_name)
+
+    def upload_folder(self, source_folder: str, recursive: bool = False) -> None:
+        """
+        Upload files from a folder to an S3 bucket.
+
+        Args:
+            source_folder: Path to the folder containing files to upload
+            recursive: Whether to recursively upload files from subdirectories
+        """
+        source_path = pathlib.Path(source_folder)
+        if not source_path.exists():
+            LOGGER.error(T("coal.common.file_operations.not_found").format(source_folder=source_folder))
+            raise FileNotFoundError(T("coal.common.file_operations.not_found").format(source_folder=source_folder))
+
+        if source_path.is_dir():
+            _source_name = str(source_path)
+            for _file_path in source_path.glob("**/*" if recursive else "*"):
+                if _file_path.is_file():
+                    _file_name = str(_file_path).removeprefix(_source_name).removeprefix("/")
+                    uploaded_file_name = self.file_prefix + _file_name
+                    LOGGER.info(
+                        T("coal.common.data_transfer.file_sent").format(
+                            file_path=_file_path, uploaded_name=uploaded_file_name
+                        )
                     )
-                )
-                s3_resource.Bucket(bucket_name).upload_file(str(_file_path), uploaded_file_name)
-    else:
-        upload_file(source_path, bucket_name, s3_resource, file_prefix)
-
-
-def download_files(
-    target_folder: str,
-    bucket_name: str,
-    s3_resource: boto3.resource,
-    file_prefix: Optional[str] = None,
-) -> None:
-    """
-    Download files from an S3 bucket to a local folder.
-
-    Args:
-        target_folder: Local folder to download files to
-        bucket_name: Name of the S3 bucket
-        s3_resource: S3 resource object
-        file_prefix: Optional prefix to filter objects to download
-    """
-    bucket = s3_resource.Bucket(bucket_name)
-
-    pathlib.Path(target_folder).mkdir(parents=True, exist_ok=True)
-    remove_prefix = False
-    if file_prefix:
-        bucket_files = bucket.objects.filter(Prefix=file_prefix)
-        if file_prefix.endswith("/"):
-            remove_prefix = True
-    else:
-        bucket_files = bucket.objects.all()
-    for _file in bucket_files:
-        if not (path_name := str(_file.key)).endswith("/"):
-            target_file = path_name
-            if remove_prefix:
-                target_file = target_file.removeprefix(file_prefix)
-            output_file = f"{target_folder}/{target_file}"
-            pathlib.Path(output_file).parent.mkdir(parents=True, exist_ok=True)
-            LOGGER.info(T("coal.services.azure_storage.downloading").format(path=path_name, output=output_file))
-            bucket.download_file(_file.key, output_file)
-
-
-def upload_data_stream(
-    data_stream: BytesIO,
-    bucket_name: str,
-    s3_client: boto3.client,
-    file_name: str,
-    file_prefix: str = "",
-) -> None:
-    """
-    Upload a data stream to an S3 bucket.
-
-    Args:
-        data_stream: BytesIO stream containing the data to upload
-        bucket_name: Name of the S3 bucket
-        s3_client: S3 client object
-        file_name: Name of the file to create in the bucket
-        file_prefix: Prefix to add to the file name in the bucket
-    """
-    uploaded_file_name = file_prefix + file_name
-    data_stream.seek(0)
-    size = len(data_stream.read())
-    data_stream.seek(0)
-
-    LOGGER.info(T("coal.common.data_transfer.sending_data").format(size=size))
-    s3_client.upload_fileobj(data_stream, bucket_name, uploaded_file_name)
-
-
-def delete_objects(
-    bucket_name: str,
-    s3_resource: boto3.resource,
-    file_prefix: Optional[str] = None,
-) -> None:
-    """
-    Delete objects from an S3 bucket, optionally filtered by prefix.
-
-    Args:
-        bucket_name: Name of the S3 bucket
-        s3_resource: S3 resource object
-        file_prefix: Optional prefix to filter objects to delete
-    """
-    bucket = s3_resource.Bucket(bucket_name)
-
-    if file_prefix:
-        bucket_files = bucket.objects.filter(Prefix=file_prefix)
-    else:
-        bucket_files = bucket.objects.all()
-
-    boto_objects = [{"Key": _file.key} for _file in bucket_files if _file.key != file_prefix]
-    if boto_objects:
-        LOGGER.info(T("coal.services.azure_storage.deleting_objects").format(objects=boto_objects))
-        boto_delete_request = {"Objects": boto_objects}
-        bucket.delete_objects(Delete=boto_delete_request)
-    else:
-        LOGGER.info(T("coal.services.azure_storage.no_objects"))
+                    self.resource.Bucket(self.bucket_name).upload_file(str(_file_path), uploaded_file_name)
+        else:
+            self.upload_file(source_path)
+
+    def download_files(self, destination_folder: str) -> None:
+        """
+        Download files from an S3 bucket to a local folder.
+
+        Args:
+            destination_folder: Local folder to download files to
+        """
+        bucket = self.resource.Bucket(self.bucket_name)
+
+        pathlib.Path(destination_folder).mkdir(parents=True, exist_ok=True)
+        remove_prefix = False
+        if self.file_prefix:
+            bucket_files = bucket.objects.filter(Prefix=self.file_prefix)
+            if self.file_prefix.endswith("/"):
+                remove_prefix = True
+        else:
+            bucket_files = bucket.objects.all()
+        for _file in bucket_files:
+            if not (path_name := str(_file.key)).endswith("/"):
+                target_file = path_name
+                if remove_prefix:
+                    target_file = target_file.removeprefix(self.file_prefix)
+                output_file = f"{destination_folder}/{target_file}"
+                pathlib.Path(output_file).parent.mkdir(parents=True, exist_ok=True)
+                LOGGER.info(T("coal.services.azure_storage.downloading").format(path=path_name, output=output_file))
+                bucket.download_file(_file.key, output_file)
+
+    def upload_data_stream(self, data_stream: BytesIO, file_name: str) -> None:
+        """
+        Upload a data stream to an S3 bucket.
+
+        Args:
+            data_stream: BytesIO stream containing the data to upload
+            file_name: Name of the file to create in the bucket
+        """
+        uploaded_file_name = self.file_prefix + file_name
+        data_stream.seek(0)
+        size = len(data_stream.read())
+        data_stream.seek(0)
+
+        LOGGER.info(T("coal.common.data_transfer.sending_data").format(size=size))
+        self.client.upload_fileobj(data_stream, self.bucket_name, uploaded_file_name)
+
+    def delete_objects(self) -> None:
+        """
+        Delete objects from an S3 bucket, optionally filtered by prefix.
+
+        Args:
+            bucket_name: Name of the S3 bucket
+            s3_resource: S3 resource object
+            file_prefix: Optional prefix to filter objects to delete
+        """
+        bucket = self.resource.Bucket(self.bucket_name)
+
+        if self.file_prefix:
+            bucket_files = bucket.objects.filter(Prefix=self.file_prefix)
+        else:
+            bucket_files = bucket.objects.all()
+
+        boto_objects = [{"Key": _file.key} for _file in bucket_files if _file.key != self.file_prefix]
+        if boto_objects:
+            LOGGER.info(T("coal.services.azure_storage.deleting_objects").format(objects=boto_objects))
+            boto_delete_request = {"Objects": boto_objects}
+            bucket.delete_objects(Delete=boto_delete_request)
+        else:
+            LOGGER.info(T("coal.services.azure_storage.no_objects"))
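
Migration sketch (illustrative, not part of the published diff): in 2.0.0 the module-level helpers above (create_s3_client, create_s3_resource, upload_file, upload_folder, download_files, upload_data_stream, delete_objects) are replaced by the S3 class, which reads endpoint_url, access_key_id, secret_access_key, bucket_name and the optional bucket_prefix, use_ssl and ssl_cert_bundle settings from the s3 section of a Configuration object (defined in the new cosmotech/coal/utils/configuration.py, whose constructor is not shown in this diff, so the no-argument instantiation below is an assumption):

import pathlib

from cosmotech.coal.aws.s3 import S3
from cosmotech.coal.utils.configuration import Configuration

# 1.1.0 (removed): credentials and bucket were passed explicitly to each helper
#   s3_resource = create_s3_resource(endpoint_url, access_id, secret_key)
#   upload_file(pathlib.Path("data.csv"), "my-bucket", s3_resource, file_prefix="runs/")

# 2.0.0: connection settings come from configuration.s3 instead of per-call arguments
configuration = Configuration()  # assumed no-argument constructor; not shown in this diff
s3 = S3(configuration)
s3.upload_file(pathlib.Path("data.csv"))  # uploaded as "<bucket_prefix>data.csv"
s3.download_files("local_output/")  # mirrors the bucket content (under bucket_prefix) locally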
cosmotech/coal/azure/adx/auth.py CHANGED
@@ -6,13 +6,13 @@
 # specifically authorized by written means by Cosmo Tech.
 
 import os
-from typing import Union, Optional, Tuple
+from typing import Optional, Tuple
 
 from azure.kusto.data import KustoClient, KustoConnectionStringBuilder
 from azure.kusto.ingest import QueuedIngestClient
+from cosmotech.orchestrator.utils.translate import T
 
 from cosmotech.coal.utils.logger import LOGGER
-from cosmotech.orchestrator.utils.translate import T
 
 
 def create_kusto_client(
cosmotech/coal/azure/adx/runner.py CHANGED
@@ -13,25 +13,24 @@ This module provides functions for ingesting runner data into Azure Data Explore
 
 import pathlib
 import time
-from collections import defaultdict
-from typing import Dict, Any, List, Tuple, Optional
+from typing import Any, Dict
 
 from azure.kusto.data.response import KustoResponseDataSet
-from azure.kusto.ingest import ColumnMapping
-from azure.kusto.ingest import FileDescriptor
-from azure.kusto.ingest import IngestionMappingKind
-from azure.kusto.ingest import IngestionProperties
-from azure.kusto.ingest import IngestionResult
-from azure.kusto.ingest import ReportLevel
-
-from azure.kusto.data import KustoClient
-from azure.kusto.ingest import QueuedIngestClient
+from azure.kusto.ingest import (
+    ColumnMapping,
+    FileDescriptor,
+    IngestionMappingKind,
+    IngestionProperties,
+    IngestionResult,
+    QueuedIngestClient,
+    ReportLevel,
+)
+from cosmotech.orchestrator.utils.translate import T
 
 from cosmotech.coal.azure.adx.auth import initialize_clients
-from cosmotech.coal.azure.adx.query import run_query, run_command_query
-from cosmotech.coal.azure.adx.ingestion import check_ingestion_status, IngestionStatus
+from cosmotech.coal.azure.adx.ingestion import IngestionStatus, check_ingestion_status
+from cosmotech.coal.azure.adx.query import run_query
 from cosmotech.coal.utils.logger import LOGGER
-from cosmotech.orchestrator.utils.translate import T
 
 
 def prepare_csv_content(folder_path: str) -> Dict[str, Dict[str, Any]]:
cosmotech/coal/azure/adx/store.py CHANGED
@@ -7,26 +7,22 @@
 
 import os
 import tempfile
+import time
 import uuid
-from typing import Optional, List, Dict, Tuple, Union, Any
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 import pyarrow
 import pyarrow.csv as pc
-import time
 from azure.kusto.data import KustoClient
 from azure.kusto.data.data_format import DataFormat
-from azure.kusto.ingest import IngestionProperties
-from azure.kusto.ingest import QueuedIngestClient
-from azure.kusto.ingest import ReportLevel
+from azure.kusto.ingest import IngestionProperties, QueuedIngestClient, ReportLevel
 from cosmotech.orchestrator.utils.translate import T
-from time import perf_counter
 
-from cosmotech.coal.azure.adx.tables import check_and_create_table, _drop_by_tag
 from cosmotech.coal.azure.adx.auth import initialize_clients
-from cosmotech.coal.azure.adx.ingestion import monitor_ingestion, handle_failures
+from cosmotech.coal.azure.adx.ingestion import handle_failures, monitor_ingestion
+from cosmotech.coal.azure.adx.tables import _drop_by_tag, check_and_create_table
 from cosmotech.coal.store.store import Store
 from cosmotech.coal.utils.logger import LOGGER
-from cosmotech.coal.utils.postgresql import send_pyarrow_table_to_postgresql
 
 
 def send_table_data(
@@ -176,80 +172,3 @@ def send_store_to_adx(
         LOGGER.warning(T("coal.services.adx.dropping_data").format(operation_tag=operation_tag))
         _drop_by_tag(kusto_client, database, operation_tag)
         raise e
-
-
-def dump_store_to_adx(
-    store_folder: str,
-    postgres_host: str,
-    postgres_port: int,
-    postgres_db: str,
-    postgres_schema: str,
-    postgres_user: str,
-    postgres_password: str,
-    table_prefix: str = "Cosmotech_",
-    replace: bool = True,
-) -> None:
-    """
-    Dump Store data to an Azure Data Explorer database.
-
-    Args:
-        store_folder: Folder containing the Store
-        postgres_host: PostgreSQL host
-        postgres_port: PostgreSQL port
-        postgres_db: PostgreSQL database name
-        postgres_schema: PostgreSQL schema
-        postgres_user: PostgreSQL username
-        postgres_password: PostgreSQL password
-        table_prefix: Table prefix
-        replace: Whether to replace existing tables
-    """
-    _s = Store(store_location=store_folder)
-
-    tables = list(_s.list_tables())
-    if len(tables):
-        LOGGER.info(T("coal.services.database.sending_data").format(table=f"{postgres_db}.{postgres_schema}"))
-        total_rows = 0
-        _process_start = perf_counter()
-        for table_name in tables:
-            _s_time = perf_counter()
-            target_table_name = f"{table_prefix}{table_name}"
-            LOGGER.info(T("coal.services.database.table_entry").format(table=target_table_name))
-            data = _s.get_table(table_name)
-            if not len(data):
-                LOGGER.info(T("coal.services.database.no_rows"))
-                continue
-            _dl_time = perf_counter()
-            rows = send_pyarrow_table_to_postgresql(
-                data,
-                target_table_name,
-                postgres_host,
-                postgres_port,
-                postgres_db,
-                postgres_schema,
-                postgres_user,
-                postgres_password,
-                replace,
-            )
-            total_rows += rows
-            _up_time = perf_counter()
-            LOGGER.info(T("coal.services.database.row_count").format(count=rows))
-            LOGGER.debug(
-                T("coal.common.timing.operation_completed").format(
-                    operation="Load from datastore", time=f"{_dl_time - _s_time:0.3}"
-                )
-            )
-            LOGGER.debug(
-                T("coal.common.timing.operation_completed").format(
-                    operation="Send to postgresql", time=f"{_up_time - _dl_time:0.3}"
-                )
-            )
-        _process_end = perf_counter()
-        LOGGER.info(
-            T("coal.services.database.rows_fetched").format(
-                table="all tables",
-                count=total_rows,
-                time=f"{_process_end - _process_start:0.3}",
-            )
-        )
-    else:
-        LOGGER.info(T("coal.services.database.store_empty"))
cosmotech/coal/azure/adx/tables.py CHANGED
@@ -5,13 +5,13 @@
 # etc., to any person is prohibited unless it has been previously and
 # specifically authorized by written means by Cosmo Tech.
 
-from typing import Dict, Any
+from typing import Dict
 
 import pyarrow
 from azure.kusto.data import KustoClient
+from cosmotech.orchestrator.utils.translate import T
 
 from cosmotech.coal.utils.logger import LOGGER
-from cosmotech.orchestrator.utils.translate import T
 
 
 def table_exists(client: KustoClient, database: str, table_name: str) -> bool:
cosmotech/coal/azure/blob.py CHANGED
@@ -12,19 +12,16 @@ This module provides functions for interacting with Azure Blob Storage,
 including uploading data from the Store.
 """
 
-import pathlib
 from io import BytesIO
-from typing import List, Optional
-
-from azure.identity import ClientSecretCredential
-from azure.storage.blob import BlobServiceClient
 
 import pyarrow.csv as pc
 import pyarrow.parquet as pq
+from azure.identity import ClientSecretCredential
+from azure.storage.blob import BlobServiceClient
+from cosmotech.orchestrator.utils.translate import T
 
 from cosmotech.coal.store.store import Store
 from cosmotech.coal.utils.logger import LOGGER
-from cosmotech.orchestrator.utils.translate import T
 
 VALID_TYPES = (
     "sqlite",
@@ -42,6 +39,7 @@ def dump_store_to_azure(
     client_secret: str,
     output_type: str = "sqlite",
     file_prefix: str = "",
+    selected_tables: list[str] = [],
 ) -> None:
     """
     Dump Store data to Azure Blob Storage.
@@ -90,6 +88,8 @@ def dump_store_to_azure(
         container_client.upload_blob(name=_uploaded_file_name, data=data, overwrite=True)
     else:
         tables = list(_s.list_tables())
+        if selected_tables:
+            tables = [t for t in tables if t in selected_tables]
        for table_name in tables:
             _data_stream = BytesIO()
             _file_name = None
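
The new selected_tables parameter of dump_store_to_azure restricts which Store tables are uploaded; the default empty list keeps the previous dump-everything behavior. A minimal sketch of just that filter semantics (the table names are illustrative, not from the package):

# Filter added in 2.0.0; an empty selected_tables keeps every table.
tables = ["probes", "measures", "audit_log"]
selected_tables = ["measures"]

if selected_tables:
    tables = [t for t in tables if t in selected_tables]

print(tables)  # ['measures']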