apache-airflow-providers-microsoft-azure 10.1.2rc1__tar.gz → 10.2.0rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/PKG-INFO +10 -7
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/README.rst +5 -4
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/__init__.py +1 -1
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/get_provider_info.py +8 -1
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/hooks/asb.py +10 -5
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/hooks/data_factory.py +2 -1
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/hooks/data_lake.py +36 -18
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/operators/asb.py +20 -10
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/operators/cosmos.py +2 -1
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/sensors/data_factory.py +2 -1
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/sensors/wasb.py +4 -2
- apache_airflow_providers_microsoft_azure-10.2.0rc1/airflow/providers/microsoft/azure/transfers/s3_to_wasb.py +266 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/triggers/data_factory.py +4 -2
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/utils.py +8 -4
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/pyproject.toml +6 -3
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/LICENSE +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/fs/__init__.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/fs/adls.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/hooks/__init__.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/hooks/adx.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/hooks/base_azure.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/hooks/batch.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/hooks/container_instance.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/hooks/container_registry.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/hooks/container_volume.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/hooks/cosmos.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/hooks/fileshare.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/hooks/msgraph.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/hooks/synapse.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/hooks/wasb.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/log/__init__.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/log/wasb_task_handler.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/operators/__init__.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/operators/adls.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/operators/adx.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/operators/batch.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/operators/container_instances.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/operators/data_factory.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/operators/msgraph.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/operators/synapse.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/operators/wasb_delete_blob.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/secrets/__init__.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/secrets/key_vault.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/sensors/__init__.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/sensors/cosmos.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/sensors/msgraph.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/transfers/__init__.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/transfers/azure_blob_to_gcs.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/transfers/local_to_adls.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/transfers/local_to_wasb.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/transfers/oracle_to_azure_data_lake.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/transfers/sftp_to_wasb.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/triggers/__init__.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/triggers/msgraph.py +0 -0
- {apache_airflow_providers_microsoft_azure-10.1.2rc1 → apache_airflow_providers_microsoft_azure-10.2.0rc1}/airflow/providers/microsoft/azure/triggers/wasb.py +0 -0
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: apache-airflow-providers-microsoft-azure
-Version: 10.1.2rc1
+Version: 10.2.0rc1
 Summary: Provider package apache-airflow-providers-microsoft-azure for Apache Airflow
 Keywords: airflow-provider,microsoft.azure,airflow,integration
 Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -44,16 +44,18 @@ Requires-Dist: azure-storage-file-share>=12.7.0
 Requires-Dist: azure-synapse-artifacts>=0.17.0
 Requires-Dist: azure-synapse-spark>=0.2.0
 Requires-Dist: msgraph-core>=1.0.0
+Requires-Dist: apache-airflow-providers-amazon ; extra == "amazon"
 Requires-Dist: apache-airflow-providers-google ; extra == "google"
 Requires-Dist: apache-airflow-providers-oracle ; extra == "oracle"
 Requires-Dist: apache-airflow-providers-sftp ; extra == "sftp"
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
-Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-microsoft-azure/10.1.2/changelog.html
-Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-microsoft-azure/10.1.2
+Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-microsoft-azure/10.2.0/changelog.html
+Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-microsoft-azure/10.2.0
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
 Project-URL: Source Code, https://github.com/apache/airflow
 Project-URL: Twitter, https://twitter.com/ApacheAirflow
 Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
+Provides-Extra: amazon
 Provides-Extra: google
 Provides-Extra: oracle
 Provides-Extra: sftp
@@ -102,7 +104,7 @@ Provides-Extra: sftp

 Package ``apache-airflow-providers-microsoft-azure``

-Release: ``10.1.2.rc1``
+Release: ``10.2.0.rc1``


 `Microsoft Azure <https://azure.microsoft.com/>`__
@@ -115,7 +117,7 @@ This is a provider package for ``microsoft.azure`` provider. All classes for thi
 are in ``airflow.providers.microsoft.azure`` python package.

 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-microsoft-azure/10.1.2/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-microsoft-azure/10.2.0/>`_.

 Installation
 ------------
@@ -167,16 +169,17 @@ You can install such cross-provider dependencies when installing from PyPI. For

 .. code-block:: bash

-    pip install apache-airflow-providers-microsoft-azure[google]
+    pip install apache-airflow-providers-microsoft-azure[amazon]


 ==================================================================================================== ==========
 Dependent package                                                                                    Extra
 ==================================================================================================== ==========
+`apache-airflow-providers-amazon <https://airflow.apache.org/docs/apache-airflow-providers-amazon>`_ ``amazon``
 `apache-airflow-providers-google <https://airflow.apache.org/docs/apache-airflow-providers-google>`_ ``google``
 `apache-airflow-providers-oracle <https://airflow.apache.org/docs/apache-airflow-providers-oracle>`_ ``oracle``
 `apache-airflow-providers-sftp <https://airflow.apache.org/docs/apache-airflow-providers-sftp>`_     ``sftp``
 ==================================================================================================== ==========

 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-microsoft-azure/10.1.2/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-microsoft-azure/10.2.0/changelog.html>`_.
--- a/README.rst
+++ b/README.rst
@@ -42,7 +42,7 @@

 Package ``apache-airflow-providers-microsoft-azure``

-Release: ``10.1.2.rc1``
+Release: ``10.2.0.rc1``


 `Microsoft Azure <https://azure.microsoft.com/>`__
@@ -55,7 +55,7 @@ This is a provider package for ``microsoft.azure`` provider. All classes for thi
 are in ``airflow.providers.microsoft.azure`` python package.

 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-microsoft-azure/10.1.2/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-microsoft-azure/10.2.0/>`_.

 Installation
 ------------
@@ -107,16 +107,17 @@ You can install such cross-provider dependencies when installing from PyPI. For

 .. code-block:: bash

-    pip install apache-airflow-providers-microsoft-azure[google]
+    pip install apache-airflow-providers-microsoft-azure[amazon]


 ==================================================================================================== ==========
 Dependent package                                                                                    Extra
 ==================================================================================================== ==========
+`apache-airflow-providers-amazon <https://airflow.apache.org/docs/apache-airflow-providers-amazon>`_ ``amazon``
 `apache-airflow-providers-google <https://airflow.apache.org/docs/apache-airflow-providers-google>`_ ``google``
 `apache-airflow-providers-oracle <https://airflow.apache.org/docs/apache-airflow-providers-oracle>`_ ``oracle``
 `apache-airflow-providers-sftp <https://airflow.apache.org/docs/apache-airflow-providers-sftp>`_     ``sftp``
 ==================================================================================================== ==========

 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-microsoft-azure/10.1.2/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-microsoft-azure/10.2.0/changelog.html>`_.
--- a/airflow/providers/microsoft/azure/__init__.py
+++ b/airflow/providers/microsoft/azure/__init__.py
@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version

 __all__ = ["__version__"]

-__version__ = "10.1.2"
+__version__ = "10.2.0"

 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
     "2.7.0"
--- a/airflow/providers/microsoft/azure/get_provider_info.py
+++ b/airflow/providers/microsoft/azure/get_provider_info.py
@@ -28,8 +28,9 @@ def get_provider_info():
         "name": "Microsoft Azure",
         "description": "`Microsoft Azure <https://azure.microsoft.com/>`__\n",
         "state": "ready",
-        "source-date-epoch":
+        "source-date-epoch": 1720503933,
         "versions": [
+            "10.2.0",
             "10.1.2",
             "10.1.1",
             "10.1.0",
@@ -363,6 +364,12 @@ def get_provider_info():
                 "how-to-guide": "/docs/apache-airflow-providers-microsoft-azure/transfer/sftp_to_wasb.rst",
                 "python-module": "airflow.providers.microsoft.azure.transfers.sftp_to_wasb",
             },
+            {
+                "source-integration-name": "Amazon Simple Storage Service (S3)",
+                "target-integration-name": "Microsoft Azure Blob Storage",
+                "how-to-guide": "/docs/apache-airflow-providers-microsoft-azure/transfer/s3_to_wasb.rst",
+                "python-module": "airflow.providers.microsoft.azure.transfers.s3_to_wasb",
+            },
             {
                 "source-integration-name": "Microsoft Azure Blob Storage",
                 "target-integration-name": "Google Cloud Storage (GCS)",
--- a/airflow/providers/microsoft/azure/hooks/asb.py
+++ b/airflow/providers/microsoft/azure/hooks/asb.py
@@ -92,7 +92,8 @@ class BaseAzureServiceBusHook(BaseHook):


 class AdminClientHook(BaseAzureServiceBusHook):
-    """Interact with the ServiceBusAdministrationClient.
+    """
+    Interact with the ServiceBusAdministrationClient.

     This can create, update, list, and delete resources of a Service Bus
     namespace. This hook uses the same Azure Service Bus client connection
@@ -100,7 +101,8 @@ class AdminClientHook(BaseAzureServiceBusHook):
     """

     def get_conn(self) -> ServiceBusAdministrationClient:
-        """Create a ServiceBusAdministrationClient instance.
+        """
+        Create a ServiceBusAdministrationClient instance.

         This uses the connection string in connection details.
         """
@@ -190,7 +192,8 @@ class AdminClientHook(BaseAzureServiceBusHook):


 class MessageHook(BaseAzureServiceBusHook):
-    """Interact with ServiceBusClient.
+    """
+    Interact with ServiceBusClient.

     This acts as a high level interface for getting ServiceBusSender and ServiceBusReceiver.
     """
@@ -225,7 +228,8 @@ class MessageHook(BaseAzureServiceBusHook):
         return client

     def send_message(self, queue_name: str, messages: str | list[str], batch_message_flag: bool = False):
-        """Use ServiceBusClient Send to send message(s) to a Service Bus Queue.
+        """
+        Use ServiceBusClient Send to send message(s) to a Service Bus Queue.

         By using ``batch_message_flag``, it enables and send message as batch message.

@@ -295,7 +299,8 @@ class MessageHook(BaseAzureServiceBusHook):
         max_message_count: int | None,
         max_wait_time: float | None,
     ):
-        """Receive a batch of subscription message at once.
+        """
+        Receive a batch of subscription message at once.

         This approach is optimal if you wish to process multiple messages
         simultaneously, or perform an ad-hoc receive as a single call.
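
As a quick illustration of the ``send_message`` signature shown in the hunk above, a minimal sketch (the queue name and payloads are hypothetical; the connection ID assumes the hook's default ``azure_service_bus_default`` connection):

    from airflow.providers.microsoft.azure.hooks.asb import MessageHook

    # Assumes an Airflow connection that stores a Service Bus connection string.
    hook = MessageHook(azure_service_bus_conn_id="azure_service_bus_default")
    # batch_message_flag=True sends both payloads as a single batch message.
    hook.send_message(queue_name="my-queue", messages=["event-1", "event-2"], batch_message_flag=True)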
--- a/airflow/providers/microsoft/azure/hooks/data_lake.py
+++ b/airflow/providers/microsoft/azure/hooks/data_lake.py
@@ -45,7 +45,8 @@ Credentials = Union[ClientSecretCredential, AzureIdentityCredentialAdapter, Defa


 class AzureDataLakeHook(BaseHook):
-    """Integration with Azure Data Lake.
+    """
+    Integration with Azure Data Lake.

     AzureDataLakeHook communicates via a REST API compatible with WebHDFS. Make
     sure that a Airflow connection of type ``azure_data_lake`` exists.
@@ -135,7 +136,8 @@ class AzureDataLakeHook(BaseHook):
         return self._conn

     def check_for_file(self, file_path: str) -> bool:
-        """Check if a file exists on Azure Data Lake.
+        """
+        Check if a file exists on Azure Data Lake.

         :param file_path: Path and name of the file.
         :return: True if the file exists, False otherwise.
@@ -156,7 +158,8 @@ class AzureDataLakeHook(BaseHook):
         blocksize: int = 4194304,
         **kwargs,
     ) -> None:
-        """Upload a file to Azure Data Lake.
+        """
+        Upload a file to Azure Data Lake.

         :param local_path: local path. Can be single file, directory (in which case,
             upload recursively) or glob pattern. Recursive glob patterns using `**`
@@ -196,7 +199,8 @@ class AzureDataLakeHook(BaseHook):
         blocksize: int = 4194304,
         **kwargs,
     ) -> None:
-        """Download a file from Azure Blob Storage.
+        """
+        Download a file from Azure Blob Storage.

         :param local_path: local path. If downloading a single file, will write to this
             specific file, unless it is an existing directory, in which case a file is
@@ -228,7 +232,8 @@ class AzureDataLakeHook(BaseHook):
         )

     def list(self, path: str) -> list:
-        """List files in Azure Data Lake Storage.
+        """
+        List files in Azure Data Lake Storage.

         :param path: full path/globstring to use to list files in ADLS
         """
@@ -238,7 +243,8 @@ class AzureDataLakeHook(BaseHook):
         return self.get_conn().walk(path)

     def remove(self, path: str, recursive: bool = False, ignore_not_found: bool = True) -> None:
-        """Remove files in Azure Data Lake Storage.
+        """
+        Remove files in Azure Data Lake Storage.

         :param path: A directory or file to remove in ADLS
         :param recursive: Whether to loop into directories in the location and remove the files
@@ -254,7 +260,8 @@ class AzureDataLakeHook(BaseHook):


 class AzureDataLakeStorageV2Hook(BaseHook):
-    """Interact with a ADLS gen2 storage account.
+    """
+    Interact with a ADLS gen2 storage account.

     It mainly helps to create and manage directories and files in storage
     accounts that have a hierarchical namespace. Using Adls_v2 connection
@@ -375,7 +382,8 @@ class AzureDataLakeStorageV2Hook(BaseHook):
         )

     def create_file_system(self, file_system_name: str) -> None:
-        """Create a new file system under the specified account.
+        """
+        Create a new file system under the specified account.

         A container acts as a file system for your files.

@@ -393,7 +401,8 @@ class AzureDataLakeStorageV2Hook(BaseHook):
             raise

     def get_file_system(self, file_system: FileSystemProperties | str) -> FileSystemClient:
-        """Get a client to interact with the specified file system.
+        """
+        Get a client to interact with the specified file system.

         :param file_system: This can either be the name of the file system
             or an instance of FileSystemProperties.
@@ -411,7 +420,8 @@ class AzureDataLakeStorageV2Hook(BaseHook):
     def create_directory(
         self, file_system_name: FileSystemProperties | str, directory_name: str, **kwargs
     ) -> DataLakeDirectoryClient:
-        """Create a directory under the specified file system.
+        """
+        Create a directory under the specified file system.

         :param file_system_name: Name of the file system or instance of FileSystemProperties.
         :param directory_name: Name of the directory which needs to be created in the file system.
@@ -424,7 +434,8 @@ class AzureDataLakeStorageV2Hook(BaseHook):
         file_system_name: FileSystemProperties | str,
         directory_name: DirectoryProperties | str,
     ) -> DataLakeDirectoryClient:
-        """Get the specific directory under the specified file system.
+        """
+        Get the specific directory under the specified file system.

         :param file_system_name: Name of the file system or instance of FileSystemProperties.
         :param directory_name: Name of the directory or instance of DirectoryProperties which needs to be
@@ -443,7 +454,8 @@ class AzureDataLakeStorageV2Hook(BaseHook):
             raise

     def create_file(self, file_system_name: FileSystemProperties | str, file_name: str) -> DataLakeFileClient:
-        """Create a file under the file system.
+        """
+        Create a file under the file system.

         :param file_system_name: Name of the file system or instance of FileSystemProperties.
         :param file_name: Name of the file which needs to be created in the file system.
@@ -459,7 +471,8 @@ class AzureDataLakeStorageV2Hook(BaseHook):
         overwrite: bool = False,
         **kwargs: Any,
     ) -> None:
-        """Create a file with data in the file system.
+        """
+        Create a file with data in the file system.

         :param file_system_name: Name of the file system or instance of FileSystemProperties.
         :param file_name: Name of the file to be created with name.
@@ -479,7 +492,8 @@ class AzureDataLakeStorageV2Hook(BaseHook):
         overwrite: bool = False,
         **kwargs: Any,
     ) -> None:
-        """Upload data to a file.
+        """
+        Upload data to a file.

         :param file_system_name: Name of the file system or instance of FileSystemProperties.
         :param directory_name: Name of the directory.
@@ -495,7 +509,8 @@ class AzureDataLakeStorageV2Hook(BaseHook):
     def list_files_directory(
         self, file_system_name: FileSystemProperties | str, directory_name: str
     ) -> list[str]:
-        """List files or directories under the specified file system.
+        """
+        List files or directories under the specified file system.

         :param file_system_name: Name of the file system or instance of FileSystemProperties.
         :param directory_name: Name of the directory.
@@ -509,7 +524,8 @@ class AzureDataLakeStorageV2Hook(BaseHook):
     def list_file_system(
         self, prefix: str | None = None, include_metadata: bool = False, **kwargs: Any
     ) -> list[str]:
-        """List file systems under the specified account.
+        """
+        List file systems under the specified account.

         :param prefix:
             Filters the results to return only file systems whose names
@@ -526,7 +542,8 @@ class AzureDataLakeStorageV2Hook(BaseHook):
         return file_system_list

     def delete_file_system(self, file_system_name: FileSystemProperties | str) -> None:
-        """Delete the file system.
+        """
+        Delete the file system.

         :param file_system_name: Name of the file system or instance of FileSystemProperties.
         """
@@ -540,7 +557,8 @@ class AzureDataLakeStorageV2Hook(BaseHook):
             raise

     def delete_directory(self, file_system_name: FileSystemProperties | str, directory_name: str) -> None:
-        """Delete the specified directory in a file system.
+        """
+        Delete the specified directory in a file system.

         :param file_system_name: Name of the file system or instance of FileSystemProperties.
         :param directory_name: Name of the directory.
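
For orientation, a minimal sketch of the ADLS Gen2 hook methods whose docstrings are reflowed above; the container and directory names are hypothetical, and ``adls_conn_id="adls_default"`` is an assumed connection ID:

    from airflow.providers.microsoft.azure.hooks.data_lake import AzureDataLakeStorageV2Hook

    hook = AzureDataLakeStorageV2Hook(adls_conn_id="adls_default")
    hook.create_file_system("my-container")            # a container acts as the file system
    hook.create_directory("my-container", "raw/2024")  # directory in the hierarchical namespace
    print(hook.list_files_directory("my-container", "raw"))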
--- a/airflow/providers/microsoft/azure/operators/asb.py
+++ b/airflow/providers/microsoft/azure/operators/asb.py
@@ -32,7 +32,8 @@ if TYPE_CHECKING:


 class AzureServiceBusCreateQueueOperator(BaseOperator):
-    """Create a Azure Service Bus queue under a Service Bus Namespace.
+    """
+    Create a Azure Service Bus queue under a Service Bus Namespace.

     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -84,7 +85,8 @@ class AzureServiceBusCreateQueueOperator(BaseOperator):


 class AzureServiceBusSendMessageOperator(BaseOperator):
-    """Send Message or batch message to the Service Bus queue.
+    """
+    Send Message or batch message to the Service Bus queue.

     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -126,7 +128,8 @@ class AzureServiceBusSendMessageOperator(BaseOperator):


 class AzureServiceBusReceiveMessageOperator(BaseOperator):
-    """Receive a batch of messages at once in a specified Queue name.
+    """
+    Receive a batch of messages at once in a specified Queue name.

     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -169,7 +172,8 @@ class AzureServiceBusReceiveMessageOperator(BaseOperator):


 class AzureServiceBusDeleteQueueOperator(BaseOperator):
-    """Delete the Queue in the Azure Service Bus namespace.
+    """
+    Delete the Queue in the Azure Service Bus namespace.

     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -204,7 +208,8 @@ class AzureServiceBusDeleteQueueOperator(BaseOperator):


 class AzureServiceBusTopicCreateOperator(BaseOperator):
-    """Create an Azure Service Bus Topic under a Service Bus Namespace.
+    """
+    Create an Azure Service Bus Topic under a Service Bus Namespace.

     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -325,7 +330,8 @@ class AzureServiceBusTopicCreateOperator(BaseOperator):


 class AzureServiceBusSubscriptionCreateOperator(BaseOperator):
-    """Create an Azure Service Bus Topic Subscription under a Service Bus Namespace.
+    """
+    Create an Azure Service Bus Topic Subscription under a Service Bus Namespace.

     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -431,7 +437,8 @@ class AzureServiceBusSubscriptionCreateOperator(BaseOperator):


 class AzureServiceBusUpdateSubscriptionOperator(BaseOperator):
-    """Update an Azure ServiceBus Topic Subscription under a ServiceBus Namespace.
+    """
+    Update an Azure ServiceBus Topic Subscription under a ServiceBus Namespace.

     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -490,7 +497,8 @@ class AzureServiceBusUpdateSubscriptionOperator(BaseOperator):


 class ASBReceiveSubscriptionMessageOperator(BaseOperator):
-    """Receive a Batch messages from a Service Bus Subscription under specific Topic.
+    """
+    Receive a Batch messages from a Service Bus Subscription under specific Topic.

     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -541,7 +549,8 @@ class ASBReceiveSubscriptionMessageOperator(BaseOperator):


 class AzureServiceBusSubscriptionDeleteOperator(BaseOperator):
-    """Delete the topic subscription in the Azure ServiceBus namespace.
+    """
+    Delete the topic subscription in the Azure ServiceBus namespace.

     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -579,7 +588,8 @@ class AzureServiceBusSubscriptionDeleteOperator(BaseOperator):


 class AzureServiceBusTopicDeleteOperator(BaseOperator):
-    """Delete the topic in the Azure Service Bus namespace.
+    """
+    Delete the topic in the Azure Service Bus namespace.

     .. seealso::
         For more information on how to use this operator, take a look at the guide:
--- a/airflow/providers/microsoft/azure/operators/cosmos.py
+++ b/airflow/providers/microsoft/azure/operators/cosmos.py
@@ -27,7 +27,8 @@ if TYPE_CHECKING:


 class AzureCosmosInsertDocumentOperator(BaseOperator):
-    """Insert a new document into the specified Cosmos database and collection.
+    """
+    Insert a new document into the specified Cosmos database and collection.

     Both the database and collection will be created automatically if they do
     not already exist.
--- a/airflow/providers/microsoft/azure/sensors/data_factory.py
+++ b/airflow/providers/microsoft/azure/sensors/data_factory.py
@@ -101,7 +101,8 @@ class AzureDataFactoryPipelineRunStatusSensor(BaseSensorOperator):
         return pipeline_run_status == AzureDataFactoryPipelineRunStatus.SUCCEEDED

     def execute(self, context: Context) -> None:
-        """Poll for state of the job run.
+        """
+        Poll for state of the job run.

         In deferrable mode, the polling is deferred to the triggerer. Otherwise
         the sensor waits synchronously.
--- a/airflow/providers/microsoft/azure/sensors/wasb.py
+++ b/airflow/providers/microsoft/azure/sensors/wasb.py
@@ -74,7 +74,8 @@ class WasbBlobSensor(BaseSensorOperator):
         return hook.check_for_blob(self.container_name, self.blob_name, **self.check_options)

     def execute(self, context: Context) -> None:
-        """Poll for state of the job run.
+        """
+        Poll for state of the job run.

         In deferrable mode, the polling is deferred to the triggerer. Otherwise
         the sensor waits synchronously.
@@ -184,7 +185,8 @@ class WasbPrefixSensor(BaseSensorOperator):
         return hook.check_for_prefix(self.container_name, self.prefix, **self.check_options)

     def execute(self, context: Context) -> None:
-        """Poll for state of the job run.
+        """
+        Poll for state of the job run.

         In deferrable mode, the polling is deferred to the triggerer. Otherwise
         the sensor waits synchronously.
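
The ``execute`` docstrings above describe deferrable polling; a sketch of a blob sensor that defers to the triggerer (the container and blob names are hypothetical, and the ``deferrable`` flag is assumed to be available as in current sensor versions):

    from airflow.providers.microsoft.azure.sensors.wasb import WasbBlobSensor

    wait_for_blob = WasbBlobSensor(
        task_id="wait_for_blob",
        container_name="landing",
        blob_name="data/input.csv",
        deferrable=True,  # poll from the triggerer instead of holding a worker slot
    )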
--- /dev/null
+++ b/airflow/providers/microsoft/azure/transfers/s3_to_wasb.py
@@ -0,0 +1,266 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import tempfile
+from functools import cached_property
+from typing import TYPE_CHECKING, Sequence
+
+from airflow.models import BaseOperator
+from airflow.providers.amazon.aws.hooks.s3 import S3Hook
+from airflow.providers.microsoft.azure.hooks.wasb import WasbHook
+
+if TYPE_CHECKING:
+    from airflow.utils.context import Context
+
+
+# Create three custom exception that are
+class TooManyFilesToMoveException(Exception):
+    """Custom exception thrown when attempting to move multiple files from S3 to a single Azure Blob."""
+
+    def __init__(self, number_of_files: int):
+        # Call the parent constructor with a simple message
+        message: str = f"{number_of_files} cannot be moved to a single Azure Blob."
+        super().__init__(message)
+
+
+class InvalidAzureBlobParameters(Exception):
+    """Custom exception raised when neither a blob_prefix or blob_name are passed to the operator."""
+
+    def __init__(self):
+        message: str = "One of blob_name or blob_prefix must be provided."
+        super().__init__(message)
+
+
+class InvalidKeyComponents(Exception):
+    """Custom exception raised when neither a full_path or file_name + prefix are provided to _create_key."""
+
+    def __init__(self):
+        message = "Either full_path of prefix and file_name must not be None"
+        super().__init__(message)
+
+
+class S3ToAzureBlobStorageOperator(BaseOperator):
+    """
+    Operator to move data from and AWS S3 Bucket to Microsoft Azure Blob Storage.
+
+    A similar class exists to move data from Microsoft Azure Blob Storage to an AWS S3 Bucket, and lives in
+    the airflow/providers/amazon/aws/transfers/azure_blob_to_s3.py file
+
+    Either an explicit S3 key can be provided, or a prefix containing the files that are to be transferred to
+    Azure blob storage. The same holds for a Blob name; an explicit name can be passed, or a Blob prefix can
+    be provided for the file to be stored to
+
+    .. seealso:
+        For more information on how to use this operator, take a look at the guide:
+        :ref:`howto/operator::SFTPToWasbOperator`
+
+    :param aws_conn_id: ID for the AWS S3 connection to use.
+    :param wasb_conn_id: ID for the Azure Blob Storage connection to use.
+    :param s3_bucket: The name of the AWS S3 bucket that an object (or objects) would be transferred from.
+        (templated)
+    :param container_name: The name of the Azure Storage Blob container an object (or objects) would be
+        transferred to. (templated)
+    :param s3_prefix: Prefix string that filters any S3 objects that begin with this prefix. (templated)
+    :param s3_key: An explicit S3 key (object) to be transferred. (templated)
+    :param blob_prefix: Prefix string that would provide a path in the Azure Storage Blob container for an
+        object (or objects) to be moved to. (templated)
+    :param blob_name: An explicit blob name that an object would be transferred to. This can only be used
+        if a single file is being moved. If there are multiple files in an S3 bucket that are to be moved
+        to a single Azure blob, an exception will be raised. (templated)
+    :param create_container: True if a container should be created if it did not already exist, False
+        otherwise.
+    :param replace: If a blob exists in the container and replace takes a value of true, it will be
+        overwritten. If replace is False and a blob exists in the container, the file will NOT be
+        overwritten.
+    :param s3_verify: Whether or not to verify SSL certificates for S3 connection.
+        By default, SSL certificates are verified.
+        You can provide the following values:
+
+        - ``False``: do not validate SSL certificates. SSL will still be used
+            (unless use_ssl is False), but SSL certificates will not be
+            verified.
+        - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to uses.
+            You can specify this argument if you want to use a different
+            CA cert bundle than the one used by botocore.
+    :param s3_extra_args: kwargs to pass to S3Hook.
+    :param wasb_extra_args: kwargs to pass to WasbHook.
+    """
+
+    template_fields: Sequence[str] = (
+        "s3_bucket",
+        "container_name",
+        "s3_prefix",
+        "s3_key",
+        "blob_prefix",
+        "blob_name",
+    )
+
+    def __init__(
+        self,
+        *,
+        aws_conn_id: str = "aws_default",
+        wasb_conn_id: str = "wasb_default",
+        s3_bucket: str,
+        container_name: str,
+        s3_prefix: str | None = None,  # Only use this to pull an entire directory of files
+        s3_key: str | None = None,  # Only use this to pull a single file
+        blob_prefix: str | None = None,
+        blob_name: str | None = None,
+        create_container: bool = False,
+        replace: bool = False,
+        s3_verify: bool = False,
+        s3_extra_args: dict | None = None,
+        wasb_extra_args: dict | None = None,
+        **kwargs,
+    ):
+        # Call to constructor of the inherited BaseOperator class
+        super().__init__(**kwargs)
+
+        self.aws_conn_id = aws_conn_id
+        self.wasb_conn_id = wasb_conn_id
+        self.s3_bucket = s3_bucket
+        self.container_name = container_name
+        self.s3_prefix = s3_prefix
+        self.s3_key = s3_key
+        self.blob_prefix = blob_prefix
+        self.blob_name = blob_name
+        self.create_container = create_container
+        self.replace = replace
+        self.s3_verify = s3_verify
+        self.s3_extra_args = s3_extra_args or {}
+        self.wasb_extra_args = wasb_extra_args or {}
+
+    # These cached properties come in handy when working with hooks. Rather than closing and opening new
+    # hooks, the same hook can be used across multiple methods (without having to use the constructor to
+    # create the hook)
+    @cached_property
+    def s3_hook(self) -> S3Hook:
+        """Create and return an S3Hook."""
+        return S3Hook(aws_conn_id=self.aws_conn_id, verify=self.s3_verify, **self.s3_extra_args)
+
+    @cached_property
+    def wasb_hook(self) -> WasbHook:
+        """Create and return a WasbHook."""
+        return WasbHook(wasb_conn_id=self.wasb_conn_id, **self.wasb_extra_args)
+
+    def execute(self, context: Context) -> list[str]:
+        """Execute logic below when operator is executed as a task."""
+        self.log.info(
+            "Getting %s from %s" if self.s3_key else "Getting all files start with %s from %s",
+            self.s3_key if self.s3_key else self.s3_prefix,
+            self.s3_bucket,
+        )
+
+        # Pull a list of files to move from S3 to Azure Blob storage
+        files_to_move: list[str] = self.get_files_to_move()
+
+        # Check to see if there are indeed files to move. If so, move each of these files. Otherwise, output
+        # a logging message that denotes there are no files to move
+        if files_to_move:
+            for file_name in files_to_move:
+                self.move_file(file_name)
+
+            # Assuming that files_to_move is a list (which it always should be), this will get "hit" after the
+            # last file is moved from S3 -> Azure Blob
+            self.log.info("All done, uploaded %s to Azure Blob.", len(files_to_move))
+
+        else:
+            # If there are no files to move, a message will be logged. May want to consider alternative
+            # functionality (should an exception instead be raised?)
+            self.log.info("There are no files to move!")
+
+        # Return a list of the files that were moved
+        return files_to_move
+
+    def get_files_to_move(self) -> list[str]:
+        """Determine the list of files that need to be moved, and return the name."""
+        if self.s3_key:
+            # Only pull the file name from the s3_key, drop the rest of the key
+            files_to_move: list[str] = [self.s3_key.split("/")[-1]]
+        else:
+            # Pull the keys from the s3_bucket using the provided prefix. Remove the prefix from the file
+            # name, and add to the list of files to move
+            s3_keys: list[str] = self.s3_hook.list_keys(bucket_name=self.s3_bucket, prefix=self.s3_prefix)
+            files_to_move = [s3_key.replace(f"{self.s3_prefix}/", "", 1) for s3_key in s3_keys]
+
+        # Now, make sure that there are not too many files to move to a single Azure blob
+        if self.blob_name and len(files_to_move) > 1:
+            raise TooManyFilesToMoveException(len(files_to_move))
+
+        if not self.replace:
+            # Only grab the files from S3 that are not in Azure Blob already. This will prevent any files that
+            # exist in both S3 and Azure Blob from being overwritten. If a blob_name is provided, check to
+            # see if that blob exists
+            azure_blob_files: list[str] = []
+
+            if self.blob_name:
+                # If the singular blob (stored at self.blob_name) exists, add it to azure_blob_files so it
+                # can be removed from the list of files to move
+                if self.wasb_hook.check_for_blob(self.container_name, self.blob_name):
+                    azure_blob_files.append(self.blob_name.split("/")[-1])
+
+            elif self.blob_prefix:
+                azure_blob_files += self.wasb_hook.get_blobs_list_recursive(
+                    container_name=self.container_name, prefix=self.blob_prefix
+                )
+            else:
+                raise InvalidAzureBlobParameters
+
+            # This conditional block only does one thing - it alters the elements in the files_to_move list.
+            # This list is being trimmed to remove the existing files in the Azure Blob (as mentioned above)
+            existing_files = azure_blob_files if azure_blob_files else []
+            files_to_move = list(set(files_to_move) - set(existing_files))
+
+        return files_to_move
+
+    def move_file(self, file_name: str) -> None:
+        """Move file from S3 to Azure Blob storage."""
+        with tempfile.NamedTemporaryFile("w") as temp_file:
+            # If using an s3_key, this creates a scenario where the only file in the files_to_move
+            # list is going to be the name pulled from the s3_key. It's not verbose, but provides
+            # standard implementation across the operator
+            source_s3_key: str = self._create_key(self.s3_key, self.s3_prefix, file_name)
+
+            # Create retrieve the S3 client itself, rather than directly using the hook. Download the file to
+            # the temp_file.name
+            s3_client = self.s3_hook.get_conn()
+            s3_client.download_file(self.s3_bucket, source_s3_key, temp_file.name)
+
+            # Load the file to Azure Blob using either the key that has been passed in, or the key
+            # from the list of files present in the s3_prefix, plus the blob_prefix. There may be
+            # desire to only pass in an S3 key, in which case, the blob_name should be derived from
+            # the S3 key
+            destination_azure_blob_name: str = self._create_key(self.blob_name, self.blob_prefix, file_name)
+            self.wasb_hook.load_file(
+                file_path=temp_file.name,
+                container_name=self.container_name,
+                blob_name=destination_azure_blob_name,
+                create_container=self.create_container,
+                **self.wasb_extra_args,
+            )
+
+    @staticmethod
+    def _create_key(full_path: str | None, prefix: str | None, file_name: str | None):
+        """Return a file key using its components."""
+        if full_path:
+            return full_path
+        elif prefix and file_name:
+            return f"{prefix}/{file_name}"
+        else:
+            raise InvalidKeyComponents
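
A minimal sketch of how the new transfer operator might be wired into a DAG; the bucket, container, and prefix names are hypothetical, and the connection IDs fall back to the constructor defaults (``aws_default`` and ``wasb_default``):

    from airflow.providers.microsoft.azure.transfers.s3_to_wasb import S3ToAzureBlobStorageOperator

    transfer_reports = S3ToAzureBlobStorageOperator(
        task_id="s3_to_blob",
        s3_bucket="my-s3-bucket",
        s3_prefix="exports/2024",      # move every object under this prefix...
        container_name="my-container",
        blob_prefix="imports/2024",    # ...into blobs under this prefix
        create_container=True,
        replace=False,                 # blobs already in the container are skipped
    )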
--- a/airflow/providers/microsoft/azure/triggers/data_factory.py
+++ b/airflow/providers/microsoft/azure/triggers/data_factory.py
@@ -30,7 +30,8 @@ from airflow.triggers.base import BaseTrigger, TriggerEvent


 class ADFPipelineRunStatusSensorTrigger(BaseTrigger):
-    """Trigger with params to run the task when the ADF Pipeline is running.
+    """
+    Trigger with params to run the task when the ADF Pipeline is running.

     :param run_id: The pipeline run identifier.
     :param azure_data_factory_conn_id: The connection identifier for connecting to Azure Data Factory.
@@ -110,7 +111,8 @@ class ADFPipelineRunStatusSensorTrigger(BaseTrigger):


 class AzureDataFactoryTrigger(BaseTrigger):
-    """Trigger when the Azure data factory pipeline job finishes.
+    """
+    Trigger when the Azure data factory pipeline job finishes.

     When wait_for_termination is set to False, it triggers immediately with success status.

--- a/airflow/providers/microsoft/azure/utils.py
+++ b/airflow/providers/microsoft/azure/utils.py
@@ -62,7 +62,8 @@ def _get_default_azure_credential(
    workload_identity_tenant_id: str | None = None,
    use_async: bool = False,
 ) -> DefaultAzureCredential | AsyncDefaultAzureCredential:
-    """Get DefaultAzureCredential based on provided arguments.
+    """
+    Get DefaultAzureCredential based on provided arguments.

     If managed_identity_client_id and workload_identity_tenant_id are provided, this function returns
     DefaultAzureCredential with managed identity.
@@ -115,7 +116,8 @@ def add_managed_identity_connection_widgets(func):


 class AzureIdentityCredentialAdapter(BasicTokenAuthentication):
-    """Adapt azure-identity credentials for backward compatibility.
+    """
+    Adapt azure-identity credentials for backward compatibility.

     Adapt credentials from azure-identity to be compatible with SD
     that needs msrestazure or azure.common.credentials
@@ -132,7 +134,8 @@ class AzureIdentityCredentialAdapter(BasicTokenAuthentication):
         workload_identity_tenant_id: str | None = None,
         **kwargs,
     ):
-        """Adapt azure-identity credentials for backward compatibility.
+        """
+        Adapt azure-identity credentials for backward compatibility.

         :param credential: Any azure-identity credential (DefaultAzureCredential by default)
         :param resource_id: The scope to use to get the token (default ARM)
@@ -156,7 +159,8 @@ class AzureIdentityCredentialAdapter(BasicTokenAuthentication):
         )

     def set_token(self):
-        """Ask the azure-core BearerTokenCredentialPolicy policy to get a token.
+        """
+        Ask the azure-core BearerTokenCredentialPolicy policy to get a token.

         Using the policy gives us for free the caching system of azure-core.
         We could make this code simpler by using private method, but by definition
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -28,7 +28,7 @@ build-backend = "flit_core.buildapi"

 [project]
 name = "apache-airflow-providers-microsoft-azure"
-version = "10.1.2.rc1"
+version = "10.2.0.rc1"
 description = "Provider package apache-airflow-providers-microsoft-azure for Apache Airflow"
 readme = "README.rst"
 authors = [
@@ -82,8 +82,8 @@ dependencies = [
 ]

 [project.urls]
-"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-microsoft-azure/10.1.2"
-"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-microsoft-azure/10.1.2/changelog.html"
+"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-microsoft-azure/10.2.0"
+"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-microsoft-azure/10.2.0/changelog.html"
 "Bug Tracker" = "https://github.com/apache/airflow/issues"
 "Source Code" = "https://github.com/apache/airflow"
 "Slack Chat" = "https://s.apache.org/airflow-slack"
@@ -93,6 +93,9 @@ dependencies = [
 [project.entry-points."apache_airflow_provider"]
 provider_info = "airflow.providers.microsoft.azure.get_provider_info:get_provider_info"
 [project.optional-dependencies]
+"amazon" = [
+    "apache-airflow-providers-amazon",
+]
 "google" = [
     "apache-airflow-providers-google",
 ]