apache-airflow-providers-google 18.0.0__py3-none-any.whl → 18.1.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apache-airflow-providers-google might be problematic.
- airflow/providers/google/__init__.py +1 -1
- airflow/providers/google/ads/hooks/ads.py +5 -5
- airflow/providers/google/assets/gcs.py +1 -11
- airflow/providers/google/cloud/bundles/__init__.py +16 -0
- airflow/providers/google/cloud/bundles/gcs.py +161 -0
- airflow/providers/google/cloud/hooks/bigquery.py +45 -42
- airflow/providers/google/cloud/hooks/cloud_composer.py +131 -1
- airflow/providers/google/cloud/hooks/cloud_sql.py +88 -13
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +16 -0
- airflow/providers/google/cloud/hooks/dataflow.py +1 -1
- airflow/providers/google/cloud/hooks/dataprep.py +1 -1
- airflow/providers/google/cloud/hooks/dataproc.py +3 -0
- airflow/providers/google/cloud/hooks/gcs.py +107 -3
- airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
- airflow/providers/google/cloud/hooks/looker.py +1 -1
- airflow/providers/google/cloud/hooks/spanner.py +45 -0
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +30 -0
- airflow/providers/google/cloud/links/base.py +11 -11
- airflow/providers/google/cloud/links/dataproc.py +2 -10
- airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
- airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
- airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
- airflow/providers/google/cloud/openlineage/facets.py +102 -1
- airflow/providers/google/cloud/openlineage/mixins.py +3 -1
- airflow/providers/google/cloud/operators/bigquery.py +2 -9
- airflow/providers/google/cloud/operators/cloud_run.py +2 -1
- airflow/providers/google/cloud/operators/cloud_sql.py +1 -1
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +89 -6
- airflow/providers/google/cloud/operators/datafusion.py +36 -7
- airflow/providers/google/cloud/operators/gen_ai.py +389 -0
- airflow/providers/google/cloud/operators/spanner.py +22 -6
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +7 -0
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +30 -0
- airflow/providers/google/cloud/operators/workflows.py +17 -6
- airflow/providers/google/cloud/sensors/bigquery.py +1 -1
- airflow/providers/google/cloud/sensors/bigquery_dts.py +1 -6
- airflow/providers/google/cloud/sensors/bigtable.py +1 -6
- airflow/providers/google/cloud/sensors/cloud_composer.py +65 -31
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +1 -6
- airflow/providers/google/cloud/sensors/dataflow.py +1 -1
- airflow/providers/google/cloud/sensors/dataform.py +1 -6
- airflow/providers/google/cloud/sensors/datafusion.py +1 -6
- airflow/providers/google/cloud/sensors/dataplex.py +1 -6
- airflow/providers/google/cloud/sensors/dataprep.py +1 -6
- airflow/providers/google/cloud/sensors/dataproc.py +1 -6
- airflow/providers/google/cloud/sensors/dataproc_metastore.py +1 -6
- airflow/providers/google/cloud/sensors/gcs.py +1 -7
- airflow/providers/google/cloud/sensors/looker.py +1 -6
- airflow/providers/google/cloud/sensors/pubsub.py +1 -6
- airflow/providers/google/cloud/sensors/tasks.py +1 -6
- airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +1 -6
- airflow/providers/google/cloud/sensors/workflows.py +1 -6
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +2 -1
- airflow/providers/google/cloud/transfers/sftp_to_gcs.py +11 -2
- airflow/providers/google/cloud/triggers/bigquery.py +15 -3
- airflow/providers/google/cloud/triggers/cloud_composer.py +51 -21
- airflow/providers/google/cloud/triggers/cloud_run.py +1 -1
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +90 -0
- airflow/providers/google/cloud/triggers/pubsub.py +14 -18
- airflow/providers/google/common/hooks/base_google.py +1 -1
- airflow/providers/google/get_provider_info.py +15 -0
- airflow/providers/google/leveldb/hooks/leveldb.py +1 -1
- airflow/providers/google/marketing_platform/links/analytics_admin.py +2 -8
- airflow/providers/google/marketing_platform/sensors/campaign_manager.py +1 -6
- airflow/providers/google/marketing_platform/sensors/display_video.py +1 -6
- airflow/providers/google/suite/sensors/drive.py +1 -6
- airflow/providers/google/version_compat.py +0 -20
- {apache_airflow_providers_google-18.0.0.dist-info → apache_airflow_providers_google-18.1.0rc1.dist-info}/METADATA +15 -15
- {apache_airflow_providers_google-18.0.0.dist-info → apache_airflow_providers_google-18.1.0rc1.dist-info}/RECORD +72 -65
- {apache_airflow_providers_google-18.0.0.dist-info → apache_airflow_providers_google-18.1.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-18.0.0.dist-info → apache_airflow_providers_google-18.1.0rc1.dist-info}/entry_points.txt +0 -0
airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py (+30 -0):

@@ -90,6 +90,11 @@ class GenerativeModelHook(GoogleBaseHook):
         cached_context_model = preview_generative_model.GenerativeModel.from_cached_content(cached_content)
         return cached_context_model

+    @deprecated(
+        planned_removal_date="January 3, 2026",
+        use_instead="airflow.providers.google.cloud.hooks.gen_ai.generative_model.GenAIGenerativeModelHook.embed_content",
+        category=AirflowProviderDeprecationWarning,
+    )
     @GoogleBaseHook.fallback_to_default_project_id
     def text_embedding_model_get_embeddings(
         self,
@@ -114,6 +119,11 @@ class GenerativeModelHook(GoogleBaseHook):

         return response.values

+    @deprecated(
+        planned_removal_date="January 3, 2026",
+        use_instead="airflow.providers.google.cloud.hooks.gen_ai.generative_model.GenAIGenerativeModelHook.generate_content",
+        category=AirflowProviderDeprecationWarning,
+    )
     @GoogleBaseHook.fallback_to_default_project_id
     def generative_model_generate_content(
         self,
@@ -156,6 +166,11 @@ class GenerativeModelHook(GoogleBaseHook):

         return response.text

+    @deprecated(
+        planned_removal_date="January 3, 2026",
+        use_instead="airflow.providers.google.cloud.hooks.gen_ai.generative_model.GenAIGenerativeModelHook.supervised_fine_tuning_train",
+        category=AirflowProviderDeprecationWarning,
+    )
     @GoogleBaseHook.fallback_to_default_project_id
     def supervised_fine_tuning_train(
         self,
@@ -209,6 +224,11 @@ class GenerativeModelHook(GoogleBaseHook):

         return sft_tuning_job

+    @deprecated(
+        planned_removal_date="January 3, 2026",
+        use_instead="airflow.providers.google.cloud.hooks.gen_ai.generative_model.GenAIGenerativeModelHook.count_tokens",
+        category=AirflowProviderDeprecationWarning,
+    )
     @GoogleBaseHook.fallback_to_default_project_id
     def count_tokens(
         self,
@@ -296,6 +316,11 @@ class GenerativeModelHook(GoogleBaseHook):

         return eval_result

+    @deprecated(
+        planned_removal_date="January 3, 2026",
+        use_instead="airflow.providers.google.cloud.hooks.gen_ai.generative_model.GenAIGenerativeModelHook.create_cached_content",
+        category=AirflowProviderDeprecationWarning,
+    )
     def create_cached_content(
         self,
         model_name: str,
@@ -330,6 +355,11 @@ class GenerativeModelHook(GoogleBaseHook):

         return response.name

+    @deprecated(
+        planned_removal_date="January 3, 2026",
+        use_instead="airflow.providers.google.cloud.hooks.gen_ai.generative_model.GenAIGenerativeModelHook.generate_content",
+        category=AirflowProviderDeprecationWarning,
+    )
     def generate_from_cached_content(
         self,
         location: str,
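All six deprecations point at the new Gen AI hook added in this release (see airflow/providers/google/cloud/hooks/gen_ai.py, +196 lines, in the file list above; the use_instead strings reference a hooks.gen_ai.generative_model module path). A rough migration sketch follows; the replacement import path and method signatures below are assumptions inferred from those hints, not confirmed by this diff:

# Migration sketch only; signatures of the replacement hook are not shown in this diff.
from airflow.providers.google.cloud.hooks.gen_ai import GenAIGenerativeModelHook  # assumed path

hook = GenAIGenerativeModelHook(gcp_conn_id="google_cloud_default")
# Mapping taken from the use_instead hints above (deprecated -> replacement):
#   text_embedding_model_get_embeddings()  -> hook.embed_content(...)
#   generative_model_generate_content()    -> hook.generate_content(...)
#   supervised_fine_tuning_train()         -> hook.supervised_fine_tuning_train(...)
#   count_tokens()                         -> hook.count_tokens(...)
#   create_cached_content()                -> hook.create_cached_content(...)
#   generate_from_cached_content()         -> hook.generate_content(...)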
airflow/providers/google/cloud/links/base.py (+11 -11):

@@ -18,18 +18,10 @@
 from __future__ import annotations

 from typing import TYPE_CHECKING, ClassVar
+from urllib.parse import urlparse

-from airflow.providers.google.version_compat import (
-    AIRFLOW_V_3_0_PLUS,
-    BaseOperator,
-    BaseOperatorLink,
-    BaseSensorOperator,
-)
-
-if AIRFLOW_V_3_0_PLUS:
-    from airflow.sdk.execution_time.xcom import XCom
-else:
-    from airflow.models.xcom import XCom  # type: ignore[no-redef]
+from airflow.providers.common.compat.sdk import BaseOperatorLink, BaseSensorOperator, XCom
+from airflow.providers.google.version_compat import AIRFLOW_V_3_0_PLUS, BaseOperator

 if TYPE_CHECKING:
     from airflow.models.taskinstancekey import TaskInstanceKey
@@ -109,6 +101,14 @@ class BaseGoogleLink(BaseOperatorLink):
         if TYPE_CHECKING:
             assert isinstance(operator, (GoogleCloudBaseOperator, BaseSensorOperator))

+        # In cases when worker passes execution to trigger, the value that is put to XCom
+        # already contains link to the object in string format. In this case we don't want to execute
+        # get_config() again. Instead we can leave this value without any changes
+        link_value = XCom.get_value(key=self.key, ti_key=ti_key)
+        if link_value and isinstance(link_value, str):
+            if urlparse(link_value).scheme in ("http", "https"):
+                return link_value
+
         conf = self.get_config(operator, ti_key)
         if not conf:
             return ""
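The new guard means persisted XCom values that are already complete URLs (typically written when a worker hands execution to a trigger) are returned verbatim instead of being rebuilt via get_config(). A minimal standalone sketch of the same check:

from urllib.parse import urlparse


def passthrough_if_url(link_value):
    """Mirror of the guard above: values already stored as http(s) URLs pass through unchanged."""
    if link_value and isinstance(link_value, str):
        if urlparse(link_value).scheme in ("http", "https"):
            return link_value
    return None  # caller falls back to get_config() and rebuilds the link


assert passthrough_if_url("https://console.cloud.google.com/bigquery?j=abc") is not None
assert passthrough_if_url({"project_id": "example"}) is None  # dict config -> rebuild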
airflow/providers/google/cloud/links/dataproc.py (+2 -10):

@@ -25,22 +25,14 @@ from typing import TYPE_CHECKING, Any
 import attr

 from airflow.exceptions import AirflowProviderDeprecationWarning
+from airflow.providers.common.compat.sdk import BaseOperatorLink, XCom
 from airflow.providers.google.cloud.links.base import BASE_LINK, BaseGoogleLink
-from airflow.providers.google.version_compat import (
-    AIRFLOW_V_3_0_PLUS,
-    BaseOperator,
-    BaseOperatorLink,
-)

 if TYPE_CHECKING:
     from airflow.models.taskinstancekey import TaskInstanceKey
+    from airflow.providers.google.version_compat import BaseOperator
     from airflow.utils.context import Context

-if AIRFLOW_V_3_0_PLUS:
-    from airflow.sdk.execution_time.xcom import XCom
-else:
-    from airflow.models.xcom import XCom  # type: ignore[no-redef]
-

 def __getattr__(name: str) -> Any:
     # PEP-562: deprecate module-level variable
airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json (new file, +68 -0):

@@ -0,0 +1,68 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$defs": {
+    "CloudStorageTransferJobFacet": {
+      "allOf": [
+        {
+          "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/JobFacet"
+        },
+        {
+          "type": "object",
+          "properties": {
+            "jobName": {
+              "type": "string",
+              "description": "Transfer job name assigned by GCP Storage Transfer Service."
+            },
+            "projectId": {
+              "type": "string",
+              "description": "GCP project ID."
+            },
+            "description": {
+              "type": "string",
+              "description": "Optional description of the transfer job."
+            },
+            "status": {
+              "type": "string",
+              "description": "Status of the transfer job (ENABLED, DISABLED)."
+            },
+            "sourceBucket": {
+              "type": "string",
+              "description": "Source AWS S3 bucket."
+            },
+            "sourcePath": {
+              "type": "string",
+              "description": "Prefix path inside the source bucket."
+            },
+            "targetBucket": {
+              "type": "string",
+              "description": "Target GCS bucket."
+            },
+            "targetPath": {
+              "type": "string",
+              "description": "Prefix path inside the target bucket."
+            },
+            "objectConditions": {
+              "type": "object",
+              "description": "Filtering conditions for objects transferred."
+            },
+            "transferOptions": {
+              "type": "object",
+              "description": "Transfer options such as overwrite or delete."
+            },
+            "schedule": {
+              "type": "object",
+              "description": "Transfer schedule details."
+            }
+          }
+        }
+      ],
+      "type": "object"
+    }
+  },
+  "type": "object",
+  "properties": {
+    "cloudStorageTransferJob": {
+      "$ref": "#/$defs/CloudStorageTransferJobFacet"
+    }
+  }
+}
airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json (new file, +60 -0):

@@ -0,0 +1,60 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$defs": {
+    "CloudStorageTransferRunFacet": {
+      "allOf": [
+        {
+          "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
+        },
+        {
+          "type": "object",
+          "properties": {
+            "jobName": {
+              "type": "string",
+              "description": "Transfer job name associated with this run."
+            },
+            "operationName": {
+              "type": "string",
+              "description": "Transfer operation name if available."
+            },
+            "status": {
+              "type": "string",
+              "description": "Run status if available."
+            },
+            "startTime": {
+              "type": "string",
+              "description": "Start time of the transfer operation."
+            },
+            "endTime": {
+              "type": "string",
+              "description": "End time of the transfer operation."
+            },
+            "wait": {
+              "type": "boolean",
+              "description": "Whether the operator waited for completion."
+            },
+            "timeout": {
+              "type": ["number", "null"],
+              "description": "Timeout in seconds."
+            },
+            "deferrable": {
+              "type": "boolean",
+              "description": "Whether the operator used deferrable mode."
+            },
+            "deleteJobAfterCompletion": {
+              "type": "boolean",
+              "description": "Whether the transfer job was deleted after completion."
+            }
+          }
+        }
+      ],
+      "type": "object"
+    }
+  },
+  "type": "object",
+  "properties": {
+    "cloudStorageTransferRun": {
+      "$ref": "#/$defs/CloudStorageTransferRunFacet"
+    }
+  }
+}
airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json (new file, +32 -0):

@@ -0,0 +1,32 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$defs": {
+    "DataFusionRunFacet": {
+      "allOf": [
+        {
+          "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
+        },
+        {
+          "type": "object",
+          "properties": {
+            "runId": {
+              "type": "string",
+              "description": "Pipeline run ID assigned by Cloud Data Fusion."
+            },
+            "runtimeArgs": {
+              "type": "object",
+              "description": "Runtime arguments provided when starting the pipeline."
+            }
+          }
+        }
+      ],
+      "type": "object"
+    }
+  },
+  "type": "object",
+  "properties": {
+    "dataFusionRun": {
+      "$ref": "#/$defs/DataFusionRunFacet"
+    }
+  }
+}
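All three schemas follow the same pattern: they extend the upstream OpenLineage JobFacet or RunFacet and add optional provider-specific properties. An illustrative payload for the first schema (all values invented):

# Hypothetical facet payload shaped by CloudStorageTransferJobFacet.json; values are invented.
example_job_facets = {
    "cloudStorageTransferJob": {
        "jobName": "transferJobs/123456789",
        "projectId": "my-gcp-project",
        "status": "ENABLED",
        "sourceBucket": "my-s3-bucket",
        "sourcePath": "exports/",
        "targetBucket": "my-gcs-bucket",
        "targetPath": "imports/",
        "transferOptions": {"overwriteObjectsAlreadyExistingInSink": True},
    }
}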
airflow/providers/google/cloud/openlineage/facets.py (+102 -1):

@@ -24,13 +24,17 @@ from attr import define, field
 from airflow.providers.google import __version__ as provider_version

 if TYPE_CHECKING:
-    from openlineage.client.generated.base import RunFacet
+    from openlineage.client.generated.base import JobFacet, RunFacet

 try:
     try:
         from openlineage.client.generated.base import RunFacet
     except ImportError:  # Old OpenLineage client is used
         from openlineage.client.facet import BaseFacet as RunFacet  # type: ignore[assignment]
+    try:
+        from openlineage.client.generated.base import JobFacet
+    except ImportError:  # Old OpenLineage client is used
+        from openlineage.client.facet import BaseFacet as JobFacet  # type: ignore[assignment]

     @define
     class BigQueryJobRunFacet(RunFacet):
@@ -53,6 +57,100 @@ try:
             f"providers-google/{provider_version}/airflow/providers/google/"
             "openlineage/BigQueryJobRunFacet.json"
         )
+
+    @define
+    class CloudStorageTransferJobFacet(JobFacet):
+        """
+        Facet representing a Cloud Storage Transfer Service job configuration.
+
+        :param jobName: Unique name of the transfer job.
+        :param projectId: GCP project where the transfer job is defined.
+        :param description: User-provided description of the transfer job.
+        :param status: Current status of the transfer job (e.g. "ENABLED", "DISABLED").
+        :param sourceBucket: Name of the source bucket (e.g. AWS S3).
+        :param sourcePath: Prefix/path inside the source bucket.
+        :param targetBucket: Name of the destination bucket (e.g. GCS).
+        :param targetPath: Prefix/path inside the destination bucket.
+        :param objectConditions: Object selection rules (e.g. include/exclude prefixes).
+        :param transferOptions: Transfer options, such as overwrite behavior or whether to delete objects
+            from the source after transfer.
+        :param schedule: Schedule for the transfer job (if recurring).
+        """
+
+        jobName: str | None = field(default=None)
+        projectId: str | None = field(default=None)
+        description: str | None = field(default=None)
+        status: str | None = field(default=None)
+        sourceBucket: str | None = field(default=None)
+        sourcePath: str | None = field(default=None)
+        targetBucket: str | None = field(default=None)
+        targetPath: str | None = field(default=None)
+        objectConditions: dict | None = field(default=None)
+        transferOptions: dict | None = field(default=None)
+        schedule: dict | None = field(default=None)
+
+        @staticmethod
+        def _get_schema() -> str:
+            return (
+                "https://raw.githubusercontent.com/apache/airflow/"
+                f"providers-google/{provider_version}/airflow/providers/google/"
+                "openlineage/CloudStorageTransferJobFacet.json"
+            )
+
+    @define
+    class CloudStorageTransferRunFacet(RunFacet):
+        """
+        Facet representing a Cloud Storage Transfer Service job execution run.
+
+        :param jobName: Name of the transfer job being executed.
+        :param operationName: Name of the specific transfer operation instance.
+        :param status: Current status of the operation (e.g. "IN_PROGRESS", "SUCCESS", "FAILED").
+        :param startTime: Time when the transfer job execution started (ISO 8601 format).
+        :param endTime: Time when the transfer job execution finished (ISO 8601 format).
+        :param wait: Whether the operator waits for the job to complete before finishing.
+        :param timeout: Timeout (in seconds) for the transfer run to complete.
+        :param deferrable: Whether the operator defers execution until job completion.
+        :param deleteJobAfterCompletion: Whether the operator deletes the transfer job after the run completes.
+        """
+
+        jobName: str | None = field(default=None)
+        operationName: str | None = field(default=None)
+        status: str | None = field(default=None)
+        startTime: str | None = field(default=None)
+        endTime: str | None = field(default=None)
+        wait: bool = field(default=True)
+        timeout: float | None = field(default=None)
+        deferrable: bool = field(default=False)
+        deleteJobAfterCompletion: bool = field(default=False)
+
+        @staticmethod
+        def _get_schema() -> str:
+            return (
+                "https://raw.githubusercontent.com/apache/airflow/"
+                f"providers-google/{provider_version}/airflow/providers/google/"
+                "openlineage/CloudStorageTransferRunFacet.json"
+            )
+
+    @define
+    class DataFusionRunFacet(RunFacet):
+        """
+        Facet that represents relevant details of a Cloud Data Fusion pipeline run.
+
+        :param runId: The pipeline execution id.
+        :param runtimeArgs: Runtime arguments passed to the pipeline.
+        """
+
+        runId: str | None = field(default=None)
+        runtimeArgs: dict[str, str] | None = field(default=None)
+
+        @staticmethod
+        def _get_schema() -> str:
+            return (
+                "https://raw.githubusercontent.com/apache/airflow/"
+                f"providers-google/{provider_version}/airflow/providers/google/"
+                "openlineage/DataFusionRunFacet.json"
+            )
+
 except ImportError:  # OpenLineage is not available

     def create_no_op(*_, **__) -> None:
@@ -65,3 +163,6 @@ except ImportError:  # OpenLineage is not available
         return None

     BigQueryJobRunFacet = create_no_op  # type: ignore[misc, assignment]
+    CloudStorageTransferJobFacet = create_no_op  # type: ignore[misc, assignment]
+    CloudStorageTransferRunFacet = create_no_op  # type: ignore[misc, assignment]
+    DataFusionRunFacet = create_no_op  # type: ignore[misc, assignment]
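The except-ImportError branch keeps all four names importable even when the OpenLineage client is absent: with the client installed they are attrs classes, without it they resolve to create_no_op, which accepts any arguments and returns None. A small sketch of what calling code can rely on:

from airflow.providers.google.cloud.openlineage.facets import CloudStorageTransferRunFacet

# attrs instance when the OpenLineage client is installed; None otherwise (create_no_op).
facet = CloudStorageTransferRunFacet(jobName="transferJobs/123456789", wait=True)
if facet is not None:
    print(facet.jobName, facet.deferrable)  # transferJobs/123456789 False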
airflow/providers/google/cloud/openlineage/mixins.py (+3 -1):

@@ -97,7 +97,9 @@ class _BigQueryInsertJobOperatorOpenLineageMixin:
         run_facets: dict[str, RunFacet] = {
             "externalQuery": ExternalQueryRunFacet(externalQueryId=self.job_id, source="bigquery")
         }
-        self._client = self.hook.get_client(project_id=self.hook.project_id, location=self.location)
+        self._client = self.hook.get_client(
+            project_id=self.project_id or self.hook.project_id, location=self.location
+        )
         try:
             job_properties = self._client.get_job(job_id=self.job_id)._properties
airflow/providers/google/cloud/operators/bigquery.py (+2 -9):

@@ -2370,20 +2370,13 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryInsertJobOpera
         if self.project_id is None:
             self.project_id = hook.project_id

-        # Handle missing logical_date. Example: asset-triggered DAGs (Airflow 3)
-        logical_date = context.get("logical_date")
-        if logical_date is None:
-            # Use dag_run.run_after as fallback when logical_date is not available
-            dag_run = context.get("dag_run")
-            if dag_run and hasattr(dag_run, "run_after"):
-                logical_date = dag_run.run_after
-
         self.job_id = hook.generate_job_id(
             job_id=self.job_id,
             dag_id=self.dag_id,
             task_id=self.task_id,
-            logical_date=logical_date,
+            logical_date=None,
             configuration=self.configuration,
+            run_after=hook.get_run_after_or_logical_date(context),
             force_rerun=self.force_rerun,
         )

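The removed block is exactly the fallback that moved into the hook: prefer logical_date from the context, and use dag_run.run_after when it is missing (e.g. asset-triggered DAGs on Airflow 3). A sketch of the presumed behavior of the new get_run_after_or_logical_date helper, reconstructed from the deleted lines (the hook's actual implementation, in hooks/bigquery.py, is not shown in this diff):

# Reconstructed from the removed operator code above; not the hook's verbatim implementation.
def get_run_after_or_logical_date(context):
    logical_date = context.get("logical_date")
    if logical_date is None:
        # Asset-triggered DAGs (Airflow 3) have no logical_date; use dag_run.run_after instead.
        dag_run = context.get("dag_run")
        if dag_run and hasattr(dag_run, "run_after"):
            return dag_run.run_after
    return logical_date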
airflow/providers/google/cloud/operators/cloud_run.py (+2 -1):

@@ -441,9 +441,10 @@ class CloudRunCreateServiceOperator(GoogleCloudBaseOperator):
                 self.service_name,
                 self.region,
             )
-            hook.get_service(
+            service = hook.get_service(
                 service_name=self.service_name, region=self.region, project_id=self.project_id
             )
+            return Service.to_dict(service)
         except google.cloud.exceptions.GoogleCloudError as e:
             self.log.error("An error occurred. Exiting.")
             raise e
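Previously the get_service result was discarded and execute returned None; it now returns Service.to_dict(service), which Airflow pushes to XCom as the task's return value. A hypothetical downstream read (the task id is a placeholder):

# Hypothetical downstream consumer; "create_cloud_run_service" is a placeholder task id.
def read_created_service(ti):
    service_dict = ti.xcom_pull(task_ids="create_cloud_run_service")
    return service_dict["name"]  # assumes the Service proto dict exposes its "name" field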
airflow/providers/google/cloud/operators/cloud_sql.py (+1 -1):

@@ -28,6 +28,7 @@ from googleapiclient.errors import HttpError

 from airflow.configuration import conf
 from airflow.exceptions import AirflowException
+from airflow.providers.common.compat.sdk import BaseHook
 from airflow.providers.google.cloud.hooks.cloud_sql import CloudSQLDatabaseHook, CloudSQLHook
 from airflow.providers.google.cloud.links.cloud_sql import CloudSQLInstanceDatabaseLink, CloudSQLInstanceLink
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
@@ -35,7 +36,6 @@ from airflow.providers.google.cloud.triggers.cloud_sql import CloudSQLExportTrig
 from airflow.providers.google.cloud.utils.field_validator import GcpBodyFieldValidator
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID, get_field
 from airflow.providers.google.common.links.storage import FileDetailsLink
-from airflow.providers.google.version_compat import BaseHook

 if TYPE_CHECKING:
     from airflow.models import Connection
airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py (+89 -6):

@@ -65,12 +65,14 @@ from airflow.providers.google.cloud.links.cloud_storage_transfer import (
 )
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
 from airflow.providers.google.cloud.triggers.cloud_storage_transfer_service import (
+    CloudDataTransferServiceRunJobTrigger,
     CloudStorageTransferServiceCheckJobStatusTrigger,
 )
 from airflow.providers.google.cloud.utils.helpers import normalize_directory_path
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID

 if TYPE_CHECKING:
+    from airflow.providers.openlineage.extractors import OperatorLineage
     from airflow.utils.context import Context


@@ -468,6 +470,8 @@ class CloudDataTransferServiceRunJobOperator(GoogleCloudBaseOperator):
         If set as a sequence, the identities from the list must grant
         Service Account Token Creator IAM role to the directly preceding identity, with first
         account from the list granting this role to the originating account (templated).
+    :param timeout: Time to wait for the operation to end in seconds. Defaults to 60 seconds if not specified.
+    :param deferrable: Run operator in the deferrable mode.
     """

     # [START gcp_transfer_job_run_template_fields]
@@ -489,6 +493,8 @@ class CloudDataTransferServiceRunJobOperator(GoogleCloudBaseOperator):
         api_version: str = "v1",
         project_id: str = PROVIDE_PROJECT_ID,
         google_impersonation_chain: str | Sequence[str] | None = None,
+        timeout: float | None = None,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -497,6 +503,8 @@ class CloudDataTransferServiceRunJobOperator(GoogleCloudBaseOperator):
         self.gcp_conn_id = gcp_conn_id
         self.api_version = api_version
         self.google_impersonation_chain = google_impersonation_chain
+        self.timeout = timeout
+        self.deferrable = deferrable

     def _validate_inputs(self) -> None:
         if not self.job_name:
@@ -518,8 +526,32 @@ class CloudDataTransferServiceRunJobOperator(GoogleCloudBaseOperator):
             job_name=self.job_name,
         )

+        if self.deferrable:
+            self.defer(
+                timeout=timedelta(seconds=self.timeout or 60),
+                trigger=CloudDataTransferServiceRunJobTrigger(
+                    job_name=self.job_name,
+                    project_id=project_id,
+                    gcp_conn_id=self.gcp_conn_id,
+                    impersonation_chain=self.google_impersonation_chain,
+                ),
+                method_name="execute_complete",
+            )
+
         return hook.run_transfer_job(job_name=self.job_name, project_id=project_id)

+    def execute_complete(self, context: Context, event: dict[str, Any]) -> Any:
+        """
+        Act as a callback for when the trigger fires.
+
+        This returns immediately. It relies on trigger to throw an exception,
+        otherwise it assumes execution was successful.
+        """
+        if event["status"] == "error":
+            raise AirflowException(event["message"])
+
+        return event["job_result"]
+

 class CloudDataTransferServiceGetOperationOperator(GoogleCloudBaseOperator):
     """
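With deferrable=True the operator submits the run, frees its worker slot, and lets the new CloudDataTransferServiceRunJobTrigger poll from the triggerer; execute_complete then re-raises trigger errors or returns the job result. A minimal usage sketch (job name and project id are placeholders):

from airflow.providers.google.cloud.operators.cloud_storage_transfer_service import (
    CloudDataTransferServiceRunJobOperator,
)

run_transfer_job = CloudDataTransferServiceRunJobOperator(
    task_id="run_transfer_job",
    job_name="transferJobs/123456789",  # placeholder
    project_id="my-gcp-project",  # placeholder
    deferrable=True,  # hand the wait off to the triggerer
    timeout=300,  # seconds; the trigger timeout falls back to 60 when unset
)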
airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py (continued):

@@ -964,6 +996,7 @@ class CloudDataTransferServiceS3ToGCSOperator(GoogleCloudBaseOperator):
         self.aws_role_arn = aws_role_arn
         self.deferrable = deferrable
         self._validate_inputs()
+        self._transfer_job: dict[str, Any] | None = None

     def _validate_inputs(self) -> None:
         if self.delete_job_after_completion and not self.wait:
@@ -978,19 +1011,18 @@ class CloudDataTransferServiceS3ToGCSOperator(GoogleCloudBaseOperator):

         TransferJobPreprocessor(body=body, aws_conn_id=self.aws_conn_id, default_schedule=True).process_body()

-        job = hook.create_transfer_job(body=body)
-
+        self._transfer_job = hook.create_transfer_job(body=body)
         if self.wait:
             if not self.deferrable:
-                hook.wait_for_transfer_job(job, timeout=self.timeout)
+                hook.wait_for_transfer_job(self._transfer_job, timeout=self.timeout)
                 if self.delete_job_after_completion:
-                    hook.delete_transfer_job(job_name=job[NAME], project_id=self.project_id)
+                    hook.delete_transfer_job(job_name=self._transfer_job[NAME], project_id=self.project_id)
             else:
                 self.defer(
                     timeout=timedelta(seconds=self.timeout or 60),
                     trigger=CloudStorageTransferServiceCheckJobStatusTrigger(
-                        job_name=job[NAME],
-                        project_id=job[PROJECT_ID],
+                        job_name=self._transfer_job[NAME],
+                        project_id=self._transfer_job[PROJECT_ID],
                         gcp_conn_id=self.gcp_conn_id,
                         impersonation_chain=self.google_impersonation_chain,
                     ),
airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py (continued):

@@ -1040,6 +1072,57 @@ class CloudDataTransferServiceS3ToGCSOperator(GoogleCloudBaseOperator):

         return body

+    def get_openlineage_facets_on_complete(self, task_instance) -> OperatorLineage | None:
+        """Provide OpenLineage OperatorLineage for the S3->GCS transfer."""
+        from airflow.providers.common.compat.openlineage.facet import Dataset
+        from airflow.providers.google.cloud.openlineage.facets import (
+            CloudStorageTransferJobFacet,
+            CloudStorageTransferRunFacet,
+        )
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        input_ds = Dataset(
+            namespace=f"s3://{self.s3_bucket}",
+            name=normalize_directory_path(self.s3_path) or "",
+        )
+
+        output_ds = Dataset(
+            namespace=f"gs://{self.gcs_bucket}",
+            name=normalize_directory_path(self.gcs_path) or "",
+        )
+
+        job = self._transfer_job or {}
+        job_facet = CloudStorageTransferJobFacet(
+            jobName=job.get(NAME),
+            projectId=job.get(PROJECT_ID, self.project_id),
+            description=job.get(DESCRIPTION, self.description),
+            status=job.get(STATUS),
+            sourceBucket=job.get(TRANSFER_SPEC, {})
+            .get(AWS_S3_DATA_SOURCE, {})
+            .get(BUCKET_NAME, self.s3_bucket),
+            sourcePath=job.get(TRANSFER_SPEC, {}).get(AWS_S3_DATA_SOURCE, {}).get(PATH, self.s3_path),
+            targetBucket=job.get(TRANSFER_SPEC, {}).get(GCS_DATA_SINK, {}).get(BUCKET_NAME, self.gcs_bucket),
+            targetPath=job.get(TRANSFER_SPEC, {}).get(GCS_DATA_SINK, {}).get(PATH, self.gcs_path),
+            objectConditions=job.get(TRANSFER_SPEC, {}).get("objectConditions", self.object_conditions),
+            transferOptions=job.get(TRANSFER_SPEC, {}).get("transferOptions", self.transfer_options),
+            schedule=job.get(SCHEDULE, self.schedule),
+        )
+
+        run_facet = CloudStorageTransferRunFacet(
+            jobName=job.get(NAME),
+            wait=self.wait,
+            timeout=self.timeout,
+            deferrable=self.deferrable,
+            deleteJobAfterCompletion=self.delete_job_after_completion,
+        )
+
+        return OperatorLineage(
+            inputs=[input_ds],
+            outputs=[output_ds],
+            job_facets={"cloudStorageTransferJob": job_facet},
+            run_facets={"cloudStorageTransferRun": run_facet},
+        )
+

 class CloudDataTransferServiceGCSToGCSOperator(GoogleCloudBaseOperator):
     """