apache-airflow-providers-google 18.0.0__py3-none-any.whl → 18.1.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of apache-airflow-providers-google might be problematic.

Files changed (72)
  1. airflow/providers/google/__init__.py +1 -1
  2. airflow/providers/google/ads/hooks/ads.py +5 -5
  3. airflow/providers/google/assets/gcs.py +1 -11
  4. airflow/providers/google/cloud/bundles/__init__.py +16 -0
  5. airflow/providers/google/cloud/bundles/gcs.py +161 -0
  6. airflow/providers/google/cloud/hooks/bigquery.py +45 -42
  7. airflow/providers/google/cloud/hooks/cloud_composer.py +131 -1
  8. airflow/providers/google/cloud/hooks/cloud_sql.py +88 -13
  9. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +16 -0
  10. airflow/providers/google/cloud/hooks/dataflow.py +1 -1
  11. airflow/providers/google/cloud/hooks/dataprep.py +1 -1
  12. airflow/providers/google/cloud/hooks/dataproc.py +3 -0
  13. airflow/providers/google/cloud/hooks/gcs.py +107 -3
  14. airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
  15. airflow/providers/google/cloud/hooks/looker.py +1 -1
  16. airflow/providers/google/cloud/hooks/spanner.py +45 -0
  17. airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +30 -0
  18. airflow/providers/google/cloud/links/base.py +11 -11
  19. airflow/providers/google/cloud/links/dataproc.py +2 -10
  20. airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
  21. airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
  22. airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
  23. airflow/providers/google/cloud/openlineage/facets.py +102 -1
  24. airflow/providers/google/cloud/openlineage/mixins.py +3 -1
  25. airflow/providers/google/cloud/operators/bigquery.py +2 -9
  26. airflow/providers/google/cloud/operators/cloud_run.py +2 -1
  27. airflow/providers/google/cloud/operators/cloud_sql.py +1 -1
  28. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +89 -6
  29. airflow/providers/google/cloud/operators/datafusion.py +36 -7
  30. airflow/providers/google/cloud/operators/gen_ai.py +389 -0
  31. airflow/providers/google/cloud/operators/spanner.py +22 -6
  32. airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +7 -0
  33. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +30 -0
  34. airflow/providers/google/cloud/operators/workflows.py +17 -6
  35. airflow/providers/google/cloud/sensors/bigquery.py +1 -1
  36. airflow/providers/google/cloud/sensors/bigquery_dts.py +1 -6
  37. airflow/providers/google/cloud/sensors/bigtable.py +1 -6
  38. airflow/providers/google/cloud/sensors/cloud_composer.py +65 -31
  39. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +1 -6
  40. airflow/providers/google/cloud/sensors/dataflow.py +1 -1
  41. airflow/providers/google/cloud/sensors/dataform.py +1 -6
  42. airflow/providers/google/cloud/sensors/datafusion.py +1 -6
  43. airflow/providers/google/cloud/sensors/dataplex.py +1 -6
  44. airflow/providers/google/cloud/sensors/dataprep.py +1 -6
  45. airflow/providers/google/cloud/sensors/dataproc.py +1 -6
  46. airflow/providers/google/cloud/sensors/dataproc_metastore.py +1 -6
  47. airflow/providers/google/cloud/sensors/gcs.py +1 -7
  48. airflow/providers/google/cloud/sensors/looker.py +1 -6
  49. airflow/providers/google/cloud/sensors/pubsub.py +1 -6
  50. airflow/providers/google/cloud/sensors/tasks.py +1 -6
  51. airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +1 -6
  52. airflow/providers/google/cloud/sensors/workflows.py +1 -6
  53. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
  54. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +2 -1
  55. airflow/providers/google/cloud/transfers/sftp_to_gcs.py +11 -2
  56. airflow/providers/google/cloud/triggers/bigquery.py +15 -3
  57. airflow/providers/google/cloud/triggers/cloud_composer.py +51 -21
  58. airflow/providers/google/cloud/triggers/cloud_run.py +1 -1
  59. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +90 -0
  60. airflow/providers/google/cloud/triggers/pubsub.py +14 -18
  61. airflow/providers/google/common/hooks/base_google.py +1 -1
  62. airflow/providers/google/get_provider_info.py +15 -0
  63. airflow/providers/google/leveldb/hooks/leveldb.py +1 -1
  64. airflow/providers/google/marketing_platform/links/analytics_admin.py +2 -8
  65. airflow/providers/google/marketing_platform/sensors/campaign_manager.py +1 -6
  66. airflow/providers/google/marketing_platform/sensors/display_video.py +1 -6
  67. airflow/providers/google/suite/sensors/drive.py +1 -6
  68. airflow/providers/google/version_compat.py +0 -20
  69. {apache_airflow_providers_google-18.0.0.dist-info → apache_airflow_providers_google-18.1.0rc1.dist-info}/METADATA +15 -15
  70. {apache_airflow_providers_google-18.0.0.dist-info → apache_airflow_providers_google-18.1.0rc1.dist-info}/RECORD +72 -65
  71. {apache_airflow_providers_google-18.0.0.dist-info → apache_airflow_providers_google-18.1.0rc1.dist-info}/WHEEL +0 -0
  72. {apache_airflow_providers_google-18.0.0.dist-info → apache_airflow_providers_google-18.1.0rc1.dist-info}/entry_points.txt +0 -0
airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py

@@ -90,6 +90,11 @@ class GenerativeModelHook(GoogleBaseHook):
         cached_context_model = preview_generative_model.GenerativeModel.from_cached_content(cached_content)
         return cached_context_model

+    @deprecated(
+        planned_removal_date="January 3, 2026",
+        use_instead="airflow.providers.google.cloud.hooks.gen_ai.generative_model.GenAIGenerativeModelHook.embed_content",
+        category=AirflowProviderDeprecationWarning,
+    )
     @GoogleBaseHook.fallback_to_default_project_id
     def text_embedding_model_get_embeddings(
         self,
@@ -114,6 +119,11 @@ class GenerativeModelHook(GoogleBaseHook):

         return response.values

+    @deprecated(
+        planned_removal_date="January 3, 2026",
+        use_instead="airflow.providers.google.cloud.hooks.gen_ai.generative_model.GenAIGenerativeModelHook.generate_content",
+        category=AirflowProviderDeprecationWarning,
+    )
     @GoogleBaseHook.fallback_to_default_project_id
     def generative_model_generate_content(
         self,
@@ -156,6 +166,11 @@ class GenerativeModelHook(GoogleBaseHook):

         return response.text

+    @deprecated(
+        planned_removal_date="January 3, 2026",
+        use_instead="airflow.providers.google.cloud.hooks.gen_ai.generative_model.GenAIGenerativeModelHook.supervised_fine_tuning_train",
+        category=AirflowProviderDeprecationWarning,
+    )
     @GoogleBaseHook.fallback_to_default_project_id
     def supervised_fine_tuning_train(
         self,
@@ -209,6 +224,11 @@ class GenerativeModelHook(GoogleBaseHook):

         return sft_tuning_job

+    @deprecated(
+        planned_removal_date="January 3, 2026",
+        use_instead="airflow.providers.google.cloud.hooks.gen_ai.generative_model.GenAIGenerativeModelHook.count_tokens",
+        category=AirflowProviderDeprecationWarning,
+    )
     @GoogleBaseHook.fallback_to_default_project_id
     def count_tokens(
         self,
@@ -296,6 +316,11 @@ class GenerativeModelHook(GoogleBaseHook):

         return eval_result

+    @deprecated(
+        planned_removal_date="January 3, 2026",
+        use_instead="airflow.providers.google.cloud.hooks.gen_ai.generative_model.GenAIGenerativeModelHook.create_cached_content",
+        category=AirflowProviderDeprecationWarning,
+    )
     def create_cached_content(
         self,
         model_name: str,
@@ -330,6 +355,11 @@ class GenerativeModelHook(GoogleBaseHook):

         return response.name

+    @deprecated(
+        planned_removal_date="January 3, 2026",
+        use_instead="airflow.providers.google.cloud.hooks.gen_ai.generative_model.GenAIGenerativeModelHook.generate_content",
+        category=AirflowProviderDeprecationWarning,
+    )
     def generate_from_cached_content(
         self,
         location: str,
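
All six hunks apply one pattern: the Vertex AI methods keep working but now raise AirflowProviderDeprecationWarning with a planned removal date of January 3, 2026, and each use_instead points at the new GenAIGenerativeModelHook (added in this release as airflow/providers/google/cloud/hooks/gen_ai.py, +196 lines; note the use_instead strings name a gen_ai.generative_model module, which does not match the flat gen_ai.py in the file list above). A minimal migration sketch — only the hook and method names come from this diff; the new hook's import path and keyword arguments are assumptions:

# Hedged migration sketch; check hooks/gen_ai.py for the real signatures.
from airflow.providers.google.cloud.hooks.vertex_ai.generative_model import GenerativeModelHook

old_hook = GenerativeModelHook(gcp_conn_id="google_cloud_default")
# Still works, but emits AirflowProviderDeprecationWarning as of 18.1.0.

from airflow.providers.google.cloud.hooks.gen_ai import GenAIGenerativeModelHook  # path assumed

new_hook = GenAIGenerativeModelHook(gcp_conn_id="google_cloud_default")
text = new_hook.generate_content(          # replacement named by use_instead
    contents=["Summarize this dataset."],  # argument names assumed
    location="us-central1",
)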
airflow/providers/google/cloud/links/base.py

@@ -18,18 +18,10 @@
 from __future__ import annotations

 from typing import TYPE_CHECKING, ClassVar
+from urllib.parse import urlparse

-from airflow.providers.google.version_compat import (
-    AIRFLOW_V_3_0_PLUS,
-    BaseOperator,
-    BaseOperatorLink,
-    BaseSensorOperator,
-)
-
-if AIRFLOW_V_3_0_PLUS:
-    from airflow.sdk.execution_time.xcom import XCom
-else:
-    from airflow.models.xcom import XCom  # type: ignore[no-redef]
+from airflow.providers.common.compat.sdk import BaseOperatorLink, BaseSensorOperator, XCom
+from airflow.providers.google.version_compat import AIRFLOW_V_3_0_PLUS, BaseOperator

 if TYPE_CHECKING:
     from airflow.models.taskinstancekey import TaskInstanceKey
@@ -109,6 +101,14 @@ class BaseGoogleLink(BaseOperatorLink):
         if TYPE_CHECKING:
             assert isinstance(operator, (GoogleCloudBaseOperator, BaseSensorOperator))

+        # In cases when worker passes execution to trigger, the value that is put to XCom
+        # already contains link to the object in string format. In this case we don't want to execute
+        # get_config() again. Instead we can leave this value without any changes
+        link_value = XCom.get_value(key=self.key, ti_key=ti_key)
+        if link_value and isinstance(link_value, str):
+            if urlparse(link_value).scheme in ("http", "https"):
+                return link_value
+
         conf = self.get_config(operator, ti_key)
         if not conf:
             return ""
airflow/providers/google/cloud/links/dataproc.py

@@ -25,22 +25,14 @@ from typing import TYPE_CHECKING, Any
 import attr

 from airflow.exceptions import AirflowProviderDeprecationWarning
+from airflow.providers.common.compat.sdk import BaseOperatorLink, XCom
 from airflow.providers.google.cloud.links.base import BASE_LINK, BaseGoogleLink
-from airflow.providers.google.version_compat import (
-    AIRFLOW_V_3_0_PLUS,
-    BaseOperator,
-    BaseOperatorLink,
-)

 if TYPE_CHECKING:
     from airflow.models.taskinstancekey import TaskInstanceKey
+    from airflow.providers.google.version_compat import BaseOperator
     from airflow.utils.context import Context

-if AIRFLOW_V_3_0_PLUS:
-    from airflow.sdk.execution_time.xcom import XCom
-else:
-    from airflow.models.xcom import XCom  # type: ignore[no-redef]
-

 def __getattr__(name: str) -> Any:
     # PEP-562: deprecate module-level variable
airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json

@@ -0,0 +1,68 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$defs": {
+    "CloudStorageTransferJobFacet": {
+      "allOf": [
+        {
+          "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/JobFacet"
+        },
+        {
+          "type": "object",
+          "properties": {
+            "jobName": {
+              "type": "string",
+              "description": "Transfer job name assigned by GCP Storage Transfer Service."
+            },
+            "projectId": {
+              "type": "string",
+              "description": "GCP project ID."
+            },
+            "description": {
+              "type": "string",
+              "description": "Optional description of the transfer job."
+            },
+            "status": {
+              "type": "string",
+              "description": "Status of the transfer job (ENABLED, DISABLED)."
+            },
+            "sourceBucket": {
+              "type": "string",
+              "description": "Source AWS S3 bucket."
+            },
+            "sourcePath": {
+              "type": "string",
+              "description": "Prefix path inside the source bucket."
+            },
+            "targetBucket": {
+              "type": "string",
+              "description": "Target GCS bucket."
+            },
+            "targetPath": {
+              "type": "string",
+              "description": "Prefix path inside the target bucket."
+            },
+            "objectConditions": {
+              "type": "object",
+              "description": "Filtering conditions for objects transferred."
+            },
+            "transferOptions": {
+              "type": "object",
+              "description": "Transfer options such as overwrite or delete."
+            },
+            "schedule": {
+              "type": "object",
+              "description": "Transfer schedule details."
+            }
+          }
+        }
+      ],
+      "type": "object"
+    }
+  },
+  "type": "object",
+  "properties": {
+    "cloudStorageTransferJob": {
+      "$ref": "#/$defs/CloudStorageTransferJobFacet"
+    }
+  }
+}
airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json

@@ -0,0 +1,60 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$defs": {
+    "CloudStorageTransferRunFacet": {
+      "allOf": [
+        {
+          "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
+        },
+        {
+          "type": "object",
+          "properties": {
+            "jobName": {
+              "type": "string",
+              "description": "Transfer job name associated with this run."
+            },
+            "operationName": {
+              "type": "string",
+              "description": "Transfer operation name if available."
+            },
+            "status": {
+              "type": "string",
+              "description": "Run status if available."
+            },
+            "startTime": {
+              "type": "string",
+              "description": "Start time of the transfer operation."
+            },
+            "endTime": {
+              "type": "string",
+              "description": "End time of the transfer operation."
+            },
+            "wait": {
+              "type": "boolean",
+              "description": "Whether the operator waited for completion."
+            },
+            "timeout": {
+              "type": ["number", "null"],
+              "description": "Timeout in seconds."
+            },
+            "deferrable": {
+              "type": "boolean",
+              "description": "Whether the operator used deferrable mode."
+            },
+            "deleteJobAfterCompletion": {
+              "type": "boolean",
+              "description": "Whether the transfer job was deleted after completion."
+            }
+          }
+        }
+      ],
+      "type": "object"
+    }
+  },
+  "type": "object",
+  "properties": {
+    "cloudStorageTransferRun": {
+      "$ref": "#/$defs/CloudStorageTransferRunFacet"
+    }
+  }
+}
airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json

@@ -0,0 +1,32 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$defs": {
+    "DataFusionRunFacet": {
+      "allOf": [
+        {
+          "$ref": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet"
+        },
+        {
+          "type": "object",
+          "properties": {
+            "runId": {
+              "type": "string",
+              "description": "Pipeline run ID assigned by Cloud Data Fusion."
+            },
+            "runtimeArgs": {
+              "type": "object",
+              "description": "Runtime arguments provided when starting the pipeline."
+            }
+          }
+        }
+      ],
+      "type": "object"
+    }
+  },
+  "type": "object",
+  "properties": {
+    "dataFusionRun": {
+      "$ref": "#/$defs/DataFusionRunFacet"
+    }
+  }
+}
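
All three schemas share the shape of the existing BigQueryJobRunFacet.json: a single $def that allOf-extends the OpenLineage 2-0-2 JobFacet or RunFacet, plus a top-level property giving the facet its event key. For orientation, an illustrative payload (every value made up) that fits CloudStorageTransferRunFacet:

# Illustrative instance of the run facet; all values are placeholders.
cloud_storage_transfer_run = {
    "jobName": "transferJobs/123456789",
    "operationName": "transferOperations/transferJobs-123456789",
    "status": "SUCCESS",
    "startTime": "2025-01-01T00:00:00Z",
    "endTime": "2025-01-01T00:05:00Z",
    "wait": True,
    "timeout": 60.0,
    "deferrable": False,
    "deleteJobAfterCompletion": False,
}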
airflow/providers/google/cloud/openlineage/facets.py

@@ -24,13 +24,17 @@ from attr import define, field
 from airflow.providers.google import __version__ as provider_version

 if TYPE_CHECKING:
-    from openlineage.client.generated.base import RunFacet
+    from openlineage.client.generated.base import JobFacet, RunFacet

 try:
     try:
         from openlineage.client.generated.base import RunFacet
     except ImportError:  # Old OpenLineage client is used
         from openlineage.client.facet import BaseFacet as RunFacet  # type: ignore[assignment]
+    try:
+        from openlineage.client.generated.base import JobFacet
+    except ImportError:  # Old OpenLineage client is used
+        from openlineage.client.facet import BaseFacet as JobFacet  # type: ignore[assignment]

     @define
     class BigQueryJobRunFacet(RunFacet):
@@ -53,6 +57,100 @@ try:
                 f"providers-google/{provider_version}/airflow/providers/google/"
                 "openlineage/BigQueryJobRunFacet.json"
             )
+
+    @define
+    class CloudStorageTransferJobFacet(JobFacet):
+        """
+        Facet representing a Cloud Storage Transfer Service job configuration.
+
+        :param jobName: Unique name of the transfer job.
+        :param projectId: GCP project where the transfer job is defined.
+        :param description: User-provided description of the transfer job.
+        :param status: Current status of the transfer job (e.g. "ENABLED", "DISABLED").
+        :param sourceBucket: Name of the source bucket (e.g. AWS S3).
+        :param sourcePath: Prefix/path inside the source bucket.
+        :param targetBucket: Name of the destination bucket (e.g. GCS).
+        :param targetPath: Prefix/path inside the destination bucket.
+        :param objectConditions: Object selection rules (e.g. include/exclude prefixes).
+        :param transferOptions: Transfer options, such as overwrite behavior or whether to delete objects
+            from the source after transfer.
+        :param schedule: Schedule for the transfer job (if recurring).
+        """
+
+        jobName: str | None = field(default=None)
+        projectId: str | None = field(default=None)
+        description: str | None = field(default=None)
+        status: str | None = field(default=None)
+        sourceBucket: str | None = field(default=None)
+        sourcePath: str | None = field(default=None)
+        targetBucket: str | None = field(default=None)
+        targetPath: str | None = field(default=None)
+        objectConditions: dict | None = field(default=None)
+        transferOptions: dict | None = field(default=None)
+        schedule: dict | None = field(default=None)
+
+        @staticmethod
+        def _get_schema() -> str:
+            return (
+                "https://raw.githubusercontent.com/apache/airflow/"
+                f"providers-google/{provider_version}/airflow/providers/google/"
+                "openlineage/CloudStorageTransferJobFacet.json"
+            )
+
+    @define
+    class CloudStorageTransferRunFacet(RunFacet):
+        """
+        Facet representing a Cloud Storage Transfer Service job execution run.
+
+        :param jobName: Name of the transfer job being executed.
+        :param operationName: Name of the specific transfer operation instance.
+        :param status: Current status of the operation (e.g. "IN_PROGRESS", "SUCCESS", "FAILED").
+        :param startTime: Time when the transfer job execution started (ISO 8601 format).
+        :param endTime: Time when the transfer job execution finished (ISO 8601 format).
+        :param wait: Whether the operator waits for the job to complete before finishing.
+        :param timeout: Timeout (in seconds) for the transfer run to complete.
+        :param deferrable: Whether the operator defers execution until job completion.
+        :param deleteJobAfterCompletion: Whether the operator deletes the transfer job after the run completes.
+        """
+
+        jobName: str | None = field(default=None)
+        operationName: str | None = field(default=None)
+        status: str | None = field(default=None)
+        startTime: str | None = field(default=None)
+        endTime: str | None = field(default=None)
+        wait: bool = field(default=True)
+        timeout: float | None = field(default=None)
+        deferrable: bool = field(default=False)
+        deleteJobAfterCompletion: bool = field(default=False)
+
+        @staticmethod
+        def _get_schema() -> str:
+            return (
+                "https://raw.githubusercontent.com/apache/airflow/"
+                f"providers-google/{provider_version}/airflow/providers/google/"
+                "openlineage/CloudStorageTransferRunFacet.json"
+            )
+
+    @define
+    class DataFusionRunFacet(RunFacet):
+        """
+        Facet that represents relevant details of a Cloud Data Fusion pipeline run.
+
+        :param runId: The pipeline execution id.
+        :param runtimeArgs: Runtime arguments passed to the pipeline.
+        """
+
+        runId: str | None = field(default=None)
+        runtimeArgs: dict[str, str] | None = field(default=None)
+
+        @staticmethod
+        def _get_schema() -> str:
+            return (
+                "https://raw.githubusercontent.com/apache/airflow/"
+                f"providers-google/{provider_version}/airflow/providers/google/"
+                "openlineage/DataFusionRunFacet.json"
+            )
+
 except ImportError:  # OpenLineage is not available

     def create_no_op(*_, **__) -> None:
@@ -65,3 +163,6 @@ except ImportError:  # OpenLineage is not available
         return None

     BigQueryJobRunFacet = create_no_op  # type: ignore[misc, assignment]
+    CloudStorageTransferJobFacet = create_no_op  # type: ignore[misc, assignment]
+    CloudStorageTransferRunFacet = create_no_op  # type: ignore[misc, assignment]
+    DataFusionRunFacet = create_no_op  # type: ignore[misc, assignment]
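
Because the three new classes sit inside the same try/except as BigQueryJobRunFacet, call sites can import them unconditionally: with the OpenLineage client installed they are attrs classes; without it each name is bound to create_no_op, and constructing one returns None. A short sketch using only fields defined above (values illustrative):

from airflow.providers.google.cloud.openlineage.facets import (
    CloudStorageTransferJobFacet,
    DataFusionRunFacet,
)

# attrs instances when OpenLineage is installed; silent None otherwise.
job_facet = CloudStorageTransferJobFacet(
    jobName="transferJobs/123456789",  # placeholder
    projectId="my-project",
    sourceBucket="my-s3-bucket",
    targetBucket="my-gcs-bucket",
    status="ENABLED",
)
fusion_facet = DataFusionRunFacet(runId="a1b2c3d4", runtimeArgs={"env": "prod"})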
airflow/providers/google/cloud/openlineage/mixins.py

@@ -97,7 +97,9 @@ class _BigQueryInsertJobOperatorOpenLineageMixin:
         run_facets: dict[str, RunFacet] = {
             "externalQuery": ExternalQueryRunFacet(externalQueryId=self.job_id, source="bigquery")
         }
-        self._client = self.hook.get_client(project_id=self.hook.project_id, location=self.location)
+        self._client = self.hook.get_client(
+            project_id=self.project_id or self.hook.project_id, location=self.location
+        )
         try:
             job_properties = self._client.get_job(job_id=self.job_id)._properties
airflow/providers/google/cloud/operators/bigquery.py

@@ -2370,20 +2370,13 @@ class BigQueryInsertJobOperator(GoogleCloudBaseOperator, _BigQueryInsertJobOperatorOpenLineageMixin):
         if self.project_id is None:
             self.project_id = hook.project_id

-        # Handle missing logical_date. Example: asset-triggered DAGs (Airflow 3)
-        logical_date = context.get("logical_date")
-        if logical_date is None:
-            # Use dag_run.run_after as fallback when logical_date is not available
-            dag_run = context.get("dag_run")
-            if dag_run and hasattr(dag_run, "run_after"):
-                logical_date = dag_run.run_after
-
         self.job_id = hook.generate_job_id(
             job_id=self.job_id,
             dag_id=self.dag_id,
             task_id=self.task_id,
-            logical_date=logical_date,
+            logical_date=None,
             configuration=self.configuration,
+            run_after=hook.get_run_after_or_logical_date(context),
             force_rerun=self.force_rerun,
         )

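The deleted inline fallback is presumably what hook.get_run_after_or_logical_date(context) now centralizes; the hook's body is not shown here (it is part of the +45 -42 change to hooks/bigquery.py), so the sketch below is an assumption reconstructed from the removed lines:

# Assumed behavior of the new hook helper, reconstructed from the deleted
# inline code above; not confirmed by this diff.
def get_run_after_or_logical_date(context):
    logical_date = context.get("logical_date")
    if logical_date is not None:
        return logical_date
    # Asset-triggered DAG runs in Airflow 3 have no logical_date;
    # fall back to dag_run.run_after so generated job IDs stay deterministic.
    dag_run = context.get("dag_run")
    if dag_run is not None and hasattr(dag_run, "run_after"):
        return dag_run.run_after
    return None
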
airflow/providers/google/cloud/operators/cloud_run.py

@@ -441,9 +441,10 @@ class CloudRunCreateServiceOperator(GoogleCloudBaseOperator):
                 self.service_name,
                 self.region,
             )
-            return hook.get_service(
+            service = hook.get_service(
                 service_name=self.service_name, region=self.region, project_id=self.project_id
             )
+            return Service.to_dict(service)
         except google.cloud.exceptions.GoogleCloudError as e:
             self.log.error("An error occurred. Exiting.")
             raise e
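
The motivation: hook.get_service() returns a google.cloud.run_v2 Service proto-plus message, which is not a plain JSON-serializable value to hand back from execute() (return values get pushed to XCom). Service.to_dict(), the proto-plus conversion the diff itself calls, produces an ordinary dict. A minimal sketch with a placeholder resource name:

from google.cloud.run_v2 import Service

service = Service(name="projects/my-project/locations/us-central1/services/demo")
as_dict = Service.to_dict(service)  # plain dict, safe to return from execute() / push to XCom
print(as_dict["name"])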
airflow/providers/google/cloud/operators/cloud_sql.py

@@ -28,6 +28,7 @@ from googleapiclient.errors import HttpError

 from airflow.configuration import conf
 from airflow.exceptions import AirflowException
+from airflow.providers.common.compat.sdk import BaseHook
 from airflow.providers.google.cloud.hooks.cloud_sql import CloudSQLDatabaseHook, CloudSQLHook
 from airflow.providers.google.cloud.links.cloud_sql import CloudSQLInstanceDatabaseLink, CloudSQLInstanceLink
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
@@ -35,7 +36,6 @@ from airflow.providers.google.cloud.triggers.cloud_sql import CloudSQLExportTrigger
 from airflow.providers.google.cloud.utils.field_validator import GcpBodyFieldValidator
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID, get_field
 from airflow.providers.google.common.links.storage import FileDetailsLink
-from airflow.providers.google.version_compat import BaseHook

 if TYPE_CHECKING:
     from airflow.models import Connection
airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py

@@ -65,12 +65,14 @@ from airflow.providers.google.cloud.links.cloud_storage_transfer import (
 )
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
 from airflow.providers.google.cloud.triggers.cloud_storage_transfer_service import (
+    CloudDataTransferServiceRunJobTrigger,
     CloudStorageTransferServiceCheckJobStatusTrigger,
 )
 from airflow.providers.google.cloud.utils.helpers import normalize_directory_path
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID

 if TYPE_CHECKING:
+    from airflow.providers.openlineage.extractors import OperatorLineage
     from airflow.utils.context import Context


@@ -468,6 +470,8 @@ class CloudDataTransferServiceRunJobOperator(GoogleCloudBaseOperator):
        If set as a sequence, the identities from the list must grant
        Service Account Token Creator IAM role to the directly preceding identity, with first
        account from the list granting this role to the originating account (templated).
+    :param timeout: Time to wait for the operation to end in seconds. Defaults to 60 seconds if not specified.
+    :param deferrable: Run operator in the deferrable mode.
     """

     # [START gcp_transfer_job_run_template_fields]
@@ -489,6 +493,8 @@ class CloudDataTransferServiceRunJobOperator(GoogleCloudBaseOperator):
         api_version: str = "v1",
         project_id: str = PROVIDE_PROJECT_ID,
         google_impersonation_chain: str | Sequence[str] | None = None,
+        timeout: float | None = None,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -497,6 +503,8 @@ class CloudDataTransferServiceRunJobOperator(GoogleCloudBaseOperator):
         self.gcp_conn_id = gcp_conn_id
         self.api_version = api_version
         self.google_impersonation_chain = google_impersonation_chain
+        self.timeout = timeout
+        self.deferrable = deferrable

     def _validate_inputs(self) -> None:
         if not self.job_name:
@@ -518,8 +526,32 @@ class CloudDataTransferServiceRunJobOperator(GoogleCloudBaseOperator):
             job_name=self.job_name,
         )

+        if self.deferrable:
+            self.defer(
+                timeout=timedelta(seconds=self.timeout or 60),
+                trigger=CloudDataTransferServiceRunJobTrigger(
+                    job_name=self.job_name,
+                    project_id=project_id,
+                    gcp_conn_id=self.gcp_conn_id,
+                    impersonation_chain=self.google_impersonation_chain,
+                ),
+                method_name="execute_complete",
+            )
+
         return hook.run_transfer_job(job_name=self.job_name, project_id=project_id)

+    def execute_complete(self, context: Context, event: dict[str, Any]) -> Any:
+        """
+        Act as a callback for when the trigger fires.
+
+        This returns immediately. It relies on trigger to throw an exception,
+        otherwise it assumes execution was successful.
+        """
+        if event["status"] == "error":
+            raise AirflowException(event["message"])
+
+        return event["job_result"]
+

 class CloudDataTransferServiceGetOperationOperator(GoogleCloudBaseOperator):
     """
airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py (continued)

@@ -964,6 +996,7 @@ class CloudDataTransferServiceS3ToGCSOperator(GoogleCloudBaseOperator):
         self.aws_role_arn = aws_role_arn
         self.deferrable = deferrable
         self._validate_inputs()
+        self._transfer_job: dict[str, Any] | None = None

     def _validate_inputs(self) -> None:
         if self.delete_job_after_completion and not self.wait:
@@ -978,19 +1011,18 @@ class CloudDataTransferServiceS3ToGCSOperator(GoogleCloudBaseOperator):

         TransferJobPreprocessor(body=body, aws_conn_id=self.aws_conn_id, default_schedule=True).process_body()

-        job = hook.create_transfer_job(body=body)
-
+        self._transfer_job = hook.create_transfer_job(body=body)
         if self.wait:
             if not self.deferrable:
-                hook.wait_for_transfer_job(job, timeout=self.timeout)
+                hook.wait_for_transfer_job(self._transfer_job, timeout=self.timeout)
                 if self.delete_job_after_completion:
-                    hook.delete_transfer_job(job_name=job[NAME], project_id=self.project_id)
+                    hook.delete_transfer_job(job_name=self._transfer_job[NAME], project_id=self.project_id)
             else:
                 self.defer(
                     timeout=timedelta(seconds=self.timeout or 60),
                     trigger=CloudStorageTransferServiceCheckJobStatusTrigger(
-                        job_name=job[NAME],
-                        project_id=job[PROJECT_ID],
+                        job_name=self._transfer_job[NAME],
+                        project_id=self._transfer_job[PROJECT_ID],
                         gcp_conn_id=self.gcp_conn_id,
                         impersonation_chain=self.google_impersonation_chain,
                     ),
@@ -1040,6 +1072,57 @@ class CloudDataTransferServiceS3ToGCSOperator(GoogleCloudBaseOperator):

         return body

+    def get_openlineage_facets_on_complete(self, task_instance) -> OperatorLineage | None:
+        """Provide OpenLineage OperatorLineage for the S3->GCS transfer."""
+        from airflow.providers.common.compat.openlineage.facet import Dataset
+        from airflow.providers.google.cloud.openlineage.facets import (
+            CloudStorageTransferJobFacet,
+            CloudStorageTransferRunFacet,
+        )
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        input_ds = Dataset(
+            namespace=f"s3://{self.s3_bucket}",
+            name=normalize_directory_path(self.s3_path) or "",
+        )
+
+        output_ds = Dataset(
+            namespace=f"gs://{self.gcs_bucket}",
+            name=normalize_directory_path(self.gcs_path) or "",
+        )
+
+        job = self._transfer_job or {}
+        job_facet = CloudStorageTransferJobFacet(
+            jobName=job.get(NAME),
+            projectId=job.get(PROJECT_ID, self.project_id),
+            description=job.get(DESCRIPTION, self.description),
+            status=job.get(STATUS),
+            sourceBucket=job.get(TRANSFER_SPEC, {})
+            .get(AWS_S3_DATA_SOURCE, {})
+            .get(BUCKET_NAME, self.s3_bucket),
+            sourcePath=job.get(TRANSFER_SPEC, {}).get(AWS_S3_DATA_SOURCE, {}).get(PATH, self.s3_path),
+            targetBucket=job.get(TRANSFER_SPEC, {}).get(GCS_DATA_SINK, {}).get(BUCKET_NAME, self.gcs_bucket),
+            targetPath=job.get(TRANSFER_SPEC, {}).get(GCS_DATA_SINK, {}).get(PATH, self.gcs_path),
+            objectConditions=job.get(TRANSFER_SPEC, {}).get("objectConditions", self.object_conditions),
+            transferOptions=job.get(TRANSFER_SPEC, {}).get("transferOptions", self.transfer_options),
+            schedule=job.get(SCHEDULE, self.schedule),
+        )
+
+        run_facet = CloudStorageTransferRunFacet(
+            jobName=job.get(NAME),
+            wait=self.wait,
+            timeout=self.timeout,
+            deferrable=self.deferrable,
+            deleteJobAfterCompletion=self.delete_job_after_completion,
+        )
+
+        return OperatorLineage(
+            inputs=[input_ds],
+            outputs=[output_ds],
+            job_facets={"cloudStorageTransferJob": job_facet},
+            run_facets={"cloudStorageTransferRun": run_facet},
+        )
+

 class CloudDataTransferServiceGCSToGCSOperator(GoogleCloudBaseOperator):
     """