apache-airflow-providers-google 17.2.0rc1__py3-none-any.whl → 18.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of apache-airflow-providers-google might be problematic.
Files changed (22)
  1. airflow/providers/google/__init__.py +1 -1
  2. airflow/providers/google/cloud/hooks/bigquery.py +6 -0
  3. airflow/providers/google/cloud/hooks/cloud_composer.py +79 -13
  4. airflow/providers/google/cloud/hooks/cloud_run.py +16 -8
  5. airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +0 -173
  6. airflow/providers/google/cloud/log/gcs_task_handler.py +8 -2
  7. airflow/providers/google/cloud/operators/cloud_composer.py +84 -1
  8. airflow/providers/google/cloud/sensors/cloud_composer.py +1 -1
  9. airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +0 -66
  10. airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +12 -1
  11. airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +18 -9
  12. airflow/providers/google/cloud/transfers/bigquery_to_sql.py +95 -0
  13. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +11 -0
  14. airflow/providers/google/cloud/triggers/cloud_composer.py +21 -15
  15. airflow/providers/google/cloud/utils/bigquery_get_data.py +1 -1
  16. airflow/providers/google/marketing_platform/hooks/display_video.py +0 -150
  17. airflow/providers/google/marketing_platform/operators/display_video.py +0 -510
  18. airflow/providers/google/marketing_platform/sensors/display_video.py +1 -68
  19. {apache_airflow_providers_google-17.2.0rc1.dist-info → apache_airflow_providers_google-18.0.0.dist-info}/METADATA +43 -16
  20. {apache_airflow_providers_google-17.2.0rc1.dist-info → apache_airflow_providers_google-18.0.0.dist-info}/RECORD +22 -22
  21. {apache_airflow_providers_google-17.2.0rc1.dist-info → apache_airflow_providers_google-18.0.0.dist-info}/WHEEL +0 -0
  22. {apache_airflow_providers_google-17.2.0rc1.dist-info → apache_airflow_providers_google-18.0.0.dist-info}/entry_points.txt +0 -0
airflow/providers/google/cloud/transfers/bigquery_to_mysql.py
@@ -21,6 +21,7 @@ from __future__ import annotations
 
 import warnings
 from collections.abc import Sequence
+from functools import cached_property
 
 from airflow.exceptions import AirflowProviderDeprecationWarning
 from airflow.providers.google.cloud.transfers.bigquery_to_sql import BigQueryToSqlBaseOperator
@@ -76,5 +77,15 @@ class BigQueryToMySqlOperator(BigQueryToSqlBaseOperator):
         )
         self.mysql_conn_id = mysql_conn_id
 
-    def get_sql_hook(self) -> MySqlHook:
+    @cached_property
+    def mysql_hook(self) -> MySqlHook:
         return MySqlHook(schema=self.database, mysql_conn_id=self.mysql_conn_id)
+
+    def get_sql_hook(self) -> MySqlHook:
+        return self.mysql_hook
+
+    def execute(self, context):
+        # Set source_project_dataset_table here, after hooks are initialized and project_id is available
+        project_id = self.bigquery_hook.project_id
+        self.source_project_dataset_table = f"{project_id}.{self.dataset_id}.{self.table_id}"
+        return super().execute(context)
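
The same refactor appears in the Postgres transfer below: the concrete hook moves into a cached_property and get_sql_hook() simply returns it, so one hook instance is shared by execute() and the new OpenLineage code. A minimal standalone sketch of that pattern (names are illustrative, not provider code):

from functools import cached_property

class _FakeSqlHook:
    """Stand-in for MySqlHook/PostgresHook, only to illustrate the lazy-hook pattern."""
    def insert_rows(self, table, rows):
        print(f"inserted {len(rows)} rows into {table}")

class _TransferSketch:
    @cached_property
    def sql_hook(self) -> _FakeSqlHook:
        # Built on first access, then cached for the lifetime of the instance.
        return _FakeSqlHook()

    def get_sql_hook(self) -> _FakeSqlHook:
        return self.sql_hook

    def execute(self):
        self.get_sql_hook().insert_rows("target_table", rows=[(1,), (2,)])

_TransferSketch().execute()  # builds the hook exactly once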
airflow/providers/google/cloud/transfers/bigquery_to_postgres.py
@@ -19,6 +19,7 @@
 
 from __future__ import annotations
 
+from functools import cached_property
 from typing import TYPE_CHECKING
 
 from psycopg2.extensions import register_adapter
@@ -78,28 +79,36 @@ class BigQueryToPostgresOperator(BigQueryToSqlBaseOperator):
         self.postgres_conn_id = postgres_conn_id
         self.replace_index = replace_index
 
-    def get_sql_hook(self) -> PostgresHook:
+    @cached_property
+    def postgres_hook(self) -> PostgresHook:
         register_adapter(list, Json)
         register_adapter(dict, Json)
         return PostgresHook(database=self.database, postgres_conn_id=self.postgres_conn_id)
 
+    def get_sql_hook(self) -> PostgresHook:
+        return self.postgres_hook
+
     def execute(self, context: Context) -> None:
-        big_query_hook = BigQueryHook(
-            gcp_conn_id=self.gcp_conn_id,
-            location=self.location,
-            impersonation_chain=self.impersonation_chain,
-        )
+        if not self.bigquery_hook:
+            self.bigquery_hook = BigQueryHook(
+                gcp_conn_id=self.gcp_conn_id,
+                location=self.location,
+                impersonation_chain=self.impersonation_chain,
+            )
+        # Set source_project_dataset_table here, after hooks are initialized and project_id is available
+        project_id = self.bigquery_hook.project_id
+        self.source_project_dataset_table = f"{project_id}.{self.dataset_id}.{self.table_id}"
+
         self.persist_links(context)
-        sql_hook: PostgresHook = self.get_sql_hook()
         for rows in bigquery_get_data(
             self.log,
             self.dataset_id,
             self.table_id,
-            big_query_hook,
+            self.bigquery_hook,
             self.batch_size,
             self.selected_fields,
         ):
-            sql_hook.insert_rows(
+            self.postgres_hook.insert_rows(
                 table=self.target_table_name,
                 rows=rows,
                 target_fields=self.selected_fields,
airflow/providers/google/cloud/transfers/bigquery_to_sql.py
@@ -30,6 +30,7 @@ from airflow.providers.google.version_compat import BaseOperator
 
 if TYPE_CHECKING:
     from airflow.providers.common.sql.hooks.sql import DbApiHook
+    from airflow.providers.openlineage.extractors import OperatorLineage
     from airflow.utils.context import Context
 
 
@@ -140,3 +141,97 @@ class BigQueryToSqlBaseOperator(BaseOperator):
                 replace=self.replace,
                 commit_every=self.batch_size,
             )
+
+    def get_openlineage_facets_on_complete(self, task_instance) -> OperatorLineage | None:
+        """
+        Build a generic OpenLineage facet for BigQuery -> SQL transfers.
+
+        This consolidates nearly identical implementations from child
+        operators. Children still provide a concrete SQL hook via
+        ``get_sql_hook()`` and may override behavior if needed.
+        """
+        from airflow.providers.common.compat.openlineage.facet import Dataset
+        from airflow.providers.google.cloud.openlineage.utils import (
+            BIGQUERY_NAMESPACE,
+            get_facets_from_bq_table_for_given_fields,
+            get_identity_column_lineage_facet,
+        )
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        if not self.bigquery_hook:
+            self.bigquery_hook = BigQueryHook(
+                gcp_conn_id=self.gcp_conn_id,
+                location=self.location,
+                impersonation_chain=self.impersonation_chain,
+            )
+
+        try:
+            if not getattr(self, "source_project_dataset_table", None):
+                project_id = self.bigquery_hook.project_id
+                self.source_project_dataset_table = f"{project_id}.{self.dataset_id}.{self.table_id}"
+
+            table_obj = self.bigquery_hook.get_client().get_table(self.source_project_dataset_table)
+        except Exception:
+            self.log.debug(
+                "OpenLineage: could not fetch BigQuery table %s",
+                getattr(self, "source_project_dataset_table", None),
+                exc_info=True,
+            )
+            return OperatorLineage()
+
+        if self.selected_fields:
+            if isinstance(self.selected_fields, str):
+                bigquery_field_names = list(self.selected_fields)
+            else:
+                bigquery_field_names = self.selected_fields
+        else:
+            bigquery_field_names = [f.name for f in getattr(table_obj, "schema", [])]
+
+        input_dataset = Dataset(
+            namespace=BIGQUERY_NAMESPACE,
+            name=self.source_project_dataset_table,
+            facets=get_facets_from_bq_table_for_given_fields(table_obj, bigquery_field_names),
+        )
+
+        sql_hook = self.get_sql_hook()
+        db_info = sql_hook.get_openlineage_database_info(sql_hook.get_conn())
+        if db_info is None:
+            self.log.debug("OpenLineage: could not get database info from SQL hook %s", type(sql_hook))
+            return OperatorLineage()
+        namespace = f"{db_info.scheme}://{db_info.authority}"
+
+        schema_name = None
+        if hasattr(sql_hook, "get_openlineage_default_schema"):
+            try:
+                schema_name = sql_hook.get_openlineage_default_schema()
+            except Exception:
+                schema_name = None
+
+        if self.target_table_name and "." in self.target_table_name:
+            schema_part, table_part = self.target_table_name.split(".", 1)
+        else:
+            schema_part = schema_name or ""
+            table_part = self.target_table_name or ""
+
+        if db_info and db_info.scheme == "mysql":
+            output_name = f"{self.database}.{table_part}" if self.database else f"{table_part}"
+        else:
+            if self.database:
+                if schema_part:
+                    output_name = f"{self.database}.{schema_part}.{table_part}"
+                else:
+                    output_name = f"{self.database}.{table_part}"
+            else:
+                if schema_part:
+                    output_name = f"{schema_part}.{table_part}"
+                else:
+                    output_name = f"{table_part}"
+
+        column_lineage_facet = get_identity_column_lineage_facet(
+            bigquery_field_names, input_datasets=[input_dataset]
+        )
+
+        output_facets = column_lineage_facet or {}
+        output_dataset = Dataset(namespace=namespace, name=output_name, facets=output_facets)
+
+        return OperatorLineage(inputs=[input_dataset], outputs=[output_dataset])
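
A small standalone sketch (not provider code) of the output-name branching above, to make the naming rules concrete; the connection details are illustrative:

def compose_output_name(scheme: str, database: str | None, schema: str | None, table: str) -> str:
    # MySQL output datasets are named database.table; other databases join
    # whichever of database, schema, and table are present, in that order.
    if scheme == "mysql":
        return f"{database}.{table}" if database else table
    return ".".join(part for part in (database, schema, table) if part)

print(compose_output_name("postgres", "example_db", "public", "orders"))  # example_db.public.orders
print(compose_output_name("mysql", "example_db", None, "orders"))         # example_db.orders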
airflow/providers/google/cloud/transfers/gcs_to_bigquery.py
@@ -144,6 +144,9 @@ class GCSToBigQueryOperator(BaseOperator):
         partition by field, type and expiration as per API specifications.
         Note that 'field' is not available in concurrency with
         dataset.table$partition.
+        Ignored if 'range_partitioning' is set.
+    :param range_partitioning: configure optional range partitioning fields i.e.
+        partition by field and integer interval as per API specifications.
     :param cluster_fields: Request that the result of this load be stored sorted
         by one or more columns. BigQuery supports clustering for both partitioned and
         non-partitioned tables. The order of columns given determines the sort order.
@@ -219,6 +222,7 @@
         src_fmt_configs=None,
         external_table=False,
         time_partitioning=None,
+        range_partitioning=None,
         cluster_fields=None,
         autodetect=True,
         encryption_configuration=None,
@@ -246,6 +250,10 @@
             src_fmt_configs = {}
         if time_partitioning is None:
             time_partitioning = {}
+        if range_partitioning is None:
+            range_partitioning = {}
+        if range_partitioning and time_partitioning:
+            raise ValueError("Only one of time_partitioning or range_partitioning can be set.")
         self.bucket = bucket
         self.source_objects = source_objects
         self.schema_object = schema_object
@@ -283,6 +291,7 @@
         self.schema_update_options = schema_update_options
         self.src_fmt_configs = src_fmt_configs
         self.time_partitioning = time_partitioning
+        self.range_partitioning = range_partitioning
         self.cluster_fields = cluster_fields
         self.autodetect = autodetect
         self.encryption_configuration = encryption_configuration
@@ -627,6 +636,8 @@
             )
         if self.time_partitioning:
             self.configuration["load"].update({"timePartitioning": self.time_partitioning})
+        if self.range_partitioning:
+            self.configuration["load"].update({"rangePartitioning": self.range_partitioning})
 
         if self.cluster_fields:
             self.configuration["load"].update({"clustering": {"fields": self.cluster_fields}})
airflow/providers/google/cloud/triggers/cloud_composer.py
@@ -52,11 +52,6 @@ class CloudComposerExecutionTrigger(BaseTrigger):
         self.impersonation_chain = impersonation_chain
         self.pooling_period_seconds = pooling_period_seconds
 
-        self.gcp_hook = CloudComposerAsyncHook(
-            gcp_conn_id=self.gcp_conn_id,
-            impersonation_chain=self.impersonation_chain,
-        )
-
     def serialize(self) -> tuple[str, dict[str, Any]]:
         return (
             "airflow.providers.google.cloud.triggers.cloud_composer.CloudComposerExecutionTrigger",
@@ -70,7 +65,14 @@
             },
         )
 
+    def _get_async_hook(self) -> CloudComposerAsyncHook:
+        return CloudComposerAsyncHook(
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
+
     async def run(self):
+        self.gcp_hook = self._get_async_hook()
         while True:
             operation = await self.gcp_hook.get_operation(operation_name=self.operation_name)
             if operation.done:
@@ -108,11 +110,6 @@ class CloudComposerAirflowCLICommandTrigger(BaseTrigger):
         self.impersonation_chain = impersonation_chain
         self.poll_interval = poll_interval
 
-        self.gcp_hook = CloudComposerAsyncHook(
-            gcp_conn_id=self.gcp_conn_id,
-            impersonation_chain=self.impersonation_chain,
-        )
-
     def serialize(self) -> tuple[str, dict[str, Any]]:
         return (
             "airflow.providers.google.cloud.triggers.cloud_composer.CloudComposerAirflowCLICommandTrigger",
@@ -127,7 +124,14 @@
             },
         )
 
+    def _get_async_hook(self) -> CloudComposerAsyncHook:
+        return CloudComposerAsyncHook(
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
+
     async def run(self):
+        self.gcp_hook = self._get_async_hook()
         try:
             result = await self.gcp_hook.wait_command_execution_result(
                 project_id=self.project_id,
@@ -199,11 +203,6 @@ class CloudComposerDAGRunTrigger(BaseTrigger):
         self.poll_interval = poll_interval
         self.composer_airflow_version = composer_airflow_version
 
-        self.gcp_hook = CloudComposerAsyncHook(
-            gcp_conn_id=self.gcp_conn_id,
-            impersonation_chain=self.impersonation_chain,
-        )
-
     def serialize(self) -> tuple[str, dict[str, Any]]:
         return (
             "airflow.providers.google.cloud.triggers.cloud_composer.CloudComposerDAGRunTrigger",
@@ -264,6 +263,12 @@
                 return False
         return True
 
+    def _get_async_hook(self) -> CloudComposerAsyncHook:
+        return CloudComposerAsyncHook(
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
+
     def _check_composer_dag_run_id_states(self, dag_runs: list[dict]) -> bool:
         for dag_run in dag_runs:
             if dag_run["run_id"] == self.composer_dag_run_id and dag_run["state"] in self.allowed_states:
@@ -271,6 +276,7 @@
             return False
 
     async def run(self):
+        self.gcp_hook: CloudComposerAsyncHook = self._get_async_hook()
         try:
             while True:
                 if datetime.now(self.end_date.tzinfo).timestamp() > self.end_date.timestamp():
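
All three Cloud Composer triggers above now build their CloudComposerAsyncHook inside run() instead of __init__, so constructing or deserializing a trigger no longer touches Google credentials; the hook is only created on the triggerer once run() starts. A toy sketch of that idea (illustrative, not provider code):

import asyncio

class _TriggerSketch:
    def __init__(self, gcp_conn_id: str):
        self.gcp_conn_id = gcp_conn_id   # plain data only; cheap and safe to serialize

    def _get_async_hook(self) -> str:
        # Stands in for CloudComposerAsyncHook(gcp_conn_id=..., impersonation_chain=...).
        return f"hook-for-{self.gcp_conn_id}"

    async def run(self):
        hook = self._get_async_hook()    # created only when the triggerer actually runs us
        return hook

print(asyncio.run(_TriggerSketch("google_cloud_default").run()))  # hook-for-google_cloud_default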
airflow/providers/google/cloud/utils/bigquery_get_data.py
@@ -23,9 +23,9 @@ from google.cloud.bigquery.table import Row, RowIterator
 
 if TYPE_CHECKING:
     from collections.abc import Iterator
-    from logging import Logger
 
     from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook
+    from airflow.sdk.types import Logger
 
 
 def bigquery_get_data(
airflow/providers/google/marketing_platform/hooks/display_video.py
@@ -24,8 +24,6 @@ from typing import Any
 
 from googleapiclient.discovery import Resource, build
 
-from airflow.exceptions import AirflowProviderDeprecationWarning
-from airflow.providers.google.common.deprecated import deprecated
 from airflow.providers.google.common.hooks.base_google import GoogleBaseHook
 
 
@@ -48,23 +46,6 @@ class GoogleDisplayVideo360Hook(GoogleBaseHook):
         )
         self.api_version = api_version
 
-    @deprecated(
-        planned_removal_date="September 01, 2025",
-        use_instead="airflow.providers.google.marketing_platform.hooks.display_video.get_conn_to_display_video",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def get_conn(self) -> Resource:
-        """Retrieve connection to DisplayVideo."""
-        if not self._conn:
-            http_authorized = self._authorize()
-            self._conn = build(
-                "doubleclickbidmanager",
-                self.api_version,
-                http=http_authorized,
-                cache_discovery=False,
-            )
-        return self._conn
-
     def get_conn_to_display_video(self) -> Resource:
         """Retrieve connection to DisplayVideo."""
         if not self._conn:
@@ -96,137 +77,6 @@
         """
         return [f"gdbm-{partner_id}/entity/{{{{ ds_nodash }}}}.*.{entity_type}.json"]
 
-    @deprecated(
-        planned_removal_date="September 01, 2025",
-        use_instead="airflow.providers.google.marketing_platform.hooks.display_video.create_sdf_download_operation",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def create_query(self, query: dict[str, Any]) -> dict:
-        """
-        Create a query.
-
-        :param query: Query object to be passed to request body.
-        """
-        response = self.get_conn().queries().create(body=query).execute(num_retries=self.num_retries)
-        return response
-
-    @deprecated(
-        planned_removal_date="September 01, 2025",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def delete_query(self, query_id: str) -> None:
-        """
-        Delete a stored query as well as the associated stored reports.
-
-        :param query_id: Query ID to delete.
-        """
-        self.get_conn().queries().delete(queryId=query_id).execute(num_retries=self.num_retries)
-
-    @deprecated(
-        planned_removal_date="September 01, 2025",
-        use_instead="airflow.providers.google.marketing_platform.hooks.display_video.get_sdf_download_operation",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def get_query(self, query_id: str) -> dict:
-        """
-        Retrieve a stored query.
-
-        :param query_id: Query ID to retrieve.
-        """
-        response = self.get_conn().queries().get(queryId=query_id).execute(num_retries=self.num_retries)
-        return response
-
-    @deprecated(
-        planned_removal_date="September 01, 2025",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def list_queries(self) -> list[dict]:
-        """Retrieve stored queries."""
-        response = self.get_conn().queries().list().execute(num_retries=self.num_retries)
-        return response.get("queries", [])
-
-    @deprecated(
-        planned_removal_date="September 01, 2025",
-        use_instead="airflow.providers.google.marketing_platform.hooks.display_video.create_sdf_download_operation",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def run_query(self, query_id: str, params: dict[str, Any] | None) -> dict:
-        """
-        Run a stored query to generate a report.
-
-        :param query_id: Query ID to run.
-        :param params: Parameters for the report.
-        """
-        return (
-            self.get_conn().queries().run(queryId=query_id, body=params).execute(num_retries=self.num_retries)
-        )
-
-    @deprecated(
-        planned_removal_date="September 01, 2025",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def get_report(self, query_id: str, report_id: str) -> dict:
-        """
-        Retrieve a report.
-
-        :param query_id: Query ID for which report was generated.
-        :param report_id: Report ID to retrieve.
-        """
-        return (
-            self.get_conn()
-            .queries()
-            .reports()
-            .get(queryId=query_id, reportId=report_id)
-            .execute(num_retries=self.num_retries)
-        )
-
-    @deprecated(
-        planned_removal_date="September 01, 2025",
-        use_instead="airflow.providers.google.marketing_platform.hooks.display_video.create_sdf_download_operation",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def upload_line_items(self, line_items: Any) -> list[dict[str, Any]]:
-        """
-        Upload line items in CSV format.
-
-        :param line_items: downloaded data from GCS and passed to the body request
-        :return: response body.
-        """
-        request_body = {
-            "lineItems": line_items,
-            "dryRun": False,
-            "format": "CSV",
-        }
-
-        response = (
-            self.get_conn()
-            .lineitems()
-            .uploadlineitems(body=request_body)
-            .execute(num_retries=self.num_retries)
-        )
-        return response
-
-    @deprecated(
-        planned_removal_date="September 01, 2025",
-        use_instead="airflow.providers.google.marketing_platform.hooks.display_video.download_media",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def download_line_items(self, request_body: dict[str, Any]) -> list[Any]:
-        """
-        Retrieve line items in CSV format.
-
-        :param request_body: dictionary with parameters that should be passed into.
-            More information about it can be found here:
-            https://developers.google.com/bid-manager/v1.1/lineitems/downloadlineitems
-        """
-        response = (
-            self.get_conn()
-            .lineitems()
-            .downloadlineitems(body=request_body)
-            .execute(num_retries=self.num_retries)
-        )
-        return response["lineItems"]
-
     def create_sdf_download_operation(self, body_request: dict[str, Any]) -> dict[str, Any]:
         """
         Create an SDF Download Task and Returns an Operation.