apache-airflow-providers-google 16.1.0__py3-none-any.whl → 17.0.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/__init__.py +1 -1
- airflow/providers/google/ads/hooks/ads.py +1 -5
- airflow/providers/google/cloud/hooks/bigquery.py +1 -130
- airflow/providers/google/cloud/hooks/cloud_logging.py +109 -0
- airflow/providers/google/cloud/hooks/cloud_run.py +1 -1
- airflow/providers/google/cloud/hooks/cloud_sql.py +5 -5
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +1 -1
- airflow/providers/google/cloud/hooks/dataflow.py +0 -85
- airflow/providers/google/cloud/hooks/datafusion.py +1 -1
- airflow/providers/google/cloud/hooks/dataprep.py +1 -4
- airflow/providers/google/cloud/hooks/dataproc.py +68 -70
- airflow/providers/google/cloud/hooks/gcs.py +3 -5
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +2 -2
- airflow/providers/google/cloud/hooks/looker.py +1 -5
- airflow/providers/google/cloud/hooks/stackdriver.py +10 -8
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +4 -4
- airflow/providers/google/cloud/hooks/vertex_ai/experiment_service.py +202 -0
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +7 -0
- airflow/providers/google/cloud/links/kubernetes_engine.py +3 -0
- airflow/providers/google/cloud/log/gcs_task_handler.py +2 -2
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +1 -1
- airflow/providers/google/cloud/openlineage/mixins.py +7 -7
- airflow/providers/google/cloud/operators/automl.py +1 -1
- airflow/providers/google/cloud/operators/bigquery.py +8 -609
- airflow/providers/google/cloud/operators/cloud_logging_sink.py +341 -0
- airflow/providers/google/cloud/operators/cloud_sql.py +1 -5
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +2 -2
- airflow/providers/google/cloud/operators/dataproc.py +1 -1
- airflow/providers/google/cloud/operators/dlp.py +2 -2
- airflow/providers/google/cloud/operators/kubernetes_engine.py +4 -4
- airflow/providers/google/cloud/operators/vertex_ai/experiment_service.py +435 -0
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +7 -1
- airflow/providers/google/cloud/operators/vertex_ai/ray.py +7 -5
- airflow/providers/google/cloud/operators/vision.py +1 -1
- airflow/providers/google/cloud/sensors/dataflow.py +23 -6
- airflow/providers/google/cloud/sensors/datafusion.py +2 -2
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +1 -2
- airflow/providers/google/cloud/transfers/gcs_to_local.py +3 -1
- airflow/providers/google/cloud/transfers/oracle_to_gcs.py +9 -9
- airflow/providers/google/cloud/triggers/bigquery.py +11 -13
- airflow/providers/google/cloud/triggers/cloud_build.py +1 -1
- airflow/providers/google/cloud/triggers/cloud_run.py +1 -1
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +1 -1
- airflow/providers/google/cloud/triggers/datafusion.py +1 -1
- airflow/providers/google/cloud/triggers/dataproc.py +10 -9
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +45 -27
- airflow/providers/google/cloud/triggers/mlengine.py +1 -1
- airflow/providers/google/cloud/triggers/pubsub.py +1 -1
- airflow/providers/google/cloud/utils/credentials_provider.py +1 -1
- airflow/providers/google/common/auth_backend/google_openid.py +2 -2
- airflow/providers/google/common/hooks/base_google.py +2 -6
- airflow/providers/google/common/utils/id_token_credentials.py +2 -2
- airflow/providers/google/get_provider_info.py +19 -16
- airflow/providers/google/leveldb/hooks/leveldb.py +1 -5
- airflow/providers/google/marketing_platform/hooks/display_video.py +47 -3
- airflow/providers/google/marketing_platform/links/analytics_admin.py +1 -1
- airflow/providers/google/marketing_platform/operators/display_video.py +64 -15
- airflow/providers/google/marketing_platform/sensors/display_video.py +9 -2
- airflow/providers/google/version_compat.py +10 -3
- {apache_airflow_providers_google-16.1.0.dist-info → apache_airflow_providers_google-17.0.0rc1.dist-info}/METADATA +106 -100
- {apache_airflow_providers_google-16.1.0.dist-info → apache_airflow_providers_google-17.0.0rc1.dist-info}/RECORD +63 -62
- airflow/providers/google/cloud/hooks/life_sciences.py +0 -159
- airflow/providers/google/cloud/links/life_sciences.py +0 -30
- airflow/providers/google/cloud/operators/life_sciences.py +0 -118
- {apache_airflow_providers_google-16.1.0.dist-info → apache_airflow_providers_google-17.0.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-16.1.0.dist-info → apache_airflow_providers_google-17.0.0rc1.dist-info}/entry_points.txt +0 -0
@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
 
 __all__ = ["__version__"]
 
-__version__ = "16.1.0"
+__version__ = "17.0.0"
 
 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
     "2.10.0"
@@ -29,11 +29,7 @@ from google.auth.exceptions import GoogleAuthError
 
 from airflow.exceptions import AirflowException
 from airflow.providers.google.common.hooks.base_google import get_field
-
-try:
-    from airflow.sdk import BaseHook
-except ImportError:
-    from airflow.hooks.base import BaseHook  # type: ignore[attr-defined,no-redef]
+from airflow.providers.google.version_compat import BaseHook
 
 if TYPE_CHECKING:
     from google.ads.googleads.v20.services.services.customer_service import CustomerServiceClient
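Several hooks in this release (ads, cloud_run, cloud_sql, dataprep) drop their local try/except import of BaseHook in favour of a single import from airflow.providers.google.version_compat. The removed fallback logic presumably now lives in that module; a minimal sketch of the assumed pattern, based only on the try/except block removed above:

# Hypothetical sketch of what version_compat is assumed to centralize;
# it mirrors the import fallback deleted from each individual hook.
try:
    # Airflow 3: BaseHook is exposed via the Task SDK.
    from airflow.sdk import BaseHook
except ImportError:
    # Airflow 2.x fallback.
    from airflow.hooks.base import BaseHook  # type: ignore[attr-defined,no-redef]

__all__ = ["BaseHook"]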
@@ -400,135 +400,6 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         except NotFound:
             return False
 
-    @deprecated(
-        planned_removal_date="July 30, 2025",
-        use_instead="airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.create_table",
-        category=AirflowProviderDeprecationWarning,
-    )
-    @GoogleBaseHook.fallback_to_default_project_id
-    def create_empty_table(
-        self,
-        project_id: str = PROVIDE_PROJECT_ID,
-        dataset_id: str | None = None,
-        table_id: str | None = None,
-        table_resource: dict[str, Any] | None = None,
-        schema_fields: list | None = None,
-        time_partitioning: dict | None = None,
-        cluster_fields: list[str] | None = None,
-        labels: dict | None = None,
-        view: dict | None = None,
-        materialized_view: dict | None = None,
-        encryption_configuration: dict | None = None,
-        retry: Retry = DEFAULT_RETRY,
-        location: str | None = None,
-        exists_ok: bool = True,
-    ) -> Table:
-        """
-        Create a new, empty table in the dataset.
-
-        To create a view, which is defined by a SQL query, parse a dictionary to
-        the *view* argument.
-
-        :param project_id: The project to create the table into.
-        :param dataset_id: The dataset to create the table into.
-        :param table_id: The Name of the table to be created.
-        :param table_resource: Table resource as described in documentation:
-            https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table
-            If provided all other parameters are ignored.
-        :param schema_fields: If set, the schema field list as defined here:
-            https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema
-
-            .. code-block:: python
-
-                schema_fields = [
-                    {"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
-                    {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"},
-                ]
-
-        :param labels: a dictionary containing labels for the table, passed to BigQuery
-        :param retry: Optional. How to retry the RPC.
-        :param time_partitioning: configure optional time partitioning fields i.e.
-            partition by field, type and expiration as per API specifications.
-
-            .. seealso::
-                https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#timePartitioning
-        :param cluster_fields: [Optional] The fields used for clustering.
-            BigQuery supports clustering for both partitioned and
-            non-partitioned tables.
-            https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#clustering.fields
-        :param view: [Optional] A dictionary containing definition for the view.
-            If set, it will create a view instead of a table:
-            https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ViewDefinition
-
-            .. code-block:: python
-
-                view = {
-                    "query": "SELECT * FROM `test-project-id.test_dataset_id.test_table_prefix*` LIMIT 1000",
-                    "useLegacySql": False,
-                }
-
-        :param materialized_view: [Optional] The materialized view definition.
-        :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
-
-            .. code-block:: python
-
-                encryption_configuration = {
-                    "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key",
-                }
-
-        :param num_retries: Maximum number of retries in case of connection problems.
-        :param location: (Optional) The geographic location where the table should reside.
-        :param exists_ok: If ``True``, ignore "already exists" errors when creating the table.
-        :return: Created table
-        """
-        _table_resource: dict[str, Any] = {}
-
-        if self.location:
-            _table_resource["location"] = self.location
-
-        if schema_fields:
-            _table_resource["schema"] = {"fields": schema_fields}
-
-        if time_partitioning:
-            _table_resource["timePartitioning"] = time_partitioning
-
-        if cluster_fields:
-            _table_resource["clustering"] = {"fields": cluster_fields}
-
-        if labels:
-            _table_resource["labels"] = labels
-
-        if view:
-            _table_resource["view"] = view
-
-        if materialized_view:
-            _table_resource["materializedView"] = materialized_view
-
-        if encryption_configuration:
-            _table_resource["encryptionConfiguration"] = encryption_configuration
-
-        table_resource = table_resource or _table_resource
-        table_resource = self._resolve_table_reference(
-            table_resource=table_resource,
-            project_id=project_id,
-            dataset_id=dataset_id,
-            table_id=table_id,
-        )
-        table = Table.from_api_repr(table_resource)
-        result = self.get_client(project_id=project_id, location=location).create_table(
-            table=table, exists_ok=exists_ok, retry=retry
-        )
-        get_hook_lineage_collector().add_output_asset(
-            context=self,
-            scheme="bigquery",
-            asset_kwargs={
-                "project_id": result.project,
-                "dataset_id": result.dataset_id,
-                "table_id": result.table_id,
-            },
-        )
-        return result
-
     @GoogleBaseHook.fallback_to_default_project_id
     def create_table(
         self,
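For DAGs or custom code that still call the removed create_empty_table, the deprecation notice points at BigQueryHook.create_table, whose def line is visible in the surrounding context. A rough migration sketch, assuming create_table accepts the same dataset/table/schema keyword arguments as the removed method; the exact 17.0.0 signature should be checked against the provider API reference, and the project/dataset/table names below are placeholders:

from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

hook = BigQueryHook(gcp_conn_id="google_cloud_default")

# Previously: hook.create_empty_table(dataset_id=..., table_id=..., schema_fields=...)
# Assumed equivalent call; parameter names mirror the removed method and may differ slightly.
hook.create_table(
    project_id="my-project",  # placeholder
    dataset_id="my_dataset",
    table_id="my_table",
    schema_fields=[
        {"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
        {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"},
    ],
    exists_ok=True,
)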
@@ -2248,7 +2119,7 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
         self,
         sql: str,
         pass_value: Any,
-        records: list[Any],
+        records: list[Any] | None = None,
         tolerance: float | None = None,
     ) -> None:
         """
@@ -0,0 +1,109 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+from collections.abc import Sequence
+from typing import TYPE_CHECKING
+
+from google.cloud.logging_v2.services.config_service_v2 import ConfigServiceV2Client
+from google.cloud.logging_v2.types import (
+    CreateSinkRequest,
+    DeleteSinkRequest,
+    GetSinkRequest,
+    ListSinksRequest,
+    LogSink,
+    UpdateSinkRequest,
+)
+
+from airflow.providers.google.common.consts import CLIENT_INFO
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID, GoogleBaseHook
+
+if TYPE_CHECKING:
+    from google.protobuf.field_mask_pb2 import FieldMask
+
+
+class CloudLoggingHook(GoogleBaseHook):
+    """
+    Hook for Google Cloud Logging Log Sinks API.
+
+    :param gcp_conn_id: The connection ID to use when fetching connection info.
+    :param impersonation_chain: Optional service account to impersonate.
+    """
+
+    def __init__(
+        self,
+        gcp_conn_id: str = "google_cloud_default",
+        impersonation_chain: str | Sequence[str] | None = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(gcp_conn_id=gcp_conn_id, impersonation_chain=impersonation_chain, **kwargs)
+        self._client: ConfigServiceV2Client | None = None
+
+    def get_conn(self) -> ConfigServiceV2Client:
+        """Return the Google Cloud Logging Config client."""
+        if not self._client:
+            self._client = ConfigServiceV2Client(credentials=self.get_credentials(), client_info=CLIENT_INFO)
+        return self._client
+
+    def get_parent(self, project_id):
+        return f"projects/{project_id}"
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def create_sink(
+        self, sink: LogSink | dict, unique_writer_identity: bool = True, project_id: str = PROVIDE_PROJECT_ID
+    ) -> LogSink:
+        if isinstance(sink, dict):
+            sink = LogSink(**sink)
+        request = CreateSinkRequest(
+            parent=self.get_parent(project_id), sink=sink, unique_writer_identity=unique_writer_identity
+        )
+        return self.get_conn().create_sink(request=request)
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def get_sink(self, sink_name: str, project_id: str = PROVIDE_PROJECT_ID) -> LogSink:
+        request = GetSinkRequest(sink_name=f"projects/{project_id}/sinks/{sink_name}")
+        return self.get_conn().get_sink(request=request)
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def list_sinks(self, page_size: int | None = None, project_id: str = PROVIDE_PROJECT_ID) -> list[LogSink]:
+        request = ListSinksRequest(parent=self.get_parent(project_id), page_size=page_size)
+        return list(self.get_conn().list_sinks(request=request))
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def delete_sink(self, sink_name: str, project_id: str = PROVIDE_PROJECT_ID) -> None:
+        request = DeleteSinkRequest(sink_name=f"projects/{project_id}/sinks/{sink_name}")
+        self.get_conn().delete_sink(request=request)
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def update_sink(
+        self,
+        sink_name: str,
+        sink: LogSink | dict,
+        unique_writer_identity: bool,
+        update_mask: FieldMask | dict,
+        project_id: str = PROVIDE_PROJECT_ID,
+    ) -> LogSink:
+        if isinstance(sink, dict):
+            sink = LogSink(**sink)
+        request = UpdateSinkRequest(
+            sink_name=f"projects/{project_id}/sinks/{sink_name}",
+            sink=sink,
+            unique_writer_identity=unique_writer_identity,
+            update_mask=update_mask,
+        )
+        return self.get_conn().update_sink(request=request)
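The new CloudLoggingHook wraps the Cloud Logging ConfigServiceV2Client for log-sink CRUD and backs the new cloud_logging_sink operators listed above. A minimal usage sketch built only from the methods shown in this hunk; the project, sink name, and destination bucket are placeholder values and the connection must already hold valid credentials:

from airflow.providers.google.cloud.hooks.cloud_logging import CloudLoggingHook

hook = CloudLoggingHook(gcp_conn_id="google_cloud_default")

# Create a sink routing logs to a pre-existing Cloud Storage bucket.
sink = hook.create_sink(
    sink={
        "name": "example-sink",                                  # placeholder
        "destination": "storage.googleapis.com/example-bucket",  # placeholder
        "filter": "severity>=ERROR",
    },
    project_id="my-project",  # placeholder; omitted, it falls back to the connection default
)

# The returned LogSink carries the writer identity to grant on the destination.
print(sink.writer_identity)

# List existing sinks, then clean up.
for existing in hook.list_sinks(project_id="my-project"):
    print(existing.name)
hook.delete_sink(sink_name="example-sink", project_id="my-project")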
@@ -38,7 +38,7 @@ from google.cloud.run_v2 import (
     ServicesClient,
     UpdateJobRequest,
 )
-from google.longrunning import operations_pb2
+from google.longrunning import operations_pb2
 
 from airflow.exceptions import AirflowException
 from airflow.providers.google.common.consts import CLIENT_INFO
@@ -60,11 +60,7 @@ from airflow.providers.google.common.hooks.base_google import (
     GoogleBaseHook,
     get_field,
 )
-
-try:
-    from airflow.sdk import BaseHook
-except ImportError:
-    from airflow.hooks.base import BaseHook  # type: ignore[attr-defined,no-redef]
+from airflow.providers.google.version_compat import BaseHook
 from airflow.utils.log.logging_mixin import LoggingMixin
 
 if TYPE_CHECKING:
@@ -1107,6 +1103,8 @@ class CloudSQLDatabaseHook(BaseHook):
         return connection_uri
 
     def _get_instance_socket_name(self) -> str:
+        if self.project_id is None:
+            raise ValueError("The project_id should not be none")
         return self.project_id + ":" + self.location + ":" + self.instance
 
     def _get_sqlproxy_instance_specification(self) -> str:
@@ -1139,6 +1137,8 @@ class CloudSQLDatabaseHook(BaseHook):
             raise ValueError("Proxy runner can only be retrieved in case of use_proxy = True")
         if not self.sql_proxy_unique_path:
             raise ValueError("The sql_proxy_unique_path should be set")
+        if self.project_id is None:
+            raise ValueError("The project_id should not be None")
         return CloudSqlProxyRunner(
             path_prefix=self.sql_proxy_unique_path,
             instance_specification=self._get_sqlproxy_instance_specification(),
@@ -58,7 +58,7 @@ if TYPE_CHECKING:
     from google.cloud.storage_transfer_v1.services.storage_transfer_service.pagers import (
         ListTransferJobsAsyncPager,
     )
-    from google.longrunning import operations_pb2
+    from google.longrunning import operations_pb2
     from proto import Message
 
 log = logging.getLogger(__name__)
@@ -51,7 +51,6 @@ from googleapiclient.discovery import Resource, build
 
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.providers.apache.beam.hooks.beam import BeamHook, BeamRunnerType, beam_options_to_args
-from airflow.providers.google.common.deprecated import deprecated
 from airflow.providers.google.common.hooks.base_google import (
     PROVIDE_PROJECT_ID,
     GoogleBaseAsyncHook,
@@ -1126,90 +1125,6 @@ class DataflowHook(GoogleBaseHook):
         )
         jobs_controller.cancel()
 
-    @deprecated(
-        planned_removal_date="July 01, 2025",
-        use_instead="airflow.providers.google.cloud.hooks.dataflow.DataflowHook.launch_beam_yaml_job",
-        category=AirflowProviderDeprecationWarning,
-    )
-    @GoogleBaseHook.fallback_to_default_project_id
-    def start_sql_job(
-        self,
-        job_name: str,
-        query: str,
-        options: dict[str, Any],
-        project_id: str,
-        location: str = DEFAULT_DATAFLOW_LOCATION,
-        on_new_job_id_callback: Callable[[str], None] | None = None,
-        on_new_job_callback: Callable[[dict], None] | None = None,
-    ):
-        """
-        Start Dataflow SQL query.
-
-        :param job_name: The unique name to assign to the Cloud Dataflow job.
-        :param query: The SQL query to execute.
-        :param options: Job parameters to be executed.
-            For more information, look at:
-            `https://cloud.google.com/sdk/gcloud/reference/beta/dataflow/sql/query
-            <gcloud beta dataflow sql query>`__
-            command reference
-        :param location: The location of the Dataflow job (for example europe-west1)
-        :param project_id: The ID of the GCP project that owns the job.
-            If set to ``None`` or missing, the default project_id from the GCP connection is used.
-        :param on_new_job_id_callback: (Deprecated) Callback called when the job ID is known.
-        :param on_new_job_callback: Callback called when the job is known.
-        :return: the new job object
-        """
-        gcp_options = {
-            "project": project_id,
-            "format": "value(job.id)",
-            "job-name": job_name,
-            "region": location,
-        }
-        cmd = self._build_gcloud_command(
-            command=["gcloud", "dataflow", "sql", "query", query], parameters={**gcp_options, **options}
-        )
-        self.log.info("Executing command: %s", " ".join(shlex.quote(c) for c in cmd))
-        with self.provide_authorized_gcloud():
-            proc = subprocess.run(cmd, capture_output=True)
-        self.log.info("Output: %s", proc.stdout.decode())
-        self.log.warning("Stderr: %s", proc.stderr.decode())
-        self.log.info("Exit code %d", proc.returncode)
-        stderr_last_20_lines = "\n".join(proc.stderr.decode().strip().splitlines()[-20:])
-        if proc.returncode != 0:
-            raise AirflowException(
-                f"Process exit with non-zero exit code. Exit code: {proc.returncode} Error Details : "
-                f"{stderr_last_20_lines}"
-            )
-        job_id = proc.stdout.decode().strip()
-
-        self.log.info("Created job ID: %s", job_id)
-
-        jobs_controller = _DataflowJobsController(
-            dataflow=self.get_conn(),
-            project_number=project_id,
-            job_id=job_id,
-            location=location,
-            poll_sleep=self.poll_sleep,
-            num_retries=self.num_retries,
-            drain_pipeline=self.drain_pipeline,
-            wait_until_finished=self.wait_until_finished,
-        )
-        job = jobs_controller.get_jobs(refresh=True)[0]
-
-        if on_new_job_id_callback:
-            warnings.warn(
-                "on_new_job_id_callback is Deprecated. Please start using on_new_job_callback",
-                AirflowProviderDeprecationWarning,
-                stacklevel=3,
-            )
-            on_new_job_id_callback(cast("str", job.get("id")))
-
-        if on_new_job_callback:
-            on_new_job_callback(job)
-
-        jobs_controller.wait_for_done()
-        return jobs_controller.get_jobs(refresh=True)[0]
-
     @GoogleBaseHook.fallback_to_default_project_id
     def get_job(
         self,
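start_sql_job (Dataflow SQL via gcloud) is removed; its deprecation notice pointed callers at DataflowHook.launch_beam_yaml_job. A heavily hedged sketch of the replacement call follows; the method name comes from the deprecation notice above, but the parameter names and shapes here are assumptions and should be verified against the hook's current signature, and the job name, YAML path, project, and region are placeholders:

from airflow.providers.google.cloud.hooks.dataflow import DataflowHook

hook = DataflowHook(gcp_conn_id="google_cloud_default")

# Assumed call shape for the Beam YAML launcher; argument names are illustrative,
# not confirmed by this diff.
job_id = hook.launch_beam_yaml_job(
    job_name="example-yaml-job",                              # placeholder
    yaml_pipeline_file="gs://example-bucket/pipeline.yaml",   # placeholder
    append_job_name=True,
    options=None,
    jinja_variables=None,
    project_id="my-project",                                  # placeholder
    location="us-central1",
)
print(job_id)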
@@ -434,7 +434,7 @@ class DataFusionHook(GoogleBaseHook):
         pipeline_id: str,
         pipeline_type: DataFusionPipelineType = DataFusionPipelineType.BATCH,
         namespace: str = "default",
-    ) ->
+    ) -> dict:
         url = os.path.join(
             self._base_url(instance_url, namespace),
             quote(pipeline_name),
@@ -28,10 +28,7 @@ import requests
 from requests import HTTPError
 from tenacity import retry, stop_after_attempt, wait_exponential
 
-try:
-    from airflow.sdk import BaseHook
-except ImportError:
-    from airflow.hooks.base import BaseHook  # type: ignore[attr-defined,no-redef]
+from airflow.providers.google.version_compat import BaseHook
 
 
 def _get_field(extras: dict, field_name: str) -> str | None:
|