apache-airflow-providers-google 16.0.0a1__py3-none-any.whl → 16.0.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/__init__.py +1 -1
- airflow/providers/google/ads/hooks/ads.py +34 -0
- airflow/providers/google/cloud/hooks/bigquery.py +63 -76
- airflow/providers/google/cloud/hooks/gcs.py +3 -3
- airflow/providers/google/cloud/hooks/looker.py +5 -0
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +0 -36
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +1 -66
- airflow/providers/google/cloud/hooks/vertex_ai/ray.py +1 -1
- airflow/providers/google/cloud/links/cloud_run.py +59 -0
- airflow/providers/google/cloud/log/gcs_task_handler.py +4 -4
- airflow/providers/google/cloud/operators/bigquery.py +49 -10
- airflow/providers/google/cloud/operators/cloud_run.py +10 -1
- airflow/providers/google/cloud/operators/gcs.py +1 -0
- airflow/providers/google/cloud/operators/kubernetes_engine.py +3 -85
- airflow/providers/google/cloud/operators/pubsub.py +2 -1
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +0 -92
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +4 -0
- airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +9 -5
- airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
- airflow/providers/google/cloud/transfers/s3_to_gcs.py +11 -5
- airflow/providers/google/cloud/triggers/bigquery.py +32 -5
- airflow/providers/google/cloud/triggers/dataproc.py +62 -10
- airflow/providers/google/get_provider_info.py +14 -5
- airflow/providers/google/leveldb/hooks/leveldb.py +25 -0
- {apache_airflow_providers_google-16.0.0a1.dist-info → apache_airflow_providers_google-16.0.0rc1.dist-info}/METADATA +23 -22
- {apache_airflow_providers_google-16.0.0a1.dist-info → apache_airflow_providers_google-16.0.0rc1.dist-info}/RECORD +29 -28
- airflow/providers/google/cloud/links/automl.py +0 -193
- {apache_airflow_providers_google-16.0.0a1.dist-info → apache_airflow_providers_google-16.0.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-16.0.0a1.dist-info → apache_airflow_providers_google-16.0.0rc1.dist-info}/entry_points.txt +0 -0
--- a/airflow/providers/google/cloud/operators/bigquery.py
+++ b/airflow/providers/google/cloud/operators/bigquery.py
@@ -93,16 +93,32 @@ class IfExistAction(enum.Enum):
     SKIP = "skip"
 
 
+class _BigQueryHookWithFlexibleProjectId(BigQueryHook):
+    @property
+    def project_id(self) -> str:
+        _, project_id = self.get_credentials_and_project_id()
+        return project_id or PROVIDE_PROJECT_ID
+
+    @project_id.setter
+    def project_id(self, value: str) -> None:
+        cached_creds, _ = self.get_credentials_and_project_id()
+        self._cached_project_id = value or PROVIDE_PROJECT_ID
+        self._cached_credntials = cached_creds
+
+
 class _BigQueryDbHookMixin:
-    def get_db_hook(self: BigQueryCheckOperator) -> BigQueryHook:  # type:ignore[misc]
+    def get_db_hook(self: BigQueryCheckOperator) -> _BigQueryHookWithFlexibleProjectId:  # type:ignore[misc]
         """Get BigQuery DB Hook."""
-        return BigQueryHook(
+        hook = _BigQueryHookWithFlexibleProjectId(
             gcp_conn_id=self.gcp_conn_id,
             use_legacy_sql=self.use_legacy_sql,
             location=self.location,
             impersonation_chain=self.impersonation_chain,
             labels=self.labels,
         )
+        if self.project_id:
+            hook.project_id = self.project_id
+        return hook
 
 
 class _BigQueryOperatorsEncryptionConfigurationMixin:
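The new `_BigQueryHookWithFlexibleProjectId` turns the hook's `project_id` into a writable property: by default it resolves from the connection's credentials, but `get_db_hook()` can pin a user-supplied project after construction. Below is a minimal, standalone sketch of that property-override pattern; the class and attribute names are illustrative stand-ins, not the provider's actual implementation.

class CredentialBackedHook:
    """Stand-in for a hook whose project normally comes from its credentials."""

    _cached_project_id: str | None = None

    def get_credentials_and_project_id(self) -> tuple[object, str | None]:
        # Fake credential lookup for illustration only.
        return (object(), "project-from-credentials")

    @property
    def project_id(self) -> str | None:
        if self._cached_project_id:
            return self._cached_project_id
        _, project_id = self.get_credentials_and_project_id()
        return project_id

    @project_id.setter
    def project_id(self, value: str) -> None:
        # Pin the project explicitly, as get_db_hook() does when the
        # operator received a project_id argument.
        self._cached_project_id = value


hook = CredentialBackedHook()
assert hook.project_id == "project-from-credentials"
hook.project_id = "user-supplied-project"
assert hook.project_id == "user-supplied-project"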
@@ -190,6 +206,7 @@ class BigQueryCheckOperator(
         https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs.
         For example, [{ 'name': 'corpus', 'parameterType': { 'type': 'STRING' },
         'parameterValue': { 'value': 'romeoandjuliet' } }]. (templated)
+    :param project_id: Google Cloud Project where the job is running
     """
 
     template_fields: Sequence[str] = (
@@ -208,6 +225,7 @@ class BigQueryCheckOperator(
         *,
         sql: str,
         gcp_conn_id: str = "google_cloud_default",
+        project_id: str = PROVIDE_PROJECT_ID,
         use_legacy_sql: bool = True,
         location: str | None = None,
         impersonation_chain: str | Sequence[str] | None = None,
@@ -228,6 +246,7 @@ class BigQueryCheckOperator(
         self.deferrable = deferrable
         self.poll_interval = poll_interval
         self.query_params = query_params
+        self.project_id = project_id
 
     def _submit_job(
         self,
@@ -243,7 +262,7 @@ class BigQueryCheckOperator(
 
         return hook.insert_job(
             configuration=configuration,
-            project_id=hook.project_id,
+            project_id=self.project_id,
             location=self.location,
             job_id=job_id,
             nowait=True,
@@ -257,6 +276,8 @@ class BigQueryCheckOperator(
             gcp_conn_id=self.gcp_conn_id,
             impersonation_chain=self.impersonation_chain,
         )
+        if self.project_id is None:
+            self.project_id = hook.project_id
         job = self._submit_job(hook, job_id="")
         context["ti"].xcom_push(key="job_id", value=job.job_id)
         if job.running():
@@ -265,7 +286,7 @@ class BigQueryCheckOperator(
                 trigger=BigQueryCheckTrigger(
                     conn_id=self.gcp_conn_id,
                     job_id=job.job_id,
-                    project_id=hook.project_id,
+                    project_id=self.project_id,
                     location=self.location or hook.location,
                     poll_interval=self.poll_interval,
                     impersonation_chain=self.impersonation_chain,
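Taken together, the hunks above let a DAG author point a check's query job at a specific project instead of the one implied by the connection. A hedged usage sketch (project, dataset, and task names are made up):

from airflow.providers.google.cloud.operators.bigquery import BigQueryCheckOperator

check_rows = BigQueryCheckOperator(
    task_id="check_rows",
    sql="SELECT COUNT(*) FROM `my-project.my_dataset.my_table`",
    project_id="my-billing-project",  # new: the query job is created (and billed) here
    use_legacy_sql=False,
    location="EU",
)

When `project_id` is omitted it defaults to `PROVIDE_PROJECT_ID` and, as the execute() hunk shows, falls back to the hook's credential-derived project, so existing DAGs keep their behavior.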
@@ -342,6 +363,7 @@ class BigQueryValueCheckOperator(
     :param deferrable: Run operator in the deferrable mode.
     :param poll_interval: (Deferrable mode only) polling period in seconds to
         check for the status of job.
+    :param project_id: Google Cloud Project where the job is running
     """
 
     template_fields: Sequence[str] = (
@@ -363,6 +385,7 @@ class BigQueryValueCheckOperator(
         tolerance: Any = None,
         encryption_configuration: dict | None = None,
         gcp_conn_id: str = "google_cloud_default",
+        project_id: str = PROVIDE_PROJECT_ID,
         use_legacy_sql: bool = True,
         location: str | None = None,
         impersonation_chain: str | Sequence[str] | None = None,
@@ -380,6 +403,7 @@ class BigQueryValueCheckOperator(
         self.labels = labels
         self.deferrable = deferrable
         self.poll_interval = poll_interval
+        self.project_id = project_id
 
     def _submit_job(
         self,
@@ -398,7 +422,7 @@ class BigQueryValueCheckOperator(
 
         return hook.insert_job(
             configuration=configuration,
-            project_id=hook.project_id,
+            project_id=self.project_id,
             location=self.location,
             job_id=job_id,
             nowait=True,
@@ -409,7 +433,8 @@ class BigQueryValueCheckOperator(
             super().execute(context=context)
         else:
             hook = BigQueryHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain)
-
+            if self.project_id is None:
+                self.project_id = hook.project_id
             job = self._submit_job(hook, job_id="")
             context["ti"].xcom_push(key="job_id", value=job.job_id)
             if job.running():
@@ -418,7 +443,7 @@ class BigQueryValueCheckOperator(
                 trigger=BigQueryValueCheckTrigger(
                     conn_id=self.gcp_conn_id,
                     job_id=job.job_id,
-                    project_id=hook.project_id,
+                    project_id=self.project_id,
                     location=self.location or hook.location,
                     sql=self.sql,
                     pass_value=self.pass_value,
@@ -575,6 +600,9 @@ class BigQueryIntervalCheckOperator(
         hook = BigQueryHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain)
         self.log.info("Using ratio formula: %s", self.ratio_formula)
 
+        if self.project_id is None:
+            self.project_id = hook.project_id
+
         self.log.info("Executing SQL check: %s", self.sql1)
         job_1 = self._submit_job(hook, sql=self.sql1, job_id="")
         context["ti"].xcom_push(key="job_id", value=job_1.job_id)
@@ -587,7 +615,7 @@ class BigQueryIntervalCheckOperator(
                     conn_id=self.gcp_conn_id,
                     first_job_id=job_1.job_id,
                     second_job_id=job_2.job_id,
-                    project_id=hook.project_id,
+                    project_id=self.project_id,
                     table=self.table,
                     location=self.location or hook.location,
                     metrics_thresholds=self.metrics_thresholds,
@@ -654,6 +682,7 @@ class BigQueryColumnCheckOperator(
         Service Account Token Creator IAM role to the directly preceding identity, with first
         account from the list granting this role to the originating account (templated).
     :param labels: a dictionary containing labels for the table, passed to BigQuery
+    :param project_id: Google Cloud Project where the job is running
     """
 
     template_fields: Sequence[str] = tuple(set(SQLColumnCheckOperator.template_fields) | {"gcp_conn_id"})
@@ -670,6 +699,7 @@ class BigQueryColumnCheckOperator(
         accept_none: bool = True,
         encryption_configuration: dict | None = None,
         gcp_conn_id: str = "google_cloud_default",
+        project_id: str = PROVIDE_PROJECT_ID,
         use_legacy_sql: bool = True,
         location: str | None = None,
         impersonation_chain: str | Sequence[str] | None = None,
@@ -695,6 +725,7 @@ class BigQueryColumnCheckOperator(
         self.location = location
         self.impersonation_chain = impersonation_chain
         self.labels = labels
+        self.project_id = project_id
 
     def _submit_job(
         self,
@@ -706,7 +737,7 @@ class BigQueryColumnCheckOperator(
         self.include_encryption_configuration(configuration, "query")
         return hook.insert_job(
             configuration=configuration,
-            project_id=hook.project_id,
+            project_id=self.project_id,
             location=self.location,
             job_id=job_id,
             nowait=False,
@@ -715,6 +746,9 @@ class BigQueryColumnCheckOperator(
     def execute(self, context=None):
         """Perform checks on the given columns."""
         hook = self.get_db_hook()
+
+        if self.project_id is None:
+            self.project_id = hook.project_id
         failed_tests = []
 
         job = self._submit_job(hook, job_id="")
@@ -786,6 +820,7 @@ class BigQueryTableCheckOperator(
         account from the list granting this role to the originating account (templated).
     :param labels: a dictionary containing labels for the table, passed to BigQuery
     :param encryption_configuration: (Optional) Custom encryption configuration (e.g., Cloud KMS keys).
+    :param project_id: Google Cloud Project where the job is running
 
     .. code-block:: python
 
@@ -805,6 +840,7 @@ class BigQueryTableCheckOperator(
         checks: dict,
         partition_clause: str | None = None,
         gcp_conn_id: str = "google_cloud_default",
+        project_id: str = PROVIDE_PROJECT_ID,
         use_legacy_sql: bool = True,
         location: str | None = None,
         impersonation_chain: str | Sequence[str] | None = None,
@@ -819,6 +855,7 @@ class BigQueryTableCheckOperator(
         self.impersonation_chain = impersonation_chain
         self.labels = labels
         self.encryption_configuration = encryption_configuration
+        self.project_id = project_id
 
     def _submit_job(
         self,
@@ -832,7 +869,7 @@ class BigQueryTableCheckOperator(
 
         return hook.insert_job(
             configuration=configuration,
-            project_id=hook.project_id,
+            project_id=self.project_id,
             location=self.location,
             job_id=job_id,
             nowait=False,
@@ -841,6 +878,8 @@ class BigQueryTableCheckOperator(
     def execute(self, context=None):
         """Execute the given checks on the table."""
         hook = self.get_db_hook()
+        if self.project_id is None:
+            self.project_id = hook.project_id
         job = self._submit_job(hook, job_id="")
         context["ti"].xcom_push(key="job_id", value=job.job_id)
         records = job.result().to_dataframe()
--- a/airflow/providers/google/cloud/operators/cloud_run.py
+++ b/airflow/providers/google/cloud/operators/cloud_run.py
@@ -27,6 +27,7 @@ from google.cloud.run_v2 import Job, Service
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.cloud_run import CloudRunHook, CloudRunServiceHook
+from airflow.providers.google.cloud.links.cloud_run import CloudRunJobLoggingLink
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
 from airflow.providers.google.cloud.triggers.cloud_run import CloudRunJobFinishedTrigger, RunJobStatus
 
@@ -248,7 +249,7 @@ class CloudRunExecuteJobOperator(GoogleCloudBaseOperator):
 
     :param project_id: Required. The ID of the Google Cloud project that the service belongs to.
     :param region: Required. The ID of the Google Cloud region that the service belongs to.
-    :param job_name: Required. The name of the job to update.
+    :param job_name: Required. The name of the job to execute.
     :param overrides: Optional map of override values.
     :param gcp_conn_id: The connection ID used to connect to Google Cloud.
     :param polling_period_seconds: Optional. Control the rate of the poll for the result of deferrable run.
@@ -265,6 +266,7 @@ class CloudRunExecuteJobOperator(GoogleCloudBaseOperator):
     :param deferrable: Run the operator in deferrable mode.
     """
 
+    operator_extra_links = (CloudRunJobLoggingLink(),)
     template_fields = (
         "project_id",
         "region",
@@ -312,6 +314,13 @@ class CloudRunExecuteJobOperator(GoogleCloudBaseOperator):
         if self.operation is None:
             raise AirflowException("Operation is None")
 
+        if self.operation.metadata.log_uri:
+            CloudRunJobLoggingLink.persist(
+                context=context,
+                task_instance=self,
+                log_uri=self.operation.metadata.log_uri,
+            )
+
         if not self.deferrable:
             result: Execution = self._wait_for_operation(self.operation)
             self._fail_if_execution_failed(result)
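When the Cloud Run operation's metadata exposes a `log_uri`, the operator now persists it through the new `CloudRunJobLoggingLink`, so the task instance gains an extra link to Cloud Logging in the UI. The sketch below shows the general shape of an Airflow operator extra link, assuming the persisted value round-trips through XCom; it is a simplified stand-in, not the code added in `airflow/providers/google/cloud/links/cloud_run.py`.

from airflow.models import BaseOperatorLink, XCom


class JobLoggingLink(BaseOperatorLink):  # illustrative stand-in
    name = "Cloud Run Job Logs"

    def get_link(self, operator, *, ti_key):
        # Read back the URI that execute() stored for this task instance;
        # the UI calls this when rendering the extra-link button.
        return XCom.get_value(ti_key=ti_key, key="log_uri") or ""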
--- a/airflow/providers/google/cloud/operators/kubernetes_engine.py
+++ b/airflow/providers/google/cloud/operators/kubernetes_engine.py
@@ -57,7 +57,6 @@ from airflow.providers.google.cloud.triggers.kubernetes_engine import (
     GKEOperationTrigger,
     GKEStartPodTrigger,
 )
-from airflow.providers.google.common.deprecated import deprecated
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.providers_manager import ProvidersManager
 from airflow.utils.timezone import utcnow
@@ -222,7 +221,6 @@ class GKEDeleteClusterOperator(GKEOperatorMixin, GoogleCloudBaseOperator):
         If set as a sequence, the identities from the list must grant
         Service Account Token Creator IAM role to the directly preceding identity, with first
         account from the list granting this role to the originating account (templated).
-    :param name: (Deprecated) The name of the resource to delete, in this case cluster name
     :param api_version: The api version to use
     :param deferrable: Run operator in the deferrable mode.
     :param poll_interval: Interval size which defines how often operation status is checked.
@@ -241,7 +239,6 @@ class GKEDeleteClusterOperator(GKEOperatorMixin, GoogleCloudBaseOperator):
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
         cluster_name: str | None = None,
-        name: str | None = None,
         api_version: str = "v2",
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         poll_interval: int = 10,
@@ -251,36 +248,17 @@ class GKEDeleteClusterOperator(GKEOperatorMixin, GoogleCloudBaseOperator):
         super().__init__(*args, **kwargs)
 
         self.location = location
-        self.cluster_name = cluster_name
+        self.cluster_name = cluster_name
         self.use_internal_ip = use_internal_ip
         self.use_dns_endpoint = use_dns_endpoint
         self.project_id = project_id
         self.gcp_conn_id = gcp_conn_id
         self.impersonation_chain = impersonation_chain
-        self._name = name
         self.api_version = api_version
         self.deferrable = deferrable
         self.poll_interval = poll_interval
         self._check_input()
 
-    @property
-    @deprecated(
-        planned_removal_date="May 01, 2025",
-        use_instead="cluster_name",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def name(self) -> str | None:
-        return self._name
-
-    @name.setter
-    @deprecated(
-        planned_removal_date="May 01, 2025",
-        use_instead="cluster_name",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def name(self, name: str) -> None:
-        self._name = name
-
     def _check_input(self) -> None:
         if not all([self.project_id, self.cluster_name, self.location]):
             self.log.error("One of (project_id, cluster_name, location) is missing or incorrect")
|
@@ -622,16 +600,10 @@ class GKEStartPodOperator(GKEOperatorMixin, KubernetesPodOperator):
|
|
622
600
|
If set as a sequence, the identities from the list must grant
|
623
601
|
Service Account Token Creator IAM role to the directly preceding identity, with first
|
624
602
|
account from the list granting this role to the originating account (templated).
|
625
|
-
:param regional: (Deprecated) The location param is region name.
|
626
603
|
:param on_finish_action: What to do when the pod reaches its final state, or the execution is interrupted.
|
627
604
|
If "delete_pod", the pod will be deleted regardless its state; if "delete_succeeded_pod",
|
628
605
|
only succeeded pod will be deleted. You can set to "keep_pod" to keep the pod.
|
629
606
|
Current default is `keep_pod`, but this will be changed in the next major release of this provider.
|
630
|
-
:param is_delete_operator_pod: (Deprecated) What to do when the pod reaches its final
|
631
|
-
state, or the execution is interrupted. If True, delete the
|
632
|
-
pod; if False, leave the pod. Current default is False, but this will be
|
633
|
-
changed in the next major release of this provider.
|
634
|
-
Deprecated - use `on_finish_action` instead.
|
635
607
|
:param deferrable: Run operator in the deferrable mode.
|
636
608
|
"""
|
637
609
|
|
@@ -651,30 +623,15 @@ class GKEStartPodOperator(GKEOperatorMixin, KubernetesPodOperator):
         project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
-        regional: bool | None = None,
         on_finish_action: str | None = None,
-        is_delete_operator_pod: bool | None = None,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         *args,
         **kwargs,
     ) -> None:
-        if is_delete_operator_pod is not None:
-            kwargs["on_finish_action"] = (
-                OnFinishAction.DELETE_POD if is_delete_operator_pod else OnFinishAction.KEEP_POD
-            )
-        elif on_finish_action is not None:
+        if on_finish_action is not None:
             kwargs["on_finish_action"] = OnFinishAction(on_finish_action)
         else:
-            warnings.warn(
-                f"You have not set parameter `on_finish_action` in class {self.__class__.__name__}. "
-                "Currently the default for this parameter is `keep_pod` but in a future release"
-                " the default will be changed to `delete_pod`. To ensure pods are not deleted in"
-                " the future you will need to set `on_finish_action=keep_pod` explicitly.",
-                AirflowProviderDeprecationWarning,
-                stacklevel=2,
-            )
-            kwargs["on_finish_action"] = OnFinishAction.KEEP_POD
-
+            kwargs["on_finish_action"] = OnFinishAction.DELETE_POD
         super().__init__(*args, **kwargs)
         self.project_id = project_id
         self.location = location
@@ -683,9 +640,6 @@ class GKEStartPodOperator(GKEOperatorMixin, KubernetesPodOperator):
         self.use_internal_ip = use_internal_ip
         self.use_dns_endpoint = use_dns_endpoint
         self.impersonation_chain = impersonation_chain
-        self._regional = regional
-        if is_delete_operator_pod is not None:
-            self.is_delete_operator_pod = is_delete_operator_pod
         self.deferrable = deferrable
 
         # There is no need to manage the kube_config file, as it will be generated automatically.
|
@@ -693,42 +647,6 @@ class GKEStartPodOperator(GKEOperatorMixin, KubernetesPodOperator):
|
|
693
647
|
if self.config_file:
|
694
648
|
raise AirflowException("config_file is not an allowed parameter for the GKEStartPodOperator.")
|
695
649
|
|
696
|
-
@property
|
697
|
-
@deprecated(
|
698
|
-
planned_removal_date="May 01, 2025",
|
699
|
-
use_instead="on_finish_action",
|
700
|
-
category=AirflowProviderDeprecationWarning,
|
701
|
-
)
|
702
|
-
def is_delete_operator_pod(self) -> bool | None:
|
703
|
-
return self._is_delete_operator_pod
|
704
|
-
|
705
|
-
@is_delete_operator_pod.setter
|
706
|
-
@deprecated(
|
707
|
-
planned_removal_date="May 01, 2025",
|
708
|
-
use_instead="on_finish_action",
|
709
|
-
category=AirflowProviderDeprecationWarning,
|
710
|
-
)
|
711
|
-
def is_delete_operator_pod(self, is_delete_operator_pod) -> None:
|
712
|
-
self._is_delete_operator_pod = is_delete_operator_pod
|
713
|
-
|
714
|
-
@property
|
715
|
-
@deprecated(
|
716
|
-
planned_removal_date="May 01, 2025",
|
717
|
-
reason="The parameter is not in actual use.",
|
718
|
-
category=AirflowProviderDeprecationWarning,
|
719
|
-
)
|
720
|
-
def regional(self) -> bool | None:
|
721
|
-
return self._regional
|
722
|
-
|
723
|
-
@regional.setter
|
724
|
-
@deprecated(
|
725
|
-
planned_removal_date="May 01, 2025",
|
726
|
-
reason="The parameter is not in actual use.",
|
727
|
-
category=AirflowProviderDeprecationWarning,
|
728
|
-
)
|
729
|
-
def regional(self, regional) -> None:
|
730
|
-
self._regional = regional
|
731
|
-
|
732
650
|
def invoke_defer_method(self, last_log_time: DateTime | None = None):
|
733
651
|
"""Redefine triggers which are being used in child classes."""
|
734
652
|
trigger_start_time = utcnow()
|
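Note the behavioral change buried in the constructor hunk: with the deprecation shim removed, omitting `on_finish_action` now selects `OnFinishAction.DELETE_POD` rather than warning and keeping the pod. DAGs that relied on the old `keep_pod` default should pass the value explicitly, roughly as below (cluster, image, and task names are illustrative):

from airflow.providers.cncf.kubernetes.utils.pod_manager import OnFinishAction
from airflow.providers.google.cloud.operators.kubernetes_engine import GKEStartPodOperator

start_pod = GKEStartPodOperator(
    task_id="start_pod",
    project_id="my-project",
    location="europe-west1-b",
    cluster_name="my-cluster",
    name="example-pod",
    namespace="default",
    image="busybox",
    cmds=["sh", "-c", "echo done"],
    on_finish_action=OnFinishAction.KEEP_POD.value,  # opt out of the new delete-by-default
)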
--- a/airflow/providers/google/cloud/operators/pubsub.py
+++ b/airflow/providers/google/cloud/operators/pubsub.py
@@ -40,6 +40,7 @@ from google.cloud.pubsub_v1.types import (
     SchemaSettings,
 )
 
+from airflow.configuration import conf
 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.pubsub import PubSubHook
 from airflow.providers.google.cloud.links.pubsub import PubSubSubscriptionLink, PubSubTopicLink
@@ -770,7 +771,7 @@ class PubSubPullOperator(GoogleCloudBaseOperator):
         messages_callback: Callable[[list[ReceivedMessage], Context], Any] | None = None,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
-        deferrable: bool = False,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         poll_interval: int = 300,
         **kwargs,
     ) -> None:
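`PubSubPullOperator` now reads its `deferrable` default from the `[operators] default_deferrable` config entry instead of hard-coding `False`, matching the other Google operators. An explicit argument still wins (values illustrative):

from airflow.providers.google.cloud.operators.pubsub import PubSubPullOperator

pull_messages = PubSubPullOperator(
    task_id="pull_messages",
    project_id="my-project",
    subscription="my-subscription",
    max_messages=10,
    deferrable=True,  # or omit to inherit [operators] default_deferrable
)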
--- a/airflow/providers/google/cloud/operators/vertex_ai/generative_model.py
+++ b/airflow/providers/google/cloud/operators/vertex_ai/generative_model.py
@@ -22,105 +22,13 @@ from __future__ import annotations
 from collections.abc import Sequence
 from typing import TYPE_CHECKING
 
-from airflow.exceptions import AirflowProviderDeprecationWarning
 from airflow.providers.google.cloud.hooks.vertex_ai.generative_model import GenerativeModelHook
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
-from airflow.providers.google.common.deprecated import deprecated
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
 
 
-@deprecated(
-    planned_removal_date="April 09, 2025",
-    use_instead="GenerativeModelGenerateContentOperator",
-    category=AirflowProviderDeprecationWarning,
-)
-class TextGenerationModelPredictOperator(GoogleCloudBaseOperator):
-    """
-    Uses the Vertex AI PaLM API to generate natural language text.
-
-    :param project_id: Required. The ID of the Google Cloud project that the
-        service belongs to (templated).
-    :param location: Required. The ID of the Google Cloud location that the
-        service belongs to (templated).
-    :param prompt: Required. Inputs or queries that a user or a program gives
-        to the Vertex AI PaLM API, in order to elicit a specific response (templated).
-    :param pretrained_model: By default uses the pre-trained model `text-bison`,
-        optimized for performing natural language tasks such as classification,
-        summarization, extraction, content creation, and ideation.
-    :param temperature: Temperature controls the degree of randomness in token
-        selection. Defaults to 0.0.
-    :param max_output_tokens: Token limit determines the maximum amount of text
-        output. Defaults to 256.
-    :param top_p: Tokens are selected from most probable to least until the sum
-        of their probabilities equals the top_p value. Defaults to 0.8.
-    :param top_k: A top_k of 1 means the selected token is the most probable
-        among all tokens. Defaults to 0.4.
-    :param gcp_conn_id: The connection ID to use connecting to Google Cloud.
-    :param impersonation_chain: Optional service account to impersonate using short-term
-        credentials, or chained list of accounts required to get the access_token
-        of the last account in the list, which will be impersonated in the request.
-        If set as a string, the account must grant the originating account
-        the Service Account Token Creator IAM role.
-        If set as a sequence, the identities from the list must grant
-        Service Account Token Creator IAM role to the directly preceding identity, with first
-        account from the list granting this role to the originating account (templated).
-    """
-
-    template_fields = ("location", "project_id", "impersonation_chain", "prompt")
-
-    def __init__(
-        self,
-        *,
-        project_id: str,
-        location: str,
-        prompt: str,
-        pretrained_model: str = "text-bison",
-        temperature: float = 0.0,
-        max_output_tokens: int = 256,
-        top_p: float = 0.8,
-        top_k: int = 40,
-        gcp_conn_id: str = "google_cloud_default",
-        impersonation_chain: str | Sequence[str] | None = None,
-        **kwargs,
-    ) -> None:
-        super().__init__(**kwargs)
-        self.project_id = project_id
-        self.location = location
-        self.prompt = prompt
-        self.pretrained_model = pretrained_model
-        self.temperature = temperature
-        self.max_output_tokens = max_output_tokens
-        self.top_p = top_p
-        self.top_k = top_k
-        self.gcp_conn_id = gcp_conn_id
-        self.impersonation_chain = impersonation_chain
-
-    def execute(self, context: Context):
-        self.hook = GenerativeModelHook(
-            gcp_conn_id=self.gcp_conn_id,
-            impersonation_chain=self.impersonation_chain,
-        )
-
-        self.log.info("Submitting prompt")
-        response = self.hook.text_generation_model_predict(
-            project_id=self.project_id,
-            location=self.location,
-            prompt=self.prompt,
-            pretrained_model=self.pretrained_model,
-            temperature=self.temperature,
-            max_output_tokens=self.max_output_tokens,
-            top_p=self.top_p,
-            top_k=self.top_k,
-        )
-
-        self.log.info("Model response: %s", response)
-        self.xcom_push(context, key="model_response", value=response)
-
-        return response
-
-
 class TextEmbeddingModelGetEmbeddingsOperator(GoogleCloudBaseOperator):
     """
     Uses the Vertex AI Embeddings API to generate embeddings based on prompt.
--- a/airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py
+++ b/airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py
@@ -112,6 +112,10 @@ class RunPipelineJobOperator(GoogleCloudBaseOperator):
         "project_id",
         "input_artifacts",
         "impersonation_chain",
+        "template_path",
+        "pipeline_root",
+        "parameter_values",
+        "service_account",
     ]
     operator_extra_links = (VertexAIPipelineJobLink(),)
 
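Adding `template_path`, `pipeline_root`, `parameter_values`, and `service_account` to `template_fields` means Jinja expressions in those arguments are rendered at run time. A hedged sketch (bucket, project, and variable names are made up):

from airflow.providers.google.cloud.operators.vertex_ai.pipeline_job import RunPipelineJobOperator

run_pipeline = RunPipelineJobOperator(
    task_id="run_pipeline",
    project_id="my-project",
    region="us-central1",
    display_name="pipeline-{{ ds_nodash }}",
    template_path="gs://my-bucket/templates/pipeline.json",
    pipeline_root="gs://my-bucket/pipeline-root/{{ run_id }}",
    parameter_values={"processing_date": "{{ ds }}"},
    service_account="{{ var.value.vertex_service_account }}",
)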
--- a/airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py
+++ b/airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py
@@ -25,6 +25,7 @@ from typing import TYPE_CHECKING
 from airflow.models import BaseOperator
 from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook
 from airflow.providers.google.cloud.links.bigquery import BigQueryTableLink
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
@@ -73,6 +74,7 @@ class BigQueryToBigQueryOperator(BaseOperator):
         If set as a sequence, the identities from the list must grant
         Service Account Token Creator IAM role to the directly preceding identity, with first
         account from the list granting this role to the originating account (templated).
+    :param project_id: Google Cloud Project where the job is running
     """
 
     template_fields: Sequence[str] = (
@@ -93,6 +95,7 @@ class BigQueryToBigQueryOperator(BaseOperator):
         write_disposition: str = "WRITE_EMPTY",
         create_disposition: str = "CREATE_IF_NEEDED",
         gcp_conn_id: str = "google_cloud_default",
+        project_id: str = PROVIDE_PROJECT_ID,
         labels: dict | None = None,
         encryption_configuration: dict | None = None,
         location: str | None = None,
@@ -112,6 +115,7 @@ class BigQueryToBigQueryOperator(BaseOperator):
         self.impersonation_chain = impersonation_chain
         self.hook: BigQueryHook | None = None
         self._job_conf: dict = {}
+        self.project_id = project_id
 
     def _prepare_job_configuration(self):
         self.source_project_dataset_tables = (
@@ -124,7 +128,7 @@ class BigQueryToBigQueryOperator(BaseOperator):
         for source_project_dataset_table in self.source_project_dataset_tables:
             source_project, source_dataset, source_table = self.hook.split_tablename(
                 table_input=source_project_dataset_table,
-                default_project_id=self.hook.project_id,
+                default_project_id=self.project_id,
                 var_name="source_project_dataset_table",
             )
             source_project_dataset_tables_fixup.append(
@@ -133,7 +137,7 @@ class BigQueryToBigQueryOperator(BaseOperator):
 
         destination_project, destination_dataset, destination_table = self.hook.split_tablename(
             table_input=self.destination_project_dataset_table,
-            default_project_id=self.hook.project_id,
+            default_project_id=self.project_id,
         )
         configuration = {
             "copy": {
@@ -168,12 +172,12 @@ class BigQueryToBigQueryOperator(BaseOperator):
             impersonation_chain=self.impersonation_chain,
         )
 
-        if not self.hook.project_id:
-            raise ValueError("The project_id should be set")
+        if not self.project_id:
+            self.project_id = self.hook.project_id
 
         configuration = self._prepare_job_configuration()
         self._job_conf = self.hook.insert_job(
-            configuration=configuration, project_id=self.hook.project_id
+            configuration=configuration, project_id=self.project_id
         ).to_api_repr()
 
         dest_table_info = self._job_conf["configuration"]["copy"]["destinationTable"]
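As with the BigQuery check operators, the copy job can now target an explicit project, which is also used to qualify table names given without a project prefix. Example usage (all names illustrative):

from airflow.providers.google.cloud.transfers.bigquery_to_bigquery import BigQueryToBigQueryOperator

copy_table = BigQueryToBigQueryOperator(
    task_id="copy_table",
    source_project_dataset_tables="source_dataset.source_table",
    destination_project_dataset_table="dest_dataset.dest_table",
    project_id="my-billing-project",  # new: job creation and default table project
    write_disposition="WRITE_TRUNCATE",
)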
--- a/airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py
+++ b/airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py
@@ -208,7 +208,7 @@ class FacebookAdsReportToGcsOperator(BaseOperator):
 
     def _flush_rows(self, converted_rows: list[Any] | None, object_name: str):
         if converted_rows:
-            headers = converted_rows[0].keys()
+            headers = self.fields
             with tempfile.NamedTemporaryFile("w", suffix=".csv") as csvfile:
                 writer = csv.DictWriter(csvfile, fieldnames=headers)
                 writer.writeheader()
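Deriving the CSV header from the operator's configured `fields` rather than from the first returned row keeps the column order stable even when individual API rows omit some fields. The standalone snippet below illustrates why fixed `fieldnames` matter for `csv.DictWriter` (data is made up):

import csv
import io

fields = ["campaign_name", "clicks", "impressions"]
rows = [
    {"campaign_name": "a", "clicks": 10, "impressions": 100},
    {"campaign_name": "b", "impressions": 50},  # "clicks" missing for this row
]

buf = io.StringIO()
writer = csv.DictWriter(buf, fieldnames=fields, restval="")
writer.writeheader()
writer.writerows(rows)  # missing keys become "", columns never shift
print(buf.getvalue())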