apache-airflow-providers-google 10.18.0rc2__py3-none-any.whl → 10.19.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/__init__.py +1 -1
- airflow/providers/google/cloud/hooks/bigquery.py +11 -10
- airflow/providers/google/cloud/links/automl.py +38 -0
- airflow/providers/google/cloud/links/translate.py +180 -0
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +1 -2
- airflow/providers/google/cloud/openlineage/BigQueryErrorRunFacet.json +30 -0
- airflow/providers/google/cloud/openlineage/BigQueryJobRunFacet.json +37 -0
- airflow/providers/google/cloud/openlineage/__init__.py +16 -0
- airflow/providers/google/cloud/openlineage/utils.py +388 -0
- airflow/providers/google/cloud/operators/automl.py +75 -63
- airflow/providers/google/cloud/operators/bigquery.py +1 -62
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +5 -0
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +6 -0
- airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +7 -4
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +1 -1
- airflow/providers/google/cloud/triggers/pubsub.py +8 -11
- airflow/providers/google/cloud/utils/credentials_provider.py +41 -32
- airflow/providers/google/common/hooks/base_google.py +11 -5
- airflow/providers/google/get_provider_info.py +8 -2
- {apache_airflow_providers_google-10.18.0rc2.dist-info → apache_airflow_providers_google-10.19.0rc1.dist-info}/METADATA +8 -8
- {apache_airflow_providers_google-10.18.0rc2.dist-info → apache_airflow_providers_google-10.19.0rc1.dist-info}/RECORD +24 -20
- airflow/providers/google/cloud/utils/openlineage.py +0 -81
- {apache_airflow_providers_google-10.18.0rc2.dist-info → apache_airflow_providers_google-10.19.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-10.18.0rc2.dist-info → apache_airflow_providers_google-10.19.0rc1.dist-info}/entry_points.txt +0 -0
@@ -24,7 +24,6 @@ import warnings
|
|
24
24
|
from functools import cached_property
|
25
25
|
from typing import TYPE_CHECKING, Sequence, Tuple
|
26
26
|
|
27
|
-
from deprecated import deprecated
|
28
27
|
from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
|
29
28
|
from google.cloud.automl_v1beta1 import (
|
30
29
|
BatchPredictResult,
|
@@ -35,15 +34,15 @@ from google.cloud.automl_v1beta1 import (
|
|
35
34
|
TableSpec,
|
36
35
|
)
|
37
36
|
|
38
|
-
from airflow.exceptions import AirflowException
|
37
|
+
from airflow.exceptions import AirflowException
|
39
38
|
from airflow.providers.google.cloud.hooks.automl import CloudAutoMLHook
|
40
39
|
from airflow.providers.google.cloud.hooks.vertex_ai.prediction_service import PredictionServiceHook
|
41
|
-
from airflow.providers.google.cloud.links.automl import (
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
40
|
+
from airflow.providers.google.cloud.links.translate import (
|
41
|
+
TranslationDatasetListLink,
|
42
|
+
TranslationLegacyDatasetLink,
|
43
|
+
TranslationLegacyModelLink,
|
44
|
+
TranslationLegacyModelPredictLink,
|
45
|
+
TranslationLegacyModelTrainLink,
|
47
46
|
)
|
48
47
|
from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
|
49
48
|
from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
|
@@ -119,8 +118,8 @@ class AutoMLTrainModelOperator(GoogleCloudBaseOperator):
|
|
119
118
|
"impersonation_chain",
|
120
119
|
)
|
121
120
|
operator_extra_links = (
|
122
|
-
|
123
|
-
|
121
|
+
TranslationLegacyModelTrainLink(),
|
122
|
+
TranslationLegacyModelLink(),
|
124
123
|
)
|
125
124
|
|
126
125
|
def __init__(
|
@@ -173,7 +172,9 @@ class AutoMLTrainModelOperator(GoogleCloudBaseOperator):
|
|
173
172
|
)
|
174
173
|
project_id = self.project_id or hook.project_id
|
175
174
|
if project_id:
|
176
|
-
|
175
|
+
TranslationLegacyModelTrainLink.persist(
|
176
|
+
context=context, task_instance=self, project_id=project_id
|
177
|
+
)
|
177
178
|
operation_result = hook.wait_for_operation(timeout=self.timeout, operation=operation)
|
178
179
|
result = Model.to_dict(operation_result)
|
179
180
|
model_id = hook.extract_object_id(result)
|
@@ -181,7 +182,7 @@ class AutoMLTrainModelOperator(GoogleCloudBaseOperator):
|
|
181
182
|
|
182
183
|
self.xcom_push(context, key="model_id", value=model_id)
|
183
184
|
if project_id:
|
184
|
-
|
185
|
+
TranslationLegacyModelLink.persist(
|
185
186
|
context=context,
|
186
187
|
task_instance=self,
|
187
188
|
dataset_id=self.model["dataset_id"] or "-",
|
@@ -195,6 +196,9 @@ class AutoMLPredictOperator(GoogleCloudBaseOperator):
|
|
195
196
|
"""
|
196
197
|
Runs prediction operation on Google Cloud AutoML.
|
197
198
|
|
199
|
+
AutoMLPredictOperator for text, image, and video prediction has been deprecated.
|
200
|
+
Please use endpoint_id param instead of model_id param.
|
201
|
+
|
198
202
|
.. seealso::
|
199
203
|
For more information on how to use this operator, take a look at the guide:
|
200
204
|
:ref:`howto/operator:AutoMLPredictOperator`
|
@@ -228,7 +232,7 @@ class AutoMLPredictOperator(GoogleCloudBaseOperator):
|
|
228
232
|
"project_id",
|
229
233
|
"impersonation_chain",
|
230
234
|
)
|
231
|
-
operator_extra_links = (
|
235
|
+
operator_extra_links = (TranslationLegacyModelPredictLink(),)
|
232
236
|
|
233
237
|
def __init__(
|
234
238
|
self,
|
@@ -325,7 +329,7 @@ class AutoMLPredictOperator(GoogleCloudBaseOperator):
|
|
325
329
|
|
326
330
|
project_id = self.project_id or hook.project_id
|
327
331
|
if project_id and self.model_id:
|
328
|
-
|
332
|
+
TranslationLegacyModelPredictLink.persist(
|
329
333
|
context=context,
|
330
334
|
task_instance=self,
|
331
335
|
model_id=self.model_id,
|
@@ -389,7 +393,7 @@ class AutoMLBatchPredictOperator(GoogleCloudBaseOperator):
|
|
389
393
|
"project_id",
|
390
394
|
"impersonation_chain",
|
391
395
|
)
|
392
|
-
operator_extra_links = (
|
396
|
+
operator_extra_links = (TranslationLegacyModelPredictLink(),)
|
393
397
|
|
394
398
|
def __init__(
|
395
399
|
self,
|
@@ -426,7 +430,7 @@ class AutoMLBatchPredictOperator(GoogleCloudBaseOperator):
|
|
426
430
|
gcp_conn_id=self.gcp_conn_id,
|
427
431
|
impersonation_chain=self.impersonation_chain,
|
428
432
|
)
|
429
|
-
model: Model = hook.get_model(
|
433
|
+
self.model: Model = hook.get_model(
|
430
434
|
model_id=self.model_id,
|
431
435
|
location=self.location,
|
432
436
|
project_id=self.project_id,
|
@@ -435,7 +439,7 @@ class AutoMLBatchPredictOperator(GoogleCloudBaseOperator):
|
|
435
439
|
metadata=self.metadata,
|
436
440
|
)
|
437
441
|
|
438
|
-
if not hasattr(model, "translation_model_metadata"):
|
442
|
+
if not hasattr(self.model, "translation_model_metadata"):
|
439
443
|
_raise_exception_for_deprecated_operator(
|
440
444
|
self.__class__.__name__,
|
441
445
|
[
|
@@ -462,7 +466,7 @@ class AutoMLBatchPredictOperator(GoogleCloudBaseOperator):
|
|
462
466
|
self.log.info("Batch prediction is ready.")
|
463
467
|
project_id = self.project_id or hook.project_id
|
464
468
|
if project_id:
|
465
|
-
|
469
|
+
TranslationLegacyModelPredictLink.persist(
|
466
470
|
context=context,
|
467
471
|
task_instance=self,
|
468
472
|
model_id=self.model_id,
|
@@ -511,7 +515,7 @@ class AutoMLCreateDatasetOperator(GoogleCloudBaseOperator):
|
|
511
515
|
"project_id",
|
512
516
|
"impersonation_chain",
|
513
517
|
)
|
514
|
-
operator_extra_links = (
|
518
|
+
operator_extra_links = (TranslationLegacyDatasetLink(),)
|
515
519
|
|
516
520
|
def __init__(
|
517
521
|
self,
|
@@ -560,7 +564,7 @@ class AutoMLCreateDatasetOperator(GoogleCloudBaseOperator):
|
|
560
564
|
self.xcom_push(context, key="dataset_id", value=dataset_id)
|
561
565
|
project_id = self.project_id or hook.project_id
|
562
566
|
if project_id:
|
563
|
-
|
567
|
+
TranslationLegacyDatasetLink.persist(
|
564
568
|
context=context,
|
565
569
|
task_instance=self,
|
566
570
|
dataset_id=dataset_id,
|
@@ -611,7 +615,7 @@ class AutoMLImportDataOperator(GoogleCloudBaseOperator):
|
|
611
615
|
"project_id",
|
612
616
|
"impersonation_chain",
|
613
617
|
)
|
614
|
-
operator_extra_links = (
|
618
|
+
operator_extra_links = (TranslationLegacyDatasetLink(),)
|
615
619
|
|
616
620
|
def __init__(
|
617
621
|
self,
|
@@ -668,7 +672,7 @@ class AutoMLImportDataOperator(GoogleCloudBaseOperator):
|
|
668
672
|
self.log.info("Import is completed")
|
669
673
|
project_id = self.project_id or hook.project_id
|
670
674
|
if project_id:
|
671
|
-
|
675
|
+
TranslationLegacyDatasetLink.persist(
|
672
676
|
context=context,
|
673
677
|
task_instance=self,
|
674
678
|
dataset_id=self.dataset_id,
|
@@ -680,6 +684,10 @@ class AutoMLTablesListColumnSpecsOperator(GoogleCloudBaseOperator):
|
|
680
684
|
"""
|
681
685
|
Lists column specs in a table.
|
682
686
|
|
687
|
+
Operator AutoMLTablesListColumnSpecsOperator has been deprecated due to shutdown of
|
688
|
+
a legacy version of AutoML Tables on March 31, 2024. For additional information
|
689
|
+
see: https://cloud.google.com/automl-tables/docs/deprecations.
|
690
|
+
|
683
691
|
.. seealso::
|
684
692
|
For more information on how to use this operator, take a look at the guide:
|
685
693
|
:ref:`howto/operator:AutoMLTablesListColumnSpecsOperator`
|
@@ -722,7 +730,7 @@ class AutoMLTablesListColumnSpecsOperator(GoogleCloudBaseOperator):
|
|
722
730
|
"project_id",
|
723
731
|
"impersonation_chain",
|
724
732
|
)
|
725
|
-
operator_extra_links = (
|
733
|
+
operator_extra_links = (TranslationLegacyDatasetLink(),)
|
726
734
|
|
727
735
|
def __init__(
|
728
736
|
self,
|
@@ -754,6 +762,11 @@ class AutoMLTablesListColumnSpecsOperator(GoogleCloudBaseOperator):
|
|
754
762
|
self.retry = retry
|
755
763
|
self.gcp_conn_id = gcp_conn_id
|
756
764
|
self.impersonation_chain = impersonation_chain
|
765
|
+
raise AirflowException(
|
766
|
+
"Operator AutoMLTablesListColumnSpecsOperator has been deprecated due to shutdown of "
|
767
|
+
"a legacy version of AutoML Tables on March 31, 2024. "
|
768
|
+
"For additional information see: https://cloud.google.com/automl-tables/docs/deprecations."
|
769
|
+
)
|
757
770
|
|
758
771
|
def execute(self, context: Context):
|
759
772
|
hook = CloudAutoMLHook(
|
@@ -777,7 +790,7 @@ class AutoMLTablesListColumnSpecsOperator(GoogleCloudBaseOperator):
|
|
777
790
|
self.log.info("Columns specs obtained.")
|
778
791
|
project_id = self.project_id or hook.project_id
|
779
792
|
if project_id:
|
780
|
-
|
793
|
+
TranslationLegacyDatasetLink.persist(
|
781
794
|
context=context,
|
782
795
|
task_instance=self,
|
783
796
|
dataset_id=self.dataset_id,
|
@@ -786,20 +799,14 @@ class AutoMLTablesListColumnSpecsOperator(GoogleCloudBaseOperator):
|
|
786
799
|
return result
|
787
800
|
|
788
801
|
|
789
|
-
@deprecated(
|
790
|
-
reason=(
|
791
|
-
"Class `AutoMLTablesUpdateDatasetOperator` has been deprecated and no longer available. "
|
792
|
-
"Please use `UpdateDatasetOperator` instead"
|
793
|
-
),
|
794
|
-
category=AirflowProviderDeprecationWarning,
|
795
|
-
action="error",
|
796
|
-
)
|
797
802
|
class AutoMLTablesUpdateDatasetOperator(GoogleCloudBaseOperator):
|
798
803
|
"""
|
799
804
|
Updates a dataset.
|
800
805
|
|
801
|
-
AutoMLTablesUpdateDatasetOperator has been deprecated
|
802
|
-
|
806
|
+
Operator AutoMLTablesUpdateDatasetOperator has been deprecated due to shutdown of
|
807
|
+
a legacy version of AutoML Tables on March 31, 2024. For additional information
|
808
|
+
see: https://cloud.google.com/automl-tables/docs/deprecations.
|
809
|
+
Please use :class:`airflow.providers.google.cloud.operators.vertex_ai.dataset.UpdateDatasetOperator`
|
803
810
|
instead.
|
804
811
|
|
805
812
|
.. seealso::
|
@@ -834,7 +841,7 @@ class AutoMLTablesUpdateDatasetOperator(GoogleCloudBaseOperator):
|
|
834
841
|
"location",
|
835
842
|
"impersonation_chain",
|
836
843
|
)
|
837
|
-
operator_extra_links = (
|
844
|
+
operator_extra_links = (TranslationLegacyDatasetLink(),)
|
838
845
|
|
839
846
|
def __init__(
|
840
847
|
self,
|
@@ -859,6 +866,12 @@ class AutoMLTablesUpdateDatasetOperator(GoogleCloudBaseOperator):
|
|
859
866
|
self.retry = retry
|
860
867
|
self.gcp_conn_id = gcp_conn_id
|
861
868
|
self.impersonation_chain = impersonation_chain
|
869
|
+
raise AirflowException(
|
870
|
+
"Operator AutoMLTablesUpdateDatasetOperator has been deprecated due to shutdown of "
|
871
|
+
"a legacy version of AutoML Tables on March 31, 2024. "
|
872
|
+
"For additional information see: https://cloud.google.com/automl-tables/docs/deprecations. "
|
873
|
+
"Please use UpdateDatasetOperator from Vertex AI instead."
|
874
|
+
)
|
862
875
|
|
863
876
|
def execute(self, context: Context):
|
864
877
|
hook = CloudAutoMLHook(
|
@@ -876,7 +889,7 @@ class AutoMLTablesUpdateDatasetOperator(GoogleCloudBaseOperator):
|
|
876
889
|
self.log.info("Dataset updated.")
|
877
890
|
project_id = hook.project_id
|
878
891
|
if project_id:
|
879
|
-
|
892
|
+
TranslationLegacyDatasetLink.persist(
|
880
893
|
context=context,
|
881
894
|
task_instance=self,
|
882
895
|
dataset_id=hook.extract_object_id(self.dataset),
|
@@ -924,7 +937,7 @@ class AutoMLGetModelOperator(GoogleCloudBaseOperator):
|
|
924
937
|
"project_id",
|
925
938
|
"impersonation_chain",
|
926
939
|
)
|
927
|
-
operator_extra_links = (
|
940
|
+
operator_extra_links = (TranslationLegacyModelLink(),)
|
928
941
|
|
929
942
|
def __init__(
|
930
943
|
self,
|
@@ -968,7 +981,7 @@ class AutoMLGetModelOperator(GoogleCloudBaseOperator):
|
|
968
981
|
model = Model.to_dict(result)
|
969
982
|
project_id = self.project_id or hook.project_id
|
970
983
|
if project_id:
|
971
|
-
|
984
|
+
TranslationLegacyModelLink.persist(
|
972
985
|
context=context,
|
973
986
|
task_instance=self,
|
974
987
|
dataset_id=model["dataset_id"],
|
@@ -1069,14 +1082,6 @@ class AutoMLDeleteModelOperator(GoogleCloudBaseOperator):
|
|
1069
1082
|
self.log.info("Deletion is completed")
|
1070
1083
|
|
1071
1084
|
|
1072
|
-
@deprecated(
|
1073
|
-
reason=(
|
1074
|
-
"Class `AutoMLDeployModelOperator` has been deprecated and no longer available. Please use "
|
1075
|
-
"`DeployModelOperator` instead"
|
1076
|
-
),
|
1077
|
-
category=AirflowProviderDeprecationWarning,
|
1078
|
-
action="error",
|
1079
|
-
)
|
1080
1085
|
class AutoMLDeployModelOperator(GoogleCloudBaseOperator):
|
1081
1086
|
"""
|
1082
1087
|
Deploys a model; if a model is already deployed, deploying it with the same parameters has no effect.
|
@@ -1087,8 +1092,10 @@ class AutoMLDeployModelOperator(GoogleCloudBaseOperator):
|
|
1087
1092
|
Only applicable for Text Classification, Image Object Detection and Tables; all other
|
1088
1093
|
domains manage deployment automatically.
|
1089
1094
|
|
1090
|
-
AutoMLDeployModelOperator has been deprecated
|
1091
|
-
|
1095
|
+
Operator AutoMLDeployModelOperator has been deprecated due to shutdown of a legacy version
|
1096
|
+
of AutoML Natural Language, Vision, Video Intelligence on March 31, 2024.
|
1097
|
+
For additional information see: https://cloud.google.com/vision/automl/docs/deprecations .
|
1098
|
+
Please use :class:`airflow.providers.google.cloud.operators.vertex_ai.endpoint_service.DeployModelOperator`
|
1092
1099
|
instead.
|
1093
1100
|
|
1094
1101
|
.. seealso::
|
@@ -1151,24 +1158,20 @@ class AutoMLDeployModelOperator(GoogleCloudBaseOperator):
|
|
1151
1158
|
self.retry = retry
|
1152
1159
|
self.gcp_conn_id = gcp_conn_id
|
1153
1160
|
self.impersonation_chain = impersonation_chain
|
1161
|
+
raise AirflowException(
|
1162
|
+
"Operator AutoMLDeployModelOperator has been deprecated due to shutdown of "
|
1163
|
+
"a legacy version of AutoML AutoML Natural Language, Vision, Video Intelligence "
|
1164
|
+
"on March 31, 2024. "
|
1165
|
+
"For additional information see: https://cloud.google.com/vision/automl/docs/deprecations. "
|
1166
|
+
"Please use DeployModelOperator from Vertex AI instead."
|
1167
|
+
)
|
1154
1168
|
|
1155
1169
|
def execute(self, context: Context):
|
1156
1170
|
hook = CloudAutoMLHook(
|
1157
1171
|
gcp_conn_id=self.gcp_conn_id,
|
1158
1172
|
impersonation_chain=self.impersonation_chain,
|
1159
1173
|
)
|
1160
|
-
model = hook.get_model(
|
1161
|
-
model_id=self.model_id,
|
1162
|
-
location=self.location,
|
1163
|
-
project_id=self.project_id,
|
1164
|
-
retry=self.retry,
|
1165
|
-
timeout=self.timeout,
|
1166
|
-
metadata=self.metadata,
|
1167
|
-
)
|
1168
|
-
if not hasattr(model, "translation_model_metadata"):
|
1169
|
-
_raise_exception_for_deprecated_operator(self.__class__.__name__, "DeployModelOperator")
|
1170
1174
|
self.log.info("Deploying model_id %s", self.model_id)
|
1171
|
-
|
1172
1175
|
operation = hook.deploy_model(
|
1173
1176
|
model_id=self.model_id,
|
1174
1177
|
location=self.location,
|
@@ -1186,6 +1189,10 @@ class AutoMLTablesListTableSpecsOperator(GoogleCloudBaseOperator):
|
|
1186
1189
|
"""
|
1187
1190
|
Lists table specs in a dataset.
|
1188
1191
|
|
1192
|
+
Operator AutoMLTablesListTableSpecsOperator has been deprecated due to shutdown of
|
1193
|
+
a legacy version of AutoML Tables on March 31, 2024. For additional information
|
1194
|
+
see: https://cloud.google.com/automl-tables/docs/deprecations.
|
1195
|
+
|
1189
1196
|
.. seealso::
|
1190
1197
|
For more information on how to use this operator, take a look at the guide:
|
1191
1198
|
:ref:`howto/operator:AutoMLTablesListTableSpecsOperator`
|
@@ -1223,7 +1230,7 @@ class AutoMLTablesListTableSpecsOperator(GoogleCloudBaseOperator):
|
|
1223
1230
|
"project_id",
|
1224
1231
|
"impersonation_chain",
|
1225
1232
|
)
|
1226
|
-
operator_extra_links = (
|
1233
|
+
operator_extra_links = (TranslationLegacyDatasetLink(),)
|
1227
1234
|
|
1228
1235
|
def __init__(
|
1229
1236
|
self,
|
@@ -1251,6 +1258,11 @@ class AutoMLTablesListTableSpecsOperator(GoogleCloudBaseOperator):
|
|
1251
1258
|
self.retry = retry
|
1252
1259
|
self.gcp_conn_id = gcp_conn_id
|
1253
1260
|
self.impersonation_chain = impersonation_chain
|
1261
|
+
raise AirflowException(
|
1262
|
+
"Operator AutoMLTablesListTableSpecsOperator has been deprecated due to shutdown of "
|
1263
|
+
"a legacy version of AutoML Tables on March 31, 2024. "
|
1264
|
+
"For additional information see: https://cloud.google.com/automl-tables/docs/deprecations. "
|
1265
|
+
)
|
1254
1266
|
|
1255
1267
|
def execute(self, context: Context):
|
1256
1268
|
hook = CloudAutoMLHook(
|
@@ -1273,7 +1285,7 @@ class AutoMLTablesListTableSpecsOperator(GoogleCloudBaseOperator):
|
|
1273
1285
|
self.log.info("Table specs obtained.")
|
1274
1286
|
project_id = self.project_id or hook.project_id
|
1275
1287
|
if project_id:
|
1276
|
-
|
1288
|
+
TranslationLegacyDatasetLink.persist(
|
1277
1289
|
context=context,
|
1278
1290
|
task_instance=self,
|
1279
1291
|
dataset_id=self.dataset_id,
|
@@ -1318,7 +1330,7 @@ class AutoMLListDatasetOperator(GoogleCloudBaseOperator):
|
|
1318
1330
|
"project_id",
|
1319
1331
|
"impersonation_chain",
|
1320
1332
|
)
|
1321
|
-
operator_extra_links = (
|
1333
|
+
operator_extra_links = (TranslationDatasetListLink(),)
|
1322
1334
|
|
1323
1335
|
def __init__(
|
1324
1336
|
self,
|
@@ -1373,7 +1385,7 @@ class AutoMLListDatasetOperator(GoogleCloudBaseOperator):
|
|
1373
1385
|
)
|
1374
1386
|
project_id = self.project_id or hook.project_id
|
1375
1387
|
if project_id:
|
1376
|
-
|
1388
|
+
TranslationDatasetListLink.persist(context=context, task_instance=self, project_id=project_id)
|
1377
1389
|
return result
|
1378
1390
|
|
1379
1391
|
|
@@ -47,6 +47,7 @@ from airflow.providers.common.sql.operators.sql import (
|
|
47
47
|
from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook, BigQueryJob
|
48
48
|
from airflow.providers.google.cloud.hooks.gcs import GCSHook, _parse_gcs_url
|
49
49
|
from airflow.providers.google.cloud.links.bigquery import BigQueryDatasetLink, BigQueryTableLink
|
50
|
+
from airflow.providers.google.cloud.openlineage.utils import _BigQueryOpenLineageMixin
|
50
51
|
from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
|
51
52
|
from airflow.providers.google.cloud.triggers.bigquery import (
|
52
53
|
BigQueryCheckTrigger,
|
@@ -141,68 +142,6 @@ class _BigQueryDbHookMixin:
|
|
141
142
|
)
|
142
143
|
|
143
144
|
|
144
|
-
class _BigQueryOpenLineageMixin:
|
145
|
-
def get_openlineage_facets_on_complete(self, task_instance):
|
146
|
-
"""
|
147
|
-
Retrieve OpenLineage data for a COMPLETE BigQuery job.
|
148
|
-
|
149
|
-
This method retrieves statistics for the specified job_ids using the BigQueryDatasetsProvider.
|
150
|
-
It calls BigQuery API, retrieving input and output dataset info from it, as well as run-level
|
151
|
-
usage statistics.
|
152
|
-
|
153
|
-
Run facets should contain:
|
154
|
-
- ExternalQueryRunFacet
|
155
|
-
- BigQueryJobRunFacet
|
156
|
-
|
157
|
-
Job facets should contain:
|
158
|
-
- SqlJobFacet if operator has self.sql
|
159
|
-
|
160
|
-
Input datasets should contain facets:
|
161
|
-
- DataSourceDatasetFacet
|
162
|
-
- SchemaDatasetFacet
|
163
|
-
|
164
|
-
Output datasets should contain facets:
|
165
|
-
- DataSourceDatasetFacet
|
166
|
-
- SchemaDatasetFacet
|
167
|
-
- OutputStatisticsOutputDatasetFacet
|
168
|
-
"""
|
169
|
-
from openlineage.client.facet import SqlJobFacet
|
170
|
-
from openlineage.common.provider.bigquery import BigQueryDatasetsProvider
|
171
|
-
|
172
|
-
from airflow.providers.openlineage.extractors import OperatorLineage
|
173
|
-
from airflow.providers.openlineage.utils.utils import normalize_sql
|
174
|
-
|
175
|
-
if not self.job_id:
|
176
|
-
return OperatorLineage()
|
177
|
-
|
178
|
-
client = self.hook.get_client(project_id=self.hook.project_id)
|
179
|
-
job_ids = self.job_id
|
180
|
-
if isinstance(self.job_id, str):
|
181
|
-
job_ids = [self.job_id]
|
182
|
-
inputs, outputs, run_facets = {}, {}, {}
|
183
|
-
for job_id in job_ids:
|
184
|
-
stats = BigQueryDatasetsProvider(client=client).get_facets(job_id=job_id)
|
185
|
-
for input in stats.inputs:
|
186
|
-
input = input.to_openlineage_dataset()
|
187
|
-
inputs[input.name] = input
|
188
|
-
if stats.output:
|
189
|
-
output = stats.output.to_openlineage_dataset()
|
190
|
-
outputs[output.name] = output
|
191
|
-
for key, value in stats.run_facets.items():
|
192
|
-
run_facets[key] = value
|
193
|
-
|
194
|
-
job_facets = {}
|
195
|
-
if hasattr(self, "sql"):
|
196
|
-
job_facets["sql"] = SqlJobFacet(query=normalize_sql(self.sql))
|
197
|
-
|
198
|
-
return OperatorLineage(
|
199
|
-
inputs=list(inputs.values()),
|
200
|
-
outputs=list(outputs.values()),
|
201
|
-
run_facets=run_facets,
|
202
|
-
job_facets=job_facets,
|
203
|
-
)
|
204
|
-
|
205
|
-
|
206
145
|
class _BigQueryOperatorsEncryptionConfigurationMixin:
|
207
146
|
"""A class to handle the configuration for BigQueryHook.insert_job method."""
|
208
147
|
|
@@ -176,6 +176,7 @@ class CreateAutoMLForecastingTrainingJobOperator(AutoMLTrainingJobBaseOperator):
|
|
176
176
|
gcp_conn_id=self.gcp_conn_id,
|
177
177
|
impersonation_chain=self.impersonation_chain,
|
178
178
|
)
|
179
|
+
self.parent_model = self.parent_model.split("@")[0] if self.parent_model else None
|
179
180
|
model, training_id = self.hook.create_auto_ml_forecasting_training_job(
|
180
181
|
project_id=self.project_id,
|
181
182
|
region=self.region,
|
@@ -283,6 +284,7 @@ class CreateAutoMLImageTrainingJobOperator(AutoMLTrainingJobBaseOperator):
|
|
283
284
|
gcp_conn_id=self.gcp_conn_id,
|
284
285
|
impersonation_chain=self.impersonation_chain,
|
285
286
|
)
|
287
|
+
self.parent_model = self.parent_model.split("@")[0] if self.parent_model else None
|
286
288
|
model, training_id = self.hook.create_auto_ml_image_training_job(
|
287
289
|
project_id=self.project_id,
|
288
290
|
region=self.region,
|
@@ -391,6 +393,7 @@ class CreateAutoMLTabularTrainingJobOperator(AutoMLTrainingJobBaseOperator):
|
|
391
393
|
impersonation_chain=self.impersonation_chain,
|
392
394
|
)
|
393
395
|
credentials, _ = self.hook.get_credentials_and_project_id()
|
396
|
+
self.parent_model = self.parent_model.split("@")[0] if self.parent_model else None
|
394
397
|
model, training_id = self.hook.create_auto_ml_tabular_training_job(
|
395
398
|
project_id=self.project_id,
|
396
399
|
region=self.region,
|
@@ -485,6 +488,7 @@ class CreateAutoMLTextTrainingJobOperator(AutoMLTrainingJobBaseOperator):
|
|
485
488
|
gcp_conn_id=self.gcp_conn_id,
|
486
489
|
impersonation_chain=self.impersonation_chain,
|
487
490
|
)
|
491
|
+
self.parent_model = self.parent_model.split("@")[0] if self.parent_model else None
|
488
492
|
model, training_id = self.hook.create_auto_ml_text_training_job(
|
489
493
|
project_id=self.project_id,
|
490
494
|
region=self.region,
|
@@ -561,6 +565,7 @@ class CreateAutoMLVideoTrainingJobOperator(AutoMLTrainingJobBaseOperator):
|
|
561
565
|
gcp_conn_id=self.gcp_conn_id,
|
562
566
|
impersonation_chain=self.impersonation_chain,
|
563
567
|
)
|
568
|
+
self.parent_model = self.parent_model.split("@")[0] if self.parent_model else None
|
564
569
|
model, training_id = self.hook.create_auto_ml_video_training_job(
|
565
570
|
project_id=self.project_id,
|
566
571
|
region=self.region,
|
@@ -493,6 +493,8 @@ class CreateCustomContainerTrainingJobOperator(CustomTrainingJobBaseOperator):
|
|
493
493
|
def execute(self, context: Context):
|
494
494
|
super().execute(context)
|
495
495
|
|
496
|
+
self.parent_model = self.parent_model.split("@")[0] if self.parent_model else None
|
497
|
+
|
496
498
|
if self.deferrable:
|
497
499
|
self.invoke_defer(context=context)
|
498
500
|
|
@@ -966,6 +968,8 @@ class CreateCustomPythonPackageTrainingJobOperator(CustomTrainingJobBaseOperator
|
|
966
968
|
def execute(self, context: Context):
|
967
969
|
super().execute(context)
|
968
970
|
|
971
|
+
self.parent_model = self.parent_model.split("@")[0] if self.parent_model else None
|
972
|
+
|
969
973
|
if self.deferrable:
|
970
974
|
self.invoke_defer(context=context)
|
971
975
|
|
@@ -1446,6 +1450,8 @@ class CreateCustomTrainingJobOperator(CustomTrainingJobBaseOperator):
|
|
1446
1450
|
def execute(self, context: Context):
|
1447
1451
|
super().execute(context)
|
1448
1452
|
|
1453
|
+
self.parent_model = self.parent_model.split("@")[0] if self.parent_model else None
|
1454
|
+
|
1449
1455
|
if self.deferrable:
|
1450
1456
|
self.invoke_defer(context=context)
|
1451
1457
|
|
@@ -43,8 +43,10 @@ class AzureFileShareToGCSOperator(BaseOperator):
|
|
43
43
|
Does not include subdirectories. May be filtered by prefix.
|
44
44
|
|
45
45
|
:param share_name: The Azure FileShare share where to find the objects. (templated)
|
46
|
-
:param directory_name: (Optional) Path to Azure FileShare directory which content is to be transferred.
|
46
|
+
:param directory_name: (Deprecated) Path to Azure FileShare directory which content is to be transferred.
|
47
47
|
Defaults to root directory (templated)
|
48
|
+
:param directory_path: (Optional) Path to Azure FileShare directory which content is to be transferred.
|
49
|
+
Defaults to root directory. Use this instead of ``directory_name``. (templated)
|
48
50
|
:param prefix: Prefix string which filters objects whose name begin with
|
49
51
|
such prefix. (templated)
|
50
52
|
:param azure_fileshare_conn_id: The source WASB connection
|
@@ -63,13 +65,14 @@ class AzureFileShareToGCSOperator(BaseOperator):
|
|
63
65
|
Service Account Token Creator IAM role to the directly preceding identity, with first
|
64
66
|
account from the list granting this role to the originating account (templated).
|
65
67
|
|
66
|
-
Note that ``share_name``, ``
|
68
|
+
Note that ``share_name``, ``directory_path``, ``prefix``, and ``dest_gcs`` are
|
67
69
|
templated, so you can use variables in them if you wish.
|
68
70
|
"""
|
69
71
|
|
70
72
|
template_fields: Sequence[str] = (
|
71
73
|
"share_name",
|
72
74
|
"directory_name",
|
75
|
+
"directory_path",
|
73
76
|
"prefix",
|
74
77
|
"dest_gcs",
|
75
78
|
)
|
@@ -94,8 +97,8 @@ class AzureFileShareToGCSOperator(BaseOperator):
|
|
94
97
|
self.share_name = share_name
|
95
98
|
self.directory_path = directory_path
|
96
99
|
self.directory_name = directory_name
|
97
|
-
if self.directory_path is None:
|
98
|
-
self.directory_path = directory_name
|
100
|
+
if self.directory_path is None and self.directory_name is not None:
|
101
|
+
self.directory_path = self.directory_name
|
99
102
|
warnings.warn(
|
100
103
|
"Use 'directory_path' instead of 'directory_name'.",
|
101
104
|
AirflowProviderDeprecationWarning,
|
@@ -296,7 +296,7 @@ class BigQueryToGCSOperator(BaseOperator):
|
|
296
296
|
from openlineage.client.run import Dataset
|
297
297
|
|
298
298
|
from airflow.providers.google.cloud.hooks.gcs import _parse_gcs_url
|
299
|
-
from airflow.providers.google.cloud.utils.openlineage import (
|
299
|
+
from airflow.providers.google.cloud.openlineage.utils import (
|
300
300
|
get_facets_from_bq_table,
|
301
301
|
get_identity_column_lineage_facet,
|
302
302
|
)
|
@@ -750,7 +750,7 @@ class GCSToBigQueryOperator(BaseOperator):
|
|
750
750
|
)
|
751
751
|
from openlineage.client.run import Dataset
|
752
752
|
|
753
|
-
from airflow.providers.google.cloud.utils.openlineage import (
|
753
|
+
from airflow.providers.google.cloud.openlineage.utils import (
|
754
754
|
get_facets_from_bq_table,
|
755
755
|
get_identity_column_lineage_facet,
|
756
756
|
)
|
@@ -97,22 +97,19 @@ class PubsubPullTrigger(BaseTrigger):
|
|
97
97
|
|
98
98
|
async def run(self) -> AsyncIterator[TriggerEvent]: # type: ignore[override]
|
99
99
|
try:
|
100
|
-
pulled_messages = None
|
101
100
|
while True:
|
102
|
-
if pulled_messages:
|
101
|
+
if pulled_messages := await self.hook.pull(
|
102
|
+
project_id=self.project_id,
|
103
|
+
subscription=self.subscription,
|
104
|
+
max_messages=self.max_messages,
|
105
|
+
return_immediately=True,
|
106
|
+
):
|
103
107
|
if self.ack_messages:
|
104
108
|
await self.message_acknowledgement(pulled_messages)
|
105
109
|
yield TriggerEvent({"status": "success", "message": pulled_messages})
|
106
110
|
return
|
107
|
-
|
108
|
-
|
109
|
-
project_id=self.project_id,
|
110
|
-
subscription=self.subscription,
|
111
|
-
max_messages=self.max_messages,
|
112
|
-
return_immediately=True,
|
113
|
-
)
|
114
|
-
self.log.info("Sleeping for %s seconds.", self.poke_interval)
|
115
|
-
await asyncio.sleep(self.poke_interval)
|
111
|
+
self.log.info("Sleeping for %s seconds.", self.poke_interval)
|
112
|
+
await asyncio.sleep(self.poke_interval)
|
116
113
|
except Exception as e:
|
117
114
|
yield TriggerEvent({"status": "error", "message": str(e)})
|
118
115
|
return
|