apache-airflow-providers-google 11.0.0__py3-none-any.whl → 12.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/__init__.py +3 -3
- airflow/providers/google/assets/gcs.py +1 -7
- airflow/providers/google/cloud/hooks/alloy_db.py +289 -0
- airflow/providers/google/cloud/hooks/cloud_batch.py +13 -5
- airflow/providers/google/cloud/hooks/dataproc.py +7 -3
- airflow/providers/google/cloud/hooks/dataproc_metastore.py +41 -22
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +7 -38
- airflow/providers/google/cloud/hooks/translate.py +355 -0
- airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +147 -0
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +10 -0
- airflow/providers/google/cloud/links/alloy_db.py +55 -0
- airflow/providers/google/cloud/links/translate.py +98 -0
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +1 -5
- airflow/providers/google/cloud/openlineage/mixins.py +4 -12
- airflow/providers/google/cloud/openlineage/utils.py +200 -22
- airflow/providers/google/cloud/operators/alloy_db.py +459 -0
- airflow/providers/google/cloud/operators/automl.py +55 -44
- airflow/providers/google/cloud/operators/bigquery.py +60 -15
- airflow/providers/google/cloud/operators/dataproc.py +12 -0
- airflow/providers/google/cloud/operators/gcs.py +5 -14
- airflow/providers/google/cloud/operators/kubernetes_engine.py +377 -705
- airflow/providers/google/cloud/operators/mlengine.py +41 -31
- airflow/providers/google/cloud/operators/translate.py +586 -1
- airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +163 -0
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +5 -0
- airflow/providers/google/cloud/sensors/dataproc.py +2 -2
- airflow/providers/google/cloud/sensors/vertex_ai/__init__.py +16 -0
- airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +112 -0
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +6 -11
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +3 -0
- airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +3 -0
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +5 -10
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +3 -15
- airflow/providers/google/cloud/transfers/gcs_to_local.py +9 -0
- airflow/providers/google/cloud/transfers/local_to_gcs.py +41 -6
- airflow/providers/google/cloud/transfers/s3_to_gcs.py +15 -0
- airflow/providers/google/get_provider_info.py +30 -18
- airflow/providers/google/version_compat.py +36 -0
- {apache_airflow_providers_google-11.0.0.dist-info → apache_airflow_providers_google-12.0.0.dist-info}/METADATA +16 -18
- {apache_airflow_providers_google-11.0.0.dist-info → apache_airflow_providers_google-12.0.0.dist-info}/RECORD +42 -37
- airflow/providers/google/cloud/hooks/datapipeline.py +0 -71
- airflow/providers/google/cloud/openlineage/BigQueryErrorRunFacet.json +0 -30
- airflow/providers/google/cloud/operators/datapipeline.py +0 -63
- {apache_airflow_providers_google-11.0.0.dist-info → apache_airflow_providers_google-12.0.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-11.0.0.dist-info → apache_airflow_providers_google-12.0.0.dist-info}/entry_points.txt +0 -0
@@ -39,9 +39,14 @@ if TYPE_CHECKING:
|
|
39
39
|
from google.api_core.operation import Operation
|
40
40
|
from google.cloud.translate_v3.services.translation_service import pagers
|
41
41
|
from google.cloud.translate_v3.types import (
|
42
|
+
BatchDocumentInputConfig,
|
43
|
+
BatchDocumentOutputConfig,
|
42
44
|
DatasetInputConfig,
|
45
|
+
DocumentInputConfig,
|
46
|
+
DocumentOutputConfig,
|
43
47
|
InputConfig,
|
44
48
|
OutputConfig,
|
49
|
+
TranslateDocumentResponse,
|
45
50
|
TranslateTextGlossaryConfig,
|
46
51
|
TransliterationConfig,
|
47
52
|
automl_translation,
|
@@ -560,3 +565,353 @@ class TranslateHook(GoogleBaseHook):
|
|
560
565
|
metadata=metadata,
|
561
566
|
)
|
562
567
|
return result
|
568
|
+
|
569
|
+
def create_model(
    self,
    dataset_id: str,
    display_name: str,
    project_id: str,
    location: str,
    retry: Retry | _MethodDefault = DEFAULT,
    timeout: float | None = None,
    metadata: Sequence[tuple[str, str]] = (),
) -> Operation:
    """
    Start training a native translation model on an existing translation dataset.

    :param dataset_id: ID of the dataset the model is trained on.
    :param display_name: Display name given to the trained model.
        A-Z and a-z, underscores (_), and ASCII digits 0-9.
    :param project_id: ID of the Google Cloud project that holds the dataset. A falsy value
        falls back to the hook's own ``project_id``.
    :param location: The location of the project.
    :param retry: Retry object forwarded to the client call. If `None` is specified, requests
        will not be retried.
    :param timeout: Seconds to wait for the request to complete. When ``retry`` is specified,
        the timeout applies to each individual attempt.
    :param metadata: Additional metadata forwarded to the client call.

    :return: `Operation` object with the model creation results, when finished.
    """
    translation_client = self.get_client()
    effective_project = project_id or self.project_id
    # The location path is both the request parent and the prefix of the dataset resource name.
    location_path = f"projects/{effective_project}/locations/{location}"
    model_spec = {
        "display_name": display_name,
        "dataset": f"{location_path}/datasets/{dataset_id}",
    }
    return translation_client.create_model(
        request={"parent": location_path, "model": model_spec},
        retry=retry,
        timeout=timeout,
        metadata=metadata,
    )
|
613
|
+
|
614
|
+
def get_model(
    self,
    model_id: str,
    project_id: str,
    location: str,
    retry: Retry | _MethodDefault = DEFAULT,
    timeout: float | _MethodDefault = DEFAULT,
    metadata: Sequence[tuple[str, str]] = (),
) -> automl_translation.Model:
    """
    Retrieve the translation model identified by ``model_id``.

    :param model_id: ID of the translation model to be retrieved.
    :param project_id: ID of the Google Cloud project where the model is located; used as given
        when building the model resource name.
    :param location: The location of the project.
    :param retry: Retry object forwarded to the client call. If `None` is specified, requests
        will not be retried.
    :param timeout: Seconds to wait for the request to complete. When ``retry`` is specified,
        the timeout applies to each individual attempt.
    :param metadata: Additional metadata forwarded to the client call.

    :return: `automl_translation.Model` instance.
    """
    translation_client = self.get_client()
    lookup_request = {"name": f"projects/{project_id}/locations/{location}/models/{model_id}"}
    model = translation_client.get_model(
        request=lookup_request,
        retry=retry,
        timeout=timeout,
        metadata=metadata,
    )
    return model
|
646
|
+
|
647
|
+
def list_models(
    self,
    project_id: str,
    location: str,
    filter_str: str | None = None,
    page_size: int | None = None,
    retry: Retry | _MethodDefault = DEFAULT,
    timeout: float | _MethodDefault = DEFAULT,
    metadata: Sequence[tuple[str, str]] = (),
) -> pagers.ListModelsPager:
    """
    List the translation models of a project location.

    :param project_id: ID of the Google Cloud project where the models are located; used as
        given when building the parent resource name.
    :param location: The location of the project.
    :param filter_str: An optional expression for filtering the models that will
        be returned. Supported filter: ``dataset_id=${dataset_id}``.
    :param page_size: Optional custom page size value. The server can
        return fewer results than requested.
    :param retry: Retry object forwarded to the client call. If `None` is specified, requests
        will not be retried.
    :param timeout: Seconds to wait for the request to complete. When ``retry`` is specified,
        the timeout applies to each individual attempt.
    :param metadata: Additional metadata forwarded to the client call.

    :return: ``pagers.ListModelsPager`` instance, iterable object to retrieve the models list.
    """
    translation_client = self.get_client()
    listing_request = {
        "parent": f"projects/{project_id}/locations/{location}",
        "filter": filter_str,
        "page_size": page_size,
    }
    return translation_client.list_models(
        request=listing_request,
        retry=retry,
        timeout=timeout,
        metadata=metadata,
    )
|
688
|
+
|
689
|
+
def delete_model(
    self,
    model_id: str,
    project_id: str,
    location: str,
    retry: Retry | _MethodDefault = DEFAULT,
    timeout: float | None = None,
    metadata: Sequence[tuple[str, str]] = (),
) -> Operation:
    """
    Delete the translation model and all of its contents.

    :param model_id: ID of the model to be deleted.
    :param project_id: ID of the Google Cloud project where the model is located; used as given
        when building the model resource name.
    :param location: The location of the project.
    :param retry: Retry object forwarded to the client call. If `None` is specified, requests
        will not be retried.
    :param timeout: Seconds to wait for the request to complete. When ``retry`` is specified,
        the timeout applies to each individual attempt.
    :param metadata: Additional metadata forwarded to the client call.

    :return: `Operation` object with the model deletion results, when finished.
    """
    translation_client = self.get_client()
    model_name = f"projects/{project_id}/locations/{location}/models/{model_id}"
    return translation_client.delete_model(
        request={"name": model_name},
        retry=retry,
        timeout=timeout,
        metadata=metadata,
    )
|
722
|
+
|
723
|
+
def translate_document(
    self,
    *,
    project_id: str = PROVIDE_PROJECT_ID,
    source_language_code: str | None = None,
    target_language_code: str,
    location: str | None = None,
    document_input_config: DocumentInputConfig | dict,
    document_output_config: DocumentOutputConfig | dict | None,
    customized_attribution: str | None = None,
    is_translate_native_pdf_only: bool = False,
    enable_shadow_removal_native_pdf: bool = False,
    enable_rotation_correction: bool = False,
    model: str | None = None,
    glossary_config: TranslateTextGlossaryConfig | None = None,
    labels: MutableMapping[str, str] | None = None,
    timeout: float | _MethodDefault = DEFAULT,
    metadata: Sequence[tuple[str, str]] = (),
    retry: Retry | _MethodDefault | None = DEFAULT,
) -> TranslateDocumentResponse:
    """
    Translate the document provided.

    :param project_id: Optional. The ID of the Google Cloud project that the service belongs to.
        A falsy value falls back to the hook's own ``project_id``.
    :param source_language_code: Optional. The ISO-639 language code of the
        input document text if known. If the source language isn't specified,
        the API attempts to identify the source language automatically and returns
        the source language within the response.
    :param target_language_code: Required. The ISO-639 language code to use
        for translation of the input document text.
    :param location: Optional. Project or location to make a call. Must refer to
        a caller's project.
        If not specified, 'global' is used.
        Non-global location is required for requests using AutoML models or custom glossaries.
        Models and glossaries must be within the same region (have the same location-id).
    :param document_input_config: A document translation request input config.
    :param document_output_config: Optional. A document translation request output config.
        If not provided the translated file will only be returned through a byte-stream
        and its output mime type will be the same as the input file's mime type.
    :param customized_attribution: Optional. This flag is to support user customized
        attribution. If not provided, the default is ``Machine Translated by Google``.
        Customized attribution should follow rules in
        https://cloud.google.com/translate/attribution#attribution_and_logos
    :param is_translate_native_pdf_only: Optional. Param for external
        customers. If true, the page limit of online native PDF
        translation is 300 and only native PDF pages will be
        translated.
    :param enable_shadow_removal_native_pdf: Optional. If true, use the text removal server to remove the
        shadow text on background image for native PDF translation.
        Shadow removal feature can only be enabled when both ``is_translate_native_pdf_only``,
        ``pdf_native_only`` are False.
    :param enable_rotation_correction: Optional. If true, enable auto rotation
        correction in DVS.
    :param model: Optional. The ``model`` type requested for this translation.
        If not provided, the default Google model (NMT) will be used.
        The format depends on model type:

        - AutoML Translation models:
          ``projects/{project-number-or-id}/locations/{location-id}/models/{model-id}``
        - General (built-in) models:
          ``projects/{project-number-or-id}/locations/{location-id}/models/general/nmt``,

    :param glossary_config: Optional. Glossary to be applied. The glossary must be
        within the same region (have the same location-id) as the
        model.
    :param labels: Optional. Mapping of label keys to label values carrying user-defined
        metadata for the request.
        See https://cloud.google.com/translate/docs/advanced/labels for more information.
    :param retry: Designation of what errors, if any, should be retried.
    :param timeout: The timeout for this request.
    :param metadata: Strings which should be sent along with the request as metadata.

    :return: Translate document result from the API response.
    """
    client = self.get_client()
    # An unset or empty location means the 'global' endpoint location.
    location_id = location or "global"
    parent = f"projects/{project_id or self.project_id}/locations/{location_id}"
    return client.translate_document(
        request={
            "parent": parent,
            "source_language_code": source_language_code,
            "target_language_code": target_language_code,
            "document_input_config": document_input_config,
            "document_output_config": document_output_config,
            "customized_attribution": customized_attribution,
            "is_translate_native_pdf_only": is_translate_native_pdf_only,
            "enable_shadow_removal_native_pdf": enable_shadow_removal_native_pdf,
            "enable_rotation_correction": enable_rotation_correction,
            "model": model,
            "glossary_config": glossary_config,
            "labels": labels,
        },
        timeout=timeout,
        retry=retry,
        metadata=metadata,
    )
|
821
|
+
|
822
|
+
def batch_translate_document(
    self,
    *,
    project_id: str = PROVIDE_PROJECT_ID,
    source_language_code: str,
    target_language_codes: MutableSequence[str] | None = None,
    location: str | None = None,
    input_configs: MutableSequence[BatchDocumentInputConfig | dict],
    output_config: BatchDocumentOutputConfig | dict,
    customized_attribution: str | None = None,
    format_conversions: MutableMapping[str, str] | None = None,
    enable_shadow_removal_native_pdf: bool = False,
    enable_rotation_correction: bool = False,
    models: MutableMapping[str, str] | None = None,
    glossaries: MutableMapping[str, TranslateTextGlossaryConfig] | None = None,
    timeout: float | _MethodDefault = DEFAULT,
    metadata: Sequence[tuple[str, str]] = (),
    retry: Retry | _MethodDefault | None = DEFAULT,
) -> Operation:
    """
    Translate a batch of documents according to the configs provided.

    :param project_id: The ID of the Google Cloud project that the service belongs to.
        A falsy value falls back to the hook's own ``project_id``.
    :param source_language_code: The ISO-639 language code of the input text
        (this argument has no default in the signature).
    :param target_language_codes: The ISO-639 language codes to use for translation of the
        input document. Specify up to 10 language codes here. Defaults to ``None`` in the
        signature and is forwarded to the API as given.
    :param location: Optional. Project or location to make a call. Must refer to
        a caller's project. If not specified, 'global' is used.
        Non-global location is required for requests using AutoML models or custom glossaries.
        Models and glossaries must be within the same region (have the same location-id).
    :param input_configs: Input configurations. The total number of files matched should be <=
        100. The total content size to translate should be <= 100M Unicode codepoints.
        The files must use UTF-8 encoding.
    :param output_config: Output configuration. If 2 input configs match to the same file (that
        is, same input path), no output for duplicate inputs will be generated.
    :param format_conversions: Optional. The file format conversion map that is applied to
        all input files. The map key is the original mime_type.
        The map value is the target mime_type of translated documents.
        Supported file format conversion includes:

        - ``application/pdf`` to
          ``application/vnd.openxmlformats-officedocument.wordprocessingml.document``

        If nothing specified, output files will be in the same format as the original file.
    :param customized_attribution: Optional. This flag is to support user customized
        attribution. If not provided, the default is ``Machine Translated by Google``.
        Customized attribution should follow rules in
        https://cloud.google.com/translate/attribution#attribution_and_logos
    :param enable_shadow_removal_native_pdf: Optional. If true, use the text removal server to remove the
        shadow text on background image for native PDF translation.
        Shadow removal feature can only be enabled when both ``is_translate_native_pdf_only``,
        ``pdf_native_only`` are False.
    :param enable_rotation_correction: Optional. If true, enable auto rotation
        correction in DVS.
    :param models: Optional. The models to use for translation. Map's key is
        target language code. Map's value is the model name. Value
        can be a built-in general model, or an AutoML Translation model.
        The value format depends on model type:

        - AutoML Translation models:
          ``projects/{project-number-or-id}/locations/{location-id}/models/{model-id}``
        - General (built-in) models:
          ``projects/{project-number-or-id}/locations/{location-id}/models/general/nmt``,

        If the map is empty or a specific model is not requested for
        a language pair, then default google model (NMT) is used.
    :param glossaries: Glossaries to be applied. It's keyed by target language code.
    :param retry: Designation of what errors, if any, should be retried.
    :param timeout: The timeout for this request.
    :param metadata: Strings which should be sent along with the request as metadata.

    :return: Batch translate document result from the API response.
    """
    translation_client = self.get_client()
    # An unset or empty location means the 'global' location.
    effective_location = location or "global"
    batch_request = {
        "parent": f"projects/{project_id or self.project_id}/locations/{effective_location}",
        "source_language_code": source_language_code,
        "target_language_codes": target_language_codes,
        "input_configs": input_configs,
        "output_config": output_config,
        "format_conversions": format_conversions,
        "customized_attribution": customized_attribution,
        "enable_shadow_removal_native_pdf": enable_shadow_removal_native_pdf,
        "enable_rotation_correction": enable_rotation_correction,
        "models": models,
        "glossaries": glossaries,
    }
    return translation_client.batch_translate_document(
        request=batch_request,
        timeout=timeout,
        retry=retry,
        metadata=metadata,
    )
|
@@ -0,0 +1,147 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
"""This module contains a Google Cloud Vertex AI Feature Store hook."""
|
18
|
+
|
19
|
+
from __future__ import annotations
|
20
|
+
|
21
|
+
from google.api_core.client_options import ClientOptions
|
22
|
+
from google.cloud.aiplatform_v1beta1 import (
|
23
|
+
FeatureOnlineStoreAdminServiceClient,
|
24
|
+
)
|
25
|
+
|
26
|
+
from airflow.exceptions import AirflowException
|
27
|
+
from airflow.providers.google.common.consts import CLIENT_INFO
|
28
|
+
from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID, GoogleBaseHook
|
29
|
+
|
30
|
+
|
31
|
+
class FeatureStoreHook(GoogleBaseHook):
    """
    Hook for interacting with Google Cloud Vertex AI Feature Store.

    This hook provides an interface to manage Feature Store resources in Vertex AI,
    including feature views and their synchronization operations. It handles authentication
    and provides methods for common Feature Store operations.

    :param gcp_conn_id: The connection ID to use for connecting to Google Cloud Platform.
        Defaults to 'google_cloud_default'.
    :param impersonation_chain: Optional service account to impersonate using short-term
        credentials. Can be either a single account or a chain of accounts required to
        get the access_token of the last account in the list, which will be impersonated
        in the request. If set as a string, the account must grant the originating account
        the Service Account Token Creator IAM role. If set as a sequence, the identities
        from the list must grant Service Account Token Creator IAM role to the directly
        preceding identity, with first account from the list granting this role to the
        originating account.
    """

    def get_feature_online_store_admin_service_client(
        self,
        location: str | None = None,
    ) -> FeatureOnlineStoreAdminServiceClient:
        """
        Create and return a FeatureOnlineStoreAdminServiceClient object.

        :param location: Optional. The Google Cloud region where the service is located.
            If provided and not 'global', the client is configured to use the
            region-specific API endpoint ``{location}-aiplatform.googleapis.com:443``;
            otherwise default client options are used.
        :return: A client built with the hook's credentials and ``CLIENT_INFO``.
        """
        if location and location != "global":
            client_options = ClientOptions(api_endpoint=f"{location}-aiplatform.googleapis.com:443")
        else:
            client_options = ClientOptions()
        return FeatureOnlineStoreAdminServiceClient(
            credentials=self.get_credentials(), client_info=CLIENT_INFO, client_options=client_options
        )

    def get_feature_view_sync(
        self,
        location: str,
        feature_view_sync_name: str,
    ) -> dict:
        """
        Retrieve the status and details of a Feature View synchronization operation.

        :param location: The Google Cloud region where the feature store is located
            (e.g., 'us-central1', 'us-east1').
        :param feature_view_sync_name: The full resource name of the feature view
            sync operation to retrieve.
        :return: A dict with ``name`` and ``start_time`` (seconds); ``end_time`` and
            ``sync_summary`` (``row_synced``, ``total_slot``) are added when the response
            reports a non-zero end time.
        :raises AirflowException: If the lookup or the report construction fails; the original
            exception is attached as the cause.
        """
        client = self.get_feature_online_store_admin_service_client(location)

        try:
            response = client.get_feature_view_sync(name=feature_view_sync_name)

            report = {
                "name": feature_view_sync_name,
                "start_time": int(response.run_time.start_time.seconds),
            }

            # NOTE(review): indentation was lost in the diff rendering this block was recovered
            # from; ``sync_summary`` is assumed to belong to the end_time branch -- confirm
            # against the released file.
            if hasattr(response.run_time, "end_time") and response.run_time.end_time.seconds:
                report["end_time"] = int(response.run_time.end_time.seconds)
                report["sync_summary"] = {
                    "row_synced": int(response.sync_summary.row_synced),
                    "total_slot": int(response.sync_summary.total_slot),
                }

            return report

        except Exception as e:
            self.log.error("Failed to get feature view sync: %s", str(e))
            # Chain explicitly so the underlying client error stays visible as the cause.
            raise AirflowException(str(e)) from e

    @GoogleBaseHook.fallback_to_default_project_id
    def sync_feature_view(
        self,
        location: str,
        feature_online_store_id: str,
        feature_view_id: str,
        project_id: str = PROVIDE_PROJECT_ID,
    ) -> str:
        """
        Initiate a synchronization operation for a Feature View.

        :param location: The Google Cloud region where the feature store is located
            (e.g., 'us-central1', 'us-east1').
        :param feature_online_store_id: The ID of the online feature store that
            contains the feature view to be synchronized.
        :param feature_view_id: The ID of the feature view to synchronize.
        :param project_id: The ID of the Google Cloud project that contains the
            feature store.
        :return: The ``feature_view_sync`` field of the client response, converted to ``str``.
        :raises AirflowException: If the sync request fails; the original exception is
            attached as the cause.
        """
        client = self.get_feature_online_store_admin_service_client(location)
        feature_view = (
            f"projects/{project_id}/locations/{location}"
            f"/featureOnlineStores/{feature_online_store_id}/featureViews/{feature_view_id}"
        )

        try:
            response = client.sync_feature_view(feature_view=feature_view)

            return str(response.feature_view_sync)

        except Exception as e:
            self.log.error("Failed to sync feature view: %s", str(e))
            # Chain explicitly so the underlying client error stays visible as the cause.
            raise AirflowException(str(e)) from e
|
@@ -43,6 +43,11 @@ if TYPE_CHECKING:
|
|
43
43
|
class GenerativeModelHook(GoogleBaseHook):
|
44
44
|
"""Hook for Google Cloud Vertex AI Generative Model APIs."""
|
45
45
|
|
46
|
+
@deprecated(
|
47
|
+
planned_removal_date="April 09, 2025",
|
48
|
+
use_instead="GenerativeModelHook.get_generative_model",
|
49
|
+
category=AirflowProviderDeprecationWarning,
|
50
|
+
)
|
46
51
|
def get_text_generation_model(self, pretrained_model: str):
|
47
52
|
"""Return a Model Garden Model object based on Text Generation."""
|
48
53
|
model = TextGenerationModel.from_pretrained(pretrained_model)
|
@@ -275,6 +280,11 @@ class GenerativeModelHook(GoogleBaseHook):
|
|
275
280
|
|
276
281
|
return response.text
|
277
282
|
|
283
|
+
@deprecated(
|
284
|
+
planned_removal_date="April 09, 2025",
|
285
|
+
use_instead="GenerativeModelHook.generative_model_generate_content",
|
286
|
+
category=AirflowProviderDeprecationWarning,
|
287
|
+
)
|
278
288
|
@GoogleBaseHook.fallback_to_default_project_id
|
279
289
|
def text_generation_model_predict(
|
280
290
|
self,
|
@@ -0,0 +1,55 @@
|
|
1
|
+
#
|
2
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
# or more contributor license agreements. See the NOTICE file
|
4
|
+
# distributed with this work for additional information
|
5
|
+
# regarding copyright ownership. The ASF licenses this file
|
6
|
+
# to you under the Apache License, Version 2.0 (the
|
7
|
+
# "License"); you may not use this file except in compliance
|
8
|
+
# with the License. You may obtain a copy of the License at
|
9
|
+
#
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
#
|
12
|
+
# Unless required by applicable law or agreed to in writing,
|
13
|
+
# software distributed under the License is distributed on an
|
14
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
# KIND, either express or implied. See the License for the
|
16
|
+
# specific language governing permissions and limitations
|
17
|
+
# under the License.
|
18
|
+
"""This module contains Google Cloud AlloyDB links."""
|
19
|
+
|
20
|
+
from __future__ import annotations
|
21
|
+
|
22
|
+
from typing import TYPE_CHECKING
|
23
|
+
|
24
|
+
from airflow.providers.google.cloud.links.base import BaseGoogleLink
|
25
|
+
|
26
|
+
if TYPE_CHECKING:
|
27
|
+
from airflow.models import BaseOperator
|
28
|
+
from airflow.utils.context import Context
|
29
|
+
|
30
|
+
# Base path segment shared by the AlloyDB link templates.
ALLOY_DB_BASE_LINK = "/alloydb"
# Cluster link template; the {placeholders} are left unformatted here.
ALLOY_DB_CLUSTER_LINK = "".join(
    (ALLOY_DB_BASE_LINK, "/locations/{location_id}/clusters/{cluster_id}?project={project_id}")
)
|
34
|
+
|
35
|
+
|
36
|
+
class AlloyDBClusterLink(BaseGoogleLink):
    """Link helper that stores the parameters of the AlloyDB cluster link template."""

    name = "AlloyDB Cluster"
    key = "alloy_db_cluster"
    format_str = ALLOY_DB_CLUSTER_LINK

    @staticmethod
    def persist(
        context: Context,
        task_instance: BaseOperator,
        location_id: str,
        cluster_id: str,
        project_id: str | None,
    ):
        """
        Push the cluster link parameters to XCom under this link's key.

        :param context: Task context passed through to ``xcom_push``.
        :param task_instance: Object whose ``xcom_push`` is called to store the value.
        :param location_id: Value stored for the ``location_id`` placeholder.
        :param cluster_id: Value stored for the ``cluster_id`` placeholder.
        :param project_id: Value stored for the ``project_id`` placeholder; may be ``None``.
        """
        link_params = {
            "location_id": location_id,
            "cluster_id": cluster_id,
            "project_id": project_id,
        }
        task_instance.xcom_push(context, key=AlloyDBClusterLink.key, value=link_params)
|