PyPI - apache-airflow-providers-yandex - Versions diffs - 3.11.0rc1__py3-none-any.whl → 3.11.1__py3-none-any.whl - Mend

apache-airflow-providers-yandex 3.11.0rc1__py3-none-any.whl → 3.11.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

airflow/providers/yandex/__init__.py CHANGED Viewed

@@ -25,14 +25,11 @@ from __future__ import annotations
 import packaging.version
-__all__ = ["__version__"]
+from airflow import __version__ as airflow_version
-__version__ = "3.11.0"
+__all__ = ["__version__"]
-try:
-    from airflow import __version__ as airflow_version
-except ImportError:
-    from airflow.version import version as airflow_version
+__version__ = "3.11.1"
 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
     "2.7.0"

airflow/providers/yandex/get_provider_info.py CHANGED Viewed

@@ -28,8 +28,9 @@ def get_provider_info():
         "name": "Yandex",
         "description": "This package is for Yandex, including:\n\n    - `Yandex.Cloud <https://cloud.yandex.com/>`__\n",
         "state": "ready",
-        "source-date-epoch": 1714477757,
+        "source-date-epoch": 1716289256,
         "versions": [
+            "3.11.1",
             "3.11.0",
             "3.10.0",
             "3.9.0",
@@ -52,7 +53,7 @@ def get_provider_info():
             "1.0.1",
             "1.0.0",
         ],
-        "dependencies": ["apache-airflow>=2.7.0", "yandexcloud>=0.228.0", "yandex-query-client>=0.1.2"],
+        "dependencies": ["apache-airflow>=2.7.0", "yandexcloud>=0.278.0", "yandex-query-client>=0.1.4"],
         "integrations": [
             {
                 "integration-name": "Yandex.Cloud",
@@ -63,14 +64,14 @@ def get_provider_info():
             {
                 "integration-name": "Yandex.Cloud Dataproc",
                 "external-doc-url": "https://cloud.yandex.com/dataproc",
-                "how-to-guide": ["/docs/apache-airflow-providers-yandex/operators.rst"],
+                "how-to-guide": ["/docs/apache-airflow-providers-yandex/operators/dataproc.rst"],
                 "logo": "/integration-logos/yandex/Yandex-Cloud.png",
                 "tags": ["service"],
             },
             {
                 "integration-name": "Yandex.Cloud YQ",
                 "external-doc-url": "https://cloud.yandex.com/en/services/query",
-                "how-to-guide": ["/docs/apache-airflow-providers-yandex/operators.rst"],
+                "how-to-guide": ["/docs/apache-airflow-providers-yandex/operators/yq.rst"],
                 "logo": "/integration-logos/yandex/Yandex-Cloud.png",
                 "tags": ["service"],
             },
@@ -78,7 +79,7 @@ def get_provider_info():
         "operators": [
             {
                 "integration-name": "Yandex.Cloud Dataproc",
-                "python-modules": ["airflow.providers.yandex.operators.yandexcloud_dataproc"],
+                "python-modules": ["airflow.providers.yandex.operators.dataproc"],
             },
             {
                 "integration-name": "Yandex.Cloud YQ",
@@ -89,7 +90,7 @@ def get_provider_info():
             {"integration-name": "Yandex.Cloud", "python-modules": ["airflow.providers.yandex.hooks.yandex"]},
             {
                 "integration-name": "Yandex.Cloud Dataproc",
-                "python-modules": ["airflow.providers.yandex.hooks.yandexcloud_dataproc"],
+                "python-modules": ["airflow.providers.yandex.hooks.dataproc"],
             },
             {"integration-name": "Yandex.Cloud YQ", "python-modules": ["airflow.providers.yandex.hooks.yq"]},
         ],

airflow/providers/yandex/hooks/dataproc.py ADDED Viewed

@@ -0,0 +1,35 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+from airflow.providers.yandex.hooks.yandex import YandexCloudBaseHook
+class DataprocHook(YandexCloudBaseHook):
+    """
+    A base hook for Yandex.Cloud Data Proc.
+    :param yandex_conn_id: The connection ID to use when fetching connection info.
+    """
+    def __init__(self, *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+        self.cluster_id = None
+        self.client = self.sdk.wrappers.Dataproc(
+            default_folder_id=self.default_folder_id,
+            default_public_ssh_key=self.default_public_ssh_key,
+        )

airflow/providers/yandex/hooks/yandexcloud_dataproc.py CHANGED Viewed

@@ -14,22 +14,17 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-from __future__ import annotations
-from airflow.providers.yandex.hooks.yandex import YandexCloudBaseHook
+"""This module is deprecated. Please use :mod:`airflow.providers.yandex.hooks.dataproc` instead."""
+from __future__ import annotations
-class DataprocHook(YandexCloudBaseHook):
-    """
-    A base hook for Yandex.Cloud Data Proc.
+import warnings
-    :param yandex_conn_id: The connection ID to use when fetching connection info.
-    """
+from airflow.exceptions import AirflowProviderDeprecationWarning
+from airflow.providers.yandex.hooks.dataproc import *  # noqa: F403
-    def __init__(self, *args, **kwargs) -> None:
-        super().__init__(*args, **kwargs)
-        self.cluster_id = None
-        self.client = self.sdk.wrappers.Dataproc(
-            default_folder_id=self.default_folder_id,
-            default_public_ssh_key=self.default_public_ssh_key,
-        )
+warnings.warn(
+    "This module is deprecated. Please use `airflow.providers.yandex.hooks.dataproc` instead.",
+    AirflowProviderDeprecationWarning,
+    stacklevel=2,
+)

airflow/providers/yandex/hooks/yq.py CHANGED Viewed

@@ -19,9 +19,7 @@ from __future__ import annotations
 from datetime import timedelta
 from typing import Any
-import yandexcloud
-import yandexcloud._auth_fabric as auth_fabric
-from yandex.cloud.iam.v1.iam_token_service_pb2_grpc import IamTokenServiceStub
+import yandexcloud.auth as yc_auth
 from yandex_query_client import YQHttpClient, YQHttpClientConfig
 from airflow.providers.yandex.hooks.yandex import YandexCloudBaseHook
@@ -100,13 +98,4 @@ class YQHook(YandexCloudBaseHook):
         if iam_token is not None:
             return iam_token
-        service_account_key = self.credentials.get("service_account_key")
-        # if service_account_key is None metadata server will be used
-        token_requester = auth_fabric.get_auth_token_requester(service_account_key=service_account_key)
-        if service_account_key is None:
-            return token_requester.get_token()
-        sdk = yandexcloud.SDK()
-        client = sdk.client(IamTokenServiceStub)
-        return client.Create(token_requester.get_token_request()).iam_token
+        return yc_auth.get_auth_token(service_account_key=self.credentials.get("service_account_key"))

airflow/providers/yandex/operators/dataproc.py ADDED Viewed

@@ -0,0 +1,535 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+import warnings
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Iterable, Sequence
+from airflow.exceptions import AirflowProviderDeprecationWarning
+from airflow.models import BaseOperator
+from airflow.providers.yandex.hooks.dataproc import DataprocHook
+if TYPE_CHECKING:
+    from airflow.utils.context import Context
+@dataclass
+class InitializationAction:
+    """Data for initialization action to be run at start of DataProc cluster."""
+    uri: str  # Uri of the executable file
+    args: Sequence[str]  # Arguments to the initialization action
+    timeout: int  # Execution timeout
+class DataprocCreateClusterOperator(BaseOperator):
+    """Creates Yandex.Cloud Data Proc cluster.
+    :param folder_id: ID of the folder in which cluster should be created.
+    :param cluster_name: Cluster name. Must be unique inside the folder.
+    :param cluster_description: Cluster description.
+    :param cluster_image_version: Cluster image version. Use default.
+    :param ssh_public_keys: List of SSH public keys that will be deployed to created compute instances.
+    :param subnet_id: ID of the subnetwork. All Data Proc cluster nodes will use one subnetwork.
+    :param services: List of services that will be installed to the cluster. Possible options:
+        HDFS, YARN, MAPREDUCE, HIVE, TEZ, ZOOKEEPER, HBASE, SQOOP, FLUME, SPARK, SPARK, ZEPPELIN, OOZIE
+    :param s3_bucket: Yandex.Cloud S3 bucket to store cluster logs.
+                      Jobs will not work if the bucket is not specified.
+    :param zone: Availability zone to create cluster in.
+                 Currently there are ru-central1-a, ru-central1-b and ru-central1-c.
+    :param service_account_id: Service account id for the cluster.
+                               Service account can be created inside the folder.
+    :param masternode_resource_preset: Resources preset (CPU+RAM configuration)
+                                       for the primary node of the cluster.
+    :param masternode_disk_size: Masternode storage size in GiB.
+    :param masternode_disk_type: Masternode storage type. Possible options: network-ssd, network-hdd.
+    :param datanode_resource_preset: Resources preset (CPU+RAM configuration)
+                                     for the data nodes of the cluster.
+    :param datanode_disk_size: Datanodes storage size in GiB.
+    :param datanode_disk_type: Datanodes storage type. Possible options: network-ssd, network-hdd.
+    :param computenode_resource_preset: Resources preset (CPU+RAM configuration)
+                                        for the compute nodes of the cluster.
+    :param computenode_disk_size: Computenodes storage size in GiB.
+    :param computenode_disk_type: Computenodes storage type. Possible options: network-ssd, network-hdd.
+    :param connection_id: ID of the Yandex.Cloud Airflow connection.
+    :param computenode_max_count: Maximum number of nodes of compute autoscaling subcluster.
+    :param computenode_warmup_duration: The warmup time of the instance in seconds. During this time,
+                                        traffic is sent to the instance,
+                                        but instance metrics are not collected. In seconds.
+    :param computenode_stabilization_duration: Minimum amount of time in seconds for monitoring before
+                                   Instance Groups can reduce the number of instances in the group.
+                                   During this time, the group size doesn't decrease,
+                                   even if the new metric values indicate that it should. In seconds.
+    :param computenode_preemptible: Preemptible instances are stopped at least once every 24 hours,
+                        and can be stopped at any time if their resources are needed by Compute.
+    :param computenode_cpu_utilization_target: Defines an autoscaling rule
+                                   based on the average CPU utilization of the instance group.
+                                   in percents. 10-100.
+                                   By default is not set and default autoscaling strategy is used.
+    :param computenode_decommission_timeout: Timeout to gracefully decommission nodes during downscaling.
+                                             In seconds
+    :param properties: Properties passed to main node software.
+                        Docs: https://cloud.yandex.com/docs/data-proc/concepts/settings-list
+    :param enable_ui_proxy: Enable UI Proxy feature for forwarding Hadoop components web interfaces
+                        Docs: https://cloud.yandex.com/docs/data-proc/concepts/ui-proxy
+    :param host_group_ids: Dedicated host groups to place VMs of cluster on.
+                        Docs: https://cloud.yandex.com/docs/compute/concepts/dedicated-host
+    :param security_group_ids: User security groups.
+                        Docs: https://cloud.yandex.com/docs/data-proc/concepts/network#security-groups
+    :param log_group_id: Id of log group to write logs. By default logs will be sent to default log group.
+                    To disable cloud log sending set cluster property dataproc:disable_cloud_logging = true
+                    Docs: https://cloud.yandex.com/docs/data-proc/concepts/logs
+    :param initialization_actions: Set of init-actions to run when cluster starts.
+                        Docs: https://cloud.yandex.com/docs/data-proc/concepts/init-action
+    :param labels: Cluster labels as key:value pairs. No more than 64 per resource.
+                        Docs: https://cloud.yandex.com/docs/resource-manager/concepts/labels
+    """
+    def __init__(
+        self,
+        *,
+        folder_id: str | None = None,
+        cluster_name: str | None = None,
+        cluster_description: str | None = "",
+        cluster_image_version: str | None = None,
+        ssh_public_keys: str | Iterable[str] | None = None,
+        subnet_id: str | None = None,
+        services: Iterable[str] = ("HDFS", "YARN", "MAPREDUCE", "HIVE", "SPARK"),
+        s3_bucket: str | None = None,
+        zone: str = "ru-central1-b",
+        service_account_id: str | None = None,
+        masternode_resource_preset: str | None = None,
+        masternode_disk_size: int | None = None,
+        masternode_disk_type: str | None = None,
+        datanode_resource_preset: str | None = None,
+        datanode_disk_size: int | None = None,
+        datanode_disk_type: str | None = None,
+        datanode_count: int = 1,
+        computenode_resource_preset: str | None = None,
+        computenode_disk_size: int | None = None,
+        computenode_disk_type: str | None = None,
+        computenode_count: int = 0,
+        computenode_max_hosts_count: int | None = None,
+        computenode_measurement_duration: int | None = None,
+        computenode_warmup_duration: int | None = None,
+        computenode_stabilization_duration: int | None = None,
+        computenode_preemptible: bool = False,
+        computenode_cpu_utilization_target: int | None = None,
+        computenode_decommission_timeout: int | None = None,
+        connection_id: str | None = None,
+        properties: dict[str, str] | None = None,
+        enable_ui_proxy: bool = False,
+        host_group_ids: Iterable[str] | None = None,
+        security_group_ids: Iterable[str] | None = None,
+        log_group_id: str | None = None,
+        initialization_actions: Iterable[InitializationAction] | None = None,
+        labels: dict[str, str] | None = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(**kwargs)
+        self.folder_id = folder_id
+        self.yandex_conn_id = connection_id
+        self.cluster_name = cluster_name
+        self.cluster_description = cluster_description
+        self.cluster_image_version = cluster_image_version
+        self.ssh_public_keys = ssh_public_keys
+        self.subnet_id = subnet_id
+        self.services = services
+        self.s3_bucket = s3_bucket
+        self.zone = zone
+        self.service_account_id = service_account_id
+        self.masternode_resource_preset = masternode_resource_preset
+        self.masternode_disk_size = masternode_disk_size
+        self.masternode_disk_type = masternode_disk_type
+        self.datanode_resource_preset = datanode_resource_preset
+        self.datanode_disk_size = datanode_disk_size
+        self.datanode_disk_type = datanode_disk_type
+        self.datanode_count = datanode_count
+        self.computenode_resource_preset = computenode_resource_preset
+        self.computenode_disk_size = computenode_disk_size
+        self.computenode_disk_type = computenode_disk_type
+        self.computenode_count = computenode_count
+        self.computenode_max_hosts_count = computenode_max_hosts_count
+        self.computenode_measurement_duration = computenode_measurement_duration
+        self.computenode_warmup_duration = computenode_warmup_duration
+        self.computenode_stabilization_duration = computenode_stabilization_duration
+        self.computenode_preemptible = computenode_preemptible
+        self.computenode_cpu_utilization_target = computenode_cpu_utilization_target
+        self.computenode_decommission_timeout = computenode_decommission_timeout
+        self.properties = properties
+        self.enable_ui_proxy = enable_ui_proxy
+        self.host_group_ids = host_group_ids
+        self.security_group_ids = security_group_ids
+        self.log_group_id = log_group_id
+        self.initialization_actions = initialization_actions
+        self.labels = labels
+        self.hook: DataprocHook | None = None
+    def execute(self, context: Context) -> dict:
+        self.hook = DataprocHook(
+            yandex_conn_id=self.yandex_conn_id,
+        )
+        operation_result = self.hook.client.create_cluster(
+            folder_id=self.folder_id,
+            cluster_name=self.cluster_name,
+            cluster_description=self.cluster_description,
+            cluster_image_version=self.cluster_image_version,
+            ssh_public_keys=self.ssh_public_keys,
+            subnet_id=self.subnet_id,
+            services=self.services,
+            s3_bucket=self.s3_bucket,
+            zone=self.zone,
+            service_account_id=self.service_account_id or self.hook.default_service_account_id,
+            masternode_resource_preset=self.masternode_resource_preset,
+            masternode_disk_size=self.masternode_disk_size,
+            masternode_disk_type=self.masternode_disk_type,
+            datanode_resource_preset=self.datanode_resource_preset,
+            datanode_disk_size=self.datanode_disk_size,
+            datanode_disk_type=self.datanode_disk_type,
+            datanode_count=self.datanode_count,
+            computenode_resource_preset=self.computenode_resource_preset,
+            computenode_disk_size=self.computenode_disk_size,
+            computenode_disk_type=self.computenode_disk_type,
+            computenode_count=self.computenode_count,
+            computenode_max_hosts_count=self.computenode_max_hosts_count,
+            computenode_measurement_duration=self.computenode_measurement_duration,
+            computenode_warmup_duration=self.computenode_warmup_duration,
+            computenode_stabilization_duration=self.computenode_stabilization_duration,
+            computenode_preemptible=self.computenode_preemptible,
+            computenode_cpu_utilization_target=self.computenode_cpu_utilization_target,
+            computenode_decommission_timeout=self.computenode_decommission_timeout,
+            properties=self.properties,
+            enable_ui_proxy=self.enable_ui_proxy,
+            host_group_ids=self.host_group_ids,
+            security_group_ids=self.security_group_ids,
+            log_group_id=self.log_group_id,
+            labels=self.labels,
+            initialization_actions=self.initialization_actions
+            and [
+                self.hook.sdk.wrappers.InitializationAction(
+                    uri=init_action.uri,
+                    args=init_action.args,
+                    timeout=init_action.timeout,
+                )
+                for init_action in self.initialization_actions
+            ],
+        )
+        cluster_id = operation_result.response.id
+        context["task_instance"].xcom_push(key="cluster_id", value=cluster_id)
+        # Deprecated
+        context["task_instance"].xcom_push(key="yandexcloud_connection_id", value=self.yandex_conn_id)
+        return cluster_id
+    @property
+    def cluster_id(self):
+        return self.output
+class DataprocBaseOperator(BaseOperator):
+    """Base class for DataProc operators working with given cluster.
+    :param connection_id: ID of the Yandex.Cloud Airflow connection.
+    :param cluster_id: ID of the cluster to remove. (templated)
+    """
+    template_fields: Sequence[str] = ("cluster_id",)
+    def __init__(self, *, yandex_conn_id: str | None = None, cluster_id: str | None = None, **kwargs) -> None:
+        super().__init__(**kwargs)
+        self.cluster_id = cluster_id
+        self.yandex_conn_id = yandex_conn_id
+    def _setup(self, context: Context) -> DataprocHook:
+        if self.cluster_id is None:
+            self.cluster_id = context["task_instance"].xcom_pull(key="cluster_id")
+        if self.yandex_conn_id is None:
+            xcom_yandex_conn_id = context["task_instance"].xcom_pull(key="yandexcloud_connection_id")
+            if xcom_yandex_conn_id:
+                warnings.warn(
+                    "Implicit pass of `yandex_conn_id` is deprecated, please pass it explicitly",
+                    AirflowProviderDeprecationWarning,
+                    stacklevel=2,
+                )
+                self.yandex_conn_id = xcom_yandex_conn_id
+        return DataprocHook(yandex_conn_id=self.yandex_conn_id)
+    def execute(self, context: Context):
+        raise NotImplementedError()
+class DataprocDeleteClusterOperator(DataprocBaseOperator):
+    """Deletes Yandex.Cloud Data Proc cluster.
+    :param connection_id: ID of the Yandex.Cloud Airflow connection.
+    :param cluster_id: ID of the cluster to remove. (templated)
+    """
+    def __init__(self, *, connection_id: str | None = None, cluster_id: str | None = None, **kwargs) -> None:
+        super().__init__(yandex_conn_id=connection_id, cluster_id=cluster_id, **kwargs)
+    def execute(self, context: Context) -> None:
+        hook = self._setup(context)
+        hook.client.delete_cluster(self.cluster_id)
+class DataprocCreateHiveJobOperator(DataprocBaseOperator):
+    """Runs Hive job in Data Proc cluster.
+    :param query: Hive query.
+    :param query_file_uri: URI of the script that contains Hive queries. Can be placed in HDFS or S3.
+    :param properties: A mapping of property names to values, used to configure Hive.
+    :param script_variables: Mapping of query variable names to values.
+    :param continue_on_failure: Whether to continue executing queries if a query fails.
+    :param name: Name of the job. Used for labeling.
+    :param cluster_id: ID of the cluster to run job in.
+                       Will try to take the ID from Dataproc Hook object if it's specified. (templated)
+    :param connection_id: ID of the Yandex.Cloud Airflow connection.
+    """
+    def __init__(
+        self,
+        *,
+        query: str | None = None,
+        query_file_uri: str | None = None,
+        script_variables: dict[str, str] | None = None,
+        continue_on_failure: bool = False,
+        properties: dict[str, str] | None = None,
+        name: str = "Hive job",
+        cluster_id: str | None = None,
+        connection_id: str | None = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(yandex_conn_id=connection_id, cluster_id=cluster_id, **kwargs)
+        self.query = query
+        self.query_file_uri = query_file_uri
+        self.script_variables = script_variables
+        self.continue_on_failure = continue_on_failure
+        self.properties = properties
+        self.name = name
+    def execute(self, context: Context) -> None:
+        hook = self._setup(context)
+        hook.client.create_hive_job(
+            query=self.query,
+            query_file_uri=self.query_file_uri,
+            script_variables=self.script_variables,
+            continue_on_failure=self.continue_on_failure,
+            properties=self.properties,
+            name=self.name,
+            cluster_id=self.cluster_id,
+        )
+class DataprocCreateMapReduceJobOperator(DataprocBaseOperator):
+    """Runs Mapreduce job in Data Proc cluster.
+    :param main_jar_file_uri: URI of jar file with job.
+                              Can be placed in HDFS or S3. Can be specified instead of main_class.
+    :param main_class: Name of the main class of the job. Can be specified instead of main_jar_file_uri.
+    :param file_uris: URIs of files used in the job. Can be placed in HDFS or S3.
+    :param archive_uris: URIs of archive files used in the job. Can be placed in HDFS or S3.
+    :param jar_file_uris: URIs of JAR files used in the job. Can be placed in HDFS or S3.
+    :param properties: Properties for the job.
+    :param args: Arguments to be passed to the job.
+    :param name: Name of the job. Used for labeling.
+    :param cluster_id: ID of the cluster to run job in.
+                       Will try to take the ID from Dataproc Hook object if it's specified. (templated)
+    :param connection_id: ID of the Yandex.Cloud Airflow connection.
+    """
+    def __init__(
+        self,
+        *,
+        main_class: str | None = None,
+        main_jar_file_uri: str | None = None,
+        jar_file_uris: Iterable[str] | None = None,
+        archive_uris: Iterable[str] | None = None,
+        file_uris: Iterable[str] | None = None,
+        args: Iterable[str] | None = None,
+        properties: dict[str, str] | None = None,
+        name: str = "Mapreduce job",
+        cluster_id: str | None = None,
+        connection_id: str | None = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(yandex_conn_id=connection_id, cluster_id=cluster_id, **kwargs)
+        self.main_class = main_class
+        self.main_jar_file_uri = main_jar_file_uri
+        self.jar_file_uris = jar_file_uris
+        self.archive_uris = archive_uris
+        self.file_uris = file_uris
+        self.args = args
+        self.properties = properties
+        self.name = name
+    def execute(self, context: Context) -> None:
+        hook = self._setup(context)
+        hook.client.create_mapreduce_job(
+            main_class=self.main_class,
+            main_jar_file_uri=self.main_jar_file_uri,
+            jar_file_uris=self.jar_file_uris,
+            archive_uris=self.archive_uris,
+            file_uris=self.file_uris,
+            args=self.args,
+            properties=self.properties,
+            name=self.name,
+            cluster_id=self.cluster_id,
+        )
+class DataprocCreateSparkJobOperator(DataprocBaseOperator):
+    """Runs Spark job in Data Proc cluster.
+    :param main_jar_file_uri: URI of jar file with job. Can be placed in HDFS or S3.
+    :param main_class: Name of the main class of the job.
+    :param file_uris: URIs of files used in the job. Can be placed in HDFS or S3.
+    :param archive_uris: URIs of archive files used in the job. Can be placed in HDFS or S3.
+    :param jar_file_uris: URIs of JAR files used in the job. Can be placed in HDFS or S3.
+    :param properties: Properties for the job.
+    :param args: Arguments to be passed to the job.
+    :param name: Name of the job. Used for labeling.
+    :param cluster_id: ID of the cluster to run job in.
+                       Will try to take the ID from Dataproc Hook object if it's specified. (templated)
+    :param connection_id: ID of the Yandex.Cloud Airflow connection.
+    :param packages: List of maven coordinates of jars to include on the driver and executor classpaths.
+    :param repositories: List of additional remote repositories to search for the maven coordinates
+                        given with --packages.
+    :param exclude_packages: List of groupId:artifactId, to exclude while resolving the dependencies
+                        provided in --packages to avoid dependency conflicts.
+    """
+    def __init__(
+        self,
+        *,
+        main_class: str | None = None,
+        main_jar_file_uri: str | None = None,
+        jar_file_uris: Iterable[str] | None = None,
+        archive_uris: Iterable[str] | None = None,
+        file_uris: Iterable[str] | None = None,
+        args: Iterable[str] | None = None,
+        properties: dict[str, str] | None = None,
+        name: str = "Spark job",
+        cluster_id: str | None = None,
+        connection_id: str | None = None,
+        packages: Iterable[str] | None = None,
+        repositories: Iterable[str] | None = None,
+        exclude_packages: Iterable[str] | None = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(yandex_conn_id=connection_id, cluster_id=cluster_id, **kwargs)
+        self.main_class = main_class
+        self.main_jar_file_uri = main_jar_file_uri
+        self.jar_file_uris = jar_file_uris
+        self.archive_uris = archive_uris
+        self.file_uris = file_uris
+        self.args = args
+        self.properties = properties
+        self.name = name
+        self.packages = packages
+        self.repositories = repositories
+        self.exclude_packages = exclude_packages
+    def execute(self, context: Context) -> None:
+        hook = self._setup(context)
+        hook.client.create_spark_job(
+            main_class=self.main_class,
+            main_jar_file_uri=self.main_jar_file_uri,
+            jar_file_uris=self.jar_file_uris,
+            archive_uris=self.archive_uris,
+            file_uris=self.file_uris,
+            args=self.args,
+            properties=self.properties,
+            packages=self.packages,
+            repositories=self.repositories,
+            exclude_packages=self.exclude_packages,
+            name=self.name,
+            cluster_id=self.cluster_id,
+        )
+class DataprocCreatePysparkJobOperator(DataprocBaseOperator):
+    """Runs Pyspark job in Data Proc cluster.
+    :param main_python_file_uri: URI of python file with job. Can be placed in HDFS or S3.
+    :param python_file_uris: URIs of python files used in the job. Can be placed in HDFS or S3.
+    :param file_uris: URIs of files used in the job. Can be placed in HDFS or S3.
+    :param archive_uris: URIs of archive files used in the job. Can be placed in HDFS or S3.
+    :param jar_file_uris: URIs of JAR files used in the job. Can be placed in HDFS or S3.
+    :param properties: Properties for the job.
+    :param args: Arguments to be passed to the job.
+    :param name: Name of the job. Used for labeling.
+    :param cluster_id: ID of the cluster to run job in.
+                       Will try to take the ID from Dataproc Hook object if it's specified. (templated)
+    :param connection_id: ID of the Yandex.Cloud Airflow connection.
+    :param packages: List of maven coordinates of jars to include on the driver and executor classpaths.
+    :param repositories: List of additional remote repositories to search for the maven coordinates
+                         given with --packages.
+    :param exclude_packages: List of groupId:artifactId, to exclude while resolving the dependencies
+                         provided in --packages to avoid dependency conflicts.
+    """
+    def __init__(
+        self,
+        *,
+        main_python_file_uri: str | None = None,
+        python_file_uris: Iterable[str] | None = None,
+        jar_file_uris: Iterable[str] | None = None,
+        archive_uris: Iterable[str] | None = None,
+        file_uris: Iterable[str] | None = None,
+        args: Iterable[str] | None = None,
+        properties: dict[str, str] | None = None,
+        name: str = "Pyspark job",
+        cluster_id: str | None = None,
+        connection_id: str | None = None,
+        packages: Iterable[str] | None = None,
+        repositories: Iterable[str] | None = None,
+        exclude_packages: Iterable[str] | None = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(yandex_conn_id=connection_id, cluster_id=cluster_id, **kwargs)
+        self.main_python_file_uri = main_python_file_uri
+        self.python_file_uris = python_file_uris
+        self.jar_file_uris = jar_file_uris
+        self.archive_uris = archive_uris
+        self.file_uris = file_uris
+        self.args = args
+        self.properties = properties
+        self.name = name
+        self.packages = packages
+        self.repositories = repositories
+        self.exclude_packages = exclude_packages
+    def execute(self, context: Context) -> None:
+        hook = self._setup(context)
+        hook.client.create_pyspark_job(
+            main_python_file_uri=self.main_python_file_uri,
+            python_file_uris=self.python_file_uris,
+            jar_file_uris=self.jar_file_uris,
+            archive_uris=self.archive_uris,
+            file_uris=self.file_uris,
+            args=self.args,
+            properties=self.properties,
+            packages=self.packages,
+            repositories=self.repositories,
+            exclude_packages=self.exclude_packages,
+            name=self.name,
+            cluster_id=self.cluster_id,
+        )

airflow/providers/yandex/operators/yandexcloud_dataproc.py CHANGED Viewed

@@ -14,522 +14,17 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+"""This module is deprecated. Please use :mod:`airflow.providers.yandex.operators.dataproc` instead."""
 from __future__ import annotations
 import warnings
-from dataclasses import dataclass
-from typing import TYPE_CHECKING, Iterable, Sequence
 from airflow.exceptions import AirflowProviderDeprecationWarning
-from airflow.models import BaseOperator
-from airflow.providers.yandex.hooks.yandexcloud_dataproc import DataprocHook
-if TYPE_CHECKING:
-    from airflow.utils.context import Context
-@dataclass
-class InitializationAction:
-    """Data for initialization action to be run at start of DataProc cluster."""
-    uri: str  # Uri of the executable file
-    args: Sequence[str]  # Arguments to the initialization action
-    timeout: int  # Execution timeout
-class DataprocCreateClusterOperator(BaseOperator):
-    """Creates Yandex.Cloud Data Proc cluster.
-    :param folder_id: ID of the folder in which cluster should be created.
-    :param cluster_name: Cluster name. Must be unique inside the folder.
-    :param cluster_description: Cluster description.
-    :param cluster_image_version: Cluster image version. Use default.
-    :param ssh_public_keys: List of SSH public keys that will be deployed to created compute instances.
-    :param subnet_id: ID of the subnetwork. All Data Proc cluster nodes will use one subnetwork.
-    :param services: List of services that will be installed to the cluster. Possible options:
-        HDFS, YARN, MAPREDUCE, HIVE, TEZ, ZOOKEEPER, HBASE, SQOOP, FLUME, SPARK, SPARK, ZEPPELIN, OOZIE
-    :param s3_bucket: Yandex.Cloud S3 bucket to store cluster logs.
-                      Jobs will not work if the bucket is not specified.
-    :param zone: Availability zone to create cluster in.
-                 Currently there are ru-central1-a, ru-central1-b and ru-central1-c.
-    :param service_account_id: Service account id for the cluster.
-                               Service account can be created inside the folder.
-    :param masternode_resource_preset: Resources preset (CPU+RAM configuration)
-                                       for the primary node of the cluster.
-    :param masternode_disk_size: Masternode storage size in GiB.
-    :param masternode_disk_type: Masternode storage type. Possible options: network-ssd, network-hdd.
-    :param datanode_resource_preset: Resources preset (CPU+RAM configuration)
-                                     for the data nodes of the cluster.
-    :param datanode_disk_size: Datanodes storage size in GiB.
-    :param datanode_disk_type: Datanodes storage type. Possible options: network-ssd, network-hdd.
-    :param computenode_resource_preset: Resources preset (CPU+RAM configuration)
-                                        for the compute nodes of the cluster.
-    :param computenode_disk_size: Computenodes storage size in GiB.
-    :param computenode_disk_type: Computenodes storage type. Possible options: network-ssd, network-hdd.
-    :param connection_id: ID of the Yandex.Cloud Airflow connection.
-    :param computenode_max_count: Maximum number of nodes of compute autoscaling subcluster.
-    :param computenode_warmup_duration: The warmup time of the instance in seconds. During this time,
-                                        traffic is sent to the instance,
-                                        but instance metrics are not collected. In seconds.
-    :param computenode_stabilization_duration: Minimum amount of time in seconds for monitoring before
-                                   Instance Groups can reduce the number of instances in the group.
-                                   During this time, the group size doesn't decrease,
-                                   even if the new metric values indicate that it should. In seconds.
-    :param computenode_preemptible: Preemptible instances are stopped at least once every 24 hours,
-                        and can be stopped at any time if their resources are needed by Compute.
-    :param computenode_cpu_utilization_target: Defines an autoscaling rule
-                                   based on the average CPU utilization of the instance group.
-                                   in percents. 10-100.
-                                   By default is not set and default autoscaling strategy is used.
-    :param computenode_decommission_timeout: Timeout to gracefully decommission nodes during downscaling.
-                                             In seconds
-    :param properties: Properties passed to main node software.
-                        Docs: https://cloud.yandex.com/docs/data-proc/concepts/settings-list
-    :param enable_ui_proxy: Enable UI Proxy feature for forwarding Hadoop components web interfaces
-                        Docs: https://cloud.yandex.com/docs/data-proc/concepts/ui-proxy
-    :param host_group_ids: Dedicated host groups to place VMs of cluster on.
-                        Docs: https://cloud.yandex.com/docs/compute/concepts/dedicated-host
-    :param security_group_ids: User security groups.
-                        Docs: https://cloud.yandex.com/docs/data-proc/concepts/network#security-groups
-    :param log_group_id: Id of log group to write logs. By default logs will be sent to default log group.
-                    To disable cloud log sending set cluster property dataproc:disable_cloud_logging = true
-                    Docs: https://cloud.yandex.com/docs/data-proc/concepts/logs
-    :param initialization_actions: Set of init-actions to run when cluster starts.
-                        Docs: https://cloud.yandex.com/docs/data-proc/concepts/init-action
-    :param labels: Cluster labels as key:value pairs. No more than 64 per resource.
-                        Docs: https://cloud.yandex.com/docs/resource-manager/concepts/labels
-    """
-    def __init__(
-        self,
-        *,
-        folder_id: str | None = None,
-        cluster_name: str | None = None,
-        cluster_description: str | None = "",
-        cluster_image_version: str | None = None,
-        ssh_public_keys: str | Iterable[str] | None = None,
-        subnet_id: str | None = None,
-        services: Iterable[str] = ("HDFS", "YARN", "MAPREDUCE", "HIVE", "SPARK"),
-        s3_bucket: str | None = None,
-        zone: str = "ru-central1-b",
-        service_account_id: str | None = None,
-        masternode_resource_preset: str | None = None,
-        masternode_disk_size: int | None = None,
-        masternode_disk_type: str | None = None,
-        datanode_resource_preset: str | None = None,
-        datanode_disk_size: int | None = None,
-        datanode_disk_type: str | None = None,
-        datanode_count: int = 1,
-        computenode_resource_preset: str | None = None,
-        computenode_disk_size: int | None = None,
-        computenode_disk_type: str | None = None,
-        computenode_count: int = 0,
-        computenode_max_hosts_count: int | None = None,
-        computenode_measurement_duration: int | None = None,
-        computenode_warmup_duration: int | None = None,
-        computenode_stabilization_duration: int | None = None,
-        computenode_preemptible: bool = False,
-        computenode_cpu_utilization_target: int | None = None,
-        computenode_decommission_timeout: int | None = None,
-        connection_id: str | None = None,
-        properties: dict[str, str] | None = None,
-        enable_ui_proxy: bool = False,
-        host_group_ids: Iterable[str] | None = None,
-        security_group_ids: Iterable[str] | None = None,
-        log_group_id: str | None = None,
-        initialization_actions: Iterable[InitializationAction] | None = None,
-        labels: dict[str, str] | None = None,
-        **kwargs,
-    ) -> None:
-        super().__init__(**kwargs)
-        self.folder_id = folder_id
-        self.yandex_conn_id = connection_id
-        self.cluster_name = cluster_name
-        self.cluster_description = cluster_description
-        self.cluster_image_version = cluster_image_version
-        self.ssh_public_keys = ssh_public_keys
-        self.subnet_id = subnet_id
-        self.services = services
-        self.s3_bucket = s3_bucket
-        self.zone = zone
-        self.service_account_id = service_account_id
-        self.masternode_resource_preset = masternode_resource_preset
-        self.masternode_disk_size = masternode_disk_size
-        self.masternode_disk_type = masternode_disk_type
-        self.datanode_resource_preset = datanode_resource_preset
-        self.datanode_disk_size = datanode_disk_size
-        self.datanode_disk_type = datanode_disk_type
-        self.datanode_count = datanode_count
-        self.computenode_resource_preset = computenode_resource_preset
-        self.computenode_disk_size = computenode_disk_size
-        self.computenode_disk_type = computenode_disk_type
-        self.computenode_count = computenode_count
-        self.computenode_max_hosts_count = computenode_max_hosts_count
-        self.computenode_measurement_duration = computenode_measurement_duration
-        self.computenode_warmup_duration = computenode_warmup_duration
-        self.computenode_stabilization_duration = computenode_stabilization_duration
-        self.computenode_preemptible = computenode_preemptible
-        self.computenode_cpu_utilization_target = computenode_cpu_utilization_target
-        self.computenode_decommission_timeout = computenode_decommission_timeout
-        self.properties = properties
-        self.enable_ui_proxy = enable_ui_proxy
-        self.host_group_ids = host_group_ids
-        self.security_group_ids = security_group_ids
-        self.log_group_id = log_group_id
-        self.initialization_actions = initialization_actions
-        self.labels = labels
-        self.hook: DataprocHook | None = None
-    def execute(self, context: Context) -> dict:
-        self.hook = DataprocHook(
-            yandex_conn_id=self.yandex_conn_id,
-        )
-        operation_result = self.hook.client.create_cluster(
-            folder_id=self.folder_id,
-            cluster_name=self.cluster_name,
-            cluster_description=self.cluster_description,
-            cluster_image_version=self.cluster_image_version,
-            ssh_public_keys=self.ssh_public_keys,
-            subnet_id=self.subnet_id,
-            services=self.services,
-            s3_bucket=self.s3_bucket,
-            zone=self.zone,
-            service_account_id=self.service_account_id or self.hook.default_service_account_id,
-            masternode_resource_preset=self.masternode_resource_preset,
-            masternode_disk_size=self.masternode_disk_size,
-            masternode_disk_type=self.masternode_disk_type,
-            datanode_resource_preset=self.datanode_resource_preset,
-            datanode_disk_size=self.datanode_disk_size,
-            datanode_disk_type=self.datanode_disk_type,
-            datanode_count=self.datanode_count,
-            computenode_resource_preset=self.computenode_resource_preset,
-            computenode_disk_size=self.computenode_disk_size,
-            computenode_disk_type=self.computenode_disk_type,
-            computenode_count=self.computenode_count,
-            computenode_max_hosts_count=self.computenode_max_hosts_count,
-            computenode_measurement_duration=self.computenode_measurement_duration,
-            computenode_warmup_duration=self.computenode_warmup_duration,
-            computenode_stabilization_duration=self.computenode_stabilization_duration,
-            computenode_preemptible=self.computenode_preemptible,
-            computenode_cpu_utilization_target=self.computenode_cpu_utilization_target,
-            computenode_decommission_timeout=self.computenode_decommission_timeout,
-            properties=self.properties,
-            enable_ui_proxy=self.enable_ui_proxy,
-            host_group_ids=self.host_group_ids,
-            security_group_ids=self.security_group_ids,
-            log_group_id=self.log_group_id,
-            labels=self.labels,
-            initialization_actions=self.initialization_actions
-            and [
-                self.hook.sdk.wrappers.InitializationAction(
-                    uri=init_action.uri,
-                    args=init_action.args,
-                    timeout=init_action.timeout,
-                )
-                for init_action in self.initialization_actions
-            ],
-        )
-        cluster_id = operation_result.response.id
-        context["task_instance"].xcom_push(key="cluster_id", value=cluster_id)
-        # Deprecated
-        context["task_instance"].xcom_push(key="yandexcloud_connection_id", value=self.yandex_conn_id)
-        return cluster_id
-    @property
-    def cluster_id(self):
-        return self.output
-class DataprocBaseOperator(BaseOperator):
-    """Base class for DataProc operators working with given cluster.
-    :param connection_id: ID of the Yandex.Cloud Airflow connection.
-    :param cluster_id: ID of the cluster to remove. (templated)
-    """
-    template_fields: Sequence[str] = ("cluster_id",)
-    def __init__(self, *, yandex_conn_id: str | None = None, cluster_id: str | None = None, **kwargs) -> None:
-        super().__init__(**kwargs)
-        self.cluster_id = cluster_id
-        self.yandex_conn_id = yandex_conn_id
-    def _setup(self, context: Context) -> DataprocHook:
-        if self.cluster_id is None:
-            self.cluster_id = context["task_instance"].xcom_pull(key="cluster_id")
-        if self.yandex_conn_id is None:
-            xcom_yandex_conn_id = context["task_instance"].xcom_pull(key="yandexcloud_connection_id")
-            if xcom_yandex_conn_id:
-                warnings.warn(
-                    "Implicit pass of `yandex_conn_id` is deprecated, please pass it explicitly",
-                    AirflowProviderDeprecationWarning,
-                    stacklevel=2,
-                )
-                self.yandex_conn_id = xcom_yandex_conn_id
-        return DataprocHook(yandex_conn_id=self.yandex_conn_id)
-    def execute(self, context: Context):
-        raise NotImplementedError()
-class DataprocDeleteClusterOperator(DataprocBaseOperator):
-    """Deletes Yandex.Cloud Data Proc cluster.
-    :param connection_id: ID of the Yandex.Cloud Airflow connection.
-    :param cluster_id: ID of the cluster to remove. (templated)
-    """
-    def __init__(self, *, connection_id: str | None = None, cluster_id: str | None = None, **kwargs) -> None:
-        super().__init__(yandex_conn_id=connection_id, cluster_id=cluster_id, **kwargs)
-    def execute(self, context: Context) -> None:
-        hook = self._setup(context)
-        hook.client.delete_cluster(self.cluster_id)
-class DataprocCreateHiveJobOperator(DataprocBaseOperator):
-    """Runs Hive job in Data Proc cluster.
-    :param query: Hive query.
-    :param query_file_uri: URI of the script that contains Hive queries. Can be placed in HDFS or S3.
-    :param properties: A mapping of property names to values, used to configure Hive.
-    :param script_variables: Mapping of query variable names to values.
-    :param continue_on_failure: Whether to continue executing queries if a query fails.
-    :param name: Name of the job. Used for labeling.
-    :param cluster_id: ID of the cluster to run job in.
-                       Will try to take the ID from Dataproc Hook object if it's specified. (templated)
-    :param connection_id: ID of the Yandex.Cloud Airflow connection.
-    """
-    def __init__(
-        self,
-        *,
-        query: str | None = None,
-        query_file_uri: str | None = None,
-        script_variables: dict[str, str] | None = None,
-        continue_on_failure: bool = False,
-        properties: dict[str, str] | None = None,
-        name: str = "Hive job",
-        cluster_id: str | None = None,
-        connection_id: str | None = None,
-        **kwargs,
-    ) -> None:
-        super().__init__(yandex_conn_id=connection_id, cluster_id=cluster_id, **kwargs)
-        self.query = query
-        self.query_file_uri = query_file_uri
-        self.script_variables = script_variables
-        self.continue_on_failure = continue_on_failure
-        self.properties = properties
-        self.name = name
-    def execute(self, context: Context) -> None:
-        hook = self._setup(context)
-        hook.client.create_hive_job(
-            query=self.query,
-            query_file_uri=self.query_file_uri,
-            script_variables=self.script_variables,
-            continue_on_failure=self.continue_on_failure,
-            properties=self.properties,
-            name=self.name,
-            cluster_id=self.cluster_id,
-        )
-class DataprocCreateMapReduceJobOperator(DataprocBaseOperator):
-    """Runs Mapreduce job in Data Proc cluster.
-    :param main_jar_file_uri: URI of jar file with job.
-                              Can be placed in HDFS or S3. Can be specified instead of main_class.
-    :param main_class: Name of the main class of the job. Can be specified instead of main_jar_file_uri.
-    :param file_uris: URIs of files used in the job. Can be placed in HDFS or S3.
-    :param archive_uris: URIs of archive files used in the job. Can be placed in HDFS or S3.
-    :param jar_file_uris: URIs of JAR files used in the job. Can be placed in HDFS or S3.
-    :param properties: Properties for the job.
-    :param args: Arguments to be passed to the job.
-    :param name: Name of the job. Used for labeling.
-    :param cluster_id: ID of the cluster to run job in.
-                       Will try to take the ID from Dataproc Hook object if it's specified. (templated)
-    :param connection_id: ID of the Yandex.Cloud Airflow connection.
-    """
-    def __init__(
-        self,
-        *,
-        main_class: str | None = None,
-        main_jar_file_uri: str | None = None,
-        jar_file_uris: Iterable[str] | None = None,
-        archive_uris: Iterable[str] | None = None,
-        file_uris: Iterable[str] | None = None,
-        args: Iterable[str] | None = None,
-        properties: dict[str, str] | None = None,
-        name: str = "Mapreduce job",
-        cluster_id: str | None = None,
-        connection_id: str | None = None,
-        **kwargs,
-    ) -> None:
-        super().__init__(yandex_conn_id=connection_id, cluster_id=cluster_id, **kwargs)
-        self.main_class = main_class
-        self.main_jar_file_uri = main_jar_file_uri
-        self.jar_file_uris = jar_file_uris
-        self.archive_uris = archive_uris
-        self.file_uris = file_uris
-        self.args = args
-        self.properties = properties
-        self.name = name
-    def execute(self, context: Context) -> None:
-        hook = self._setup(context)
-        hook.client.create_mapreduce_job(
-            main_class=self.main_class,
-            main_jar_file_uri=self.main_jar_file_uri,
-            jar_file_uris=self.jar_file_uris,
-            archive_uris=self.archive_uris,
-            file_uris=self.file_uris,
-            args=self.args,
-            properties=self.properties,
-            name=self.name,
-            cluster_id=self.cluster_id,
-        )
-class DataprocCreateSparkJobOperator(DataprocBaseOperator):
-    """Runs Spark job in Data Proc cluster.
-    :param main_jar_file_uri: URI of jar file with job. Can be placed in HDFS or S3.
-    :param main_class: Name of the main class of the job.
-    :param file_uris: URIs of files used in the job. Can be placed in HDFS or S3.
-    :param archive_uris: URIs of archive files used in the job. Can be placed in HDFS or S3.
-    :param jar_file_uris: URIs of JAR files used in the job. Can be placed in HDFS or S3.
-    :param properties: Properties for the job.
-    :param args: Arguments to be passed to the job.
-    :param name: Name of the job. Used for labeling.
-    :param cluster_id: ID of the cluster to run job in.
-                       Will try to take the ID from Dataproc Hook object if it's specified. (templated)
-    :param connection_id: ID of the Yandex.Cloud Airflow connection.
-    :param packages: List of maven coordinates of jars to include on the driver and executor classpaths.
-    :param repositories: List of additional remote repositories to search for the maven coordinates
-                        given with --packages.
-    :param exclude_packages: List of groupId:artifactId, to exclude while resolving the dependencies
-                        provided in --packages to avoid dependency conflicts.
-    """
-    def __init__(
-        self,
-        *,
-        main_class: str | None = None,
-        main_jar_file_uri: str | None = None,
-        jar_file_uris: Iterable[str] | None = None,
-        archive_uris: Iterable[str] | None = None,
-        file_uris: Iterable[str] | None = None,
-        args: Iterable[str] | None = None,
-        properties: dict[str, str] | None = None,
-        name: str = "Spark job",
-        cluster_id: str | None = None,
-        connection_id: str | None = None,
-        packages: Iterable[str] | None = None,
-        repositories: Iterable[str] | None = None,
-        exclude_packages: Iterable[str] | None = None,
-        **kwargs,
-    ) -> None:
-        super().__init__(yandex_conn_id=connection_id, cluster_id=cluster_id, **kwargs)
-        self.main_class = main_class
-        self.main_jar_file_uri = main_jar_file_uri
-        self.jar_file_uris = jar_file_uris
-        self.archive_uris = archive_uris
-        self.file_uris = file_uris
-        self.args = args
-        self.properties = properties
-        self.name = name
-        self.packages = packages
-        self.repositories = repositories
-        self.exclude_packages = exclude_packages
-    def execute(self, context: Context) -> None:
-        hook = self._setup(context)
-        hook.client.create_spark_job(
-            main_class=self.main_class,
-            main_jar_file_uri=self.main_jar_file_uri,
-            jar_file_uris=self.jar_file_uris,
-            archive_uris=self.archive_uris,
-            file_uris=self.file_uris,
-            args=self.args,
-            properties=self.properties,
-            packages=self.packages,
-            repositories=self.repositories,
-            exclude_packages=self.exclude_packages,
-            name=self.name,
-            cluster_id=self.cluster_id,
-        )
-class DataprocCreatePysparkJobOperator(DataprocBaseOperator):
-    """Runs Pyspark job in Data Proc cluster.
-    :param main_python_file_uri: URI of python file with job. Can be placed in HDFS or S3.
-    :param python_file_uris: URIs of python files used in the job. Can be placed in HDFS or S3.
-    :param file_uris: URIs of files used in the job. Can be placed in HDFS or S3.
-    :param archive_uris: URIs of archive files used in the job. Can be placed in HDFS or S3.
-    :param jar_file_uris: URIs of JAR files used in the job. Can be placed in HDFS or S3.
-    :param properties: Properties for the job.
-    :param args: Arguments to be passed to the job.
-    :param name: Name of the job. Used for labeling.
-    :param cluster_id: ID of the cluster to run job in.
-                       Will try to take the ID from Dataproc Hook object if it's specified. (templated)
-    :param connection_id: ID of the Yandex.Cloud Airflow connection.
-    :param packages: List of maven coordinates of jars to include on the driver and executor classpaths.
-    :param repositories: List of additional remote repositories to search for the maven coordinates
-                         given with --packages.
-    :param exclude_packages: List of groupId:artifactId, to exclude while resolving the dependencies
-                         provided in --packages to avoid dependency conflicts.
-    """
-    def __init__(
-        self,
-        *,
-        main_python_file_uri: str | None = None,
-        python_file_uris: Iterable[str] | None = None,
-        jar_file_uris: Iterable[str] | None = None,
-        archive_uris: Iterable[str] | None = None,
-        file_uris: Iterable[str] | None = None,
-        args: Iterable[str] | None = None,
-        properties: dict[str, str] | None = None,
-        name: str = "Pyspark job",
-        cluster_id: str | None = None,
-        connection_id: str | None = None,
-        packages: Iterable[str] | None = None,
-        repositories: Iterable[str] | None = None,
-        exclude_packages: Iterable[str] | None = None,
-        **kwargs,
-    ) -> None:
-        super().__init__(yandex_conn_id=connection_id, cluster_id=cluster_id, **kwargs)
-        self.main_python_file_uri = main_python_file_uri
-        self.python_file_uris = python_file_uris
-        self.jar_file_uris = jar_file_uris
-        self.archive_uris = archive_uris
-        self.file_uris = file_uris
-        self.args = args
-        self.properties = properties
-        self.name = name
-        self.packages = packages
-        self.repositories = repositories
-        self.exclude_packages = exclude_packages
+from airflow.providers.yandex.operators.dataproc import *  # noqa: F403
-    def execute(self, context: Context) -> None:
-        hook = self._setup(context)
-        hook.client.create_pyspark_job(
-            main_python_file_uri=self.main_python_file_uri,
-            python_file_uris=self.python_file_uris,
-            jar_file_uris=self.jar_file_uris,
-            archive_uris=self.archive_uris,
-            file_uris=self.file_uris,
-            args=self.args,
-            properties=self.properties,
-            packages=self.packages,
-            repositories=self.repositories,
-            exclude_packages=self.exclude_packages,
-            name=self.name,
-            cluster_id=self.cluster_id,
-        )
+warnings.warn(
+    "This module is deprecated. Please use `airflow.providers.yandex.operators.dataproc` instead.",
+    AirflowProviderDeprecationWarning,
+    stacklevel=2,
+)

{apache_airflow_providers_yandex-3.11.0rc1.dist-info → apache_airflow_providers_yandex-3.11.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: apache-airflow-providers-yandex
-Version: 3.11.0rc1
+Version: 3.11.1
 Summary: Provider package apache-airflow-providers-yandex for Apache Airflow
 Keywords: airflow-provider,yandex,airflow,integration
 Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -21,12 +21,12 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: System :: Monitoring
-Requires-Dist: apache-airflow>=2.7.0rc0
-Requires-Dist: yandex-query-client>=0.1.2
-Requires-Dist: yandexcloud>=0.228.0
+Requires-Dist: apache-airflow>=2.7.0
+Requires-Dist: yandex-query-client>=0.1.4
+Requires-Dist: yandexcloud>=0.278.0
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
-Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-yandex/3.11.0/changelog.html
-Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-yandex/3.11.0
+Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-yandex/3.11.1/changelog.html
+Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-yandex/3.11.1
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
 Project-URL: Source Code, https://github.com/apache/airflow
 Project-URL: Twitter, https://twitter.com/ApacheAirflow
@@ -76,7 +76,7 @@ Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
 Package ``apache-airflow-providers-yandex``
-Release: ``3.11.0.rc1``
+Release: ``3.11.1``
 This package is for Yandex, including:
@@ -91,7 +91,7 @@ This is a provider package for ``yandex`` provider. All classes for this provide
 are in ``airflow.providers.yandex`` python package.
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-yandex/3.11.0/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-yandex/3.11.1/>`_.
 Installation
 ------------
@@ -109,9 +109,9 @@ Requirements
 PIP package              Version required
 =======================  ==================
 ``apache-airflow``       ``>=2.7.0``
-``yandexcloud``          ``>=0.228.0``
-``yandex-query-client``  ``>=0.1.2``
+``yandexcloud``          ``>=0.278.0``
+``yandex-query-client``  ``>=0.1.4``
 =======================  ==================
 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-yandex/3.11.0/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-yandex/3.11.1/changelog.html>`_.

{apache_airflow_providers_yandex-3.11.0rc1.dist-info → apache_airflow_providers_yandex-3.11.1.dist-info}/RECORD RENAMED Viewed

@@ -1,14 +1,16 @@
 airflow/providers/yandex/LICENSE,sha256=ywUBpKZc7Jb96rVt5I3IDbg7dIJAbUSHkuoDcF3jbH4,13569
-airflow/providers/yandex/__init__.py,sha256=Yt71vCxPPsxO_2P9J0vcegEAAkCHBPzVGqnrAqlc1sM,1582
-airflow/providers/yandex/get_provider_info.py,sha256=UphQCZS6MagPObXUU2wMT4hyjz-0Ct148R3I8mYKkFk,4707
+airflow/providers/yandex/__init__.py,sha256=YDLtjjlu9HOYq1ZymV8A6FrVMGCaF4EmVDyWYaMaZMo,1494
+airflow/providers/yandex/get_provider_info.py,sha256=z_jO2yskCqIQzWSD_O0ZQ1uWbCR2AV9Ve2mFcGQWbjg,4717
 airflow/providers/yandex/hooks/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
+airflow/providers/yandex/hooks/dataproc.py,sha256=1UdqxDMI7uL6fNkG6oU6l2tFITF_nHXiV1VUgRqF7KY,1379
 airflow/providers/yandex/hooks/yandex.py,sha256=xJMUzGo0sNpb5-LQvgq6jDxWHK3XkNzlpoeEELREeow,7097
-airflow/providers/yandex/hooks/yandexcloud_dataproc.py,sha256=1UdqxDMI7uL6fNkG6oU6l2tFITF_nHXiV1VUgRqF7KY,1379
-airflow/providers/yandex/hooks/yq.py,sha256=WpKL_Ic1BkqLU4JX8Lv8oPRk5RVXmHLMmL34AxTo_BU,3978
+airflow/providers/yandex/hooks/yandexcloud_dataproc.py,sha256=-JVJm3YLkDbJZKauCR1oCnWNkdLUJa1Fj_5HmZq1f44,1243
+airflow/providers/yandex/hooks/yq.py,sha256=a1J5y-LocaG89cy-A9hgbzLmVbmWrpPwjRgss-KaYVg,3477
 airflow/providers/yandex/links/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
 airflow/providers/yandex/links/yq.py,sha256=jsy3liqQFk1eSSdK9YDbor0Epp7ng_q2ueVIwsD2i-8,1578
 airflow/providers/yandex/operators/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
-airflow/providers/yandex/operators/yandexcloud_dataproc.py,sha256=0-g6AzP0KiQ6pJFLMFXHtB1YFaUPkl_4FQJZyH0ce9E,25957
+airflow/providers/yandex/operators/dataproc.py,sha256=QJc7UvBNPhAUBsuYQ4H8Wf0LpZP_-kCw7RdI0n3P_Bs,25945
+airflow/providers/yandex/operators/yandexcloud_dataproc.py,sha256=bDLMwevS5spRfVEtixdKhQTC9gqDMm9himLrRohJwKQ,1255
 airflow/providers/yandex/operators/yq.py,sha256=lGqbogakylV4s5D5movQRL4v3IU2Qt1JHH8ygo3Hd2Q,3223
 airflow/providers/yandex/secrets/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
 airflow/providers/yandex/secrets/lockbox.py,sha256=9Vi95RXd6VT36Rh1PVMUfFzm42oyrlMl77DoL9ivxVc,12161
@@ -17,7 +19,7 @@ airflow/providers/yandex/utils/credentials.py,sha256=6McJIitAuTROJRUSKTdWChfcZ9o
 airflow/providers/yandex/utils/defaults.py,sha256=CXt75MhGJe8echoDpl1vR4VG5bEvYDDjIHmFqckDh2w,950
 airflow/providers/yandex/utils/fields.py,sha256=1D8SDWH8h0djj5Hnk50w6BpPeNJyP-689Qfjpkr-yCg,1728
 airflow/providers/yandex/utils/user_agent.py,sha256=AC-WEzhjxkgUYOy4LdX2-nnUZdMhKRRUCJ2_TjfNm6k,1839
-apache_airflow_providers_yandex-3.11.0rc1.dist-info/entry_points.txt,sha256=ApXKRkvdgU2QNSQovjewC0b-LptwfBGBnJB3LTgBNx8,102
-apache_airflow_providers_yandex-3.11.0rc1.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
-apache_airflow_providers_yandex-3.11.0rc1.dist-info/METADATA,sha256=YrzHIxxUERD5JzvDmyXV6NRjyJlhNydlYaedzq5u7Pc,4919
-apache_airflow_providers_yandex-3.11.0rc1.dist-info/RECORD,,
+apache_airflow_providers_yandex-3.11.1.dist-info/entry_points.txt,sha256=ApXKRkvdgU2QNSQovjewC0b-LptwfBGBnJB3LTgBNx8,102
+apache_airflow_providers_yandex-3.11.1.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
+apache_airflow_providers_yandex-3.11.1.dist-info/METADATA,sha256=Teoe7nxOttI_AsbkeAB_hlWBkF972Mm8KyyX1iFWcrs,4909
+apache_airflow_providers_yandex-3.11.1.dist-info/RECORD,,

{apache_airflow_providers_yandex-3.11.0rc1.dist-info → apache_airflow_providers_yandex-3.11.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{apache_airflow_providers_yandex-3.11.0rc1.dist-info → apache_airflow_providers_yandex-3.11.1.dist-info}/entry_points.txt RENAMED Viewed

File without changes

apache-airflow-providers-yandex 3.11.0rc1__py3-none-any.whl → 3.11.1__py3-none-any.whl

apache-airflow-providers-yandex 3.11.0rc1__py3-none-any.whl → 3.11.1__py3-none-any.whl