apache-airflow-providers-snowflake 5.2.1__tar.gz → 5.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of apache-airflow-providers-snowflake might be problematic.
- {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/PKG-INFO +6 -6
- {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/README.rst +3 -3
- {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/airflow/providers/snowflake/__init__.py +1 -1
- {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/airflow/providers/snowflake/get_provider_info.py +3 -2
- {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/airflow/providers/snowflake/hooks/snowflake.py +6 -4
- {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/airflow/providers/snowflake/operators/snowflake.py +15 -0
- apache_airflow_providers_snowflake-5.3.0/airflow/providers/snowflake/transfers/copy_into_snowflake.py +298 -0
- {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/pyproject.toml +3 -3
- apache_airflow_providers_snowflake-5.2.1/airflow/providers/snowflake/transfers/copy_into_snowflake.py +0 -141
- {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/airflow/providers/snowflake/LICENSE +0 -0
- {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/airflow/providers/snowflake/hooks/__init__.py +0 -0
- {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/airflow/providers/snowflake/hooks/snowflake_sql_api.py +0 -0
- {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/airflow/providers/snowflake/operators/__init__.py +0 -0
- {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/airflow/providers/snowflake/transfers/__init__.py +0 -0
- {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/airflow/providers/snowflake/triggers/__init__.py +0 -0
- {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/airflow/providers/snowflake/triggers/snowflake_trigger.py +0 -0
- {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/airflow/providers/snowflake/utils/__init__.py +0 -0
- {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/airflow/providers/snowflake/utils/common.py +0 -0
- {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/airflow/providers/snowflake/utils/sql_api_generate_jwt.py +0 -0
{apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: apache-airflow-providers-snowflake
-Version: 5.2.1
+Version: 5.3.0
 Summary: Provider package apache-airflow-providers-snowflake for Apache Airflow
 Keywords: airflow-provider,snowflake,airflow,integration
 Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -27,8 +27,8 @@ Requires-Dist: snowflake-sqlalchemy>=1.1.0
 Requires-Dist: apache-airflow-providers-common-sql ; extra == "common.sql"
 Requires-Dist: apache-airflow-providers-openlineage ; extra == "openlineage"
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
-Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.2.1/changelog.html
-Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.2.1
+Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.3.0/changelog.html
+Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.3.0
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
 Project-URL: Source Code, https://github.com/apache/airflow
 Project-URL: Twitter, https://twitter.com/ApacheAirflow
@@ -80,7 +80,7 @@ Provides-Extra: openlineage
 
 Package ``apache-airflow-providers-snowflake``
 
-Release: ``5.2.1``
+Release: ``5.3.0``
 
 
 `Snowflake <https://www.snowflake.com/>`__
@@ -93,7 +93,7 @@ This is a provider package for ``snowflake`` provider. All classes for this prov
 are in ``airflow.providers.snowflake`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.2.1/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.3.0/>`_.
 
 Installation
 ------------
@@ -137,4 +137,4 @@ Dependent package
 ============================================================================================================== ===============
 
 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.2.1/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.3.0/changelog.html>`_.
{apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/README.rst
RENAMED
@@ -42,7 +42,7 @@
 
 Package ``apache-airflow-providers-snowflake``
 
-Release: ``5.2.1``
+Release: ``5.3.0``
 
 
 `Snowflake <https://www.snowflake.com/>`__
@@ -55,7 +55,7 @@ This is a provider package for ``snowflake`` provider. All classes for this prov
 are in ``airflow.providers.snowflake`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.2.1/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.3.0/>`_.
 
 Installation
 ------------
@@ -99,4 +99,4 @@ Dependent package
 ============================================================================================================== ===============
 
 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.2.1/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.3.0/changelog.html>`_.
|
@@ -27,9 +27,10 @@ def get_provider_info():
|
|
|
27
27
|
"package-name": "apache-airflow-providers-snowflake",
|
|
28
28
|
"name": "Snowflake",
|
|
29
29
|
"description": "`Snowflake <https://www.snowflake.com/>`__\n",
|
|
30
|
-
"
|
|
31
|
-
"source-date-epoch":
|
|
30
|
+
"state": "ready",
|
|
31
|
+
"source-date-epoch": 1705912272,
|
|
32
32
|
"versions": [
|
|
33
|
+
"5.3.0",
|
|
33
34
|
"5.2.1",
|
|
34
35
|
"5.2.0",
|
|
35
36
|
"5.1.2",
|
|
{apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/airflow/providers/snowflake/hooks/snowflake.py
RENAMED
@@ -170,7 +170,9 @@ class SnowflakeHook(DbApiHook):
             warnings.warn(
                 f"Conflicting params `{field_name}` and `{backcompat_key}` found in extras. "
                 f"Using value for `{field_name}`. Please ensure this is the correct "
-                f"value and remove the backcompat key `{backcompat_key}`."
+                f"value and remove the backcompat key `{backcompat_key}`.",
+                UserWarning,
+                stacklevel=2,
             )
             return extra_dict[field_name] or None
         return extra_dict.get(backcompat_key) or None
@@ -300,7 +302,7 @@ class SnowflakeHook(DbApiHook):
     def get_autocommit(self, conn):
         return getattr(conn, "autocommit_mode", False)
 
-    @overload
+    @overload  # type: ignore[override]
     def run(
         self,
         sql: str | Iterable[str],
@@ -385,10 +387,10 @@ class SnowflakeHook(DbApiHook):
         with self._get_cursor(conn, return_dictionaries) as cur:
             results = []
             for sql_statement in sql_list:
-                self._run_command(cur, sql_statement, parameters)
+                self._run_command(cur, sql_statement, parameters)  # type: ignore[attr-defined]
 
                 if handler is not None:
-                    result = self._make_common_data_structure(handler(cur))
+                    result = self._make_common_data_structure(handler(cur))  # type: ignore[attr-defined]
                     if return_single_query_results(sql, return_last, split_statements):
                         _last_result = result
                         _last_description = cur.description
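A quick illustration of what the new ``stacklevel=2`` argument changes (a minimal standalone sketch, not provider code; the helper name and extras keys below are hypothetical): with ``stacklevel=2`` the warning is attributed to the caller's line rather than to the helper that emits it, so the backcompat message points at the code that passed the conflicting extras.

from __future__ import annotations

import warnings


def get_field_with_backcompat(extra_dict: dict, field_name: str) -> str | None:
    # Hypothetical helper mirroring the hook's backcompat handling above.
    backcompat_key = f"extra__snowflake__{field_name}"
    if field_name in extra_dict and backcompat_key in extra_dict:
        warnings.warn(
            f"Conflicting params `{field_name}` and `{backcompat_key}` found in extras. "
            f"Using value for `{field_name}`.",
            UserWarning,
            stacklevel=2,  # report the warning at the caller's line, not inside this helper
        )
        return extra_dict[field_name] or None
    return extra_dict.get(backcompat_key) or None


# With stacklevel=2 the warning below is reported at this call site.
get_field_with_backcompat({"account": "abc", "extra__snowflake__account": "xyz"}, "account")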
{apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/airflow/providers/snowflake/operators/snowflake.py
RENAMED
@@ -514,6 +514,21 @@ class SnowflakeSqlApiOperator(SQLExecuteQueryOperator):
         if self.do_xcom_push:
             context["ti"].xcom_push(key="query_ids", value=self.query_ids)
 
+        succeeded_query_ids = []
+        for query_id in self.query_ids:
+            self.log.info("Retrieving status for query id %s", query_id)
+            statement_status = self._hook.get_sql_api_query_status(query_id)
+            if statement_status.get("status") == "running":
+                break
+            elif statement_status.get("status") == "success":
+                succeeded_query_ids.append(query_id)
+            else:
+                raise AirflowException(f"{statement_status.get('status')}: {statement_status.get('message')}")
+
+        if len(self.query_ids) == len(succeeded_query_ids):
+            self.log.info("%s completed successfully.", self.task_id)
+            return
+
         if self.deferrable:
             self.defer(
                 timeout=self.execution_timeout,
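The new block in ``SnowflakeSqlApiOperator.execute`` short-circuits deferral when every submitted statement has already finished. A minimal sketch of that control flow, with a stubbed status lookup standing in for ``get_sql_api_query_status`` (the function and exception names here are hypothetical, not provider API):

from __future__ import annotations


class QueryFailedError(Exception):
    """Stand-in for AirflowException in this sketch."""


def all_queries_succeeded(query_ids: list[str], get_status) -> bool:
    """Return True only if every query already reports success.

    Stops at the first query still 'running' (the caller would then defer)
    and raises on any terminal failure, mirroring the loop added above.
    """
    succeeded = []
    for query_id in query_ids:
        status = get_status(query_id)
        if status.get("status") == "running":
            break
        elif status.get("status") == "success":
            succeeded.append(query_id)
        else:
            raise QueryFailedError(f"{status.get('status')}: {status.get('message')}")
    return len(succeeded) == len(query_ids)


# Example: both statements already finished, so nothing is left to defer.
statuses = {"q1": {"status": "success"}, "q2": {"status": "success"}}
print(all_queries_succeeded(["q1", "q2"], lambda q: statuses[q]))  # True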
apache_airflow_providers_snowflake-5.3.0/airflow/providers/snowflake/transfers/copy_into_snowflake.py
@@ -0,0 +1,298 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Abstract operator that child classes implement ``COPY INTO <TABLE> SQL in Snowflake``."""
+from __future__ import annotations
+
+from typing import Any, Sequence
+
+from airflow.models import BaseOperator
+from airflow.providers.snowflake.hooks.snowflake import SnowflakeHook
+from airflow.providers.snowflake.utils.common import enclose_param
+
+
+class CopyFromExternalStageToSnowflakeOperator(BaseOperator):
+    """
+    Executes a COPY INTO command to load files from an external stage from clouds to Snowflake.
+
+    This operator requires the snowflake_conn_id connection. The snowflake host, login,
+    and, password field must be setup in the connection. Other inputs can be defined
+    in the connection or hook instantiation.
+
+    :param namespace: snowflake namespace
+    :param table: snowflake table
+    :param file_format: file format name i.e. CSV, AVRO, etc
+    :param stage: reference to a specific snowflake stage. If the stage's schema is not the same as the
+        table one, it must be specified
+    :param prefix: cloud storage location specified to limit the set of files to load
+    :param files: files to load into table
+    :param pattern: pattern to load files from external location to table
+    :param copy_into_postifx: optional sql postfix for INSERT INTO query
+        such as `formatTypeOptions` and `copyOptions`
+    :param snowflake_conn_id: Reference to :ref:`Snowflake connection id<howto/connection:snowflake>`
+    :param account: snowflake account name
+    :param warehouse: name of snowflake warehouse
+    :param database: name of snowflake database
+    :param region: name of snowflake region
+    :param role: name of snowflake role
+    :param schema: name of snowflake schema
+    :param authenticator: authenticator for Snowflake.
+        'snowflake' (default) to use the internal Snowflake authenticator
+        'externalbrowser' to authenticate using your web browser and
+        Okta, ADFS or any other SAML 2.0-compliant identify provider
+        (IdP) that has been defined for your account
+        ``https://<your_okta_account_name>.okta.com`` to authenticate
+        through native Okta.
+    :param session_parameters: You can set session-level parameters at
+        the time you connect to Snowflake
+    :param copy_options: snowflake COPY INTO syntax copy options
+    :param validation_mode: snowflake COPY INTO syntax validation mode
+
+    """
+
+    template_fields: Sequence[str] = ("files",)
+    template_fields_renderers = {"files": "json"}
+
+    def __init__(
+        self,
+        *,
+        files: list | None = None,
+        table: str,
+        stage: str,
+        prefix: str | None = None,
+        file_format: str,
+        schema: str | None = None,
+        columns_array: list | None = None,
+        pattern: str | None = None,
+        warehouse: str | None = None,
+        database: str | None = None,
+        autocommit: bool = True,
+        snowflake_conn_id: str = "snowflake_default",
+        role: str | None = None,
+        authenticator: str | None = None,
+        session_parameters: dict | None = None,
+        copy_options: str | None = None,
+        validation_mode: str | None = None,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.files = files
+        self.table = table
+        self.stage = stage
+        self.prefix = prefix
+        self.file_format = file_format
+        self.schema = schema
+        self.columns_array = columns_array
+        self.pattern = pattern
+        self.warehouse = warehouse
+        self.database = database
+        self.autocommit = autocommit
+        self.snowflake_conn_id = snowflake_conn_id
+        self.role = role
+        self.authenticator = authenticator
+        self.session_parameters = session_parameters
+        self.copy_options = copy_options
+        self.validation_mode = validation_mode
+
+        self.hook: SnowflakeHook | None = None
+        self._sql: str | None = None
+        self._result: list[dict[str, Any]] = []
+
+    def execute(self, context: Any) -> None:
+        self.hook = SnowflakeHook(
+            snowflake_conn_id=self.snowflake_conn_id,
+            warehouse=self.warehouse,
+            database=self.database,
+            role=self.role,
+            schema=self.schema,
+            authenticator=self.authenticator,
+            session_parameters=self.session_parameters,
+        )
+
+        if self.schema:
+            into = f"{self.schema}.{self.table}"
+        else:
+            into = self.table
+
+        if self.columns_array:
+            into = f"{into}({', '.join(self.columns_array)})"
+
+        self._sql = f"""
+        COPY INTO {into}
+             FROM @{self.stage}/{self.prefix or ""}
+        {"FILES=(" + ",".join(map(enclose_param, self.files)) + ")" if self.files else ""}
+        {"PATTERN=" + enclose_param(self.pattern) if self.pattern else ""}
+        FILE_FORMAT={self.file_format}
+        {self.copy_options or ""}
+        {self.validation_mode or ""}
+        """
+        self.log.info("Executing COPY command...")
+        self._result = self.hook.run(  # type: ignore # mypy does not work well with return_dictionaries=True
+            sql=self._sql,
+            autocommit=self.autocommit,
+            handler=lambda x: x.fetchall(),
+            return_dictionaries=True,
+        )
+        self.log.info("COPY command completed")
+
+    @staticmethod
+    def _extract_openlineage_unique_dataset_paths(
+        query_result: list[dict[str, Any]],
+    ) -> tuple[list[tuple[str, str]], list[str]]:
+        """Extracts and returns unique OpenLineage dataset paths and file paths that failed to be parsed.
+
+        Each row in the results is expected to have a 'file' field, which is a URI.
+        The function parses these URIs and constructs a set of unique OpenLineage (namespace, name) tuples.
+        Additionally, it captures any URIs that cannot be parsed or processed
+        and returns them in a separate error list.
+
+        For Azure, Snowflake has a unique way of representing URI:
+        azure://<account_name>.blob.core.windows.net/<container_name>/path/to/file.csv
+        that is transformed by this function to a Dataset with more universal naming convention:
+        Dataset(namespace="wasbs://container_name@account_name", name="path/to"), as described at
+        https://github.com/OpenLineage/OpenLineage/blob/main/spec/Naming.md#wasbs-azure-blob-storage
+
+        :param query_result: A list of dictionaries, each containing a 'file' key with a URI value.
+        :return: Two lists - the first is a sorted list of tuples, each representing a unique dataset path,
+            and the second contains any URIs that cannot be parsed or processed correctly.
+
+        >>> method = CopyFromExternalStageToSnowflakeOperator._extract_openlineage_unique_dataset_paths
+
+        >>> results = [{"file": "azure://my_account.blob.core.windows.net/azure_container/dir3/file.csv"}]
+        >>> method(results)
+        ([('wasbs://azure_container@my_account', 'dir3')], [])
+
+        >>> results = [{"file": "azure://my_account.blob.core.windows.net/azure_container"}]
+        >>> method(results)
+        ([('wasbs://azure_container@my_account', '/')], [])
+
+        >>> results = [{"file": "s3://bucket"}, {"file": "gcs://bucket/"}, {"file": "s3://bucket/a.csv"}]
+        >>> method(results)
+        ([('gcs://bucket', '/'), ('s3://bucket', '/')], [])
+
+        >>> results = [{"file": "s3://bucket/dir/file.csv"}, {"file": "gcs://bucket/dir/dir2/a.txt"}]
+        >>> method(results)
+        ([('gcs://bucket', 'dir/dir2'), ('s3://bucket', 'dir')], [])
+
+        >>> results = [
+        ...     {"file": "s3://bucket/dir/file.csv"},
+        ...     {"file": "azure://my_account.something_new.windows.net/azure_container"},
+        ... ]
+        >>> method(results)
+        ([('s3://bucket', 'dir')], ['azure://my_account.something_new.windows.net/azure_container'])
+        """
+        import re
+        from pathlib import Path
+        from urllib.parse import urlparse
+
+        azure_regex = r"azure:\/\/(\w+)?\.blob.core.windows.net\/(\w+)\/?(.*)?"
+        extraction_error_files = []
+        unique_dataset_paths = set()
+
+        for row in query_result:
+            uri = urlparse(row["file"])
+            if uri.scheme == "azure":
+                match = re.fullmatch(azure_regex, row["file"])
+                if not match:
+                    extraction_error_files.append(row["file"])
+                    continue
+                account_name, container_name, name = match.groups()
+                namespace = f"wasbs://{container_name}@{account_name}"
+            else:
+                namespace = f"{uri.scheme}://{uri.netloc}"
+                name = uri.path.lstrip("/")
+
+            name = Path(name).parent.as_posix()
+            if name in ("", "."):
+                name = "/"
+
+            unique_dataset_paths.add((namespace, name))
+
+        return sorted(unique_dataset_paths), sorted(extraction_error_files)
+
+    def get_openlineage_facets_on_complete(self, task_instance):
+        """Implement _on_complete because we rely on return value of a query."""
+        import re
+
+        from openlineage.client.facet import (
+            ExternalQueryRunFacet,
+            ExtractionError,
+            ExtractionErrorRunFacet,
+            SqlJobFacet,
+        )
+        from openlineage.client.run import Dataset
+
+        from airflow.providers.openlineage.extractors import OperatorLineage
+        from airflow.providers.openlineage.sqlparser import SQLParser
+
+        if not self._sql:
+            return OperatorLineage()
+
+        query_results = self._result or []
+        # If no files were uploaded we get [{"status": "0 files were uploaded..."}]
+        if len(query_results) == 1 and query_results[0].get("status"):
+            query_results = []
+        unique_dataset_paths, extraction_error_files = self._extract_openlineage_unique_dataset_paths(
+            query_results
+        )
+        input_datasets = [Dataset(namespace=namespace, name=name) for namespace, name in unique_dataset_paths]
+
+        run_facets = {}
+        if extraction_error_files:
+            self.log.debug(
+                f"Unable to extract Dataset namespace and name "
+                f"for the following files: `{extraction_error_files}`."
+            )
+            run_facets["extractionError"] = ExtractionErrorRunFacet(
+                totalTasks=len(query_results),
+                failedTasks=len(extraction_error_files),
+                errors=[
+                    ExtractionError(
+                        errorMessage="Unable to extract Dataset namespace and name.",
+                        stackTrace=None,
+                        task=file_uri,
+                        taskNumber=None,
+                    )
+                    for file_uri in extraction_error_files
+                ],
+            )
+
+        connection = self.hook.get_connection(getattr(self.hook, str(self.hook.conn_name_attr)))
+        database_info = self.hook.get_openlineage_database_info(connection)
+
+        dest_name = self.table
+        schema = self.hook.get_openlineage_default_schema()
+        database = database_info.database
+        if schema:
+            dest_name = f"{schema}.{dest_name}"
+            if database:
+                dest_name = f"{database}.{dest_name}"
+
+        snowflake_namespace = SQLParser.create_namespace(database_info)
+        query = SQLParser.normalize_sql(self._sql)
+        query = re.sub(r"\n+", "\n", re.sub(r" +", " ", query))
+
+        run_facets["externalQuery"] = ExternalQueryRunFacet(
+            externalQueryId=self.hook.query_ids[0], source=snowflake_namespace
+        )
+
+        return OperatorLineage(
+            inputs=input_datasets,
+            outputs=[Dataset(namespace=snowflake_namespace, name=dest_name)],
+            job_facets={"sql": SqlJobFacet(query=query)},
+            run_facets=run_facets,
+        )
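For orientation, a hedged usage sketch of ``CopyFromExternalStageToSnowflakeOperator`` as added above; the DAG id, connection id, table, stage, prefix, and file format values are placeholders, while the operator, its import path, and its parameter names come from the file in this diff:

from __future__ import annotations

from datetime import datetime

from airflow import DAG
from airflow.providers.snowflake.transfers.copy_into_snowflake import (
    CopyFromExternalStageToSnowflakeOperator,
)

with DAG(
    dag_id="example_copy_into_snowflake",  # placeholder DAG id
    start_date=datetime(2024, 1, 1),
    schedule=None,
    catchup=False,
):
    copy_into_table = CopyFromExternalStageToSnowflakeOperator(
        task_id="copy_into_table",
        snowflake_conn_id="snowflake_default",
        table="my_table",  # placeholder target table
        stage="my_external_stage",  # placeholder external stage name
        prefix="data/",  # limit the load to this storage prefix
        file_format="(type = 'CSV', field_delimiter = ',', skip_header = 1)",
        pattern=".*[.]csv",
    )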
{apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/pyproject.toml
RENAMED
@@ -28,7 +28,7 @@ build-backend = "flit_core.buildapi"
 
 [project]
 name = "apache-airflow-providers-snowflake"
-version = "5.2.1"
+version = "5.3.0"
 description = "Provider package apache-airflow-providers-snowflake for Apache Airflow"
 readme = "README.rst"
 authors = [
@@ -62,8 +62,8 @@ dependencies = [
 ]
 
 [project.urls]
-"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.2.1"
-"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.2.1/changelog.html"
+"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.3.0"
+"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.3.0/changelog.html"
 "Bug Tracker" = "https://github.com/apache/airflow/issues"
 "Source Code" = "https://github.com/apache/airflow"
 "Slack Chat" = "https://s.apache.org/airflow-slack"
apache_airflow_providers_snowflake-5.2.1/airflow/providers/snowflake/transfers/copy_into_snowflake.py
@@ -1,141 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Abstract operator that child classes implement ``COPY INTO <TABLE> SQL in Snowflake``."""
-from __future__ import annotations
-
-from typing import Any, Sequence
-
-from airflow.models import BaseOperator
-from airflow.providers.snowflake.hooks.snowflake import SnowflakeHook
-from airflow.providers.snowflake.utils.common import enclose_param
-
-
-class CopyFromExternalStageToSnowflakeOperator(BaseOperator):
-    """
-    Executes a COPY INTO command to load files from an external stage from clouds to Snowflake.
-
-    This operator requires the snowflake_conn_id connection. The snowflake host, login,
-    and, password field must be setup in the connection. Other inputs can be defined
-    in the connection or hook instantiation.
-
-    :param namespace: snowflake namespace
-    :param table: snowflake table
-    :param file_format: file format name i.e. CSV, AVRO, etc
-    :param stage: reference to a specific snowflake stage. If the stage's schema is not the same as the
-        table one, it must be specified
-    :param prefix: cloud storage location specified to limit the set of files to load
-    :param files: files to load into table
-    :param pattern: pattern to load files from external location to table
-    :param copy_into_postifx: optional sql postfix for INSERT INTO query
-        such as `formatTypeOptions` and `copyOptions`
-    :param snowflake_conn_id: Reference to :ref:`Snowflake connection id<howto/connection:snowflake>`
-    :param account: snowflake account name
-    :param warehouse: name of snowflake warehouse
-    :param database: name of snowflake database
-    :param region: name of snowflake region
-    :param role: name of snowflake role
-    :param schema: name of snowflake schema
-    :param authenticator: authenticator for Snowflake.
-        'snowflake' (default) to use the internal Snowflake authenticator
-        'externalbrowser' to authenticate using your web browser and
-        Okta, ADFS or any other SAML 2.0-compliant identify provider
-        (IdP) that has been defined for your account
-        ``https://<your_okta_account_name>.okta.com`` to authenticate
-        through native Okta.
-    :param session_parameters: You can set session-level parameters at
-        the time you connect to Snowflake
-    :param copy_options: snowflake COPY INTO syntax copy options
-    :param validation_mode: snowflake COPY INTO syntax validation mode
-
-    """
-
-    template_fields: Sequence[str] = ("files",)
-    template_fields_renderers = {"files": "json"}
-
-    def __init__(
-        self,
-        *,
-        files: list | None = None,
-        table: str,
-        stage: str,
-        prefix: str | None = None,
-        file_format: str,
-        schema: str | None = None,
-        columns_array: list | None = None,
-        pattern: str | None = None,
-        warehouse: str | None = None,
-        database: str | None = None,
-        autocommit: bool = True,
-        snowflake_conn_id: str = "snowflake_default",
-        role: str | None = None,
-        authenticator: str | None = None,
-        session_parameters: dict | None = None,
-        copy_options: str | None = None,
-        validation_mode: str | None = None,
-        **kwargs,
-    ):
-        super().__init__(**kwargs)
-        self.files = files
-        self.table = table
-        self.stage = stage
-        self.prefix = prefix
-        self.file_format = file_format
-        self.schema = schema
-        self.columns_array = columns_array
-        self.pattern = pattern
-        self.warehouse = warehouse
-        self.database = database
-        self.autocommit = autocommit
-        self.snowflake_conn_id = snowflake_conn_id
-        self.role = role
-        self.authenticator = authenticator
-        self.session_parameters = session_parameters
-        self.copy_options = copy_options
-        self.validation_mode = validation_mode
-
-    def execute(self, context: Any) -> None:
-        snowflake_hook = SnowflakeHook(
-            snowflake_conn_id=self.snowflake_conn_id,
-            warehouse=self.warehouse,
-            database=self.database,
-            role=self.role,
-            schema=self.schema,
-            authenticator=self.authenticator,
-            session_parameters=self.session_parameters,
-        )
-
-        if self.schema:
-            into = f"{self.schema}.{self.table}"
-        else:
-            into = self.table
-
-        if self.columns_array:
-            into = f"{into}({', '.join(self.columns_array)})"
-
-        sql = f"""
-        COPY INTO {into}
-             FROM @{self.stage}/{self.prefix or ""}
-        {"FILES=(" + ",".join(map(enclose_param, self.files)) + ")" if self.files else ""}
-        {"PATTERN=" + enclose_param(self.pattern) if self.pattern else ""}
-        FILE_FORMAT={self.file_format}
-        {self.copy_options or ""}
-        {self.validation_mode or ""}
-        """
-        self.log.info("Executing COPY command...")
-        snowflake_hook.run(sql=sql, autocommit=self.autocommit)
-        self.log.info("COPY command completed")