apache-airflow-providers-snowflake 5.2.1__tar.gz → 5.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of apache-airflow-providers-snowflake might be problematic.

Files changed (19)
  1. {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/PKG-INFO +6 -6
  2. {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/README.rst +3 -3
  3. {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/airflow/providers/snowflake/__init__.py +1 -1
  4. {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/airflow/providers/snowflake/get_provider_info.py +3 -2
  5. {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/airflow/providers/snowflake/hooks/snowflake.py +6 -4
  6. {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/airflow/providers/snowflake/operators/snowflake.py +15 -0
  7. apache_airflow_providers_snowflake-5.3.0/airflow/providers/snowflake/transfers/copy_into_snowflake.py +298 -0
  8. {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/pyproject.toml +3 -3
  9. apache_airflow_providers_snowflake-5.2.1/airflow/providers/snowflake/transfers/copy_into_snowflake.py +0 -141
  10. {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/airflow/providers/snowflake/LICENSE +0 -0
  11. {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/airflow/providers/snowflake/hooks/__init__.py +0 -0
  12. {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/airflow/providers/snowflake/hooks/snowflake_sql_api.py +0 -0
  13. {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/airflow/providers/snowflake/operators/__init__.py +0 -0
  14. {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/airflow/providers/snowflake/transfers/__init__.py +0 -0
  15. {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/airflow/providers/snowflake/triggers/__init__.py +0 -0
  16. {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/airflow/providers/snowflake/triggers/snowflake_trigger.py +0 -0
  17. {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/airflow/providers/snowflake/utils/__init__.py +0 -0
  18. {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/airflow/providers/snowflake/utils/common.py +0 -0
  19. {apache_airflow_providers_snowflake-5.2.1 → apache_airflow_providers_snowflake-5.3.0}/airflow/providers/snowflake/utils/sql_api_generate_jwt.py +0 -0

PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: apache-airflow-providers-snowflake
- Version: 5.2.1
+ Version: 5.3.0
  Summary: Provider package apache-airflow-providers-snowflake for Apache Airflow
  Keywords: airflow-provider,snowflake,airflow,integration
  Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -27,8 +27,8 @@ Requires-Dist: snowflake-sqlalchemy>=1.1.0
  Requires-Dist: apache-airflow-providers-common-sql ; extra == "common.sql"
  Requires-Dist: apache-airflow-providers-openlineage ; extra == "openlineage"
  Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
- Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.2.1/changelog.html
- Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.2.1
+ Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.3.0/changelog.html
+ Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.3.0
  Project-URL: Slack Chat, https://s.apache.org/airflow-slack
  Project-URL: Source Code, https://github.com/apache/airflow
  Project-URL: Twitter, https://twitter.com/ApacheAirflow
@@ -80,7 +80,7 @@ Provides-Extra: openlineage

  Package ``apache-airflow-providers-snowflake``

- Release: ``5.2.1``
+ Release: ``5.3.0``


  `Snowflake <https://www.snowflake.com/>`__
@@ -93,7 +93,7 @@ This is a provider package for ``snowflake`` provider. All classes for this prov
  are in ``airflow.providers.snowflake`` python package.

  You can find package information and changelog for the provider
- in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.2.1/>`_.
+ in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.3.0/>`_.

  Installation
  ------------
@@ -137,4 +137,4 @@ Dependent package
  ============================================================================================================== ===============

  The changelog for the provider package can be found in the
- `changelog <https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.2.1/changelog.html>`_.
+ `changelog <https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.3.0/changelog.html>`_.

README.rst
@@ -42,7 +42,7 @@

  Package ``apache-airflow-providers-snowflake``

- Release: ``5.2.1``
+ Release: ``5.3.0``


  `Snowflake <https://www.snowflake.com/>`__
@@ -55,7 +55,7 @@ This is a provider package for ``snowflake`` provider. All classes for this prov
  are in ``airflow.providers.snowflake`` python package.

  You can find package information and changelog for the provider
- in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.2.1/>`_.
+ in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.3.0/>`_.

  Installation
  ------------
@@ -99,4 +99,4 @@ Dependent package
  ============================================================================================================== ===============

  The changelog for the provider package can be found in the
- `changelog <https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.2.1/changelog.html>`_.
+ `changelog <https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.3.0/changelog.html>`_.

airflow/providers/snowflake/__init__.py
@@ -27,7 +27,7 @@ import packaging.version

  __all__ = ["__version__"]

- __version__ = "5.2.1"
+ __version__ = "5.3.0"

  try:
      from airflow import __version__ as airflow_version

airflow/providers/snowflake/get_provider_info.py
@@ -27,9 +27,10 @@ def get_provider_info():
          "package-name": "apache-airflow-providers-snowflake",
          "name": "Snowflake",
          "description": "`Snowflake <https://www.snowflake.com/>`__\n",
-         "suspended": False,
-         "source-date-epoch": 1703288173,
+         "state": "ready",
+         "source-date-epoch": 1705912272,
          "versions": [
+             "5.3.0",
              "5.2.1",
              "5.2.0",
              "5.1.2",

airflow/providers/snowflake/hooks/snowflake.py
@@ -170,7 +170,9 @@ class SnowflakeHook(DbApiHook):
              warnings.warn(
                  f"Conflicting params `{field_name}` and `{backcompat_key}` found in extras. "
                  f"Using value for `{field_name}`. Please ensure this is the correct "
-                 f"value and remove the backcompat key `{backcompat_key}`."
+                 f"value and remove the backcompat key `{backcompat_key}`.",
+                 UserWarning,
+                 stacklevel=2,
              )
              return extra_dict[field_name] or None
          return extra_dict.get(backcompat_key) or None
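
Note: the rewritten ``warnings.warn`` call now passes an explicit category and ``stacklevel=2``, so the warning is attributed to the code that called the hook rather than to the hook's own frame. A minimal standalone sketch of the effect (not the provider's code):

    import warnings

    def read_backcompat_field(field_name: str) -> None:
        # With stacklevel=2 the reported file/line is the caller's, which is
        # where the conflicting extras actually need to be fixed.
        warnings.warn(
            f"Conflicting params for `{field_name}` found in extras.",
            UserWarning,
            stacklevel=2,
        )

    read_backcompat_field("account")  # the warning is reported at this call site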
@@ -300,7 +302,7 @@ class SnowflakeHook(DbApiHook):
      def get_autocommit(self, conn):
          return getattr(conn, "autocommit_mode", False)

-     @overload
+     @overload  # type: ignore[override]
      def run(
          self,
          sql: str | Iterable[str],
@@ -385,10 +387,10 @@ class SnowflakeHook(DbApiHook):
              with self._get_cursor(conn, return_dictionaries) as cur:
                  results = []
                  for sql_statement in sql_list:
-                     self._run_command(cur, sql_statement, parameters)
+                     self._run_command(cur, sql_statement, parameters)  # type: ignore[attr-defined]

                      if handler is not None:
-                         result = self._make_common_data_structure(handler(cur))
+                         result = self._make_common_data_structure(handler(cur))  # type: ignore[attr-defined]
                          if return_single_query_results(sql, return_last, split_statements):
                              _last_result = result
                              _last_description = cur.description

airflow/providers/snowflake/operators/snowflake.py
@@ -514,6 +514,21 @@ class SnowflakeSqlApiOperator(SQLExecuteQueryOperator):
          if self.do_xcom_push:
              context["ti"].xcom_push(key="query_ids", value=self.query_ids)

+         succeeded_query_ids = []
+         for query_id in self.query_ids:
+             self.log.info("Retrieving status for query id %s", query_id)
+             statement_status = self._hook.get_sql_api_query_status(query_id)
+             if statement_status.get("status") == "running":
+                 break
+             elif statement_status.get("status") == "success":
+                 succeeded_query_ids.append(query_id)
+             else:
+                 raise AirflowException(f"{statement_status.get('status')}: {statement_status.get('message')}")
+
+         if len(self.query_ids) == len(succeeded_query_ids):
+             self.log.info("%s completed successfully.", self.task_id)
+             return
+
          if self.deferrable:
              self.defer(
                  timeout=self.execution_timeout,
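
Note: the fifteen added lines poll each submitted query id once through the SQL API before the operator defers. A query still reported as "running" falls through to the deferral path below, a failed query raises ``AirflowException`` immediately, and if every query already reports success the task finishes synchronously without a triggerer round-trip. A hedged usage sketch (the connection id and SQL are placeholders):

    from airflow.providers.snowflake.operators.snowflake import SnowflakeSqlApiOperator

    quick_select = SnowflakeSqlApiOperator(
        task_id="quick_select",
        snowflake_conn_id="snowflake_default",  # assumed connection id
        sql="SELECT 1;",    # a statement that completes almost immediately
        statement_count=1,  # number of statements in ``sql``
        deferrable=True,    # still defers, but only while a query is running
    )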

apache_airflow_providers_snowflake-5.3.0/airflow/providers/snowflake/transfers/copy_into_snowflake.py (new file)
@@ -0,0 +1,298 @@
+ #
+ # Licensed to the Apache Software Foundation (ASF) under one
+ # or more contributor license agreements. See the NOTICE file
+ # distributed with this work for additional information
+ # regarding copyright ownership. The ASF licenses this file
+ # to you under the Apache License, Version 2.0 (the
+ # "License"); you may not use this file except in compliance
+ # with the License. You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing,
+ # software distributed under the License is distributed on an
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ # KIND, either express or implied. See the License for the
+ # specific language governing permissions and limitations
+ # under the License.
+ """Abstract operator that child classes implement ``COPY INTO <TABLE> SQL in Snowflake``."""
+ from __future__ import annotations
+
+ from typing import Any, Sequence
+
+ from airflow.models import BaseOperator
+ from airflow.providers.snowflake.hooks.snowflake import SnowflakeHook
+ from airflow.providers.snowflake.utils.common import enclose_param
+
+
+ class CopyFromExternalStageToSnowflakeOperator(BaseOperator):
+     """
+     Executes a COPY INTO command to load files from an external stage from clouds to Snowflake.
+
+     This operator requires the snowflake_conn_id connection. The snowflake host, login,
+     and, password field must be setup in the connection. Other inputs can be defined
+     in the connection or hook instantiation.
+
+     :param namespace: snowflake namespace
+     :param table: snowflake table
+     :param file_format: file format name i.e. CSV, AVRO, etc
+     :param stage: reference to a specific snowflake stage. If the stage's schema is not the same as the
+         table one, it must be specified
+     :param prefix: cloud storage location specified to limit the set of files to load
+     :param files: files to load into table
+     :param pattern: pattern to load files from external location to table
+     :param copy_into_postifx: optional sql postfix for INSERT INTO query
+         such as `formatTypeOptions` and `copyOptions`
+     :param snowflake_conn_id: Reference to :ref:`Snowflake connection id<howto/connection:snowflake>`
+     :param account: snowflake account name
+     :param warehouse: name of snowflake warehouse
+     :param database: name of snowflake database
+     :param region: name of snowflake region
+     :param role: name of snowflake role
+     :param schema: name of snowflake schema
+     :param authenticator: authenticator for Snowflake.
+         'snowflake' (default) to use the internal Snowflake authenticator
+         'externalbrowser' to authenticate using your web browser and
+         Okta, ADFS or any other SAML 2.0-compliant identify provider
+         (IdP) that has been defined for your account
+         ``https://<your_okta_account_name>.okta.com`` to authenticate
+         through native Okta.
+     :param session_parameters: You can set session-level parameters at
+         the time you connect to Snowflake
+     :param copy_options: snowflake COPY INTO syntax copy options
+     :param validation_mode: snowflake COPY INTO syntax validation mode
+
+     """
+
+     template_fields: Sequence[str] = ("files",)
+     template_fields_renderers = {"files": "json"}
+
+     def __init__(
+         self,
+         *,
+         files: list | None = None,
+         table: str,
+         stage: str,
+         prefix: str | None = None,
+         file_format: str,
+         schema: str | None = None,
+         columns_array: list | None = None,
+         pattern: str | None = None,
+         warehouse: str | None = None,
+         database: str | None = None,
+         autocommit: bool = True,
+         snowflake_conn_id: str = "snowflake_default",
+         role: str | None = None,
+         authenticator: str | None = None,
+         session_parameters: dict | None = None,
+         copy_options: str | None = None,
+         validation_mode: str | None = None,
+         **kwargs,
+     ):
+         super().__init__(**kwargs)
+         self.files = files
+         self.table = table
+         self.stage = stage
+         self.prefix = prefix
+         self.file_format = file_format
+         self.schema = schema
+         self.columns_array = columns_array
+         self.pattern = pattern
+         self.warehouse = warehouse
+         self.database = database
+         self.autocommit = autocommit
+         self.snowflake_conn_id = snowflake_conn_id
+         self.role = role
+         self.authenticator = authenticator
+         self.session_parameters = session_parameters
+         self.copy_options = copy_options
+         self.validation_mode = validation_mode
+
+         self.hook: SnowflakeHook | None = None
+         self._sql: str | None = None
+         self._result: list[dict[str, Any]] = []
+
+     def execute(self, context: Any) -> None:
+         self.hook = SnowflakeHook(
+             snowflake_conn_id=self.snowflake_conn_id,
+             warehouse=self.warehouse,
+             database=self.database,
+             role=self.role,
+             schema=self.schema,
+             authenticator=self.authenticator,
+             session_parameters=self.session_parameters,
+         )
+
+         if self.schema:
+             into = f"{self.schema}.{self.table}"
+         else:
+             into = self.table
+
+         if self.columns_array:
+             into = f"{into}({', '.join(self.columns_array)})"
+
+         self._sql = f"""
+         COPY INTO {into}
+         FROM @{self.stage}/{self.prefix or ""}
+         {"FILES=(" + ",".join(map(enclose_param, self.files)) + ")" if self.files else ""}
+         {"PATTERN=" + enclose_param(self.pattern) if self.pattern else ""}
+         FILE_FORMAT={self.file_format}
+         {self.copy_options or ""}
+         {self.validation_mode or ""}
+         """
+         self.log.info("Executing COPY command...")
+         self._result = self.hook.run(  # type: ignore # mypy does not work well with return_dictionaries=True
+             sql=self._sql,
+             autocommit=self.autocommit,
+             handler=lambda x: x.fetchall(),
+             return_dictionaries=True,
+         )
+         self.log.info("COPY command completed")
+
+     @staticmethod
+     def _extract_openlineage_unique_dataset_paths(
+         query_result: list[dict[str, Any]],
+     ) -> tuple[list[tuple[str, str]], list[str]]:
+         """Extracts and returns unique OpenLineage dataset paths and file paths that failed to be parsed.
+
+         Each row in the results is expected to have a 'file' field, which is a URI.
+         The function parses these URIs and constructs a set of unique OpenLineage (namespace, name) tuples.
+         Additionally, it captures any URIs that cannot be parsed or processed
+         and returns them in a separate error list.
+
+         For Azure, Snowflake has a unique way of representing URI:
+             azure://<account_name>.blob.core.windows.net/<container_name>/path/to/file.csv
+         that is transformed by this function to a Dataset with more universal naming convention:
+             Dataset(namespace="wasbs://container_name@account_name", name="path/to"), as described at
+         https://github.com/OpenLineage/OpenLineage/blob/main/spec/Naming.md#wasbs-azure-blob-storage
+
+         :param query_result: A list of dictionaries, each containing a 'file' key with a URI value.
+         :return: Two lists - the first is a sorted list of tuples, each representing a unique dataset path,
+             and the second contains any URIs that cannot be parsed or processed correctly.
+
+         >>> method = CopyFromExternalStageToSnowflakeOperator._extract_openlineage_unique_dataset_paths
+
+         >>> results = [{"file": "azure://my_account.blob.core.windows.net/azure_container/dir3/file.csv"}]
+         >>> method(results)
+         ([('wasbs://azure_container@my_account', 'dir3')], [])
+
+         >>> results = [{"file": "azure://my_account.blob.core.windows.net/azure_container"}]
+         >>> method(results)
+         ([('wasbs://azure_container@my_account', '/')], [])
+
+         >>> results = [{"file": "s3://bucket"}, {"file": "gcs://bucket/"}, {"file": "s3://bucket/a.csv"}]
+         >>> method(results)
+         ([('gcs://bucket', '/'), ('s3://bucket', '/')], [])
+
+         >>> results = [{"file": "s3://bucket/dir/file.csv"}, {"file": "gcs://bucket/dir/dir2/a.txt"}]
+         >>> method(results)
+         ([('gcs://bucket', 'dir/dir2'), ('s3://bucket', 'dir')], [])
+
+         >>> results = [
+         ...     {"file": "s3://bucket/dir/file.csv"},
+         ...     {"file": "azure://my_account.something_new.windows.net/azure_container"},
+         ... ]
+         >>> method(results)
+         ([('s3://bucket', 'dir')], ['azure://my_account.something_new.windows.net/azure_container'])
+         """
+         import re
+         from pathlib import Path
+         from urllib.parse import urlparse
+
+         azure_regex = r"azure:\/\/(\w+)?\.blob.core.windows.net\/(\w+)\/?(.*)?"
+         extraction_error_files = []
+         unique_dataset_paths = set()
+
+         for row in query_result:
+             uri = urlparse(row["file"])
+             if uri.scheme == "azure":
+                 match = re.fullmatch(azure_regex, row["file"])
+                 if not match:
+                     extraction_error_files.append(row["file"])
+                     continue
+                 account_name, container_name, name = match.groups()
+                 namespace = f"wasbs://{container_name}@{account_name}"
+             else:
+                 namespace = f"{uri.scheme}://{uri.netloc}"
+                 name = uri.path.lstrip("/")
+
+             name = Path(name).parent.as_posix()
+             if name in ("", "."):
+                 name = "/"
+
+             unique_dataset_paths.add((namespace, name))
+
+         return sorted(unique_dataset_paths), sorted(extraction_error_files)
+
+     def get_openlineage_facets_on_complete(self, task_instance):
+         """Implement _on_complete because we rely on return value of a query."""
+         import re
+
+         from openlineage.client.facet import (
+             ExternalQueryRunFacet,
+             ExtractionError,
+             ExtractionErrorRunFacet,
+             SqlJobFacet,
+         )
+         from openlineage.client.run import Dataset
+
+         from airflow.providers.openlineage.extractors import OperatorLineage
+         from airflow.providers.openlineage.sqlparser import SQLParser
+
+         if not self._sql:
+             return OperatorLineage()
+
+         query_results = self._result or []
+         # If no files were uploaded we get [{"status": "0 files were uploaded..."}]
+         if len(query_results) == 1 and query_results[0].get("status"):
+             query_results = []
+         unique_dataset_paths, extraction_error_files = self._extract_openlineage_unique_dataset_paths(
+             query_results
+         )
+         input_datasets = [Dataset(namespace=namespace, name=name) for namespace, name in unique_dataset_paths]
+
+         run_facets = {}
+         if extraction_error_files:
+             self.log.debug(
+                 f"Unable to extract Dataset namespace and name "
+                 f"for the following files: `{extraction_error_files}`."
+             )
+             run_facets["extractionError"] = ExtractionErrorRunFacet(
+                 totalTasks=len(query_results),
+                 failedTasks=len(extraction_error_files),
+                 errors=[
+                     ExtractionError(
+                         errorMessage="Unable to extract Dataset namespace and name.",
+                         stackTrace=None,
+                         task=file_uri,
+                         taskNumber=None,
+                     )
+                     for file_uri in extraction_error_files
+                 ],
+             )
+
+         connection = self.hook.get_connection(getattr(self.hook, str(self.hook.conn_name_attr)))
+         database_info = self.hook.get_openlineage_database_info(connection)
+
+         dest_name = self.table
+         schema = self.hook.get_openlineage_default_schema()
+         database = database_info.database
+         if schema:
+             dest_name = f"{schema}.{dest_name}"
+         if database:
+             dest_name = f"{database}.{dest_name}"
+
+         snowflake_namespace = SQLParser.create_namespace(database_info)
+         query = SQLParser.normalize_sql(self._sql)
+         query = re.sub(r"\n+", "\n", re.sub(r" +", " ", query))
+
+         run_facets["externalQuery"] = ExternalQueryRunFacet(
+             externalQueryId=self.hook.query_ids[0], source=snowflake_namespace
+         )
+
+         return OperatorLineage(
+             inputs=input_datasets,
+             outputs=[Dataset(namespace=snowflake_namespace, name=dest_name)],
+             job_facets={"sql": SqlJobFacet(query=query)},
+             run_facets=run_facets,
+         )
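
Note: unlike the 5.2.1 version removed below, the operator now keeps the hook, the rendered COPY statement, and the query result on the instance (``self.hook``, ``self._sql``, ``self._result``) so that ``get_openlineage_facets_on_complete`` can build input/output lineage after ``execute`` returns. A hedged usage sketch (the connection, stage, and table names are placeholders):

    from airflow.providers.snowflake.transfers.copy_into_snowflake import (
        CopyFromExternalStageToSnowflakeOperator,
    )

    copy_orders = CopyFromExternalStageToSnowflakeOperator(
        task_id="copy_orders",
        snowflake_conn_id="snowflake_default",  # assumed connection id
        table="ORDERS",  # placeholder target table
        stage="my_external_stage",  # placeholder stage pointing at S3/GCS/Azure
        prefix="raw/orders/",  # optional path prefix inside the stage
        file_format="(type = 'CSV', skip_header = 1)",  # inline spec or a named file format
        pattern=".*[.]csv",  # optional regex filter over file names
    )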

pyproject.toml
@@ -28,7 +28,7 @@ build-backend = "flit_core.buildapi"

  [project]
  name = "apache-airflow-providers-snowflake"
- version = "5.2.1"
+ version = "5.3.0"
  description = "Provider package apache-airflow-providers-snowflake for Apache Airflow"
  readme = "README.rst"
  authors = [
@@ -62,8 +62,8 @@ dependencies = [
  ]

  [project.urls]
- "Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.2.1"
- "Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.2.1/changelog.html"
+ "Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.3.0"
+ "Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.3.0/changelog.html"
  "Bug Tracker" = "https://github.com/apache/airflow/issues"
  "Source Code" = "https://github.com/apache/airflow"
  "Slack Chat" = "https://s.apache.org/airflow-slack"

apache_airflow_providers_snowflake-5.2.1/airflow/providers/snowflake/transfers/copy_into_snowflake.py (removed)
@@ -1,141 +0,0 @@
- #
- # Licensed to the Apache Software Foundation (ASF) under one
- # or more contributor license agreements. See the NOTICE file
- # distributed with this work for additional information
- # regarding copyright ownership. The ASF licenses this file
- # to you under the Apache License, Version 2.0 (the
- # "License"); you may not use this file except in compliance
- # with the License. You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing,
- # software distributed under the License is distributed on an
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- # KIND, either express or implied. See the License for the
- # specific language governing permissions and limitations
- # under the License.
- """Abstract operator that child classes implement ``COPY INTO <TABLE> SQL in Snowflake``."""
- from __future__ import annotations
-
- from typing import Any, Sequence
-
- from airflow.models import BaseOperator
- from airflow.providers.snowflake.hooks.snowflake import SnowflakeHook
- from airflow.providers.snowflake.utils.common import enclose_param
-
-
- class CopyFromExternalStageToSnowflakeOperator(BaseOperator):
-     """
-     Executes a COPY INTO command to load files from an external stage from clouds to Snowflake.
-
-     This operator requires the snowflake_conn_id connection. The snowflake host, login,
-     and, password field must be setup in the connection. Other inputs can be defined
-     in the connection or hook instantiation.
-
-     :param namespace: snowflake namespace
-     :param table: snowflake table
-     :param file_format: file format name i.e. CSV, AVRO, etc
-     :param stage: reference to a specific snowflake stage. If the stage's schema is not the same as the
-         table one, it must be specified
-     :param prefix: cloud storage location specified to limit the set of files to load
-     :param files: files to load into table
-     :param pattern: pattern to load files from external location to table
-     :param copy_into_postifx: optional sql postfix for INSERT INTO query
-         such as `formatTypeOptions` and `copyOptions`
-     :param snowflake_conn_id: Reference to :ref:`Snowflake connection id<howto/connection:snowflake>`
-     :param account: snowflake account name
-     :param warehouse: name of snowflake warehouse
-     :param database: name of snowflake database
-     :param region: name of snowflake region
-     :param role: name of snowflake role
-     :param schema: name of snowflake schema
-     :param authenticator: authenticator for Snowflake.
-         'snowflake' (default) to use the internal Snowflake authenticator
-         'externalbrowser' to authenticate using your web browser and
-         Okta, ADFS or any other SAML 2.0-compliant identify provider
-         (IdP) that has been defined for your account
-         ``https://<your_okta_account_name>.okta.com`` to authenticate
-         through native Okta.
-     :param session_parameters: You can set session-level parameters at
-         the time you connect to Snowflake
-     :param copy_options: snowflake COPY INTO syntax copy options
-     :param validation_mode: snowflake COPY INTO syntax validation mode
-
-     """
-
-     template_fields: Sequence[str] = ("files",)
-     template_fields_renderers = {"files": "json"}
-
-     def __init__(
-         self,
-         *,
-         files: list | None = None,
-         table: str,
-         stage: str,
-         prefix: str | None = None,
-         file_format: str,
-         schema: str | None = None,
-         columns_array: list | None = None,
-         pattern: str | None = None,
-         warehouse: str | None = None,
-         database: str | None = None,
-         autocommit: bool = True,
-         snowflake_conn_id: str = "snowflake_default",
-         role: str | None = None,
-         authenticator: str | None = None,
-         session_parameters: dict | None = None,
-         copy_options: str | None = None,
-         validation_mode: str | None = None,
-         **kwargs,
-     ):
-         super().__init__(**kwargs)
-         self.files = files
-         self.table = table
-         self.stage = stage
-         self.prefix = prefix
-         self.file_format = file_format
-         self.schema = schema
-         self.columns_array = columns_array
-         self.pattern = pattern
-         self.warehouse = warehouse
-         self.database = database
-         self.autocommit = autocommit
-         self.snowflake_conn_id = snowflake_conn_id
-         self.role = role
-         self.authenticator = authenticator
-         self.session_parameters = session_parameters
-         self.copy_options = copy_options
-         self.validation_mode = validation_mode
-
-     def execute(self, context: Any) -> None:
-         snowflake_hook = SnowflakeHook(
-             snowflake_conn_id=self.snowflake_conn_id,
-             warehouse=self.warehouse,
-             database=self.database,
-             role=self.role,
-             schema=self.schema,
-             authenticator=self.authenticator,
-             session_parameters=self.session_parameters,
-         )
-
-         if self.schema:
-             into = f"{self.schema}.{self.table}"
-         else:
-             into = self.table
-
-         if self.columns_array:
-             into = f"{into}({', '.join(self.columns_array)})"
-
-         sql = f"""
-         COPY INTO {into}
-         FROM @{self.stage}/{self.prefix or ""}
-         {"FILES=(" + ",".join(map(enclose_param, self.files)) + ")" if self.files else ""}
-         {"PATTERN=" + enclose_param(self.pattern) if self.pattern else ""}
-         FILE_FORMAT={self.file_format}
-         {self.copy_options or ""}
-         {self.validation_mode or ""}
-         """
-         self.log.info("Executing COPY command...")
-         snowflake_hook.run(sql=sql, autocommit=self.autocommit)
-         self.log.info("COPY command completed")