apache-airflow-providers-snowflake 5.2.1rc1__py3-none-any.whl → 5.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- a/airflow/providers/snowflake/__init__.py
+++ b/airflow/providers/snowflake/__init__.py
@@ -27,7 +27,7 @@ import packaging.version
 
 __all__ = ["__version__"]
 
-__version__ = "5.2.1"
+__version__ = "5.3.0"
 
 try:
     from airflow import __version__ as airflow_version
--- a/airflow/providers/snowflake/get_provider_info.py
+++ b/airflow/providers/snowflake/get_provider_info.py
@@ -27,9 +27,10 @@ def get_provider_info():
         "package-name": "apache-airflow-providers-snowflake",
         "name": "Snowflake",
         "description": "`Snowflake <https://www.snowflake.com/>`__\n",
-        "suspended": False,
-        "source-date-epoch": 1703288173,
+        "state": "ready",
+        "source-date-epoch": 1705912272,
         "versions": [
+            "5.3.0",
             "5.2.1",
             "5.2.0",
             "5.1.2",
--- a/airflow/providers/snowflake/hooks/snowflake.py
+++ b/airflow/providers/snowflake/hooks/snowflake.py
@@ -170,7 +170,9 @@ class SnowflakeHook(DbApiHook):
             warnings.warn(
                 f"Conflicting params `{field_name}` and `{backcompat_key}` found in extras. "
                 f"Using value for `{field_name}`. Please ensure this is the correct "
-                f"value and remove the backcompat key `{backcompat_key}`."
+                f"value and remove the backcompat key `{backcompat_key}`.",
+                UserWarning,
+                stacklevel=2,
             )
             return extra_dict[field_name] or None
         return extra_dict.get(backcompat_key) or None
@@ -300,7 +302,7 @@ class SnowflakeHook(DbApiHook):
     def get_autocommit(self, conn):
        return getattr(conn, "autocommit_mode", False)
 
-    @overload
+    @overload  # type: ignore[override]
     def run(
        self,
        sql: str | Iterable[str],
@@ -385,10 +387,10 @@ class SnowflakeHook(DbApiHook):
             with self._get_cursor(conn, return_dictionaries) as cur:
                 results = []
                 for sql_statement in sql_list:
-                    self._run_command(cur, sql_statement, parameters)
+                    self._run_command(cur, sql_statement, parameters)  # type: ignore[attr-defined]
 
                     if handler is not None:
-                        result = self._make_common_data_structure(handler(cur))
+                        result = self._make_common_data_structure(handler(cur))  # type: ignore[attr-defined]
                         if return_single_query_results(sql, return_last, split_statements):
                             _last_result = result
                             _last_description = cur.description
--- a/airflow/providers/snowflake/operators/snowflake.py
+++ b/airflow/providers/snowflake/operators/snowflake.py
@@ -514,6 +514,21 @@ class SnowflakeSqlApiOperator(SQLExecuteQueryOperator):
         if self.do_xcom_push:
             context["ti"].xcom_push(key="query_ids", value=self.query_ids)
 
+        succeeded_query_ids = []
+        for query_id in self.query_ids:
+            self.log.info("Retrieving status for query id %s", query_id)
+            statement_status = self._hook.get_sql_api_query_status(query_id)
+            if statement_status.get("status") == "running":
+                break
+            elif statement_status.get("status") == "success":
+                succeeded_query_ids.append(query_id)
+            else:
+                raise AirflowException(f"{statement_status.get('status')}: {statement_status.get('message')}")
+
+        if len(self.query_ids) == len(succeeded_query_ids):
+            self.log.info("%s completed successfully.", self.task_id)
+            return
+
         if self.deferrable:
             self.defer(
                 timeout=self.execution_timeout,
--- a/airflow/providers/snowflake/transfers/copy_into_snowflake.py
+++ b/airflow/providers/snowflake/transfers/copy_into_snowflake.py
@@ -108,8 +108,12 @@ class CopyFromExternalStageToSnowflakeOperator(BaseOperator):
         self.copy_options = copy_options
         self.validation_mode = validation_mode
 
+        self.hook: SnowflakeHook | None = None
+        self._sql: str | None = None
+        self._result: list[dict[str, Any]] = []
+
     def execute(self, context: Any) -> None:
-        snowflake_hook = SnowflakeHook(
+        self.hook = SnowflakeHook(
             snowflake_conn_id=self.snowflake_conn_id,
             warehouse=self.warehouse,
             database=self.database,
@@ -127,7 +131,7 @@ class CopyFromExternalStageToSnowflakeOperator(BaseOperator):
         if self.columns_array:
             into = f"{into}({', '.join(self.columns_array)})"
 
-        sql = f"""
+        self._sql = f"""
         COPY INTO {into}
         FROM @{self.stage}/{self.prefix or ""}
         {"FILES=(" + ",".join(map(enclose_param, self.files)) + ")" if self.files else ""}
@@ -137,5 +141,158 @@ class CopyFromExternalStageToSnowflakeOperator(BaseOperator):
         {self.validation_mode or ""}
         """
         self.log.info("Executing COPY command...")
-        snowflake_hook.run(sql=sql, autocommit=self.autocommit)
+        self._result = self.hook.run(  # type: ignore # mypy does not work well with return_dictionaries=True
+            sql=self._sql,
+            autocommit=self.autocommit,
+            handler=lambda x: x.fetchall(),
+            return_dictionaries=True,
+        )
         self.log.info("COPY command completed")
+
+    @staticmethod
+    def _extract_openlineage_unique_dataset_paths(
+        query_result: list[dict[str, Any]],
+    ) -> tuple[list[tuple[str, str]], list[str]]:
+        """Extracts and returns unique OpenLineage dataset paths and file paths that failed to be parsed.
+
+        Each row in the results is expected to have a 'file' field, which is a URI.
+        The function parses these URIs and constructs a set of unique OpenLineage (namespace, name) tuples.
+        Additionally, it captures any URIs that cannot be parsed or processed
+        and returns them in a separate error list.
+
+        For Azure, Snowflake has a unique way of representing URI:
+            azure://<account_name>.blob.core.windows.net/<container_name>/path/to/file.csv
+        that is transformed by this function to a Dataset with more universal naming convention:
+            Dataset(namespace="wasbs://container_name@account_name", name="path/to"), as described at
+            https://github.com/OpenLineage/OpenLineage/blob/main/spec/Naming.md#wasbs-azure-blob-storage
+
+        :param query_result: A list of dictionaries, each containing a 'file' key with a URI value.
+        :return: Two lists - the first is a sorted list of tuples, each representing a unique dataset path,
+            and the second contains any URIs that cannot be parsed or processed correctly.
+
+        >>> method = CopyFromExternalStageToSnowflakeOperator._extract_openlineage_unique_dataset_paths
+
+        >>> results = [{"file": "azure://my_account.blob.core.windows.net/azure_container/dir3/file.csv"}]
+        >>> method(results)
+        ([('wasbs://azure_container@my_account', 'dir3')], [])
+
+        >>> results = [{"file": "azure://my_account.blob.core.windows.net/azure_container"}]
+        >>> method(results)
+        ([('wasbs://azure_container@my_account', '/')], [])
+
+        >>> results = [{"file": "s3://bucket"}, {"file": "gcs://bucket/"}, {"file": "s3://bucket/a.csv"}]
+        >>> method(results)
+        ([('gcs://bucket', '/'), ('s3://bucket', '/')], [])
+
+        >>> results = [{"file": "s3://bucket/dir/file.csv"}, {"file": "gcs://bucket/dir/dir2/a.txt"}]
+        >>> method(results)
+        ([('gcs://bucket', 'dir/dir2'), ('s3://bucket', 'dir')], [])
+
+        >>> results = [
+        ...     {"file": "s3://bucket/dir/file.csv"},
+        ...     {"file": "azure://my_account.something_new.windows.net/azure_container"},
+        ... ]
+        >>> method(results)
+        ([('s3://bucket', 'dir')], ['azure://my_account.something_new.windows.net/azure_container'])
+        """
+        import re
+        from pathlib import Path
+        from urllib.parse import urlparse
+
+        azure_regex = r"azure:\/\/(\w+)?\.blob.core.windows.net\/(\w+)\/?(.*)?"
+        extraction_error_files = []
+        unique_dataset_paths = set()
+
+        for row in query_result:
+            uri = urlparse(row["file"])
+            if uri.scheme == "azure":
+                match = re.fullmatch(azure_regex, row["file"])
+                if not match:
+                    extraction_error_files.append(row["file"])
+                    continue
+                account_name, container_name, name = match.groups()
+                namespace = f"wasbs://{container_name}@{account_name}"
+            else:
+                namespace = f"{uri.scheme}://{uri.netloc}"
+                name = uri.path.lstrip("/")
+
+            name = Path(name).parent.as_posix()
+            if name in ("", "."):
+                name = "/"
+
+            unique_dataset_paths.add((namespace, name))
+
+        return sorted(unique_dataset_paths), sorted(extraction_error_files)
+
+    def get_openlineage_facets_on_complete(self, task_instance):
+        """Implement _on_complete because we rely on return value of a query."""
+        import re
+
+        from openlineage.client.facet import (
+            ExternalQueryRunFacet,
+            ExtractionError,
+            ExtractionErrorRunFacet,
+            SqlJobFacet,
+        )
+        from openlineage.client.run import Dataset
+
+        from airflow.providers.openlineage.extractors import OperatorLineage
+        from airflow.providers.openlineage.sqlparser import SQLParser
+
+        if not self._sql:
+            return OperatorLineage()
+
+        query_results = self._result or []
+        # If no files were uploaded we get [{"status": "0 files were uploaded..."}]
+        if len(query_results) == 1 and query_results[0].get("status"):
+            query_results = []
+        unique_dataset_paths, extraction_error_files = self._extract_openlineage_unique_dataset_paths(
+            query_results
+        )
+        input_datasets = [Dataset(namespace=namespace, name=name) for namespace, name in unique_dataset_paths]
+
+        run_facets = {}
+        if extraction_error_files:
+            self.log.debug(
+                f"Unable to extract Dataset namespace and name "
+                f"for the following files: `{extraction_error_files}`."
+            )
+            run_facets["extractionError"] = ExtractionErrorRunFacet(
+                totalTasks=len(query_results),
+                failedTasks=len(extraction_error_files),
+                errors=[
+                    ExtractionError(
+                        errorMessage="Unable to extract Dataset namespace and name.",
+                        stackTrace=None,
+                        task=file_uri,
+                        taskNumber=None,
+                    )
+                    for file_uri in extraction_error_files
+                ],
+            )
+
+        connection = self.hook.get_connection(getattr(self.hook, str(self.hook.conn_name_attr)))
+        database_info = self.hook.get_openlineage_database_info(connection)
+
+        dest_name = self.table
+        schema = self.hook.get_openlineage_default_schema()
+        database = database_info.database
+        if schema:
+            dest_name = f"{schema}.{dest_name}"
+        if database:
+            dest_name = f"{database}.{dest_name}"
+
+        snowflake_namespace = SQLParser.create_namespace(database_info)
+        query = SQLParser.normalize_sql(self._sql)
+        query = re.sub(r"\n+", "\n", re.sub(r" +", " ", query))
+
+        run_facets["externalQuery"] = ExternalQueryRunFacet(
+            externalQueryId=self.hook.query_ids[0], source=snowflake_namespace
+        )
+
+        return OperatorLineage(
+            inputs=input_datasets,
+            outputs=[Dataset(namespace=snowflake_namespace, name=dest_name)],
+            job_facets={"sql": SqlJobFacet(query=query)},
+            run_facets=run_facets,
+        )
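With the change above, the COPY results collected in execute() now feed get_openlineage_facets_on_complete(), so input and output datasets are reported automatically when the apache-airflow-providers-openlineage package (the "openlineage" extra declared in the METADATA below) is installed. A minimal usage sketch follows; it is not part of the released package, the DAG id, connection id, table, stage and prefix values are placeholders, and the file_format parameter is assumed from the operator's existing signature rather than shown in this hunk:

# Hypothetical example DAG; all literal values below are placeholders.
from __future__ import annotations

import pendulum

from airflow import DAG
from airflow.providers.snowflake.transfers.copy_into_snowflake import (
    CopyFromExternalStageToSnowflakeOperator,
)

with DAG(
    dag_id="copy_into_snowflake_example",  # hypothetical DAG id
    start_date=pendulum.datetime(2024, 1, 1, tz="UTC"),
    schedule=None,
    catchup=False,
):
    # As of provider 5.3.0 this task also emits OpenLineage facets on completion.
    CopyFromExternalStageToSnowflakeOperator(
        task_id="copy_into_table",
        snowflake_conn_id="snowflake_default",  # assumed connection id
        table="MY_TABLE",  # placeholder destination table
        stage="MY_EXTERNAL_STAGE",  # placeholder external stage
        prefix="data/",  # placeholder key prefix
        file_format="(type = 'CSV', field_delimiter = ',', skip_header = 1)",  # assumed parameter, placeholder value
    )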
--- a/apache_airflow_providers_snowflake-5.2.1rc1.dist-info/METADATA
+++ b/apache_airflow_providers_snowflake-5.3.0.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: apache-airflow-providers-snowflake
-Version: 5.2.1rc1
+Version: 5.3.0
 Summary: Provider package apache-airflow-providers-snowflake for Apache Airflow
 Keywords: airflow-provider,snowflake,airflow,integration
 Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -20,15 +20,15 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Topic :: System :: Monitoring
-Requires-Dist: apache-airflow-providers-common-sql>=1.10.0.dev0
-Requires-Dist: apache-airflow>=2.6.0.dev0
+Requires-Dist: apache-airflow-providers-common-sql>=1.10.0
+Requires-Dist: apache-airflow>=2.6.0
 Requires-Dist: snowflake-connector-python>=2.7.8
 Requires-Dist: snowflake-sqlalchemy>=1.1.0
 Requires-Dist: apache-airflow-providers-common-sql ; extra == "common.sql"
 Requires-Dist: apache-airflow-providers-openlineage ; extra == "openlineage"
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
-Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.2.1/changelog.html
-Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.2.1
+Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.3.0/changelog.html
+Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.3.0
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
 Project-URL: Source Code, https://github.com/apache/airflow
 Project-URL: Twitter, https://twitter.com/ApacheAirflow
@@ -80,7 +80,7 @@ Provides-Extra: openlineage
 
 Package ``apache-airflow-providers-snowflake``
 
-Release: ``5.2.1.rc1``
+Release: ``5.3.0``
 
 
 `Snowflake <https://www.snowflake.com/>`__
@@ -93,7 +93,7 @@ This is a provider package for ``snowflake`` provider. All classes for this prov
 are in ``airflow.providers.snowflake`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.2.1/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.3.0/>`_.
 
 Installation
 ------------
@@ -137,4 +137,4 @@ Dependent package
 ============================================================================================================== ===============
 
 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.2.1/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-snowflake/5.3.0/changelog.html>`_.
--- a/apache_airflow_providers_snowflake-5.2.1rc1.dist-info/RECORD
+++ b/apache_airflow_providers_snowflake-5.3.0.dist-info/RECORD
@@ -1,19 +1,19 @@
 airflow/providers/snowflake/LICENSE,sha256=ywUBpKZc7Jb96rVt5I3IDbg7dIJAbUSHkuoDcF3jbH4,13569
-airflow/providers/snowflake/__init__.py,sha256=N1ToUM81znAFMFd1wrRK0qvhAnkr9XI8cWGn4piL7XU,1584
-airflow/providers/snowflake/get_provider_info.py,sha256=tVuJUNRgl7RUJmFB50ibcYus3VyZGJ7fqE1nWu9m4A4,4660
+airflow/providers/snowflake/__init__.py,sha256=uni5RXXyJ7ikG3ilrlEdQ5XaDkSiQD_RdMg01GlbUnc,1584
+airflow/providers/snowflake/get_provider_info.py,sha256=69aKCJgzdB03_e0BobdDmCp4CcVA3XrDwr6u8Ozja1c,4679
 airflow/providers/snowflake/hooks/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
-airflow/providers/snowflake/hooks/snowflake.py,sha256=e8hmCQ0_yWj48leljR5pJgWgk9g4bL8IvbBkzW_usDA,21026
+airflow/providers/snowflake/hooks/snowflake.py,sha256=hPJfFjTWDVAuSqp8GhDrNpQ0kh59t4hLMQAUVioTa1k,21180
 airflow/providers/snowflake/hooks/snowflake_sql_api.py,sha256=RCakX8FjmTDWfDZJkxkHnZB9VJDxORZ0W6SHfS-Oh5M,12703
 airflow/providers/snowflake/operators/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
-airflow/providers/snowflake/operators/snowflake.py,sha256=Pyp-SS1knrJm6GziOjHwfEpcAY96BubCy_-utTrZOg8,25686
+airflow/providers/snowflake/operators/snowflake.py,sha256=yZhA9A2nYdIiFVEW4iDC6OtpKx7Qw4DJ7MMq1me4M08,26386
 airflow/providers/snowflake/transfers/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
-airflow/providers/snowflake/transfers/copy_into_snowflake.py,sha256=rdhgkggjEEeZ7Afi3PIyTC-XP0a2NgCIqzSWe1l262w,5765
+airflow/providers/snowflake/transfers/copy_into_snowflake.py,sha256=YB7E31SsgCKFHfp7GYYSm-0dM5iCxSPMGE9B10-SwT8,12634
 airflow/providers/snowflake/triggers/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
 airflow/providers/snowflake/triggers/snowflake_trigger.py,sha256=HgQGOpAhrCfh0bn6kDxakIi2afq1oRr_d6Av5yxRx5c,4224
 airflow/providers/snowflake/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
 airflow/providers/snowflake/utils/common.py,sha256=DG-KLy2KpZWAqZqm_XIECm8lmdoUlzwkXv9onmkQThc,1644
 airflow/providers/snowflake/utils/sql_api_generate_jwt.py,sha256=4LueR3FySbo99cza8nZBJIGy9Wcrs9jFiWlVUHiK42w,6763
-apache_airflow_providers_snowflake-5.2.1rc1.dist-info/entry_points.txt,sha256=bCrl5J1PXUMzbgnrKYho61rkbL2gHRT4I6f_1jlxAX4,105
-apache_airflow_providers_snowflake-5.2.1rc1.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
-apache_airflow_providers_snowflake-5.2.1rc1.dist-info/METADATA,sha256=S0-P_jN7VqjJx2lWd36lEvDRW5hrlYPZhhJ3uFc2FdQ,6469
-apache_airflow_providers_snowflake-5.2.1rc1.dist-info/RECORD,,
+apache_airflow_providers_snowflake-5.3.0.dist-info/entry_points.txt,sha256=bCrl5J1PXUMzbgnrKYho61rkbL2gHRT4I6f_1jlxAX4,105
+apache_airflow_providers_snowflake-5.3.0.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
+apache_airflow_providers_snowflake-5.3.0.dist-info/METADATA,sha256=7cHE3hWFtVw1GByGlGpBgpoGmymPjGkKaAm6O7OFSAE,6452
+apache_airflow_providers_snowflake-5.3.0.dist-info/RECORD,,