apache-airflow-providers-snowflake 6.4.0__py3-none-any.whl → 6.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
 
 __all__ = ["__version__"]
 
-__version__ = "6.4.0"
+__version__ = "6.5.0"
 
 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
     "2.10.0"
@@ -17,8 +17,8 @@
 
 from __future__ import annotations
 
-from collections.abc import Sequence
-from typing import TYPE_CHECKING, Callable
+from collections.abc import Callable, Sequence
+from typing import TYPE_CHECKING
 
 from airflow.providers.snowflake.version_compat import AIRFLOW_V_3_0_PLUS
 
@@ -19,12 +19,12 @@ from __future__ import annotations
 
 import base64
 import os
-from collections.abc import Iterable, Mapping
+from collections.abc import Callable, Iterable, Mapping
 from contextlib import closing, contextmanager
 from functools import cached_property
 from io import StringIO
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Callable, TypeVar, overload
+from typing import TYPE_CHECKING, Any, TypeVar, overload
 from urllib.parse import urlparse
 
 import requests
@@ -617,10 +617,9 @@ class SnowflakeHook(DbApiHook):
 
     def get_openlineage_database_specific_lineage(self, task_instance) -> OperatorLineage | None:
         """
-        Generate OpenLineage metadata for a Snowflake task instance based on executed query IDs.
+        Emit separate OpenLineage events for each Snowflake query, based on executed query IDs.
 
-        If a single query ID is present, attach an `ExternalQueryRunFacet` to the lineage metadata.
-        If multiple query IDs are present, emits separate OpenLineage events for each query.
+        If a single query ID is present, also add an `ExternalQueryRunFacet` to the returned lineage metadata.
 
         Note that `get_openlineage_database_specific_lineage` is usually called after task's execution,
         so if multiple query IDs are present, both START and COMPLETE event for each query will be emitted
@@ -641,13 +640,22 @@ class SnowflakeHook(DbApiHook):
             )
 
         if not self.query_ids:
-            self.log.debug("openlineage: no snowflake query ids found.")
+            self.log.info("OpenLineage could not find snowflake query ids.")
            return None
 
         self.log.debug("openlineage: getting connection to get database info")
         connection = self.get_connection(self.get_conn_id())
         namespace = SQLParser.create_namespace(self.get_openlineage_database_info(connection))
 
+        self.log.info("Separate OpenLineage events will be emitted for each query_id.")
+        emit_openlineage_events_for_snowflake_queries(
+            task_instance=task_instance,
+            hook=self,
+            query_ids=self.query_ids,
+            query_for_extra_metadata=True,
+            query_source_namespace=namespace,
+        )
+
         if len(self.query_ids) == 1:
             self.log.debug("Attaching ExternalQueryRunFacet with single query_id to OpenLineage event.")
             return OperatorLineage(
@@ -658,20 +666,4 @@ class SnowflakeHook(DbApiHook):
                 }
             )
 
-        self.log.info("Multiple query_ids found. Separate OpenLineage event will be emitted for each query.")
-        try:
-            from airflow.providers.openlineage.utils.utils import should_use_external_connection
-
-            use_external_connection = should_use_external_connection(self)
-        except ImportError:
-            # OpenLineage provider release < 1.8.0 - we always use connection
-            use_external_connection = True
-
-        emit_openlineage_events_for_snowflake_queries(
-            query_ids=self.query_ids,
-            query_source_namespace=namespace,
-            task_instance=task_instance,
-            hook=self if use_external_connection else None,
-        )
-
         return None
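
For context on the single-query branch above: the method still returns an OperatorLineage whose run facets carry the executed query id. A rough, hedged sketch of that return shape, with class paths assumed from the OpenLineage provider and client libraries and with a hypothetical query id and namespace:

    from airflow.providers.openlineage.extractors import OperatorLineage
    from openlineage.client.facet_v2 import external_query_run

    # Hypothetical values: a Snowflake query id and the namespace built by
    # SQLParser.create_namespace() from the connection's database info.
    lineage = OperatorLineage(
        run_facets={
            "externalQuery": external_query_run.ExternalQueryRunFacet(
                externalQueryId="01b2c3d4-0000-0000-0000-000000000000",
                source="snowflake://xy12345.eu-central-1.aws",
            )
        }
    )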
@@ -17,6 +17,7 @@
 from __future__ import annotations
 
 import base64
+import time
 import uuid
 import warnings
 from datetime import timedelta
@@ -25,8 +26,18 @@ from typing import Any
 
 import aiohttp
 import requests
+from aiohttp import ClientConnectionError, ClientResponseError
 from cryptography.hazmat.backends import default_backend
 from cryptography.hazmat.primitives import serialization
+from requests.exceptions import ConnectionError, HTTPError, Timeout
+from tenacity import (
+    AsyncRetrying,
+    Retrying,
+    before_sleep_log,
+    retry_if_exception,
+    stop_after_attempt,
+    wait_exponential,
+)
 
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.providers.snowflake.hooks.snowflake import SnowflakeHook
@@ -65,6 +76,7 @@ class SnowflakeSqlApiHook(SnowflakeHook):
     :param token_life_time: lifetime of the JWT Token in timedelta
     :param token_renewal_delta: Renewal time of the JWT Token in timedelta
     :param deferrable: Run operator in the deferrable mode.
+    :param api_retry_args: An optional dictionary with arguments passed to ``tenacity.Retrying`` & ``tenacity.AsyncRetrying`` classes.
     """
 
     LIFETIME = timedelta(minutes=59)  # The tokens will have a 59 minute lifetime
@@ -75,15 +87,27 @@ class SnowflakeSqlApiHook(SnowflakeHook):
         snowflake_conn_id: str,
         token_life_time: timedelta = LIFETIME,
         token_renewal_delta: timedelta = RENEWAL_DELTA,
+        api_retry_args: dict[Any, Any] | None = None,  # Optional retry arguments passed to tenacity.retry
         *args: Any,
         **kwargs: Any,
     ):
         self.snowflake_conn_id = snowflake_conn_id
         self.token_life_time = token_life_time
         self.token_renewal_delta = token_renewal_delta
+
         super().__init__(snowflake_conn_id, *args, **kwargs)
         self.private_key: Any = None
 
+        self.retry_config = {
+            "retry": retry_if_exception(self._should_retry_on_error),
+            "wait": wait_exponential(multiplier=1, min=1, max=60),
+            "stop": stop_after_attempt(5),
+            "before_sleep": before_sleep_log(self.log, log_level=20),  # INFO level
+            "reraise": True,
+        }
+        if api_retry_args:
+            self.retry_config.update(api_retry_args)
+
     def get_private_key(self) -> None:
         """Get the private key from snowflake connection."""
         conn = self.get_connection(self.snowflake_conn_id)
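
The new ``api_retry_args`` dictionary is merged over the defaults shown above, so callers can override individual ``tenacity`` keywords without restating the whole policy. A minimal sketch, assuming a connection id of ``snowflake_default`` (both override values are hypothetical):

    from tenacity import stop_after_attempt, wait_exponential

    from airflow.providers.snowflake.hooks.snowflake_sql_api import SnowflakeSqlApiHook

    hook = SnowflakeSqlApiHook(
        snowflake_conn_id="snowflake_default",  # hypothetical connection id
        api_retry_args={
            "stop": stop_after_attempt(10),  # raise the default 5-attempt cap
            "wait": wait_exponential(multiplier=2, min=2, max=120),  # slower backoff
        },
    )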
@@ -168,13 +192,8 @@ class SnowflakeSqlApiHook(SnowflakeHook):
                 "query_tag": query_tag,
             },
         }
-        response = requests.post(url, json=data, headers=headers, params=params)
-        try:
-            response.raise_for_status()
-        except requests.exceptions.HTTPError as e:  # pragma: no cover
-            msg = f"Response: {e.response.content.decode()} Status Code: {e.response.status_code}"
-            raise AirflowException(msg)
-        json_response = response.json()
+
+        _, json_response = self._make_api_call_with_retries("POST", url, headers, params, data)
         self.log.info("Snowflake SQL POST API response: %s", json_response)
         if "statementHandles" in json_response:
             self.query_ids = json_response["statementHandles"]
@@ -259,13 +278,10 @@ class SnowflakeSqlApiHook(SnowflakeHook):
         """
         for query_id in query_ids:
             header, params, url = self.get_request_url_header_params(query_id)
-            try:
-                response = requests.get(url, headers=header, params=params)
-                response.raise_for_status()
-                self.log.info(response.json())
-            except requests.exceptions.HTTPError as e:
-                msg = f"Response: {e.response.content.decode()}, Status Code: {e.response.status_code}"
-                raise AirflowException(msg)
+            _, response_json = self._make_api_call_with_retries(
+                method="GET", url=url, headers=header, params=params
+            )
+            self.log.info(response_json)
 
     def _process_response(self, status_code, resp):
         self.log.info("Snowflake SQL GET statements status API response: %s", resp)
@@ -295,11 +311,83 @@ class SnowflakeSqlApiHook(SnowflakeHook):
         """
         self.log.info("Retrieving status for query id %s", query_id)
         header, params, url = self.get_request_url_header_params(query_id)
-        response = requests.get(url, params=params, headers=header)
-        status_code = response.status_code
-        resp = response.json()
+        status_code, resp = self._make_api_call_with_retries("GET", url, header, params)
         return self._process_response(status_code, resp)
 
+    def wait_for_query(
+        self, query_id: str, raise_error: bool = False, poll_interval: int = 5, timeout: int = 60
+    ) -> dict[str, str | list[str]]:
+        """
+        Wait for query to finish either successfully or with error.
+
+        :param query_id: statement handle id for the individual statement.
+        :param raise_error: whether to raise an error if the query failed.
+        :param poll_interval: time (in seconds) between checking the query status.
+        :param timeout: max time (in seconds) to wait for the query to finish before raising a TimeoutError.
+
+        :raises RuntimeError: If the query status is 'error' and `raise_error` is True.
+        :raises TimeoutError: If the query doesn't finish within the specified timeout.
+        """
+        start_time = time.time()
+
+        while True:
+            response = self.get_sql_api_query_status(query_id=query_id)
+            self.log.debug("Query status `%s`", response["status"])
+
+            if time.time() - start_time > timeout:
+                raise TimeoutError(
+                    f"Query `{query_id}` did not finish within the timeout period of {timeout} seconds."
+                )
+
+            if response["status"] != "running":
+                self.log.info("Query status `%s`", response["status"])
+                break
+
+            time.sleep(poll_interval)
+
+        if response["status"] == "error" and raise_error:
+            raise RuntimeError(response["message"])
+
+        return response
+
+    def get_result_from_successful_sql_api_query(self, query_id: str) -> list[dict[str, Any]]:
+        """
+        Based on the query id HTTP requests are made to snowflake SQL API and return result data.
+
+        :param query_id: statement handle id for the individual statement.
+
+        :raises RuntimeError: If the query status is not 'success'.
+        """
+        self.log.info("Retrieving data for query id %s", query_id)
+        header, params, url = self.get_request_url_header_params(query_id)
+        status_code, response = self._make_api_call_with_retries("GET", url, header, params)
+
+        if (query_status := self._process_response(status_code, response)["status"]) != "success":
+            msg = f"Query must have status `success` to retrieve data; got `{query_status}`."
+            raise RuntimeError(msg)
+
+        # Below fields should always be present in response, but added some safety checks
+        data = response.get("data", [])
+        if not data:
+            self.log.warning("No data found in the API response.")
+            return []
+        metadata = response.get("resultSetMetaData", {})
+        col_names = [row["name"] for row in metadata.get("rowType", [])]
+        if not col_names:
+            self.log.warning("No column metadata found in the API response.")
+            return []
+
+        num_partitions = len(metadata.get("partitionInfo", []))
+        if num_partitions > 1:
+            self.log.debug("Result data is returned as multiple partitions. Will perform additional queries.")
+            url += "?partition="
+            for partition_no in range(1, num_partitions):  # First partition was already returned
+                self.log.debug("Querying for partition no. %s", partition_no)
+                _, response = self._make_api_call_with_retries("GET", url + str(partition_no), header, params)
+                data.extend(response.get("data", []))
+
+        return [dict(zip(col_names, row)) for row in data]  # Merged column names with data
+
     async def get_sql_api_query_status_async(self, query_id: str) -> dict[str, str | list[str]]:
         """
         Based on the query id async HTTP request is made to snowflake SQL API and return response.
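
Taken together, the new helpers allow a synchronous execute-poll-fetch flow against the SQL API. A hedged usage sketch (connection id, SQL, and timing values are hypothetical):

    hook = SnowflakeSqlApiHook(snowflake_conn_id="snowflake_default")
    query_ids = hook.execute_query(sql="SELECT 1 AS answer;", statement_count=0)

    # Poll until the status leaves "running"; TimeoutError after 120s,
    # RuntimeError if Snowflake reports an error status.
    hook.wait_for_query(query_id=query_ids[0], raise_error=True, poll_interval=5, timeout=120)

    # Fetch every partition and merge column names with rows into dicts.
    rows = hook.get_result_from_successful_sql_api_query(query_id=query_ids[0])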
@@ -308,10 +396,81 @@ class SnowflakeSqlApiHook(SnowflakeHook):
         """
         self.log.info("Retrieving status for query id %s", query_id)
         header, params, url = self.get_request_url_header_params(query_id)
-        async with (
-            aiohttp.ClientSession(headers=header) as session,
-            session.get(url, params=params) as response,
+        status_code, resp = await self._make_api_call_with_retries_async("GET", url, header, params)
+        return self._process_response(status_code, resp)
+
+    @staticmethod
+    def _should_retry_on_error(exception) -> bool:
+        """
+        Determine if the exception should trigger a retry based on error type and status code.
+
+        Retries on HTTP errors 429 (Too Many Requests), 503 (Service Unavailable),
+        and 504 (Gateway Timeout) as recommended by Snowflake error handling docs.
+        Retries on connection errors and timeouts.
+
+        :param exception: The exception to check
+        :return: True if the request should be retried, False otherwise
+        """
+        if isinstance(exception, HTTPError):
+            return exception.response.status_code in [429, 503, 504]
+        if isinstance(exception, ClientResponseError):
+            return exception.status in [429, 503, 504]
+        if isinstance(
+            exception,
+            ConnectionError | Timeout | ClientConnectionError,
         ):
-            status_code = response.status
-            resp = await response.json()
-            return self._process_response(status_code, resp)
+            return True
+        return False
+
+    def _make_api_call_with_retries(
+        self, method: str, url: str, headers: dict, params: dict | None = None, json: dict | None = None
+    ):
+        """
+        Make an API call to the Snowflake SQL API with retry logic for specific HTTP errors.
+
+        Error handling implemented based on Snowflake error handling docs:
+        https://docs.snowflake.com/en/developer-guide/sql-api/handling-errors
+
+        :param method: The HTTP method to use for the API call.
+        :param url: The URL for the API endpoint.
+        :param headers: The headers to include in the API call.
+        :param params: (Optional) The query parameters to include in the API call.
+        :param data: (Optional) The data to include in the API call.
+        :return: The response object from the API call.
+        """
+        with requests.Session() as session:
+            for attempt in Retrying(**self.retry_config):  # type: ignore
+                with attempt:
+                    if method.upper() in ("GET", "POST"):
+                        response = session.request(
+                            method=method.lower(), url=url, headers=headers, params=params, json=json
+                        )
+                    else:
+                        raise ValueError(f"Unsupported HTTP method: {method}")
+                    response.raise_for_status()
+                    return response.status_code, response.json()
+
+    async def _make_api_call_with_retries_async(self, method, url, headers, params=None):
+        """
+        Make an API call to the Snowflake SQL API asynchronously with retry logic for specific HTTP errors.
+
+        Error handling implemented based on Snowflake error handling docs:
+        https://docs.snowflake.com/en/developer-guide/sql-api/handling-errors
+
+        :param method: The HTTP method to use for the API call. Only GET is supported as is synchronous.
+        :param url: The URL for the API endpoint.
+        :param headers: The headers to include in the API call.
+        :param params: (Optional) The query parameters to include in the API call.
+        :return: The response object from the API call.
+        """
+        async with aiohttp.ClientSession(headers=headers) as session:
+            async for attempt in AsyncRetrying(**self.retry_config):  # type: ignore
+                with attempt:
+                    if method.upper() == "GET":
+                        async with session.request(method=method.lower(), url=url, params=params) as response:
+                            response.raise_for_status()
+                            # Return status and json content for async processing
+                            content = await response.json()
+                            return response.status, content
+                    else:
+                        raise ValueError(f"Unsupported HTTP method: {method}")
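
The retry loop pattern used by both helpers is plain ``tenacity``; with ``reraise=True`` the last exception propagates unchanged once the stop condition is hit. A standalone sketch of the same policy, runnable without Airflow (``is_retryable`` and ``flaky_call`` are hypothetical stand-ins):

    import logging

    from tenacity import Retrying, before_sleep_log, retry_if_exception, stop_after_attempt, wait_exponential

    log = logging.getLogger(__name__)

    def is_retryable(exc: BaseException) -> bool:
        # Stand-in for _should_retry_on_error: retry only transient failures.
        return isinstance(exc, TimeoutError)

    def flaky_call() -> str:
        return "ok"  # replace with the real HTTP request

    for attempt in Retrying(
        retry=retry_if_exception(is_retryable),
        wait=wait_exponential(multiplier=1, min=1, max=60),
        stop=stop_after_attempt(5),
        before_sleep=before_sleep_log(log, log_level=logging.INFO),
        reraise=True,
    ):
        with attempt:
            result = flaky_call()  # exceptions raised here are filtered through is_retryable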
@@ -355,6 +355,7 @@ class SnowflakeSqlApiOperator(SQLExecuteQueryOperator):
         When executing the statement, Snowflake replaces placeholders (? and :name) in
         the statement with these specified values.
     :param deferrable: Run operator in the deferrable mode.
+    :param snowflake_api_retry_args: An optional dictionary with arguments passed to ``tenacity.Retrying`` & ``tenacity.AsyncRetrying`` classes.
     """
 
     LIFETIME = timedelta(minutes=59)  # The tokens will have a 59 minutes lifetime
@@ -381,6 +382,7 @@ class SnowflakeSqlApiOperator(SQLExecuteQueryOperator):
         token_renewal_delta: timedelta = RENEWAL_DELTA,
         bindings: dict[str, Any] | None = None,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
+        snowflake_api_retry_args: dict[str, Any] | None = None,
         **kwargs: Any,
     ) -> None:
         self.snowflake_conn_id = snowflake_conn_id
@@ -390,6 +392,7 @@ class SnowflakeSqlApiOperator(SQLExecuteQueryOperator):
         self.token_renewal_delta = token_renewal_delta
         self.bindings = bindings
         self.execute_async = False
+        self.snowflake_api_retry_args = snowflake_api_retry_args or {}
         self.deferrable = deferrable
         self.query_ids: list[str] = []
         if any([warehouse, database, role, schema, authenticator, session_parameters]):  # pragma: no cover
@@ -412,6 +415,7 @@ class SnowflakeSqlApiOperator(SQLExecuteQueryOperator):
             token_life_time=self.token_life_time,
             token_renewal_delta=self.token_renewal_delta,
             deferrable=self.deferrable,
+            api_retry_args=self.snowflake_api_retry_args,
             **self.hook_params,
         )
 
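At the DAG level the new parameter is simply forwarded to the hook as ``api_retry_args``, as the last hunk above shows. A hedged sketch (task id, connection id, and SQL are hypothetical):

    from tenacity import stop_after_attempt

    from airflow.providers.snowflake.operators.snowflake import SnowflakeSqlApiOperator

    create_table = SnowflakeSqlApiOperator(
        task_id="create_table",
        snowflake_conn_id="snowflake_default",
        sql="CREATE TABLE IF NOT EXISTS demo_table (id INT);",
        statement_count=1,
        snowflake_api_retry_args={"stop": stop_after_attempt(3)},  # forwarded to the hook
    )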
@@ -17,8 +17,8 @@
 
 from __future__ import annotations
 
-from collections.abc import Collection, Mapping, Sequence
-from typing import Any, Callable
+from collections.abc import Callable, Collection, Mapping, Sequence
+from typing import Any
 
 from airflow.providers.common.compat.standard.operators import PythonOperator, get_current_context
 from airflow.providers.snowflake.hooks.snowflake import SnowflakeHook
@@ -22,9 +22,9 @@ from __future__ import annotations
 from collections.abc import Sequence
 from typing import Any
 
-from airflow.models import BaseOperator
 from airflow.providers.snowflake.hooks.snowflake import SnowflakeHook
 from airflow.providers.snowflake.utils.common import enclose_param
+from airflow.providers.snowflake.version_compat import BaseOperator
 
 
 def _validate_parameter(param_name: str, value: str | None) -> str | None:
@@ -19,7 +19,7 @@ from __future__ import annotations
 import datetime
 import logging
 from contextlib import closing
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
 from urllib.parse import quote, urlparse, urlunparse
 
 from airflow.providers.common.compat.openlineage.check import require_openlineage_version
@@ -31,6 +31,7 @@ if TYPE_CHECKING:
     from openlineage.client.facet_v2 import JobFacet
 
     from airflow.providers.snowflake.hooks.snowflake import SnowflakeHook
+    from airflow.providers.snowflake.hooks.snowflake_sql_api import SnowflakeSqlApiHook
 
 
 log = logging.getLogger(__name__)
@@ -204,9 +205,29 @@ def _run_single_query_with_hook(hook: SnowflakeHook, sql: str) -> list[dict]:
     return result
 
 
+def _run_single_query_with_api_hook(hook: SnowflakeSqlApiHook, sql: str) -> list[dict[str, Any]]:
+    """Execute a query against Snowflake API without adding extra logging or instrumentation."""
+    # `hook.execute_query` resets the query_ids, so we need to save them and re-assign after we're done
+    query_ids_before_execution = list(hook.query_ids)
+    try:
+        _query_ids = hook.execute_query(sql=sql, statement_count=0)
+        hook.wait_for_query(query_id=_query_ids[0], raise_error=True, poll_interval=1, timeout=3)
+        return hook.get_result_from_successful_sql_api_query(query_id=_query_ids[0])
+    finally:
+        hook.query_ids = query_ids_before_execution
+
+
+def _process_data_from_api(data: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    """Convert 'START_TIME' and 'END_TIME' fields to UTC datetime objects."""
+    for row in data:
+        for key in ("START_TIME", "END_TIME"):
+            row[key] = datetime.datetime.fromtimestamp(float(row[key]), timezone.utc)
+    return data
+
+
 def _get_queries_details_from_snowflake(
-    hook: SnowflakeHook, query_ids: list[str]
-) -> dict[str, dict[str, str]]:
+    hook: SnowflakeHook | SnowflakeSqlApiHook, query_ids: list[str]
+) -> dict[str, dict[str, Any]]:
     """Retrieve execution details for specific queries from Snowflake's query history."""
     if not query_ids:
         return {}
@@ -221,7 +242,16 @@ def _get_queries_details_from_snowflake(
         f";"
     )
 
-    result = _run_single_query_with_hook(hook=hook, sql=query)
+    try:
+        # Can't import the SnowflakeSqlApiHook class and do proper isinstance check - circular imports
+        if hook.__class__.__name__ == "SnowflakeSqlApiHook":
+            result = _run_single_query_with_api_hook(hook=hook, sql=query)  # type: ignore[arg-type]
+            result = _process_data_from_api(data=result)
+        else:
+            result = _run_single_query_with_hook(hook=hook, sql=query)
+    except Exception as e:
+        log.warning("OpenLineage could not retrieve extra metadata from Snowflake. Error encountered: %s", e)
+        result = []
 
     return {row["QUERY_ID"]: row for row in result} if result else {}
 
@@ -259,17 +289,18 @@ def _create_snowflake_event_pair(
 
 @require_openlineage_version(provider_min_version="2.3.0")
 def emit_openlineage_events_for_snowflake_queries(
-    query_ids: list[str],
-    query_source_namespace: str,
     task_instance,
-    hook: SnowflakeHook | None = None,
+    hook: SnowflakeHook | SnowflakeSqlApiHook | None = None,
+    query_ids: list[str] | None = None,
+    query_source_namespace: str | None = None,
+    query_for_extra_metadata: bool = False,
     additional_run_facets: dict | None = None,
     additional_job_facets: dict | None = None,
 ) -> None:
     """
     Emit OpenLineage events for executed Snowflake queries.
 
-    Metadata retrieval from Snowflake is attempted only if a `SnowflakeHook` is provided.
+    Metadata retrieval from Snowflake is attempted only if `get_extra_metadata` is True and hook is provided.
     If metadata is available, execution details such as start time, end time, execution status,
     error messages, and SQL text are included in the events. If no metadata is found, the function
     defaults to using the Airflow task instance's state and the current timestamp.
@@ -279,10 +310,16 @@ def emit_openlineage_events_for_snowflake_queries(
     will correspond to actual query execution times.
 
     Args:
-        query_ids: A list of Snowflake query IDs to emit events for.
-        query_source_namespace: The namespace to be included in ExternalQueryRunFacet.
         task_instance: The Airflow task instance that run these queries.
-        hook: A SnowflakeHook instance used to retrieve query metadata if available.
+        hook: A supported Snowflake hook instance used to retrieve query metadata if available.
+            If omitted, `query_ids` and `query_source_namespace` must be provided explicitly and
+            `query_for_extra_metadata` must be `False`.
+        query_ids: A list of Snowflake query IDs to emit events for, can only be None if `hook` is provided
+            and `hook.query_ids` are present.
+        query_source_namespace: The namespace to be included in ExternalQueryRunFacet,
+            can be `None` only if hook is provided.
+        query_for_extra_metadata: Whether to query Snowflake for additional metadata about queries.
+            Must be `False` if `hook` is not provided.
         additional_run_facets: Additional run facets to include in OpenLineage events.
         additional_job_facets: Additional job facets to include in OpenLineage events.
     """
@@ -297,23 +334,49 @@ def emit_openlineage_events_for_snowflake_queries(
     from airflow.providers.openlineage.conf import namespace
     from airflow.providers.openlineage.plugins.listener import get_openlineage_listener
 
-    if not query_ids:
-        log.debug("No Snowflake query IDs provided; skipping OpenLineage event emission.")
-        return
-
-    query_ids = [q for q in query_ids]  # Make a copy to make sure it does not change
+    log.info("OpenLineage will emit events for Snowflake queries.")
 
     if hook:
+        if not query_ids:
+            log.debug("No Snowflake query IDs provided; Checking `hook.query_ids` property.")
+            query_ids = getattr(hook, "query_ids", [])
+            if not query_ids:
+                raise ValueError("No Snowflake query IDs provided and `hook.query_ids` are not present.")
+
+        if not query_source_namespace:
+            log.debug("No Snowflake query namespace provided; Creating one from scratch.")
+            from airflow.providers.openlineage.sqlparser import SQLParser
+
+            connection = hook.get_connection(hook.get_conn_id())
+            query_source_namespace = SQLParser.create_namespace(
+                hook.get_openlineage_database_info(connection)
+            )
+    else:
+        if not query_ids:
+            raise ValueError("If 'hook' is not provided, 'query_ids' must be set.")
+        if not query_source_namespace:
+            raise ValueError("If 'hook' is not provided, 'query_source_namespace' must be set.")
+        if query_for_extra_metadata:
+            raise ValueError("If 'hook' is not provided, 'query_for_extra_metadata' must be False.")
+
+    query_ids = [q for q in query_ids]  # Make a copy to make sure we do not change hook's attribute
+
+    if query_for_extra_metadata and hook:
         log.debug("Retrieving metadata for %s queries from Snowflake.", len(query_ids))
         snowflake_metadata = _get_queries_details_from_snowflake(hook, query_ids)
     else:
-        log.debug("SnowflakeHook not provided. No extra metadata fill be fetched from Snowflake.")
+        log.debug("`query_for_extra_metadata` is False. No extra metadata fill be fetched from Snowflake.")
         snowflake_metadata = {}
 
     # If real metadata is unavailable, we send events with eventTime=now
     default_event_time = timezone.utcnow()
     # If no query metadata is provided, we use task_instance's state when checking for success
-    default_state = task_instance.state.value if hasattr(task_instance, "state") else ""
+    # ti.state has no `value` attr (AF2) when task it's still running, in AF3 we get 'running', in that case
+    # assuming it's user call and query succeeded, so we replace it with success.
+    default_state = (
+        getattr(task_instance.state, "value", "running") if hasattr(task_instance, "state") else ""
+    )
+    default_state = "success" if default_state == "running" else default_state
 
     common_run_facets = {"parent": _get_parent_run_facet(task_instance)}
     common_job_facets: dict[str, JobFacet] = {
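
The reworked signature supports two calling styles, which the validation block above enforces. A hedged sketch of both (``ti`` and ``hook`` stand for a task instance and hook already available in the caller; the explicit query id and namespace are hypothetical):

    from airflow.providers.snowflake.utils.openlineage import (
        emit_openlineage_events_for_snowflake_queries,
    )

    # With a hook: query ids and namespace can be derived from it, and extra
    # metadata may be queried from Snowflake.
    emit_openlineage_events_for_snowflake_queries(
        task_instance=ti,
        hook=hook,
        query_for_extra_metadata=True,
    )

    # Without a hook: both identifiers are mandatory and
    # query_for_extra_metadata must stay False, otherwise ValueError is raised.
    emit_openlineage_events_for_snowflake_queries(
        task_instance=ti,
        query_ids=["01b2c3d4-0000-0000-0000-000000000000"],
        query_source_namespace="snowflake://xy12345.eu-central-1.aws",
    )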
@@ -18,7 +18,8 @@
 from __future__ import annotations
 
 import inspect
-from typing import TYPE_CHECKING, Callable
+from collections.abc import Callable
+from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
     from snowflake.snowpark import Session
@@ -33,3 +33,13 @@ def get_base_airflow_version_tuple() -> tuple[int, int, int]:
 
 
 AIRFLOW_V_3_0_PLUS = get_base_airflow_version_tuple() >= (3, 0, 0)
+
+if AIRFLOW_V_3_0_PLUS:
+    from airflow.sdk import BaseOperator
+else:
+    from airflow.models import BaseOperator  # type: ignore[no-redef]
+
+__all__ = [
+    "AIRFLOW_V_3_0_PLUS",
+    "BaseOperator",
+]
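
With ``BaseOperator`` re-exported from ``version_compat``, provider code can target Airflow 2 and 3 through a single import path. A minimal sketch (the operator subclass is hypothetical, for illustration only):

    from airflow.providers.snowflake.version_compat import AIRFLOW_V_3_0_PLUS, BaseOperator

    class MyOperator(BaseOperator):  # hypothetical subclass
        def execute(self, context):
            self.log.info("Running on Airflow 3+: %s", AIRFLOW_V_3_0_PLUS)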
@@ -1,11 +1,11 @@
 Metadata-Version: 2.4
 Name: apache-airflow-providers-snowflake
-Version: 6.4.0
+Version: 6.5.0
 Summary: Provider package apache-airflow-providers-snowflake for Apache Airflow
 Keywords: airflow-provider,snowflake,airflow,integration
 Author-email: Apache Software Foundation <dev@airflow.apache.org>
 Maintainer-email: Apache Software Foundation <dev@airflow.apache.org>
-Requires-Python: ~=3.9
+Requires-Python: ~=3.10
 Description-Content-Type: text/x-rst
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Environment :: Console
@@ -15,7 +15,6 @@ Classifier: Intended Audience :: System Administrators
 Classifier: Framework :: Apache Airflow
 Classifier: Framework :: Apache Airflow :: Provider
 Classifier: License :: OSI Approved :: Apache Software License
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
@@ -23,15 +22,17 @@ Classifier: Topic :: System :: Monitoring
 Requires-Dist: apache-airflow>=2.10.0
 Requires-Dist: apache-airflow-providers-common-compat>=1.6.0
 Requires-Dist: apache-airflow-providers-common-sql>=1.21.0
-Requires-Dist: pandas>=2.1.2,<2.2
-Requires-Dist: pyarrow>=14.0.1
+Requires-Dist: pandas>=2.1.2; python_version <"3.13"
+Requires-Dist: pandas>=2.2.3; python_version >="3.13"
+Requires-Dist: pyarrow>=16.1.0
 Requires-Dist: snowflake-connector-python>=3.7.1
 Requires-Dist: snowflake-sqlalchemy>=1.4.0
 Requires-Dist: snowflake-snowpark-python>=1.17.0;python_version<'3.12'
+Requires-Dist: snowflake-snowpark-python>=1.27.0;python_version>='3.12'
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
-Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-snowflake/6.4.0/changelog.html
-Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-snowflake/6.4.0
+Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-snowflake/6.5.0/changelog.html
+Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-snowflake/6.5.0
 Project-URL: Mastodon, https://fosstodon.org/@airflow
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
 Project-URL: Source Code, https://github.com/apache/airflow
@@ -63,7 +64,7 @@ Provides-Extra: openlineage
 
 Package ``apache-airflow-providers-snowflake``
 
-Release: ``6.4.0``
+Release: ``6.5.0``
 
 
 `Snowflake <https://www.snowflake.com/>`__
@@ -76,7 +77,7 @@ This is a provider package for ``snowflake`` provider. All classes for this prov
 are in ``airflow.providers.snowflake`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-snowflake/6.4.0/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-snowflake/6.5.0/>`_.
 
 Installation
 ------------
@@ -85,23 +86,25 @@ You can install this package on top of an existing Airflow 2 installation (see `
 for the minimum Airflow version supported) via
 ``pip install apache-airflow-providers-snowflake``
 
-The package supports the following python versions: 3.9,3.10,3.11,3.12
+The package supports the following python versions: 3.10,3.11,3.12
 
 Requirements
 ------------
 
-========================================== =====================================
+========================================== ======================================
 PIP package                                Version required
-========================================== =====================================
+========================================== ======================================
 ``apache-airflow``                         ``>=2.10.0``
 ``apache-airflow-providers-common-compat`` ``>=1.6.0``
 ``apache-airflow-providers-common-sql``    ``>=1.21.0``
-``pandas``                                 ``>=2.1.2,<2.2``
-``pyarrow``                                ``>=14.0.1``
+``pandas``                                 ``>=2.1.2; python_version < "3.13"``
+``pandas``                                 ``>=2.2.3; python_version >= "3.13"``
+``pyarrow``                                ``>=16.1.0``
 ``snowflake-connector-python``             ``>=3.7.1``
 ``snowflake-sqlalchemy``                   ``>=1.4.0``
 ``snowflake-snowpark-python``              ``>=1.17.0; python_version < "3.12"``
-========================================== =====================================
+``snowflake-snowpark-python``              ``>=1.27.0; python_version >= "3.12"``
+========================================== ======================================
 
 Cross provider package dependencies
@@ -125,5 +128,5 @@ Dependent package
 ================================================================================================================== =================
 
 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-snowflake/6.4.0/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-snowflake/6.5.0/changelog.html>`_.
 
@@ -1,25 +1,25 @@
 airflow/providers/snowflake/LICENSE,sha256=gXPVwptPlW1TJ4HSuG5OMPg-a3h43OGMkZRR1rpwfJA,10850
-airflow/providers/snowflake/__init__.py,sha256=hfjXA59cpm_yb00p5Y7jNTIBCe3BPNoYYXkAVAcF_c4,1498
+airflow/providers/snowflake/__init__.py,sha256=adBe6ltW-rLtYQFNU8hpf6foAV8Pe60l_xFgAkGQCBg,1498
 airflow/providers/snowflake/get_provider_info.py,sha256=NdNRMfulBbpD-I4yFRr8U533m9djD18ijEMvuxOp4_g,3875
-airflow/providers/snowflake/version_compat.py,sha256=j5PCtXvZ71aBjixu-EFTNtVDPsngzzs7os0ZQDgFVDk,1536
+airflow/providers/snowflake/version_compat.py,sha256=IlaClFJYi0uPxuC8cJt0Ro3Kl3tjIGc31ALoKDQbw5Q,1738
 airflow/providers/snowflake/decorators/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
-airflow/providers/snowflake/decorators/snowpark.py,sha256=tKXOjP8m8SEIu0jx2KSrd0n3jGMaIKDOwG2lMkvk3cI,5523
+airflow/providers/snowflake/decorators/snowpark.py,sha256=5ocPY8wrXvKbZJokefV4HDfX0WXzrHmcekXoZjkfHEw,5523
 airflow/providers/snowflake/hooks/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
-airflow/providers/snowflake/hooks/snowflake.py,sha256=t-sukzbQ1OYMeyZBDrJ9s9DuJbnDZFJKhBMZn_mQLgY,28933
-airflow/providers/snowflake/hooks/snowflake_sql_api.py,sha256=mREZ0nHc6L-9YSHZARYpgqrETqzVJ3Q6EfbWtEy5TV4,14745
+airflow/providers/snowflake/hooks/snowflake.py,sha256=RUWO9j_gd26BbktGzavxvgihCbOaBTNKlxyDvTxIBig,28502
+airflow/providers/snowflake/hooks/snowflake_sql_api.py,sha256=bCY3lSar-k3XjiDnrrWVRWTRunJL6U0Kss7fCLxFqTM,22287
 airflow/providers/snowflake/operators/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
-airflow/providers/snowflake/operators/snowflake.py,sha256=Abu0MVsUPwVxfDVNYn5OtwVoUOhQanUp-YSFtLdcn6c,22915
-airflow/providers/snowflake/operators/snowpark.py,sha256=Wt3wzcsja0ed4q2KE9WyL74XH6mUVSPNZvcCHWEHQtc,5815
+airflow/providers/snowflake/operators/snowflake.py,sha256=Xfz_bv1Y0M8IWv67dOXcupeYWYlG91kPVgCe_vEqntY,23253
+airflow/providers/snowflake/operators/snowpark.py,sha256=Tfd31My6arGXKo0yfi46HyVfkHO3yeT085l3ymxtGpk,5815
 airflow/providers/snowflake/transfers/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
-airflow/providers/snowflake/transfers/copy_into_snowflake.py,sha256=bXmqkNwthJqUo65DsI_pC3mwk_V_Iikwi646oRCyWus,13590
+airflow/providers/snowflake/transfers/copy_into_snowflake.py,sha256=2WQDhD9U1l38ZoIv7FImsV6S3gT_rSisg_isNi4k08E,13618
 airflow/providers/snowflake/triggers/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
 airflow/providers/snowflake/triggers/snowflake_trigger.py,sha256=QXNLijmtZI7NIdPtOwbvS-4ohgrm8RV_jaBKvekosHQ,4051
 airflow/providers/snowflake/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
 airflow/providers/snowflake/utils/common.py,sha256=DG-KLy2KpZWAqZqm_XIECm8lmdoUlzwkXv9onmkQThc,1644
-airflow/providers/snowflake/utils/openlineage.py,sha256=dr57b0fidPo7A451UE6s6d3PnOf4dOv3iM5aoyE4oBI,15067
-airflow/providers/snowflake/utils/snowpark.py,sha256=9kzWRkdgoNQ8f3Wnr92LdZylMpcpRasxefpOXrM30Cw,1602
+airflow/providers/snowflake/utils/openlineage.py,sha256=5qpLU7D9EFolXGWEKI90py45iU0OPNyUEim7_Y7a2yw,18686
+airflow/providers/snowflake/utils/snowpark.py,sha256=-S6ltYiW-KooqUMGzY0OebmAzpUAu7GIjFWwuYERuk8,1629
 airflow/providers/snowflake/utils/sql_api_generate_jwt.py,sha256=9mR-vHIquv60tfAni87f6FAjKsiRHUDDrsVhzw4M9vM,6762
-apache_airflow_providers_snowflake-6.4.0.dist-info/entry_points.txt,sha256=bCrl5J1PXUMzbgnrKYho61rkbL2gHRT4I6f_1jlxAX4,105
-apache_airflow_providers_snowflake-6.4.0.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
-apache_airflow_providers_snowflake-6.4.0.dist-info/METADATA,sha256=qUfhrrfvXGgroPJP_vrVoAtzRB9YEJ6Fmko0cxu1AyU,6213
-apache_airflow_providers_snowflake-6.4.0.dist-info/RECORD,,
+apache_airflow_providers_snowflake-6.5.0.dist-info/entry_points.txt,sha256=bCrl5J1PXUMzbgnrKYho61rkbL2gHRT4I6f_1jlxAX4,105
+apache_airflow_providers_snowflake-6.5.0.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
+apache_airflow_providers_snowflake-6.5.0.dist-info/METADATA,sha256=QLJIMF-ReENIcH9744z8e8TcXxny65U6XTPiFx_17Gc,6493
+apache_airflow_providers_snowflake-6.5.0.dist-info/RECORD,,