airflow-toolkit 2.0.1__tar.gz → 2.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/PKG-INFO +70 -48
  2. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/README.md +69 -44
  3. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/pyproject.toml +1 -5
  4. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit/filesystems/filesystem_protocol.py +7 -1
  5. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit/filesystems/impl/azure_databricks_volume_filesystem.py +15 -15
  6. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit/filesystems/impl/azure_file_share_filesystem.py +9 -6
  7. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit/filesystems/impl/blob_storage_filesystem.py +2 -2
  8. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit/filesystems/impl/local_filesystem.py +1 -1
  9. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit/filesystems/impl/sftp_filesystem.py +2 -1
  10. airflow_toolkit-2.2.0/src/airflow_toolkit/notifications/__init__.py +134 -0
  11. airflow_toolkit-2.2.0/src/airflow_toolkit/notifications/channels/discord.py +57 -0
  12. airflow_toolkit-2.2.0/src/airflow_toolkit/notifications/channels/email.py +93 -0
  13. airflow_toolkit-2.2.0/src/airflow_toolkit/notifications/channels/slack.py +78 -0
  14. airflow_toolkit-2.2.0/src/airflow_toolkit/notifications/channels/teams.py +76 -0
  15. airflow_toolkit-2.2.0/src/airflow_toolkit/notifications/context.py +94 -0
  16. airflow_toolkit-2.2.0/src/airflow_toolkit/protocols.py +21 -0
  17. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit/providers/deltalake/operators/duckdb_to_deltalake.py +2 -1
  18. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit/providers/deltalake/operators/filesystem_to_database.py +113 -70
  19. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit/providers/filesystem/operators/filesystem.py +11 -7
  20. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit/providers/filesystem/operators/http_to_filesystem.py +5 -11
  21. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit/providers/filesystem/tasks.py +1 -1
  22. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit.egg-info/PKG-INFO +70 -48
  23. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit.egg-info/SOURCES.txt +9 -9
  24. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit.egg-info/requires.txt +0 -4
  25. airflow_toolkit-2.0.1/src/airflow_toolkit/data_lake_facade.py +0 -128
  26. airflow_toolkit-2.0.1/src/airflow_toolkit/notifications/slack/webhook.py +0 -114
  27. airflow_toolkit-2.0.1/src/airflow_toolkit/providers/data_lake/operators/data_lake.py +0 -67
  28. airflow_toolkit-2.0.1/src/airflow_toolkit/providers/filesystem/operators/__init__.py +0 -0
  29. airflow_toolkit-2.0.1/src/airflow_toolkit/providers/http_to_data_lake/__init__.py +0 -0
  30. airflow_toolkit-2.0.1/src/airflow_toolkit/providers/http_to_data_lake/operators/__init__.py +0 -0
  31. airflow_toolkit-2.0.1/src/airflow_toolkit/providers/http_to_data_lake/operators/http_to_data_lake.py +0 -250
  32. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/LICENSE.txt +0 -0
  33. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/setup.cfg +0 -0
  34. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit/__init__.py +0 -0
  35. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit/_compact/airflow_shim.py +0 -0
  36. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit/compression_utils.py +0 -0
  37. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit/exceptions.py +0 -0
  38. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit/filesystems/__init__.py +0 -0
  39. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit/filesystems/filesystem_factory.py +0 -0
  40. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit/filesystems/impl/__init__.py +0 -0
  41. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit/filesystems/impl/google_cloud_storage_filesystem.py +0 -0
  42. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit/filesystems/impl/s3_filesystem.py +0 -0
  43. {airflow_toolkit-2.0.1/src/airflow_toolkit/providers → airflow_toolkit-2.2.0/src/airflow_toolkit/notifications/channels}/__init__.py +0 -0
  44. {airflow_toolkit-2.0.1/src/airflow_toolkit/providers/azure → airflow_toolkit-2.2.0/src/airflow_toolkit/providers}/__init__.py +0 -0
  45. {airflow_toolkit-2.0.1/src/airflow_toolkit/providers/azure/hooks → airflow_toolkit-2.2.0/src/airflow_toolkit/providers/azure}/__init__.py +0 -0
  46. {airflow_toolkit-2.0.1/src/airflow_toolkit/providers/data_lake → airflow_toolkit-2.2.0/src/airflow_toolkit/providers/azure/hooks}/__init__.py +0 -0
  47. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit/providers/azure/hooks/azure_databricks.py +0 -0
  48. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit/providers/azure/hooks/azure_file_share.py +0 -0
  49. {airflow_toolkit-2.0.1/src/airflow_toolkit/providers/data_lake/operators → airflow_toolkit-2.2.0/src/airflow_toolkit/providers/deltalake}/__init__.py +0 -0
  50. {airflow_toolkit-2.0.1/src/airflow_toolkit/providers/deltalake → airflow_toolkit-2.2.0/src/airflow_toolkit/providers/deltalake/operators}/__init__.py +0 -0
  51. {airflow_toolkit-2.0.1/src/airflow_toolkit/providers/deltalake/operators → airflow_toolkit-2.2.0/src/airflow_toolkit/providers/deltalake/sensors}/__init__.py +0 -0
  52. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit/providers/deltalake/sensors/filesystem_file.py +0 -0
  53. {airflow_toolkit-2.0.1/src/airflow_toolkit/providers/deltalake/sensors → airflow_toolkit-2.2.0/src/airflow_toolkit/providers/filesystem}/__init__.py +0 -0
  54. {airflow_toolkit-2.0.1/src/airflow_toolkit/providers/filesystem → airflow_toolkit-2.2.0/src/airflow_toolkit/providers/filesystem/operators}/__init__.py +0 -0
  55. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit/providers/package.py +0 -0
  56. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit/py.typed +0 -0
  57. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit.egg-info/dependency_links.txt +0 -0
  58. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit.egg-info/entry_points.txt +0 -0
  59. {airflow_toolkit-2.0.1 → airflow_toolkit-2.2.0}/src/airflow_toolkit.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: airflow-toolkit
3
- Version: 2.0.1
3
+ Version: 2.2.0
4
4
  Summary: A toolkit of operators, hooks and utilities for Apache Airflow 3
5
5
  Author-email: Biel Llobera <biel_llobera@dkl.digital>
6
6
  Requires-Python: <3.15,>=3.11
@@ -23,8 +23,6 @@ Provides-Extra: azure
23
23
  Requires-Dist: apache-airflow-providers-microsoft-azure>=8; extra == "azure"
24
24
  Provides-Extra: sftp
25
25
  Requires-Dist: apache-airflow-providers-sftp>=5.3; extra == "sftp"
26
- Provides-Extra: slack
27
- Requires-Dist: apache-airflow-providers-slack>=9.3; extra == "slack"
28
26
  Provides-Extra: http
29
27
  Requires-Dist: apache-airflow-providers-http>=5.6.4; extra == "http"
30
28
  Requires-Dist: requests>=2.31.0; extra == "http"
@@ -46,7 +44,6 @@ Requires-Dist: apache-airflow-providers-amazon>=9.15.0; extra == "airflow3-full"
46
44
  Requires-Dist: apache-airflow-providers-google>=18; extra == "airflow3-full"
47
45
  Requires-Dist: apache-airflow-providers-microsoft-azure>=8; extra == "airflow3-full"
48
46
  Requires-Dist: apache-airflow-providers-sftp>=5.3; extra == "airflow3-full"
49
- Requires-Dist: apache-airflow-providers-slack>=9.3; extra == "airflow3-full"
50
47
  Requires-Dist: apache-airflow-providers-http>=5.6.4; extra == "airflow3-full"
51
48
  Requires-Dist: requests>=2.31.0; extra == "airflow3-full"
52
49
  Requires-Dist: jmespath<2,>=1.0.1; extra == "airflow3-full"
@@ -437,74 +434,99 @@ Because `AzureDatabricksSqlHook` implements `DbApiHook`, it can be used as `sour
437
434
 
438
435
  ## Notifications
439
436
 
440
- ### Slack (incoming webhook)
437
+ Send rich failure notifications to Slack, email, Microsoft Teams, and Discord from a single call. The notification system is built around three ideas:
441
438
 
442
- Send DAG or task failure alerts to a Slack channel using `dag_failure_slack_notification_webhook`. Requires a Slack App with Incoming Webhooks enabled.
439
+ 1. **Context builder** — extracts DAG run metadata (run ID, logical date, schedule, interval, duration, environment) from the Airflow callback context once, and makes it available to all channels.
440
+ 2. **Channel formatters** — each channel (Slack Block Kit, HTML email, Teams Adaptive Card, Discord embed) formats the same context into the right payload for that platform.
441
+ 3. **Two usage patterns** — as an `on_failure_callback` (invisible to the graph) or as an explicit `@task` node in the DAG graph.
443
442
 
444
- Create an Airflow connection named `SLACK_WEBHOOK_NOTIFICATION_CONN` (or set `AIRFLOW_CONN_SLACK_WEBHOOK_NOTIFICATION_CONN`).
443
+ ### Pattern A — `on_failure_callback`
445
444
 
446
- #### DAG-level notification
445
+ The callback fires automatically when the DAG run fails. Nothing appears in the task graph.
447
446
 
448
447
  ```python
449
- from datetime import datetime, timedelta
450
- from airflow.sdk import DAG
451
- from airflow.providers.standard.operators.bash import BashOperator
452
- from airflow_toolkit.notifications.slack.webhook import dag_failure_slack_notification_webhook
448
+ from airflow_toolkit.notifications import dag_failure_notification
453
449
 
454
450
  with DAG(
455
451
  'my_pipeline',
456
- schedule=timedelta(days=1),
457
- start_date=datetime(2024, 1, 1),
458
- catchup=False,
459
- on_failure_callback=dag_failure_slack_notification_webhook(),
460
- ) as dag:
461
-
462
- t = BashOperator(task_id='run', bash_command='python my_script.py')
452
+ schedule='0 6 * * *',
453
+ on_failure_callback=dag_failure_notification(
454
+ channels=['slack', 'email'],
455
+ environment='PROD',
456
+ slack_webhook_url='https://hooks.slack.com/services/...',
457
+ email_to=['data-team@example.com'],
458
+ ),
459
+ ):
460
+ ...
463
461
  ```
464
462
 
465
- #### Task-level notification
463
+ ### Pattern B — explicit task in the graph
464
+
465
+ `get_failure_notification_task` returns an Airflow task with `trigger_rule='one_failed'`. It fires when any upstream task fails and is **skipped** when all upstream tasks succeed. The notification step is visible in the Airflow UI, has its own logs, and appears in the task history.
466
466
 
467
467
  ```python
468
- BashOperator(
469
- task_id='run',
470
- bash_command='python my_script.py',
471
- on_failure_callback=dag_failure_slack_notification_webhook(source='TASK'),
472
- )
473
- ```
468
+ from airflow_toolkit.notifications import get_failure_notification_task
474
469
 
475
- #### Custom message
470
+ with DAG('my_pipeline', schedule='0 6 * * *'):
471
+ extract = ...
472
+ load = ...
476
473
 
477
- ```python
478
- on_failure_callback=dag_failure_slack_notification_webhook(
479
- text='Pipeline {{ dag.dag_id }} failed on {{ ds }}',
480
- include_blocks=False,
481
- )
474
+ notify = get_failure_notification_task(
475
+ channels=['slack', 'email'],
476
+ environment='PROD',
477
+ slack_webhook_url='https://hooks.slack.com/services/...',
478
+ email_to=['data-team@example.com'],
479
+ )
480
+
481
+ [extract, load] >> notify
482
482
  ```
483
483
 
484
- #### Custom Slack blocks
484
+ ### Supported channels
485
+
486
+ | Channel | Parameter | Requires |
487
+ |---|---|---|
488
+ | `slack` | `slack_webhook_url` | — |
489
+ | `email` | `email_to: list[str]`, `email_from` (optional) | Airflow SMTP configured |
490
+ | `teams` | `teams_webhook_url` | — |
491
+ | `discord` | `discord_webhook_url` | — |
492
+
493
+ Any combination of channels can be used in a single call. Channels are delivered sequentially in the order listed.
494
+
495
+ ### All parameters
485
496
 
486
497
  ```python
487
- on_failure_callback=dag_failure_slack_notification_webhook(
488
- blocks={
489
- 'type': 'section',
490
- 'text': {'type': 'mrkdwn', 'text': '*Pipeline failed* check the logs.'},
491
- }
498
+ dag_failure_notification(
499
+ channels=['slack', 'email', 'teams', 'discord'],
500
+
501
+ # Environment label shown in every notification (DEV / STG / PROD)
502
+ environment='PROD',
503
+
504
+ # Slack
505
+ slack_webhook_url='https://hooks.slack.com/services/...',
506
+
507
+ # Email
508
+ email_to=['ops@example.com'],
509
+ email_from=None, # uses Airflow SMTP default if omitted
510
+
511
+ # Teams
512
+ teams_webhook_url='https://outlook.office.com/webhook/...',
513
+
514
+ # Discord
515
+ discord_webhook_url='https://discord.com/api/webhooks/...',
492
516
  )
493
517
  ```
494
518
 
495
- Default notification format:
519
+ `get_failure_notification_task` accepts the same parameters.
496
520
 
497
- ![image](https://github.com/DeepKernelLabs/airflow-toolkit/assets/152852247/52a5bf95-21bc-4c3b-8093-79953c0c5d61)
521
+ ### Environment colours
498
522
 
499
- **Parameters:**
523
+ Each environment maps to a distinct colour across all channels so alerts are recognisable at a glance:
500
524
 
501
- | Parameter | Type | Description |
502
- |---|---|---|
503
- | `text` | `str` (optional) | Plain-text message. Overrides blocks if provided. |
504
- | `blocks` | `dict` (optional) | Custom Slack Block Kit payload. |
505
- | `include_blocks` | `bool` (optional) | Whether to include the default formatted block. |
506
- | `source` | `'DAG'` \| `'TASK'` (optional) | Source of the failure. Default: `'DAG'`. |
507
- | `image_url` | `str` (optional) | Accessory image URL. Can also be set via `AIRFLOW_TOOLKIT__SLACK_NOTIFICATION_IMG_URL`. |
525
+ | Environment | Slack | Teams | Discord |
526
+ |---|---|---|---|
527
+ | `PROD` | 🔴 red | Attention (red) | #ED4245 |
528
+ | `STG` | 🟡 yellow | Warning (orange) | #FF8C00 |
529
+ | `DEV` | 🟢 green | Good (green) | #57F287 |
508
530
 
509
531
  ---
510
532
 
@@ -381,74 +381,99 @@ Because `AzureDatabricksSqlHook` implements `DbApiHook`, it can be used as `sour
381
381
 
382
382
  ## Notifications
383
383
 
384
- ### Slack (incoming webhook)
384
+ Send rich failure notifications to Slack, email, Microsoft Teams, and Discord from a single call. The notification system is built around three ideas:
385
385
 
386
- Send DAG or task failure alerts to a Slack channel using `dag_failure_slack_notification_webhook`. Requires a Slack App with Incoming Webhooks enabled.
386
+ 1. **Context builder** — extracts DAG run metadata (run ID, logical date, schedule, interval, duration, environment) from the Airflow callback context once, and makes it available to all channels.
387
+ 2. **Channel formatters** — each channel (Slack Block Kit, HTML email, Teams Adaptive Card, Discord embed) formats the same context into the right payload for that platform.
388
+ 3. **Two usage patterns** — as an `on_failure_callback` (invisible to the graph) or as an explicit `@task` node in the DAG graph.
387
389
 
388
- Create an Airflow connection named `SLACK_WEBHOOK_NOTIFICATION_CONN` (or set `AIRFLOW_CONN_SLACK_WEBHOOK_NOTIFICATION_CONN`).
390
+ ### Pattern A — `on_failure_callback`
389
391
 
390
- #### DAG-level notification
392
+ The callback fires automatically when the DAG run fails. Nothing appears in the task graph.
391
393
 
392
394
  ```python
393
- from datetime import datetime, timedelta
394
- from airflow.sdk import DAG
395
- from airflow.providers.standard.operators.bash import BashOperator
396
- from airflow_toolkit.notifications.slack.webhook import dag_failure_slack_notification_webhook
395
+ from airflow_toolkit.notifications import dag_failure_notification
397
396
 
398
397
  with DAG(
399
398
  'my_pipeline',
400
- schedule=timedelta(days=1),
401
- start_date=datetime(2024, 1, 1),
402
- catchup=False,
403
- on_failure_callback=dag_failure_slack_notification_webhook(),
404
- ) as dag:
405
-
406
- t = BashOperator(task_id='run', bash_command='python my_script.py')
399
+ schedule='0 6 * * *',
400
+ on_failure_callback=dag_failure_notification(
401
+ channels=['slack', 'email'],
402
+ environment='PROD',
403
+ slack_webhook_url='https://hooks.slack.com/services/...',
404
+ email_to=['data-team@example.com'],
405
+ ),
406
+ ):
407
+ ...
407
408
  ```
408
409
 
409
- #### Task-level notification
410
+ ### Pattern B — explicit task in the graph
411
+
412
+ `get_failure_notification_task` returns an Airflow task with `trigger_rule='one_failed'`. It fires when any upstream task fails and is **skipped** when all upstream tasks succeed. The notification step is visible in the Airflow UI, has its own logs, and appears in the task history.
410
413
 
411
414
  ```python
412
- BashOperator(
413
- task_id='run',
414
- bash_command='python my_script.py',
415
- on_failure_callback=dag_failure_slack_notification_webhook(source='TASK'),
416
- )
417
- ```
415
+ from airflow_toolkit.notifications import get_failure_notification_task
418
416
 
419
- #### Custom message
417
+ with DAG('my_pipeline', schedule='0 6 * * *'):
418
+ extract = ...
419
+ load = ...
420
420
 
421
- ```python
422
- on_failure_callback=dag_failure_slack_notification_webhook(
423
- text='Pipeline {{ dag.dag_id }} failed on {{ ds }}',
424
- include_blocks=False,
425
- )
421
+ notify = get_failure_notification_task(
422
+ channels=['slack', 'email'],
423
+ environment='PROD',
424
+ slack_webhook_url='https://hooks.slack.com/services/...',
425
+ email_to=['data-team@example.com'],
426
+ )
427
+
428
+ [extract, load] >> notify
426
429
  ```
427
430
 
428
- #### Custom Slack blocks
431
+ ### Supported channels
432
+
433
+ | Channel | Parameter | Requires |
434
+ |---|---|---|
435
+ | `slack` | `slack_webhook_url` | — |
436
+ | `email` | `email_to: list[str]`, `email_from` (optional) | Airflow SMTP configured |
437
+ | `teams` | `teams_webhook_url` | — |
438
+ | `discord` | `discord_webhook_url` | — |
439
+
440
+ Any combination of channels can be used in a single call. Channels are delivered sequentially in the order listed.
441
+
442
+ ### All parameters
429
443
 
430
444
  ```python
431
- on_failure_callback=dag_failure_slack_notification_webhook(
432
- blocks={
433
- 'type': 'section',
434
- 'text': {'type': 'mrkdwn', 'text': '*Pipeline failed* check the logs.'},
435
- }
445
+ dag_failure_notification(
446
+ channels=['slack', 'email', 'teams', 'discord'],
447
+
448
+ # Environment label shown in every notification (DEV / STG / PROD)
449
+ environment='PROD',
450
+
451
+ # Slack
452
+ slack_webhook_url='https://hooks.slack.com/services/...',
453
+
454
+ # Email
455
+ email_to=['ops@example.com'],
456
+ email_from=None, # uses Airflow SMTP default if omitted
457
+
458
+ # Teams
459
+ teams_webhook_url='https://outlook.office.com/webhook/...',
460
+
461
+ # Discord
462
+ discord_webhook_url='https://discord.com/api/webhooks/...',
436
463
  )
437
464
  ```
438
465
 
439
- Default notification format:
466
+ `get_failure_notification_task` accepts the same parameters.
440
467
 
441
- ![image](https://github.com/DeepKernelLabs/airflow-toolkit/assets/152852247/52a5bf95-21bc-4c3b-8093-79953c0c5d61)
468
+ ### Environment colours
442
469
 
443
- **Parameters:**
470
+ Each environment maps to a distinct colour across all channels so alerts are recognisable at a glance:
444
471
 
445
- | Parameter | Type | Description |
446
- |---|---|---|
447
- | `text` | `str` (optional) | Plain-text message. Overrides blocks if provided. |
448
- | `blocks` | `dict` (optional) | Custom Slack Block Kit payload. |
449
- | `include_blocks` | `bool` (optional) | Whether to include the default formatted block. |
450
- | `source` | `'DAG'` \| `'TASK'` (optional) | Source of the failure. Default: `'DAG'`. |
451
- | `image_url` | `str` (optional) | Accessory image URL. Can also be set via `AIRFLOW_TOOLKIT__SLACK_NOTIFICATION_IMG_URL`. |
472
+ | Environment | Slack | Teams | Discord |
473
+ |---|---|---|---|
474
+ | `PROD` | 🔴 red | Attention (red) | #ED4245 |
475
+ | `STG` | 🟡 yellow | Warning (orange) | #FF8C00 |
476
+ | `DEV` | 🟢 green | Good (green) | #57F287 |
452
477
 
453
478
  ---
454
479
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "airflow-toolkit"
3
- version = "2.0.1"
3
+ version = "2.2.0"
4
4
  description = "A toolkit of operators, hooks and utilities for Apache Airflow 3"
5
5
  authors = [{ name = "Biel Llobera", email = "biel_llobera@dkl.digital" }]
6
6
  requires-python = ">=3.11,<3.15"
@@ -37,9 +37,6 @@ azure = [
37
37
  sftp = [
38
38
  "apache-airflow-providers-sftp>=5.3",
39
39
  ]
40
- slack = [
41
- "apache-airflow-providers-slack>=9.3",
42
- ]
43
40
  http = [
44
41
  "apache-airflow-providers-http>=5.6.4",
45
42
  "requests>=2.31.0",
@@ -64,7 +61,6 @@ airflow3-full = [
64
61
  "apache-airflow-providers-google>=18",
65
62
  "apache-airflow-providers-microsoft-azure>=8",
66
63
  "apache-airflow-providers-sftp>=5.3",
67
- "apache-airflow-providers-slack>=9.3",
68
64
  "apache-airflow-providers-http>=5.6.4",
69
65
  "requests>=2.31.0",
70
66
  "jmespath>=1.0.1,<2",
@@ -17,4 +17,10 @@ class FilesystemProtocol(Protocol):
17
17
 
18
18
  def check_prefix(self, prefix: str) -> bool: ...
19
19
 
20
- def list_files(self, prefix: str) -> list[str]: ...
20
+ def list_files(self, prefix: str) -> list[str]:
21
+ """Return paths of all files under *prefix*, recursively.
22
+
23
+ Directories are never included. Returned paths must be usable
24
+ with ``read()``, ``delete_file()``, etc.
25
+ """
26
+ ...
@@ -8,7 +8,7 @@ from airflow_toolkit.providers.azure.hooks.azure_databricks import (
8
8
  AzureDatabricksVolumeHook,
9
9
  )
10
10
 
11
- logger = logging.getLogger(__file__)
11
+ logger = logging.getLogger(__name__)
12
12
 
13
13
 
14
14
  class AzureDatabricksVolumeFilesystem(FilesystemProtocol):
@@ -52,16 +52,9 @@ class AzureDatabricksVolumeFilesystem(FilesystemProtocol):
52
52
  conn.files.delete_directory(prefix)
53
53
 
54
54
  def check_file(self, path: str) -> bool:
55
- prefix = path.rsplit("/", 1)[0]
56
-
57
55
  try:
58
- file_list_path = [
59
- entry.path == path
60
- for entry in self.hook.get_conn().files.list_directory_contents(prefix)
61
- if not entry.is_directory
62
- ]
63
- return any(file_list_path)
64
-
56
+ self.hook.get_conn().files.get_metadata(path)
57
+ return True
65
58
  except NotFound:
66
59
  return False
67
60
 
@@ -79,8 +72,15 @@ class AzureDatabricksVolumeFilesystem(FilesystemProtocol):
79
72
  return False
80
73
 
81
74
  def list_files(self, prefix: str) -> list[str]:
82
- return [
83
- entry.path
84
- for entry in self.hook.get_conn().files.list_directory_contents(prefix)
85
- if not entry.is_directory and entry.path is not None
86
- ]
75
+ results: list[str] = []
76
+ try:
77
+ for entry in self.hook.get_conn().files.list_directory_contents(prefix):
78
+ if entry.path is None:
79
+ continue
80
+ if entry.is_directory:
81
+ results.extend(self.list_files(entry.path))
82
+ else:
83
+ results.append(entry.path)
84
+ except NotFound:
85
+ return []
86
+ return results
@@ -8,7 +8,7 @@ from airflow_toolkit.providers.azure.hooks.azure_file_share import (
8
8
  AzureFileShareServicePrincipalHook,
9
9
  )
10
10
 
11
- logger = logging.getLogger(__file__)
11
+ logger = logging.getLogger(__name__)
12
12
 
13
13
 
14
14
  class AzureFileShareFilesystem(FilesystemProtocol):
@@ -47,8 +47,11 @@ class AzureFileShareFilesystem(FilesystemProtocol):
47
47
  return self.hook.get_conn().get_directory_client(prefix).exists()
48
48
 
49
49
  def list_files(self, prefix: str) -> list[str]:
50
- return [
51
- f"{prefix}/{item.name}"
52
- for item in self.hook.get_conn().list_directories_and_files(prefix)
53
- if not item.is_directory
54
- ]
50
+ results: list[str] = []
51
+ for item in self.hook.get_conn().list_directories_and_files(prefix):
52
+ full_path = f"{prefix}/{item.name}"
53
+ if isinstance(item, DirectoryProperties):
54
+ results.extend(self.list_files(full_path))
55
+ else:
56
+ results.append(full_path)
57
+ return results
@@ -5,7 +5,7 @@ from airflow.providers.microsoft.azure.hooks.wasb import WasbHook
5
5
 
6
6
  from airflow_toolkit.filesystems.filesystem_protocol import FilesystemProtocol
7
7
 
8
- logger = logging.getLogger(__file__)
8
+ logger = logging.getLogger(__name__)
9
9
 
10
10
 
11
11
  class BlobStorageFilesystem(FilesystemProtocol):
@@ -15,7 +15,7 @@ class BlobStorageFilesystem(FilesystemProtocol):
15
15
  def read(self, path: str) -> bytes:
16
16
  container_name, blob_name = _get_container_and_blob_name(path)
17
17
  stream = self.hook.download(container_name=container_name, blob_name=blob_name)
18
- return stream.readall().encode()
18
+ return stream.readall()
19
19
 
20
20
  def write(self, data: str | bytes | BytesIO, path: str):
21
21
  container_name, blob_name = _get_container_and_blob_name(path)
@@ -47,4 +47,4 @@ class LocalFilesystem(FilesystemProtocol):
47
47
 
48
48
  def list_files(self, prefix: str) -> list[str]:
49
49
  path_to_list = Path(self.hook.get_path()) / prefix.lstrip("/")
50
- return [str(file) for file in path_to_list.glob("*") if file.is_file()]
50
+ return [str(file) for file in path_to_list.rglob("*") if file.is_file()]
@@ -55,4 +55,5 @@ class SFTPFilesystem(FilesystemProtocol):
55
55
  return self.hook.isdir(prefix)
56
56
 
57
57
  def list_files(self, prefix: str) -> list[str]:
58
- return self.hook.list_directory(prefix)
58
+ files, _, _ = self.hook.get_tree_map(prefix)
59
+ return files
@@ -0,0 +1,134 @@
1
+ from __future__ import annotations
2
+
3
+ import typing
4
+
5
+ from airflow_toolkit.notifications.context import (
6
+ NotificationContext,
7
+ build_notification_context,
8
+ )
9
+
10
+ __all__ = [
11
+ "dag_failure_notification",
12
+ "get_failure_notification_task",
13
+ "build_notification_context",
14
+ "NotificationContext",
15
+ ]
16
+
17
+ _VALID_CHANNELS = {"slack", "email", "teams", "discord"}
18
+
19
+
20
+ def dag_failure_notification(
21
+ channels: list[str],
22
+ environment: str = "PROD",
23
+ slack_webhook_url: str | None = None,
24
+ email_to: list[str] | None = None,
25
+ email_from: str | None = None,
26
+ teams_webhook_url: str | None = None,
27
+ discord_webhook_url: str | None = None,
28
+ ) -> typing.Callable[[dict[str, typing.Any]], None]:
29
+ """Return an on_failure_callback that sends DAG failure notifications.
30
+
31
+ Usage::
32
+
33
+ with DAG(..., on_failure_callback=dag_failure_notification(
34
+ channels=["slack", "email"],
35
+ environment="PROD",
36
+ slack_webhook_url="https://hooks.slack.com/services/...",
37
+ email_to=["ops@example.com"],
38
+ )):
39
+ ...
40
+ """
41
+ unknown = set(channels) - _VALID_CHANNELS
42
+ if unknown:
43
+ raise ValueError(
44
+ f"Unknown notification channels: {unknown}. Valid: {_VALID_CHANNELS}"
45
+ )
46
+
47
+ def callback(context: dict[str, typing.Any]) -> None:
48
+ ctx = build_notification_context(context, environment=environment)
49
+
50
+ if "slack" in channels:
51
+ if not slack_webhook_url:
52
+ raise ValueError(
53
+ "slack_webhook_url is required when 'slack' is in channels"
54
+ )
55
+ from airflow_toolkit.notifications.channels.slack import (
56
+ send_slack_notification,
57
+ )
58
+
59
+ send_slack_notification(ctx, webhook_url=slack_webhook_url)
60
+
61
+ if "email" in channels:
62
+ if not email_to:
63
+ raise ValueError("email_to is required when 'email' is in channels")
64
+ from airflow_toolkit.notifications.channels.email import (
65
+ send_email_notification,
66
+ )
67
+
68
+ send_email_notification(ctx, to=email_to, from_email=email_from)
69
+
70
+ if "teams" in channels:
71
+ if not teams_webhook_url:
72
+ raise ValueError(
73
+ "teams_webhook_url is required when 'teams' is in channels"
74
+ )
75
+ from airflow_toolkit.notifications.channels.teams import (
76
+ send_teams_notification,
77
+ )
78
+
79
+ send_teams_notification(ctx, webhook_url=teams_webhook_url)
80
+
81
+ if "discord" in channels:
82
+ if not discord_webhook_url:
83
+ raise ValueError(
84
+ "discord_webhook_url is required when 'discord' is in channels"
85
+ )
86
+ from airflow_toolkit.notifications.channels.discord import (
87
+ send_discord_notification,
88
+ )
89
+
90
+ send_discord_notification(ctx, webhook_url=discord_webhook_url)
91
+
92
+ return callback
93
+
94
+
95
+ def get_failure_notification_task(
96
+ channels: list[str],
97
+ environment: str = "PROD",
98
+ slack_webhook_url: str | None = None,
99
+ email_to: list[str] | None = None,
100
+ email_from: str | None = None,
101
+ teams_webhook_url: str | None = None,
102
+ discord_webhook_url: str | None = None,
103
+ ) -> typing.Any:
104
+ """Return an @task with trigger_rule='one_failed' that sends failure notifications.
105
+
106
+ Usage::
107
+
108
+ notify = get_failure_notification_task(channels=["slack"], environment="PROD")
109
+ [task_a, task_b] >> notify
110
+ """
111
+ from airflow.sdk import task as airflow_task
112
+
113
+ _channels = channels
114
+ _environment = environment
115
+ _slack_webhook_url = slack_webhook_url
116
+ _email_to = email_to
117
+ _email_from = email_from
118
+ _teams_webhook_url = teams_webhook_url
119
+ _discord_webhook_url = discord_webhook_url
120
+
121
+ @airflow_task(task_id="notify_failure", trigger_rule="one_failed")
122
+ def notify_failure(**context: typing.Any) -> None:
123
+ callback = dag_failure_notification(
124
+ channels=_channels,
125
+ environment=_environment,
126
+ slack_webhook_url=_slack_webhook_url,
127
+ email_to=_email_to,
128
+ email_from=_email_from,
129
+ teams_webhook_url=_teams_webhook_url,
130
+ discord_webhook_url=_discord_webhook_url,
131
+ )
132
+ callback(context)
133
+
134
+ return notify_failure()
@@ -0,0 +1,57 @@
1
+ from __future__ import annotations
2
+
3
+ import typing
4
+
5
+ from airflow_toolkit.notifications.context import NotificationContext
6
+
7
+ # Discord embed colors (decimal RGB)
8
+ _ENV_COLOR: dict[str, int] = {
9
+ "PROD": 15548997, # red #ED4245
10
+ "STG": 16750592, # orange #FF8C00
11
+ "DEV": 5763719, # green #57F287
12
+ }
13
+
14
+
15
+ def build_discord_payload(ctx: NotificationContext) -> dict[str, typing.Any]:
16
+ env = ctx["environment"]
17
+ color = _ENV_COLOR.get(env, 15548997)
18
+
19
+ return {
20
+ "content": f"\U0001f534 DAG `{ctx['dag_id']}` failed",
21
+ "embeds": [
22
+ {
23
+ "title": f"[{env}] DAG Failure — {ctx['dag_id']}",
24
+ "url": ctx["dag_url"],
25
+ "color": color,
26
+ "fields": [
27
+ {"name": "Run ID", "value": ctx["run_id"], "inline": False},
28
+ {"name": "Environment", "value": env, "inline": True},
29
+ {"name": "Logical Date", "value": ctx["ds"], "inline": True},
30
+ {"name": "Schedule", "value": ctx["schedule"], "inline": True},
31
+ {
32
+ "name": "Interval Start",
33
+ "value": ctx["data_interval_start"],
34
+ "inline": True,
35
+ },
36
+ {
37
+ "name": "Interval End",
38
+ "value": ctx["data_interval_end"],
39
+ "inline": True,
40
+ },
41
+ {
42
+ "name": "Execution At",
43
+ "value": ctx["execution_at"],
44
+ "inline": True,
45
+ },
46
+ {"name": "Duration", "value": ctx["duration"], "inline": True},
47
+ ],
48
+ "footer": {"text": ctx["base_url"]},
49
+ }
50
+ ],
51
+ }
52
+
53
+
54
+ def send_discord_notification(ctx: NotificationContext, webhook_url: str) -> None:
55
+ import requests
56
+
57
+ requests.post(webhook_url, json=build_discord_payload(ctx)).raise_for_status()