@sylix/coworker 2.0.11 → 2.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. package/dist/commands/slash/config.d.ts.map +1 -1
  2. package/dist/commands/slash/config.js +22 -4
  3. package/dist/commands/slash/config.js.map +1 -1
  4. package/dist/core/CoWorkerAgent.d.ts.map +1 -1
  5. package/dist/core/CoWorkerAgent.js +6 -3
  6. package/dist/core/CoWorkerAgent.js.map +1 -1
  7. package/dist/skills/defaults/accessibility/screen-reader-testing.md +545 -0
  8. package/dist/skills/defaults/accessibility/wcag-audit-patterns.md +555 -0
  9. package/dist/skills/defaults/ai-ml/rag.md +276 -0
  10. package/dist/skills/defaults/backend-development/api-design-principles.md +528 -0
  11. package/dist/skills/defaults/backend-development/api-design.md +285 -0
  12. package/dist/skills/defaults/backend-development/architecture-patterns.md +494 -0
  13. package/dist/skills/defaults/backend-development/async-python.md +237 -0
  14. package/dist/skills/defaults/backend-development/auth-implementation-patterns.md +638 -0
  15. package/dist/skills/defaults/backend-development/bazel-build-optimization.md +387 -0
  16. package/dist/skills/defaults/backend-development/billing-automation/SKILL.md +566 -0
  17. package/dist/skills/defaults/backend-development/code-review-excellence.md +538 -0
  18. package/dist/skills/defaults/backend-development/cqrs-implementation.md +554 -0
  19. package/dist/skills/defaults/backend-development/database-design.md +305 -0
  20. package/dist/skills/defaults/backend-development/debugging-strategies.md +536 -0
  21. package/dist/skills/defaults/backend-development/e2e-testing-patterns.md +544 -0
  22. package/dist/skills/defaults/backend-development/error-handling-patterns.md +641 -0
  23. package/dist/skills/defaults/backend-development/fastapi-templates.md +559 -0
  24. package/dist/skills/defaults/backend-development/fastapi.md +309 -0
  25. package/dist/skills/defaults/backend-development/git-advanced-workflows.md +405 -0
  26. package/dist/skills/defaults/backend-development/microservices-patterns.md +595 -0
  27. package/dist/skills/defaults/backend-development/microservices.md +284 -0
  28. package/dist/skills/defaults/backend-development/monorepo-management.md +623 -0
  29. package/dist/skills/defaults/backend-development/nodejs-backend-patterns.md +1048 -0
  30. package/dist/skills/defaults/backend-development/nx-workspace-patterns.md +457 -0
  31. package/dist/skills/defaults/backend-development/paypal-integration/SKILL.md +478 -0
  32. package/dist/skills/defaults/backend-development/pci-compliance/SKILL.md +480 -0
  33. package/dist/skills/defaults/backend-development/python-anti-patterns.md +349 -0
  34. package/dist/skills/defaults/backend-development/python-background-jobs.md +364 -0
  35. package/dist/skills/defaults/backend-development/python-code-style.md +360 -0
  36. package/dist/skills/defaults/backend-development/python-configuration.md +368 -0
  37. package/dist/skills/defaults/backend-development/python-design-patterns.md +296 -0
  38. package/dist/skills/defaults/backend-development/python-error-handling.md +323 -0
  39. package/dist/skills/defaults/backend-development/python-packaging.md +887 -0
  40. package/dist/skills/defaults/backend-development/python-performance-optimization.md +874 -0
  41. package/dist/skills/defaults/backend-development/python-project-structure.md +252 -0
  42. package/dist/skills/defaults/backend-development/python-resilience.md +376 -0
  43. package/dist/skills/defaults/backend-development/python-resource-management.md +421 -0
  44. package/dist/skills/defaults/backend-development/python-type-safety.md +428 -0
  45. package/dist/skills/defaults/backend-development/sql-optimization-patterns.md +509 -0
  46. package/dist/skills/defaults/backend-development/stripe-integration/SKILL.md +522 -0
  47. package/dist/skills/defaults/backend-development/turborepo-caching.md +376 -0
  48. package/dist/skills/defaults/blockchain/defi-protocol-templates.md +430 -0
  49. package/dist/skills/defaults/blockchain/nft-standards.md +364 -0
  50. package/dist/skills/defaults/blockchain/solidity-security.md +514 -0
  51. package/dist/skills/defaults/blockchain/web3-testing.md +360 -0
  52. package/dist/skills/defaults/business/competitive-landscape/SKILL.md +527 -0
  53. package/dist/skills/defaults/business/market-sizing-analysis/SKILL.md +451 -0
  54. package/dist/skills/defaults/business/startup-financial-modeling/SKILL.md +494 -0
  55. package/dist/skills/defaults/business/startup-metrics-framework/SKILL.md +564 -0
  56. package/dist/skills/defaults/business/team-composition-analysis.md +437 -0
  57. package/dist/skills/defaults/compliance/employment-contract-templates/SKILL.md +527 -0
  58. package/dist/skills/defaults/compliance/gdpr-data-handling/SKILL.md +630 -0
  59. package/dist/skills/defaults/data-engineering/airflow-dag-patterns.md +436 -0
  60. package/dist/skills/defaults/data-engineering/airflow.md +519 -0
  61. package/dist/skills/defaults/data-engineering/data-quality.md +583 -0
  62. package/dist/skills/defaults/data-engineering/dbt-transformation-patterns.md +482 -0
  63. package/dist/skills/defaults/data-engineering/dbt.md +556 -0
  64. package/dist/skills/defaults/data-engineering/ml-pipeline-workflow/SKILL.md +247 -0
  65. package/dist/skills/defaults/data-engineering/spark-optimization.md +348 -0
  66. package/dist/skills/defaults/data-engineering/spark.md +411 -0
  67. package/dist/skills/defaults/database/postgresql.md +202 -0
  68. package/dist/skills/defaults/debugging/systematic-debugging.md +249 -0
  69. package/dist/skills/defaults/devops/architecture-decision-records.md +448 -0
  70. package/dist/skills/defaults/devops/changelog-automation.md +580 -0
  71. package/dist/skills/defaults/devops/cicd.md +314 -0
  72. package/dist/skills/defaults/devops/cloud.md +263 -0
  73. package/dist/skills/defaults/devops/code-review-excellence.md +299 -0
  74. package/dist/skills/defaults/devops/cost-optimization.md +295 -0
  75. package/dist/skills/defaults/devops/deployment-pipeline-design.md +356 -0
  76. package/dist/skills/defaults/devops/docker.md +281 -0
  77. package/dist/skills/defaults/devops/git-workflows.md +205 -0
  78. package/dist/skills/defaults/devops/github-actions.md +311 -0
  79. package/dist/skills/defaults/devops/gitlab-ci-patterns.md +266 -0
  80. package/dist/skills/defaults/devops/hybrid-cloud-networking.md +241 -0
  81. package/dist/skills/defaults/devops/istio-traffic-management.md +327 -0
  82. package/dist/skills/defaults/devops/kubernetes.md +339 -0
  83. package/dist/skills/defaults/devops/linkerd-patterns.md +311 -0
  84. package/dist/skills/defaults/devops/multi-cloud-architecture.md +181 -0
  85. package/dist/skills/defaults/devops/observability.md +243 -0
  86. package/dist/skills/defaults/devops/openapi-spec-generation.md +1024 -0
  87. package/dist/skills/defaults/devops/postmortem-writing.md +396 -0
  88. package/dist/skills/defaults/devops/prometheus-configuration.md +265 -0
  89. package/dist/skills/defaults/devops/secrets-management.md +341 -0
  90. package/dist/skills/defaults/devops/service-mesh-observability.md +385 -0
  91. package/dist/skills/defaults/devops/terraform-module-library.md +244 -0
  92. package/dist/skills/defaults/finance/backtesting-frameworks/SKILL.md +663 -0
  93. package/dist/skills/defaults/finance/risk-metrics-calculation/SKILL.md +557 -0
  94. package/dist/skills/defaults/frontend/accessibility-compliance.md +420 -0
  95. package/dist/skills/defaults/frontend/design-system-patterns.md +337 -0
  96. package/dist/skills/defaults/frontend/interaction-design.md +327 -0
  97. package/dist/skills/defaults/frontend/javascript.md +311 -0
  98. package/dist/skills/defaults/frontend/modern-javascript-patterns.md +927 -0
  99. package/dist/skills/defaults/frontend/react-native-design.md +440 -0
  100. package/dist/skills/defaults/frontend/react.md +345 -0
  101. package/dist/skills/defaults/frontend/responsive-design.md +472 -0
  102. package/dist/skills/defaults/frontend/tailwind-design-system.md +337 -0
  103. package/dist/skills/defaults/frontend/typescript-advanced-types.md +724 -0
  104. package/dist/skills/defaults/frontend/typescript.md +334 -0
  105. package/dist/skills/defaults/frontend/visual-design-foundations.md +326 -0
  106. package/dist/skills/defaults/frontend/web-component-design.md +279 -0
  107. package/dist/skills/defaults/game-development/godot-gdscript-patterns.md +188 -0
  108. package/dist/skills/defaults/game-development/unity-ecs-patterns.md +594 -0
  109. package/dist/skills/defaults/kubernetes/gitops-workflow.md +285 -0
  110. package/dist/skills/defaults/kubernetes/gitops.md +280 -0
  111. package/dist/skills/defaults/kubernetes/helm-chart-scaffolding.md +553 -0
  112. package/dist/skills/defaults/kubernetes/helm.md +343 -0
  113. package/dist/skills/defaults/kubernetes/k8s-manifest-generator.md +501 -0
  114. package/dist/skills/defaults/kubernetes/k8s-security-policies.md +342 -0
  115. package/dist/skills/defaults/kubernetes/manifests.md +330 -0
  116. package/dist/skills/defaults/kubernetes/security.md +337 -0
  117. package/dist/skills/defaults/llm-application/embedding-strategies.md +608 -0
  118. package/dist/skills/defaults/llm-application/hybrid-search-implementation.md +570 -0
  119. package/dist/skills/defaults/llm-application/hybrid-search.md +570 -0
  120. package/dist/skills/defaults/llm-application/langchain-architecture.md +666 -0
  121. package/dist/skills/defaults/llm-application/langchain.md +259 -0
  122. package/dist/skills/defaults/llm-application/llm-evaluation.md +695 -0
  123. package/dist/skills/defaults/llm-application/prompt-engineering-patterns.md +449 -0
  124. package/dist/skills/defaults/llm-application/prompt-engineering.md +219 -0
  125. package/dist/skills/defaults/llm-application/rag-implementation.md +434 -0
  126. package/dist/skills/defaults/llm-application/similarity-search-patterns.md +560 -0
  127. package/dist/skills/defaults/llm-application/similarity-search.md +560 -0
  128. package/dist/skills/defaults/llm-application/vector-index-tuning.md +523 -0
  129. package/dist/skills/defaults/mobile/mobile-android-design.md +440 -0
  130. package/dist/skills/defaults/mobile/mobile-ios-design.md +266 -0
  131. package/dist/skills/defaults/monitoring/distributed-tracing.md +436 -0
  132. package/dist/skills/defaults/monitoring/grafana-dashboards.md +370 -0
  133. package/dist/skills/defaults/monitoring/prometheus-configuration.md +379 -0
  134. package/dist/skills/defaults/monitoring/slo-implementation.md +323 -0
  135. package/dist/skills/defaults/refactoring/code-refactoring.md +349 -0
  136. package/dist/skills/defaults/security/anti-reversing-techniques/SKILL.md +559 -0
  137. package/dist/skills/defaults/security/auditor.md +168 -0
  138. package/dist/skills/defaults/security/binary-analysis-patterns/SKILL.md +438 -0
  139. package/dist/skills/defaults/security/memory-forensics/SKILL.md +483 -0
  140. package/dist/skills/defaults/security/mtls-configuration.md +349 -0
  141. package/dist/skills/defaults/security/protocol-reverse-engineering/SKILL.md +520 -0
  142. package/dist/skills/defaults/security/sast-configuration.md +182 -0
  143. package/dist/skills/defaults/security/security.md +313 -0
  144. package/dist/skills/defaults/security/stride-analysis.md +273 -0
  145. package/dist/skills/defaults/security/threat-mitigation-mapping.md +290 -0
  146. package/dist/skills/defaults/systems/bash-defensive-patterns/SKILL.md +539 -0
  147. package/dist/skills/defaults/systems/bats-testing-patterns/SKILL.md +631 -0
  148. package/dist/skills/defaults/systems/go-concurrency-patterns.md +657 -0
  149. package/dist/skills/defaults/systems/memory-safety-patterns.md +605 -0
  150. package/dist/skills/defaults/systems/rust-async-patterns.md +519 -0
  151. package/dist/skills/defaults/systems/shellcheck-configuration/SKILL.md +456 -0
  152. package/dist/skills/defaults/team-collaboration/multi-reviewer-patterns.md +126 -0
  153. package/dist/skills/defaults/team-collaboration/parallel-feature-development.md +151 -0
  154. package/dist/skills/defaults/testing/javascript-testing-patterns.md +1021 -0
  155. package/dist/skills/defaults/testing/python-testing-patterns.md +351 -0
  156. package/dist/skills/defaults/testing/testing.md +332 -0
  157. package/dist/skills/defaults/workflows/context-driven-development.md +384 -0
  158. package/dist/skills/defaults/workflows/track-management.md +592 -0
  159. package/dist/skills/defaults/workflows/workflow-patterns.md +622 -0
  160. package/dist/skills/index.d.ts +11 -0
  161. package/dist/skills/index.d.ts.map +1 -0
  162. package/dist/skills/index.js +129 -0
  163. package/dist/skills/index.js.map +1 -0
  164. package/dist/utils/character.js +4 -4
  165. package/dist/utils/character.js.map +1 -1
  166. package/dist/utils/inputbar.d.ts.map +1 -1
  167. package/dist/utils/inputbar.js +7 -0
  168. package/dist/utils/inputbar.js.map +1 -1
  169. package/package.json +1 -1
@@ -0,0 +1,436 @@
1
+ ---
2
+ name: airflow-dag-patterns
3
+ description: Build production Apache Airflow DAGs with best practices for operators, sensors, testing, and deployment
4
+ ---
5
+
6
+ # Apache Airflow DAG Patterns
7
+
8
+ Production-ready patterns for Apache Airflow including DAG design, operators, sensors, testing, and deployment strategies.
9
+
10
+ ## When to Use This Skill
11
+
12
+ - Creating data pipeline orchestration with Airflow
13
+ - Designing DAG structures and dependencies
14
+ - Implementing custom operators and sensors
15
+ - Testing Airflow DAGs locally
16
+ - Setting up Airflow in production
17
+ - Debugging failed DAG runs
18
+
19
+ ## Core Concepts
20
+
21
+ ### DAG Design Principles
22
+
23
+ | Principle | Description |
24
+ | --------------- | ----------------------------------- |
25
+ | **Idempotent** | Running twice produces same result |
26
+ | **Atomic** | Tasks succeed or fail completely |
27
+ | **Incremental** | Process only new/changed data |
28
+ | **Observable** | Logs, metrics, alerts at every step |
29
+
30
+ ### Task Dependencies
31
+
32
+ ```python
33
+ # Linear
34
+ task1 >> task2 >> task3
35
+
36
+ # Fan-out
37
+ task1 >> [task2, task3, task4]
38
+
39
+ # Fan-in
40
+ [task1, task2, task3] >> task4
41
+
42
+ # Complex
43
+ task1 >> task2 >> task4
44
+ task1 >> task3 >> task4
45
+ ```
46
+
47
+ ## Quick Start
48
+
49
+ ```python
50
+ from datetime import datetime, timedelta
51
+ from airflow import DAG
52
+ from airflow.operators.python import PythonOperator
53
+ from airflow.operators.empty import EmptyOperator
54
+
55
+ default_args = {
56
+ 'owner': 'data-team',
57
+ 'depends_on_past': False,
58
+ 'email_on_failure': True,
59
+ 'email_on_retry': False,
60
+ 'retries': 3,
61
+ 'retry_delay': timedelta(minutes=5),
62
+ 'retry_exponential_backoff': True,
63
+ 'max_retry_delay': timedelta(hours=1),
64
+ }
65
+
66
+ with DAG(
67
+ dag_id='example_etl',
68
+ default_args=default_args,
69
+ description='Example ETL pipeline',
70
+ schedule='0 6 * * *',
71
+ start_date=datetime(2024, 1, 1),
72
+ catchup=False,
73
+ tags=['etl', 'example'],
74
+ max_active_runs=1,
75
+ ) as dag:
76
+
77
+ start = EmptyOperator(task_id='start')
78
+
79
+ def extract_data(**context):
80
+ execution_date = context['ds']
81
+ return {'records': 1000}
82
+
83
+ extract = PythonOperator(
84
+ task_id='extract',
85
+ python_callable=extract_data,
86
+ )
87
+
88
+ end = EmptyOperator(task_id='end')
89
+
90
+ start >> extract >> end
91
+ ```
92
+
93
+ ## Patterns
94
+
95
+ ### Pattern 1: TaskFlow API (Airflow 2.0+)
96
+
97
+ ```python
98
+ from datetime import datetime
99
+ from airflow.decorators import dag, task
100
+ from airflow.models import Variable
101
+
102
+ @dag(
103
+ dag_id='taskflow_etl',
104
+ schedule='@daily',
105
+ start_date=datetime(2024, 1, 1),
106
+ catchup=False,
107
+ tags=['etl', 'taskflow'],
108
+ )
109
+ def taskflow_etl():
110
+
111
+ @task()
112
+ def extract(source: str, ds: str = None) -> dict:  # Airflow injects `ds` from the task context
113
+ import pandas as pd
114
+ df = pd.read_csv(f's3://bucket/{source}/{ds}.csv')
115
+ return {'data': df.to_dict(), 'rows': len(df)}
116
+
117
+ @task()
118
+ def transform(extracted: dict) -> dict:
119
+ import pandas as pd
120
+ df = pd.DataFrame(extracted['data'])
121
+ df['processed_at'] = datetime.now()
122
+ df = df.dropna()
123
+ return {'data': df.to_dict(), 'rows': len(df)}
124
+
125
+ @task()
126
+ def load(transformed: dict, target: str, ds: str = None):  # Airflow injects `ds` from the task context
127
+ import pandas as pd
128
+ df = pd.DataFrame(transformed['data'])
129
+ df.to_parquet(f's3://bucket/{target}/{ds}.parquet')
130
+ return transformed['rows']
131
+
132
+ @task()
133
+ def notify(rows_loaded: int):
134
+ print(f'Loaded {rows_loaded} rows')
135
+
136
+ extracted = extract(source='raw_data')
137
+ transformed = transform(extracted)
138
+ loaded = load(transformed, target='processed_data')
139
+ notify(loaded)
140
+
141
+ taskflow_etl()
142
+ ```
143
+
144
+ ### Pattern 2: Dynamic DAG Generation
145
+
146
+ ```python
147
+ from datetime import datetime, timedelta
148
+ from airflow import DAG
149
+ from airflow.operators.python import PythonOperator
150
+
151
+ PIPELINE_CONFIGS = [
152
+ {'name': 'customers', 'schedule': '@daily', 'source': 's3://raw/customers'},
153
+ {'name': 'orders', 'schedule': '@hourly', 'source': 's3://raw/orders'},
154
+ {'name': 'products', 'schedule': '@weekly', 'source': 's3://raw/products'},
155
+ ]
156
+
157
+ def create_dag(config: dict) -> DAG:
158
+ dag_id = f"etl_{config['name']}"
159
+
160
+ default_args = {
161
+ 'owner': 'data-team',
162
+ 'retries': 3,
163
+ 'retry_delay': timedelta(minutes=5),
164
+ }
165
+
166
+ dag = DAG(
167
+ dag_id=dag_id,
168
+ default_args=default_args,
169
+ schedule=config['schedule'],
170
+ start_date=datetime(2024, 1, 1),
171
+ catchup=False,
172
+ tags=['etl', 'dynamic', config['name']],
173
+ )
174
+
175
+ with dag:
176
+ def extract_fn(source, **context):
177
+ print(f"Extracting from {source} for {context['ds']}")
178
+
179
+ def transform_fn(**context):
180
+ print(f"Transforming data for {context['ds']}")
181
+
182
+ def load_fn(table_name, **context):
183
+ print(f"Loading to {table_name} for {context['ds']}")
184
+
185
+ extract = PythonOperator(task_id='extract', python_callable=extract_fn, op_kwargs={'source': config['source']})
186
+ transform = PythonOperator(task_id='transform', python_callable=transform_fn)
187
+ load = PythonOperator(task_id='load', python_callable=load_fn, op_kwargs={'table_name': config['name']})
188
+
189
+ extract >> transform >> load
190
+
191
+ return dag
192
+
193
+ for config in PIPELINE_CONFIGS:
194
+ globals()[f"dag_{config['name']}"] = create_dag(config)
195
+ ```
196
+
197
+ ### Pattern 3: Branching and Conditional Logic
198
+
199
+ ```python
200
+ from datetime import datetime
+ from airflow.decorators import dag, task
201
+ from airflow.operators.python import BranchPythonOperator
202
+ from airflow.operators.empty import EmptyOperator
203
+ from airflow.utils.trigger_rule import TriggerRule
204
+
205
+ @dag(dag_id='branching_pipeline', schedule='@daily', start_date=datetime(2024, 1, 1), catchup=False)
206
+ def branching_pipeline():
207
+
208
+ @task()
209
+ def check_data_quality() -> dict:
210
+ quality_score = 0.95
211
+ return {'score': quality_score, 'rows': 10000}
212
+
213
+ def choose_branch(**context) -> str:
214
+ ti = context['ti']
215
+ metrics = ti.xcom_pull(task_ids='check_data_quality')
216
+
217
+ if metrics['score'] >= 0.9:
218
+ return 'high_quality_path'
219
+ elif metrics['score'] >= 0.7:
220
+ return 'medium_quality_path'
221
+ else:
222
+ return 'low_quality_path'
223
+
224
+ quality_check = check_data_quality()
225
+
226
+ branch = BranchPythonOperator(task_id='branch', python_callable=choose_branch)
227
+
228
+ high_quality = EmptyOperator(task_id='high_quality_path')
229
+ medium_quality = EmptyOperator(task_id='medium_quality_path')
230
+ low_quality = EmptyOperator(task_id='low_quality_path')
231
+
232
+ join = EmptyOperator(
233
+ task_id='join',
234
+ trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS,
235
+ )
236
+
237
+ quality_check >> branch >> [high_quality, medium_quality, low_quality] >> join
238
+
239
+ branching_pipeline()
240
+ ```
241
+
242
+ ### Pattern 4: Sensors and External Dependencies
243
+
244
+ ```python
245
+ from datetime import datetime
246
+ from airflow import DAG
247
+ from airflow.sensors.filesystem import FileSensor
248
+ from airflow.providers.amazon.aws.sensors.s3 import S3KeySensor
249
+ from airflow.sensors.external_task import ExternalTaskSensor
250
+ from airflow.decorators import task
+ from airflow.sensors.base import PokeReturnValue
+ from airflow.operators.python import PythonOperator
251
+
252
+ with DAG(dag_id='sensor_example', schedule='@daily', start_date=datetime(2024, 1, 1), catchup=False) as dag:
253
+
254
+ wait_for_file = S3KeySensor(
255
+ task_id='wait_for_s3_file',
256
+ bucket_name='data-lake',
257
+ bucket_key='raw/{{ ds }}/data.parquet',
258
+ aws_conn_id='aws_default',
259
+ timeout=60 * 60 * 2,
260
+ poke_interval=60 * 5,
261
+ mode='reschedule',
262
+ )
263
+
264
+ wait_for_upstream = ExternalTaskSensor(
265
+ task_id='wait_for_upstream_dag',
266
+ external_dag_id='upstream_etl',
267
+ external_task_id='final_task',
268
+ execution_date_fn=lambda dt: dt,
269
+ timeout=60 * 60 * 3,
270
+ mode='reschedule',
271
+ )
272
+
273
+ @task.sensor(poke_interval=60, timeout=3600, mode='reschedule')
274
+ def wait_for_api() -> PokeReturnValue:
275
+ import requests
276
+ response = requests.get('https://api.example.com/health')
277
+ is_done = response.status_code == 200
278
+ return PokeReturnValue(is_done=is_done, xcom_value=response.json())
279
+
280
+ api_ready = wait_for_api()
281
+
282
+ def process_data(**context):
283
+ api_result = context['ti'].xcom_pull(task_ids='wait_for_api')
284
+ print(f"API returned: {api_result}")
285
+
286
+ process = PythonOperator(task_id='process', python_callable=process_data)
287
+
288
+ [wait_for_file, wait_for_upstream, api_ready] >> process
289
+ ```
290
+
291
+ ### Pattern 5: Error Handling and Alerts
292
+
293
+ ```python
294
+ from datetime import datetime, timedelta
295
+ from airflow import DAG
296
+ from airflow.operators.python import PythonOperator
297
+ from airflow.utils.trigger_rule import TriggerRule
298
+
299
+ def task_failure_callback(context):
300
+ task_instance = context['task_instance']
301
+ exception = context.get('exception')
302
+
303
+ message = f"""
304
+ Task Failed!
305
+ DAG: {task_instance.dag_id}
306
+ Task: {task_instance.task_id}
307
+ Execution Date: {context['ds']}
308
+ Error: {exception}
309
+ Log URL: {task_instance.log_url}
310
+ """
311
+ print(message)
312
+
313
+ def dag_failure_callback(context):
314
+ pass
315
+
316
+ with DAG(
317
+ dag_id='error_handling_example',
318
+ schedule='@daily',
319
+ start_date=datetime(2024, 1, 1),
320
+ catchup=False,
321
+ on_failure_callback=dag_failure_callback,
322
+ default_args={
323
+ 'on_failure_callback': task_failure_callback,
324
+ 'retries': 3,
325
+ 'retry_delay': timedelta(minutes=5),
326
+ },
327
+ ) as dag:
328
+
329
+ def might_fail(**context):
330
+ import random
331
+ if random.random() < 0.3:
332
+ raise ValueError("Random failure!")
333
+ return "Success"
334
+
335
+ risky_task = PythonOperator(task_id='risky_task', python_callable=might_fail)
336
+
337
+ def cleanup(**context):
338
+ print("Cleaning up...")
339
+
340
+ cleanup_task = PythonOperator(
341
+ task_id='cleanup',
342
+ python_callable=cleanup,
343
+ trigger_rule=TriggerRule.ALL_DONE,
344
+ )
345
+
346
+ def notify_success(**context):
347
+ print("All tasks succeeded!")
348
+
349
+ success_notification = PythonOperator(
350
+ task_id='notify_success',
351
+ python_callable=notify_success,
352
+ trigger_rule=TriggerRule.ALL_SUCCESS,
353
+ )
354
+
355
+ risky_task >> [cleanup_task, success_notification]
356
+ ```
357
+
358
+ ### Pattern 6: Testing DAGs
359
+
360
+ ```python
361
+ import pytest
362
+ from datetime import datetime
363
+ from airflow.models import DagBag
364
+
365
+ @pytest.fixture
366
+ def dagbag():
367
+ return DagBag(dag_folder='dags/', include_examples=False)
368
+
369
+ def test_dag_loaded(dagbag):
370
+ assert len(dagbag.import_errors) == 0, f"DAG import errors: {dagbag.import_errors}"
371
+
372
+ def test_dag_structure(dagbag):
373
+ dag = dagbag.get_dag('example_etl')
374
+ assert dag is not None
375
+ assert len(dag.tasks) == 3
376
+ assert dag.schedule_interval == '0 6 * * *'
377
+
378
+ def test_task_dependencies(dagbag):
379
+ dag = dagbag.get_dag('example_etl')
380
+ extract_task = dag.get_task('extract')
381
+ assert 'start' in [t.task_id for t in extract_task.upstream_list]
382
+ assert 'end' in [t.task_id for t in extract_task.downstream_list]
383
+
384
+ def test_dag_integrity(dagbag):
+ from airflow.utils.dag_cycle_tester import check_cycle
385
+ for dag in dagbag.dags.values():
386
+ check_cycle(dag)  # raises AirflowDagCycleException if the DAG contains a cycle
387
+
388
+ def test_extract_function():
389
+ from dags.example_dag import extract_data
390
+ result = extract_data(ds='2024-01-01')
391
+ assert 'records' in result
392
+ assert isinstance(result['records'], int)
393
+ ```
394
+
395
+ ## Project Structure
396
+
397
+ ```
398
+ airflow/
399
+ ├── dags/
400
+ │ ├── __init__.py
401
+ │ ├── common/
402
+ │ │ ├── operators.py
403
+ │ │ ├── sensors.py
404
+ │ │ └── callbacks.py
405
+ │ ├── etl/
406
+ │ │ ├── customers.py
407
+ │ │ └── orders.py
408
+ │ └── ml/
409
+ │ └── training.py
410
+ ├── plugins/
411
+ │ └── custom_plugin.py
412
+ ├── tests/
413
+ │ ├── __init__.py
414
+ │ ├── test_dags.py
415
+ │ └── test_operators.py
416
+ ├── docker-compose.yml
417
+ └── requirements.txt
418
+ ```
419
+
420
+ ## Best Practices
421
+
422
+ ### Do's
423
+
424
+ - **Use TaskFlow API** - Cleaner code, automatic XCom
425
+ - **Set timeouts** - Prevent zombie tasks
426
+ - **Use `mode='reschedule'`** - For sensors, free up workers
427
+ - **Test DAGs** - Unit tests and integration tests
428
+ - **Idempotent tasks** - Safe to retry
429
+
430
+ ### Don'ts
431
+
432
+ - **Don't use `depends_on_past=True`** - Creates bottlenecks
433
+ - **Don't hardcode dates** - Use `{{ ds }}` macros
434
+ - **Don't use global state** - Tasks should be stateless
435
+ - **Don't skip catchup blindly** - Understand implications
436
+ - **Don't put heavy logic in DAG file** - Import from modules