@sylix/coworker 2.0.11 → 2.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. package/dist/commands/slash/config.d.ts.map +1 -1
  2. package/dist/commands/slash/config.js +22 -4
  3. package/dist/commands/slash/config.js.map +1 -1
  4. package/dist/core/CoWorkerAgent.d.ts.map +1 -1
  5. package/dist/core/CoWorkerAgent.js +6 -3
  6. package/dist/core/CoWorkerAgent.js.map +1 -1
  7. package/dist/skills/defaults/accessibility/screen-reader-testing.md +545 -0
  8. package/dist/skills/defaults/accessibility/wcag-audit-patterns.md +555 -0
  9. package/dist/skills/defaults/ai-ml/rag.md +276 -0
  10. package/dist/skills/defaults/backend-development/api-design-principles.md +528 -0
  11. package/dist/skills/defaults/backend-development/api-design.md +285 -0
  12. package/dist/skills/defaults/backend-development/architecture-patterns.md +494 -0
  13. package/dist/skills/defaults/backend-development/async-python.md +237 -0
  14. package/dist/skills/defaults/backend-development/auth-implementation-patterns.md +638 -0
  15. package/dist/skills/defaults/backend-development/bazel-build-optimization.md +387 -0
  16. package/dist/skills/defaults/backend-development/billing-automation/SKILL.md +566 -0
  17. package/dist/skills/defaults/backend-development/code-review-excellence.md +538 -0
  18. package/dist/skills/defaults/backend-development/cqrs-implementation.md +554 -0
  19. package/dist/skills/defaults/backend-development/database-design.md +305 -0
  20. package/dist/skills/defaults/backend-development/debugging-strategies.md +536 -0
  21. package/dist/skills/defaults/backend-development/e2e-testing-patterns.md +544 -0
  22. package/dist/skills/defaults/backend-development/error-handling-patterns.md +641 -0
  23. package/dist/skills/defaults/backend-development/fastapi-templates.md +559 -0
  24. package/dist/skills/defaults/backend-development/fastapi.md +309 -0
  25. package/dist/skills/defaults/backend-development/git-advanced-workflows.md +405 -0
  26. package/dist/skills/defaults/backend-development/microservices-patterns.md +595 -0
  27. package/dist/skills/defaults/backend-development/microservices.md +284 -0
  28. package/dist/skills/defaults/backend-development/monorepo-management.md +623 -0
  29. package/dist/skills/defaults/backend-development/nodejs-backend-patterns.md +1048 -0
  30. package/dist/skills/defaults/backend-development/nx-workspace-patterns.md +457 -0
  31. package/dist/skills/defaults/backend-development/paypal-integration/SKILL.md +478 -0
  32. package/dist/skills/defaults/backend-development/pci-compliance/SKILL.md +480 -0
  33. package/dist/skills/defaults/backend-development/python-anti-patterns.md +349 -0
  34. package/dist/skills/defaults/backend-development/python-background-jobs.md +364 -0
  35. package/dist/skills/defaults/backend-development/python-code-style.md +360 -0
  36. package/dist/skills/defaults/backend-development/python-configuration.md +368 -0
  37. package/dist/skills/defaults/backend-development/python-design-patterns.md +296 -0
  38. package/dist/skills/defaults/backend-development/python-error-handling.md +323 -0
  39. package/dist/skills/defaults/backend-development/python-packaging.md +887 -0
  40. package/dist/skills/defaults/backend-development/python-performance-optimization.md +874 -0
  41. package/dist/skills/defaults/backend-development/python-project-structure.md +252 -0
  42. package/dist/skills/defaults/backend-development/python-resilience.md +376 -0
  43. package/dist/skills/defaults/backend-development/python-resource-management.md +421 -0
  44. package/dist/skills/defaults/backend-development/python-type-safety.md +428 -0
  45. package/dist/skills/defaults/backend-development/sql-optimization-patterns.md +509 -0
  46. package/dist/skills/defaults/backend-development/stripe-integration/SKILL.md +522 -0
  47. package/dist/skills/defaults/backend-development/turborepo-caching.md +376 -0
  48. package/dist/skills/defaults/blockchain/defi-protocol-templates.md +430 -0
  49. package/dist/skills/defaults/blockchain/nft-standards.md +364 -0
  50. package/dist/skills/defaults/blockchain/solidity-security.md +514 -0
  51. package/dist/skills/defaults/blockchain/web3-testing.md +360 -0
  52. package/dist/skills/defaults/business/competitive-landscape/SKILL.md +527 -0
  53. package/dist/skills/defaults/business/market-sizing-analysis/SKILL.md +451 -0
  54. package/dist/skills/defaults/business/startup-financial-modeling/SKILL.md +494 -0
  55. package/dist/skills/defaults/business/startup-metrics-framework/SKILL.md +564 -0
  56. package/dist/skills/defaults/business/team-composition-analysis.md +437 -0
  57. package/dist/skills/defaults/compliance/employment-contract-templates/SKILL.md +527 -0
  58. package/dist/skills/defaults/compliance/gdpr-data-handling/SKILL.md +630 -0
  59. package/dist/skills/defaults/data-engineering/airflow-dag-patterns.md +436 -0
  60. package/dist/skills/defaults/data-engineering/airflow.md +519 -0
  61. package/dist/skills/defaults/data-engineering/data-quality.md +583 -0
  62. package/dist/skills/defaults/data-engineering/dbt-transformation-patterns.md +482 -0
  63. package/dist/skills/defaults/data-engineering/dbt.md +556 -0
  64. package/dist/skills/defaults/data-engineering/ml-pipeline-workflow/SKILL.md +247 -0
  65. package/dist/skills/defaults/data-engineering/spark-optimization.md +348 -0
  66. package/dist/skills/defaults/data-engineering/spark.md +411 -0
  67. package/dist/skills/defaults/database/postgresql.md +202 -0
  68. package/dist/skills/defaults/debugging/systematic-debugging.md +249 -0
  69. package/dist/skills/defaults/devops/architecture-decision-records.md +448 -0
  70. package/dist/skills/defaults/devops/changelog-automation.md +580 -0
  71. package/dist/skills/defaults/devops/cicd.md +314 -0
  72. package/dist/skills/defaults/devops/cloud.md +263 -0
  73. package/dist/skills/defaults/devops/code-review-excellence.md +299 -0
  74. package/dist/skills/defaults/devops/cost-optimization.md +295 -0
  75. package/dist/skills/defaults/devops/deployment-pipeline-design.md +356 -0
  76. package/dist/skills/defaults/devops/docker.md +281 -0
  77. package/dist/skills/defaults/devops/git-workflows.md +205 -0
  78. package/dist/skills/defaults/devops/github-actions.md +311 -0
  79. package/dist/skills/defaults/devops/gitlab-ci-patterns.md +266 -0
  80. package/dist/skills/defaults/devops/hybrid-cloud-networking.md +241 -0
  81. package/dist/skills/defaults/devops/istio-traffic-management.md +327 -0
  82. package/dist/skills/defaults/devops/kubernetes.md +339 -0
  83. package/dist/skills/defaults/devops/linkerd-patterns.md +311 -0
  84. package/dist/skills/defaults/devops/multi-cloud-architecture.md +181 -0
  85. package/dist/skills/defaults/devops/observability.md +243 -0
  86. package/dist/skills/defaults/devops/openapi-spec-generation.md +1024 -0
  87. package/dist/skills/defaults/devops/postmortem-writing.md +396 -0
  88. package/dist/skills/defaults/devops/prometheus-configuration.md +265 -0
  89. package/dist/skills/defaults/devops/secrets-management.md +341 -0
  90. package/dist/skills/defaults/devops/service-mesh-observability.md +385 -0
  91. package/dist/skills/defaults/devops/terraform-module-library.md +244 -0
  92. package/dist/skills/defaults/finance/backtesting-frameworks/SKILL.md +663 -0
  93. package/dist/skills/defaults/finance/risk-metrics-calculation/SKILL.md +557 -0
  94. package/dist/skills/defaults/frontend/accessibility-compliance.md +420 -0
  95. package/dist/skills/defaults/frontend/design-system-patterns.md +337 -0
  96. package/dist/skills/defaults/frontend/interaction-design.md +327 -0
  97. package/dist/skills/defaults/frontend/javascript.md +311 -0
  98. package/dist/skills/defaults/frontend/modern-javascript-patterns.md +927 -0
  99. package/dist/skills/defaults/frontend/react-native-design.md +440 -0
  100. package/dist/skills/defaults/frontend/react.md +345 -0
  101. package/dist/skills/defaults/frontend/responsive-design.md +472 -0
  102. package/dist/skills/defaults/frontend/tailwind-design-system.md +337 -0
  103. package/dist/skills/defaults/frontend/typescript-advanced-types.md +724 -0
  104. package/dist/skills/defaults/frontend/typescript.md +334 -0
  105. package/dist/skills/defaults/frontend/visual-design-foundations.md +326 -0
  106. package/dist/skills/defaults/frontend/web-component-design.md +279 -0
  107. package/dist/skills/defaults/game-development/godot-gdscript-patterns.md +188 -0
  108. package/dist/skills/defaults/game-development/unity-ecs-patterns.md +594 -0
  109. package/dist/skills/defaults/kubernetes/gitops-workflow.md +285 -0
  110. package/dist/skills/defaults/kubernetes/gitops.md +280 -0
  111. package/dist/skills/defaults/kubernetes/helm-chart-scaffolding.md +553 -0
  112. package/dist/skills/defaults/kubernetes/helm.md +343 -0
  113. package/dist/skills/defaults/kubernetes/k8s-manifest-generator.md +501 -0
  114. package/dist/skills/defaults/kubernetes/k8s-security-policies.md +342 -0
  115. package/dist/skills/defaults/kubernetes/manifests.md +330 -0
  116. package/dist/skills/defaults/kubernetes/security.md +337 -0
  117. package/dist/skills/defaults/llm-application/embedding-strategies.md +608 -0
  118. package/dist/skills/defaults/llm-application/hybrid-search-implementation.md +570 -0
  119. package/dist/skills/defaults/llm-application/hybrid-search.md +570 -0
  120. package/dist/skills/defaults/llm-application/langchain-architecture.md +666 -0
  121. package/dist/skills/defaults/llm-application/langchain.md +259 -0
  122. package/dist/skills/defaults/llm-application/llm-evaluation.md +695 -0
  123. package/dist/skills/defaults/llm-application/prompt-engineering-patterns.md +449 -0
  124. package/dist/skills/defaults/llm-application/prompt-engineering.md +219 -0
  125. package/dist/skills/defaults/llm-application/rag-implementation.md +434 -0
  126. package/dist/skills/defaults/llm-application/similarity-search-patterns.md +560 -0
  127. package/dist/skills/defaults/llm-application/similarity-search.md +560 -0
  128. package/dist/skills/defaults/llm-application/vector-index-tuning.md +523 -0
  129. package/dist/skills/defaults/mobile/mobile-android-design.md +440 -0
  130. package/dist/skills/defaults/mobile/mobile-ios-design.md +266 -0
  131. package/dist/skills/defaults/monitoring/distributed-tracing.md +436 -0
  132. package/dist/skills/defaults/monitoring/grafana-dashboards.md +370 -0
  133. package/dist/skills/defaults/monitoring/prometheus-configuration.md +379 -0
  134. package/dist/skills/defaults/monitoring/slo-implementation.md +323 -0
  135. package/dist/skills/defaults/refactoring/code-refactoring.md +349 -0
  136. package/dist/skills/defaults/security/anti-reversing-techniques/SKILL.md +559 -0
  137. package/dist/skills/defaults/security/auditor.md +168 -0
  138. package/dist/skills/defaults/security/binary-analysis-patterns/SKILL.md +438 -0
  139. package/dist/skills/defaults/security/memory-forensics/SKILL.md +483 -0
  140. package/dist/skills/defaults/security/mtls-configuration.md +349 -0
  141. package/dist/skills/defaults/security/protocol-reverse-engineering/SKILL.md +520 -0
  142. package/dist/skills/defaults/security/sast-configuration.md +182 -0
  143. package/dist/skills/defaults/security/security.md +313 -0
  144. package/dist/skills/defaults/security/stride-analysis.md +273 -0
  145. package/dist/skills/defaults/security/threat-mitigation-mapping.md +290 -0
  146. package/dist/skills/defaults/systems/bash-defensive-patterns/SKILL.md +539 -0
  147. package/dist/skills/defaults/systems/bats-testing-patterns/SKILL.md +631 -0
  148. package/dist/skills/defaults/systems/go-concurrency-patterns.md +657 -0
  149. package/dist/skills/defaults/systems/memory-safety-patterns.md +605 -0
  150. package/dist/skills/defaults/systems/rust-async-patterns.md +519 -0
  151. package/dist/skills/defaults/systems/shellcheck-configuration/SKILL.md +456 -0
  152. package/dist/skills/defaults/team-collaboration/multi-reviewer-patterns.md +126 -0
  153. package/dist/skills/defaults/team-collaboration/parallel-feature-development.md +151 -0
  154. package/dist/skills/defaults/testing/javascript-testing-patterns.md +1021 -0
  155. package/dist/skills/defaults/testing/python-testing-patterns.md +351 -0
  156. package/dist/skills/defaults/testing/testing.md +332 -0
  157. package/dist/skills/defaults/workflows/context-driven-development.md +384 -0
  158. package/dist/skills/defaults/workflows/track-management.md +592 -0
  159. package/dist/skills/defaults/workflows/workflow-patterns.md +622 -0
  160. package/dist/skills/index.d.ts +11 -0
  161. package/dist/skills/index.d.ts.map +1 -0
  162. package/dist/skills/index.js +129 -0
  163. package/dist/skills/index.js.map +1 -0
  164. package/dist/utils/character.js +4 -4
  165. package/dist/utils/character.js.map +1 -1
  166. package/dist/utils/inputbar.d.ts.map +1 -1
  167. package/dist/utils/inputbar.js +7 -0
  168. package/dist/utils/inputbar.js.map +1 -1
  169. package/package.json +1 -1
@@ -0,0 +1,519 @@
1
+ ---
2
+ name: airflow-dag-patterns
3
+ description: Build production Apache Airflow DAGs with best practices for operators, sensors, testing, and deployment. Use when creating data pipelines, orchestrating workflows, or scheduling batch jobs.
4
+ ---
5
+
6
+ # Apache Airflow DAG Patterns
7
+
8
+ Production-ready patterns for Apache Airflow including DAG design, operators, sensors, testing, and deployment strategies.
9
+
10
+ ## When to Use This Skill
11
+
12
+ - Creating data pipeline orchestration with Airflow
13
+ - Designing DAG structures and dependencies
14
+ - Implementing custom operators and sensors
15
+ - Testing Airflow DAGs locally
16
+ - Setting up Airflow in production
17
+ - Debugging failed DAG runs
18
+
19
+ ## Core Concepts
20
+
21
+ ### 1. DAG Design Principles
22
+
23
+ | Principle | Description |
24
+ | --------------- | ----------------------------------- |
25
+ | **Idempotent** | Running twice produces same result |
26
+ | **Atomic** | Tasks succeed or fail completely |
27
+ | **Incremental** | Process only new/changed data |
28
+ | **Observable** | Logs, metrics, alerts at every step |
29
+
30
+ ### 2. Task Dependencies
31
+
32
+ ```python
33
+ # Linear
34
+ task1 >> task2 >> task3
35
+
36
+ # Fan-out
37
+ task1 >> [task2, task3, task4]
38
+
39
+ # Fan-in
40
+ [task1, task2, task3] >> task4
41
+
42
+ # Complex
43
+ task1 >> task2 >> task4
44
+ task1 >> task3 >> task4
45
+ ```
46
+
47
+ ## Quick Start
48
+
49
+ ```python
50
# dags/example_dag.py
from datetime import datetime, timedelta

from airflow import DAG
from airflow.operators.empty import EmptyOperator
from airflow.operators.python import PythonOperator

# Defaults applied to every task in the DAG unless a task overrides them.
default_args = dict(
    owner='data-team',
    depends_on_past=False,
    email_on_failure=True,
    email_on_retry=False,
    retries=3,
    retry_delay=timedelta(minutes=5),
    retry_exponential_backoff=True,
    max_retry_delay=timedelta(hours=1),
)


def extract_data(**context):
    """Extract step of the example pipeline.

    Looks up ``ds`` in the keyword context (``KeyError`` if it is missing)
    and returns a dict holding the number of extracted records.
    """
    execution_date = context['ds']
    # Extract logic here
    return {'records': 1000}


dag = DAG(
    dag_id='example_etl',
    default_args=default_args,
    description='Example ETL pipeline',
    schedule='0 6 * * *',  # Daily at 6 AM
    start_date=datetime(2024, 1, 1),
    catchup=False,
    tags=['etl', 'example'],
    max_active_runs=1,
)

with dag:
    start = EmptyOperator(task_id='start')
    extract = PythonOperator(task_id='extract', python_callable=extract_data)
    end = EmptyOperator(task_id='end')

    # Linear chain: start -> extract -> end
    start >> extract >> end
93
+ ```
94
+
95
+ ## Patterns
96
+
97
+ ### Pattern 1: TaskFlow API (Airflow 2.0+)
98
+
99
+ ```python
100
# dags/taskflow_example.py
from datetime import datetime

from airflow.decorators import dag, task
from airflow.models import Variable


@dag(
    dag_id='taskflow_etl',
    schedule='@daily',
    start_date=datetime(2024, 1, 1),
    catchup=False,
    tags=['etl', 'taskflow'],
)
def taskflow_etl():
    """ETL pipeline using TaskFlow API"""

    @task()
    def extract(source: str, ds=None) -> dict:
        """Extract data from source.

        ``ds`` is not passed by the caller: Airflow fills context variables
        into keyword arguments of the same name when the task runs.
        Jinja macros such as ``{{ ds }}`` are NOT rendered inside a Python
        callable, and inside an f-string ``{{ ds }}`` is just the literal
        text ``{ ds }``, so the date has to come from the context.
        """
        import pandas as pd

        df = pd.read_csv(f's3://bucket/{source}/{ds}.csv')
        return {'data': df.to_dict(), 'rows': len(df)}

    @task()
    def transform(extracted: dict) -> dict:
        """Transform extracted data: stamp the processing time and drop rows with nulls."""
        import pandas as pd

        df = pd.DataFrame(extracted['data'])
        # Stored as an ISO-8601 string so the value returned through XCom
        # stays plain, serializable data.
        df['processed_at'] = datetime.now().isoformat()
        df = df.dropna()
        return {'data': df.to_dict(), 'rows': len(df)}

    @task()
    def load(transformed: dict, target: str, ds=None):
        """Load data to target and return the number of rows written.

        ``ds`` is injected from the task context (see ``extract``).
        """
        import pandas as pd

        df = pd.DataFrame(transformed['data'])
        df.to_parquet(f's3://bucket/{target}/{ds}.parquet')
        return transformed['rows']

    @task()
    def notify(rows_loaded: int):
        """Send notification"""
        print(f'Loaded {rows_loaded} rows')

    # Define dependencies with XCom passing
    extracted = extract(source='raw_data')
    transformed = transform(extracted)
    loaded = load(transformed, target='processed_data')
    notify(loaded)


# Instantiate the DAG
taskflow_etl()
155
+ ```
156
+
157
+ ### Pattern 2: Dynamic DAG Generation
158
+
159
+ ```python
160
# dags/dynamic_dag_factory.py
import json
from datetime import datetime, timedelta

from airflow import DAG
from airflow.models import Variable
from airflow.operators.python import PythonOperator

# Configuration for multiple similar pipelines
PIPELINE_CONFIGS = [
    {'name': 'customers', 'schedule': '@daily', 'source': 's3://raw/customers'},
    {'name': 'orders', 'schedule': '@hourly', 'source': 's3://raw/orders'},
    {'name': 'products', 'schedule': '@weekly', 'source': 's3://raw/products'},
]


def create_dag(config: dict) -> DAG:
    """Factory function to create DAGs from config.

    ``config`` must provide ``name`` (used for the DAG id, a tag and the
    load target), ``schedule`` and ``source``. The returned DAG is named
    ``etl_<name>`` and runs extract -> transform -> load.
    """
    pipeline = config['name']

    with DAG(
        dag_id=f"etl_{pipeline}",
        default_args={
            'owner': 'data-team',
            'retries': 3,
            'retry_delay': timedelta(minutes=5),
        },
        schedule=config['schedule'],
        start_date=datetime(2024, 1, 1),
        catchup=False,
        tags=['etl', 'dynamic', pipeline],
    ) as dag:

        def run_extract(source, **context):
            print(f"Extracting from {source} for {context['ds']}")

        def run_transform(**context):
            print(f"Transforming data for {context['ds']}")

        def run_load(table_name, **context):
            print(f"Loading to {table_name} for {context['ds']}")

        # Per-pipeline values are passed through op_kwargs, not closed over.
        extract = PythonOperator(
            task_id='extract',
            python_callable=run_extract,
            op_kwargs={'source': config['source']},
        )
        transform = PythonOperator(
            task_id='transform',
            python_callable=run_transform,
        )
        load = PythonOperator(
            task_id='load',
            python_callable=run_load,
            op_kwargs={'table_name': pipeline},
        )

        extract >> transform >> load

    return dag


# Generate DAGs: each one is bound to a module-level name (dag_<name>).
for config in PIPELINE_CONFIGS:
    globals()[f"dag_{config['name']}"] = create_dag(config)
228
+ ```
229
+
230
+ ### Pattern 3: Branching and Conditional Logic
231
+
232
+ ```python
233
# dags/branching_example.py
from datetime import datetime  # required by start_date below

from airflow.decorators import dag, task
from airflow.operators.empty import EmptyOperator
from airflow.operators.python import BranchPythonOperator
from airflow.utils.trigger_rule import TriggerRule


@dag(
    dag_id='branching_pipeline',
    schedule='@daily',
    start_date=datetime(2024, 1, 1),
    catchup=False,
)
def branching_pipeline():
    """Route a run down one of three paths based on a data-quality score."""

    @task()
    def check_data_quality() -> dict:
        """Check data quality and return metrics"""
        quality_score = 0.95  # Simulated
        return {'score': quality_score, 'rows': 10000}

    def choose_branch(**context) -> str:
        """Determine which branch to execute.

        Pulls the metrics dict pushed by ``check_data_quality`` from XCom and
        returns the task_id of the path to follow.
        """
        ti = context['ti']
        metrics = ti.xcom_pull(task_ids='check_data_quality')

        if metrics['score'] >= 0.9:
            return 'high_quality_path'
        elif metrics['score'] >= 0.7:
            return 'medium_quality_path'
        else:
            return 'low_quality_path'

    quality_check = check_data_quality()

    branch = BranchPythonOperator(
        task_id='branch',
        python_callable=choose_branch,
    )

    # task_ids must match the strings returned by choose_branch
    high_quality = EmptyOperator(task_id='high_quality_path')
    medium_quality = EmptyOperator(task_id='medium_quality_path')
    low_quality = EmptyOperator(task_id='low_quality_path')

    # Join point - runs after any branch completes
    join = EmptyOperator(
        task_id='join',
        trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS,
    )

    quality_check >> branch >> [high_quality, medium_quality, low_quality] >> join


branching_pipeline()
285
+ ```
286
+
287
+ ### Pattern 4: Sensors and External Dependencies
288
+
289
+ ```python
290
# dags/sensor_patterns.py
from datetime import datetime, timedelta

from airflow import DAG
from airflow.decorators import task  # provides @task.sensor
from airflow.operators.python import PythonOperator
from airflow.providers.amazon.aws.sensors.s3 import S3KeySensor
from airflow.sensors.base import PokeReturnValue
from airflow.sensors.external_task import ExternalTaskSensor
from airflow.sensors.filesystem import FileSensor

with DAG(
    dag_id='sensor_example',
    schedule='@daily',
    start_date=datetime(2024, 1, 1),
    catchup=False,
) as dag:

    # Wait for file on S3
    wait_for_file = S3KeySensor(
        task_id='wait_for_s3_file',
        bucket_name='data-lake',
        bucket_key='raw/{{ ds }}/data.parquet',
        aws_conn_id='aws_default',
        timeout=60 * 60 * 2,  # 2 hours
        poke_interval=60 * 5,  # Check every 5 minutes
        mode='reschedule',  # Free up worker slot while waiting
    )

    # Wait for another DAG to complete
    wait_for_upstream = ExternalTaskSensor(
        task_id='wait_for_upstream_dag',
        external_dag_id='upstream_etl',
        external_task_id='final_task',
        execution_date_fn=lambda dt: dt,  # Same execution date
        timeout=60 * 60 * 3,
        mode='reschedule',
    )

    # Custom sensor using @task.sensor decorator
    @task.sensor(poke_interval=60, timeout=3600, mode='reschedule')
    def wait_for_api() -> PokeReturnValue:
        """Custom sensor for API availability.

        Reports "not done" while the health endpoint is unreachable or
        returns a non-200 status, so the sensor pokes again instead of
        failing. On a 200 response the JSON body is pushed as the XCom value.
        """
        import requests

        try:
            # Bounded wait: a hung connection must not block the poke forever.
            response = requests.get('https://api.example.com/health', timeout=10)
        except requests.RequestException:
            return PokeReturnValue(is_done=False)

        if response.status_code != 200:
            # Error responses may not carry JSON; do not try to parse them.
            return PokeReturnValue(is_done=False)

        return PokeReturnValue(is_done=True, xcom_value=response.json())

    api_ready = wait_for_api()

    def process_data(**context):
        """Print the health payload pushed by the ``wait_for_api`` sensor."""
        api_result = context['ti'].xcom_pull(task_ids='wait_for_api')
        print(f"API returned: {api_result}")

    process = PythonOperator(
        task_id='process',
        python_callable=process_data,
    )

    # process starts only after all three sensors have succeeded
    [wait_for_file, wait_for_upstream, api_ready] >> process
349
+ ```
350
+
351
+ ### Pattern 5: Error Handling and Alerts
352
+
353
+ ```python
354
# dags/error_handling.py
from datetime import datetime, timedelta

from airflow import DAG
from airflow.models import Variable
from airflow.operators.python import PythonOperator
from airflow.utils.trigger_rule import TriggerRule


def task_failure_callback(context):
    """Callback on task failure: format a summary of the failed task and print it."""
    ti = context['task_instance']
    error = context.get('exception')

    # Send to Slack/PagerDuty/etc
    message = f"""
    Task Failed!
    DAG: {ti.dag_id}
    Task: {ti.task_id}
    Execution Date: {context['ds']}
    Error: {error}
    Log URL: {ti.log_url}
    """
    # send_slack_alert(message)
    print(message)


def dag_failure_callback(context):
    """Callback on DAG failure"""
    # Aggregate failures, send summary
    pass


def might_fail(**context):
    """Raise ValueError roughly 30% of the time, otherwise return "Success"."""
    import random

    if random.random() < 0.3:
        raise ValueError("Random failure!")
    return "Success"


def cleanup(**context):
    """Cleanup runs regardless of upstream failures"""
    print("Cleaning up...")


def notify_success(**context):
    """Only runs if all upstream succeeded"""
    print("All tasks succeeded!")


with DAG(
    dag_id='error_handling_example',
    schedule='@daily',
    start_date=datetime(2024, 1, 1),
    catchup=False,
    on_failure_callback=dag_failure_callback,
    default_args={
        'on_failure_callback': task_failure_callback,
        'retries': 3,
        'retry_delay': timedelta(minutes=5),
    },
) as dag:
    risky_task = PythonOperator(task_id='risky_task', python_callable=might_fail)

    cleanup_task = PythonOperator(
        task_id='cleanup',
        python_callable=cleanup,
        trigger_rule=TriggerRule.ALL_DONE,  # Run even if upstream fails
    )

    success_notification = PythonOperator(
        task_id='notify_success',
        python_callable=notify_success,
        trigger_rule=TriggerRule.ALL_SUCCESS,
    )

    # Both follow-up tasks hang off risky_task; their trigger rules decide which run.
    risky_task >> [cleanup_task, success_notification]
428
+ ```
429
+
430
+ ### Pattern 6: Testing DAGs
431
+
432
+ ```python
433
# tests/test_dags.py
from datetime import datetime

import pytest
from airflow.models import DagBag
from airflow.utils.dag_cycle_tester import check_cycle


@pytest.fixture
def dagbag():
    """Parse every DAG file under ``dags/`` (Airflow's bundled examples excluded)."""
    return DagBag(dag_folder='dags/', include_examples=False)


def test_dag_loaded(dagbag):
    """Test that all DAGs load without errors"""
    assert not dagbag.import_errors, f"DAG import errors: {dagbag.import_errors}"


def test_dag_structure(dagbag):
    """Test specific DAG structure"""
    dag = dagbag.get_dag('example_etl')

    assert dag is not None
    assert len(dag.tasks) == 3
    # NOTE(review): `schedule_interval` is the legacy attribute name; confirm
    # it is still exposed by the Airflow version you run.
    assert dag.schedule_interval == '0 6 * * *'


def test_task_dependencies(dagbag):
    """Test task dependencies are correct"""
    dag = dagbag.get_dag('example_etl')

    extract_task = dag.get_task('extract')
    assert 'start' in [t.task_id for t in extract_task.upstream_list]
    assert 'end' in [t.task_id for t in extract_task.downstream_list]


def test_dag_integrity(dagbag):
    """Test DAG has no cycles and is valid"""
    for dag_id, dag in dagbag.dags.items():
        # check_cycle raises if the DAG contains a cycle, which fails the
        # test; DAG objects have no `test_cycle()` method to call instead.
        check_cycle(dag)


# Test individual task logic
def test_extract_function():
    """Unit test for extract function"""
    from dags.example_dag import extract_data

    result = extract_data(ds='2024-01-01')
    assert 'records' in result
    assert isinstance(result['records'], int)
475
+ ```
476
+
477
+ ## Project Structure
478
+
479
+ ```
480
+ airflow/
481
+ ├── dags/
482
+ │ ├── __init__.py
483
+ │ ├── common/
484
+ │ │ ├── __init__.py
485
+ │ │ ├── operators.py # Custom operators
486
+ │ │ ├── sensors.py # Custom sensors
487
+ │ │ └── callbacks.py # Alert callbacks
488
+ │ ├── etl/
489
+ │ │ ├── customers.py
490
+ │ │ └── orders.py
491
+ │ └── ml/
492
+ │ └── training.py
493
+ ├── plugins/
494
+ │ └── custom_plugin.py
495
+ ├── tests/
496
+ │ ├── __init__.py
497
+ │ ├── test_dags.py
498
+ │ └── test_operators.py
499
+ ├── docker-compose.yml
500
+ └── requirements.txt
501
+ ```
502
+
503
+ ## Best Practices
504
+
505
+ ### Do's
506
+
507
+ - **Use TaskFlow API** - Cleaner code, automatic XCom
508
+ - **Set timeouts** - Prevent zombie tasks
509
+ - **Use `mode='reschedule'`** - For sensors, free up workers
510
+ - **Test DAGs** - Unit tests and integration tests
511
+ - **Idempotent tasks** - Safe to retry
512
+
513
+ ### Don'ts
514
+
515
+ - **Don't use `depends_on_past=True`** - Creates bottlenecks
516
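+ - **Don't hardcode dates** - Use `{{ ds }}` macros in templated operator fields; inside Python callables, read `ds` from the task context instead (Jinja is not rendered there)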
517
+ - **Don't use global state** - Tasks should be stateless
518
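+ - **Don't skip catchup blindly** - Understand the implications: with `catchup=False`, intervals missed between `start_date` and now are not backfilled automatically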
519
+ - **Don't put heavy logic in DAG file** - Import from modules