pingpulse-airflow 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pingpulse_airflow-0.1.0/PKG-INFO +338 -0
- pingpulse_airflow-0.1.0/README.md +302 -0
- pingpulse_airflow-0.1.0/pingpulse_airflow/__init__.py +77 -0
- pingpulse_airflow-0.1.0/pingpulse_airflow/callbacks.py +311 -0
- pingpulse_airflow-0.1.0/pingpulse_airflow/decorators.py +260 -0
- pingpulse_airflow-0.1.0/pingpulse_airflow/hooks/__init__.py +5 -0
- pingpulse_airflow-0.1.0/pingpulse_airflow/hooks/pingpulse.py +200 -0
- pingpulse_airflow-0.1.0/pingpulse_airflow/operators/__init__.py +5 -0
- pingpulse_airflow-0.1.0/pingpulse_airflow/operators/pingpulse.py +218 -0
- pingpulse_airflow-0.1.0/pingpulse_airflow.egg-info/PKG-INFO +338 -0
- pingpulse_airflow-0.1.0/pingpulse_airflow.egg-info/SOURCES.txt +18 -0
- pingpulse_airflow-0.1.0/pingpulse_airflow.egg-info/dependency_links.txt +1 -0
- pingpulse_airflow-0.1.0/pingpulse_airflow.egg-info/entry_points.txt +2 -0
- pingpulse_airflow-0.1.0/pingpulse_airflow.egg-info/requires.txt +7 -0
- pingpulse_airflow-0.1.0/pingpulse_airflow.egg-info/top_level.txt +1 -0
- pingpulse_airflow-0.1.0/pyproject.toml +71 -0
- pingpulse_airflow-0.1.0/setup.cfg +4 -0
- pingpulse_airflow-0.1.0/tests/test_callbacks.py +281 -0
- pingpulse_airflow-0.1.0/tests/test_hooks.py +146 -0
- pingpulse_airflow-0.1.0/tests/test_operators.py +192 -0
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pingpulse-airflow
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Apache Airflow provider for PingPulse workflow monitoring
|
|
5
|
+
Author-email: PingPulse Team <support@pingpulse.com>
|
|
6
|
+
Maintainer-email: PingPulse Team <support@pingpulse.com>
|
|
7
|
+
License: MIT
|
|
8
|
+
Project-URL: Homepage, https://pingpulse.com
|
|
9
|
+
Project-URL: Documentation, https://pingpulse.com/api-docs
|
|
10
|
+
Project-URL: Repository, https://github.com/pingpulse/pingpulse-airflow
|
|
11
|
+
Project-URL: Issues, https://github.com/pingpulse/pingpulse-airflow/issues
|
|
12
|
+
Keywords: airflow,pingpulse,monitoring,observability,workflow,webhook,alerting
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Environment :: Console
|
|
15
|
+
Classifier: Framework :: Apache Airflow
|
|
16
|
+
Classifier: Framework :: Apache Airflow :: Provider
|
|
17
|
+
Classifier: Intended Audience :: Developers
|
|
18
|
+
Classifier: Intended Audience :: System Administrators
|
|
19
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
20
|
+
Classifier: Operating System :: OS Independent
|
|
21
|
+
Classifier: Programming Language :: Python :: 3
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
25
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
26
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
27
|
+
Classifier: Topic :: System :: Monitoring
|
|
28
|
+
Requires-Python: >=3.8
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
Requires-Dist: apache-airflow>=2.0.0
|
|
31
|
+
Requires-Dist: requests>=2.25.0
|
|
32
|
+
Provides-Extra: dev
|
|
33
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
34
|
+
Requires-Dist: pytest-mock>=3.10.0; extra == "dev"
|
|
35
|
+
Requires-Dist: responses>=0.22.0; extra == "dev"
|
|
36
|
+
|
|
37
|
+
# PingPulse Airflow Provider
|
|
38
|
+
|
|
39
|
+
Official Apache Airflow provider for [PingPulse](https://pingpulse.com) workflow monitoring.
|
|
40
|
+
|
|
41
|
+
Monitor your Airflow DAGs in real time with PingPulse - get instant alerts when tasks fail, track execution metrics, and visualize your data pipelines.
|
|
42
|
+
|
|
43
|
+
## Installation
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
pip install pingpulse-airflow
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Quick Start
|
|
50
|
+
|
|
51
|
+
### 1. Configure Connection (Recommended)
|
|
52
|
+
|
|
53
|
+
In Airflow UI, go to **Admin > Connections** and create a new connection:
|
|
54
|
+
|
|
55
|
+
| Field | Value |
|
|
56
|
+
|-------|-------|
|
|
57
|
+
| Conn Id | `pingpulse_default` |
|
|
58
|
+
| Conn Type | `HTTP` |
|
|
59
|
+
| Host | `app.pingpulse.com` |
|
|
60
|
+
| Password | Your API key (`ppk_xxx_xxx`) |
|
|
61
|
+
|
|
62
|
+
### 2. Choose Your Integration Pattern
|
|
63
|
+
|
|
64
|
+
PingPulse offers three ways to integrate - pick what fits your workflow:
|
|
65
|
+
|
|
66
|
+
---
|
|
67
|
+
|
|
68
|
+
## Option 1: Operator (Explicit Tasks)
|
|
69
|
+
|
|
70
|
+
Best for: **New DAGs where you want visible ping tasks**
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
from airflow import DAG
|
|
74
|
+
from airflow.operators.python import PythonOperator
|
|
75
|
+
from pingpulse_airflow import PingPulseOperator
|
|
76
|
+
from datetime import datetime
|
|
77
|
+
|
|
78
|
+
with DAG('etl_pipeline', start_date=datetime(2024, 1, 1), schedule='@daily') as dag:
|
|
79
|
+
|
|
80
|
+
extract = PythonOperator(
|
|
81
|
+
task_id='extract',
|
|
82
|
+
python_callable=extract_data,
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
ping_extract = PingPulseOperator(
|
|
86
|
+
task_id='ping_extract',
|
|
87
|
+
workflow_id='dwf123abc',
|
|
88
|
+
stage_path='1',
|
|
89
|
+
start=True, # Starts new workflow instance
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
transform = PythonOperator(
|
|
93
|
+
task_id='transform',
|
|
94
|
+
python_callable=transform_data,
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
ping_transform = PingPulseOperator(
|
|
98
|
+
task_id='ping_transform',
|
|
99
|
+
workflow_id='dwf123abc',
|
|
100
|
+
stage_path='2',
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
load = PythonOperator(
|
|
104
|
+
task_id='load',
|
|
105
|
+
python_callable=load_data,
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
ping_complete = PingPulseOperator(
|
|
109
|
+
task_id='ping_complete',
|
|
110
|
+
workflow_id='dwf123abc',
|
|
111
|
+
stage_path='3',
|
|
112
|
+
final=True, # Marks workflow complete
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
extract >> ping_extract >> transform >> ping_transform >> load >> ping_complete
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
**DAG visualization:**
|
|
119
|
+
```
|
|
120
|
+
[Extract] → [Ping 1] → [Transform] → [Ping 2] → [Load] → [Ping 3]
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
---
|
|
124
|
+
|
|
125
|
+
## Option 2: Decorator (Auto-Ping)
|
|
126
|
+
|
|
127
|
+
Best for: **Clean DAGs with Python tasks, invisible instrumentation**
|
|
128
|
+
|
|
129
|
+
```python
|
|
130
|
+
from airflow import DAG
|
|
131
|
+
from pingpulse_airflow import pingpulse_task
|
|
132
|
+
from datetime import datetime
|
|
133
|
+
|
|
134
|
+
with DAG('etl_pipeline', start_date=datetime(2024, 1, 1), schedule='@daily') as dag:
|
|
135
|
+
|
|
136
|
+
@pingpulse_task(workflow_id='dwf123abc', stage_path='1', start=True)
|
|
137
|
+
def extract():
|
|
138
|
+
data = fetch_from_source()
|
|
139
|
+
return {'records': len(data)}
|
|
140
|
+
|
|
141
|
+
@pingpulse_task(workflow_id='dwf123abc', stage_path='2')
|
|
142
|
+
def transform(data):
|
|
143
|
+
transformed = process(data)
|
|
144
|
+
return {'processed': len(transformed)}
|
|
145
|
+
|
|
146
|
+
@pingpulse_task(workflow_id='dwf123abc', stage_path='3', final=True)
|
|
147
|
+
def load(data):
|
|
148
|
+
save_to_destination(data)
|
|
149
|
+
return {'status': 'complete'}
|
|
150
|
+
|
|
151
|
+
load(transform(extract()))  # pass each task's output to the next; this also sets the dependencies
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
**DAG visualization:**
|
|
155
|
+
```
|
|
156
|
+
[Extract] → [Transform] → [Load]
|
|
157
|
+
```
|
|
158
|
+
*(Pings happen automatically inside each task)*
|
|
159
|
+
|
|
160
|
+
---
|
|
161
|
+
|
|
162
|
+
## Option 3: Callbacks (Retrofit Existing DAGs)
|
|
163
|
+
|
|
164
|
+
Best for: **Adding monitoring to existing DAGs without changing task code**
|
|
165
|
+
|
|
166
|
+
```python
|
|
167
|
+
from airflow import DAG
|
|
168
|
+
from airflow.operators.python import PythonOperator
|
|
169
|
+
from pingpulse_airflow import pingpulse_success, pingpulse_failure
|
|
170
|
+
from datetime import datetime
|
|
171
|
+
|
|
172
|
+
with DAG('existing_pipeline', start_date=datetime(2024, 1, 1), schedule='@daily') as dag:
|
|
173
|
+
|
|
174
|
+
extract = PythonOperator(
|
|
175
|
+
task_id='extract',
|
|
176
|
+
python_callable=extract_data,
|
|
177
|
+
on_success_callback=pingpulse_success('dwf123abc', '1', start=True),
|
|
178
|
+
on_failure_callback=pingpulse_failure('dwf123abc', '1'),
|
|
179
|
+
)
|
|
180
|
+
|
|
181
|
+
transform = PythonOperator(
|
|
182
|
+
task_id='transform',
|
|
183
|
+
python_callable=transform_data,
|
|
184
|
+
on_success_callback=pingpulse_success('dwf123abc', '2'),
|
|
185
|
+
on_failure_callback=pingpulse_failure('dwf123abc', '2'),
|
|
186
|
+
)
|
|
187
|
+
|
|
188
|
+
load = PythonOperator(
|
|
189
|
+
task_id='load',
|
|
190
|
+
python_callable=load_data,
|
|
191
|
+
on_success_callback=pingpulse_success('dwf123abc', '3', final=True),
|
|
192
|
+
on_failure_callback=pingpulse_failure('dwf123abc', '3'),
|
|
193
|
+
)
|
|
194
|
+
|
|
195
|
+
extract >> transform >> load
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
---
|
|
199
|
+
|
|
200
|
+
## Comparison
|
|
201
|
+
|
|
202
|
+
| Pattern | Modifies Task Code | Visible in DAG | Best For |
|
|
203
|
+
|---------|-------------------|----------------|----------|
|
|
204
|
+
| **Operator** | No | Yes (extra nodes) | New DAGs, explicit tracking |
|
|
205
|
+
| **Decorator** | Yes (wrap function) | No | Clean DAGs, TaskFlow API |
|
|
206
|
+
| **Callback** | No | No | Existing DAGs, any operator |
|
|
207
|
+
|
|
208
|
+
---
|
|
209
|
+
|
|
210
|
+
## Advanced Usage
|
|
211
|
+
|
|
212
|
+
### Custom Payload
|
|
213
|
+
|
|
214
|
+
Send custom metrics with your pings:
|
|
215
|
+
|
|
216
|
+
```python
|
|
217
|
+
PingPulseOperator(
|
|
218
|
+
task_id='ping_with_metrics',
|
|
219
|
+
workflow_id='dwf123abc',
|
|
220
|
+
stage_path='2',
|
|
221
|
+
payload={
|
|
222
|
+
'records_processed': 10000,
|
|
223
|
+
'duration_seconds': 45.2,
|
|
224
|
+
'source': 'postgres',
|
|
225
|
+
},
|
|
226
|
+
)
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
### Direct API Key (No Connection)
|
|
230
|
+
|
|
231
|
+
```python
|
|
232
|
+
PingPulseOperator(
|
|
233
|
+
task_id='ping',
|
|
234
|
+
workflow_id='dwf123abc',
|
|
235
|
+
stage_path='1',
|
|
236
|
+
api_key='ppk_xxx_xxx', # Direct key
|
|
237
|
+
base_url='https://your-instance.pingpulse.com', # Self-hosted
|
|
238
|
+
)
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
### Static vs Dynamic Workflows
|
|
242
|
+
|
|
243
|
+
```python
|
|
244
|
+
# Dynamic workflow (default) - stages created on-the-fly
|
|
245
|
+
PingPulseOperator(
|
|
246
|
+
workflow_id='dwf123abc',
|
|
247
|
+
workflow_type='dynamic', # Uses /dhk/ endpoint
|
|
248
|
+
...
|
|
249
|
+
)
|
|
250
|
+
|
|
251
|
+
# Static workflow - predefined stages
|
|
252
|
+
PingPulseOperator(
|
|
253
|
+
workflow_id='swf456def',
|
|
254
|
+
workflow_type='static', # Uses /shk/ endpoint
|
|
255
|
+
...
|
|
256
|
+
)
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
### Heartbeat Monitoring
|
|
260
|
+
|
|
261
|
+
For scheduled jobs that are expected to run on a cron schedule:
|
|
262
|
+
|
|
263
|
+
```python
|
|
264
|
+
from pingpulse_airflow.operators.pingpulse import PingPulseHeartbeatOperator
|
|
265
|
+
|
|
266
|
+
heartbeat = PingPulseHeartbeatOperator(
|
|
267
|
+
task_id='send_heartbeat',
|
|
268
|
+
monitor_id='hb123abc',
|
|
269
|
+
)
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
### DAG-Level Callbacks
|
|
273
|
+
|
|
274
|
+
Monitor entire DAG success/failure:
|
|
275
|
+
|
|
276
|
+
```python
|
|
277
|
+
from pingpulse_airflow.callbacks import pingpulse_dag_callbacks
|
|
278
|
+
|
|
279
|
+
callbacks = pingpulse_dag_callbacks('dwf123abc')
|
|
280
|
+
|
|
281
|
+
with DAG(
|
|
282
|
+
'my_dag',
|
|
283
|
+
on_success_callback=callbacks['on_success'],
|
|
284
|
+
on_failure_callback=callbacks['on_failure'],
|
|
285
|
+
...
|
|
286
|
+
) as dag:
|
|
287
|
+
...
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
---
|
|
291
|
+
|
|
292
|
+
## What Gets Sent to PingPulse
|
|
293
|
+
|
|
294
|
+
Each ping includes:
|
|
295
|
+
|
|
296
|
+
```json
|
|
297
|
+
{
|
|
298
|
+
"status": "success",
|
|
299
|
+
"duration": 12.345,
|
|
300
|
+
"airflow": {
|
|
301
|
+
"dag_id": "etl_pipeline",
|
|
302
|
+
"task_id": "transform",
|
|
303
|
+
"run_id": "scheduled__2024-01-15T00:00:00+00:00",
|
|
304
|
+
"execution_date": "2024-01-15T00:00:00+00:00",
|
|
305
|
+
"try_number": 1
|
|
306
|
+
}
|
|
307
|
+
}
|
|
308
|
+
```
|
|
309
|
+
|
|
310
|
+
On failure, also includes:
|
|
311
|
+
```json
|
|
312
|
+
{
|
|
313
|
+
"status": "failed",
|
|
314
|
+
"error": "Connection refused to database..."
|
|
315
|
+
}
|
|
316
|
+
```
|
|
317
|
+
|
|
318
|
+
---
|
|
319
|
+
|
|
320
|
+
## Requirements
|
|
321
|
+
|
|
322
|
+
- Python 3.8+
|
|
323
|
+
- Apache Airflow 2.0+
|
|
324
|
+
- PingPulse account with API key
|
|
325
|
+
|
|
326
|
+
---
|
|
327
|
+
|
|
328
|
+
## Support
|
|
329
|
+
|
|
330
|
+
- Documentation: https://pingpulse.com/api-docs
|
|
331
|
+
- Issues: https://github.com/pingpulse/pingpulse-airflow/issues
|
|
332
|
+
- Email: support@pingpulse.com
|
|
333
|
+
|
|
334
|
+
---
|
|
335
|
+
|
|
336
|
+
## License
|
|
337
|
+
|
|
338
|
+
MIT License - see LICENSE file for details.
|
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
# PingPulse Airflow Provider
|
|
2
|
+
|
|
3
|
+
Official Apache Airflow provider for [PingPulse](https://pingpulse.com) workflow monitoring.
|
|
4
|
+
|
|
5
|
+
Monitor your Airflow DAGs in real time with PingPulse - get instant alerts when tasks fail, track execution metrics, and visualize your data pipelines.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install pingpulse-airflow
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Quick Start
|
|
14
|
+
|
|
15
|
+
### 1. Configure Connection (Recommended)
|
|
16
|
+
|
|
17
|
+
In Airflow UI, go to **Admin > Connections** and create a new connection:
|
|
18
|
+
|
|
19
|
+
| Field | Value |
|
|
20
|
+
|-------|-------|
|
|
21
|
+
| Conn Id | `pingpulse_default` |
|
|
22
|
+
| Conn Type | `HTTP` |
|
|
23
|
+
| Host | `app.pingpulse.com` |
|
|
24
|
+
| Password | Your API key (`ppk_xxx_xxx`) |
|
|
25
|
+
|
|
26
|
+
### 2. Choose Your Integration Pattern
|
|
27
|
+
|
|
28
|
+
PingPulse offers three ways to integrate - pick what fits your workflow:
|
|
29
|
+
|
|
30
|
+
---
|
|
31
|
+
|
|
32
|
+
## Option 1: Operator (Explicit Tasks)
|
|
33
|
+
|
|
34
|
+
Best for: **New DAGs where you want visible ping tasks**
|
|
35
|
+
|
|
36
|
+
```python
|
|
37
|
+
from airflow import DAG
|
|
38
|
+
from airflow.operators.python import PythonOperator
|
|
39
|
+
from pingpulse_airflow import PingPulseOperator
|
|
40
|
+
from datetime import datetime
|
|
41
|
+
|
|
42
|
+
with DAG('etl_pipeline', start_date=datetime(2024, 1, 1), schedule='@daily') as dag:
|
|
43
|
+
|
|
44
|
+
extract = PythonOperator(
|
|
45
|
+
task_id='extract',
|
|
46
|
+
python_callable=extract_data,
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
ping_extract = PingPulseOperator(
|
|
50
|
+
task_id='ping_extract',
|
|
51
|
+
workflow_id='dwf123abc',
|
|
52
|
+
stage_path='1',
|
|
53
|
+
start=True, # Starts new workflow instance
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
transform = PythonOperator(
|
|
57
|
+
task_id='transform',
|
|
58
|
+
python_callable=transform_data,
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
ping_transform = PingPulseOperator(
|
|
62
|
+
task_id='ping_transform',
|
|
63
|
+
workflow_id='dwf123abc',
|
|
64
|
+
stage_path='2',
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
load = PythonOperator(
|
|
68
|
+
task_id='load',
|
|
69
|
+
python_callable=load_data,
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
ping_complete = PingPulseOperator(
|
|
73
|
+
task_id='ping_complete',
|
|
74
|
+
workflow_id='dwf123abc',
|
|
75
|
+
stage_path='3',
|
|
76
|
+
final=True, # Marks workflow complete
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
extract >> ping_extract >> transform >> ping_transform >> load >> ping_complete
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
**DAG visualization:**
|
|
83
|
+
```
|
|
84
|
+
[Extract] → [Ping 1] → [Transform] → [Ping 2] → [Load] → [Ping 3]
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
---
|
|
88
|
+
|
|
89
|
+
## Option 2: Decorator (Auto-Ping)
|
|
90
|
+
|
|
91
|
+
Best for: **Clean DAGs with Python tasks, invisible instrumentation**
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
from airflow import DAG
|
|
95
|
+
from pingpulse_airflow import pingpulse_task
|
|
96
|
+
from datetime import datetime
|
|
97
|
+
|
|
98
|
+
with DAG('etl_pipeline', start_date=datetime(2024, 1, 1), schedule='@daily') as dag:
|
|
99
|
+
|
|
100
|
+
@pingpulse_task(workflow_id='dwf123abc', stage_path='1', start=True)
|
|
101
|
+
def extract():
|
|
102
|
+
data = fetch_from_source()
|
|
103
|
+
return {'records': len(data)}
|
|
104
|
+
|
|
105
|
+
@pingpulse_task(workflow_id='dwf123abc', stage_path='2')
|
|
106
|
+
def transform(data):
|
|
107
|
+
transformed = process(data)
|
|
108
|
+
return {'processed': len(transformed)}
|
|
109
|
+
|
|
110
|
+
@pingpulse_task(workflow_id='dwf123abc', stage_path='3', final=True)
|
|
111
|
+
def load(data):
|
|
112
|
+
save_to_destination(data)
|
|
113
|
+
return {'status': 'complete'}
|
|
114
|
+
|
|
115
|
+
load(transform(extract()))  # pass each task's output to the next; this also sets the dependencies
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
**DAG visualization:**
|
|
119
|
+
```
|
|
120
|
+
[Extract] → [Transform] → [Load]
|
|
121
|
+
```
|
|
122
|
+
*(Pings happen automatically inside each task)*
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
## Option 3: Callbacks (Retrofit Existing DAGs)
|
|
127
|
+
|
|
128
|
+
Best for: **Adding monitoring to existing DAGs without changing task code**
|
|
129
|
+
|
|
130
|
+
```python
|
|
131
|
+
from airflow import DAG
|
|
132
|
+
from airflow.operators.python import PythonOperator
|
|
133
|
+
from pingpulse_airflow import pingpulse_success, pingpulse_failure
|
|
134
|
+
from datetime import datetime
|
|
135
|
+
|
|
136
|
+
with DAG('existing_pipeline', start_date=datetime(2024, 1, 1), schedule='@daily') as dag:
|
|
137
|
+
|
|
138
|
+
extract = PythonOperator(
|
|
139
|
+
task_id='extract',
|
|
140
|
+
python_callable=extract_data,
|
|
141
|
+
on_success_callback=pingpulse_success('dwf123abc', '1', start=True),
|
|
142
|
+
on_failure_callback=pingpulse_failure('dwf123abc', '1'),
|
|
143
|
+
)
|
|
144
|
+
|
|
145
|
+
transform = PythonOperator(
|
|
146
|
+
task_id='transform',
|
|
147
|
+
python_callable=transform_data,
|
|
148
|
+
on_success_callback=pingpulse_success('dwf123abc', '2'),
|
|
149
|
+
on_failure_callback=pingpulse_failure('dwf123abc', '2'),
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
load = PythonOperator(
|
|
153
|
+
task_id='load',
|
|
154
|
+
python_callable=load_data,
|
|
155
|
+
on_success_callback=pingpulse_success('dwf123abc', '3', final=True),
|
|
156
|
+
on_failure_callback=pingpulse_failure('dwf123abc', '3'),
|
|
157
|
+
)
|
|
158
|
+
|
|
159
|
+
extract >> transform >> load
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
---
|
|
163
|
+
|
|
164
|
+
## Comparison
|
|
165
|
+
|
|
166
|
+
| Pattern | Modifies Task Code | Visible in DAG | Best For |
|
|
167
|
+
|---------|-------------------|----------------|----------|
|
|
168
|
+
| **Operator** | No | Yes (extra nodes) | New DAGs, explicit tracking |
|
|
169
|
+
| **Decorator** | Yes (wrap function) | No | Clean DAGs, TaskFlow API |
|
|
170
|
+
| **Callback** | No | No | Existing DAGs, any operator |
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
174
|
+
## Advanced Usage
|
|
175
|
+
|
|
176
|
+
### Custom Payload
|
|
177
|
+
|
|
178
|
+
Send custom metrics with your pings:
|
|
179
|
+
|
|
180
|
+
```python
|
|
181
|
+
PingPulseOperator(
|
|
182
|
+
task_id='ping_with_metrics',
|
|
183
|
+
workflow_id='dwf123abc',
|
|
184
|
+
stage_path='2',
|
|
185
|
+
payload={
|
|
186
|
+
'records_processed': 10000,
|
|
187
|
+
'duration_seconds': 45.2,
|
|
188
|
+
'source': 'postgres',
|
|
189
|
+
},
|
|
190
|
+
)
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
### Direct API Key (No Connection)
|
|
194
|
+
|
|
195
|
+
```python
|
|
196
|
+
PingPulseOperator(
|
|
197
|
+
task_id='ping',
|
|
198
|
+
workflow_id='dwf123abc',
|
|
199
|
+
stage_path='1',
|
|
200
|
+
api_key='ppk_xxx_xxx', # Direct key
|
|
201
|
+
base_url='https://your-instance.pingpulse.com', # Self-hosted
|
|
202
|
+
)
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
### Static vs Dynamic Workflows
|
|
206
|
+
|
|
207
|
+
```python
|
|
208
|
+
# Dynamic workflow (default) - stages created on-the-fly
|
|
209
|
+
PingPulseOperator(
|
|
210
|
+
workflow_id='dwf123abc',
|
|
211
|
+
workflow_type='dynamic', # Uses /dhk/ endpoint
|
|
212
|
+
...
|
|
213
|
+
)
|
|
214
|
+
|
|
215
|
+
# Static workflow - predefined stages
|
|
216
|
+
PingPulseOperator(
|
|
217
|
+
workflow_id='swf456def',
|
|
218
|
+
workflow_type='static', # Uses /shk/ endpoint
|
|
219
|
+
...
|
|
220
|
+
)
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
### Heartbeat Monitoring
|
|
224
|
+
|
|
225
|
+
For scheduled jobs that are expected to run on a cron schedule:
|
|
226
|
+
|
|
227
|
+
```python
|
|
228
|
+
from pingpulse_airflow.operators.pingpulse import PingPulseHeartbeatOperator
|
|
229
|
+
|
|
230
|
+
heartbeat = PingPulseHeartbeatOperator(
|
|
231
|
+
task_id='send_heartbeat',
|
|
232
|
+
monitor_id='hb123abc',
|
|
233
|
+
)
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
### DAG-Level Callbacks
|
|
237
|
+
|
|
238
|
+
Monitor entire DAG success/failure:
|
|
239
|
+
|
|
240
|
+
```python
|
|
241
|
+
from pingpulse_airflow.callbacks import pingpulse_dag_callbacks
|
|
242
|
+
|
|
243
|
+
callbacks = pingpulse_dag_callbacks('dwf123abc')
|
|
244
|
+
|
|
245
|
+
with DAG(
|
|
246
|
+
'my_dag',
|
|
247
|
+
on_success_callback=callbacks['on_success'],
|
|
248
|
+
on_failure_callback=callbacks['on_failure'],
|
|
249
|
+
...
|
|
250
|
+
) as dag:
|
|
251
|
+
...
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
---
|
|
255
|
+
|
|
256
|
+
## What Gets Sent to PingPulse
|
|
257
|
+
|
|
258
|
+
Each ping includes:
|
|
259
|
+
|
|
260
|
+
```json
|
|
261
|
+
{
|
|
262
|
+
"status": "success",
|
|
263
|
+
"duration": 12.345,
|
|
264
|
+
"airflow": {
|
|
265
|
+
"dag_id": "etl_pipeline",
|
|
266
|
+
"task_id": "transform",
|
|
267
|
+
"run_id": "scheduled__2024-01-15T00:00:00+00:00",
|
|
268
|
+
"execution_date": "2024-01-15T00:00:00+00:00",
|
|
269
|
+
"try_number": 1
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
On failure, also includes:
|
|
275
|
+
```json
|
|
276
|
+
{
|
|
277
|
+
"status": "failed",
|
|
278
|
+
"error": "Connection refused to database..."
|
|
279
|
+
}
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
---
|
|
283
|
+
|
|
284
|
+
## Requirements
|
|
285
|
+
|
|
286
|
+
- Python 3.8+
|
|
287
|
+
- Apache Airflow 2.0+
|
|
288
|
+
- PingPulse account with API key
|
|
289
|
+
|
|
290
|
+
---
|
|
291
|
+
|
|
292
|
+
## Support
|
|
293
|
+
|
|
294
|
+
- Documentation: https://pingpulse.com/api-docs
|
|
295
|
+
- Issues: https://github.com/pingpulse/pingpulse-airflow/issues
|
|
296
|
+
- Email: support@pingpulse.com
|
|
297
|
+
|
|
298
|
+
---
|
|
299
|
+
|
|
300
|
+
## License
|
|
301
|
+
|
|
302
|
+
MIT License - see LICENSE file for details.
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""
|
|
2
|
+
PingPulse Airflow Provider
|
|
3
|
+
|
|
4
|
+
Seamlessly integrate PingPulse workflow monitoring with Apache Airflow.
|
|
5
|
+
|
|
6
|
+
Installation:
|
|
7
|
+
pip install pingpulse-airflow
|
|
8
|
+
|
|
9
|
+
Three integration patterns:
|
|
10
|
+
|
|
11
|
+
1. Operator (explicit task):
|
|
12
|
+
from pingpulse_airflow import PingPulseOperator
|
|
13
|
+
|
|
14
|
+
ping = PingPulseOperator(
|
|
15
|
+
task_id='ping_stage_1',
|
|
16
|
+
workflow_id='dwf123abc',
|
|
17
|
+
stage_path='1',
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
2. Decorator (auto-ping on success/failure):
|
|
21
|
+
from pingpulse_airflow import pingpulse_task
|
|
22
|
+
|
|
23
|
+
@pingpulse_task(workflow_id='dwf123abc', stage_path='2')
|
|
24
|
+
def my_task():
|
|
25
|
+
return process_data()
|
|
26
|
+
|
|
27
|
+
3. Callbacks (attach to existing tasks):
|
|
28
|
+
from pingpulse_airflow import pingpulse_success, pingpulse_failure
|
|
29
|
+
|
|
30
|
+
task = PythonOperator(
|
|
31
|
+
task_id='process',
|
|
32
|
+
python_callable=run_job,
|
|
33
|
+
on_success_callback=pingpulse_success('dwf123abc', '3'),
|
|
34
|
+
on_failure_callback=pingpulse_failure('dwf123abc', '3'),
|
|
35
|
+
)
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
__version__ = "0.1.0"
|
|
39
|
+
|
|
40
|
+
from pingpulse_airflow.hooks.pingpulse import PingPulseHook
|
|
41
|
+
from pingpulse_airflow.operators.pingpulse import PingPulseOperator
|
|
42
|
+
from pingpulse_airflow.decorators import pingpulse_task
|
|
43
|
+
from pingpulse_airflow.callbacks import (
|
|
44
|
+
pingpulse_success,
|
|
45
|
+
pingpulse_failure,
|
|
46
|
+
pingpulse_callback,
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
__all__ = [
|
|
50
|
+
"PingPulseHook",
|
|
51
|
+
"PingPulseOperator",
|
|
52
|
+
"pingpulse_task",
|
|
53
|
+
"pingpulse_success",
|
|
54
|
+
"pingpulse_failure",
|
|
55
|
+
"pingpulse_callback",
|
|
56
|
+
"get_provider_info",
|
|
57
|
+
]
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def get_provider_info():
    """
    Build the provider metadata dictionary for Apache Airflow.

    Airflow uses this to discover and register the provider.

    Returns:
        dict: The package name, display name, description, a single
        ``pingpulse`` connection type mapped to the dotted path of
        ``PingPulseHook``, and a one-element version list taken from the
        module-level ``__version__``.
    """
    # The only connection type this provider declares; the hook is
    # referenced by its dotted import path rather than imported here.
    hook_entry = {
        "connection-type": "pingpulse",
        "hook-class-name": "pingpulse_airflow.hooks.pingpulse.PingPulseHook",
    }

    # Assemble the metadata key by key, in the same order as before.
    provider_info = {}
    provider_info["package-name"] = "pingpulse-airflow"
    provider_info["name"] = "PingPulse"
    provider_info["description"] = (
        "Apache Airflow provider for PingPulse workflow monitoring"
    )
    provider_info["connection-types"] = [hook_entry]
    provider_info["versions"] = [__version__]
    return provider_info
|