acryl-datahub-actions 1.3.1.3rc2__py3-none-any.whl → 1.3.1.5__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: acryl-datahub-actions
- Version: 1.3.1.3rc2
+ Version: 1.3.1.5
  Summary: An action framework to work with DataHub real time changes.
  Home-page: https://docs.datahub.com/
  License: Apache-2.0
@@ -21,201 +21,216 @@ Classifier: Environment :: MacOS X
  Classifier: Topic :: Software Development
  Requires-Python: >=3.9
  Description-Content-Type: text/markdown
- Requires-Dist: acryl-datahub[datahub-kafka]==1.3.1.3rc2
+ Requires-Dist: httpcore>=1.0.9
  Requires-Dist: python-dateutil>=2.8.0
- Requires-Dist: entrypoints
  Requires-Dist: click>=6.0.0
- Requires-Dist: progressbar2
  Requires-Dist: ratelimit
- Requires-Dist: click-default-group
- Requires-Dist: azure-identity==1.21.0
- Requires-Dist: stackprinter
+ Requires-Dist: entrypoints
+ Requires-Dist: progressbar2
+ Requires-Dist: h11>=0.16
+ Requires-Dist: tenacity
+ Requires-Dist: prometheus-client
  Requires-Dist: PyYAML
- Requires-Dist: aws-msk-iam-sasl-signer-python==1.0.2
+ Requires-Dist: click-default-group
  Requires-Dist: toml>=0.10.0
- Requires-Dist: typing-inspect
+ Requires-Dist: stackprinter
  Requires-Dist: pydantic<3.0.0,>=2.0.0
- Requires-Dist: tenacity
- Requires-Dist: httpcore>=1.0.9
- Requires-Dist: h11>=0.16
- Requires-Dist: prometheus-client
+ Requires-Dist: typing-inspect
+ Requires-Dist: acryl-datahub[datahub-kafka]==1.3.1.5
+ Requires-Dist: azure-identity==1.21.0
+ Requires-Dist: aws-msk-iam-sasl-signer-python==1.0.2
  Provides-Extra: base
  Requires-Dist: python-dateutil>=2.8.0; extra == "base"
- Requires-Dist: entrypoints; extra == "base"
  Requires-Dist: click>=6.0.0; extra == "base"
+ Requires-Dist: entrypoints; extra == "base"
  Requires-Dist: progressbar2; extra == "base"
- Requires-Dist: click-default-group; extra == "base"
- Requires-Dist: stackprinter; extra == "base"
- Requires-Dist: PyYAML; extra == "base"
- Requires-Dist: toml>=0.10.0; extra == "base"
  Requires-Dist: tenacity; extra == "base"
  Requires-Dist: prometheus-client; extra == "base"
+ Requires-Dist: PyYAML; extra == "base"
+ Requires-Dist: click-default-group; extra == "base"
+ Requires-Dist: toml>=0.10.0; extra == "base"
+ Requires-Dist: stackprinter; extra == "base"
  Provides-Extra: kafka
  Requires-Dist: python-dateutil>=2.8.0; extra == "kafka"
- Requires-Dist: entrypoints; extra == "kafka"
  Requires-Dist: click>=6.0.0; extra == "kafka"
+ Requires-Dist: entrypoints; extra == "kafka"
  Requires-Dist: progressbar2; extra == "kafka"
- Requires-Dist: click-default-group; extra == "kafka"
  Requires-Dist: confluent-kafka[schemaregistry]; extra == "kafka"
- Requires-Dist: stackprinter; extra == "kafka"
- Requires-Dist: PyYAML; extra == "kafka"
- Requires-Dist: toml>=0.10.0; extra == "kafka"
  Requires-Dist: tenacity; extra == "kafka"
  Requires-Dist: prometheus-client; extra == "kafka"
+ Requires-Dist: PyYAML; extra == "kafka"
+ Requires-Dist: click-default-group; extra == "kafka"
+ Requires-Dist: toml>=0.10.0; extra == "kafka"
+ Requires-Dist: stackprinter; extra == "kafka"
  Provides-Extra: executor
  Requires-Dist: python-dateutil>=2.8.0; extra == "executor"
- Requires-Dist: entrypoints; extra == "executor"
  Requires-Dist: click>=6.0.0; extra == "executor"
+ Requires-Dist: entrypoints; extra == "executor"
  Requires-Dist: progressbar2; extra == "executor"
- Requires-Dist: click-default-group; extra == "executor"
- Requires-Dist: stackprinter; extra == "executor"
- Requires-Dist: acryl-executor==0.3.1; extra == "executor"
- Requires-Dist: PyYAML; extra == "executor"
- Requires-Dist: toml>=0.10.0; extra == "executor"
  Requires-Dist: tenacity; extra == "executor"
  Requires-Dist: prometheus-client; extra == "executor"
+ Requires-Dist: PyYAML; extra == "executor"
+ Requires-Dist: click-default-group; extra == "executor"
+ Requires-Dist: toml>=0.10.0; extra == "executor"
+ Requires-Dist: stackprinter; extra == "executor"
+ Requires-Dist: acryl-executor==0.3.2; extra == "executor"
  Provides-Extra: slack
  Requires-Dist: python-dateutil>=2.8.0; extra == "slack"
- Requires-Dist: entrypoints; extra == "slack"
  Requires-Dist: click>=6.0.0; extra == "slack"
+ Requires-Dist: entrypoints; extra == "slack"
  Requires-Dist: progressbar2; extra == "slack"
- Requires-Dist: click-default-group; extra == "slack"
- Requires-Dist: slack-bolt>=1.15.5; extra == "slack"
- Requires-Dist: stackprinter; extra == "slack"
- Requires-Dist: PyYAML; extra == "slack"
- Requires-Dist: toml>=0.10.0; extra == "slack"
  Requires-Dist: tenacity; extra == "slack"
  Requires-Dist: prometheus-client; extra == "slack"
+ Requires-Dist: PyYAML; extra == "slack"
+ Requires-Dist: click-default-group; extra == "slack"
+ Requires-Dist: toml>=0.10.0; extra == "slack"
+ Requires-Dist: stackprinter; extra == "slack"
+ Requires-Dist: slack-bolt>=1.15.5; extra == "slack"
  Provides-Extra: teams
  Requires-Dist: python-dateutil>=2.8.0; extra == "teams"
- Requires-Dist: entrypoints; extra == "teams"
  Requires-Dist: click>=6.0.0; extra == "teams"
+ Requires-Dist: entrypoints; extra == "teams"
  Requires-Dist: progressbar2; extra == "teams"
- Requires-Dist: click-default-group; extra == "teams"
- Requires-Dist: stackprinter; extra == "teams"
- Requires-Dist: PyYAML; extra == "teams"
- Requires-Dist: toml>=0.10.0; extra == "teams"
  Requires-Dist: pymsteams>=0.2.2; extra == "teams"
  Requires-Dist: tenacity; extra == "teams"
  Requires-Dist: prometheus-client; extra == "teams"
+ Requires-Dist: PyYAML; extra == "teams"
+ Requires-Dist: click-default-group; extra == "teams"
+ Requires-Dist: toml>=0.10.0; extra == "teams"
+ Requires-Dist: stackprinter; extra == "teams"
  Provides-Extra: tag-propagation
  Requires-Dist: python-dateutil>=2.8.0; extra == "tag-propagation"
- Requires-Dist: entrypoints; extra == "tag-propagation"
  Requires-Dist: click>=6.0.0; extra == "tag-propagation"
+ Requires-Dist: entrypoints; extra == "tag-propagation"
  Requires-Dist: progressbar2; extra == "tag-propagation"
- Requires-Dist: click-default-group; extra == "tag-propagation"
- Requires-Dist: stackprinter; extra == "tag-propagation"
- Requires-Dist: PyYAML; extra == "tag-propagation"
- Requires-Dist: toml>=0.10.0; extra == "tag-propagation"
  Requires-Dist: tenacity; extra == "tag-propagation"
  Requires-Dist: prometheus-client; extra == "tag-propagation"
+ Requires-Dist: PyYAML; extra == "tag-propagation"
+ Requires-Dist: click-default-group; extra == "tag-propagation"
+ Requires-Dist: toml>=0.10.0; extra == "tag-propagation"
+ Requires-Dist: stackprinter; extra == "tag-propagation"
  Provides-Extra: term-propagation
  Requires-Dist: python-dateutil>=2.8.0; extra == "term-propagation"
- Requires-Dist: entrypoints; extra == "term-propagation"
  Requires-Dist: click>=6.0.0; extra == "term-propagation"
+ Requires-Dist: entrypoints; extra == "term-propagation"
  Requires-Dist: progressbar2; extra == "term-propagation"
- Requires-Dist: click-default-group; extra == "term-propagation"
- Requires-Dist: stackprinter; extra == "term-propagation"
- Requires-Dist: PyYAML; extra == "term-propagation"
- Requires-Dist: toml>=0.10.0; extra == "term-propagation"
  Requires-Dist: tenacity; extra == "term-propagation"
  Requires-Dist: prometheus-client; extra == "term-propagation"
+ Requires-Dist: PyYAML; extra == "term-propagation"
+ Requires-Dist: click-default-group; extra == "term-propagation"
+ Requires-Dist: toml>=0.10.0; extra == "term-propagation"
+ Requires-Dist: stackprinter; extra == "term-propagation"
  Provides-Extra: snowflake-tag-propagation
  Requires-Dist: python-dateutil>=2.8.0; extra == "snowflake-tag-propagation"
- Requires-Dist: entrypoints; extra == "snowflake-tag-propagation"
  Requires-Dist: click>=6.0.0; extra == "snowflake-tag-propagation"
+ Requires-Dist: entrypoints; extra == "snowflake-tag-propagation"
  Requires-Dist: progressbar2; extra == "snowflake-tag-propagation"
- Requires-Dist: click-default-group; extra == "snowflake-tag-propagation"
- Requires-Dist: stackprinter; extra == "snowflake-tag-propagation"
- Requires-Dist: acryl-datahub[snowflake-slim]==1.3.1.3rc2; extra == "snowflake-tag-propagation"
- Requires-Dist: PyYAML; extra == "snowflake-tag-propagation"
- Requires-Dist: toml>=0.10.0; extra == "snowflake-tag-propagation"
  Requires-Dist: tenacity; extra == "snowflake-tag-propagation"
  Requires-Dist: prometheus-client; extra == "snowflake-tag-propagation"
+ Requires-Dist: PyYAML; extra == "snowflake-tag-propagation"
+ Requires-Dist: click-default-group; extra == "snowflake-tag-propagation"
+ Requires-Dist: toml>=0.10.0; extra == "snowflake-tag-propagation"
+ Requires-Dist: stackprinter; extra == "snowflake-tag-propagation"
+ Requires-Dist: acryl-datahub[snowflake-slim]==1.3.1.5; extra == "snowflake-tag-propagation"
  Provides-Extra: doc-propagation
  Requires-Dist: python-dateutil>=2.8.0; extra == "doc-propagation"
- Requires-Dist: entrypoints; extra == "doc-propagation"
  Requires-Dist: click>=6.0.0; extra == "doc-propagation"
+ Requires-Dist: entrypoints; extra == "doc-propagation"
  Requires-Dist: progressbar2; extra == "doc-propagation"
- Requires-Dist: click-default-group; extra == "doc-propagation"
- Requires-Dist: stackprinter; extra == "doc-propagation"
- Requires-Dist: PyYAML; extra == "doc-propagation"
- Requires-Dist: toml>=0.10.0; extra == "doc-propagation"
  Requires-Dist: tenacity; extra == "doc-propagation"
  Requires-Dist: prometheus-client; extra == "doc-propagation"
+ Requires-Dist: PyYAML; extra == "doc-propagation"
+ Requires-Dist: click-default-group; extra == "doc-propagation"
+ Requires-Dist: toml>=0.10.0; extra == "doc-propagation"
+ Requires-Dist: stackprinter; extra == "doc-propagation"
+ Provides-Extra: observability
+ Requires-Dist: python-dateutil>=2.8.0; extra == "observability"
+ Requires-Dist: click>=6.0.0; extra == "observability"
+ Requires-Dist: entrypoints; extra == "observability"
+ Requires-Dist: progressbar2; extra == "observability"
+ Requires-Dist: tenacity; extra == "observability"
+ Requires-Dist: prometheus-client; extra == "observability"
+ Requires-Dist: PyYAML; extra == "observability"
+ Requires-Dist: click-default-group; extra == "observability"
+ Requires-Dist: toml>=0.10.0; extra == "observability"
+ Requires-Dist: stackprinter; extra == "observability"
+ Requires-Dist: opentelemetry-api>=1.20.0; extra == "observability"
+ Requires-Dist: opentelemetry-sdk>=1.20.0; extra == "observability"
  Provides-Extra: all
  Requires-Dist: python-dateutil>=2.8.0; extra == "all"
- Requires-Dist: entrypoints; extra == "all"
  Requires-Dist: click>=6.0.0; extra == "all"
+ Requires-Dist: entrypoints; extra == "all"
  Requires-Dist: progressbar2; extra == "all"
- Requires-Dist: click-default-group; extra == "all"
- Requires-Dist: confluent-kafka[schemaregistry]; extra == "all"
- Requires-Dist: stackprinter; extra == "all"
- Requires-Dist: slack-bolt>=1.15.5; extra == "all"
- Requires-Dist: acryl-executor==0.3.1; extra == "all"
- Requires-Dist: acryl-datahub[snowflake-slim]==1.3.1.3rc2; extra == "all"
- Requires-Dist: PyYAML; extra == "all"
- Requires-Dist: toml>=0.10.0; extra == "all"
  Requires-Dist: pymsteams>=0.2.2; extra == "all"
+ Requires-Dist: confluent-kafka[schemaregistry]; extra == "all"
  Requires-Dist: tenacity; extra == "all"
  Requires-Dist: prometheus-client; extra == "all"
+ Requires-Dist: PyYAML; extra == "all"
+ Requires-Dist: click-default-group; extra == "all"
+ Requires-Dist: toml>=0.10.0; extra == "all"
+ Requires-Dist: stackprinter; extra == "all"
+ Requires-Dist: slack-bolt>=1.15.5; extra == "all"
+ Requires-Dist: acryl-datahub[snowflake-slim]==1.3.1.5; extra == "all"
+ Requires-Dist: opentelemetry-api>=1.20.0; extra == "all"
+ Requires-Dist: opentelemetry-sdk>=1.20.0; extra == "all"
+ Requires-Dist: acryl-executor==0.3.2; extra == "all"
  Provides-Extra: dev
- Requires-Dist: types-dataclasses; extra == "dev"
- Requires-Dist: pytest-docker>=0.10.3; extra == "dev"
  Requires-Dist: python-dateutil>=2.8.0; extra == "dev"
- Requires-Dist: entrypoints; extra == "dev"
- Requires-Dist: types-freezegun; extra == "dev"
- Requires-Dist: progressbar2; extra == "dev"
- Requires-Dist: ratelimit; extra == "dev"
- Requires-Dist: azure-identity==1.21.0; extra == "dev"
- Requires-Dist: acryl-datahub[snowflake-slim]==1.3.1.3rc2; extra == "dev"
+ Requires-Dist: mypy==1.17.1; extra == "dev"
+ Requires-Dist: types-toml; extra == "dev"
+ Requires-Dist: types-dataclasses; extra == "dev"
+ Requires-Dist: click-default-group; extra == "dev"
+ Requires-Dist: types-setuptools; extra == "dev"
+ Requires-Dist: deepdiff; extra == "dev"
+ Requires-Dist: pytest-cov>=2.8.1; extra == "dev"
+ Requires-Dist: tox; extra == "dev"
+ Requires-Dist: types-python-dateutil; extra == "dev"
+ Requires-Dist: requests-mock; extra == "dev"
  Requires-Dist: freezegun; extra == "dev"
- Requires-Dist: toml>=0.10.0; extra == "dev"
+ Requires-Dist: progressbar2; extra == "dev"
  Requires-Dist: pymsteams>=0.2.2; extra == "dev"
- Requires-Dist: sqlalchemy-stubs; extra == "dev"
+ Requires-Dist: coverage>=5.1; extra == "dev"
+ Requires-Dist: h11>=0.16; extra == "dev"
+ Requires-Dist: types-pytz; extra == "dev"
+ Requires-Dist: pytest-dependency>=0.5.1; extra == "dev"
  Requires-Dist: tenacity; extra == "dev"
- Requires-Dist: types-PyMySQL; extra == "dev"
- Requires-Dist: types-cachetools; extra == "dev"
- Requires-Dist: types-PyYAML; extra == "dev"
- Requires-Dist: confluent-kafka[schemaregistry]; extra == "dev"
- Requires-Dist: click>=6.0.0; extra == "dev"
  Requires-Dist: slack-bolt>=1.15.5; extra == "dev"
- Requires-Dist: mypy==1.17.1; extra == "dev"
- Requires-Dist: acryl-executor==0.3.1; extra == "dev"
- Requires-Dist: tox; extra == "dev"
- Requires-Dist: aws-msk-iam-sasl-signer-python==1.0.2; extra == "dev"
- Requires-Dist: types-pytz; extra == "dev"
- Requires-Dist: jsonpickle; extra == "dev"
- Requires-Dist: h11>=0.16; extra == "dev"
- Requires-Dist: types-setuptools; extra == "dev"
- Requires-Dist: httpcore>=1.0.9; extra == "dev"
- Requires-Dist: coverage>=5.1; extra == "dev"
- Requires-Dist: click-default-group; extra == "dev"
- Requires-Dist: stackprinter; extra == "dev"
+ Requires-Dist: toml>=0.10.0; extra == "dev"
+ Requires-Dist: azure-identity==1.21.0; extra == "dev"
  Requires-Dist: types-six; extra == "dev"
- Requires-Dist: types-click==0.1.12; extra == "dev"
- Requires-Dist: pytest-dependency>=0.5.1; extra == "dev"
- Requires-Dist: PyYAML; extra == "dev"
- Requires-Dist: requests-mock; extra == "dev"
- Requires-Dist: typing-inspect; extra == "dev"
- Requires-Dist: pydantic<3.0.0,>=2.0.0; extra == "dev"
- Requires-Dist: deepdiff; extra == "dev"
- Requires-Dist: pytest>=6.2.2; extra == "dev"
- Requires-Dist: types-toml; extra == "dev"
- Requires-Dist: prometheus-client; extra == "dev"
+ Requires-Dist: types-PyYAML; extra == "dev"
  Requires-Dist: twine; extra == "dev"
- Requires-Dist: acryl-datahub[datahub-kafka]==1.3.1.3rc2; extra == "dev"
- Requires-Dist: build; extra == "dev"
- Requires-Dist: types-python-dateutil; extra == "dev"
+ Requires-Dist: ratelimit; extra == "dev"
+ Requires-Dist: entrypoints; extra == "dev"
  Requires-Dist: ruff==0.11.7; extra == "dev"
- Requires-Dist: pytest-cov>=2.8.1; extra == "dev"
+ Requires-Dist: pytest>=6.2.2; extra == "dev"
+ Requires-Dist: stackprinter; extra == "dev"
+ Requires-Dist: pydantic<3.0.0,>=2.0.0; extra == "dev"
+ Requires-Dist: types-click==0.1.12; extra == "dev"
+ Requires-Dist: acryl-executor==0.3.2; extra == "dev"
  Requires-Dist: types-requests; extra == "dev"
+ Requires-Dist: aws-msk-iam-sasl-signer-python==1.0.2; extra == "dev"
+ Requires-Dist: httpcore>=1.0.9; extra == "dev"
+ Requires-Dist: click>=6.0.0; extra == "dev"
+ Requires-Dist: jsonpickle; extra == "dev"
+ Requires-Dist: build; extra == "dev"
+ Requires-Dist: types-PyMySQL; extra == "dev"
+ Requires-Dist: prometheus-client; extra == "dev"
+ Requires-Dist: confluent-kafka[schemaregistry]; extra == "dev"
+ Requires-Dist: types-freezegun; extra == "dev"
+ Requires-Dist: PyYAML; extra == "dev"
+ Requires-Dist: acryl-datahub[snowflake-slim]==1.3.1.5; extra == "dev"
+ Requires-Dist: typing-inspect; extra == "dev"
+ Requires-Dist: pytest-docker>=0.10.3; extra == "dev"
+ Requires-Dist: acryl-datahub[datahub-kafka]==1.3.1.5; extra == "dev"
+ Requires-Dist: sqlalchemy-stubs; extra == "dev"
+ Requires-Dist: types-cachetools; extra == "dev"
  Provides-Extra: integration-tests
+ Requires-Dist: pymsteams>=0.2.2; extra == "integration-tests"
  Requires-Dist: confluent-kafka[schemaregistry]; extra == "integration-tests"
  Requires-Dist: slack-bolt>=1.15.5; extra == "integration-tests"
- Requires-Dist: acryl-datahub[snowflake-slim]==1.3.1.3rc2; extra == "integration-tests"
- Requires-Dist: acryl-executor==0.3.1; extra == "integration-tests"
- Requires-Dist: pymsteams>=0.2.2; extra == "integration-tests"
+ Requires-Dist: acryl-datahub[snowflake-slim]==1.3.1.5; extra == "integration-tests"
+ Requires-Dist: acryl-executor==0.3.2; extra == "integration-tests"
  Dynamic: classifier
  Dynamic: description
  Dynamic: description-content-type
@@ -1,5 +1,5 @@
  datahub_actions/__init__.py,sha256=Pn9UTDbqYPt6jY_acE7MQIveX_Nzdfl5oGmi-Ze8CHs,647
- datahub_actions/_version.py,sha256=2fmT-OPiPEWFLH1asol6ZJ-I8WM8kY1iZSShq2JELhc,339
+ datahub_actions/_version.py,sha256=hJKt_5TzC3gmpk1gMO_bZIrwgZ5C_WNJH7va8i2jA6Q,336
  datahub_actions/entrypoints.py,sha256=_6NOpKhlfXuSUdPhDpPya7d9kJmwoRGrunxcNPMQE9k,4743
  datahub_actions/action/__init__.py,sha256=KYWPHGi7sDM0DXrrXmhlR6_zhym1qNbtFhjYk1Ug6ss,579
  datahub_actions/action/action.py,sha256=ET1fpeRn6KVD9diJ9ZOObsojrN9y6Vfn4tK7jzBQKHg,1537
@@ -13,9 +13,11 @@ datahub_actions/event/__init__.py,sha256=KYWPHGi7sDM0DXrrXmhlR6_zhym1qNbtFhjYk1U
  datahub_actions/event/event.py,sha256=2Eyizwzbf3fXsUV4n5p7gsYZS_CjEE5y9m1YvkmKOKU,990
  datahub_actions/event/event_envelope.py,sha256=x1QfDetMM7k5SLecD0Nb-duxMxKWU0rmeLroScvkicY,2258
  datahub_actions/event/event_registry.py,sha256=bWV2n9u1n8p9Onu9G2AVgZIfOxCjaBT0pKg2eOQdaig,4663
+ datahub_actions/observability/__init__.py,sha256=llKqfushdo7d6RNAtIx9ofS8fqFltea-BHkkzZCUyAA,643
+ datahub_actions/observability/kafka_lag_monitor.py,sha256=UrGSC18iUqvKCpxN8HzZ5gpvDjdNtAwuDgXgd77pfpE,7865
  datahub_actions/pipeline/__init__.py,sha256=KYWPHGi7sDM0DXrrXmhlR6_zhym1qNbtFhjYk1Ug6ss,579
  datahub_actions/pipeline/pipeline.py,sha256=6Bod5W3QJNAV0kXymooBxxJVuvAYv3mpvAa6zp-9u5c,12194
- datahub_actions/pipeline/pipeline_config.py,sha256=ikJYdDpBv0PI0lpbtubseh8SsuK0032i4Gb6Uum2jck,2208
+ datahub_actions/pipeline/pipeline_config.py,sha256=6mJKK0J9EuXaxPVjl7UuE0qzACpPGjQLr6yNv_-O7Xg,2347
  datahub_actions/pipeline/pipeline_context.py,sha256=RollJBpjyw_BChsPPDaXC8_t97Di5eYaAYchGcBKWjs,964
  datahub_actions/pipeline/pipeline_manager.py,sha256=vFqcq1PY2SWtWZElntWzF8P_5wgmZBqTWjE_1NiHGyY,4039
  datahub_actions/pipeline/pipeline_stats.py,sha256=p6R6y4NlaHv6MKkMkAAM5ZgMye1wRK0y6Dlk5eCmbZ8,5062
@@ -33,12 +35,12 @@ datahub_actions/plugin/action/metadata_change_sync/metadata_change_sync.py,sha25
  datahub_actions/plugin/action/propagation/__init__.py,sha256=KYWPHGi7sDM0DXrrXmhlR6_zhym1qNbtFhjYk1Ug6ss,579
  datahub_actions/plugin/action/propagation/propagation_utils.py,sha256=411ojU1UzOLURecxCXhdcmi1v0tfuIQyLrRuwT1fpc4,10810
  datahub_actions/plugin/action/propagation/docs/__init__.py,sha256=KYWPHGi7sDM0DXrrXmhlR6_zhym1qNbtFhjYk1Ug6ss,579
- datahub_actions/plugin/action/propagation/docs/propagation_action.py,sha256=mtK-sZwERmqOe3xkwFqtelX3vWzF9NMQXZ-P-LK-vsI,37067
+ datahub_actions/plugin/action/propagation/docs/propagation_action.py,sha256=Q1Fvb-1DAKS0Ktlpf0ydK1jXu14nSxYP-mWkdmhpo_M,37161
  datahub_actions/plugin/action/slack/__init__.py,sha256=KYWPHGi7sDM0DXrrXmhlR6_zhym1qNbtFhjYk1Ug6ss,579
  datahub_actions/plugin/action/slack/slack.py,sha256=YT9kTQsX0FzsB3pglaDl2kIFFnNF7cWx8HcC_xzq9eM,5175
  datahub_actions/plugin/action/snowflake/__init__.py,sha256=KYWPHGi7sDM0DXrrXmhlR6_zhym1qNbtFhjYk1Ug6ss,579
  datahub_actions/plugin/action/snowflake/snowflake_util.py,sha256=ZJW7Wsb65-3JrA15EVfv0ojgvFSC-sV0a_LZ9QyP_bw,5190
- datahub_actions/plugin/action/snowflake/tag_propagator.py,sha256=GgyQHnLtZcmUPyxCNO2eTziMhXizC1N3dkUP_EdxUaw,5052
+ datahub_actions/plugin/action/snowflake/tag_propagator.py,sha256=fQ1BzarIQcI5QziydSleknIV-KQF2oEQ3M0KoE3WFLk,5110
  datahub_actions/plugin/action/tag/__init__.py,sha256=KYWPHGi7sDM0DXrrXmhlR6_zhym1qNbtFhjYk1Ug6ss,579
  datahub_actions/plugin/action/tag/tag_propagation_action.py,sha256=2iLG4oJURrpuwnS28ePDtBGKvvtk-2BWB8PHGLyttw4,6448
  datahub_actions/plugin/action/teams/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -50,16 +52,16 @@ datahub_actions/plugin/action/utils/term_resolver.py,sha256=J3_u-iGFXxsGCPtaeV_p
  datahub_actions/plugin/source/__init__.py,sha256=KYWPHGi7sDM0DXrrXmhlR6_zhym1qNbtFhjYk1Ug6ss,579
  datahub_actions/plugin/source/acryl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub_actions/plugin/source/acryl/constants.py,sha256=f6vEsF6SdD0B1Vs90lCRglRQOCCYjiIlGanNtC7OsnY,307
- datahub_actions/plugin/source/acryl/datahub_cloud_event_source.py,sha256=izoNCkZa3DZwW_2wPBfVgXrfqPIFCoBUYFzw6OyIU6w,12538
+ datahub_actions/plugin/source/acryl/datahub_cloud_event_source.py,sha256=61kyW--hZD6gzs3_OO5a0XcBNuhNkLBo_7h1oX-hBek,12814
  datahub_actions/plugin/source/acryl/datahub_cloud_events_ack_manager.py,sha256=ky15ibq5lfYdPIwufv4w92XOpp9C6cRvlhPRbJ_cs10,993
- datahub_actions/plugin/source/acryl/datahub_cloud_events_consumer.py,sha256=T-Y8MB8b3KJXyr3ecEHl-bpNB8bxwy6yAAeIl5atuxI,6039
+ datahub_actions/plugin/source/acryl/datahub_cloud_events_consumer.py,sha256=1j6s-uTyZcRlF74toHTrdNeVNq5MMh9_Fp8FOsV-8a0,7116
  datahub_actions/plugin/source/acryl/datahub_cloud_events_consumer_offsets_store.py,sha256=5m_VR_5yHjt4_YZEi1die5sr0ngl9dVobX10AjIodvA,3969
  datahub_actions/plugin/source/kafka/__init__.py,sha256=KYWPHGi7sDM0DXrrXmhlR6_zhym1qNbtFhjYk1Ug6ss,579
- datahub_actions/plugin/source/kafka/kafka_event_source.py,sha256=i3JqjXuAhjg0fwK99vbOH92sQHex8BzcplZDcrSclZ0,11545
+ datahub_actions/plugin/source/kafka/kafka_event_source.py,sha256=5Nj4coUbboK7ZFtH0JzIvzzacdqWd9a1nHLBtpx3Uog,13380
  datahub_actions/plugin/source/kafka/utils.py,sha256=EEqBnv8Zd05zSg9T3f2FHaARaStD2j2M_xiSeaQBplA,758
  datahub_actions/plugin/transform/__init__.py,sha256=KYWPHGi7sDM0DXrrXmhlR6_zhym1qNbtFhjYk1Ug6ss,579
  datahub_actions/plugin/transform/filter/__init__.py,sha256=KYWPHGi7sDM0DXrrXmhlR6_zhym1qNbtFhjYk1Ug6ss,579
- datahub_actions/plugin/transform/filter/filter_transformer.py,sha256=aF5sTy5FuRV1odZckibLIR0uGYbLmehq5pRCZ2LYIwE,3136
+ datahub_actions/plugin/transform/filter/filter_transformer.py,sha256=Dc5xeM9CPxqhvNP2bHPcDrJ6NFDI2KUjVm8Q4Lt-KqA,3186
  datahub_actions/source/__init__.py,sha256=KYWPHGi7sDM0DXrrXmhlR6_zhym1qNbtFhjYk1Ug6ss,579
  datahub_actions/source/event_source.py,sha256=_7ufs1WyeAFQ1b4BxGypvN8mpW006U2kA-WCyZndf7c,1947
  datahub_actions/source/event_source_registry.py,sha256=5Nmz8mNE1G_zJ8zJ-WnNt3JMrJdteuKHdr4mKe3t6Ig,1147
@@ -75,8 +77,8 @@ datahub_actions/utils/event_util.py,sha256=VluTOeyFcot48moK9qLmYL1ADAjsau0346Ngi
  datahub_actions/utils/kafka_msk_iam.py,sha256=JWg0MBEMcsG2AmW4yXiHvH_dnnsQDIRASdlvDXGTVcI,1013
  datahub_actions/utils/name_resolver.py,sha256=uXICSpy1IUe5uyFUiRk4vDQ9_G0JytPgKPSnqMA6fZk,10540
  datahub_actions/utils/social_util.py,sha256=FI_3qDjayX9LKlDjf43QHafnOznQk3v5Vp3Xyhq-lno,5271
- acryl_datahub_actions-1.3.1.3rc2.dist-info/METADATA,sha256=UOrERR0VKvmNf1dgg9Q-B3USa5sZVq-8sBWM1T1yqIw,18124
- acryl_datahub_actions-1.3.1.3rc2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- acryl_datahub_actions-1.3.1.3rc2.dist-info/entry_points.txt,sha256=Gbvj36kOFWrsJ1meJVFB7zYgrKbIGgufOpZDurJbehU,866
- acryl_datahub_actions-1.3.1.3rc2.dist-info/top_level.txt,sha256=93StcIqRM0PfcJoT06TFhcCjPnIw-CyFgBaF-4vqCKY,16
- acryl_datahub_actions-1.3.1.3rc2.dist-info/RECORD,,
+ acryl_datahub_actions-1.3.1.5.dist-info/METADATA,sha256=lqq2CebXYdT4MAVOEOtalLC2dsPgOmB8rjpi0YI_XAg,18932
+ acryl_datahub_actions-1.3.1.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ acryl_datahub_actions-1.3.1.5.dist-info/entry_points.txt,sha256=Gbvj36kOFWrsJ1meJVFB7zYgrKbIGgufOpZDurJbehU,866
+ acryl_datahub_actions-1.3.1.5.dist-info/top_level.txt,sha256=93StcIqRM0PfcJoT06TFhcCjPnIw-CyFgBaF-4vqCKY,16
+ acryl_datahub_actions-1.3.1.5.dist-info/RECORD,,
@@ -1,6 +1,6 @@
  # Published at https://pypi.org/project/acryl-datahub-actions/.
  __package_name__ = "acryl-datahub-actions"
- __version__ = "1.3.1.3rc2"
+ __version__ = "1.3.1.5"


  def is_dev_mode() -> bool:
@@ -0,0 +1,15 @@
+ # Copyright 2021 Acryl Data, Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """Observability and metrics utilities for datahub-actions."""
@@ -0,0 +1,230 @@
+ # Copyright 2021 Acryl Data, Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """Background thread for monitoring Kafka consumer lag.
+
+ This module provides a KafkaLagMonitor class that periodically calculates
+ and reports Kafka consumer lag metrics to Prometheus.
+ """
+
+ import logging
+ import threading
+ from dataclasses import dataclass
+ from typing import Dict, Optional
+
+ from confluent_kafka import Consumer, KafkaException, TopicPartition
+ from prometheus_client import Gauge
+
+ logger = logging.getLogger(__name__)
+
+ # Prometheus metrics
+ KAFKA_LAG_GAUGE = Gauge(
+     name="kafka_consumer_lag",
+     documentation="Kafka consumer lag aggregated per topic",
+     labelnames=["topic", "pipeline_name"],
+ )
+
+
+ @dataclass
+ class LagStats:
+     """Statistics for a topic's consumer lag."""
+
+     topic: str
+     total_lag: int
+     partition_lags: Dict[int, int]  # partition_id -> lag
+
+
+ class KafkaLagMonitor:
+     """Background thread that periodically reports Kafka consumer lag.
+
+     This monitor:
+     1. Queries assigned partitions from the Kafka consumer
+     2. Gets high water marks for each partition
+     3. Gets committed offsets for each partition
+     4. Calculates lag = high_water_mark - committed_offset
+     5. Aggregates per-topic lag (sum across partitions)
+     6. Updates Prometheus Gauge metrics
+     7. Optionally updates OpenTelemetry metrics if available
+     """
+
+     def __init__(
+         self,
+         consumer: Consumer,
+         pipeline_name: str,
+         interval_seconds: float = 30.0,
+         timeout_seconds: float = 5.0,
+     ):
+         """Initialize lag monitor.
+
+         Args:
+             consumer: confluent_kafka.Consumer instance to monitor
+             pipeline_name: Name of the action pipeline (for metric labels)
+             interval_seconds: How often to report lag (default: 30s)
+             timeout_seconds: Timeout for Kafka API calls (default: 5s)
+         """
+         self.consumer = consumer
+         self.pipeline_name = pipeline_name
+         self.interval_seconds = interval_seconds
+         self.timeout_seconds = timeout_seconds
+
+         self._stop_event = threading.Event()
+         self._thread: Optional[threading.Thread] = None
+
+     def start(self) -> None:
+         """Start the background monitoring thread."""
+         if self._thread is not None:
+             logger.warning("Lag monitor already started")
+             return
+
+         self._stop_event.clear()
+         self._thread = threading.Thread(
+             target=self._monitor_loop,
+             name=f"kafka-lag-monitor-{self.pipeline_name}",
+             daemon=True,  # Daemon thread exits when main thread exits
+         )
+         self._thread.start()
+         logger.info(
+             f"Kafka lag monitor started for pipeline '{self.pipeline_name}' "
+             f"(interval={self.interval_seconds}s)"
+         )
+
+     def stop(self) -> None:
+         """Stop the background monitoring thread."""
+         if self._thread is None:
+             return
+
+         logger.info(f"Stopping Kafka lag monitor for pipeline '{self.pipeline_name}'")
+         self._stop_event.set()
+         self._thread.join(timeout=10.0)
+         self._thread = None
+
+     def _monitor_loop(self) -> None:
+         """Main monitoring loop that runs in background thread."""
+         while not self._stop_event.is_set():
+             try:
+                 self._collect_and_report_lag()
+             except Exception as e:
+                 # Log error but don't crash - monitoring should be resilient
+                 logger.error(
+                     f"Error collecting lag for pipeline '{self.pipeline_name}': {e}",
+                     exc_info=True,
+                 )
+
+             # Sleep with interrupt support
+             self._stop_event.wait(timeout=self.interval_seconds)
+
+     def _collect_and_report_lag(self) -> None:
+         """Collect lag statistics and update metrics."""
+         # Get assigned partitions
+         assignment = self.consumer.assignment()
+         if not assignment:
+             logger.debug(f"No partitions assigned to pipeline '{self.pipeline_name}'")
+             return
+
+         # Group partitions by topic
+         topic_partitions: Dict[str, list[TopicPartition]] = {}
+         for tp in assignment:
+             if tp.topic not in topic_partitions:
+                 topic_partitions[tp.topic] = []
+             topic_partitions[tp.topic].append(tp)
+
+         # Calculate lag per topic
+         for topic, partitions in topic_partitions.items():
+             lag_stats = self._calculate_topic_lag(topic, partitions)
+             if lag_stats:
+                 self._report_lag(lag_stats)
+
+     def _calculate_topic_lag(
+         self, topic: str, partitions: list[TopicPartition]
+     ) -> Optional[LagStats]:
+         """Calculate lag for all partitions of a topic.
+
+         Args:
+             topic: Topic name
+             partitions: List of TopicPartition objects for this topic
+
+         Returns:
+             LagStats with aggregated lag, or None if calculation failed
+         """
+         partition_lags: Dict[int, int] = {}
+
+         # Get committed offsets for all partitions at once
+         try:
+             committed_partitions = self.consumer.committed(
+                 partitions, timeout=self.timeout_seconds
+             )
+         except KafkaException as e:
+             logger.warning(f"Failed to get committed offsets for topic '{topic}': {e}")
+             return None
+
+         # Calculate lag for each partition
+         for tp in committed_partitions:
+             try:
+                 # Get high water mark
+                 watermarks = self.consumer.get_watermark_offsets(
+                     tp, timeout=self.timeout_seconds, cached=False
+                 )
+                 if watermarks is None:
+                     logger.warning(
+                         f"Failed to get watermarks for {topic}[{tp.partition}]"
+                     )
+                     continue
+
+                 low, high = watermarks
+
+                 # Calculate lag
+                 if tp.offset < 0:
+                     # No committed offset yet - show total available messages as lag
+                     lag = high - low
+                 else:
+                     # Normal case: lag = high water mark - committed offset
+                     lag = high - tp.offset
+
+                 # Ensure non-negative lag
+                 lag = max(0, lag)
+                 partition_lags[tp.partition] = lag
+
+             except KafkaException as e:
+                 logger.warning(
+                     f"Error calculating lag for {topic}[{tp.partition}]: {e}"
+                 )
+                 continue
+
+         if not partition_lags:
+             return None
+
+         total_lag = sum(partition_lags.values())
+         return LagStats(
+             topic=topic,
+             total_lag=total_lag,
+             partition_lags=partition_lags,
+         )
+
+     def _report_lag(self, lag_stats: LagStats) -> None:
+         """Report lag statistics to metrics backends.
+
+         Args:
+             lag_stats: Lag statistics to report
+         """
+         # Always update Prometheus (base requirement)
+         KAFKA_LAG_GAUGE.labels(
+             topic=lag_stats.topic,
+             pipeline_name=self.pipeline_name,
+         ).set(lag_stats.total_lag)
+
+         logger.debug(
+             f"Pipeline '{self.pipeline_name}' topic '{lag_stats.topic}': "
+             f"lag={lag_stats.total_lag} "
+             f"(partitions: {lag_stats.partition_lags})"
+         )
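Editor's note: the new module is self-contained enough that its intended use can be sketched from the diff alone. The sketch below wires a confluent-kafka Consumer to the monitor and exposes the `kafka_consumer_lag` gauge for scraping; the broker address, group id, topic, and port are illustrative stand-ins, not values taken from the package:

```python
from confluent_kafka import Consumer
from prometheus_client import start_http_server

from datahub_actions.observability.kafka_lag_monitor import KafkaLagMonitor

# Expose the kafka_consumer_lag gauge on a local scrape endpoint (port is arbitrary).
start_http_server(8000)

consumer = Consumer({
    "bootstrap.servers": "localhost:9092",  # hypothetical broker
    "group.id": "datahub-actions-demo",     # hypothetical group id
})
consumer.subscribe(["PlatformEvent_v1"])    # illustrative topic

monitor = KafkaLagMonitor(
    consumer=consumer, pipeline_name="demo", interval_seconds=10.0
)
monitor.start()  # daemon thread; polls assignment/committed/watermarks every 10s
try:
    while True:
        consumer.poll(1.0)  # normal consumption advances the committed offsets
finally:
    monitor.stop()
    consumer.close()
```

Because the thread is a daemon and `_monitor_loop` catches all exceptions, a misbehaving broker degrades to warning logs rather than taking the pipeline down.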
@@ -14,7 +14,7 @@

  from typing import Any, Dict, List, Optional, Union

- from pydantic import BaseModel
+ from pydantic import BaseModel, Field

  from datahub.configuration import ConfigModel
  from datahub.configuration.common import ConfigEnum
@@ -30,29 +30,29 @@ class FailureMode(ConfigEnum):

  class SourceConfig(ConfigModel):
      type: str
-     config: Optional[Dict[str, Any]] = None
+     config: Optional[Dict[str, Any]] = Field(default=None)


  class TransformConfig(ConfigModel):
      type: str
-     config: Optional[Dict[str, Any]] = None
+     config: Optional[Dict[str, Any]] = Field(default=None)


  class FilterConfig(ConfigModel):
      event_type: Union[str, List[str]]
-     event: Optional[Dict[str, Any]] = None
+     event: Optional[Dict[str, Any]] = Field(default=None)


  class ActionConfig(ConfigModel):
      type: str
-     config: Optional[dict]
+     config: Optional[Dict[str, Any]] = Field(default=None)


  class PipelineOptions(BaseModel):
-     retry_count: Optional[int] = None
-     failure_mode: Optional[FailureMode] = None
-     failed_events_dir: Optional[str] = (
-         None  # The path where failed events should be logged.
+     retry_count: Optional[int] = Field(default=None)
+     failure_mode: Optional[FailureMode] = Field(default=None)
+     failed_events_dir: Optional[str] = Field(
+         default=None, description="The path where failed events should be logged."
      )

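Editor's note: the switch to explicit `Field(default=None)` is behavioral, not cosmetic, for `ActionConfig`. Under pydantic v2 (which this package pins via `pydantic<3.0.0,>=2.0.0`), an `Optional[...]` annotation with no default is a required field, so the old `config: Optional[dict]` had to be supplied explicitly even when `null`. A minimal sketch with hypothetical model names:

```python
from typing import Any, Dict, Optional

from pydantic import BaseModel, Field, ValidationError

class Old(BaseModel):
    # pydantic v2: Optional only widens the type; with no default, the field is required
    config: Optional[Dict[str, Any]]

class New(BaseModel):
    config: Optional[Dict[str, Any]] = Field(default=None)  # genuinely optional

try:
    Old()  # fails validation because "config" was not provided
except ValidationError as e:
    print(e.errors()[0]["type"])  # -> "missing"

print(New().config)  # -> None
```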
 
@@ -132,10 +132,11 @@ class DocPropagationAction(Action):
      def __init__(self, config: DocPropagationConfig, ctx: PipelineContext):
          super().__init__()
          self.action_urn: str
-         if not ctx.pipeline_name.startswith("urn:li:dataHubAction"):
-             self.action_urn = f"urn:li:dataHubAction:{ctx.pipeline_name}"
+         if "urn:li:dataHubAction:" in ctx.pipeline_name:
+             action_urn_part = ctx.pipeline_name.split("urn:li:dataHubAction:")[1]
+             self.action_urn = f"urn:li:dataHubAction:{action_urn_part}"
          else:
-             self.action_urn = ctx.pipeline_name
+             self.action_urn = f"urn:li:dataHubAction:{ctx.pipeline_name}"

          self.config: DocPropagationConfig = config
          self.last_config_refresh: float = 0
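Editor's note: the rewritten branch normalizes the action URN instead of trusting a prefix check: a pipeline name that embeds `urn:li:dataHubAction:` anywhere now has everything before the prefix discarded and the URN rebuilt from what follows it. A standalone restatement of the new logic (the helper name is illustrative, not part of the package):

```python
def normalize_action_urn(pipeline_name: str) -> str:
    # Illustrative restatement of the branch above, not a function in the package.
    if "urn:li:dataHubAction:" in pipeline_name:
        # Keep only what follows the prefix, discarding any leading wrapper text.
        action_urn_part = pipeline_name.split("urn:li:dataHubAction:")[1]
        return f"urn:li:dataHubAction:{action_urn_part}"
    return f"urn:li:dataHubAction:{pipeline_name}"

assert normalize_action_urn("doc_propagator") == "urn:li:dataHubAction:doc_propagator"
assert normalize_action_urn("urn:li:dataHubAction:doc_propagator") == (
    "urn:li:dataHubAction:doc_propagator"
)
```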
@@ -15,6 +15,8 @@
  import logging
  from typing import Optional

+ from pydantic import Field
+
  from datahub.configuration.common import ConfigModel
  from datahub.ingestion.source.snowflake.snowflake_config import SnowflakeV2Config
  from datahub_actions.action.action import Action
@@ -36,8 +38,8 @@ logger = logging.getLogger(__name__)

  class SnowflakeTagPropagatorConfig(ConfigModel):
      snowflake: SnowflakeV2Config
-     tag_propagation: Optional[TagPropagationConfig] = None
-     term_propagation: Optional[TermPropagationConfig] = None
+     tag_propagation: Optional[TagPropagationConfig] = Field(default=None)
+     term_propagation: Optional[TermPropagationConfig] = Field(default=None)


  class SnowflakeTagPropagatorAction(Action):
@@ -4,6 +4,8 @@ import time
  from dataclasses import dataclass
  from typing import Dict, Iterable, List, Optional, Union, cast

+ from pydantic import Field
+
  from datahub.configuration import ConfigModel
  from datahub.emitter.serialization_helper import post_json_transform
  from datahub.ingestion.graph.client import DataHubGraph
@@ -59,9 +61,12 @@ def build_metadata_change_log_event(msg: ExternalEvent) -> MetadataChangeLogEven

  class DataHubEventsSourceConfig(ConfigModel):
      topics: Union[str, List[str]] = PLATFORM_EVENT_TOPIC_NAME
-     consumer_id: Optional[str] = None  # Used to store offset for the consumer.
-     lookback_days: Optional[int] = None
+     consumer_id: Optional[str] = Field(
+         default=None, description="Used to store offset for the consumer."
+     )
+     lookback_days: Optional[int] = Field(default=None)
      reset_offsets: Optional[bool] = False
+     infinite_retry: Optional[bool] = False

      # Time and Exit Conditions.
      kill_after_idle_timeout: bool = False
@@ -106,6 +111,7 @@ class DataHubEventSource(EventSource):
              graph=self.ctx.graph.graph,
              lookback_days=self.source_config.lookback_days,
              reset_offsets=self.source_config.reset_offsets,
+             infinite_retry=self.source_config.infinite_retry,
          )

          self.ack_manager = AckManager()
@@ -120,6 +126,7 @@ class DataHubEventSource(EventSource):
          graph: DataHubGraph,
          lookback_days: Optional[int],
          reset_offsets: Optional[bool],
+         infinite_retry: Optional[bool],
      ) -> Dict[str, DataHubEventsConsumer]:
          """
          Initialize DataHub consumers for each topic with appropriate consumer IDs.
@@ -156,6 +163,7 @@ class DataHubEventSource(EventSource):
                  consumer_id=topic_consumer_id,
                  lookback_days=lookback_days,
                  reset_offsets=reset_offsets,
+                 infinite_retry=infinite_retry,
              )

          return topic_consumers
@@ -4,11 +4,17 @@ from typing import List, Optional

  import requests
  from pydantic import BaseModel, Field
- from requests.exceptions import ConnectionError, HTTPError
+ from requests.exceptions import (
+     ChunkedEncodingError,
+     ConnectionError,
+     HTTPError,
+     Timeout,
+ )
  from tenacity import (
-     retry,
+     Retrying,
      retry_if_exception_type,
      stop_after_attempt,
+     stop_never,
      wait_exponential,
  )

@@ -44,6 +50,7 @@ class DataHubEventsConsumer:
          offset_id: Optional[str] = None,
          lookback_days: Optional[int] = None,
          reset_offsets: Optional[bool] = False,
+         infinite_retry: Optional[bool] = False,
      ):
          # 1) Always set self.consumer_id, even if None, so tests can assert it safely.
          self.consumer_id: Optional[str] = consumer_id
@@ -52,6 +59,9 @@ class DataHubEventsConsumer:
          self.graph: DataHubGraph = graph
          self.offset_id: Optional[str] = offset_id
          self.default_lookback_days: Optional[int] = lookback_days
+         self.infinite_retry: bool = (
+             infinite_retry if infinite_retry is not None else False
+         )
          self.offsets_store: Optional[
              DataHubEventsConsumerPlatformResourceOffsetsStore
          ] = None
@@ -78,12 +88,6 @@ class DataHubEventsConsumer:
          else:
              logger.debug("Starting DataHub Events Consumer with no consumer ID.")

-     @retry(
-         retry=retry_if_exception_type((HTTPError, ConnectionError)),
-         wait=wait_exponential(multiplier=1, min=2, max=30),
-         stop=stop_after_attempt(3),
-         reraise=True,
-     )
      def poll_events(
          self,
          topic: str,
@@ -94,6 +98,36 @@ class DataHubEventsConsumer:
          """
          Fetch events for a specific topic.
          """
+         stop_condition = stop_never if self.infinite_retry else stop_after_attempt(15)
+
+         retry_strategy = Retrying(
+             retry=retry_if_exception_type(
+                 (HTTPError, ConnectionError, ChunkedEncodingError, Timeout)
+             ),
+             wait=wait_exponential(multiplier=1, min=2, max=60),
+             stop=stop_condition,
+             reraise=True,
+         )
+
+         for attempt in retry_strategy:
+             with attempt:
+                 return self._poll_events_impl(
+                     topic, offset_id, limit, poll_timeout_seconds
+                 )
+
+         # This should never be reached due to reraise=True, but mypy needs it
+         raise RuntimeError("Retry strategy exhausted without returning or raising")
+
+     def _poll_events_impl(
+         self,
+         topic: str,
+         offset_id: Optional[str] = None,
+         limit: Optional[int] = None,
+         poll_timeout_seconds: Optional[int] = None,
+     ) -> ExternalEventsResponse:
+         """
+         Internal implementation of poll_events.
+         """
          endpoint = f"{self.base_url}/v1/events/poll"

          # If the caller provided an offset_id, use it; otherwise fall back to self.offset_id.
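Editor's note: the decorator-based `@retry` removed above could not vary its stop condition per instance, which appears to be why polling now builds a `tenacity.Retrying` object at call time: `stop_never` when `infinite_retry` is set, `stop_after_attempt(15)` otherwise (up from 3 attempts, with the backoff cap raised from 30s to 60s and more transient exception types covered). A self-contained sketch of the same pattern, with `flaky` as a stand-in for the HTTP call:

```python
import random

from tenacity import (
    Retrying,
    retry_if_exception_type,
    stop_after_attempt,
    stop_never,
    wait_exponential,
)

def flaky() -> str:
    # Stand-in for the poll request; fails transiently most of the time.
    if random.random() < 0.7:
        raise ConnectionError("transient failure")
    return "ok"

infinite_retry = False  # plays the role of self.infinite_retry
retryer = Retrying(
    retry=retry_if_exception_type(ConnectionError),
    wait=wait_exponential(multiplier=1, min=2, max=60),
    stop=stop_never if infinite_retry else stop_after_attempt(15),
    reraise=True,  # re-raise the last exception instead of wrapping it in RetryError
)

for attempt in retryer:
    with attempt:  # exceptions raised in this block are caught and retried
        result = flaky()
print(result)
```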
@@ -13,6 +13,7 @@
  # limitations under the License.

  import logging
+ import os
  from dataclasses import dataclass
  from typing import Any, Callable, Dict, Iterable, Optional

@@ -22,6 +23,7 @@ from confluent_kafka import KafkaError, KafkaException, TopicPartition
  from confluent_kafka.schema_registry.avro import AvroDeserializer
  from confluent_kafka.schema_registry.schema_registry_client import SchemaRegistryClient
  from prometheus_client import Counter, Gauge
+ from pydantic import Field

  from datahub.configuration import ConfigModel
  from datahub.configuration.kafka import KafkaConsumerConnectionConfig
@@ -40,6 +42,7 @@ from datahub_actions.event.event_registry import (
  )

  # May or may not need these.
+ from datahub_actions.observability.kafka_lag_monitor import KafkaLagMonitor
  from datahub_actions.pipeline.pipeline_context import PipelineContext
  from datahub_actions.plugin.source.kafka.utils import with_retry
  from datahub_actions.source.event_source import EventSource
@@ -94,7 +97,7 @@ def build_entity_change_event(payload: GenericPayloadClass) -> EntityChangeEvent

  class KafkaEventSourceConfig(ConfigModel):
      connection: KafkaConsumerConnectionConfig = KafkaConsumerConnectionConfig()
-     topic_routes: Optional[Dict[str, str]]
+     topic_routes: Optional[Dict[str, str]] = Field(default=None)
      async_commit_enabled: bool = False
      async_commit_interval: int = 10000
      commit_retry_count: int = 5
@@ -123,6 +126,7 @@ def kafka_messages_observer(pipeline_name: str) -> Callable:
  class KafkaEventSource(EventSource):
      running = False
      source_config: KafkaEventSourceConfig
+     _lag_monitor: Optional[KafkaLagMonitor] = None

      def __init__(self, config: KafkaEventSourceConfig, ctx: PipelineContext):
          self.source_config = config
@@ -158,6 +162,41 @@ class KafkaEventSource(EventSource):
          )
          self._observe_message: Callable = kafka_messages_observer(ctx.pipeline_name)

+         # Initialize lag monitoring (if enabled)
+         if self._is_lag_monitoring_enabled():
+             lag_interval = float(
+                 os.environ.get("DATAHUB_ACTIONS_KAFKA_LAG_INTERVAL_SECONDS", "30")
+             )
+             lag_timeout = float(
+                 os.environ.get("DATAHUB_ACTIONS_KAFKA_LAG_TIMEOUT_SECONDS", "5")
+             )
+             self._lag_monitor = KafkaLagMonitor(
+                 consumer=self.consumer,
+                 pipeline_name=ctx.pipeline_name,
+                 interval_seconds=lag_interval,
+                 timeout_seconds=lag_timeout,
+             )
+             logger.info(
+                 f"Kafka lag monitoring enabled for '{ctx.pipeline_name}' "
+                 f"(interval={lag_interval}s, timeout={lag_timeout}s)"
+             )
+         else:
+             logger.debug(
+                 f"Kafka lag monitoring disabled for pipeline '{ctx.pipeline_name}'"
+             )
+
+     @staticmethod
+     def _is_lag_monitoring_enabled() -> bool:
+         """Check if Kafka lag monitoring should be enabled.
+
+         Lag monitoring is enabled if:
+         1. DATAHUB_ACTIONS_KAFKA_LAG_ENABLED=true (case-insensitive)
+
+         Default: False (conservative default for OSS rollout)
+         """
+         enabled_str = os.environ.get("DATAHUB_ACTIONS_KAFKA_LAG_ENABLED", "false")
+         return enabled_str.lower() in ("true", "1", "yes")
+
      @classmethod
      def create(cls, config_dict: dict, ctx: PipelineContext) -> "EventSource":
          config = KafkaEventSourceConfig.model_validate(config_dict)
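Editor's note: lag monitoring is opt-in and configured entirely through the environment; nothing in the pipeline config schema changes. Based on the variable names read above, enabling and tuning it from Python before the source is constructed would look like:

```python
import os

# Opt in (off by default); "1" and "yes" are accepted alongside "true".
os.environ["DATAHUB_ACTIONS_KAFKA_LAG_ENABLED"] = "true"
# Optional tuning knobs; the defaults are 30s and 5s respectively.
os.environ["DATAHUB_ACTIONS_KAFKA_LAG_INTERVAL_SECONDS"] = "15"
os.environ["DATAHUB_ACTIONS_KAFKA_LAG_TIMEOUT_SECONDS"] = "5"
```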
@@ -168,6 +207,11 @@ class KafkaEventSource(EventSource):
          topics_to_subscribe = list(topic_routes.values())
          logger.debug(f"Subscribing to the following topics: {topics_to_subscribe}")
          self.consumer.subscribe(topics_to_subscribe)
+
+         # Start lag monitoring after subscription
+         if self._lag_monitor is not None:
+             self._lag_monitor.start()
+
          self.running = True
          while self.running:
              try:
@@ -228,6 +272,11 @@ class KafkaEventSource(EventSource):
              yield EventEnvelope(RELATIONSHIP_CHANGE_EVENT_V1_TYPE, rce, kafka_meta)

      def close(self) -> None:
+         # Stop lag monitoring first
+         if self._lag_monitor is not None:
+             self._lag_monitor.stop()
+
+         # Then close consumer
          if self.consumer:
              self.running = False
              self.consumer.close()
@@ -16,6 +16,8 @@ import json
  import logging
  from typing import Any, Dict, List, Optional, Union

+ from pydantic import Field
+
  from datahub.configuration import ConfigModel
  from datahub_actions.event.event_envelope import EventEnvelope
  from datahub_actions.pipeline.pipeline_context import PipelineContext
@@ -26,7 +28,7 @@ logger = logging.getLogger(__name__)

  class FilterTransformerConfig(ConfigModel):
      event_type: Union[str, List[str]]
-     event: Optional[Dict[str, Any]]
+     event: Optional[Dict[str, Any]] = Field(default=None)


  class FilterTransformer(Transformer):