opteryx-catalog 0.4.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of opteryx-catalog might be problematic. Click here for more details.

@@ -0,0 +1,230 @@
1
+ """Webhook system for Opteryx Catalog events.
2
+
3
+ This module provides webhook notification capabilities for key catalog events.
4
+ Webhooks can be delivered either directly via HTTP or asynchronously via
5
+ Google Cloud Tasks.
6
+
7
+ Configuration:
8
+ OPTERYX_WEBHOOK_DOMAIN: Base domain for webhook delivery (e.g., router.opteryx.app)
9
+ OPTERYX_WEBHOOK_QUEUE: Cloud Tasks queue path for async delivery
10
+ Format: projects/PROJECT/locations/LOCATION/queues/QUEUE
11
+ If not set, webhooks are sent directly via HTTP
12
+
13
+ Example:
14
+ export OPTERYX_WEBHOOK_DOMAIN=router.opteryx.app
15
+ export OPTERYX_WEBHOOK_QUEUE=projects/my-project/locations/us-central1/queues/webhooks
16
+
17
+ Webhook Endpoint:
18
+ All webhooks are sent to: https://{OPTERYX_WEBHOOK_DOMAIN}/event
19
+ """
20
+
21
from __future__ import annotations

import json
import logging
import os
import time
from typing import Any
from typing import Optional

import requests
30
+
31
+
32
class WebhookManager:
    """Manages webhook delivery for catalog events.

    Supports two delivery modes:
    1. Direct HTTP POST (when OPTERYX_WEBHOOK_QUEUE is not set)
    2. Cloud Tasks async delivery (when OPTERYX_WEBHOOK_QUEUE is set)

    Delivery is best-effort: a failed delivery is logged and reported through
    the boolean return value of ``send`` instead of being raised.
    """

    # Shared logger; failures are recorded here rather than raised to callers.
    _logger = logging.getLogger(__name__)

    def __init__(
        self,
        domain: Optional[str] = None,
        queue_path: Optional[str] = None,
        timeout: int = 10,
    ):
        """Initialize the webhook manager.

        Args:
            domain: Base domain for webhooks (e.g., 'router.opteryx.app')
                Falls back to OPTERYX_WEBHOOK_DOMAIN env var
            queue_path: Cloud Tasks queue path for async delivery
                Falls back to OPTERYX_WEBHOOK_QUEUE env var
            timeout: HTTP timeout in seconds for direct delivery
        """
        self.domain = domain or os.getenv("OPTERYX_WEBHOOK_DOMAIN")
        self.queue_path = queue_path or os.getenv("OPTERYX_WEBHOOK_QUEUE")
        self.timeout = timeout
        # Without a domain there is nowhere to deliver to, so sending is off.
        self.enabled = bool(self.domain)

        # Initialize Cloud Tasks client only if needed
        self._tasks_client = None
        if self.enabled and self.queue_path:
            try:
                from google.cloud import tasks_v2

                self._tasks_client = tasks_v2.CloudTasksClient()
            except ImportError:
                # Cloud Tasks not available, fall back to direct HTTP
                self._logger.warning(
                    "google-cloud-tasks is not installed; webhooks will be sent directly via HTTP"
                )
            except Exception:
                # The client could not be constructed for some other reason.
                # Webhooks are best-effort, so fall back to direct HTTP
                # instead of failing the caller.
                self._logger.warning(
                    "Could not create Cloud Tasks client; webhooks will be sent directly via HTTP",
                    exc_info=True,
                )

    def send(
        self,
        action: str,
        workspace: str,
        collection: str,
        resource_type: str,
        resource_name: str,
        payload: Optional[dict[str, Any]] = None,
    ) -> bool:
        """Send a webhook notification.

        Args:
            action: Action type (create, delete, update, commit, execute)
            workspace: Workspace name
            collection: Collection name
            resource_type: Type of resource (dataset, view, collection)
            resource_name: Name of the resource
            payload: Additional data to include in the webhook body

        Returns:
            True if webhook was sent successfully (or queued), False otherwise
            (including when no webhook domain is configured)
        """
        if not self.enabled:
            return False

        # Simple endpoint URL
        url = f"https://{self.domain}/event"

        # Build the payload
        body = self._build_payload(
            action=action,
            workspace=workspace,
            collection=collection,
            resource_type=resource_type,
            resource_name=resource_name,
            additional=payload or {},
        )

        # Deliver via Cloud Tasks or direct HTTP
        if self._tasks_client and self.queue_path:
            return self._send_via_cloud_tasks(url, body)
        return self._send_direct(url, body)

    def _build_payload(
        self,
        action: str,
        workspace: str,
        collection: str,
        resource_type: str,
        resource_name: str,
        additional: dict[str, Any],
    ) -> dict[str, Any]:
        """Build the webhook payload.

        Returns a standardized payload: event metadata under ``"event"``
        (with a millisecond Unix timestamp) and the caller-supplied
        dictionary, unchanged, under ``"data"``.
        """
        return {
            "event": {
                "action": action,
                "workspace": workspace,
                "collection": collection,
                "resource_type": resource_type,
                "resource_name": resource_name,
                "timestamp": int(time.time() * 1000),  # milliseconds
            },
            "data": additional,
        }

    def _send_direct(self, url: str, payload: dict[str, Any]) -> bool:
        """Send webhook directly via HTTP POST.

        Args:
            url: Full webhook URL
            payload: JSON payload

        Returns:
            True if successful (2xx response), False otherwise
        """
        try:
            response = requests.post(
                url,
                json=payload,
                timeout=self.timeout,
                headers={
                    "Content-Type": "application/json",
                    "User-Agent": "opteryx-catalog-webhook/1.0",
                },
            )
        except Exception:
            # Best-effort delivery: record the failure, never raise.
            self._logger.warning("Webhook delivery to %s failed", url, exc_info=True)
            return False

        status = response.status_code
        if 200 <= status < 300:
            return True
        self._logger.warning("Webhook endpoint %s responded with HTTP %s", url, status)
        return False

    def _send_via_cloud_tasks(self, url: str, payload: dict[str, Any]) -> bool:
        """Send webhook asynchronously via Cloud Tasks.

        Args:
            url: Full webhook URL
            payload: JSON payload

        Returns:
            True if task was created successfully, False otherwise
        """
        if not self._tasks_client:
            # Fall back to direct delivery if client unavailable
            return self._send_direct(url, payload)

        try:
            from google.cloud import tasks_v2

            # Create the task
            task = tasks_v2.Task(
                http_request=tasks_v2.HttpRequest(
                    http_method=tasks_v2.HttpMethod.POST,
                    url=url,
                    headers={
                        "Content-Type": "application/json",
                        "User-Agent": "opteryx-catalog-webhook/1.0",
                    },
                    body=json.dumps(payload).encode(),
                )
            )

            # Queue the task
            self._tasks_client.create_task(
                request=tasks_v2.CreateTaskRequest(
                    parent=self.queue_path,
                    task=task,
                )
            )
            return True
        except Exception:
            # Best-effort delivery: record the failure, never raise.
            self._logger.warning(
                "Could not queue webhook for %s on %s",
                url,
                self.queue_path,
                exc_info=True,
            )
            return False
206
+
207
+
208
# Process-wide manager shared by all callers; built lazily on first request.
_webhook_manager: Optional[WebhookManager] = None


def get_webhook_manager() -> WebhookManager:
    """Return the shared webhook manager, creating it on the first call.

    The manager is constructed with no arguments, so its settings come from
    the OPTERYX_WEBHOOK_DOMAIN / OPTERYX_WEBHOOK_QUEUE environment variables.
    """
    global _webhook_manager
    if _webhook_manager is not None:
        return _webhook_manager
    _webhook_manager = WebhookManager()
    return _webhook_manager
218
+
219
+
220
def send_webhook(
    action: str,
    workspace: str,
    collection: str,
    resource_type: str,
    resource_name: str,
    payload: Optional[dict[str, Any]] = None,
) -> bool:
    """Send a webhook through the shared global manager.

    All arguments are forwarded unchanged to the manager's ``send`` method,
    and its boolean result is returned.
    """
    return get_webhook_manager().send(
        action=action,
        workspace=workspace,
        collection=collection,
        resource_type=resource_type,
        resource_name=resource_name,
        payload=payload,
    )
@@ -0,0 +1,177 @@
1
+ """Event definitions and payload builders for webhook notifications.
2
+
3
+ This module provides helper functions to create standardized payloads
4
+ for different types of catalog events.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import Any
10
+ from typing import Optional
11
+
12
+
13
def dataset_created_payload(
    schema: Any,
    location: Optional[str] = None,
    properties: Optional[dict[str, Any]] = None,
) -> dict[str, Any]:
    """Assemble the event payload for a newly created dataset.

    Args:
        schema: Dataset schema. Only objects exposing ``names`` and
            ``field(name).type`` contribute a ``"schema"`` entry.
        location: GCS location of the dataset
        properties: Additional dataset properties

    Returns:
        Dictionary with ``"location"`` and ``"properties"``, plus ``"schema"``
        when the schema could be described
    """
    result: dict[str, Any] = {}
    result["location"] = location
    result["properties"] = properties if properties else {}

    # Schema description is optional: any failure while reading the schema
    # simply leaves the "schema" key out of the payload.
    try:
        if not hasattr(schema, "names"):
            return result
        described = []
        for column in schema.names:
            described.append({"name": column, "type": str(schema.field(column).type)})
        result["schema"] = {"fields": described}
    except Exception:
        pass

    return result
45
+
46
+
47
def dataset_deleted_payload() -> dict[str, Any]:
    """Return the payload for a dataset deletion event (always empty)."""
    empty: dict[str, Any] = dict()
    return empty
50
+
51
+
52
def dataset_updated_payload(
    description: Optional[str] = None,
    properties: Optional[dict[str, Any]] = None,
) -> dict[str, Any]:
    """Assemble the event payload for a dataset update.

    Args:
        description: New description
        properties: Updated properties; a missing or empty value becomes ``{}``

    Returns:
        Dictionary holding the ``"description"`` and ``"properties"`` fields
    """
    updated: dict[str, Any] = {"description": description}
    updated["properties"] = properties if properties else {}
    return updated
69
+
70
+
71
def dataset_commit_payload(
    snapshot_id: int,
    sequence_number: int,
    record_count: int,
    file_count: int,
) -> dict[str, Any]:
    """Assemble the event payload for a dataset commit (append).

    Args:
        snapshot_id: New snapshot ID
        sequence_number: Sequence number of the commit
        record_count: Number of records added
        file_count: Number of files added

    Returns:
        Dictionary mapping each argument name to the value passed in
    """
    commit_fields = (
        ("snapshot_id", snapshot_id),
        ("sequence_number", sequence_number),
        ("record_count", record_count),
        ("file_count", file_count),
    )
    return dict(commit_fields)
94
+
95
+
96
def collection_created_payload(
    properties: Optional[dict[str, Any]] = None,
) -> dict[str, Any]:
    """Assemble the event payload for a newly created collection.

    Args:
        properties: Collection properties; a missing or empty value becomes ``{}``

    Returns:
        Dictionary with a single ``"properties"`` entry
    """
    collection_properties = properties if properties else {}
    return {"properties": collection_properties}
110
+
111
+
112
def view_created_payload(
    definition: str,
    properties: Optional[dict[str, Any]] = None,
) -> dict[str, Any]:
    """Assemble the event payload for a newly created view.

    Args:
        definition: SQL definition of the view
        properties: Additional view properties; a missing or empty value becomes ``{}``

    Returns:
        Dictionary holding the ``"definition"`` and ``"properties"`` fields
    """
    created: dict[str, Any] = {"definition": definition}
    created["properties"] = properties if properties else {}
    return created
129
+
130
+
131
def view_deleted_payload() -> dict[str, Any]:
    """Return the payload for a view deletion event (always empty)."""
    empty: dict[str, Any] = dict()
    return empty
134
+
135
+
136
def view_updated_payload(
    description: Optional[str] = None,
    properties: Optional[dict[str, Any]] = None,
) -> dict[str, Any]:
    """Assemble the event payload for a view update.

    Args:
        description: New description
        properties: Updated properties; a missing or empty value becomes ``{}``

    Returns:
        Dictionary holding the ``"description"`` and ``"properties"`` fields
    """
    updated: dict[str, Any] = {"description": description}
    updated["properties"] = properties if properties else {}
    return updated
153
+
154
+
155
def view_executed_payload(
    execution_time_ms: Optional[int] = None,
    row_count: Optional[int] = None,
    error: Optional[str] = None,
) -> dict[str, Any]:
    """Assemble the event payload for a view execution.

    Args:
        execution_time_ms: Execution time in milliseconds
        row_count: Number of rows returned
        error: Error message if execution failed

    Returns:
        Dictionary containing only the arguments that are not ``None``
    """
    # Only None is omitted; falsy values such as 0 or "" are kept.
    candidates = (
        ("execution_time_ms", execution_time_ms),
        ("row_count", row_count),
        ("error", error),
    )
    return {key: value for key, value in candidates if value is not None}