opteryx-catalog 0.4.4__py3-none-any.whl → 0.4.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opteryx_catalog/__init__.py +1 -1
- opteryx_catalog/catalog/__init__.py +2 -1
- opteryx_catalog/catalog/compaction.py +536 -0
- opteryx_catalog/catalog/dataset.py +840 -520
- opteryx_catalog/catalog/manifest.py +475 -0
- opteryx_catalog/catalog/metadata.py +5 -2
- opteryx_catalog/catalog/metastore.py +2 -2
- opteryx_catalog/exceptions.py +1 -1
- opteryx_catalog/iops/fileio.py +13 -0
- opteryx_catalog/iops/gcs.py +35 -5
- opteryx_catalog/maki_nage/__init__.py +8 -0
- opteryx_catalog/maki_nage/distogram.py +558 -0
- opteryx_catalog/maki_nage/tests/_test_histogram.py +52 -0
- opteryx_catalog/maki_nage/tests/test_bounds.py +24 -0
- opteryx_catalog/maki_nage/tests/test_count.py +19 -0
- opteryx_catalog/maki_nage/tests/test_count_at.py +89 -0
- opteryx_catalog/maki_nage/tests/test_quantile.py +81 -0
- opteryx_catalog/maki_nage/tests/test_stats.py +25 -0
- opteryx_catalog/maki_nage/tests/test_update.py +44 -0
- opteryx_catalog/opteryx_catalog.py +296 -242
- opteryx_catalog/webhooks/__init__.py +230 -0
- opteryx_catalog/webhooks/events.py +177 -0
- {opteryx_catalog-0.4.4.dist-info → opteryx_catalog-0.4.26.dist-info}/METADATA +15 -18
- opteryx_catalog-0.4.26.dist-info/RECORD +45 -0
- {opteryx_catalog-0.4.4.dist-info → opteryx_catalog-0.4.26.dist-info}/WHEEL +1 -1
- scripts/collect_byte_counts.py +42 -0
- scripts/create_dataset.py +1 -1
- scripts/emit_full_single_file.py +81 -0
- scripts/inspect_manifest_dryrun.py +322 -0
- scripts/inspect_single_file.py +147 -0
- scripts/inspect_single_file_gcs.py +124 -0
- scripts/read_dataset.py +1 -1
- tests/test_collections.py +37 -0
- tests/test_compaction.py +233 -0
- tests/test_dataset_metadata.py +14 -0
- tests/test_describe_uncompressed.py +127 -0
- tests/test_refresh_manifest.py +275 -0
- tests/test_webhooks.py +177 -0
- opteryx_catalog-0.4.4.dist-info/RECORD +0 -23
- {opteryx_catalog-0.4.4.dist-info → opteryx_catalog-0.4.26.dist-info}/licenses/LICENSE +0 -0
- {opteryx_catalog-0.4.4.dist-info → opteryx_catalog-0.4.26.dist-info}/top_level.txt +0 -0
tests/test_webhooks.py
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
"""Tests for the webhook system."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from unittest.mock import MagicMock
|
|
5
|
+
from unittest.mock import patch
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def test_webhook_manager_disabled_without_domain():
    """Test that webhook manager is disabled when no domain is configured.

    The webhook environment variables are removed inside a ``patch.dict``
    context, so whatever the surrounding environment held is restored when
    the test finishes, whether it passes or fails.
    """
    from opteryx_catalog.webhooks import WebhookManager

    with patch.dict(os.environ):
        # Clear any existing env vars; patch.dict restores them on exit.
        os.environ.pop("OPTERYX_WEBHOOK_DOMAIN", None)
        os.environ.pop("OPTERYX_WEBHOOK_QUEUE", None)

        with patch("opteryx_catalog.webhooks.requests.post") as mock_post:
            manager = WebhookManager()
            assert not manager.enabled

            # Should return False without making any HTTP calls
            result = manager.send(
                action="create",
                workspace="test",
                collection="test",
                resource_type="dataset",
                resource_name="test",
            )
            assert result is False
            # Back the "no HTTP calls" claim with an actual check.
            mock_post.assert_not_called()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def test_webhook_manager_direct_http():
    """Webhooks go out via a direct HTTP POST when no queue is configured."""
    from opteryx_catalog.webhooks import WebhookManager

    with patch("opteryx_catalog.webhooks.requests.post") as http_post:
        # Stand-in for a successful HTTP response.
        ok_response = MagicMock()
        ok_response.status_code = 200
        http_post.return_value = ok_response

        hooks = WebhookManager(domain="router.example.com", queue_path=None)
        assert hooks.enabled
        assert hooks._tasks_client is None

        delivered = hooks.send(
            action="create",
            workspace="test-workspace",
            collection="test-collection",
            resource_type="dataset",
            resource_name="test-dataset",
            payload={"location": "gs://bucket/path"},
        )

        assert delivered is True
        http_post.assert_called_once()

        # Inspect what was handed to requests.post: target URL and JSON body.
        recorded = http_post.call_args
        assert recorded.args[0] == "https://router.example.com/event"

        body = recorded.kwargs["json"]
        assert body["event"]["action"] == "create"
        assert body["event"]["resource_type"] == "dataset"
        assert body["event"]["resource_name"] == "test-dataset"
        assert body["data"]["location"] == "gs://bucket/path"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def test_webhook_manager_payload_building():
    """The payload built by the manager carries the event fields and extra data."""
    from opteryx_catalog.webhooks import WebhookManager

    hooks = WebhookManager(domain="hook.example.com")

    built = hooks._build_payload(
        action="update",
        workspace="ws",
        collection="col",
        resource_type="dataset",
        resource_name="ds",
        additional={"description": "New description"},
    )

    # Event envelope: each identifying field is echoed back unchanged.
    event = built["event"]
    assert event["action"] == "update"
    assert event["workspace"] == "ws"
    assert event["collection"] == "col"
    assert event["resource_type"] == "dataset"
    assert event["resource_name"] == "ds"
    assert "timestamp" in event

    # Additional fields land under "data".
    assert built["data"]["description"] == "New description"
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def test_webhook_http_failure_returns_false():
    """A failing HTTP call makes send() return False instead of raising."""
    from opteryx_catalog.webhooks import WebhookManager

    # Simulate HTTP error: every call to requests.post raises.
    failing_post = patch(
        "opteryx_catalog.webhooks.requests.post",
        side_effect=Exception("Connection failed"),
    )

    with failing_post:
        hooks = WebhookManager(domain="router.example.com")
        outcome = hooks.send(
            action="create",
            workspace="test",
            collection="test",
            resource_type="dataset",
            resource_name="test",
        )

        assert outcome is False
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def test_send_webhook_convenience_function():
    """Test the convenience send_webhook function.

    All global state touched by the test is restored on exit, even when an
    assertion fails: the environment via ``patch.dict`` and the module-level
    ``_webhook_manager`` via ``patch.object``. This keeps a manager
    configured for this test from leaking into later tests.
    """
    import opteryx_catalog.webhooks as webhook_module
    from opteryx_catalog.webhooks import send_webhook

    with patch.dict(os.environ, {"OPTERYX_WEBHOOK_DOMAIN": "router.example.com"}):
        os.environ.pop("OPTERYX_WEBHOOK_QUEUE", None)

        # Reset the global manager to pick up new env vars; the previous
        # value is put back when the context exits.
        with patch.object(webhook_module, "_webhook_manager", None, create=True):
            with patch("opteryx_catalog.webhooks.requests.post") as mock_post:
                mock_response = MagicMock()
                mock_response.status_code = 200
                mock_post.return_value = mock_response

                result = send_webhook(
                    action="create",
                    workspace="test",
                    collection="test",
                    resource_type="dataset",
                    resource_name="test",
                    payload={"snapshot_id": 123},
                )

                assert result is True
                mock_post.assert_called_once()
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def test_event_payload_builders():
    """Each event payload builder echoes its arguments under matching keys."""
    from opteryx_catalog.webhooks.events import dataset_commit_payload
    from opteryx_catalog.webhooks.events import dataset_created_payload
    from opteryx_catalog.webhooks.events import view_created_payload
    from opteryx_catalog.webhooks.events import view_executed_payload

    # Dataset created
    created = dataset_created_payload(
        schema=None, location="gs://bucket/path", properties={"key": "value"}
    )
    assert created["location"] == "gs://bucket/path"
    assert created["properties"]["key"] == "value"

    # Dataset commit
    commit = dataset_commit_payload(
        snapshot_id=123, sequence_number=5, record_count=1000, file_count=2
    )
    assert commit["snapshot_id"] == 123
    assert commit["sequence_number"] == 5
    assert commit["record_count"] == 1000
    assert commit["file_count"] == 2

    # View created
    view_created = view_created_payload(definition="SELECT * FROM table", properties={})
    assert view_created["definition"] == "SELECT * FROM table"

    # View executed
    view_executed = view_executed_payload(execution_time_ms=1500, row_count=100)
    assert view_executed["execution_time_ms"] == 1500
    assert view_executed["row_count"] == 100
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
if __name__ == "__main__":
    # pytest.main returns the run's exit code. Propagate it so that running
    # this file as a script exits non-zero when a test fails.
    raise SystemExit(pytest.main([__file__, "-v"]))
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
opteryx_catalog/__init__.py,sha256=0fxsE-hZ6hcw0Fs4LEIys1CQwCL4W7iLSpp0M0v1f08,900
|
|
2
|
-
opteryx_catalog/exceptions.py,sha256=NKAwocRPYo1KbzFce56Dn8rQH0Jc8mfxES5qPDp-OO4,650
|
|
3
|
-
opteryx_catalog/opteryx_catalog.py,sha256=roKcwY2N4AoyujH-pIZo9L_EKy5gyUSGCYIaim9wqWU,38530
|
|
4
|
-
opteryx_catalog/catalog/__init__.py,sha256=0I67nvttc_sO3ntZp8rk6agMl1LXBd67j6ZsPGonKtM,58
|
|
5
|
-
opteryx_catalog/catalog/dataset.py,sha256=NWG2z4PSmLXr9Mwweq_bGt1fCczHJVk19aTZM1c7ucg,48738
|
|
6
|
-
opteryx_catalog/catalog/manifest.py,sha256=psnigON41ZV1hQ1CshGB_rBqpbUPG-JJeOtjQwxokaE,552
|
|
7
|
-
opteryx_catalog/catalog/metadata.py,sha256=i6AS-4M1OvzAKJ0XQx_gy5bqKKv98W2vWsDi8JXTIso,2869
|
|
8
|
-
opteryx_catalog/catalog/metastore.py,sha256=T34_ki5FXoREprCDoO1uBXcgH7yY9YslhTirbPAkPoM,1993
|
|
9
|
-
opteryx_catalog/catalog/view.py,sha256=mUzfRGYqLRx_9BfZdGY5HNz6na9VMEPITrYKiI5m694,219
|
|
10
|
-
opteryx_catalog/iops/__init__.py,sha256=_CxR-hg8XUD2cIFucb3aHyTFqwi41QmEDf9gXzXt3ZU,171
|
|
11
|
-
opteryx_catalog/iops/base.py,sha256=1IW9qjDkQEMXvrA2J73VSBCdzkf2W5xVsWVnpNglL1U,1206
|
|
12
|
-
opteryx_catalog/iops/fileio.py,sha256=cjBl9fN-vutvXskzZkwJjjbBcUlE0O1WrQe5Ryx7pIg,4315
|
|
13
|
-
opteryx_catalog/iops/gcs.py,sha256=SyXjJptrtgTuBfAJLnzl4efLK4TKAGhllGKxJKDhx7Q,7275
|
|
14
|
-
opteryx_catalog-0.4.4.dist-info/licenses/LICENSE,sha256=mc5l20siqdcNQM54xALIWJhyaWsmQJ-NZt81UjgJejo,11351
|
|
15
|
-
scripts/create_dataset.py,sha256=8C4I6bKk2ZiYypO9nM8OTKlVTqpxW3JgLifgklsmAAo,7796
|
|
16
|
-
scripts/read_dataset.py,sha256=HwBy-kgTtcNq_fjkDS2WrM5OTg5O7IcOKQf7RxfxUJA,9610
|
|
17
|
-
tests/test_dataset_metadata.py,sha256=KE7jqam-IoJYXklmGaxuO47nvdoiNn_RW4VuUtkilIY,546
|
|
18
|
-
tests/test_import.py,sha256=ZvoHW-rmcYqkW6TJKD_brgeePqHHbz2iTyRWKIBHGHk,137
|
|
19
|
-
tests/test_pyproject.py,sha256=o3rS_GOems1oYQDH3UATfqc6XUwDTKZF2Q4cspU-NYc,206
|
|
20
|
-
opteryx_catalog-0.4.4.dist-info/METADATA,sha256=ZUqsh7gmOoNAlegsAimP30u6VVz57zwzAcGuXpy2rmM,22383
|
|
21
|
-
opteryx_catalog-0.4.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
22
|
-
opteryx_catalog-0.4.4.dist-info/top_level.txt,sha256=HWATr4Wgxbg3c1X3EcsJ6cnHoR6ZAdTe1LQ2VssIBUo,30
|
|
23
|
-
opteryx_catalog-0.4.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|