opteryx-catalog 0.4.4__py3-none-any.whl → 0.4.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. opteryx_catalog/__init__.py +1 -1
  2. opteryx_catalog/catalog/__init__.py +2 -1
  3. opteryx_catalog/catalog/compaction.py +536 -0
  4. opteryx_catalog/catalog/dataset.py +840 -520
  5. opteryx_catalog/catalog/manifest.py +475 -0
  6. opteryx_catalog/catalog/metadata.py +5 -2
  7. opteryx_catalog/catalog/metastore.py +2 -2
  8. opteryx_catalog/exceptions.py +1 -1
  9. opteryx_catalog/iops/fileio.py +13 -0
  10. opteryx_catalog/iops/gcs.py +35 -5
  11. opteryx_catalog/maki_nage/__init__.py +8 -0
  12. opteryx_catalog/maki_nage/distogram.py +558 -0
  13. opteryx_catalog/maki_nage/tests/_test_histogram.py +52 -0
  14. opteryx_catalog/maki_nage/tests/test_bounds.py +24 -0
  15. opteryx_catalog/maki_nage/tests/test_count.py +19 -0
  16. opteryx_catalog/maki_nage/tests/test_count_at.py +89 -0
  17. opteryx_catalog/maki_nage/tests/test_quantile.py +81 -0
  18. opteryx_catalog/maki_nage/tests/test_stats.py +25 -0
  19. opteryx_catalog/maki_nage/tests/test_update.py +44 -0
  20. opteryx_catalog/opteryx_catalog.py +296 -242
  21. opteryx_catalog/webhooks/__init__.py +230 -0
  22. opteryx_catalog/webhooks/events.py +177 -0
  23. {opteryx_catalog-0.4.4.dist-info → opteryx_catalog-0.4.26.dist-info}/METADATA +15 -18
  24. opteryx_catalog-0.4.26.dist-info/RECORD +45 -0
  25. {opteryx_catalog-0.4.4.dist-info → opteryx_catalog-0.4.26.dist-info}/WHEEL +1 -1
  26. scripts/collect_byte_counts.py +42 -0
  27. scripts/create_dataset.py +1 -1
  28. scripts/emit_full_single_file.py +81 -0
  29. scripts/inspect_manifest_dryrun.py +322 -0
  30. scripts/inspect_single_file.py +147 -0
  31. scripts/inspect_single_file_gcs.py +124 -0
  32. scripts/read_dataset.py +1 -1
  33. tests/test_collections.py +37 -0
  34. tests/test_compaction.py +233 -0
  35. tests/test_dataset_metadata.py +14 -0
  36. tests/test_describe_uncompressed.py +127 -0
  37. tests/test_refresh_manifest.py +275 -0
  38. tests/test_webhooks.py +177 -0
  39. opteryx_catalog-0.4.4.dist-info/RECORD +0 -23
  40. {opteryx_catalog-0.4.4.dist-info → opteryx_catalog-0.4.26.dist-info}/licenses/LICENSE +0 -0
  41. {opteryx_catalog-0.4.4.dist-info → opteryx_catalog-0.4.26.dist-info}/top_level.txt +0 -0
tests/test_webhooks.py ADDED
@@ -0,0 +1,177 @@
1
+ """Tests for the webhook system."""
2
+
3
+ import os
4
+ from unittest.mock import MagicMock
5
+ from unittest.mock import patch
6
+
7
+ import pytest
8
+
9
+
10
def test_webhook_manager_disabled_without_domain():
    """Test that webhook manager is disabled when no domain is configured.

    With neither OPTERYX_WEBHOOK_DOMAIN nor OPTERYX_WEBHOOK_QUEUE set, the
    manager must report itself as disabled and ``send`` must return False
    without issuing an HTTP request.
    """
    from opteryx_catalog.webhooks import WebhookManager

    post_target = "opteryx_catalog.webhooks.requests.post"

    # patch.dict snapshots os.environ and restores it on exit, so removing the
    # variables here cannot leak into tests that run later in this process.
    with patch.dict(os.environ), patch(post_target) as mock_post:
        os.environ.pop("OPTERYX_WEBHOOK_DOMAIN", None)
        os.environ.pop("OPTERYX_WEBHOOK_QUEUE", None)

        manager = WebhookManager()
        assert not manager.enabled

        result = manager.send(
            action="create",
            workspace="test",
            collection="test",
            resource_type="dataset",
            resource_name="test",
        )

        # Should return False without making any HTTP calls
        assert result is False
        mock_post.assert_not_called()
30
+
31
+
32
def test_webhook_manager_direct_http():
    """Check that send() posts the event over HTTP when no queue is configured."""
    from opteryx_catalog.webhooks import WebhookManager

    with patch("opteryx_catalog.webhooks.requests.post") as post_mock:
        # Stub a successful response for the patched requests.post.
        post_mock.return_value = MagicMock(status_code=200)

        hooks = WebhookManager(domain="router.example.com", queue_path=None)
        assert hooks.enabled
        assert hooks._tasks_client is None

        sent = hooks.send(
            action="create",
            workspace="test-workspace",
            collection="test-collection",
            resource_type="dataset",
            resource_name="test-dataset",
            payload={"location": "gs://bucket/path"},
        )

        assert sent is True
        post_mock.assert_called_once()

        # Inspect the single recorded call: target URL first, then the JSON body.
        recorded = post_mock.call_args
        assert recorded.args[0] == "https://router.example.com/event"

        body = recorded.kwargs["json"]
        event = body["event"]
        assert event["action"] == "create"
        assert event["resource_type"] == "dataset"
        assert event["resource_name"] == "test-dataset"
        assert body["data"]["location"] == "gs://bucket/path"
64
+
65
+
66
def test_webhook_manager_payload_building():
    """Check the structure of the payload produced by ``_build_payload``."""
    from opteryx_catalog.webhooks import WebhookManager

    built = WebhookManager(domain="hook.example.com")._build_payload(
        action="update",
        workspace="ws",
        collection="col",
        resource_type="dataset",
        resource_name="ds",
        additional={"description": "New description"},
    )

    # Every identifying argument must be echoed back under the "event" key.
    event = built["event"]
    expected_event_fields = {
        "action": "update",
        "workspace": "ws",
        "collection": "col",
        "resource_type": "dataset",
        "resource_name": "ds",
    }
    for field, value in expected_event_fields.items():
        assert event[field] == value
    assert "timestamp" in event

    # The ``additional`` mapping is surfaced under the "data" key.
    assert built["data"]["description"] == "New description"
88
+
89
+
90
def test_webhook_http_failure_returns_false():
    """Check that a failing HTTP call makes send() return False, not raise."""
    from opteryx_catalog.webhooks import WebhookManager

    # Make every requests.post call raise, simulating a transport failure.
    failing_post = patch(
        "opteryx_catalog.webhooks.requests.post",
        side_effect=Exception("Connection failed"),
    )

    with failing_post:
        hooks = WebhookManager(domain="router.example.com")
        outcome = hooks.send(
            action="create",
            workspace="test",
            collection="test",
            resource_type="dataset",
            resource_name="test",
        )

        assert outcome is False
108
+
109
+
110
def test_send_webhook_convenience_function():
    """Test the convenience send_webhook function.

    The function is driven purely through environment configuration, so the
    test sets OPTERYX_WEBHOOK_DOMAIN, clears OPTERYX_WEBHOOK_QUEUE and resets
    the module-level manager so that it is rebuilt from those variables.
    All of that state is restored on exit, even when an assertion fails.
    """
    import opteryx_catalog.webhooks as webhook_module
    from opteryx_catalog.webhooks import send_webhook

    env_overrides = {"OPTERYX_WEBHOOK_DOMAIN": "router.example.com"}

    # Reset the global manager to pick up new env vars. patch.object puts the
    # previous value back afterwards so a manager built here is not left cached
    # for later tests; create=True keeps the original tolerance for the
    # attribute not having been initialised yet.
    manager_reset = patch.object(webhook_module, "_webhook_manager", None, create=True)

    # patch.dict restores os.environ (added and removed keys alike) on exit.
    with patch.dict(os.environ, env_overrides), manager_reset:
        os.environ.pop("OPTERYX_WEBHOOK_QUEUE", None)

        with patch("opteryx_catalog.webhooks.requests.post") as mock_post:
            mock_response = MagicMock()
            mock_response.status_code = 200
            mock_post.return_value = mock_response

            result = send_webhook(
                action="create",
                workspace="test",
                collection="test",
                resource_type="dataset",
                resource_name="test",
                payload={"snapshot_id": 123},
            )

            assert result is True
            mock_post.assert_called_once()
141
+
142
+
143
def test_event_payload_builders():
    """Check that each event payload builder echoes its arguments back."""
    from opteryx_catalog.webhooks import events

    # Dataset created: location and properties are carried through.
    created = events.dataset_created_payload(
        schema=None,
        location="gs://bucket/path",
        properties={"key": "value"},
    )
    assert created["location"] == "gs://bucket/path"
    assert created["properties"]["key"] == "value"

    # Dataset commit: all four counters are carried through.
    commit_fields = {
        "snapshot_id": 123,
        "sequence_number": 5,
        "record_count": 1000,
        "file_count": 2,
    }
    committed = events.dataset_commit_payload(**commit_fields)
    for field, value in commit_fields.items():
        assert committed[field] == value

    # View created: the SQL definition is carried through.
    view = events.view_created_payload(definition="SELECT * FROM table", properties={})
    assert view["definition"] == "SELECT * FROM table"

    # View executed: timing and row count are carried through.
    executed = events.view_executed_payload(execution_time_ms=1500, row_count=100)
    assert executed["execution_time_ms"] == 1500
    assert executed["row_count"] == 100
174
+
175
+
176
if __name__ == "__main__":
    # pytest.main returns the run's exit code; hand it to the interpreter so a
    # failing run is visible to shells and CI instead of always exiting 0.
    raise SystemExit(pytest.main([__file__, "-v"]))
opteryx_catalog-0.4.4.dist-info/RECORD REMOVED
@@ -1,23 +0,0 @@
1
- opteryx_catalog/__init__.py,sha256=0fxsE-hZ6hcw0Fs4LEIys1CQwCL4W7iLSpp0M0v1f08,900
2
- opteryx_catalog/exceptions.py,sha256=NKAwocRPYo1KbzFce56Dn8rQH0Jc8mfxES5qPDp-OO4,650
3
- opteryx_catalog/opteryx_catalog.py,sha256=roKcwY2N4AoyujH-pIZo9L_EKy5gyUSGCYIaim9wqWU,38530
4
- opteryx_catalog/catalog/__init__.py,sha256=0I67nvttc_sO3ntZp8rk6agMl1LXBd67j6ZsPGonKtM,58
5
- opteryx_catalog/catalog/dataset.py,sha256=NWG2z4PSmLXr9Mwweq_bGt1fCczHJVk19aTZM1c7ucg,48738
6
- opteryx_catalog/catalog/manifest.py,sha256=psnigON41ZV1hQ1CshGB_rBqpbUPG-JJeOtjQwxokaE,552
7
- opteryx_catalog/catalog/metadata.py,sha256=i6AS-4M1OvzAKJ0XQx_gy5bqKKv98W2vWsDi8JXTIso,2869
8
- opteryx_catalog/catalog/metastore.py,sha256=T34_ki5FXoREprCDoO1uBXcgH7yY9YslhTirbPAkPoM,1993
9
- opteryx_catalog/catalog/view.py,sha256=mUzfRGYqLRx_9BfZdGY5HNz6na9VMEPITrYKiI5m694,219
10
- opteryx_catalog/iops/__init__.py,sha256=_CxR-hg8XUD2cIFucb3aHyTFqwi41QmEDf9gXzXt3ZU,171
11
- opteryx_catalog/iops/base.py,sha256=1IW9qjDkQEMXvrA2J73VSBCdzkf2W5xVsWVnpNglL1U,1206
12
- opteryx_catalog/iops/fileio.py,sha256=cjBl9fN-vutvXskzZkwJjjbBcUlE0O1WrQe5Ryx7pIg,4315
13
- opteryx_catalog/iops/gcs.py,sha256=SyXjJptrtgTuBfAJLnzl4efLK4TKAGhllGKxJKDhx7Q,7275
14
- opteryx_catalog-0.4.4.dist-info/licenses/LICENSE,sha256=mc5l20siqdcNQM54xALIWJhyaWsmQJ-NZt81UjgJejo,11351
15
- scripts/create_dataset.py,sha256=8C4I6bKk2ZiYypO9nM8OTKlVTqpxW3JgLifgklsmAAo,7796
16
- scripts/read_dataset.py,sha256=HwBy-kgTtcNq_fjkDS2WrM5OTg5O7IcOKQf7RxfxUJA,9610
17
- tests/test_dataset_metadata.py,sha256=KE7jqam-IoJYXklmGaxuO47nvdoiNn_RW4VuUtkilIY,546
18
- tests/test_import.py,sha256=ZvoHW-rmcYqkW6TJKD_brgeePqHHbz2iTyRWKIBHGHk,137
19
- tests/test_pyproject.py,sha256=o3rS_GOems1oYQDH3UATfqc6XUwDTKZF2Q4cspU-NYc,206
20
- opteryx_catalog-0.4.4.dist-info/METADATA,sha256=ZUqsh7gmOoNAlegsAimP30u6VVz57zwzAcGuXpy2rmM,22383
21
- opteryx_catalog-0.4.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
22
- opteryx_catalog-0.4.4.dist-info/top_level.txt,sha256=HWATr4Wgxbg3c1X3EcsJ6cnHoR6ZAdTe1LQ2VssIBUo,30
23
- opteryx_catalog-0.4.4.dist-info/RECORD,,