faceberg 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,343 @@
1
+ """Tests for REST catalog server."""
2
+
3
+ from litestar.testing import TestClient
4
+
5
+ from faceberg.config import Config, Namespace
6
+ from faceberg.server import create_app
7
+
8
+
9
class TestConfigEndpoint:
    """Exercise the ``/v1/config`` route of the REST catalog server."""

    def test_get_config(self, session_mbpp):
        """A plain GET returns 200 with ``defaults`` and ``overrides`` sections.

        The ``overrides`` section must additionally contain a ``uri`` entry.
        """
        with TestClient(app=create_app(session_mbpp.uri)) as http:
            resp = http.get("/v1/config")
            assert resp.status_code == 200

            payload = resp.json()
            for section in ("defaults", "overrides"):
                assert section in payload
            assert "uri" in payload["overrides"]

    def test_get_config_with_warehouse_param(self, session_mbpp):
        """The ``warehouse`` query parameter is returned under ``overrides``."""
        with TestClient(app=create_app(session_mbpp.uri)) as http:
            resp = http.get("/v1/config?warehouse=/my/warehouse")
            assert resp.status_code == 200

            overrides = resp.json()["overrides"]
            assert overrides["warehouse"] == "/my/warehouse"
35
+
36
+
37
class TestNamespaceEndpoints:
    """Tests for namespace-related endpoints.

    All tests build the app from the ``session_mbpp`` fixture's ``uri`` and
    talk to it through an in-process ``TestClient``.
    """

    def test_list_namespaces(self, session_mbpp):
        """Test that listing namespaces returns 200 and the expected namespace."""
        app = create_app(session_mbpp.uri)

        with TestClient(app=app) as client:
            response = client.get("/v1/namespaces")
            assert response.status_code == 200

            data = response.json()
            assert "namespaces" in data
            namespaces = data["namespaces"]
            assert len(namespaces) > 0
            # Check that 'google-research-datasets' namespace exists (from
            # session_mbpp). JSON arrays always decode to Python lists, so
            # comparing against a list is sufficient.
            assert ["google-research-datasets"] in namespaces

    def test_load_namespace(self, session_mbpp):
        """Test that loading a namespace returns its identifier and properties."""
        app = create_app(session_mbpp.uri)

        with TestClient(app=app) as client:
            response = client.get("/v1/namespaces/google-research-datasets")
            assert response.status_code == 200

            data = response.json()
            assert "namespace" in data
            assert "properties" in data
            # The namespace identifier is a JSON array, i.e. a list once decoded
            assert data["namespace"] == ["google-research-datasets"]

    def test_namespace_exists_head(self, session_mbpp):
        """Test checking namespace existence with HEAD request."""
        app = create_app(session_mbpp.uri)

        with TestClient(app=app) as client:
            # Existing namespace should return 204
            response = client.head("/v1/namespaces/google-research-datasets")
            assert response.status_code == 204

    def test_namespace_not_exists(self, session_mbpp):
        """Test loading non-existent namespace returns empty properties.

        Note: The catalog behavior is to return empty properties rather than
        raising an error for non-existent namespaces.
        """
        app = create_app(session_mbpp.uri)

        with TestClient(app=app) as client:
            response = client.get("/v1/namespaces/nonexistent")
            assert response.status_code == 200

            data = response.json()
            assert "namespace" in data
            assert "properties" in data
            # Empty properties for non-existent namespace
            assert data["properties"] == {}
99
+
100
+
101
class TestTableEndpoints:
    """Exercise the table routes under ``/v1/namespaces/<namespace>/tables``."""

    def test_list_tables(self, session_mbpp):
        """Listing tables returns 200 and a non-empty ``identifiers`` list.

        Every identifier entry must carry both a ``namespace`` and a ``name`` key.
        """
        with TestClient(app=create_app(session_mbpp.uri)) as http:
            resp = http.get("/v1/namespaces/google-research-datasets/tables")
            assert resp.status_code == 200

            payload = resp.json()
            assert "identifiers" in payload
            identifiers = payload["identifiers"]
            assert len(identifiers) > 0
            # Each entry is checked individually so a failure points at one entry
            for entry in identifiers:
                assert "namespace" in entry and "name" in entry

    def test_load_table(self, session_mbpp):
        """Loading the ``mbpp`` table returns 200 with location and metadata."""
        with TestClient(app=create_app(session_mbpp.uri)) as http:
            resp = http.get("/v1/namespaces/google-research-datasets/tables/mbpp")
            assert resp.status_code == 200

            payload = resp.json()
            # Either the hyphenated or the underscored key spelling is accepted
            has_location = "metadata-location" in payload or "metadata_location" in payload
            assert has_location
            assert "metadata" in payload

            # The nested metadata must declare its format version (either spelling)
            table_metadata = payload["metadata"]
            has_version = "format-version" in table_metadata or "format_version" in table_metadata
            assert has_version

    def test_table_exists_head(self, session_mbpp):
        """A HEAD request for an existing table answers 204."""
        with TestClient(app=create_app(session_mbpp.uri)) as http:
            resp = http.head("/v1/namespaces/google-research-datasets/tables/mbpp")
            assert resp.status_code == 204

    def test_table_not_exists(self, session_mbpp):
        """Loading an unknown table answers 404 with a ``NoSuchTableError`` body."""
        with TestClient(app=create_app(session_mbpp.uri)) as http:
            resp = http.get("/v1/namespaces/google-research-datasets/tables/nonexistent")
            assert resp.status_code == 404

            payload = resp.json()
            assert "error" in payload
            error_type = payload["error"]["type"]
            assert error_type == "NoSuchTableError"

    def test_table_exists_wrong_namespace(self, session_mbpp):
        """A HEAD request for ``mbpp`` under an unknown namespace answers 404."""
        with TestClient(app=create_app(session_mbpp.uri)) as http:
            resp = http.head("/v1/namespaces/nonexistent/tables/mbpp")
            assert resp.status_code == 404
163
+
164
+
165
class TestErrorHandling:
    """Check the status codes and JSON body shape of error responses."""

    def test_error_response_format(self, session_mbpp):
        """A table-not-found response carries ``message``, ``type`` and ``code``.

        The missing-table request is used as the representative error case;
        its ``type`` must be ``NoSuchTableError`` and its ``code`` 404.
        """
        with TestClient(app=create_app(session_mbpp.uri)) as http:
            resp = http.get("/v1/namespaces/google-research-datasets/tables/nonexistent")
            assert resp.status_code == 404

            payload = resp.json()
            assert "error" in payload
            err = payload["error"]
            for key in ("message", "type", "code"):
                assert key in err
            assert err["type"] == "NoSuchTableError"
            assert err["code"] == 404

    def test_internal_error_handling(self, session_mbpp):
        """Requesting ``/v1/config/invalid`` yields a 404, 405 or 500 status.

        Only the status code is checked here; the response body is not inspected.
        """
        acceptable_statuses = (404, 405, 500)
        with TestClient(app=create_app(session_mbpp.uri)) as http:
            resp = http.get("/v1/config/invalid")
            assert resp.status_code in acceptable_statuses
195
+
196
+
197
class TestEmptyCatalogBehavior:
    """Test server behavior with empty catalogs.

    These tests verify that the server correctly handles catalogs that have
    no tables or namespaces, which is important for new catalog initialization.
    """

    @staticmethod
    def _create_catalog_app(tmp_path, dirname, config):
        """Write ``config`` as ``faceberg.yml`` in a new directory and build the app.

        Args:
            tmp_path: Base temporary directory (pytest ``tmp_path`` fixture).
            dirname: Name of the catalog directory to create under ``tmp_path``.
            config: ``Config`` instance to serialize into the catalog directory.

        Returns:
            The application returned by ``create_app`` for the new directory.
        """
        catalog_dir = tmp_path / dirname
        catalog_dir.mkdir()
        config.to_yaml(catalog_dir / "faceberg.yml")
        return create_app(str(catalog_dir))

    def test_empty_catalog_list_namespaces(self, tmp_path):
        """Test that an empty catalog returns empty namespace list."""
        app = self._create_catalog_app(tmp_path, "empty_catalog", Config())

        with TestClient(app=app) as client:
            response = client.get("/v1/namespaces")
            assert response.status_code == 200

            data = response.json()
            assert "namespaces" in data
            # Empty catalog should return empty list, not error
            assert data["namespaces"] == []

    def test_empty_catalog_list_tables_in_default(self, tmp_path):
        """Test that listing tables in non-existent namespace returns 404."""
        app = self._create_catalog_app(tmp_path, "empty_catalog", Config())

        with TestClient(app=app) as client:
            response = client.get("/v1/namespaces/default/tables")
            # Non-existent namespace should return 404
            assert response.status_code == 404

    def test_catalog_with_namespace_but_no_tables(self, tmp_path):
        """Test catalog with defined namespace but no tables."""
        app = self._create_catalog_app(
            tmp_path, "catalog_with_empty_ns", Config({"default": Namespace()})
        )

        with TestClient(app=app) as client:
            # Should list the namespace. JSON arrays decode to lists, so a
            # list comparison is the only form that can match.
            response = client.get("/v1/namespaces")
            assert response.status_code == 200
            data = response.json()
            assert "namespaces" in data
            assert ["default"] in data["namespaces"]

            # But tables should be empty
            response = client.get("/v1/namespaces/default/tables")
            assert response.status_code == 200
            data = response.json()
            assert "identifiers" in data
            assert data["identifiers"] == []
265
+
266
+
267
class TestSyncedCatalogDataIntegrity:
    """Test that synced catalog data is correctly exposed via REST API.

    These tests ensure that after syncing datasets, the REST API correctly
    exposes the namespace and table information.
    """

    def test_session_mbpp_has_default_namespace(self, session_mbpp):
        """Verify synced catalog exposes the google-research-datasets namespace."""
        app = create_app(session_mbpp.uri)

        with TestClient(app=app) as client:
            response = client.get("/v1/namespaces")
            assert response.status_code == 200

            data = response.json()
            namespaces = data["namespaces"]

            # Must have at least one namespace
            assert len(namespaces) > 0, "Synced catalog should have namespaces"

            # Should include google-research-datasets namespace. The payload is
            # JSON-decoded, so each namespace is already a list and needs no
            # tuple normalization.
            assert ["google-research-datasets"] in namespaces, (
                f"Expected 'google-research-datasets' namespace in {namespaces}"
            )

    def test_session_mbpp_has_mbpp_table(self, session_mbpp):
        """Verify session catalog exposes the mbpp table."""
        app = create_app(session_mbpp.uri)

        with TestClient(app=app) as client:
            response = client.get("/v1/namespaces/google-research-datasets/tables")
            assert response.status_code == 200

            data = response.json()
            tables = data["identifiers"]

            # Must have at least one table
            assert len(tables) > 0, (
                "Session catalog should have tables in google-research-datasets namespace"
            )

            # Should include mbpp table; non-dict entries are compared by their
            # string form
            table_names = [t["name"] if isinstance(t, dict) else str(t) for t in tables]
            assert "mbpp" in table_names, f"Expected 'mbpp' in {table_names}"

    def test_session_mbpp_table_has_valid_metadata(self, session_mbpp):
        """Verify synced table returns valid Iceberg metadata.

        Checks that the response has a metadata location and a metadata
        object, that the metadata declares a format version, schemas and a
        current schema id, and that the schema selected by that id has fields.
        """
        app = create_app(session_mbpp.uri)

        with TestClient(app=app) as client:
            response = client.get("/v1/namespaces/google-research-datasets/tables/mbpp")
            assert response.status_code == 200

            data = response.json()

            # Must have metadata location and metadata
            assert "metadata" in data, "Table response must include metadata"
            assert "metadata-location" in data or "metadata_location" in data, (
                "Table response must include metadata location"
            )

            metadata = data["metadata"]

            # Verify metadata structure
            assert "format-version" in metadata or "format_version" in metadata
            assert "schemas" in metadata, "Metadata must include schemas"
            assert "current-schema-id" in metadata or "current_schema_id" in metadata

            # Verify schema has fields
            schemas = metadata["schemas"]
            assert len(schemas) > 0, "Table must have at least one schema"

            # Resolve the current schema by id instead of assuming it is the
            # first entry. Nested .get() is used (not `or`) because a schema
            # id of 0 is valid but falsy.
            current_id = metadata.get("current-schema-id", metadata.get("current_schema_id"))
            current_schema = next(
                (s for s in schemas if s.get("schema-id", s.get("schema_id")) == current_id),
                None,
            )
            assert current_schema is not None, (
                f"No schema matching current schema id {current_id!r}"
            )
            fields = current_schema.get("fields", [])
            assert len(fields) > 0, "Schema must have fields"