faceberg 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- faceberg/__init__.py +15 -0
- faceberg/bridge.py +586 -0
- faceberg/catalog.py +1491 -0
- faceberg/cli.py +483 -0
- faceberg/config.py +208 -0
- faceberg/convert.py +813 -0
- faceberg/pretty.py +224 -0
- faceberg/server.py +439 -0
- faceberg/shell.py +83 -0
- faceberg/spaces/Dockerfile +10 -0
- faceberg/spaces/README.md +85 -0
- faceberg/spaces/landing.html +799 -0
- faceberg/tests/__init__.py +0 -0
- faceberg/tests/conftest.py +229 -0
- faceberg/tests/test_bridge.py +825 -0
- faceberg/tests/test_catalog.py +1347 -0
- faceberg/tests/test_catalog_duckdb.py +341 -0
- faceberg/tests/test_catalog_pandas.py +290 -0
- faceberg/tests/test_cli.py +62 -0
- faceberg/tests/test_config.py +367 -0
- faceberg/tests/test_convert.py +422 -0
- faceberg/tests/test_pretty.py +366 -0
- faceberg/tests/test_server.py +343 -0
- faceberg/tests/test_server_playwright.py +524 -0
- faceberg-0.1.0.dist-info/METADATA +175 -0
- faceberg-0.1.0.dist-info/RECORD +29 -0
- faceberg-0.1.0.dist-info/WHEEL +4 -0
- faceberg-0.1.0.dist-info/entry_points.txt +2 -0
- faceberg-0.1.0.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,343 @@
|
|
|
1
|
+
"""Tests for REST catalog server."""
|
|
2
|
+
|
|
3
|
+
from litestar.testing import TestClient
|
|
4
|
+
|
|
5
|
+
from faceberg.config import Config, Namespace
|
|
6
|
+
from faceberg.server import create_app
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class TestConfigEndpoint:
    """Exercise the ``/v1/config`` route of the REST catalog server."""

    def test_get_config(self, session_mbpp):
        """A plain GET answers 200 with ``defaults`` and ``overrides`` (including ``uri``)."""
        with TestClient(app=create_app(session_mbpp.uri)) as http:
            reply = http.get("/v1/config")
            assert reply.status_code == 200

            payload = reply.json()
            # Both top-level sections must be present in the response body.
            assert "defaults" in payload
            assert "overrides" in payload
            assert "uri" in payload["overrides"]

    def test_get_config_with_warehouse_param(self, session_mbpp):
        """The ``warehouse`` query parameter is echoed back in the overrides."""
        with TestClient(app=create_app(session_mbpp.uri)) as http:
            reply = http.get("/v1/config?warehouse=/my/warehouse")
            assert reply.status_code == 200

            overrides = reply.json()["overrides"]
            assert overrides["warehouse"] == "/my/warehouse"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class TestNamespaceEndpoints:
    """Exercise the namespace routes under ``/v1/namespaces``."""

    def test_list_namespaces(self, session_mbpp):
        """Listing returns a non-empty collection containing the fixture's namespace."""
        with TestClient(app=create_app(session_mbpp.uri)) as http:
            reply = http.get("/v1/namespaces")
            assert reply.status_code == 200

            payload = reply.json()
            assert "namespaces" in payload
            listed = payload["namespaces"]
            assert len(listed) > 0
            # The 'google-research-datasets' namespace comes from session_mbpp;
            # accept it encoded either as a list or as a tuple.
            accepted_forms = (
                ["google-research-datasets"],
                ("google-research-datasets",),
            )
            assert any(form in listed for form in accepted_forms)

    def test_load_namespace(self, session_mbpp):
        """Loading one namespace returns its identifier and a properties entry."""
        with TestClient(app=create_app(session_mbpp.uri)) as http:
            reply = http.get("/v1/namespaces/google-research-datasets")
            assert reply.status_code == 200

            payload = reply.json()
            assert "namespace" in payload
            assert "properties" in payload
            # The identifier is a one-element sequence (list or tuple form).
            identifier = payload["namespace"]
            assert identifier == ["google-research-datasets"] or identifier == (
                "google-research-datasets",
            )

    def test_namespace_exists_head(self, session_mbpp):
        """A HEAD request for an existing namespace answers 204."""
        with TestClient(app=create_app(session_mbpp.uri)) as http:
            reply = http.head("/v1/namespaces/google-research-datasets")
            assert reply.status_code == 204

    def test_namespace_not_exists(self, session_mbpp):
        """Loading an unknown namespace answers 200 with empty properties.

        Note: the behavior pinned here is that an unknown namespace yields
        an empty properties mapping instead of an error response.
        """
        with TestClient(app=create_app(session_mbpp.uri)) as http:
            reply = http.get("/v1/namespaces/nonexistent")
            assert reply.status_code == 200

            payload = reply.json()
            assert "namespace" in payload
            assert "properties" in payload
            # Nothing is known about the namespace, so no properties come back.
            assert payload["properties"] == {}
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class TestTableEndpoints:
    """Exercise the table routes under ``/v1/namespaces/{namespace}/tables``."""

    def test_list_tables(self, session_mbpp):
        """Listing tables returns a non-empty ``identifiers`` collection of well-formed entries."""
        with TestClient(app=create_app(session_mbpp.uri)) as http:
            reply = http.get("/v1/namespaces/google-research-datasets/tables")
            assert reply.status_code == 200

            payload = reply.json()
            assert "identifiers" in payload
            entries = payload["identifiers"]
            assert len(entries) > 0
            # Every entry must carry both parts of a table identifier.
            for entry in entries:
                assert "namespace" in entry and "name" in entry

    def test_load_table(self, session_mbpp):
        """Loading the mbpp table returns its metadata and metadata location."""
        with TestClient(app=create_app(session_mbpp.uri)) as http:
            reply = http.get("/v1/namespaces/google-research-datasets/tables/mbpp")
            assert reply.status_code == 200

            payload = reply.json()
            # Keys may be serialized in kebab-case or snake_case; accept either spelling.
            assert "metadata-location" in payload or "metadata_location" in payload
            assert "metadata" in payload
            # The metadata document must state its format version.
            table_metadata = payload["metadata"]
            assert "format-version" in table_metadata or "format_version" in table_metadata

    def test_table_exists_head(self, session_mbpp):
        """A HEAD request for an existing table answers 204."""
        with TestClient(app=create_app(session_mbpp.uri)) as http:
            reply = http.head("/v1/namespaces/google-research-datasets/tables/mbpp")
            assert reply.status_code == 204

    def test_table_not_exists(self, session_mbpp):
        """Loading an unknown table answers 404 with a ``NoSuchTableError`` body."""
        with TestClient(app=create_app(session_mbpp.uri)) as http:
            reply = http.get("/v1/namespaces/google-research-datasets/tables/nonexistent")
            assert reply.status_code == 404

            payload = reply.json()
            assert "error" in payload
            assert payload["error"]["type"] == "NoSuchTableError"

    def test_table_exists_wrong_namespace(self, session_mbpp):
        """A HEAD request for a table under an unknown namespace answers 404."""
        with TestClient(app=create_app(session_mbpp.uri)) as http:
            reply = http.head("/v1/namespaces/nonexistent/tables/mbpp")
            assert reply.status_code == 404
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
class TestErrorHandling:
    """Check the shape of error responses produced by the server."""

    def test_error_response_format(self, session_mbpp):
        """An error body wraps ``message``, ``type`` and ``code`` under an ``error`` key."""
        with TestClient(app=create_app(session_mbpp.uri)) as http:
            # A missing table is used as the trigger for an error response.
            reply = http.get("/v1/namespaces/google-research-datasets/tables/nonexistent")
            assert reply.status_code == 404

            payload = reply.json()
            assert "error" in payload
            details = payload["error"]
            # All three fields must be present before their values are checked.
            assert "message" in details
            assert "type" in details
            assert "code" in details
            assert details["type"] == "NoSuchTableError"
            assert details["code"] == 404

    def test_internal_error_handling(self, session_mbpp):
        """Requesting an unrouted path yields an error status (404, 405 or 500)."""
        with TestClient(app=create_app(session_mbpp.uri)) as http:
            # This path is not a known route; the client call must still return a response.
            reply = http.get("/v1/config/invalid")
            assert reply.status_code in [404, 405, 500]
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
class TestEmptyCatalogBehavior:
    """Server behavior for catalogs that hold no tables, or no namespaces at all.

    These cases matter for freshly initialized catalogs, where the server
    must answer sensibly before anything has been added.
    """

    @staticmethod
    def _app_for_catalog(parent, dirname, config):
        """Create ``parent / dirname``, write ``config`` as ``faceberg.yml``, return the app."""
        location = parent / dirname
        location.mkdir()
        config.to_yaml(location / "faceberg.yml")
        return create_app(str(location))

    def test_empty_catalog_list_namespaces(self, tmp_path):
        """A catalog written from a bare ``Config()`` lists no namespaces."""
        app = self._app_for_catalog(tmp_path, "empty_catalog", Config())

        with TestClient(app=app) as http:
            reply = http.get("/v1/namespaces")
            assert reply.status_code == 200

            payload = reply.json()
            assert "namespaces" in payload
            # An empty catalog yields an empty list, not an error.
            assert payload["namespaces"] == []

    def test_empty_catalog_list_tables_in_default(self, tmp_path):
        """Listing tables of a namespace the empty catalog does not define answers 404."""
        app = self._app_for_catalog(tmp_path, "empty_catalog", Config())

        with TestClient(app=app) as http:
            reply = http.get("/v1/namespaces/default/tables")
            # 'default' was never defined in this catalog.
            assert reply.status_code == 404

    def test_catalog_with_namespace_but_no_tables(self, tmp_path):
        """A namespace defined without tables is listed and reports no identifiers."""
        app = self._app_for_catalog(
            tmp_path,
            "catalog_with_empty_ns",
            Config({"default": Namespace()}),
        )

        with TestClient(app=app) as http:
            # The namespace itself must show up in the listing.
            ns_reply = http.get("/v1/namespaces")
            assert ns_reply.status_code == 200
            ns_payload = ns_reply.json()
            assert "namespaces" in ns_payload
            listed = ns_payload["namespaces"]
            assert ["default"] in listed or ("default",) in listed

            # Its table listing succeeds but contains nothing.
            tables_reply = http.get("/v1/namespaces/default/tables")
            assert tables_reply.status_code == 200
            tables_payload = tables_reply.json()
            assert "identifiers" in tables_payload
            assert tables_payload["identifiers"] == []
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
class TestSyncedCatalogDataIntegrity:
    """Test that synced catalog data is correctly exposed via REST API.

    These tests ensure that after syncing datasets, the REST API correctly
    exposes the namespace and table information.
    """

    def test_session_mbpp_has_default_namespace(self, session_mbpp):
        """Verify synced catalog exposes the google-research-datasets namespace."""
        app = create_app(session_mbpp.uri)

        with TestClient(app=app) as client:
            response = client.get("/v1/namespaces")
            assert response.status_code == 200

            data = response.json()
            namespaces = data["namespaces"]

            # Must have at least one namespace
            assert len(namespaces) > 0, "Synced catalog should have namespaces"

            # Normalize every entry to list form so that a single membership
            # check covers both list- and tuple-encoded namespaces.
            namespace_list = [list(ns) if isinstance(ns, tuple) else ns for ns in namespaces]
            assert [
                "google-research-datasets"
            ] in namespace_list, f"Expected 'google-research-datasets' namespace in {namespace_list}"

    def test_session_mbpp_has_mbpp_table(self, session_mbpp):
        """Verify session catalog exposes the mbpp table."""
        app = create_app(session_mbpp.uri)

        with TestClient(app=app) as client:
            response = client.get("/v1/namespaces/google-research-datasets/tables")
            assert response.status_code == 200

            data = response.json()
            tables = data["identifiers"]

            # Must have at least one table
            assert len(tables) > 0, (
                "Session catalog should have tables in google-research-datasets namespace"
            )

            # Entries that are dicts contribute their "name"; anything else is
            # compared by its string form.
            table_names = [t["name"] if isinstance(t, dict) else str(t) for t in tables]
            assert "mbpp" in table_names, f"Expected 'mbpp' in {table_names}"

    def test_session_mbpp_table_has_valid_metadata(self, session_mbpp):
        """Verify synced table returns valid Iceberg metadata.

        Checks that the response carries the metadata and its location, that
        the metadata names a format version, schemas and a current schema id,
        and that the schema referenced by the current schema id has fields.
        """
        app = create_app(session_mbpp.uri)

        with TestClient(app=app) as client:
            response = client.get("/v1/namespaces/google-research-datasets/tables/mbpp")
            assert response.status_code == 200

            data = response.json()

            # Must have metadata location and metadata
            assert "metadata" in data, "Table response must include metadata"
            assert "metadata-location" in data or "metadata_location" in data, (
                "Table response must include metadata location"
            )

            metadata = data["metadata"]

            # Verify metadata structure (keys accepted in kebab- or snake-case)
            assert "format-version" in metadata or "format_version" in metadata
            assert "schemas" in metadata, "Metadata must include schemas"
            assert "current-schema-id" in metadata or "current_schema_id" in metadata

            # Verify schema has fields
            schemas = metadata.get("schemas", [])
            assert len(schemas) > 0, "Table must have at least one schema"

            # Pick the schema the metadata designates as current instead of
            # assuming it is the first entry of the list.
            current_schema_id = metadata.get(
                "current-schema-id", metadata.get("current_schema_id")
            )
            matching_schemas = [
                schema
                for schema in schemas
                if schema.get("schema-id", schema.get("schema_id")) == current_schema_id
            ]
            assert matching_schemas, (
                f"No schema matches current schema id {current_schema_id!r}"
            )

            current_schema = matching_schemas[0]
            fields = current_schema.get("fields", [])
            assert len(fields) > 0, "Schema must have fields"
|