faceberg 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- faceberg/__init__.py +15 -0
- faceberg/bridge.py +586 -0
- faceberg/catalog.py +1491 -0
- faceberg/cli.py +483 -0
- faceberg/config.py +208 -0
- faceberg/convert.py +813 -0
- faceberg/pretty.py +224 -0
- faceberg/server.py +439 -0
- faceberg/shell.py +83 -0
- faceberg/spaces/Dockerfile +10 -0
- faceberg/spaces/README.md +85 -0
- faceberg/spaces/landing.html +799 -0
- faceberg/tests/__init__.py +0 -0
- faceberg/tests/conftest.py +229 -0
- faceberg/tests/test_bridge.py +825 -0
- faceberg/tests/test_catalog.py +1347 -0
- faceberg/tests/test_catalog_duckdb.py +341 -0
- faceberg/tests/test_catalog_pandas.py +290 -0
- faceberg/tests/test_cli.py +62 -0
- faceberg/tests/test_config.py +367 -0
- faceberg/tests/test_convert.py +422 -0
- faceberg/tests/test_pretty.py +366 -0
- faceberg/tests/test_server.py +343 -0
- faceberg/tests/test_server_playwright.py +524 -0
- faceberg-0.1.0.dist-info/METADATA +175 -0
- faceberg-0.1.0.dist-info/RECORD +29 -0
- faceberg-0.1.0.dist-info/WHEEL +4 -0
- faceberg-0.1.0.dist-info/entry_points.txt +2 -0
- faceberg-0.1.0.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,524 @@
|
|
|
1
|
+
"""Playwright tests for Faceberg server."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
import time
|
|
5
|
+
|
|
6
|
+
import pytest
|
|
7
|
+
from playwright.sync_api import Page, expect
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@pytest.fixture(scope="session")
def browser_context_args(browser_context_args):
    """Extend the inherited browser-context options for this module.

    Copies the incoming ``browser_context_args`` mapping, then sets
    ``ignore_https_errors`` and a 1280x720 ``viewport`` on the copy.
    """
    context_options = dict(browser_context_args)
    context_options["ignore_https_errors"] = True
    context_options["viewport"] = {"width": 1280, "height": 720}
    return context_options
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@pytest.fixture(scope="session")
def browser_type_launch_args(browser_type_launch_args):
    """Extend the inherited browser launch options, forcing headless mode.

    Copies the incoming ``browser_type_launch_args`` mapping and sets
    ``headless`` to ``True`` on the copy.
    """
    launch_options = dict(browser_type_launch_args)
    launch_options["headless"] = True
    return launch_options
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# =============================================================================
|
|
30
|
+
# Page Load and Structure Tests
|
|
31
|
+
# =============================================================================
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def test_landing_page_loads(session_rest_server: str, page: Page):
    """Open the server root and verify the title and the main heading."""
    page.goto(session_rest_server)

    # The document title must match the "Faceberg" pattern.
    title_pattern = re.compile("Faceberg")
    expect(page).to_have_title(title_pattern)

    # An <h1> containing the text "Faceberg" must be rendered.
    expect(page.locator("h1:has-text('Faceberg')")).to_be_visible()
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def test_header_displays_correctly(session_rest_server: str, page: Page):
    """Verify the header's title, logo, catalog badge and table-count text."""
    page.goto(session_rest_server)

    # Title element: visible and mentions the product name.
    title = page.locator(".header-title")
    expect(title).to_be_visible()
    expect(title).to_contain_text("Faceberg")

    # Logo image first, then the catalog badge: both must be rendered.
    for selector in (".logo-icon img", ".header-catalog-badge"):
        expect(page.locator(selector)).to_be_visible()

    # The meta area must include the word "tables".
    expect(page.locator(".header-meta")).to_contain_text("tables")
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def test_layout_structure(session_rest_server: str, page: Page):
    """Verify the main container and both sidebars are rendered."""
    page.goto(session_rest_server)

    # Checked in order: outer container, left sidebar (table list),
    # right sidebar (shell).
    layout_selectors = (".main-container", ".left-sidebar", ".right-sidebar")
    for selector in layout_selectors:
        expect(page.locator(selector)).to_be_visible()
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def test_catalog_hierarchy_section_present(session_rest_server: str, page: Page):
    """Verify the catalog hierarchy and its section title are rendered."""
    page.goto(session_rest_server)

    # Hierarchy container first, then its section title.
    for selector in (".catalog-hierarchy", ".section-title"):
        expect(page.locator(selector)).to_be_visible()
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
# =============================================================================
|
|
99
|
+
# Catalog with Data Tests
|
|
100
|
+
# =============================================================================
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def test_catalog_displays_tables(session_rest_server: str, page: Page):
    """Verify that at least one table entry is rendered in the catalog."""
    page.goto(session_rest_server)

    # Only the first ".table-item" match is checked for visibility.
    expect(page.locator(".table-item").first).to_be_visible()
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def test_catalog_shows_table_count(session_rest_server: str, page: Page):
    """Verify the header meta area contains the word "table"."""
    page.goto(session_rest_server)

    expect(page.locator(".header-meta")).to_contain_text("table")
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def test_table_item_displays_metadata(session_rest_server: str, page: Page):
    """Verify the first table entry shows its name and a row count."""
    page.goto(session_rest_server)

    table_entry = page.locator(".table-item").first

    # The name element inside the entry must be rendered.
    expect(table_entry.locator(".table-name")).to_be_visible()

    # The row-count element must be rendered and include the word "rows".
    rows_label = table_entry.locator(".table-row-count")
    expect(rows_label).to_be_visible()
    expect(rows_label).to_contain_text("rows")
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def test_table_expansion(session_rest_server: str, page: Page):
    """Verify that clicking a table's summary opens its details."""
    page.goto(session_rest_server)

    table_entry = page.locator(".table-item").first

    # Before any interaction the element must not carry the "open" attribute.
    initially_open = table_entry.evaluate("el => el.hasAttribute('open')")
    assert not initially_open, "Table should start collapsed"

    # Clicking the <summary> toggles the entry open.
    table_entry.locator("summary").click()
    expect(table_entry).to_have_attribute("open", "")

    # Once open, the content area must be rendered.
    expect(table_entry.locator(".table-content")).to_be_visible()
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def test_table_schema_displays(session_rest_server: str, page: Page):
    """Verify an expanded table shows a schema table with its headers."""
    page.goto(session_rest_server)

    # Open the first table entry and wait for its content area.
    table_entry = page.locator(".table-item").first
    table_entry.locator("summary").click()
    expect(table_entry.locator(".table-content")).to_be_visible()

    # The schema table must be rendered and contain each header label.
    schema = table_entry.locator(".schema-table")
    expect(schema).to_be_visible()
    for header_text in ("Column", "Type", "Req"):
        expect(schema).to_contain_text(header_text)
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def test_table_metadata_grid(session_rest_server: str, page: Page):
    """Verify an expanded table shows the metadata grid and its labels."""
    page.goto(session_rest_server)

    # Open the first table entry.
    table_entry = page.locator(".table-item").first
    table_entry.locator("summary").click()

    # The grid must be rendered and contain each stat label.
    grid = table_entry.locator(".metadata-grid")
    expect(grid).to_be_visible()
    for label in ("Rows:", "Files:", "Cols:"):
        expect(grid).to_contain_text(label)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def test_query_button_present(session_rest_server: str, page: Page):
    """Verify an expanded table exposes an action button labelled "Query"."""
    page.goto(session_rest_server)

    # Open the first table entry.
    table_entry = page.locator(".table-item").first
    table_entry.locator("summary").click()

    # The action button must be rendered and contain the text "Query".
    action = table_entry.locator(".action-button")
    expect(action).to_be_visible()
    expect(action).to_contain_text("Query")
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
# =============================================================================
|
|
215
|
+
# DuckDB Shell Tests
|
|
216
|
+
# =============================================================================
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def test_shell_container_visible(session_rest_server: str, page: Page):
    """Verify the ``#shell-container`` element is rendered after page load."""
    page.goto(session_rest_server)

    expect(page.locator("#shell-container")).to_be_visible()
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def test_quick_tips_section(session_rest_server: str, page: Page):
    """Verify the quick-tips panel is rendered with its expected keywords."""
    page.goto(session_rest_server)

    tips = page.locator(".quick-tips")
    expect(tips).to_be_visible()

    # The panel text must mention the shell and both extension names.
    for keyword in ("DuckDB Shell", "iceberg", "httpfs"):
        expect(tips).to_contain_text(keyword)
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def test_duckdb_shell_initializes(session_rest_server: str, page: Page):
    """Test that the DuckDB shell initializes without errors.

    After loading the page and waiting for the shell, asserts that:

    * no uncaught page error mentioning both ``postMessage`` and
      ``could not be cloned`` was recorded,
    * no visible element contains the text "Error initializing shell",
    * the ``.xterm`` element is visible.
    """
    # Record uncaught page errors. The listener is registered before
    # navigation so that errors raised while the page loads are captured too.
    errors = []
    page.on("pageerror", lambda exc: errors.append(str(exc)))

    page.goto(session_rest_server)

    # Wait for the shell container to be visible.
    expect(page.locator("#shell-container")).to_be_visible(timeout=10000)

    # Fixed grace period so that errors raised during shell start-up have a
    # chance to be recorded before they are inspected below.
    page.wait_for_timeout(8000)

    # Keep only the specific postMessage cloning failures.
    post_message_errors = [
        err for err in errors if "postMessage" in err and "could not be cloned" in err
    ]
    assert not post_message_errors, (
        f"Found {len(post_message_errors)} postMessage cloning errors: {post_message_errors}"
    )

    # The UI must not be showing an initialization error message.
    error_divs = page.locator("div:has-text('Error initializing shell')").all()
    assert not any(div.is_visible() for div in error_divs), "Error message is visible in UI"

    # The terminal element being visible is taken as the readiness signal.
    expect(page.locator(".xterm")).to_be_visible(timeout=5000)
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def test_shell_has_xterm(session_rest_server: str, page: Page):
    """Test that XTerm.js terminal renders in the shell.

    Asserts that both the ``.xterm`` element and its ``.xterm-viewport``
    become visible after the page is loaded.
    """
    page.goto(session_rest_server)

    # Poll for the terminal instead of sleeping a fixed 8 s: the assertion
    # retries until the element is visible, so the test finishes as soon as
    # XTerm is ready. 13 s keeps the previous overall budget (the 8 s sleep
    # plus Playwright's default 5 s assertion timeout).
    expect(page.locator(".xterm")).to_be_visible(timeout=13000)

    # The viewport is checked with the default assertion timeout.
    expect(page.locator(".xterm-viewport")).to_be_visible()
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def test_extension_badges_visible(session_rest_server: str, page: Page):
    """Verify exactly two extension badges exist and each name is shown."""
    page.goto(session_rest_server)

    # Exactly two badge elements are expected.
    expect(page.locator(".extension-badge")).to_have_count(2)

    # Each named badge must be rendered.
    iceberg_badge = page.locator(".extension-badge:has-text('iceberg')")
    httpfs_badge = page.locator(".extension-badge:has-text('httpfs')")
    expect(iceberg_badge).to_be_visible()
    expect(httpfs_badge).to_be_visible()
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
# =============================================================================
|
|
307
|
+
# Responsive Behavior Tests
|
|
308
|
+
# =============================================================================
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def test_page_responsive_at_smaller_viewport(session_rest_server: str, page: Page):
    """Verify the main layout is still rendered in a 1024x768 viewport."""
    smaller_viewport = {"width": 1024, "height": 768}
    page.set_viewport_size(smaller_viewport)
    page.goto(session_rest_server)

    # Container and both sidebars must remain visible at this size.
    for selector in (".main-container", ".left-sidebar", ".right-sidebar"):
        expect(page.locator(selector)).to_be_visible()
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
def test_scrollbar_styling_applied(session_rest_server: str, page: Page):
    """Verify the left sidebar is rendered with computed ``overflow-y: auto``."""
    page.goto(session_rest_server)

    sidebar = page.locator(".left-sidebar")
    expect(sidebar).to_be_visible()

    # Read the computed style in the browser rather than the stylesheet text.
    overflow = sidebar.evaluate("el => window.getComputedStyle(el).overflowY")
    assert overflow == "auto", f"Expected overflow-y: auto, got {overflow}"
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
# =============================================================================
|
|
341
|
+
# API Endpoint Tests (via browser fetch)
|
|
342
|
+
# =============================================================================
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
def test_config_endpoint_accessible(session_rest_server: str, page: Page):
    """Fetch ``/v1/config`` from inside the page and check the response.

    Expects HTTP status 200 and an ``overrides`` key in the JSON body.
    """
    page.goto(session_rest_server)

    # The fetch runs in the browser, so the relative URL resolves against the
    # page that was just loaded.
    fetch_script = """
        async () => {
            const response = await fetch('/v1/config');
            return {
                status: response.status,
                data: await response.json()
            };
        }
    """
    result = page.evaluate(fetch_script)

    assert result["status"] == 200
    assert "overrides" in result["data"]
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
def test_namespaces_endpoint_accessible(session_rest_server: str, page: Page):
    """Fetch ``/v1/namespaces`` from inside the page and check the response.

    Expects HTTP status 200 and a non-empty ``namespaces`` list in the JSON
    body.
    """
    page.goto(session_rest_server)

    # The fetch runs in the browser, so the relative URL resolves against the
    # page that was just loaded.
    fetch_script = """
        async () => {
            const response = await fetch('/v1/namespaces');
            return {
                status: response.status,
                data: await response.json()
            };
        }
    """
    result = page.evaluate(fetch_script)

    assert result["status"] == 200
    payload = result["data"]
    assert "namespaces" in payload
    # At least one namespace is expected (google-research-datasets).
    assert len(payload["namespaces"]) > 0
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
def test_tables_endpoint_accessible(session_rest_server: str, page: Page):
    """Fetch the tables listing for one namespace and check the response.

    Requests ``/v1/namespaces/google-research-datasets/tables`` from inside
    the page and expects HTTP status 200 with a non-empty ``identifiers``
    list in the JSON body.
    """
    page.goto(session_rest_server)

    # The fetch runs in the browser, so the relative URL resolves against the
    # page that was just loaded.
    fetch_script = """
        async () => {
            const response = await fetch('/v1/namespaces/google-research-datasets/tables');
            return {
                status: response.status,
                data: await response.json()
            };
        }
    """
    result = page.evaluate(fetch_script)

    assert result["status"] == 200
    payload = result["data"]
    assert "identifiers" in payload
    # At least one table is expected (mbpp).
    assert len(payload["identifiers"]) > 0
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
# =============================================================================
|
|
407
|
+
# JavaScript Error Tests
|
|
408
|
+
# =============================================================================
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
def test_no_javascript_errors_on_load(session_rest_server: str, page: Page):
    """Verify no uncaught page errors occur during load.

    Errors whose text contains ``postMessage`` are ignored (marked in the
    original as a known DuckDB WASM issue).
    """
    page_errors = []

    def record_error(exc):
        page_errors.append(str(exc))

    page.on("pageerror", record_error)

    page.goto(session_rest_server)
    page.wait_for_timeout(2000)

    # Drop the known-acceptable postMessage errors; everything else counts.
    critical_errors = []
    for err in page_errors:
        if "postMessage" in err:  # Known DuckDB WASM issue
            continue
        critical_errors.append(err)

    assert len(critical_errors) == 0, f"Found JavaScript errors: {critical_errors}"
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
def test_no_console_errors(session_rest_server: str, page: Page):
    """Verify no critical error-level console messages are logged.

    Only console messages of type ``error`` are collected, and only those
    containing one of a fixed set of marker substrings are treated as
    critical.
    """
    critical_markers = (
        "Failed to load resource",
        "Uncaught",
        "SyntaxError",
        "ReferenceError",
    )
    console_errors = []

    def handle_console(msg):
        # Ignore everything except error-level console output.
        if msg.type != "error":
            return
        console_errors.append(msg.text)

    page.on("console", handle_console)

    page.goto(session_rest_server)
    page.wait_for_timeout(3000)

    # Other console errors (e.g. DuckDB initialization messages) are tolerated;
    # only those matching a marker indicate a real problem.
    critical_errors = [
        err for err in console_errors if any(marker in err for marker in critical_markers)
    ]

    assert len(critical_errors) == 0, f"Found critical console errors: {critical_errors}"
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
# =============================================================================
|
|
457
|
+
# Visual Elements Tests
|
|
458
|
+
# =============================================================================
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
def test_logo_image_loads(session_rest_server: str, page: Page):
    """Verify the logo image is rendered and its ``src`` mentions faceberg."""
    page.goto(session_rest_server)

    logo_image = page.locator(".logo-icon img")
    expect(logo_image).to_be_visible()

    # The image must have a "src" attribute whose value contains "faceberg"
    # (compared case-insensitively).
    source_url = logo_image.get_attribute("src")
    assert source_url is not None
    assert "faceberg" in source_url.lower()
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
def test_color_scheme_applied(session_rest_server: str, page: Page):
    """Verify the app header reports a computed background colour."""
    page.goto(session_rest_server)

    # Read the computed background colour of the header in the browser.
    app_header = page.locator(".app-header")
    background = app_header.evaluate("el => window.getComputedStyle(el).backgroundColor")

    # Intended to be some shade of blue (primary-blue from CSS).
    # NOTE(review): this only checks for a non-None value, so it presumably
    # passes for any colour - consider asserting the expected colour once the
    # stylesheet value is confirmed.
    assert background is not None, "Header should have background color"
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
def test_section_title_visible(session_rest_server: str, page: Page):
    """Verify the section title is rendered and contains "Tables"."""
    page.goto(session_rest_server)

    title = page.locator(".section-title")
    expect(title).to_be_visible()
    expect(title).to_contain_text("Tables")
|
|
493
|
+
|
|
494
|
+
|
|
495
|
+
# =============================================================================
|
|
496
|
+
# Performance and Loading Tests
|
|
497
|
+
# =============================================================================
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
def test_page_loads_quickly(session_rest_server: str, page: Page):
    """Test that the page loads within a reasonable time.

    Measures the time from starting navigation until ``.main-container`` is
    visible and asserts that it is under 5 seconds.
    """
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way a time.time() difference can.
    start_time = time.perf_counter()

    page.goto(session_rest_server)

    # Wait for main content to be visible.
    page.locator(".main-container").wait_for(state="visible")

    load_time = time.perf_counter() - start_time

    # Page should load in under 5 seconds.
    assert load_time < 5.0, f"Page took too long to load: {load_time:.2f}s"
|
|
513
|
+
|
|
514
|
+
|
|
515
|
+
def test_fonts_load(session_rest_server: str, page: Page):
    """Verify the body's computed font family includes DM Sans."""
    page.goto(session_rest_server)

    # Read the computed font-family of <body> in the browser.
    font_family = page.locator("body").evaluate("el => window.getComputedStyle(el).fontFamily")

    # A case-insensitive match also covers the exact-case "DM Sans" spelling.
    has_dm_sans = "dm sans" in font_family.lower()
    assert has_dm_sans, f"Expected DM Sans font, got: {font_family}"
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: faceberg
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Bridge HuggingFace datasets with Apache Iceberg
|
|
5
|
+
Project-URL: Homepage, https://github.com/kszucs/faceberg
|
|
6
|
+
Project-URL: Documentation, https://github.com/kszucs/faceberg
|
|
7
|
+
Project-URL: Repository, https://github.com/kszucs/faceberg
|
|
8
|
+
Author-email: Krisztian Szucs <kszucs@users.noreply.github.com>
|
|
9
|
+
License: Apache-2.0
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: data-lake,datasets,huggingface,iceberg
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Requires-Python: >=3.9
|
|
21
|
+
Requires-Dist: click>=8.0.0
|
|
22
|
+
Requires-Dist: datasets>=2.0.0
|
|
23
|
+
Requires-Dist: fsspec>=2023.1.0
|
|
24
|
+
Requires-Dist: huggingface-hub>=0.20.0
|
|
25
|
+
Requires-Dist: jinja2>=3.1.6
|
|
26
|
+
Requires-Dist: litestar>=2.0.0
|
|
27
|
+
Requires-Dist: pyarrow>=21.0.0
|
|
28
|
+
Requires-Dist: pyiceberg>=0.6.0
|
|
29
|
+
Requires-Dist: pyyaml>=6.0
|
|
30
|
+
Requires-Dist: rich>=13.0.0
|
|
31
|
+
Requires-Dist: uuid-utils>=0.9.0
|
|
32
|
+
Requires-Dist: uvicorn[standard]>=0.27.0
|
|
33
|
+
Provides-Extra: dev
|
|
34
|
+
Requires-Dist: black>=23.0.0; extra == 'dev'
|
|
35
|
+
Requires-Dist: duckdb>=0.10.0; extra == 'dev'
|
|
36
|
+
Requires-Dist: mypy>=1.0.0; extra == 'dev'
|
|
37
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
|
|
38
|
+
Requires-Dist: pytest-playwright>=0.7.0; extra == 'dev'
|
|
39
|
+
Requires-Dist: pytest>=7.0.0; extra == 'dev'
|
|
40
|
+
Requires-Dist: requests>=2.31.0; extra == 'dev'
|
|
41
|
+
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
42
|
+
Description-Content-Type: text/markdown
|
|
43
|
+
|
|
44
|
+

|
|
45
|
+
|
|
46
|
+
# Faceberg
|
|
47
|
+
|
|
48
|
+
Bridge HuggingFace datasets with Apache Iceberg tables.
|
|
49
|
+
|
|
50
|
+
## Installation
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
pip install faceberg
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Quick Start
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
# Create a catalog and add a dataset
|
|
60
|
+
faceberg mycatalog init
|
|
61
|
+
faceberg mycatalog add stanfordnlp/imdb --config plain_text
|
|
62
|
+
faceberg mycatalog sync
|
|
63
|
+
|
|
64
|
+
# Query the data
|
|
65
|
+
faceberg mycatalog scan default.imdb --limit 5
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
**Python API:**
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
from faceberg import catalog
|
|
72
|
+
|
|
73
|
+
cat = catalog("mycatalog")
|
|
74
|
+
table = cat.load_table("default.imdb")
|
|
75
|
+
df = table.scan().to_pandas()
|
|
76
|
+
print(df.head())
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
**Documentation:**
|
|
80
|
+
- [Getting Started](docs/index.qmd) - Quickstart guide
|
|
81
|
+
- [Local Catalogs](docs/local.qmd) - Use local catalogs for testing
|
|
82
|
+
- [DuckDB Integration](docs/integrations/duckdb.qmd) - Query with SQL
|
|
83
|
+
- [Pandas Integration](docs/integrations/pandas.qmd) - Load into DataFrames
|
|
84
|
+
|
|
85
|
+
## How It Works
|
|
86
|
+
|
|
87
|
+
Faceberg creates lightweight Iceberg metadata that points to original HuggingFace dataset files:
|
|
88
|
+
|
|
89
|
+
```
|
|
90
|
+
HuggingFace Dataset         Your Catalog
┌─────────────────┐         ┌──────────────────┐
│ org/dataset     │         │ mycatalog/       │
│ ├── train.pq ◄──┼─────────┼─ default/        │
│ └── test.pq  ◄──┼─────────┼─   └── imdb/     │
└─────────────────┘         │        └── metadata/
                            └──────────────────┘
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
No data is copied—only metadata is created. Query with DuckDB, PyIceberg, Spark, or any Iceberg-compatible tool.
|
|
100
|
+
|
|
101
|
+
## Usage
|
|
102
|
+
|
|
103
|
+
### CLI Commands
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
# Initialize catalog
|
|
107
|
+
faceberg mycatalog init
|
|
108
|
+
|
|
109
|
+
# Add datasets
|
|
110
|
+
faceberg mycatalog add openai/gsm8k --config main
|
|
111
|
+
|
|
112
|
+
# Sync datasets (creates Iceberg metadata)
|
|
113
|
+
faceberg mycatalog sync
|
|
114
|
+
|
|
115
|
+
# List tables
|
|
116
|
+
faceberg mycatalog list
|
|
117
|
+
|
|
118
|
+
# Show table info
|
|
119
|
+
faceberg mycatalog info default.gsm8k
|
|
120
|
+
|
|
121
|
+
# Scan data
|
|
122
|
+
faceberg mycatalog scan default.gsm8k --limit 10
|
|
123
|
+
|
|
124
|
+
# Start REST server
|
|
125
|
+
faceberg mycatalog serve --port 8181
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### Remote Catalogs on HuggingFace Hub
|
|
129
|
+
|
|
130
|
+
```bash
|
|
131
|
+
# Initialize remote catalog
|
|
132
|
+
export HF_TOKEN=your_token
|
|
133
|
+
faceberg org/catalog-repo init
|
|
134
|
+
|
|
135
|
+
# Add and sync datasets
|
|
136
|
+
faceberg org/catalog-repo add deepmind/code_contests --config default
|
|
137
|
+
faceberg org/catalog-repo sync
|
|
138
|
+
|
|
139
|
+
# Serve remote catalog
|
|
140
|
+
faceberg org/catalog-repo serve
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
### Query with DuckDB
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
import duckdb
|
|
147
|
+
|
|
148
|
+
conn = duckdb.connect()
|
|
149
|
+
conn.execute("INSTALL httpfs; LOAD httpfs")
|
|
150
|
+
conn.execute("INSTALL iceberg; LOAD iceberg")
|
|
151
|
+
|
|
152
|
+
# Query local catalog
|
|
153
|
+
result = conn.execute("""
|
|
154
|
+
SELECT * FROM iceberg_scan('mycatalog/default/imdb/metadata/v1.metadata.json')
|
|
155
|
+
LIMIT 10
|
|
156
|
+
""").fetchall()
|
|
157
|
+
|
|
158
|
+
# Query remote catalog
|
|
159
|
+
result = conn.execute("""
|
|
160
|
+
SELECT * FROM iceberg_scan('hf://datasets/org/catalog/default/table/metadata/v1.metadata.json')
|
|
161
|
+
LIMIT 10
|
|
162
|
+
""").fetchall()
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
## Development
|
|
166
|
+
|
|
167
|
+
```bash
|
|
168
|
+
git clone https://github.com/kszucs/faceberg
|
|
169
|
+
cd faceberg
|
|
170
|
+
pip install -e .
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
## License
|
|
174
|
+
|
|
175
|
+
Apache 2.0
|