faceberg 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,524 @@
1
+ """Playwright tests for Faceberg server."""
2
+
3
+ import re
4
+ import time
5
+
6
+ import pytest
7
+ from playwright.sync_api import Page, expect
8
+
9
+
10
@pytest.fixture(scope="session")
def browser_context_args(browser_context_args):
    """Extend the inherited browser context options for the whole session.

    Every inherited option is kept; HTTPS errors are ignored and the viewport
    is pinned to 1280x720.
    """
    overrides = {
        "ignore_https_errors": True,
        "viewport": {"width": 1280, "height": 720},
    }
    return {**browser_context_args, **overrides}
18
+
19
+
20
@pytest.fixture(scope="session")
def browser_type_launch_args(browser_type_launch_args):
    """Return the inherited launch options with headless mode switched on."""
    launch_options = dict(browser_type_launch_args)
    launch_options["headless"] = True
    return launch_options
27
+
28
+
29
+ # =============================================================================
30
+ # Page Load and Structure Tests
31
+ # =============================================================================
32
+
33
+
34
def test_landing_page_loads(session_rest_server: str, page: Page):
    """Open the landing page and confirm its title and main heading."""
    page.goto(session_rest_server)

    # The document title must mention the product name.
    title_pattern = re.compile("Faceberg")
    expect(page).to_have_title(title_pattern)

    # The top-level heading carrying the product name must be rendered.
    expect(page.locator("h1:has-text('Faceberg')")).to_be_visible()
44
+
45
+
46
def test_header_displays_correctly(session_rest_server: str, page: Page):
    """Verify the header shows its title, logo, catalog badge and table count."""
    page.goto(session_rest_server)

    # Title element: rendered and carrying the product name.
    title = page.locator(".header-title")
    expect(title).to_be_visible()
    expect(title).to_contain_text("Faceberg")

    # Logo image first, then the catalog badge.
    for selector in (".logo-icon img", ".header-catalog-badge"):
        expect(page.locator(selector)).to_be_visible()

    # The meta area reports the table count.
    expect(page.locator(".header-meta")).to_contain_text("tables")
66
+
67
+
68
def test_layout_structure(session_rest_server: str, page: Page):
    """Verify the main container and both sidebars are rendered."""
    page.goto(session_rest_server)

    # Checked in order: outer container, table-list sidebar, shell sidebar.
    layout_selectors = (".main-container", ".left-sidebar", ".right-sidebar")
    for selector in layout_selectors:
        expect(page.locator(selector)).to_be_visible()
83
+
84
+
85
def test_catalog_hierarchy_section_present(session_rest_server: str, page: Page):
    """Verify the catalog hierarchy and its section title are rendered."""
    page.goto(session_rest_server)

    # Hierarchy container first, then its title.
    expect(page.locator(".catalog-hierarchy")).to_be_visible()
    expect(page.locator(".section-title")).to_be_visible()
96
+
97
+
98
+ # =============================================================================
99
+ # Catalog with Data Tests
100
+ # =============================================================================
101
+
102
+
103
def test_catalog_displays_tables(session_rest_server: str, page: Page):
    """Verify the catalog lists at least one table entry."""
    page.goto(session_rest_server)

    # One visible entry is enough to show the list was populated.
    expect(page.locator(".table-item").first).to_be_visible()
110
+
111
+
112
def test_catalog_shows_table_count(session_rest_server: str, page: Page):
    """Verify the header meta area mentions the table count."""
    page.goto(session_rest_server)

    meta = page.locator(".header-meta")
    expect(meta).to_contain_text("table")
118
+
119
+
120
def test_table_item_displays_metadata(session_rest_server: str, page: Page):
    """Verify the first table entry shows its name and a row count."""
    page.goto(session_rest_server)

    entry = page.locator(".table-item").first

    # The table name must be rendered inside the entry.
    expect(entry.locator(".table-name")).to_be_visible()

    # The row count must be rendered and labelled.
    rows_label = entry.locator(".table-row-count")
    expect(rows_label).to_be_visible()
    expect(rows_label).to_contain_text("rows")
135
+
136
+
137
def test_table_expansion(session_rest_server: str, page: Page):
    """Verify that clicking a table entry's summary opens its details."""
    page.goto(session_rest_server)

    entry = page.locator(".table-item").first
    toggle = entry.locator("summary")

    # Before any interaction the entry must not carry the `open` attribute.
    assert not entry.evaluate("el => el.hasAttribute('open')"), "Table should start collapsed"

    toggle.click()

    # After the click the attribute is present with an empty value ...
    expect(entry).to_have_attribute("open", "")

    # ... and the entry's content is rendered.
    expect(entry.locator(".table-content")).to_be_visible()
158
+
159
+
160
def test_table_schema_displays(session_rest_server: str, page: Page):
    """Verify an expanded table entry shows a schema table with its headers."""
    page.goto(session_rest_server)

    # Open the first entry.
    entry = page.locator(".table-item").first
    entry.locator("summary").click()

    # Its content must be rendered before the schema is inspected.
    expect(entry.locator(".table-content")).to_be_visible()

    schema = entry.locator(".schema-table")
    expect(schema).to_be_visible()

    # Each expected header label must appear in the schema table.
    for label in ("Column", "Type", "Req"):
        expect(schema).to_contain_text(label)
180
+
181
+
182
def test_table_metadata_grid(session_rest_server: str, page: Page):
    """Verify an expanded table entry shows the metadata grid and its labels."""
    page.goto(session_rest_server)

    # Open the first entry.
    entry = page.locator(".table-item").first
    entry.locator("summary").click()

    grid = entry.locator(".metadata-grid")
    expect(grid).to_be_visible()

    # Each statistic label must appear in the grid.
    for label in ("Rows:", "Files:", "Cols:"):
        expect(grid).to_contain_text(label)
198
+
199
+
200
def test_query_button_present(session_rest_server: str, page: Page):
    """Verify an expanded table entry offers a button labelled "Query"."""
    page.goto(session_rest_server)

    # Open the first entry.
    entry = page.locator(".table-item").first
    entry.locator("summary").click()

    button = entry.locator(".action-button")
    expect(button).to_be_visible()
    expect(button).to_contain_text("Query")
212
+
213
+
214
+ # =============================================================================
215
+ # DuckDB Shell Tests
216
+ # =============================================================================
217
+
218
+
219
def test_shell_container_visible(session_rest_server: str, page: Page):
    """Verify the shell container is rendered after the page is opened."""
    page.goto(session_rest_server)

    expect(page.locator("#shell-container")).to_be_visible()
225
+
226
+
227
def test_quick_tips_section(session_rest_server: str, page: Page):
    """Verify the quick tips panel is rendered with its expected text."""
    page.goto(session_rest_server)

    tips = page.locator(".quick-tips")
    expect(tips).to_be_visible()

    # The panel names the shell and both extensions.
    for fragment in ("DuckDB Shell", "iceberg", "httpfs"):
        expect(tips).to_contain_text(fragment)
236
+
237
+
238
def test_duckdb_shell_initializes(session_rest_server: str, page: Page):
    """Verify the DuckDB shell starts without postMessage cloning errors.

    Page errors are recorded from before navigation. After a fixed delay the
    test asserts that no "postMessage ... could not be cloned" error was
    raised, that no "Error initializing shell" message is visible, and that
    the `.xterm` terminal element is rendered.
    """
    # Register the listener before navigating so early errors are captured.
    errors = []
    page.on("pageerror", lambda exc: errors.append(str(exc)))

    page.goto(session_rest_server)

    # The shell container must appear before initialization is judged.
    shell_container = page.locator("#shell-container")
    expect(shell_container).to_be_visible(timeout=10000)

    # Fixed delay so errors raised during shell startup have time to reach
    # the listener; there is no positive signal to wait on for "no error".
    page.wait_for_timeout(8000)

    # Only the specific structured-clone failure is of interest here.
    post_message_errors = [
        err for err in errors if "postMessage" in err and "could not be cloned" in err
    ]
    assert not post_message_errors, (
        f"Found {len(post_message_errors)} postMessage cloning errors: {post_message_errors}"
    )

    # The UI must not be showing an initialization failure message.
    error_divs = page.locator("div:has-text('Error initializing shell')").all()
    visible_errors = [div for div in error_divs if div.is_visible()]
    assert not visible_errors, "Error message is visible in UI"

    # A rendered terminal element is taken as the sign the shell is ready.
    terminal = page.locator(".xterm")
    expect(terminal).to_be_visible(timeout=5000)
275
+
276
+
277
def test_shell_has_xterm(session_rest_server: str, page: Page):
    """Verify the XTerm.js terminal and its viewport render in the shell."""
    page.goto(session_rest_server)

    # Poll for the terminal instead of sleeping unconditionally. The 13 s
    # budget equals the former fixed 8 s wait plus Playwright's default 5 s
    # assertion timeout, so a slow start is tolerated exactly as before while
    # a fast start no longer costs 8 s.
    xterm = page.locator(".xterm")
    expect(xterm).to_be_visible(timeout=13000)

    # The terminal's viewport element must be rendered as well.
    xterm_viewport = page.locator(".xterm-viewport")
    expect(xterm_viewport).to_be_visible()
291
+
292
+
293
def test_extension_badges_visible(session_rest_server: str, page: Page):
    """Verify exactly two extension badges exist and both named ones show."""
    page.goto(session_rest_server)

    # Exactly two badges are expected on the page.
    expect(page.locator(".extension-badge")).to_have_count(2)

    # One badge per extension name, checked in this order.
    badge_selectors = (
        ".extension-badge:has-text('iceberg')",
        ".extension-badge:has-text('httpfs')",
    )
    for selector in badge_selectors:
        expect(page.locator(selector)).to_be_visible()
304
+
305
+
306
+ # =============================================================================
307
+ # Responsive Behavior Tests
308
+ # =============================================================================
309
+
310
+
311
def test_page_responsive_at_smaller_viewport(session_rest_server: str, page: Page):
    """Verify the main layout regions stay visible at a 1024x768 viewport."""
    # Shrink the viewport before the page is opened.
    smaller_viewport = {"width": 1024, "height": 768}
    page.set_viewport_size(smaller_viewport)
    page.goto(session_rest_server)

    # Container first, then the left and right sidebars.
    for selector in (".main-container", ".left-sidebar", ".right-sidebar"):
        expect(page.locator(selector)).to_be_visible()
325
+
326
+
327
def test_scrollbar_styling_applied(session_rest_server: str, page: Page):
    """Verify the left sidebar is rendered with a computed overflow-y of auto."""
    page.goto(session_rest_server)

    sidebar = page.locator(".left-sidebar")
    expect(sidebar).to_be_visible()

    # Read the computed style from the browser rather than the stylesheet.
    overflow = sidebar.evaluate("el => window.getComputedStyle(el).overflowY")
    assert overflow == "auto", f"Expected overflow-y: auto, got {overflow}"
338
+
339
+
340
+ # =============================================================================
341
+ # API Endpoint Tests (via browser fetch)
342
+ # =============================================================================
343
+
344
+
345
def test_config_endpoint_accessible(session_rest_server: str, page: Page):
    """Fetch /v1/config from inside the page and check status and payload."""
    page.goto(session_rest_server)

    # The request is issued by the browser, relative to the opened page.
    fetch_config = """
        async () => {
            const response = await fetch('/v1/config');
            return {
                status: response.status,
                data: await response.json()
            };
        }
    """
    result = page.evaluate(fetch_config)

    assert result["status"] == 200
    assert "overrides" in result["data"]
362
+
363
+
364
def test_namespaces_endpoint_accessible(session_rest_server: str, page: Page):
    """Fetch /v1/namespaces from inside the page and check it is non-empty."""
    page.goto(session_rest_server)

    # The request is issued by the browser, relative to the opened page.
    fetch_namespaces = """
        async () => {
            const response = await fetch('/v1/namespaces');
            return {
                status: response.status,
                data: await response.json()
            };
        }
    """
    result = page.evaluate(fetch_namespaces)

    assert result["status"] == 200
    payload = result["data"]
    assert "namespaces" in payload
    # The catalog under test is expected to hold at least one namespace
    # (google-research-datasets).
    assert len(payload["namespaces"]) > 0
383
+
384
+
385
def test_tables_endpoint_accessible(session_rest_server: str, page: Page):
    """Fetch the tables listing of one namespace and check it is non-empty."""
    page.goto(session_rest_server)

    # The request is issued by the browser, relative to the opened page.
    fetch_tables = """
        async () => {
            const response = await fetch('/v1/namespaces/google-research-datasets/tables');
            return {
                status: response.status,
                data: await response.json()
            };
        }
    """
    result = page.evaluate(fetch_tables)

    assert result["status"] == 200
    payload = result["data"]
    assert "identifiers" in payload
    # The namespace is expected to hold at least one table (mbpp).
    assert len(payload["identifiers"]) > 0
404
+
405
+
406
+ # =============================================================================
407
+ # JavaScript Error Tests
408
+ # =============================================================================
409
+
410
+
411
def test_no_javascript_errors_on_load(session_rest_server: str, page: Page):
    """Verify no uncaught page errors occur on load, ignoring postMessage ones."""
    errors = []

    def record_error(exc):
        errors.append(str(exc))

    # Listen before navigating so errors raised during load are captured.
    page.on("pageerror", record_error)

    page.goto(session_rest_server)
    page.wait_for_timeout(2000)

    # Errors mentioning postMessage are a known DuckDB WASM issue and are
    # tolerated; anything else counts as a failure.
    critical_errors = []
    for err in errors:
        if "postMessage" in err:
            continue
        critical_errors.append(err)

    assert len(critical_errors) == 0, f"Found JavaScript errors: {critical_errors}"
427
+
428
+
429
def test_no_console_errors(session_rest_server: str, page: Page):
    """Verify no critical error-level console messages are logged on load."""
    console_errors = []

    def handle_console(msg):
        # Keep only the text of error-level messages.
        if msg.type == "error":
            console_errors.append(msg.text)

    page.on("console", handle_console)

    page.goto(session_rest_server)
    page.wait_for_timeout(3000)

    # Some console errors are tolerated (DuckDB initialization messages);
    # only messages containing one of these markers count as real problems.
    critical_markers = (
        "Failed to load resource",
        "Uncaught",
        "SyntaxError",
        "ReferenceError",
    )
    critical_errors = [
        err for err in console_errors if any(marker in err for marker in critical_markers)
    ]

    assert len(critical_errors) == 0, f"Found critical console errors: {critical_errors}"
454
+
455
+
456
+ # =============================================================================
457
+ # Visual Elements Tests
458
+ # =============================================================================
459
+
460
+
461
def test_logo_image_loads(session_rest_server: str, page: Page):
    """Verify the logo image is rendered and its source names faceberg."""
    page.goto(session_rest_server)

    logo_img = page.locator(".logo-icon img")
    expect(logo_img).to_be_visible()

    # The image must declare a source, and that source must reference the
    # project name in any letter case.
    source = logo_img.get_attribute("src")
    assert source is not None
    assert "faceberg" in source.lower()
472
+
473
+
474
def test_color_scheme_applied(session_rest_server: str, page: Page):
    """Verify the application header is painted with a real background.

    The computed `backgroundColor` is always a string, so comparing it with
    None can never fail. The header is treated as styled when it has either a
    non-transparent background colour or a background image (which covers
    gradients).
    """
    page.goto(session_rest_server)

    header = page.locator(".app-header")
    background = header.evaluate(
        """el => {
            const style = window.getComputedStyle(el);
            return {color: style.backgroundColor, image: style.backgroundImage};
        }"""
    )

    # An unstyled element computes to a fully transparent colour and no image.
    has_solid_color = background["color"] not in ("rgba(0, 0, 0, 0)", "transparent")
    has_image = background["image"] != "none"
    assert has_solid_color or has_image, (
        f"Header should have background color, got {background}"
    )
484
+
485
+
486
def test_section_title_visible(session_rest_server: str, page: Page):
    """Verify the section title is rendered and reads "Tables"."""
    page.goto(session_rest_server)

    heading = page.locator(".section-title")
    expect(heading).to_be_visible()
    expect(heading).to_contain_text("Tables")
493
+
494
+
495
+ # =============================================================================
496
+ # Performance and Loading Tests
497
+ # =============================================================================
498
+
499
+
500
def test_page_loads_quickly(session_rest_server: str, page: Page):
    """Verify the main container becomes visible within five seconds.

    Elapsed time is taken from `time.perf_counter()`, a monotonic clock, so a
    system clock adjustment during the test cannot distort the measurement.
    """
    start_time = time.perf_counter()

    page.goto(session_rest_server)

    # The page counts as loaded once the main content is visible.
    page.locator(".main-container").wait_for(state="visible")

    load_time = time.perf_counter() - start_time

    # Page should load in under 5 seconds.
    assert load_time < 5.0, f"Page took too long to load: {load_time:.2f}s"
513
+
514
+
515
def test_fonts_load(session_rest_server: str, page: Page):
    """Verify the body's computed font family names DM Sans in any case."""
    page.goto(session_rest_server)

    body = page.locator("body")
    body_font = body.evaluate("el => window.getComputedStyle(el).fontFamily")

    # The case-insensitive match also covers the exact "DM Sans" spelling.
    uses_dm_sans = "dm sans" in body_font.lower()
    assert uses_dm_sans, f"Expected DM Sans font, got: {body_font}"
@@ -0,0 +1,175 @@
1
+ Metadata-Version: 2.4
2
+ Name: faceberg
3
+ Version: 0.1.0
4
+ Summary: Bridge HuggingFace datasets with Apache Iceberg
5
+ Project-URL: Homepage, https://github.com/kszucs/faceberg
6
+ Project-URL: Documentation, https://github.com/kszucs/faceberg
7
+ Project-URL: Repository, https://github.com/kszucs/faceberg
8
+ Author-email: Krisztian Szucs <kszucs@users.noreply.github.com>
9
+ License: Apache-2.0
10
+ License-File: LICENSE
11
+ Keywords: data-lake,datasets,huggingface,iceberg
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: Apache Software License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Requires-Python: >=3.9
21
+ Requires-Dist: click>=8.0.0
22
+ Requires-Dist: datasets>=2.0.0
23
+ Requires-Dist: fsspec>=2023.1.0
24
+ Requires-Dist: huggingface-hub>=0.20.0
25
+ Requires-Dist: jinja2>=3.1.6
26
+ Requires-Dist: litestar>=2.0.0
27
+ Requires-Dist: pyarrow>=21.0.0
28
+ Requires-Dist: pyiceberg>=0.6.0
29
+ Requires-Dist: pyyaml>=6.0
30
+ Requires-Dist: rich>=13.0.0
31
+ Requires-Dist: uuid-utils>=0.9.0
32
+ Requires-Dist: uvicorn[standard]>=0.27.0
33
+ Provides-Extra: dev
34
+ Requires-Dist: black>=23.0.0; extra == 'dev'
35
+ Requires-Dist: duckdb>=0.10.0; extra == 'dev'
36
+ Requires-Dist: mypy>=1.0.0; extra == 'dev'
37
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
38
+ Requires-Dist: pytest-playwright>=0.7.0; extra == 'dev'
39
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
40
+ Requires-Dist: requests>=2.31.0; extra == 'dev'
41
+ Requires-Dist: ruff>=0.1.0; extra == 'dev'
42
+ Description-Content-Type: text/markdown
43
+
44
+ ![Faceberg](faceberg.png)
45
+
46
+ # Faceberg
47
+
48
+ Bridge HuggingFace datasets with Apache Iceberg tables.
49
+
50
+ ## Installation
51
+
52
+ ```bash
53
+ pip install faceberg
54
+ ```
55
+
56
+ ## Quick Start
57
+
58
+ ```bash
59
+ # Create a catalog and add a dataset
60
+ faceberg mycatalog init
61
+ faceberg mycatalog add stanfordnlp/imdb --config plain_text
62
+ faceberg mycatalog sync
63
+
64
+ # Query the data
65
+ faceberg mycatalog scan default.imdb --limit 5
66
+ ```
67
+
68
+ **Python API:**
69
+
70
+ ```python
71
+ from faceberg import catalog
72
+
73
+ cat = catalog("mycatalog")
74
+ table = cat.load_table("default.imdb")
75
+ df = table.scan().to_pandas()
76
+ print(df.head())
77
+ ```
78
+
79
+ **Documentation:**
80
+ - [Getting Started](docs/index.qmd) - Quickstart guide
81
+ - [Local Catalogs](docs/local.qmd) - Use local catalogs for testing
82
+ - [DuckDB Integration](docs/integrations/duckdb.qmd) - Query with SQL
83
+ - [Pandas Integration](docs/integrations/pandas.qmd) - Load into DataFrames
84
+
85
+ ## How It Works
86
+
87
+ Faceberg creates lightweight Iceberg metadata that points to original HuggingFace dataset files:
88
+
89
+ ```
90
+ HuggingFace Dataset          Your Catalog
91
+ ┌─────────────────┐         ┌──────────────────┐
92
+ │ org/dataset     │         │ mycatalog/       │
93
+ │ ├── train.pq ◄──┼─────────┼─ default/        │
94
+ │ └── test.pq  ◄──┼─────────┼─   └── imdb/     │
95
+ └─────────────────┘         │       └── metadata/
96
+                             └──────────────────┘
97
+ ```
98
+
99
+ No data is copied—only metadata is created. Query with DuckDB, PyIceberg, Spark, or any Iceberg-compatible tool.
100
+
101
+ ## Usage
102
+
103
+ ### CLI Commands
104
+
105
+ ```bash
106
+ # Initialize catalog
107
+ faceberg mycatalog init
108
+
109
+ # Add datasets
110
+ faceberg mycatalog add openai/gsm8k --config main
111
+
112
+ # Sync datasets (creates Iceberg metadata)
113
+ faceberg mycatalog sync
114
+
115
+ # List tables
116
+ faceberg mycatalog list
117
+
118
+ # Show table info
119
+ faceberg mycatalog info default.gsm8k
120
+
121
+ # Scan data
122
+ faceberg mycatalog scan default.gsm8k --limit 10
123
+
124
+ # Start REST server
125
+ faceberg mycatalog serve --port 8181
126
+ ```
127
+
128
+ ### Remote Catalogs on HuggingFace Hub
129
+
130
+ ```bash
131
+ # Initialize remote catalog
132
+ export HF_TOKEN=your_token
133
+ faceberg org/catalog-repo init
134
+
135
+ # Add and sync datasets
136
+ faceberg org/catalog-repo add deepmind/code_contests --config default
137
+ faceberg org/catalog-repo sync
138
+
139
+ # Serve remote catalog
140
+ faceberg org/catalog-repo serve
141
+ ```
142
+
143
+ ### Query with DuckDB
144
+
145
+ ```python
146
+ import duckdb
147
+
148
+ conn = duckdb.connect()
149
+ conn.execute("INSTALL httpfs; LOAD httpfs")
150
+ conn.execute("INSTALL iceberg; LOAD iceberg")
151
+
152
+ # Query local catalog
153
+ result = conn.execute("""
154
+ SELECT * FROM iceberg_scan('mycatalog/default/imdb/metadata/v1.metadata.json')
155
+ LIMIT 10
156
+ """).fetchall()
157
+
158
+ # Query remote catalog
159
+ result = conn.execute("""
160
+ SELECT * FROM iceberg_scan('hf://datasets/org/catalog/default/table/metadata/v1.metadata.json')
161
+ LIMIT 10
162
+ """).fetchall()
163
+ ```
164
+
165
+ ## Development
166
+
167
+ ```bash
168
+ git clone https://github.com/kszucs/faceberg
169
+ cd faceberg
170
+ pip install -e ".[dev]"
171
+ ```
172
+
173
+ ## License
174
+
175
+ Apache 2.0