faceberg-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,341 @@
+ """Tests for reading catalogs using DuckDB.
+
+ These tests verify that DuckDB can properly read Iceberg tables
+ created by the Faceberg catalog from HuggingFace datasets.
+
+ DuckDB supports both file:// and hf:// URIs:
+ - file:// URIs work with local catalogs
+ - hf:// URIs work through the httpfs extension for remote catalogs
+
+ The tests automatically load the httpfs and iceberg extensions to enable
+ reading Iceberg tables with both local and remote storage.
+
+ Note: DuckDB's httpfs extension requires hf:// URLs in the format
+ hf://datasets/{org}/{dataset}/{file}. Datasets must have an organization/user
+ prefix (e.g., google-research-datasets/mbpp or glue/mrpc work, but rotten_tomatoes fails).
+ """
+
+ import duckdb
+ import pytest
+
+
+ @pytest.fixture
+ def duckdb_conn():
+     """Create a DuckDB connection for testing with required extensions."""
+     conn = duckdb.connect()
+
+     # Load httpfs extension for hf:// protocol support
+     try:
+         conn.execute("INSTALL httpfs")
+         conn.execute("LOAD httpfs")
+     except Exception as e:
+         pytest.skip(f"Could not load httpfs extension: {e}")
+
+     # Load iceberg extension for iceberg_scan support
+     try:
+         conn.execute("INSTALL iceberg")
+         conn.execute("LOAD iceberg")
+     except Exception as e:
+         pytest.skip(f"Could not load iceberg extension: {e}")
+
+     yield conn
+     conn.close()
+
+
+ @pytest.fixture
+ def mbpp_metadata_path(session_mbpp):
+     """Return path to MBPP table metadata for DuckDB.
+
+     DuckDB supports both file:// and hf:// URIs through httpfs extension.
+     """
+     # Construct path to v1.metadata.json directly from catalog URI
+     return f"{session_mbpp.uri}/google-research-datasets/mbpp/metadata/v1.metadata.json"
+
+
+ # =============================================================================
+ # A. Basic Scanning Tests
+ # =============================================================================
+
+
+ def test_duckdb_iceberg_scan_basic(duckdb_conn, mbpp_metadata_path):
+     """Test basic DuckDB iceberg_scan functionality."""
+     # Use iceberg_scan to read the table
+     result = duckdb_conn.execute(
+         f"""
+         SELECT COUNT(*) as cnt
+         FROM iceberg_scan('{mbpp_metadata_path}')
+         """
+     ).fetchone()
+
+     # Verify we got a count
+     assert result is not None
+     assert result[0] > 0
+
+
+ def test_duckdb_query_data(duckdb_conn, mbpp_metadata_path):
+     """Test querying data with WHERE clause."""
+
+     # Query with WHERE clause on split column
+     result = duckdb_conn.execute(
+         f"""
+         SELECT COUNT(*) as cnt, split
+         FROM iceberg_scan('{mbpp_metadata_path}')
+         WHERE split = 'train'
+         GROUP BY split
+         """
+     ).fetchall()
+
+     # Verify we got results
+     assert len(result) > 0
+     assert result[0][1] == "train"  # Split column value
+     assert result[0][0] > 0  # Count
+
+
+ def test_duckdb_aggregation(duckdb_conn, mbpp_metadata_path):
+     """Test aggregation queries (GROUP BY)."""
+
+     # Run GROUP BY query on split column
+     result = duckdb_conn.execute(
+         f"""
+         SELECT split, COUNT(*) as cnt
+         FROM iceberg_scan('{mbpp_metadata_path}')
+         GROUP BY split
+         ORDER BY split
+         """
+     ).fetchall()
+
+     # Verify we got multiple splits
+     assert len(result) > 0
+
+     # Verify each split has a count
+     for row in result:
+         split_name, count = row
+         assert split_name in ["train", "test", "validation", "prompt"]
+         assert count > 0
+
+
+ # =============================================================================
+ # B. Schema and Metadata Tests
+ # =============================================================================
+
+
+ def test_duckdb_read_schema(duckdb_conn, mbpp_metadata_path):
+     """Test reading table schema via DuckDB."""
+
+     # Use DESCRIBE to get schema information
+     result = duckdb_conn.execute(
+         f"""
+         DESCRIBE SELECT * FROM iceberg_scan('{mbpp_metadata_path}') LIMIT 0
+         """
+     ).fetchall()
+
+     # Verify we got column information
+     assert len(result) > 0
+
+     # Extract column names
+     column_names = [row[0] for row in result]
+
+     # Verify expected columns
+     assert "split" in column_names
+     assert "prompt" in column_names
+     assert "code" in column_names
+
+
+ def test_duckdb_table_info(duckdb_conn, mbpp_metadata_path):
+     """Test reading basic table information.
+
+     Uses google-research-datasets/mbpp which has an org prefix compatible with DuckDB's
+     httpfs hf:// URL format requirements.
+     """
+
+     # Query to verify table can be opened and scanned
+     result = duckdb_conn.execute(
+         f"""
+         SELECT COUNT(*)
+         FROM iceberg_scan('{mbpp_metadata_path}')
+         """
+     ).fetchone()
+
+     # Verify we can read the table
+     assert result is not None
+     assert result[0] > 0  # MBPP dataset has data
+
+
+ # =============================================================================
+ # C. Partition Pruning Tests
+ # =============================================================================
+
+
+ def test_duckdb_partition_filter(duckdb_conn, mbpp_metadata_path):
+     """Test partition pruning with WHERE clause."""
+
+     # Query with and without filter to compare
+     total_count = duckdb_conn.execute(
+         f"""
+         SELECT COUNT(*)
+         FROM iceberg_scan('{mbpp_metadata_path}')
+         """
+     ).fetchone()[0]
+
+     train_count = duckdb_conn.execute(
+         f"""
+         SELECT COUNT(*)
+         FROM iceberg_scan('{mbpp_metadata_path}')
+         WHERE split = 'train'
+         """
+     ).fetchone()[0]
+
+     # Verify partition pruning occurred (train < total)
+     assert train_count > 0
+     assert train_count < total_count
+
+
+ def test_duckdb_partition_comparison(session_mbpp, duckdb_conn, mbpp_metadata_path):
+     """Test that DuckDB partition filtering matches PyIceberg.
+
+     Both DuckDB (via httpfs extension) and PyIceberg (via HfFileIO) can read
+     hf:// URIs, allowing direct comparison of query results.
+     """
+     # Get count from DuckDB
+     duckdb_count = duckdb_conn.execute(
+         f"""
+         SELECT COUNT(*)
+         FROM iceberg_scan('{mbpp_metadata_path}')
+         WHERE split = 'train'
+         """
+     ).fetchone()[0]
+
+     # Get count from PyIceberg
+     table = session_mbpp.load_table("google-research-datasets.mbpp")
+     scan = table.scan().filter("split = 'train'")
+     arrow_table = scan.to_arrow()
+     pyiceberg_count = arrow_table.num_rows
+
+     # Verify counts match
+     assert duckdb_count == pyiceberg_count
+
+
+ # =============================================================================
+ # D. REST Catalog Tests
+ # =============================================================================
+
+
+ @pytest.fixture(scope="session")
+ def duckdb_rest_conn(session_rest_server):
+     """Create a DuckDB connection configured to use REST catalog.
+
+     Note: DuckDB REST catalog support is still evolving. As of DuckDB 1.4.3,
+     the REST catalog configuration may not be fully supported. These tests
+     are marked as expected to fail until DuckDB adds stable REST catalog support.
+     """
+     conn = duckdb.connect()
+
+     # Load required extensions
+     try:
+         conn.execute("INSTALL httpfs")
+         conn.execute("LOAD httpfs")
+         conn.execute("INSTALL iceberg")
+         conn.execute("LOAD iceberg")
+     except Exception as e:
+         pytest.skip(f"Could not load required extensions: {e}")
+
+     # Attach REST catalog
+     # Note: DuckDB REST catalog support requires specifying ENDPOINT in ATTACH
+     # AUTHORIZATION_TYPE 'none' disables authentication for local test server
+     conn.execute(f"""
+         ATTACH 'warehouse' AS iceberg_catalog (
+             TYPE ICEBERG,
+             ENDPOINT '{session_rest_server}',
+             AUTHORIZATION_TYPE 'none'
+         )
+     """)
+
+     yield conn
+     conn.close()
+
+
+ def test_duckdb_rest_list_tables(duckdb_rest_conn):
+     """Test listing tables via REST catalog in DuckDB."""
+     # List tables in the google-research-datasets namespace using SHOW TABLES
+     result = duckdb_rest_conn.execute("""
+         SHOW TABLES FROM "iceberg_catalog"."google-research-datasets"
+     """).fetchall()
+
+     # Verify we can list tables and mbpp is present
+     assert len(result) > 0
+     table_names = [row[0] for row in result]
+     assert "mbpp" in table_names
+
+
+ def test_duckdb_rest_query_data(duckdb_rest_conn):
+     """Test querying data via REST catalog in DuckDB."""
+     # Query with WHERE clause
+     result = duckdb_rest_conn.execute("""
+         SELECT COUNT(*) as cnt, split
+         FROM iceberg_catalog."google-research-datasets".mbpp
+         WHERE split = 'train'
+         GROUP BY split
+     """).fetchall()
+
+     # Verify we got results
+     assert len(result) > 0
+     assert result[0][1] == "train"
+     assert result[0][0] > 0
+
+
+ def test_duckdb_rest_aggregation(duckdb_rest_conn):
+     """Test aggregation queries via REST catalog."""
+     # Run GROUP BY query
+     result = duckdb_rest_conn.execute("""
+         SELECT split, COUNT(*) as cnt
+         FROM iceberg_catalog."google-research-datasets".mbpp
+         GROUP BY split
+         ORDER BY split
+     """).fetchall()
+
+     # Verify we got multiple splits
+     assert len(result) > 0
+
+     # Verify each split has a count
+     for row in result:
+         split_name, count = row
+         assert split_name in ["train", "test", "validation", "prompt"]
+         assert count > 0
+
+
+ def test_duckdb_rest_schema(duckdb_rest_conn):
+     """Test reading schema via REST catalog in DuckDB."""
+     # Use DESCRIBE to get schema
+     result = duckdb_rest_conn.execute("""
+         DESCRIBE SELECT * FROM iceberg_catalog."google-research-datasets".mbpp LIMIT 0
+     """).fetchall()
+
+     # Verify we got column information
+     assert len(result) > 0
+
+     # Extract column names
+     column_names = [row[0] for row in result]
+
+     # Verify expected columns
+     assert "split" in column_names
+     assert "prompt" in column_names
+     assert "code" in column_names
+
+
+ def test_duckdb_rest_partition_filter(duckdb_rest_conn):
+     """Test partition filtering via REST catalog."""
+     # Query with and without filter
+     total_count = duckdb_rest_conn.execute("""
+         SELECT COUNT(*)
+         FROM iceberg_catalog."google-research-datasets".mbpp
+     """).fetchone()[0]
+
+     train_count = duckdb_rest_conn.execute("""
+         SELECT COUNT(*)
+         FROM iceberg_catalog."google-research-datasets".mbpp
+         WHERE split = 'train'
+     """).fetchone()[0]
+
+     # Verify partition pruning
+     assert train_count > 0
+     assert train_count < total_count
@@ -0,0 +1,290 @@
+ """Tests for pandas.read_iceberg() integration with Faceberg catalogs.
+
+ These tests verify that pandas can read Iceberg tables created by Faceberg
+ using both catalog_properties and environment variable configuration.
+ """
+
+ import os
+
+ import pandas as pd
+ import pytest
+
+
+ @pytest.fixture
+ def catalog_properties(session_mbpp):
+     """Return catalog properties for pandas.read_iceberg()."""
+     # Use appropriate catalog implementation based on URI scheme
+     if session_mbpp.uri.startswith("hf://"):
+         catalog_impl = "faceberg.catalog.RemoteCatalog"
+     else:
+         catalog_impl = "faceberg.catalog.LocalCatalog"
+
+     return {"py-catalog-impl": catalog_impl, "uri": session_mbpp.uri}
+
+
+ # =============================================================================
+ # A. Basic pandas.read_iceberg() Tests
+ # =============================================================================
+
+
+ def test_read_iceberg_with_catalog_properties(catalog_properties):
+     """Test reading table using pandas with catalog_properties."""
+     df = pd.read_iceberg(
+         table_identifier="google-research-datasets.mbpp",
+         catalog_name="test_catalog",  # Name doesn't matter when passing properties
+         catalog_properties=catalog_properties,
+         limit=10,
+     )
+
+     # Verify DataFrame was created
+     assert isinstance(df, pd.DataFrame)
+     assert len(df) == 10
+     assert len(df.columns) > 0
+
+     # Verify expected columns
+     assert "split" in df.columns
+     assert "prompt" in df.columns
+     assert "code" in df.columns
+
+
+ def test_read_iceberg_with_env_vars(catalog_properties):
+     """Test reading table using pandas with environment variables.
+
+     Note: catalog_properties is the recommended approach for programmatic usage.
+     This test demonstrates that env vars can provide the URI, combined with
+     catalog_properties for py-catalog-impl to bypass PyIceberg's URI inference.
+     """
+     catalog_uri = catalog_properties["uri"]
+
+     # Set environment variables
+     os.environ["PYICEBERG_CATALOG__TEST_CATALOG__PY_CATALOG_IMPL"] = "faceberg.catalog.LocalCatalog"
+     os.environ["PYICEBERG_CATALOG__TEST_CATALOG__URI"] = catalog_uri
+
+     try:
+         # Pass py-catalog-impl and uri in catalog_properties
+         # Env vars can also be used, but catalog_properties takes precedence
+         df = pd.read_iceberg(
+             table_identifier="google-research-datasets.mbpp",
+             catalog_name="test_catalog",
+             catalog_properties=catalog_properties,
+             limit=10,
+         )
+
+         # Verify DataFrame was created
+         assert isinstance(df, pd.DataFrame)
+         assert len(df) == 10
+         assert "split" in df.columns
+     finally:
+         # Clean up environment variables
+         os.environ.pop("PYICEBERG_CATALOG__TEST_CATALOG__PY_CATALOG_IMPL", None)
+         os.environ.pop("PYICEBERG_CATALOG__TEST_CATALOG__URI", None)
+
+
+ def test_read_iceberg_all_rows(catalog_properties):
+     """Test reading all rows without limit."""
+     df = pd.read_iceberg(
+         table_identifier="google-research-datasets.mbpp",
+         catalog_name="test",
+         catalog_properties=catalog_properties,
+     )
+
+     # Verify we got data
+     assert len(df) > 0
+     assert "split" in df.columns
+
+     # Verify multiple splits exist
+     split_values = df["split"].unique()
+     assert len(split_values) > 1
+
+
+ # =============================================================================
+ # B. Column Selection Tests
+ # =============================================================================
+
+
+ def test_read_iceberg_column_selection(catalog_properties):
+     """Test reading specific columns - both multiple and single column."""
+
+     # Test multiple columns
+     df = pd.read_iceberg(
+         table_identifier="google-research-datasets.mbpp",
+         catalog_name="test",
+         catalog_properties=catalog_properties,
+         columns=["prompt", "code"],
+         limit=5,
+     )
+     assert list(df.columns) == ["prompt", "code"]
+     assert "split" not in df.columns
+     assert len(df) == 5
+
+     # Test single column
+     df = pd.read_iceberg(
+         table_identifier="google-research-datasets.mbpp",
+         catalog_name="test",
+         catalog_properties=catalog_properties,
+         columns=["split"],
+         limit=10,
+     )
+     assert list(df.columns) == ["split"]
+     assert len(df) == 10
+
+
+ # =============================================================================
+ # C. Row Filtering Tests
+ # =============================================================================
+
+
+ def test_read_iceberg_row_filtering(catalog_properties):
+     """Test various row filtering scenarios."""
+
+     # Test filtering by partition column
+     df = pd.read_iceberg(
+         table_identifier="google-research-datasets.mbpp",
+         catalog_name="test",
+         catalog_properties=catalog_properties,
+         row_filter="split = 'train'",
+         limit=20,
+     )
+     assert len(df) == 20
+     assert all(df["split"] == "train")
+
+     # Test filtering with IN clause
+     df = pd.read_iceberg(
+         table_identifier="google-research-datasets.mbpp",
+         catalog_name="test",
+         catalog_properties=catalog_properties,
+         row_filter="split IN ('train', 'test')",
+         limit=30,
+     )
+     unique_splits = df["split"].unique()
+     assert set(unique_splits).issubset({"train", "test"})
+     assert "validation" not in unique_splits
+     assert len(df) == 30
+
+     # Test filtering by non-partition column (task_id exists in mbpp and is an integer)
+     df = pd.read_iceberg(
+         table_identifier="google-research-datasets.mbpp",
+         catalog_name="test",
+         catalog_properties=catalog_properties,
+         row_filter="task_id = 602",
+         limit=10,
+     )
+     assert len(df) <= 10  # May be less if task_id=602 doesn't have 10 rows
+     if len(df) > 0:
+         assert all(df["task_id"] == 602)
+
+
+ # =============================================================================
+ # D. Combined Filter and Column Selection Tests
+ # =============================================================================
+
+
+ def test_read_iceberg_filter_and_column_selection(catalog_properties):
+     """Test combining row filters and column selection."""
+
+     # Test basic filter with column selection
+     df = pd.read_iceberg(
+         table_identifier="google-research-datasets.mbpp",
+         catalog_name="test",
+         catalog_properties=catalog_properties,
+         columns=["prompt", "code"],
+         row_filter="split = 'train'",
+         limit=5,
+     )
+     assert list(df.columns) == ["prompt", "code"]
+     # Note: Some versions may optimize away rows if columns don't include filter columns
+     assert len(df) <= 5  # May be less if optimizer is aggressive
+
+     # Test complex filtering with column selection (task_id is an integer column)
+     df = pd.read_iceberg(
+         table_identifier="google-research-datasets.mbpp",
+         catalog_name="test",
+         catalog_properties=catalog_properties,
+         columns=["prompt"],
+         row_filter="split = 'train' AND task_id = 602",
+         limit=3,
+     )
+     assert list(df.columns) == ["prompt"]
+     # Note: Filter may be optimized differently when split/task_id not in projection
+     assert len(df) <= 3
+
+
+ # =============================================================================
+ # E. Edge Cases and Error Handling
+ # =============================================================================
+
+
+ def test_read_iceberg_empty_result(catalog_properties):
+     """Test reading with filter that returns no rows."""
+
+     df = pd.read_iceberg(
+         table_identifier="google-research-datasets.mbpp",
+         catalog_name="test",
+         catalog_properties=catalog_properties,
+         row_filter="split = 'nonexistent'",
+     )
+
+     # Verify empty DataFrame with correct schema
+     assert len(df) == 0
+     assert "split" in df.columns
+     assert "prompt" in df.columns
+     assert "code" in df.columns
+
+
+ def test_read_iceberg_invalid_table(catalog_properties):
+     """Test reading non-existent table."""
+
+     with pytest.raises(Exception):  # Will raise NoSuchTableError
+         pd.read_iceberg(
+             table_identifier="default.nonexistent_table",
+             catalog_name="test",
+             catalog_properties=catalog_properties,
+         )
+
+
+ def test_read_iceberg_case_sensitive_false(catalog_properties):
+     """Test case-insensitive column matching."""
+
+     df = pd.read_iceberg(
+         table_identifier="google-research-datasets.mbpp",
+         catalog_name="test",
+         catalog_properties=catalog_properties,
+         columns=["PROMPT", "CODE"],  # Uppercase column names
+         case_sensitive=False,
+         limit=5,
+     )
+
+     # Should still work with case-insensitive matching
+     assert len(df) == 5
+     assert len(df.columns) == 2
+
+
+ # =============================================================================
+ # F. Data Type Verification Tests
+ # =============================================================================
+
+
+ def test_read_iceberg_data_integrity(catalog_properties):
+     """Test that data types and content are valid."""
+
+     df = pd.read_iceberg(
+         table_identifier="google-research-datasets.mbpp",
+         catalog_name="test",
+         catalog_properties=catalog_properties,
+         limit=10,
+     )
+
+     # Verify data types (the string dtype name varies across pandas versions)
+     assert df["prompt"].dtype.name in ["object", "string", "str"]  # String type
+     assert df["code"].dtype.name in ["object", "string", "str"]  # String type
+     assert df["task_id"].dtype.name in ["int32", "int64"]  # Integer type
+     assert df["split"].dtype.name in ["object", "string", "str"]  # String type
+
+     # Verify prompt column contains actual text
+     assert all(df["prompt"].str.len() > 0)
+
+     # Verify code column contains actual code
+     assert all(df["code"].str.len() > 0)
+
+     # Verify split has valid values
+     assert all(df["split"].isin(["train", "test", "validation", "prompt"]))
@@ -0,0 +1,62 @@
+ """Tests for CLI commands."""
+
+ from click.testing import CliRunner
+
+ from faceberg import config as cfg
+ from faceberg.cli import main
+
+
+ def test_list_command_with_tree_view(tmp_path):
+     """Test list command uses CatalogTreeView for rich display."""
+     # Create a local catalog
+     catalog_dir = tmp_path / "test_catalog"
+     catalog_dir.mkdir()
+
+     # Create config with some tables
+     config = cfg.Config()
+     config["default"] = cfg.Namespace()
+     config["default"]["imdb"] = cfg.Dataset(repo="stanfordnlp/imdb", config="plain_text")
+     config["default"]["squad"] = cfg.Dataset(repo="squad", config="plain_text")
+     config["analytics"] = cfg.Namespace()
+     config["analytics"]["aggregated"] = cfg.Table(uri="")
+
+     # Save config
+     config.to_yaml(catalog_dir / "faceberg.yml")
+
+     # Run list command
+     runner = CliRunner()
+     result = runner.invoke(main, [str(catalog_dir), "list"])
+
+     # Verify command succeeded
+     assert result.exit_code == 0
+
+     # Inspect the rich-formatted output
+     output = result.output
+
+     # Verify namespaces are shown
+     assert "default" in output
+     assert "analytics" in output
+
+     # Verify dataset nodes are shown with their icons
+     assert "imdb" in output
+     assert "squad" in output
+     assert "aggregated" in output
+
+     # Verify dataset metadata is shown (repo info)
+     assert "stanfordnlp/imdb" in output
+
+
+ def test_list_command_empty_catalog(tmp_path):
+     """Test list command with empty catalog."""
+     catalog_dir = tmp_path / "empty_catalog"
+     catalog_dir.mkdir()
+
+     # Create empty config
+     config = cfg.Config()
+     config.to_yaml(catalog_dir / "faceberg.yml")
+
+     runner = CliRunner()
+     result = runner.invoke(main, [str(catalog_dir), "list"])
+
+     # Command should succeed even with empty catalog
+     assert result.exit_code == 0