contoso 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,30 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.so
5
+ .Python
6
+ build/
7
+ develop-eggs/
8
+ dist/
9
+ downloads/
10
+ eggs/
11
+ .eggs/
12
+ lib/
13
+ lib64/
14
+ parts/
15
+ sdist/
16
+ var/
17
+ wheels/
18
+ *.egg-info/
19
+ .installed.cfg
20
+ *.egg
21
+ .pytest_cache/
22
+ .coverage
23
+ htmlcov/
24
+ .tox/
25
+ .nox/
26
+ .hypothesis/
27
+ *.log
28
+ .venv/
29
+ venv/
30
+ ENV/
contoso-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Alejandro Hagan
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
contoso-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,92 @@
1
+ Metadata-Version: 2.4
2
+ Name: contoso
3
+ Version: 0.1.0
4
+ Summary: Synthetic business datasets for the fictional Contoso company
5
+ Project-URL: Homepage, https://codeberg.org/usrbinr/contoso.py
6
+ Project-URL: Repository, https://codeberg.org/usrbinr/contoso.py
7
+ Project-URL: Issues, https://codeberg.org/usrbinr/contoso.py/issues
8
+ Author-email: Alejandro Hagan <alejandro.hagan@outlook.com>
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: business,contoso,dataset,duckdb,sales
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Topic :: Database
23
+ Classifier: Topic :: Scientific/Engineering
24
+ Requires-Python: >=3.9
25
+ Requires-Dist: duckdb>=1.0.0
26
+ Provides-Extra: dev
27
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
28
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
29
+ Description-Content-Type: text/markdown
30
+
31
+ # contoso
32
+
33
+ Synthetic business datasets for the fictional Contoso company. This Python package provides access to 8 interconnected business datasets stored as Parquet files on Cloudflare R2, loaded efficiently via DuckDB.
34
+
35
+ ## Installation
36
+
37
+ ```bash
38
+ pip install contoso
39
+ ```
40
+
41
+ Or install from source:
42
+
43
+ ```bash
44
+ pip install git+https://codeberg.org/usrbinr/contoso.py
45
+ ```
46
+
47
+ ## Usage
48
+
49
+ ```python
50
+ import contoso
51
+
52
+ # Create a DuckDB connection with Contoso datasets
53
+ db = contoso.create_duckdb(size="small")
54
+
55
+ # Access tables (lazy evaluation via DuckDB relations; .fetchdf() requires pandas)
56
+ db["sales"].limit(10).fetchdf()
57
+ db["customer"].filter("Country = 'United States'").fetchdf()
58
+
59
+ # Available tables
60
+ # - sales: Sales transaction data
61
+ # - product: Product details (name, manufacturer, category)
62
+ # - customer: Customer demographics and geography
63
+ # - store: Store locations and attributes
64
+ # - fx: Foreign exchange rates
65
+ # - calendar: Date dimension table
66
+ # - orders: Order header information
67
+ # - orderrows: Order line items
68
+
69
+ # Launch interactive web UI
70
+ contoso.launch_ui(db["con"])
71
+
72
+ # Close connection when done
73
+ db["con"].close()
74
+ ```
75
+
76
+ ## Dataset Sizes
77
+
78
+ | Size | Sales Rows |
79
+ |--------|--------------|
80
+ | small | ~8,000 |
81
+ | medium | ~2.3 million |
82
+ | large | ~47 million |
83
+ | mega | ~237 million |
84
+
85
+ ## Requirements
86
+
87
+ - Python >= 3.9
88
+ - DuckDB >= 1.0.0
89
+
90
+ ## License
91
+
92
+ MIT
@@ -0,0 +1,62 @@
1
+ # contoso
2
+
3
+ Synthetic business datasets for the fictional Contoso company. This Python package provides access to 8 interconnected business datasets stored as Parquet files on Cloudflare R2, loaded efficiently via DuckDB.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install contoso
9
+ ```
10
+
11
+ Or install from source:
12
+
13
+ ```bash
14
+ pip install git+https://codeberg.org/usrbinr/contoso.py
15
+ ```
16
+
17
+ ## Usage
18
+
19
+ ```python
20
+ import contoso
21
+
22
+ # Create a DuckDB connection with Contoso datasets
23
+ db = contoso.create_duckdb(size="small")
24
+
25
+ # Access tables (lazy evaluation via DuckDB relations; .fetchdf() requires pandas)
26
+ db["sales"].limit(10).fetchdf()
27
+ db["customer"].filter("Country = 'United States'").fetchdf()
28
+
29
+ # Available tables
30
+ # - sales: Sales transaction data
31
+ # - product: Product details (name, manufacturer, category)
32
+ # - customer: Customer demographics and geography
33
+ # - store: Store locations and attributes
34
+ # - fx: Foreign exchange rates
35
+ # - calendar: Date dimension table
36
+ # - orders: Order header information
37
+ # - orderrows: Order line items
38
+
39
+ # Launch interactive web UI
40
+ contoso.launch_ui(db["con"])
41
+
42
+ # Close connection when done
43
+ db["con"].close()
44
+ ```
45
+
46
+ ## Dataset Sizes
47
+
48
+ | Size | Sales Rows |
49
+ |--------|--------------|
50
+ | small | ~8,000 |
51
+ | medium | ~2.3 million |
52
+ | large | ~47 million |
53
+ | mega | ~237 million |
54
+
55
+ ## Requirements
56
+
57
+ - Python >= 3.9
58
+ - DuckDB >= 1.0.0
59
+
60
+ ## License
61
+
62
+ MIT
@@ -0,0 +1,51 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "contoso"
7
+ version = "0.1.0"
8
+ description = "Synthetic business datasets for the fictional Contoso company"
9
+ readme = "README.md"
10
+ license = {text = "MIT"}
11
+ license-files = ["LICENSE"]
12
+ authors = [
13
+ { name = "Alejandro Hagan", email = "alejandro.hagan@outlook.com" }
14
+ ]
15
+ requires-python = ">=3.9"
16
+ classifiers = [
17
+ "Development Status :: 4 - Beta",
18
+ "Intended Audience :: Developers",
19
+ "Intended Audience :: Science/Research",
20
+ "License :: OSI Approved :: MIT License",
21
+ "Programming Language :: Python :: 3",
22
+ "Programming Language :: Python :: 3.9",
23
+ "Programming Language :: Python :: 3.10",
24
+ "Programming Language :: Python :: 3.11",
25
+ "Programming Language :: Python :: 3.12",
26
+ "Programming Language :: Python :: 3.13",
27
+ "Topic :: Database",
28
+ "Topic :: Scientific/Engineering",
29
+ ]
30
+ keywords = ["contoso", "duckdb", "dataset", "sales", "business"]
31
+ dependencies = [
32
+ "duckdb>=1.0.0",
33
+ ]
34
+
35
+ [project.optional-dependencies]
36
+ dev = [
37
+ "pytest>=7.0.0",
38
+ "pytest-cov>=4.0.0",
39
+ ]
40
+
41
+ [project.urls]
42
+ Homepage = "https://codeberg.org/usrbinr/contoso.py"
43
+ Repository = "https://codeberg.org/usrbinr/contoso.py"
44
+ Issues = "https://codeberg.org/usrbinr/contoso.py/issues"
45
+
46
+ [tool.hatch.build.targets.wheel]
47
+ packages = ["src/contoso"]
48
+
49
+ [tool.pytest.ini_options]
50
+ testpaths = ["tests"]
51
+ python_files = ["test_*.py"]
@@ -0,0 +1,11 @@
1
+ """
2
+ Contoso: Synthetic business datasets for the fictional Contoso company.
3
+
4
+ This package provides access to 8 interconnected business datasets stored
5
+ as Parquet files on Cloudflare R2, loaded efficiently via DuckDB.
6
+ """
7
+
8
+ from contoso.database import create_duckdb, launch_ui
9
+
10
+ __version__ = "0.1.0"
11
+ __all__ = ["create_duckdb", "launch_ui"]
@@ -0,0 +1,117 @@
1
+ """
2
+ Database functions for creating and interacting with Contoso DuckDB databases.
3
+ """
4
+
5
+ from typing import Literal
6
+
7
+ import duckdb
8
+
9
# Base URL of the public Cloudflare R2 bucket hosting the Parquet files.
# create_duckdb() builds each file URL as
# "{R2_BASE_URL}/{folder}/{table}.parquet".
R2_BASE_URL = "https://pub-6aa63519a4b945948cb8c88949b320ca.r2.dev"

# Maps the user-facing ``size`` argument to the bucket folder for that dataset.
# NOTE(review): the folder suffixes (100k, 1m, ...) do not match the sales row
# counts quoted in the docs (~8,000, ~2.3 million, ...); presumably they count
# a different entity -- confirm.
SIZE_TO_FOLDER = {
    "small": "contoso_100k",
    "medium": "contoso_1m",
    "large": "contoso_10m",
    "mega": "contoso_100m",
}

# Names of the tables exposed as views and returned as dictionary keys by
# create_duckdb(); each is read from "<name>.parquet" inside the size folder.
TABLES = ["sales", "product", "customer", "store", "orders", "orderrows", "fx", "calendar"]
22
+
23
+
24
def create_duckdb(
    size: Literal["small", "medium", "large", "mega"] = "small"
) -> dict:
    """
    Create a DuckDB connection with Contoso datasets loaded from cloud storage.

    The datasets are stored as Parquet files on Cloudflare R2. Each table is
    registered in a fresh in-memory DuckDB database as a view over its remote
    Parquet file.

    Parameters
    ----------
    size : {"small", "medium", "large", "mega"}, default "small"
        Dataset size to load (matched case-insensitively):
        - small: ~8,000 sales rows
        - medium: ~2.3 million rows
        - large: ~47 million rows
        - mega: ~237 million rows

    Returns
    -------
    dict
        A dictionary containing:
        - 'sales', 'product', 'customer', 'store', 'fx', 'calendar',
          'orders', 'orderrows': DuckDB Relation objects (lazy evaluation)
        - 'con': The DuckDB connection object

    Raises
    ------
    ValueError
        If ``size`` is not a string or is not one of the supported sizes.

    Notes
    -----
    The caller owns the returned connection and should close it when done.
    If setup fails part-way (for example while installing the extension or
    creating a view), the connection is closed before the error propagates.

    Examples
    --------
    >>> import contoso
    >>> db = contoso.create_duckdb(size="small")
    >>> db["sales"].limit(5).fetchdf()  # fetchdf() requires pandas
    >>> db["con"].close()
    """
    # Non-string input is rejected with the same ValueError as an unknown
    # size, instead of an AttributeError from calling .lower() on it.
    normalized = size.lower() if isinstance(size, str) else None
    if normalized not in SIZE_TO_FOLDER:
        raise ValueError(
            f"Invalid size {size!r}. Must be one of: {', '.join(SIZE_TO_FOLDER)}"
        )

    folder = SIZE_TO_FOLDER[normalized]

    con = duckdb.connect()
    try:
        # httpfs lets DuckDB read Parquet files over HTTPS.
        con.execute("INSTALL httpfs; LOAD httpfs;")

        # Table names and URLs come from module constants, never from user
        # input, so interpolating them into the SQL text is safe here.
        for table in TABLES:
            parquet_url = f"{R2_BASE_URL}/{folder}/{table}.parquet"
            con.execute(
                f"CREATE VIEW {table} AS SELECT * FROM read_parquet('{parquet_url}');"
            )

        result = {table: con.table(table) for table in TABLES}
    except BaseException:
        # Do not leak the connection when setup fails or is interrupted;
        # the original error is re-raised unchanged.
        con.close()
        raise

    result["con"] = con
    return result
81
+
82
+
83
def launch_ui(con: duckdb.DuckDBPyConnection) -> None:
    """
    Launch the DuckDB UI in your browser.

    Installs, loads and starts the DuckDB ``ui`` extension on an active
    DuckDB connection, allowing interactive exploration via a web interface.

    Parameters
    ----------
    con : duckdb.DuckDBPyConnection
        A valid DuckDB connection object (e.g., from create_duckdb()["con"])

    Returns
    -------
    None
        The function is called for its side effects (launches browser).

    Raises
    ------
    ValueError
        If the connection is None. No other validation is performed here;
        errors raised by DuckDB (e.g. for a closed connection) propagate
        unchanged.

    Examples
    --------
    >>> import contoso
    >>> db = contoso.create_duckdb()
    >>> contoso.launch_ui(db["con"])
    >>> db["con"].close()
    """
    if con is None:
        raise ValueError("Database connection is None. Please provide a valid connection.")

    con.execute("INSTALL ui;")
    # Load explicitly, mirroring the INSTALL/LOAD pair used for httpfs in
    # create_duckdb(), rather than relying on extension autoloading.
    con.execute("LOAD ui;")
    con.execute("CALL start_ui();")
@@ -0,0 +1,58 @@
1
+ """Tests for contoso database functions."""
2
+
3
+ import pytest
4
+
5
+ import contoso
6
+
7
+
8
class TestCreateDuckdb:
    """Tests for create_duckdb function.

    Except for the invalid-size case, these tests call the real
    create_duckdb(), which installs the httpfs extension and creates views
    over remote Parquet URLs -- presumably this needs network access.
    Connections are closed in ``finally`` blocks so that a failing assertion
    does not leak them.
    """

    def test_creates_all_expected_tables(self):
        """Test that create_duckdb returns exactly the expected keys."""
        db = contoso.create_duckdb(size="small")
        try:
            expected_keys = {
                "sales", "product", "customer", "store", "fx",
                "calendar", "orders", "orderrows", "con",
            }
            assert set(db.keys()) == expected_keys
        finally:
            db["con"].close()

    def test_tables_are_queryable(self):
        """Test that created tables can be queried."""
        table_names = [
            "sales", "product", "customer", "store",
            "fx", "calendar", "orders", "orderrows",
        ]
        db = contoso.create_duckdb(size="small")
        try:
            # The meaningful check is that each query runs without raising.
            for table_name in table_names:
                result = db[table_name].limit(1).fetchall()
                assert result is not None
        finally:
            db["con"].close()

    def test_invalid_size_raises_error(self):
        """Test that invalid size raises ValueError."""
        with pytest.raises(ValueError, match="Invalid size"):
            contoso.create_duckdb(size="invalid")

    def test_size_case_insensitive(self):
        """Test that size parameter is case insensitive."""
        for size in ("SMALL", "Small"):
            db = contoso.create_duckdb(size=size)
            try:
                assert "con" in db
            finally:
                db["con"].close()
50
+
51
+
52
class TestLaunchUi:
    """Tests covering launch_ui's argument validation."""

    def test_none_connection_raises_error(self):
        """Passing None instead of a connection must raise ValueError."""
        expected_message = "connection is None"
        with pytest.raises(ValueError, match=expected_message):
            contoso.launch_ui(None)