PyPI - datadepot - Versions diffs - 0.0.35.post0__tar.gz - Mend

datadepot 0.0.35.post0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

datadepot-0.0.35.post0/.github/workflows/release.yaml +33 -0
datadepot-0.0.35.post0/.gitignore +6 -0
datadepot-0.0.35.post0/LICENSE +21 -0
datadepot-0.0.35.post0/MANIFEST.in +3 -0
datadepot-0.0.35.post0/PKG-INFO +107 -0
datadepot-0.0.35.post0/README.md +69 -0
datadepot-0.0.35.post0/README.pdf +0 -0
datadepot-0.0.35.post0/datadepot/__init__.py +25 -0
datadepot-0.0.35.post0/datadepot/_version.py +34 -0
datadepot-0.0.35.post0/datadepot/datasets.py +62 -0
datadepot-0.0.35.post0/datadepot.egg-info/PKG-INFO +107 -0
datadepot-0.0.35.post0/datadepot.egg-info/SOURCES.txt +15 -0
datadepot-0.0.35.post0/datadepot.egg-info/dependency_links.txt +1 -0
datadepot-0.0.35.post0/datadepot.egg-info/requires.txt +2 -0
datadepot-0.0.35.post0/datadepot.egg-info/top_level.txt +1 -0
datadepot-0.0.35.post0/pyproject.toml +38 -0
datadepot-0.0.35.post0/setup.cfg +4 -0

datadepot-0.0.35.post0/.github/workflows/release.yaml ADDED Viewed

@@ -0,0 +1,33 @@
+name: Release
+on:
+  push:
+    tags:
+      - 'v*'
+jobs:
+  publish:
+    runs-on: ubuntu-latest
+    environment: pypi
+    permissions:
+      # Needed for PyPI trusted publishing (OIDC); the publish step below is given no API token.
+      id-token: write
+      contents: read
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          # Fetch full history and tags; the version is derived from git by setuptools_scm.
+          fetch-depth: 0
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          # Quoted so YAML keeps it a string; an unquoted 3.10 would be parsed as the number 3.1.
+          python-version: '3.11'
+      - name: Install build tools
+        run: pip install build setuptools_scm wheel
+      - name: Build package
+        run: python -m build
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1

datadepot-0.0.35.post0/.gitignore ADDED Viewed

@@ -0,0 +1,6 @@
+# Ignore build output
+build/
+dist/
+# Bytecode caches at any depth (the package directory is datadepot/, not dsf/)
+__pycache__/
+*.egg-info/
+*.egg

datadepot-0.0.35.post0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+Copyright (c) 2025 Jeroen van Raak
+MIT License
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

datadepot-0.0.35.post0/MANIFEST.in ADDED Viewed

@@ -0,0 +1,3 @@
+include README.md
+include LICENSE
+# Dataset files that datadepot.load() opens from the "datadepot.data" resource package.
+recursive-include datadepot/data *

datadepot-0.0.35.post0/PKG-INFO ADDED Viewed

@@ -0,0 +1,107 @@
+Metadata-Version: 2.4
+Name: datadepot
+Version: 0.0.35.post0
+Summary: The datadepot package provides a collection of datasets used in the book Data Science Foundations and Machine Learning with Python.
+Author-email: Jeroen van Raak <j.j.f.vanraak@uva.nl>
+License: Copyright (c) 2025 Jeroen van Raak
+        MIT License
+        Permission is hereby granted, free of charge, to any person obtaining a copy
+        of this software and associated documentation files (the "Software"), to deal
+        in the Software without restriction, including without limitation the rights
+        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+        copies of the Software, and to permit persons to whom the Software is
+        furnished to do so, subject to the following conditions:
+        The above copyright notice and this permission notice shall be included in all
+        copies or substantial portions of the Software.
+        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+        SOFTWARE.
+Project-URL: Homepage, https://github.com/vanraak/datadepot
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: pandas>=1.5
+Requires-Dist: numpy>=1.21
+Dynamic: license-file
+# Package `datadepot`
+**Package ‘datadepot’**
+**Title** \`\`DataDepot’’
+**Description**
+The **datadepot** package provides a collection of datasets used in the book `Data Science Foundations and Machine Learning with Python`.
+**URL** <https://github.com/vanraak/datadepot>
+**Depends** Python (\>= 3.8) and Pandas (\>2.0)
+**License** GPL (\>= 2)
+**Repository** Pypi
+**Authors** Jeroen van Raak and Reza Mohammadi
+**Maintainer** Jeroen van Raak, <j.j.f.vanraak@uva.nl>
+**NeedsCompilation** no
+**Installation**
+    pip install datadepot
+**Usage**
+    import datadepot
+    df=datadepot.load('<dataset>')
+Replace <dataset> with the name of the dataset, such as ‘bank’, ‘house’, or ‘churn’.
+**Example**
+    df=datadepot.load('bank') # Load the bank dataset.
+**Datasets**
+The following datasets are included:
+- adult
+- advertising
+- bank
+- caravan
+- cereal
+- churn
+- churn_ibm
+- churn_tel
+- corona
+- diamonds
+- drug
+- gapminder
+- house
+- house_price
+- insurance
+- marketing
+- mpg
+- red_wines
+- risk
+- white_wines
+**Documentation**
+The full documentation is available at:
+<https://github.com/vanraak/datadepot/blob/main/README.pdf>

datadepot-0.0.35.post0/README.md ADDED Viewed

@@ -0,0 +1,69 @@
+# Package `datadepot`
+**Package ‘datadepot’**
+**Title** “DataDepot”
+**Description**
+The **datadepot** package provides a collection of datasets used in the book `Data Science Foundations and Machine Learning with Python`.
+**URL** <https://github.com/vanraak/datadepot>
+**Depends** Python (\>= 3.10), pandas (\>= 1.5), and NumPy (\>= 1.21)
+**License** MIT
+**Repository** Pypi
+**Authors** Jeroen van Raak and Reza Mohammadi
+**Maintainer** Jeroen van Raak, <j.j.f.vanraak@uva.nl>
+**NeedsCompilation** no
+**Installation**
+    pip install datadepot
+**Usage**
+    import datadepot
+    df=datadepot.load('<dataset>')
+Replace `<dataset>` with the name of the dataset, such as ‘bank’, ‘house’, or ‘churn’.
+**Example**
+    df=datadepot.load('bank') # Load the bank dataset.
+**Datasets**
+The following datasets are included:
+- adult
+- advertising
+- bank
+- caravan
+- cereal
+- churn
+- churn_ibm
+- churn_tel
+- corona
+- diamonds
+- drug
+- gapminder
+- house
+- house_price
+- insurance
+- marketing
+- mpg
+- red_wines
+- risk
+- transcripts
+- white_wines
+**Documentation**
+The full documentation is available at:
+<https://github.com/vanraak/datadepot/blob/main/README.pdf>

datadepot-0.0.35.post0/README.pdf ADDED Viewed

Binary file

datadepot-0.0.35.post0/datadepot/__init__.py ADDED Viewed

@@ -0,0 +1,25 @@
+from ._version import __version__
+from .datasets import load, datasets, dataset_table
+def version():
+    """Prints DataDepot package version."""
+    print(f"Version: {__version__}")
+__all__ = ["load", "datasets", "dataset_table"]
+__doc__ = f"""
+DataDepot package: Example datasets for Python users
+Available datasets:
+{dataset_table()}
+# Load a dataset from the DataDepot package as a pandas DataFrame
+>>> import datadepot
+>>> df = datadepot.load('<dataset_name>')  # Load a dataset as a pandas DataFrame
+# Show the version of the DataDepot library:
+>>> datadepot.version()  # DataDepot version {__version__}
+"""

datadepot-0.0.35.post0/datadepot/_version.py ADDED Viewed

@@ -0,0 +1,34 @@
+# file generated by setuptools-scm
+# don't change, don't track in version control
+__all__ = [
+    "__version__",
+    "__version_tuple__",
+    "version",
+    "version_tuple",
+    "__commit_id__",
+    "commit_id",
+]
+TYPE_CHECKING = False
+if TYPE_CHECKING:
+    from typing import Tuple
+    from typing import Union
+    VERSION_TUPLE = Tuple[Union[int, str], ...]
+    COMMIT_ID = Union[str, None]
+else:
+    VERSION_TUPLE = object
+    COMMIT_ID = object
+version: str
+__version__: str
+__version_tuple__: VERSION_TUPLE
+version_tuple: VERSION_TUPLE
+commit_id: COMMIT_ID
+__commit_id__: COMMIT_ID
+__version__ = version = '0.0.35.post0'
+__version_tuple__ = version_tuple = (0, 0, 35, 'post0')
+__commit_id__ = commit_id = 'g34844a160'

datadepot-0.0.35.post0/datadepot/datasets.py ADDED Viewed

@@ -0,0 +1,62 @@
+import pandas as pd
+import importlib.resources
+datasets = {  # dataset name -> one-line description; load() opens "<name>.pkl" for a matching key
+    "adult": "Adult census income dataset.",
+    "advertising": "The dataset from an organization’s social media ad campaign.",
+    "bank": "Bank marketing dataset.",
+    "caravan": "Caravan insurance dataset.",
+    "cereal": "Cereal nutrition dataset.",
+    "churn": "Credit card churn dataset.",
+    "churn_ibm": "Telecom churn dataset (IBM).",
+    "churn_tel": "Telecom churn dataset (MLC).",
+    "corona": "COVID-19 related dataset.",
+    "diamonds": "Diamonds dataset.",
+    "drug": "Drug classification dataset.",
+    "gapminder": "Gapminder dataset.",
+    "house": "House sales dataset.",
+    "house_price": "House price dataset.",
+    "insurance": "Insurance dataset.",
+    "marketing": "Marketing campaigns dataset.",
+    "mpg": "Auto MPG dataset.",
+    "red_wines": "Red wine quality dataset.",
+    "risk": "Risk analysis dataset.",
+    "transcripts": "Earnings conference call transcripts",
+    "white_wines": "White wine quality dataset.",
+}
+def load(name: str) -> pd.DataFrame:
+    name = name.strip().lower().replace("_", "")  # remove underscores
+    # Build a lookup dict: stripped keys -> canonical keys
+    lookup = {k.replace("_", ""): k for k in datasets.keys()}
+    if name in lookup:
+        canonical_name = lookup[name]  # get the actual dataset key (with underscore)
+        try:
+            with (
+                importlib.resources.files("datadepot.data")
+                .joinpath(f"{canonical_name}.pkl")
+                .open("rb") as f
+            ):
+                return pd.read_pickle(f)
+        except Exception as e:
+            raise RuntimeError(f"Failed to load dataset '{canonical_name}': {e}")
+    else:
+        raise ValueError(f"Dataset '{name}' does not exist.")
+def dataset_table() -> str:
+    """Generate a dynamic-width table of datasets."""
+    name_width = max(len(name) for name in datasets) + 2
+    desc_width = max(len(desc) for desc in datasets) + 2
+    table_lines = [
+        f"{'Dataset':<{name_width}} {'Description':<{desc_width}}",
+        "-" * (name_width + desc_width),
+    ]
+    for name, desc in datasets.items():
+        table_lines.append(f"{name:<{name_width}} {desc:<{desc_width}}")
+    return "\n".join(table_lines)

datadepot-0.0.35.post0/datadepot.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,107 @@
+Metadata-Version: 2.4
+Name: datadepot
+Version: 0.0.35.post0
+Summary: The datadepot package provides a collection of datasets used in the book Data Science Foundations and Machine Learning with Python.
+Author-email: Jeroen van Raak <j.j.f.vanraak@uva.nl>
+License: Copyright (c) 2025 Jeroen van Raak
+        MIT License
+        Permission is hereby granted, free of charge, to any person obtaining a copy
+        of this software and associated documentation files (the "Software"), to deal
+        in the Software without restriction, including without limitation the rights
+        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+        copies of the Software, and to permit persons to whom the Software is
+        furnished to do so, subject to the following conditions:
+        The above copyright notice and this permission notice shall be included in all
+        copies or substantial portions of the Software.
+        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+        SOFTWARE.
+Project-URL: Homepage, https://github.com/vanraak/datadepot
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: pandas>=1.5
+Requires-Dist: numpy>=1.21
+Dynamic: license-file
+# Package `datadepot`
+**Package ‘datadepot’**
+**Title** \`\`DataDepot’’
+**Description**
+The **datadepot** package provides a collection of datasets used in the book `Data Science Foundations and Machine Learning with Python`.
+**URL** <https://github.com/vanraak/datadepot>
+**Depends** Python (\>= 3.8) and Pandas (\>2.0)
+**License** GPL (\>= 2)
+**Repository** Pypi
+**Authors** Jeroen van Raak and Reza Mohammadi
+**Maintainer** Jeroen van Raak, <j.j.f.vanraak@uva.nl>
+**NeedsCompilation** no
+**Installation**
+    pip install datadepot
+**Usage**
+    import datadepot
+    df=datadepot.load('<dataset>')
+Replace <dataset> with the name of the dataset, such as ‘bank’, ‘house’, or ‘churn’.
+**Example**
+    df=datadepot.load('bank') # Load the bank dataset.
+**Datasets**
+The following datasets are included:
+- adult
+- advertising
+- bank
+- caravan
+- cereal
+- churn
+- churn_ibm
+- churn_tel
+- corona
+- diamonds
+- drug
+- gapminder
+- house
+- house_price
+- insurance
+- marketing
+- mpg
+- red_wines
+- risk
+- white_wines
+**Documentation**
+The full documentation is available at:
+<https://github.com/vanraak/datadepot/blob/main/README.pdf>

datadepot-0.0.35.post0/datadepot.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,15 @@
+.gitignore
+LICENSE
+MANIFEST.in
+README.md
+README.pdf
+pyproject.toml
+.github/workflows/release.yaml
+datadepot/__init__.py
+datadepot/_version.py
+datadepot/datasets.py
+datadepot.egg-info/PKG-INFO
+datadepot.egg-info/SOURCES.txt
+datadepot.egg-info/dependency_links.txt
+datadepot.egg-info/requires.txt
+datadepot.egg-info/top_level.txt

datadepot-0.0.35.post0/datadepot.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+

datadepot-0.0.35.post0/datadepot.egg-info/requires.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ pandas>=1.5
2	+ numpy>=1.21

datadepot-0.0.35.post0/datadepot.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ datadepot

datadepot-0.0.35.post0/pyproject.toml ADDED Viewed

@@ -0,0 +1,38 @@
+[build-system]
+# setuptools_scm >= 8 is required for the "version_file" option used below.
+requires = ["setuptools>=61.0", "wheel", "setuptools_scm>=8"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "datadepot"
+# The version is not written here; setuptools_scm derives it from git tags.
+dynamic = ["version"]
+description = "The datadepot package provides a collection of datasets used in the book Data Science Foundations and Machine Learning with Python."
+readme = "README.md"
+requires-python = ">=3.10"
+license = { file = "LICENSE" }
+dependencies = [
+    "pandas>=1.5",
+    "numpy>=1.21",
+]
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+]
+authors = [
+    { name = "Jeroen van Raak", email = "j.j.f.vanraak@uva.nl" }
+]
+urls = { Homepage = "https://github.com/vanraak/datadepot" }
+[tool.setuptools]
+include-package-data = true
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["datadepot*"]
+exclude = ["csv", "pickle", "sourcedata"]
+[tool.setuptools_scm]
+version_scheme = "post-release"
+local_scheme = "no-local-version"
+# "version_file" replaces the deprecated "write_to" option.
+version_file = "datadepot/_version.py"

datadepot-0.0.35.post0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0