datadepot 0.0.35.post0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,33 @@
1
+ name: Release
2
+
3
+ on:
4
+   push:
5
+     tags:
6
+       - 'v*'
7
+
8
+ jobs:
9
+   publish:
10
+     runs-on: ubuntu-latest
11
+     environment: pypi
12
+     permissions:
13
+       id-token: write  # OIDC token for PyPI trusted publishing; the publish step is given no API token
14
+       contents: read
15
+
16
+     steps:
17
+       - uses: actions/checkout@v4
18
+         with:
19
+           fetch-depth: 0  # full history and tags, which setuptools_scm reads to compute the version
20
+
21
+       - name: Set up Python
22
+         uses: actions/setup-python@v5
23
+         with:
24
+           python-version: '3.11'  # quoted: a bare YAML number would turn e.g. 3.10 into 3.1
25
+
26
+       - name: Install build tools
27
+         run: pip install build setuptools_scm wheel
28
+
29
+       - name: Build package
30
+         run: python -m build
31
+
32
+       - name: Publish to PyPI
33
+         uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,6 @@
1
+ # Ignore build output and Python bytecode caches
2
+ build/
3
+ dist/
4
+ __pycache__/
5
+ *.egg-info/
6
+ *.egg
@@ -0,0 +1,21 @@
1
+ Copyright (c) 2025 Jeroen van Raak
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,3 @@
1
+ include README.md
2
+ include LICENSE
3
+ recursive-include datadepot/data *
@@ -0,0 +1,107 @@
1
+ Metadata-Version: 2.4
2
+ Name: datadepot
3
+ Version: 0.0.35.post0
4
+ Summary: The datadepot package provides a collection of datasets used in the book Data Science Foundations and Machine Learning with Python.
5
+ Author-email: Jeroen van Raak <j.j.f.vanraak@uva.nl>
6
+ License: Copyright (c) 2025 Jeroen van Raak
7
+
8
+ MIT License
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/vanraak/datadepot
29
+ Classifier: Programming Language :: Python :: 3
30
+ Classifier: License :: OSI Approved :: MIT License
31
+ Classifier: Operating System :: OS Independent
32
+ Requires-Python: >=3.10
33
+ Description-Content-Type: text/markdown
34
+ License-File: LICENSE
35
+ Requires-Dist: pandas>=1.5
36
+ Requires-Dist: numpy>=1.21
37
+ Dynamic: license-file
38
+
39
+ # Package `datadepot`
40
+
41
+
42
+ **Package ‘datadepot’**
43
+
44
+ **Title** \`\`DataDepot’’
45
+
46
+ **Description**
47
+
48
+ The **datadepot** package provides a collection of datasets used in the book `Data Science Foundations and Machine Learning with Python`.
49
+
50
+ **URL** <https://github.com/vanraak/datadepot>
51
+
52
+ **Depends** Python (\>= 3.10), pandas (\>= 1.5) and NumPy (\>= 1.21)
53
+
54
+ **License** MIT
55
+
56
+ **Repository** Pypi
57
+
58
+ **Authors** Jeroen van Raak and Reza Mohammadi
59
+
60
+ **Maintainer** Jeroen van Raak, <j.j.f.vanraak@uva.nl>
61
+
62
+ **NeedsCompilation** no
63
+
64
+ **Installation**
65
+
66
+ pip install datadepot
67
+
68
+ **Usage**
69
+
70
+ import datadepot
71
+ df=datadepot.load('<dataset>')
72
+
73
+ Replace <dataset> with the name of the dataset, such as ‘bank’, ‘house’, or ‘churn’.
74
+
75
+ **Example**
76
+
77
+ df=datadepot.load('bank') # Load the bank dataset.
78
+
79
+ **Datasets**
80
+
81
+ The following datasets are included:
82
+
83
+ - adult
84
+ - advertising
85
+ - bank
86
+ - caravan
87
+ - cereal
88
+ - churn
89
+ - churn_ibm
90
+ - churn_tel
91
+ - corona
92
+ - diamonds
93
+ - drug
94
+ - gapminder
95
+ - house
96
+ - house_price
97
+ - insurance
98
+ - marketing
99
+ - mpg
100
+ - red_wines
101
+ - risk
102
+ - white_wines
103
+
104
+ **Documentation**
105
+
106
+ The full documentation is available at:
107
+ <https://github.com/vanraak/datadepot/blob/main/README.pdf>
@@ -0,0 +1,69 @@
1
+ # Package `datadepot`
2
+
3
+
4
+ **Package ‘datadepot’**
5
+
6
+ **Title** “DataDepot”
7
+
8
+ **Description**
9
+
10
+ The **datadepot** package provides a collection of datasets used in the book `Data Science Foundations and Machine Learning with Python`.
11
+
12
+ **URL** <https://github.com/vanraak/datadepot>
13
+
14
+ **Depends** Python (\>= 3.10), pandas (\>= 1.5) and NumPy (\>= 1.21)
15
+
16
+ **License** MIT
17
+
18
+ **Repository** PyPI
19
+
20
+ **Authors** Jeroen van Raak and Reza Mohammadi
21
+
22
+ **Maintainer** Jeroen van Raak, <j.j.f.vanraak@uva.nl>
23
+
24
+ **NeedsCompilation** no
25
+
26
+ **Installation**
27
+
28
+ pip install datadepot
29
+
30
+ **Usage**
31
+
32
+ import datadepot
33
+ df=datadepot.load('<dataset>')
34
+
35
+ Replace `<dataset>` with the name of the dataset, such as ‘bank’, ‘house’, or ‘churn’.
36
+
37
+ **Example**
38
+
39
+ df=datadepot.load('bank') # Load the bank dataset.
40
+
41
+ **Datasets**
42
+
43
+ The following datasets are included:
44
+
45
+ - adult
46
+ - advertising
47
+ - bank
48
+ - caravan
49
+ - cereal
50
+ - churn
51
+ - churn_ibm
52
+ - churn_tel
53
+ - corona
54
+ - diamonds
55
+ - drug
56
+ - gapminder
57
+ - house
58
+ - house_price
59
+ - insurance
60
+ - marketing
61
+ - mpg
62
+ - red_wines
63
+ - risk
64
+ - white_wines
65
+
66
+ **Documentation**
67
+
68
+ The full documentation is available at:
69
+ <https://github.com/vanraak/datadepot/blob/main/README.pdf>
Binary file
@@ -0,0 +1,25 @@
1
+ from ._version import __version__
2
+ from .datasets import load, datasets, dataset_table
3
+
4
+
5
+ def version():
6
+ """Prints DataDepot package version."""
7
+ print(f"Version: {__version__}")
8
+
9
+
10
+ __all__ = ["load", "datasets", "dataset_table"]
11
+
12
+ __doc__ = f"""
13
+ DataDepot package: Example datasets for Python users
14
+
15
+ Available datasets:
16
+ {dataset_table()}
17
+
18
+ # Load a dataset from the DataDepot package as a pandas DataFrame
19
+
20
+ >>> import datadepot
21
+ >>> df = datadepot.load('<dataset_name>') # Load a dataset as a pandas DataFrame
22
+
23
+ # Show the version of the DataDepot library:
24
+ >>> datadepot.version() # DataDepot version {__version__}
25
+ """
@@ -0,0 +1,34 @@
1
+ # file generated by setuptools-scm
2
+ # don't change, don't track in version control
3
+
4
+ __all__ = [
5
+ "__version__",
6
+ "__version_tuple__",
7
+ "version",
8
+ "version_tuple",
9
+ "__commit_id__",
10
+ "commit_id",
11
+ ]
12
+
13
+ TYPE_CHECKING = False
14
+ if TYPE_CHECKING:
15
+ from typing import Tuple
16
+ from typing import Union
17
+
18
+ VERSION_TUPLE = Tuple[Union[int, str], ...]
19
+ COMMIT_ID = Union[str, None]
20
+ else:
21
+ VERSION_TUPLE = object
22
+ COMMIT_ID = object
23
+
24
+ version: str
25
+ __version__: str
26
+ __version_tuple__: VERSION_TUPLE
27
+ version_tuple: VERSION_TUPLE
28
+ commit_id: COMMIT_ID
29
+ __commit_id__: COMMIT_ID
30
+
31
+ __version__ = version = '0.0.35.post0'
32
+ __version_tuple__ = version_tuple = (0, 0, 35, 'post0')
33
+
34
+ __commit_id__ = commit_id = 'g34844a160'
@@ -0,0 +1,62 @@
1
+ import pandas as pd
2
+ import importlib.resources
3
+
4
+ datasets = {
5
+ "adult": "Adult census income dataset.",
6
+ "advertising": "The dataset from an organization’s social media ad campaign.",
7
+ "bank": "Bank marketing dataset.",
8
+ "caravan": "Caravan insurance dataset.",
9
+ "cereal": "Cereal nutrition dataset.",
10
+ "churn": "Credit card churn dataset.",
11
+ "churn_ibm": "Telecom churn dataset (IBM).",
12
+ "churn_tel": "Telecom churn dataset (MLC).",
13
+ "corona": "COVID-19 related dataset.",
14
+ "diamonds": "Diamonds dataset.",
15
+ "drug": "Drug classification dataset.",
16
+ "gapminder": "Gapminder dataset.",
17
+ "house": "House sales dataset.",
18
+ "house_price": "House price dataset.",
19
+ "insurance": "Insurance dataset.",
20
+ "marketing": "Marketing campaigns dataset.",
21
+ "mpg": "Auto MPG dataset.",
22
+ "red_wines": "Red wine quality dataset.",
23
+ "risk": "Risk analysis dataset.",
24
+ "transcripts": "Earnings conference call transcripts",
25
+ "white_wines": "White wine quality dataset.",
26
+ }
27
+
28
+
29
+ def load(name: str) -> pd.DataFrame:
30
+ name = name.strip().lower().replace("_", "") # remove underscores
31
+ # Build a lookup dict: stripped keys -> canonical keys
32
+ lookup = {k.replace("_", ""): k for k in datasets.keys()}
33
+
34
+ if name in lookup:
35
+ canonical_name = lookup[name] # get the actual dataset key (with underscore)
36
+ try:
37
+ with (
38
+ importlib.resources.files("datadepot.data")
39
+ .joinpath(f"{canonical_name}.pkl")
40
+ .open("rb") as f
41
+ ):
42
+ return pd.read_pickle(f)
43
+ except Exception as e:
44
+ raise RuntimeError(f"Failed to load dataset '{canonical_name}': {e}")
45
+ else:
46
+ raise ValueError(f"Dataset '{name}' does not exist.")
47
+
48
+
49
+ def dataset_table() -> str:
50
+ """Generate a dynamic-width table of datasets."""
51
+ name_width = max(len(name) for name in datasets) + 2
52
+ desc_width = max(len(desc) for desc in datasets) + 2
53
+
54
+ table_lines = [
55
+ f"{'Dataset':<{name_width}} {'Description':<{desc_width}}",
56
+ "-" * (name_width + desc_width),
57
+ ]
58
+
59
+ for name, desc in datasets.items():
60
+ table_lines.append(f"{name:<{name_width}} {desc:<{desc_width}}")
61
+
62
+ return "\n".join(table_lines)
@@ -0,0 +1,107 @@
1
+ Metadata-Version: 2.4
2
+ Name: datadepot
3
+ Version: 0.0.35.post0
4
+ Summary: The datadepot package provides a collection of datasets used in the book Data Science Foundations and Machine Learning with Python.
5
+ Author-email: Jeroen van Raak <j.j.f.vanraak@uva.nl>
6
+ License: Copyright (c) 2025 Jeroen van Raak
7
+
8
+ MIT License
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/vanraak/datadepot
29
+ Classifier: Programming Language :: Python :: 3
30
+ Classifier: License :: OSI Approved :: MIT License
31
+ Classifier: Operating System :: OS Independent
32
+ Requires-Python: >=3.10
33
+ Description-Content-Type: text/markdown
34
+ License-File: LICENSE
35
+ Requires-Dist: pandas>=1.5
36
+ Requires-Dist: numpy>=1.21
37
+ Dynamic: license-file
38
+
39
+ # Package `datadepot`
40
+
41
+
42
+ **Package ‘datadepot’**
43
+
44
+ **Title** \`\`DataDepot’’
45
+
46
+ **Description**
47
+
48
+ The **datadepot** package provides a collection of datasets used in the book `Data Science Foundations and Machine Learning with Python`.
49
+
50
+ **URL** <https://github.com/vanraak/datadepot>
51
+
52
+ **Depends** Python (\>= 3.10), pandas (\>= 1.5) and NumPy (\>= 1.21)
53
+
54
+ **License** MIT
55
+
56
+ **Repository** Pypi
57
+
58
+ **Authors** Jeroen van Raak and Reza Mohammadi
59
+
60
+ **Maintainer** Jeroen van Raak, <j.j.f.vanraak@uva.nl>
61
+
62
+ **NeedsCompilation** no
63
+
64
+ **Installation**
65
+
66
+ pip install datadepot
67
+
68
+ **Usage**
69
+
70
+ import datadepot
71
+ df=datadepot.load('<dataset>')
72
+
73
+ Replace <dataset> with the name of the dataset, such as ‘bank’, ‘house’, or ‘churn’.
74
+
75
+ **Example**
76
+
77
+ df=datadepot.load('bank') # Load the bank dataset.
78
+
79
+ **Datasets**
80
+
81
+ The following datasets are included:
82
+
83
+ - adult
84
+ - advertising
85
+ - bank
86
+ - caravan
87
+ - cereal
88
+ - churn
89
+ - churn_ibm
90
+ - churn_tel
91
+ - corona
92
+ - diamonds
93
+ - drug
94
+ - gapminder
95
+ - house
96
+ - house_price
97
+ - insurance
98
+ - marketing
99
+ - mpg
100
+ - red_wines
101
+ - risk
102
+ - white_wines
103
+
104
+ **Documentation**
105
+
106
+ The full documentation is available at:
107
+ <https://github.com/vanraak/datadepot/blob/main/README.pdf>
@@ -0,0 +1,15 @@
1
+ .gitignore
2
+ LICENSE
3
+ MANIFEST.in
4
+ README.md
5
+ README.pdf
6
+ pyproject.toml
7
+ .github/workflows/release.yaml
8
+ datadepot/__init__.py
9
+ datadepot/_version.py
10
+ datadepot/datasets.py
11
+ datadepot.egg-info/PKG-INFO
12
+ datadepot.egg-info/SOURCES.txt
13
+ datadepot.egg-info/dependency_links.txt
14
+ datadepot.egg-info/requires.txt
15
+ datadepot.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ pandas>=1.5
2
+ numpy>=1.21
@@ -0,0 +1 @@
1
+ datadepot
@@ -0,0 +1,38 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel", "setuptools_scm>=8"]  # >=8 provides `version_file`
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "datadepot"
7
+ dynamic = ["version"]  # derived from SCM metadata by setuptools_scm (see [tool.setuptools_scm])
8
+ description = "The datadepot package provides a collection of datasets used in the book Data Science Foundations and Machine Learning with Python."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = { file = "LICENSE" }
12
+ dependencies = [
13
+     "pandas>=1.5",
14
+     "numpy>=1.21",
15
+ ]
16
+ classifiers = [
17
+     "Programming Language :: Python :: 3",
18
+     "License :: OSI Approved :: MIT License",
19
+     "Operating System :: OS Independent",
20
+ ]
21
+ authors = [
22
+     { name = "Jeroen van Raak", email = "j.j.f.vanraak@uva.nl" }
23
+ ]
24
+
25
+ urls = { Homepage = "https://github.com/vanraak/datadepot" }
26
+
27
+ [tool.setuptools]
28
+ include-package-data = true
29
+
30
+ [tool.setuptools.packages.find]
31
+ where = ["."]
32
+ include = ["datadepot*"]
33
+ exclude = ["csv", "pickle", "sourcedata"]
34
+
35
+ [tool.setuptools_scm]
36
+ version_scheme = "post-release"
37
+ local_scheme = "no-local-version"
38
+ version_file = "datadepot/_version.py"  # replaces the deprecated `write_to`
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+