poolish-ds-data 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- poolish_ds_data-0.1.0/PKG-INFO +37 -0
- poolish_ds_data-0.1.0/README.md +24 -0
- poolish_ds_data-0.1.0/pyproject.toml +20 -0
- poolish_ds_data-0.1.0/setup.cfg +4 -0
- poolish_ds_data-0.1.0/src/poolish_ds_data/__init__.py +66 -0
- poolish_ds_data-0.1.0/src/poolish_ds_data.egg-info/PKG-INFO +37 -0
- poolish_ds_data-0.1.0/src/poolish_ds_data.egg-info/SOURCES.txt +8 -0
- poolish_ds_data-0.1.0/src/poolish_ds_data.egg-info/dependency_links.txt +1 -0
- poolish_ds_data-0.1.0/src/poolish_ds_data.egg-info/requires.txt +2 -0
- poolish_ds_data-0.1.0/src/poolish_ds_data.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: poolish-ds-data
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Data files for Peddie School data science course
|
|
5
|
+
Author-email: Mark Sawula <msawula@peddie.org>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://sawula.github.io
|
|
8
|
+
Project-URL: Repository, https://github.com/sawula/ds-course-data
|
|
9
|
+
Requires-Python: >=3.8
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
Requires-Dist: pandas
|
|
12
|
+
Requires-Dist: requests
|
|
13
|
+
|
|
14
|
+
# poolish-ds-data
|
|
15
|
+
|
|
16
|
+
Data files for the Peddie School data science course.
|
|
17
|
+
|
|
18
|
+
## Installation
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
pip install poolish-ds-data
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Usage
|
|
25
|
+
|
|
26
|
+
```python
|
|
27
|
+
import poolish_ds_data as ds
|
|
28
|
+
|
|
29
|
+
# Load a CSV as a pandas DataFrame
|
|
30
|
+
df = ds.load("epl2122.csv")
|
|
31
|
+
|
|
32
|
+
# See all available datasets
|
|
33
|
+
ds.list_datasets()
|
|
34
|
+
|
|
35
|
+
# Get the raw URL for any file (useful for images, GeoJSON, etc.)
|
|
36
|
+
url = ds.get_url("south-korea-with-regions_1516.geojson")
|
|
37
|
+
```
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# poolish-ds-data
|
|
2
|
+
|
|
3
|
+
Data files for the Peddie School data science course.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install poolish-ds-data
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Usage
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
import poolish_ds_data as ds
|
|
15
|
+
|
|
16
|
+
# Load a CSV as a pandas DataFrame
|
|
17
|
+
df = ds.load("epl2122.csv")
|
|
18
|
+
|
|
19
|
+
# See all available datasets
|
|
20
|
+
ds.list_datasets()
|
|
21
|
+
|
|
22
|
+
# Get the raw URL for any file (useful for images, GeoJSON, etc.)
|
|
23
|
+
url = ds.get_url("south-korea-with-regions_1516.geojson")
|
|
24
|
+
```
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=42", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "poolish-ds-data"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Data files for Peddie School data science course"
|
|
9
|
+
authors = [{name = "Mark Sawula", email = "msawula@peddie.org"}]
|
|
10
|
+
requires-python = ">=3.8"
|
|
11
|
+
dependencies = ["pandas", "requests"]
|
|
12
|
+
readme = "README.md"
|
|
13
|
+
license = {text = "MIT"}
|
|
14
|
+
|
|
15
|
+
[project.urls]
|
|
16
|
+
Homepage = "https://sawula.github.io"
|
|
17
|
+
Repository = "https://github.com/sawula/ds-course-data"
|
|
18
|
+
|
|
19
|
+
[tool.setuptools.packages.find]
|
|
20
|
+
where = ["src"]
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import requests
|
|
3
|
+
from io import StringIO
|
|
4
|
+
|
|
5
|
+
# Root URL that file names are appended to when building a download link.
BASE_URL = "https://raw.githubusercontent.com/sawula/ds-course-data/main/"

# File names reported by list_datasets(); several contain spaces, which is
# why URL construction has to escape them.
DATASETS = [
    "FC26_20250921.csv", "Madden25.csv", "MER_T02_01A.csv",
    "NBA Stats 2324.csv", "NFLsalaries.csv",
    "NYC ER respire 16 20 totals.csv", "NYC ER respire 16 20.csv",
    "PandasJumble0.csv", "PandasJumble1.csv", "PandasJumble2.csv",
    "SF2506_stations_with_elevation.csv", "WS_golf_winnings.csv",
    "brainrot.csv", "coldmed.csv", "combine_stat_crunch.csv",
    "data8station.csv", "data8trip.csv", "dc_v_state.csv",
    "electric_price_state_sector_23_24.csv",
    "electric_prices_state_sector_23_24.csv",
    "epa_02_10.csv", "epl2122.csv", "pandemic_trends.csv",
    "players_22.csv", "pop_shift_0.csv", "resume.csv",
    "resumes_city_eoe.csv", "state_pop.csv", "top_colleges.csv",
]
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def load(filename, *, timeout=30):
    """Load a dataset by filename.

    Args:
        filename: Name of a file under ``BASE_URL``, e.g. ``"epl2122.csv"``.
            The extension, matched case-insensitively, selects the parser.
        timeout: Value forwarded to ``requests.get`` as ``timeout`` so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        A pandas DataFrame for ``.csv`` files, or the parsed JSON value
        (typically a dict) for ``.json`` / ``.geojson`` files.

    Raises:
        ValueError: If the extension is not ``.csv``, ``.json`` or
            ``.geojson``. Raised before any network request is made.
        requests.HTTPError: If the server responds with an error status.
    """
    lowered = filename.lower()
    is_csv = lowered.endswith(".csv")
    if not is_csv and not lowered.endswith((".json", ".geojson")):
        # Reject unsupported types up front rather than downloading the file
        # first and only then discovering it cannot be parsed.
        raise ValueError(
            "Unsupported file type for direct loading. "
            f"Use get_url('{filename}') to get the raw URL instead."
        )

    # Build the URL through get_url so both entry points escape names the
    # same way.
    response = requests.get(get_url(filename), timeout=timeout)
    response.raise_for_status()

    if is_csv:
        return pd.read_csv(StringIO(response.text))
    return response.json()
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def list_datasets():
    """Print every name in ``DATASETS``, one per line, in sorted order."""
    # Sort a copy so the module-level list keeps its original order.
    ordered_names = list(DATASETS)
    ordered_names.sort()
    for dataset_name in ordered_names:
        print(dataset_name)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def get_url(filename):
    """Get the raw GitHub URL for any file in the repo.

    Args:
        filename: Unescaped path of the file relative to ``BASE_URL``,
            e.g. ``"NBA Stats 2324.csv"``.

    Returns:
        ``BASE_URL`` followed by the percent-encoded ``filename``.
    """
    # Imported here so the function does not depend on a module-level import.
    from urllib.parse import quote

    # quote() turns spaces into %20 as before, and additionally escapes
    # characters such as '#', '?', '%' and non-ASCII text that would
    # otherwise truncate or corrupt the URL. '/' is kept so paths into
    # subdirectories still work.
    return BASE_URL + quote(filename, safe="/")
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: poolish-ds-data
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Data files for Peddie School data science course
|
|
5
|
+
Author-email: Mark Sawula <msawula@peddie.org>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://sawula.github.io
|
|
8
|
+
Project-URL: Repository, https://github.com/sawula/ds-course-data
|
|
9
|
+
Requires-Python: >=3.8
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
Requires-Dist: pandas
|
|
12
|
+
Requires-Dist: requests
|
|
13
|
+
|
|
14
|
+
# poolish-ds-data
|
|
15
|
+
|
|
16
|
+
Data files for the Peddie School data science course.
|
|
17
|
+
|
|
18
|
+
## Installation
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
pip install poolish-ds-data
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Usage
|
|
25
|
+
|
|
26
|
+
```python
|
|
27
|
+
import poolish_ds_data as ds
|
|
28
|
+
|
|
29
|
+
# Load a CSV as a pandas DataFrame
|
|
30
|
+
df = ds.load("epl2122.csv")
|
|
31
|
+
|
|
32
|
+
# See all available datasets
|
|
33
|
+
ds.list_datasets()
|
|
34
|
+
|
|
35
|
+
# Get the raw URL for any file (useful for images, GeoJSON, etc.)
|
|
36
|
+
url = ds.get_url("south-korea-with-regions_1516.geojson")
|
|
37
|
+
```
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
src/poolish_ds_data/__init__.py
|
|
4
|
+
src/poolish_ds_data.egg-info/PKG-INFO
|
|
5
|
+
src/poolish_ds_data.egg-info/SOURCES.txt
|
|
6
|
+
src/poolish_ds_data.egg-info/dependency_links.txt
|
|
7
|
+
src/poolish_ds_data.egg-info/requires.txt
|
|
8
|
+
src/poolish_ds_data.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
poolish_ds_data
|