poolish-ds-data 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,66 @@
1
+ import pandas as pd
2
+ import requests
3
+ from io import StringIO
4
+
5
# Raw-content URL prefix. load() and get_url() append the filename to it
# after replacing each space with "%20".
+ BASE_URL = "https://raw.githubusercontent.com/sawula/ds-course-data/main/"
6
+
7
# CSV filenames printed (sorted) by list_datasets(). load() and get_url() do
# not consult this list, so they accept filenames that are not listed here.
+ DATASETS = [
8
+ "FC26_20250921.csv",
9
+ "Madden25.csv",
10
+ "MER_T02_01A.csv",
11
+ "NBA Stats 2324.csv",
12
+ "NFLsalaries.csv",
13
+ "NYC ER respire 16 20 totals.csv",
14
+ "NYC ER respire 16 20.csv",
15
+ "PandasJumble0.csv",
16
+ "PandasJumble1.csv",
17
+ "PandasJumble2.csv",
18
+ "SF2506_stations_with_elevation.csv",
19
+ "WS_golf_winnings.csv",
20
+ "brainrot.csv",
21
+ "coldmed.csv",
22
+ "combine_stat_crunch.csv",
23
+ "data8station.csv",
24
+ "data8trip.csv",
25
+ "dc_v_state.csv",
26
+ "electric_price_state_sector_23_24.csv",
27
+ "electric_prices_state_sector_23_24.csv",
28
+ "epa_02_10.csv",
29
+ "epl2122.csv",
30
+ "pandemic_trends.csv",
31
+ "players_22.csv",
32
+ "pop_shift_0.csv",
33
+ "resume.csv",
34
+ "resumes_city_eoe.csv",
35
+ "state_pop.csv",
36
+ "top_colleges.csv",
37
+ ]
38
+
39
+
40
def load(filename, timeout=30):
    """Download a dataset from the course repository and parse it.

    The file is fetched from the URL returned by ``get_url(filename)``.

    Args:
        filename: Name of a file in the repository, e.g. ``"epl2122.csv"``.
            Its extension selects the parser; the match is case-sensitive.
        timeout: Seconds ``requests`` waits on the server before giving up.
            Passed straight through to ``requests.get``.

    Returns:
        A pandas DataFrame for ``.csv`` files, or the decoded JSON value
        (typically a dict) for ``.json`` / ``.geojson`` files.

    Raises:
        ValueError: If the extension is not ``.csv``, ``.json`` or
            ``.geojson``. Raised before any network request is made.
        requests.HTTPError: If the server answers with an error status.
    """
    is_csv = filename.endswith(".csv")

    # Reject unsupported types up front so we never download a file that
    # would only be thrown away.
    if not is_csv and not filename.endswith((".json", ".geojson")):
        raise ValueError(
            "Unsupported file type for direct loading. "
            f"Use get_url('{filename}') to get the raw URL instead."
        )

    # Without a timeout requests can block indefinitely on a stalled server.
    response = requests.get(get_url(filename), timeout=timeout)
    response.raise_for_status()

    if is_csv:
        return pd.read_csv(StringIO(response.text))
    return response.json()
56
+
57
+
58
def list_datasets():
    """Print every name in DATASETS, one per line, in sorted order."""
    # Sort a copy so the module-level list keeps its original order.
    ordered_names = list(DATASETS)
    ordered_names.sort()
    for dataset_name in ordered_names:
        print(dataset_name)
62
+
63
+
64
def get_url(filename):
    """Build the raw GitHub URL for a file in the repo.

    Spaces in ``filename`` are written as ``%20``; the result is ``BASE_URL``
    followed by that escaped name. No request is made, so the URL is not
    checked for existence.
    """
    escaped_name = "%20".join(filename.split(" "))
    return BASE_URL + escaped_name
@@ -0,0 +1,37 @@
1
+ Metadata-Version: 2.4
2
+ Name: poolish-ds-data
3
+ Version: 0.1.0
4
+ Summary: Data files for the Peddie School data science course
5
+ Author-email: Mark Sawula <msawula@peddie.org>
6
+ License: MIT
7
+ Project-URL: Homepage, https://sawula.github.io
8
+ Project-URL: Repository, https://github.com/sawula/ds-course-data
9
+ Requires-Python: >=3.8
10
+ Description-Content-Type: text/markdown
11
+ Requires-Dist: pandas
12
+ Requires-Dist: requests
13
+
14
+ # poolish-ds-data
15
+
16
+ Data files for the Peddie School data science course.
17
+
18
+ ## Installation
19
+
20
+ ```bash
21
+ pip install poolish-ds-data
22
+ ```
23
+
24
+ ## Usage
25
+
26
+ ```python
27
+ import poolish_ds_data as ds
28
+
29
+ # Load a CSV as a pandas DataFrame
30
+ df = ds.load("epl2122.csv")
31
+
32
+ # See all available datasets
33
+ ds.list_datasets()
34
+
35
+ # Get the raw URL for any file (useful for images, GeoJSON, etc.)
36
+ url = ds.get_url("south-korea-with-regions_1516.geojson")
37
+ ```
@@ -0,0 +1,5 @@
1
+ poolish_ds_data/__init__.py,sha256=5y7zAiz3xRRw0JGeyTquH_ISXlnBC8Ze_90N4FHippc,1784
2
+ poolish_ds_data-0.1.0.dist-info/METADATA,sha256=j4kkGGaDaD7jIIa7R2kArO8KMXO5Axy8czM5UX_0pO0,824
3
+ poolish_ds_data-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
4
+ poolish_ds_data-0.1.0.dist-info/top_level.txt,sha256=9l2FolHaj57zh5PmSUK89j9ACoCp3zfCsxxMM0eAgU8,16
5
+ poolish_ds_data-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ poolish_ds_data