nbastatpy 0.1.6__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nbastatpy might be problematic. Click here for more details.

nbastatpy/utils.py CHANGED
@@ -4,7 +4,6 @@ import pandas as pd
4
4
 
5
5
 
6
6
  class PlayTypes:
7
-
8
7
  PERMODE = {
9
8
  "PERGAME": "PerGame",
10
9
  "PER36": "Per36",
@@ -68,7 +67,6 @@ class PlayTypes:
68
67
 
69
68
 
70
69
  class Formatter:
71
-
72
70
  def get_current_season_year() -> str:
73
71
  current_datetime = datetime.now()
74
72
  current_season_year = current_datetime.year
@@ -76,6 +74,36 @@ class Formatter:
76
74
  current_season_year -= 1
77
75
  return current_season_year
78
76
 
77
def normalize_season_year(season_input) -> int:
    """
    Normalize various season year inputs to a 4-digit starting year.

    Args:
        season_input: Can be int or str. Examples: 2022, "2022", 22, "22",
            "2022-23", "22-23"

    Returns:
        int: The starting year of the season (e.g., 2022 for 2022-23 season)

    Raises:
        ValueError: If the starting-year portion cannot be parsed as an integer.
    """
    # Convert to string for uniform processing
    season_str = str(season_input).strip()

    # For "YYYY-YY" style input keep only the part before the dash; plain
    # years have no dash, so partition() hands them back unchanged.
    start_part, _, _ = season_str.partition("-")
    year = int(start_part)

    # Expand 2-digit years for every input form (including "22-23"):
    # 00-49 are read as 2000-2049, 50-99 as 1950-1999.
    if year < 100:
        year += 2000 if year < 50 else 1900

    return year
106
+
79
107
def format_season(season_year: int) -> str:
    """Render a season's starting year as a season label such as "2022-23".

    Args:
        season_year: Starting year of the season; anything accepted by int().

    Returns:
        The starting year, a dash, and the following year's digits after
        the first two characters.
    """
    start_year = int(season_year)
    # Drop the first two characters of the following year ("2023" -> "23").
    end_suffix = str(start_year + 1)[2:]
    return f"{start_year}-{end_suffix}"
81
109
 
@@ -86,7 +114,6 @@ class Formatter:
86
114
  return next(value for value in row if pd.notna(value))
87
115
 
88
116
  def check_playtype(play: str, playtypes: dict) -> str:
89
-
90
117
  play = play.replace("_", "").replace("-", "").upper()
91
118
 
92
119
  if play == "ALL":
@@ -0,0 +1,288 @@
1
+ from typing import Dict, List, Optional, Set
2
+
3
+ import pandas as pd
4
+
5
+ from nbastatpy.config import DateFields, IDFields
6
+
7
+
8
class ValidationResult:
    """Accumulates the errors and warnings produced by a validation run."""

    def __init__(self):
        """Start with no messages and a passing status."""
        self.errors: List[str] = []
        self.warnings: List[str] = []
        self.passed: bool = True

    def add_error(self, message: str) -> None:
        """Record an error and mark the result as failed.

        Args:
            message: Error message to record
        """
        self.passed = False
        self.errors.append(message)

    def add_warning(self, message: str) -> None:
        """Record a warning; the passed status is left untouched.

        Args:
            message: Warning message to record
        """
        self.warnings.append(message)

    def __str__(self) -> str:
        """Return a readable report: errors first, then warnings."""
        if self.passed and not self.warnings:
            return "Validation passed with no issues"

        lines = []
        for title, messages in (("Errors", self.errors), ("Warnings", self.warnings)):
            if not messages:
                continue
            lines.append(f"{title} ({len(messages)}):")
            lines.extend(f" - {message}" for message in messages)

        return "\n".join(lines)
51
+
52
+
53
def validate_id_format(df: pd.DataFrame) -> ValidationResult:
    """Validate that ID fields are properly formatted.

    For every column named in ``IDFields.ID_FIELDS`` that is present in
    ``df``, the non-null values are checked for a string length of exactly
    10 characters and for being convertible by ``pd.to_numeric``.

    Args:
        df: DataFrame to validate

    Returns:
        ValidationResult with any issues found
    """
    result = ValidationResult()

    for id_field in IDFields.ID_FIELDS:
        if id_field not in df.columns:
            continue

        non_null = df[id_field].dropna()
        if non_null.empty:
            continue

        # Length check: compute the mask once and reuse its count.
        invalid_length = non_null.astype(str).str.len() != 10
        count = int(invalid_length.sum())
        if count:
            result.add_error(
                f"Column '{id_field}' has {count} values not formatted as 10-digit IDs"
            )

        # Numeric check. to_numeric raises ValueError for unparsable strings
        # and TypeError for non-scalar objects (lists, tuples, ...); both
        # are reported as a finding rather than crashing the validator.
        try:
            pd.to_numeric(non_null, errors="raise")
        except (ValueError, TypeError):
            result.add_error(f"Column '{id_field}' contains non-numeric values")

    return result
84
+
85
+
86
def validate_date_fields(df: pd.DataFrame) -> ValidationResult:
    """Validate that date fields are properly parsed.

    For every column named in ``DateFields.DATE_FIELDS`` that is present in
    ``df``, a warning is added when the column still holds raw
    object/string data, and the share of null values is reported (error
    above 50%, warning above 10%).

    Args:
        df: DataFrame to validate

    Returns:
        ValidationResult with any issues found
    """
    result = ValidationResult()
    total = len(df)

    for date_field in DateFields.DATE_FIELDS:
        if date_field not in df.columns:
            continue

        column = df[date_field]

        # Unparsed dates may be stored as plain object dtype or as one of
        # pandas' dedicated string dtypes; comparing against "object" alone
        # would miss the latter.
        dtype = column.dtype
        if pd.api.types.is_object_dtype(dtype) or pd.api.types.is_string_dtype(dtype):
            result.add_warning(
                f"Column '{date_field}' is still object type, may not be properly parsed"
            )

        # Check for null values
        null_count = int(column.isna().sum())
        if null_count == 0:
            continue

        pct = (null_count / total) * 100
        message = (
            f"Column '{date_field}' has {null_count}/{total} ({pct:.1f}%) null values"
        )
        if pct > 50:
            result.add_error(message)
        elif pct > 10:
            result.add_warning(message)

    return result
120
+
121
+
122
def validate_required_columns(
    df: pd.DataFrame, required_columns: Set[str]
) -> ValidationResult:
    """Validate that required columns are present.

    Args:
        df: DataFrame to validate
        required_columns: Required column names. A set is expected, but any
            iterable of labels (list, tuple, ...) is accepted; a single
            string is treated as one column name.

    Returns:
        ValidationResult with any issues found
    """
    result = ValidationResult()

    # A bare string would otherwise be split into individual characters.
    if isinstance(required_columns, str):
        required = {required_columns}
    else:
        required = set(required_columns)

    missing = required - set(df.columns)
    if missing:
        # Sort and join via str() so non-string labels cannot raise here.
        names = ", ".join(sorted(str(column) for column in missing))
        result.add_error(f"Missing required columns: {names}")

    return result
141
+
142
+
143
def validate_numeric_ranges(
    df: pd.DataFrame, range_rules: Dict[str, tuple]
) -> ValidationResult:
    """Check that numeric columns stay within their expected bounds.

    Columns named in ``range_rules`` but absent from ``df`` are ignored.
    Values are coerced with ``pd.to_numeric(errors="coerce")`` and nulls are
    dropped before comparison. Out-of-range values produce warnings; any
    exception raised while checking a column is recorded as an error.

    Args:
        df: DataFrame to validate
        range_rules: Dict mapping column names to (min, max) tuples

    Returns:
        ValidationResult with any issues found
    """
    result = ValidationResult()

    for col, (min_val, max_val) in range_rules.items():
        if col not in df.columns:
            continue

        try:
            values = pd.to_numeric(df[col], errors="coerce").dropna()
            if len(values) == 0:
                continue

            too_low = values < min_val
            if too_low.any():
                count = too_low.sum()
                result.add_warning(
                    f"Column '{col}' has {count} values below minimum ({min_val})"
                )

            too_high = values > max_val
            if too_high.any():
                count = too_high.sum()
                result.add_warning(
                    f"Column '{col}' has {count} values above maximum ({max_val})"
                )
        except Exception as e:
            result.add_error(f"Could not validate range for column '{col}': {e}")

    return result
179
+
180
+
181
def validate_completeness(
    df: pd.DataFrame, max_null_pct: float = 50.0, warn_null_pct: float = 25.0
) -> ValidationResult:
    """Validate data completeness (check for excessive null values).

    Args:
        df: DataFrame to validate
        max_null_pct: Maximum acceptable percentage of null values per
            column; columns above it produce an error
        warn_null_pct: Percentage of null values above which a column
            produces a warning (when it does not already produce an error)

    Returns:
        ValidationResult with any issues found
    """
    result = ValidationResult()
    total = len(df)

    # One vectorised pass over the frame. Iterating the per-column counts
    # also copes with duplicate column labels, where df[col] would return a
    # DataFrame instead of a Series.
    for col, null_count in df.isna().sum().items():
        null_count = int(null_count)
        if null_count == 0:
            continue

        pct = (null_count / total) * 100
        message = f"Column '{col}' has {null_count}/{total} ({pct:.1f}%) null values"

        if pct > max_null_pct:
            result.add_error(message)
        elif pct > warn_null_pct:
            result.add_warning(message)

    return result
211
+
212
+
213
def validate_dataframe(
    df: pd.DataFrame,
    required_columns: Optional[Set[str]] = None,
    range_rules: Optional[Dict[str, tuple]] = None,
    max_null_pct: float = 50.0,
) -> ValidationResult:
    """Run every validator on a DataFrame and merge the findings.

    The ID-format, date-field and completeness checks always run. The
    required-column and numeric-range checks run only when their
    argument is supplied and non-empty.

    Args:
        df: DataFrame to validate
        required_columns: Set of required column names (optional)
        range_rules: Dict mapping column names to (min, max) tuples (optional)
        max_null_pct: Maximum acceptable percentage of null values per column

    Returns:
        Combined ValidationResult from all checks; ``passed`` is True only
        when no check reported an error

    Example:
        >>> result = validate_dataframe(
        ...     df,
        ...     required_columns={'player_id', 'team_id'},
        ...     range_rules={'age': (15, 50), 'pts': (0, 100)}
        ... )
        >>> if not result.passed:
        ...     print(result)
    """
    combined = ValidationResult()

    # Run the checks in a fixed order so the merged messages are ordered
    # ID format, dates, required columns, ranges, completeness.
    partials = [validate_id_format(df), validate_date_fields(df)]
    if required_columns:
        partials.append(validate_required_columns(df, required_columns))
    if range_rules:
        partials.append(validate_numeric_ranges(df, range_rules))
    partials.append(validate_completeness(df, max_null_pct))

    for partial in partials:
        combined.errors.extend(partial.errors)
        combined.warnings.extend(partial.warnings)

    # The lists were extended directly, so derive the status from them.
    combined.passed = len(combined.errors) == 0

    return combined
272
+
273
+
274
# Default (min, max) bounds per column name, in the shape expected by the
# range_rules argument of the numeric-range validator.
NBA_RANGE_RULES = dict(
    age=(15, 50),
    # Counting stats
    pts=(0, 200),
    reb=(0, 50),
    ast=(0, 50),
    stl=(0, 20),
    blk=(0, 20),
    # Shooting percentages expressed as fractions
    fg_pct=(0.0, 1.0),
    fg3_pct=(0.0, 1.0),
    ft_pct=(0.0, 1.0),
    minutes=(0, 60),
    # Physical measurements
    height_inches=(60, 96),
    weight=(150, 350),
)
@@ -0,0 +1,69 @@
1
+ Metadata-Version: 2.4
2
+ Name: nbastatpy
3
+ Version: 0.2.0
4
+ Summary: An easy-to-use wrapper for nba_api to easily find data for a player, game, team, or season
5
+ Keywords: basketball,data,nba,sports,stats
6
+ Classifier: Intended Audience :: Science/Research
7
+ Classifier: License :: OSI Approved :: MIT License
8
+ Requires-Python: >=3.10
9
+ Description-Content-Type: text/markdown
10
+ License-File: LICENSE
11
+ Requires-Dist: bs4>=0.0.2
12
+ Requires-Dist: loguru>=0.7.3
13
+ Requires-Dist: nba-api>=1.6.1
14
+ Requires-Dist: pandas>=2.2.3
15
+ Requires-Dist: pillow>=11.0.0
16
+ Requires-Dist: requests>=2.32.3
17
+ Requires-Dist: rich>=13.9.4
18
+ Dynamic: license-file
19
+
20
+ # NBAStatPy
21
+
22
+ [![PyPI version](https://badge.fury.io/py/nbastatpy.svg)](https://badge.fury.io/py/nbastatpy)
23
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
24
+ [![CI](https://github.com/aberghammer-analytics/NBAStatPy/workflows/Run%20Pytest/badge.svg)](https://github.com/aberghammer-analytics/NBAStatPy/actions)
25
+
26
+ ## Overview
27
+
28
+ A simple, easy-to-use wrapper for the `nba_api` package to access NBA data for players, games, teams, and seasons.
29
+
30
+ ## Quick Start
31
+
32
+ ```python
33
+ from nbastatpy.player import Player
34
+
35
+ # Create a player object
36
+ player = Player("Giannis", season="2023", playoffs=True)
37
+
38
+ # Get data
39
+ awards = player.get_awards()
40
+ stats = player.get_career_stats()
41
+ ```
42
+
43
+ ## Main Classes
44
+
45
+ - **Player** - Access player stats, career data, and awards
46
+ - **Game** - Get boxscores, play-by-play, and game details
47
+ - **Season** - Query league-wide stats, lineups, and tracking data
48
+ - **Team** - Retrieve team rosters, stats, and splits
49
+
50
+
51
+ ### Standalone Usage
52
+
53
+ ```python
54
+ from nbastatpy.standardize import standardize_dataframe
55
+
56
+ df = standardize_dataframe(df, data_type='player')
57
+ ```
58
+
59
+ ## Installation
60
+
61
+ ### Pip
62
+ ```bash
63
+ pip install nbastatpy
64
+ ```
65
+
66
+ ### UV
67
+ ```bash
68
+ uv add nbastatpy
69
+ ```
@@ -0,0 +1,14 @@
1
+ nbastatpy/__init__.py,sha256=YSt-JI-4n442Chj_Z3ZMS6VZZyyF9pybHurT44qbpGk,386
2
+ nbastatpy/config.py,sha256=P0K7xe2F3lxubSlLQTybt9co3QyAD-455c3SUdz4m1c,10474
3
+ nbastatpy/game.py,sha256=HcMwCjMX6plzdsrHlkTEe2j8ksEMuKWxyIHYPe8medc,7537
4
+ nbastatpy/player.py,sha256=0c0cQobDHN8pFLdZIaFEoGNQbsCzO6BuUY4qxvqeO6Y,18298
5
+ nbastatpy/season.py,sha256=p2Nh0319PZN48TRTKcwdLuG2LHY_AbKjM4rElP2Ien8,26069
6
+ nbastatpy/standardize.py,sha256=CXYaI2mhhgkVu-aCCJK2ypAGpz6xOByTt_92lDeo7kA,19766
7
+ nbastatpy/team.py,sha256=fplEe_lLcICzL73zE3N8-FuoQRdqqPjDoYe8xb06Fgg,16056
8
+ nbastatpy/utils.py,sha256=iivvzljZIymLUm9CmO-k6120kbBHXwU2HfsUiQrQaKg,3631
9
+ nbastatpy/validators.py,sha256=2Ejt23vuIoDxAJTNxzOENWxAgbcgSmIbpTgdRKBlFLs,8828
10
+ nbastatpy-0.2.0.dist-info/licenses/LICENSE,sha256=TZeVWgOOMmZI_Puv-mvmD8h35JL78SyVE5A6JVISLV0,1078
11
+ nbastatpy-0.2.0.dist-info/METADATA,sha256=hmM8nHLEqhAqZP8RfsHYRtYmcf8NDCRNqXkHh5zSEJI,1830
12
+ nbastatpy-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
13
+ nbastatpy-0.2.0.dist-info/top_level.txt,sha256=PglfKWLerpCzEQgumDHwodzwYZe22wfRjzs-XEXu2J0,10
14
+ nbastatpy-0.2.0.dist-info/RECORD,,
@@ -1,4 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.6.1
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ nbastatpy
@@ -1,67 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: nbastatpy
3
- Version: 0.1.6
4
- Summary: An easy-to-use wrapper for nba_api to easily find data for a player, game, team, or season
5
- Home-page: https://github.com/aberghammer-analytics/NBAStatPy
6
- Keywords: basketball,data,nba,sports,stats
7
- Maintainer: Anthony Berghammer
8
- Maintainer-email: aberghammer.analytics@gmail.com
9
- Requires-Python: >=3.10,<4.0
10
- Classifier: Intended Audience :: Science/Research
11
- Classifier: License :: OSI Approved :: MIT License
12
- Classifier: Programming Language :: Python :: 3
13
- Classifier: Programming Language :: Python :: 3.10
14
- Classifier: Programming Language :: Python :: 3.11
15
- Requires-Dist: bs4 (>=0.0.2,<0.0.3)
16
- Requires-Dist: loguru (>=0.7.2,<0.8.0)
17
- Requires-Dist: nba_api (>=1.4.1,<2.0.0)
18
- Requires-Dist: pandas (>=2.2.2,<3.0.0)
19
- Requires-Dist: pillow (>=10.3.0,<11.0.0)
20
- Requires-Dist: requests (>=2.31.0,<3.0.0)
21
- Requires-Dist: rich (>=13.7.1,<14.0.0)
22
- Project-URL: Documentation, https://github.com/aberghammer-analytics/NBAStatPy/blob/main/README.md
23
- Project-URL: Repository, https://github.com/aberghammer-analytics/NBAStatPy
24
- Description-Content-Type: text/markdown
25
-
26
- # NBAStatPy
27
-
28
- ## Overview
29
-
30
- This is an easy-to-use wrapper for the `nba_api` package. The goal is to be able to easily access and find data for a player, game, team, or season.
31
-
32
- The data is accessed through a class based on how you're searching for it. A quickstart example is shown below. Currently there are 4 classes:
33
-
34
- - `Game`
35
- - `Player`
36
- - `Season`
37
- - `Team`
38
-
39
- ## Quickstart
40
-
41
- To get started you can import the class that represents the data you're searching for.
42
-
43
- ```{python}
44
- from nbastatpy.player import Player
45
- ```
46
-
47
- Then you build a player using either an ID from stats.nba.com or the player's name. When you're building the player object you can add additional search data like season, data format, or playoffs vs. regular season.
48
-
49
- ```{python}
50
- player = Player(
51
- "Giannis",
52
- season="2020",
53
- playoffs=True,
54
- permode="PerGame"
55
- )
56
- ```
57
-
58
- Once you have the player object, you can get different datasets based on the criteria. For instance, you can get the awards the player has won by doing the following:
59
-
60
- ```{python}
61
- player.get_awards()
62
- ```
63
-
64
- This returns a pandas dataframe with the awards won by the player each year.
65
-
66
- There are a lot of endpoints and various arguments for more complex queries like tracking and synergy datasets.
67
-
@@ -1,10 +0,0 @@
1
- nbastatpy/__init__.py,sha256=LhTkEQMCuJU5UMrQliKi4z3HtuMxxDY5_79ZJ0IB5PU,18
2
- nbastatpy/game.py,sha256=RSUgATPk6xBj6kFqtQ74IfnjdwMUuRhaZWEflO84Qjk,4631
3
- nbastatpy/player.py,sha256=NHa0WTuw9301mfPSSsXTWg87_xGS8GjP1Fh1JYkTK8s,17311
4
- nbastatpy/season.py,sha256=0zEWb_iBcsKe42wsU9ISxbUl6HU2Tbt1DISRJXFyj_A,19732
5
- nbastatpy/team.py,sha256=gjjIJc4JoYAjmJII9Mw1FDrDMl0AJjFMgVeIEvPnNNg,15250
6
- nbastatpy/utils.py,sha256=mFH1SqQUWR_Vpl154urgvOZWf5HtflIrlj1eGNzhCeY,2739
7
- nbastatpy-0.1.6.dist-info/LICENSE,sha256=TZeVWgOOMmZI_Puv-mvmD8h35JL78SyVE5A6JVISLV0,1078
8
- nbastatpy-0.1.6.dist-info/METADATA,sha256=KPD6Y4p63u4q-Cq8cDJrRc-1pr6Fzh9irMbXvTiXgJQ,2355
9
- nbastatpy-0.1.6.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
10
- nbastatpy-0.1.6.dist-info/RECORD,,