PyPI - nbastatpy - Versions diffs - 0.1.6__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

nbastatpy 0.1.6__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of nbastatpy might be problematic. Click here for more details.

Files changed (16) hide show

nbastatpy/__init__.py +17 -1
nbastatpy/config.py +445 -0
nbastatpy/game.py +131 -30
nbastatpy/player.py +58 -22
nbastatpy/season.py +272 -50
nbastatpy/standardize.py +529 -0
nbastatpy/team.py +49 -16
nbastatpy/utils.py +30 -3
nbastatpy/validators.py +288 -0
nbastatpy-0.2.0.dist-info/METADATA +69 -0
nbastatpy-0.2.0.dist-info/RECORD +14 -0
{nbastatpy-0.1.6.dist-info → nbastatpy-0.2.0.dist-info}/WHEEL +2 -1
nbastatpy-0.2.0.dist-info/top_level.txt +1 -0
nbastatpy-0.1.6.dist-info/METADATA +0 -67
nbastatpy-0.1.6.dist-info/RECORD +0 -10
{nbastatpy-0.1.6.dist-info → nbastatpy-0.2.0.dist-info/licenses}/LICENSE +0 -0

nbastatpy/utils.py CHANGED Viewed

@@ -4,7 +4,6 @@ import pandas as pd
 class PlayTypes:
     PERMODE = {
         "PERGAME": "PerGame",
         "PER36": "Per36",
@@ -68,7 +67,6 @@ class PlayTypes:
 class Formatter:
     def get_current_season_year() -> str:
         current_datetime = datetime.now()
         current_season_year = current_datetime.year
@@ -76,6 +74,36 @@ class Formatter:
             current_season_year -= 1
         return current_season_year
+    def normalize_season_year(season_input) -> int:
+        """
+        Normalize various season year inputs to a 4-digit starting year.
+        Args:
+            season_input: Can be int or str. Examples: 2022, "2022", 22, "22",
+                "2022-23", "22-23"
+        Returns:
+            int: The starting year of the season (e.g., 2022 for 2022-23 season)
+        Raises:
+            ValueError: If the starting-year portion is not an integer
+        """
+        # Only the part before the first "-" matters: "2022-23" and "22-23"
+        # both name a season by its starting year.
+        start_year = str(season_input).strip().split("-", 1)[0].strip()
+        year = int(start_year)
+        # Expand 2-digit years with a 1950-2049 pivot: 00-49 -> 2000-2049,
+        # 50-99 -> 1950-1999. This now also applies to season spans, so
+        # "22-23" yields 2022 rather than 22.
+        if year < 100:
+            year += 2000 if year < 50 else 1900
+        return year
     def format_season(season_year: int) -> str:
         """Format a season's starting year as a season label, e.g. 2022 -> "2022-23"."""
         start = int(season_year)
         # The suffix is the following year with its first two characters dropped.
         return f"{start}-{str(start + 1)[2:]}"
@@ -86,7 +114,6 @@ class Formatter:
         return next(value for value in row if pd.notna(value))
     def check_playtype(play: str, playtypes: dict) -> str:
         play = play.replace("_", "").replace("-", "").upper()
         if play == "ALL":

nbastatpy/validators.py ADDED Viewed

@@ -0,0 +1,288 @@
+from typing import Dict, List, Optional, Set
+import pandas as pd
+from nbastatpy.config import DateFields, IDFields
+class ValidationResult:
+    """Container for validation results.
+    Attributes:
+        errors: Messages for failed checks; adding one marks the result failed.
+        warnings: Messages for suspicious findings that do not fail validation.
+        passed: True until an error is added (or a caller sets it to False).
+    """
+    def __init__(self):
+        """Initialize an empty, passing validation result."""
+        self.errors: List[str] = []
+        self.warnings: List[str] = []
+        self.passed: bool = True
+    def add_error(self, message: str) -> None:
+        """Add an error message and mark the result as failed.
+        Args:
+            message: Error message to add
+        """
+        self.errors.append(message)
+        self.passed = False
+    def add_warning(self, message: str) -> None:
+        """Add a warning message; the passed status is left unchanged.
+        Args:
+            message: Warning message to add
+        """
+        self.warnings.append(message)
+    def __repr__(self) -> str:
+        """Compact summary for debugging and log output."""
+        return (
+            f"{type(self).__name__}(passed={self.passed}, "
+            f"errors={len(self.errors)}, warnings={len(self.warnings)})"
+        )
+    def __str__(self) -> str:
+        """Return a human-readable, multi-line report of errors and warnings."""
+        if self.passed and not self.warnings:
+            return "Validation passed with no issues"
+        lines: List[str] = []
+        # Errors are listed before warnings, each under a counted heading.
+        for label, messages in (("Errors", self.errors), ("Warnings", self.warnings)):
+            if messages:
+                lines.append(f"{label} ({len(messages)}):")
+                lines.extend(f"  - {message}" for message in messages)
+        if not lines:
+            # `passed` was set to False directly without recording a message;
+            # say so instead of returning an empty string.
+            return "Validation failed with no recorded messages"
+        return "\n".join(lines)
+def validate_id_format(df: pd.DataFrame) -> ValidationResult:
+    """Validate that ID fields are properly formatted.
+    Every column of ``df`` named in ``IDFields.ID_FIELDS`` is checked over its
+    non-null values: each value's string form must be 10 characters long, and
+    the values must be convertible with ``pd.to_numeric``.
+    Args:
+        df: DataFrame to validate
+    Returns:
+        ValidationResult with one error per failed check per column
+    """
+    result = ValidationResult()
+    for id_field in IDFields.ID_FIELDS:
+        if id_field not in df.columns:
+            continue
+        non_null = df[id_field].dropna()
+        if len(non_null) == 0:
+            continue
+        # Length is measured on the string form of each value.
+        invalid_length = non_null.astype(str).str.len() != 10
+        if invalid_length.any():
+            count = invalid_length.sum()
+            result.add_error(
+                f"Column '{id_field}' has {count} values not formatted as 10-digit IDs"
+            )
+        # to_numeric raises ValueError for unparseable strings and TypeError for
+        # values it cannot handle at all; both mean "non-numeric" here.
+        try:
+            pd.to_numeric(non_null, errors="raise")
+        except (ValueError, TypeError):
+            result.add_error(f"Column '{id_field}' contains non-numeric values")
+    return result
+def validate_date_fields(df: pd.DataFrame) -> ValidationResult:
+    """Validate that date fields are properly parsed.
+    For every column of ``df`` named in ``DateFields.DATE_FIELDS`` this warns
+    when the column is still stored as object or string data, and reports null
+    values: an error above 50% nulls, a warning above 10%.
+    Args:
+        df: DataFrame to validate
+    Returns:
+        ValidationResult with any issues found
+    """
+    result = ValidationResult()
+    total = len(df)
+    for date_field in DateFields.DATE_FIELDS:
+        if date_field not in df.columns:
+            continue
+        column = df[date_field]
+        if pd.api.types.is_object_dtype(column):
+            result.add_warning(
+                f"Column '{date_field}' is still object type, may not be properly parsed"
+            )
+        elif pd.api.types.is_string_dtype(column):
+            # Dedicated pandas string dtypes are not "object", but a date
+            # column stored that way is just as unparsed.
+            result.add_warning(
+                f"Column '{date_field}' is still string type ({column.dtype}), "
+                "may not be properly parsed"
+            )
+        null_count = column.isna().sum()
+        if null_count == 0:
+            continue
+        # total > 0 here, because at least one row is null.
+        pct = (null_count / total) * 100
+        message = (
+            f"Column '{date_field}' has {null_count}/{total} ({pct:.1f}%) null values"
+        )
+        if pct > 50:
+            result.add_error(message)
+        elif pct > 10:
+            result.add_warning(message)
+    return result
+def validate_required_columns(
+    df: pd.DataFrame, required_columns: Set[str]
+) -> ValidationResult:
+    """Validate that required columns are present.
+    Args:
+        df: DataFrame to validate
+        required_columns: Required column names. A set is expected; any other
+            iterable of names (list, tuple) or a single name is also accepted.
+    Returns:
+        ValidationResult with a single error listing the missing columns in
+        sorted order, or no issues when every required column is present
+    """
+    result = ValidationResult()
+    # A bare string is one column name, not an iterable of characters.
+    if isinstance(required_columns, str):
+        required_columns = {required_columns}
+    # Coerce to a set so lists and tuples work with the set difference below.
+    missing = set(required_columns) - set(df.columns)
+    if missing:
+        # Sort on the string form so non-string labels cannot break
+        # sorting or joining.
+        names = ", ".join(sorted(str(name) for name in missing))
+        result.add_error(f"Missing required columns: {names}")
+    return result
+def validate_numeric_ranges(
+    df: pd.DataFrame, range_rules: Dict[str, tuple]
+) -> ValidationResult:
+    """Validate that numeric columns are within expected ranges.
+    Columns named in ``range_rules`` but absent from ``df`` are skipped. Values
+    are converted with ``pd.to_numeric(errors="coerce")``, so entries that
+    cannot be converted become null and are ignored. Bounds are inclusive: only
+    values strictly below ``min`` or strictly above ``max`` are reported.
+    Args:
+        df: DataFrame to validate
+        range_rules: Dict mapping column names to (min, max) tuples; either
+            bound may be None to leave that side unbounded
+    Returns:
+        ValidationResult with a warning per violated bound, or an error for a
+        column whose range could not be evaluated
+    """
+    result = ValidationResult()
+    for col, (min_val, max_val) in range_rules.items():
+        if col not in df.columns:
+            continue
+        try:
+            values = pd.to_numeric(df[col], errors="coerce").dropna()
+            # Count each kind of violation once instead of building every
+            # mask twice.
+            below = 0 if min_val is None else int((values < min_val).sum())
+            above = 0 if max_val is None else int((values > max_val).sum())
+        except (TypeError, ValueError) as e:
+            result.add_error(f"Could not validate range for column '{col}': {e}")
+            continue
+        if below:
+            result.add_warning(
+                f"Column '{col}' has {below} values below minimum ({min_val})"
+            )
+        if above:
+            result.add_warning(
+                f"Column '{col}' has {above} values above maximum ({max_val})"
+            )
+    return result
+def validate_completeness(
+    df: pd.DataFrame, max_null_pct: float = 50.0, warn_null_pct: float = 25.0
+) -> ValidationResult:
+    """Validate data completeness (check for excessive null values).
+    Args:
+        df: DataFrame to validate
+        max_null_pct: Maximum acceptable percentage of null values per column;
+            columns above it produce an error
+        warn_null_pct: Percentage above which a column that is still within
+            max_null_pct produces a warning (defaults to the previously
+            hard-coded 25)
+    Returns:
+        ValidationResult with any issues found
+    """
+    result = ValidationResult()
+    total = len(df)
+    # One vectorised null count for the whole frame. Iterating the resulting
+    # Series also copes with duplicate column labels, where df[col] would
+    # return a DataFrame instead of a Series.
+    for col, null_count in df.isna().sum().items():
+        if null_count == 0:
+            continue
+        # total > 0 here, because at least one row is null.
+        pct = (null_count / total) * 100
+        message = f"Column '{col}' has {null_count}/{total} ({pct:.1f}%) null values"
+        if pct > max_null_pct:
+            result.add_error(message)
+        elif pct > warn_null_pct:
+            result.add_warning(message)
+    return result
+def validate_dataframe(
+    df: pd.DataFrame,
+    required_columns: Optional[Set[str]] = None,
+    range_rules: Optional[Dict[str, tuple]] = None,
+    max_null_pct: float = 50.0,
+) -> ValidationResult:
+    """Run every validator on a DataFrame and merge their findings.
+    Checks run in a fixed order: ID format, date fields, required columns
+    (only when ``required_columns`` is non-empty), numeric ranges (only when
+    ``range_rules`` is non-empty), then completeness. Errors and warnings are
+    concatenated in that order.
+    Args:
+        df: DataFrame to validate
+        required_columns: Set of required column names (optional)
+        range_rules: Dict mapping column names to (min, max) tuples (optional)
+        max_null_pct: Maximum acceptable percentage of null values per column
+    Returns:
+        Combined ValidationResult from all checks; ``passed`` is True only
+        when no check recorded an error
+    Example:
+        >>> result = validate_dataframe(
+        ...     df,
+        ...     required_columns={'player_id', 'team_id'},
+        ...     range_rules={'age': (15, 50), 'pts': (0, 100)}
+        ... )
+        >>> if not result.passed:
+        ...     print(result)
+    """
+    # Gather the individual reports in the order they are merged.
+    reports = [validate_id_format(df), validate_date_fields(df)]
+    if required_columns:
+        reports.append(validate_required_columns(df, required_columns))
+    if range_rules:
+        reports.append(validate_numeric_ranges(df, range_rules))
+    reports.append(validate_completeness(df, max_null_pct))
+    combined_result = ValidationResult()
+    for report in reports:
+        combined_result.errors.extend(report.errors)
+        combined_result.warnings.extend(report.warnings)
+    # The merged result passes only if no individual check reported an error.
+    combined_result.passed = not combined_result.errors
+    return combined_result
+# Common range rules for NBA data.
+# Maps a column name to a (min, max) tuple, the shape validate_numeric_ranges
+# takes as `range_rules`. That function only flags values strictly below min
+# or strictly above max, so the bounds themselves are accepted.
+# NOTE(review): units are not stated here -- presumably per-game stat totals,
+# percentages as 0-1 fractions, height in inches and weight in pounds; confirm
+# against the standardized column definitions before relying on these limits.
+NBA_RANGE_RULES = {
+    "age": (15, 50),
+    "pts": (0, 200),
+    "reb": (0, 50),
+    "ast": (0, 50),
+    "stl": (0, 20),
+    "blk": (0, 20),
+    "fg_pct": (0.0, 1.0),
+    "fg3_pct": (0.0, 1.0),
+    "ft_pct": (0.0, 1.0),
+    "minutes": (0, 60),
+    "height_inches": (60, 96),
+    "weight": (150, 350),
+}

nbastatpy-0.2.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,69 @@
+Metadata-Version: 2.4
+Name: nbastatpy
+Version: 0.2.0
+Summary: An easy-to-use wrapper for nba_api to easily find data for a player, game, team, or season
+Keywords: basketball,data,nba,sports,stats
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: MIT License
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: bs4>=0.0.2
+Requires-Dist: loguru>=0.7.3
+Requires-Dist: nba-api>=1.6.1
+Requires-Dist: pandas>=2.2.3
+Requires-Dist: pillow>=11.0.0
+Requires-Dist: requests>=2.32.3
+Requires-Dist: rich>=13.9.4
+Dynamic: license-file
+# NBAStatPy
+[![PyPI version](https://badge.fury.io/py/nbastatpy.svg)](https://badge.fury.io/py/nbastatpy)
+[![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
+[![CI](https://github.com/aberghammer-analytics/NBAStatPy/workflows/Run%20Pytest/badge.svg)](https://github.com/aberghammer-analytics/NBAStatPy/actions)
+## Overview
+A simple, easy-to-use wrapper for the `nba_api` package to access NBA data for players, games, teams, and seasons.
+## Quick Start
+```python
+from nbastatpy.player import Player
+# Create a player object
+player = Player("Giannis", season="2023", playoffs=True)
+# Get data
+awards = player.get_awards()
+stats = player.get_career_stats()
+```
+## Main Classes
+- **Player** - Access player stats, career data, and awards
+- **Game** - Get boxscores, play-by-play, and game details
+- **Season** - Query league-wide stats, lineups, and tracking data
+- **Team** - Retrieve team rosters, stats, and splits
+### Standalone Usage
+```python
+from nbastatpy.standardize import standardize_dataframe
+df = standardize_dataframe(df, data_type='player')
+```
+## Installation
+### Pip
+```bash
+pip install nbastatpy
+```
+### UV
+```bash
+uv add nbastatpy
+```

nbastatpy-0.2.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,14 @@
+nbastatpy/__init__.py,sha256=YSt-JI-4n442Chj_Z3ZMS6VZZyyF9pybHurT44qbpGk,386
+nbastatpy/config.py,sha256=P0K7xe2F3lxubSlLQTybt9co3QyAD-455c3SUdz4m1c,10474
+nbastatpy/game.py,sha256=HcMwCjMX6plzdsrHlkTEe2j8ksEMuKWxyIHYPe8medc,7537
+nbastatpy/player.py,sha256=0c0cQobDHN8pFLdZIaFEoGNQbsCzO6BuUY4qxvqeO6Y,18298
+nbastatpy/season.py,sha256=p2Nh0319PZN48TRTKcwdLuG2LHY_AbKjM4rElP2Ien8,26069
+nbastatpy/standardize.py,sha256=CXYaI2mhhgkVu-aCCJK2ypAGpz6xOByTt_92lDeo7kA,19766
+nbastatpy/team.py,sha256=fplEe_lLcICzL73zE3N8-FuoQRdqqPjDoYe8xb06Fgg,16056
+nbastatpy/utils.py,sha256=iivvzljZIymLUm9CmO-k6120kbBHXwU2HfsUiQrQaKg,3631
+nbastatpy/validators.py,sha256=2Ejt23vuIoDxAJTNxzOENWxAgbcgSmIbpTgdRKBlFLs,8828
+nbastatpy-0.2.0.dist-info/licenses/LICENSE,sha256=TZeVWgOOMmZI_Puv-mvmD8h35JL78SyVE5A6JVISLV0,1078
+nbastatpy-0.2.0.dist-info/METADATA,sha256=hmM8nHLEqhAqZP8RfsHYRtYmcf8NDCRNqXkHh5zSEJI,1830
+nbastatpy-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+nbastatpy-0.2.0.dist-info/top_level.txt,sha256=PglfKWLerpCzEQgumDHwodzwYZe22wfRjzs-XEXu2J0,10
+nbastatpy-0.2.0.dist-info/RECORD,,

{nbastatpy-0.1.6.dist-info → nbastatpy-0.2.0.dist-info}/WHEEL RENAMED Viewed

@@ -1,4 +1,5 @@
 Wheel-Version: 1.0
-Generator: poetry-core 1.6.1
+Generator: setuptools (80.9.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

nbastatpy-0.2.0.dist-info/top_level.txt ADDED Viewed

@@ -0,0 +1 @@
+nbastatpy

nbastatpy-0.1.6.dist-info/METADATA DELETED Viewed

@@ -1,67 +0,0 @@
-Metadata-Version: 2.1
-Name: nbastatpy
-Version: 0.1.6
-Summary: An easy-to-use wrapper for nba_api to easily find data for a player, game, team, or season
-Home-page: https://github.com/aberghammer-analytics/NBAStatPy
-Keywords: basketball,data,nba,sports,stats
-Maintainer: Anthony Berghammer
-Maintainer-email: aberghammer.analytics@gmail.com
-Requires-Python: >=3.10,<4.0
-Classifier: Intended Audience :: Science/Research
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Requires-Dist: bs4 (>=0.0.2,<0.0.3)
-Requires-Dist: loguru (>=0.7.2,<0.8.0)
-Requires-Dist: nba_api (>=1.4.1,<2.0.0)
-Requires-Dist: pandas (>=2.2.2,<3.0.0)
-Requires-Dist: pillow (>=10.3.0,<11.0.0)
-Requires-Dist: requests (>=2.31.0,<3.0.0)
-Requires-Dist: rich (>=13.7.1,<14.0.0)
-Project-URL: Documentation, https://github.com/aberghammer-analytics/NBAStatPy/blob/main/README.md
-Project-URL: Repository, https://github.com/aberghammer-analytics/NBAStatPy
-Description-Content-Type: text/markdown
-# NBAStatPy
-## Overview
-This is an easy-to-use wrapper for the `nba_api` package. The goal is to be able to easily access and find data for a player, game, team, or season.
-The data is accessed through a class based on how you're searching for it. A quickstart example is shown below. Currently there are 4 classes:
-- `Game`
-- `Player`
-- `Season`
-- `Team`
-## Quickstart
-To get started you can import the class that represents the data you're searching for.
-```{python}
-from nbastatpy.player import Player
-```
-Then you build a player using either an ID from stats.nba.com or the player's name. When you're building the player object you can add additional search data like season, data format, or playoffs vs. regular season.
-```{python}
-player = Player(
-    "Giannis",
-    season="2020",
-    playoffs=True,
-    permode="PerGame"
-)
-```
-Once you have the player object, you can get different datasets based on the criteria. For instance, you can get the awards the player has won by doing the following:
-```{python}
-player.get_awards()
-```
-This returns a pandas dataframe with the awards won by the player each year.
-There are a lot of endpoints and various arguments for more complex queries like tracking and synergy datasets.

nbastatpy-0.1.6.dist-info/RECORD DELETED Viewed

@@ -1,10 +0,0 @@
-nbastatpy/__init__.py,sha256=LhTkEQMCuJU5UMrQliKi4z3HtuMxxDY5_79ZJ0IB5PU,18
-nbastatpy/game.py,sha256=RSUgATPk6xBj6kFqtQ74IfnjdwMUuRhaZWEflO84Qjk,4631
-nbastatpy/player.py,sha256=NHa0WTuw9301mfPSSsXTWg87_xGS8GjP1Fh1JYkTK8s,17311
-nbastatpy/season.py,sha256=0zEWb_iBcsKe42wsU9ISxbUl6HU2Tbt1DISRJXFyj_A,19732
-nbastatpy/team.py,sha256=gjjIJc4JoYAjmJII9Mw1FDrDMl0AJjFMgVeIEvPnNNg,15250
-nbastatpy/utils.py,sha256=mFH1SqQUWR_Vpl154urgvOZWf5HtflIrlj1eGNzhCeY,2739
-nbastatpy-0.1.6.dist-info/LICENSE,sha256=TZeVWgOOMmZI_Puv-mvmD8h35JL78SyVE5A6JVISLV0,1078
-nbastatpy-0.1.6.dist-info/METADATA,sha256=KPD6Y4p63u4q-Cq8cDJrRc-1pr6Fzh9irMbXvTiXgJQ,2355
-nbastatpy-0.1.6.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
-nbastatpy-0.1.6.dist-info/RECORD,,

{nbastatpy-0.1.6.dist-info → nbastatpy-0.2.0.dist-info/licenses}/LICENSE RENAMED Viewed

File without changes

nbastatpy 0.1.6__py3-none-any.whl → 0.2.0__py3-none-any.whl

Potentially problematic release.

nbastatpy 0.1.6__py3-none-any.whl → 0.2.0__py3-none-any.whl