hkjc-0.3.18-py3-none-any.whl → hkjc-0.3.21-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hkjc/__init__.py +4 -4
- hkjc/features.py +6 -0
- hkjc/harville_model.py +122 -31
- hkjc/historical.py +29 -44
- hkjc/live.py +22 -18
- hkjc/processing.py +16 -3
- {hkjc-0.3.18.dist-info → hkjc-0.3.21.dist-info}/METADATA +1 -1
- hkjc-0.3.21.dist-info/RECORD +14 -0
- hkjc/analysis.py +0 -3
- hkjc-0.3.18.dist-info/RECORD +0 -14
- {hkjc-0.3.18.dist-info → hkjc-0.3.21.dist-info}/WHEEL +0 -0
hkjc/__init__.py
CHANGED
@@ -4,10 +4,10 @@ This module re-exports commonly used symbols from the submodules.
 """
 from importlib.metadata import version as _version
 
-__all__ = ["live", "
+__all__ = ["live", "features",
            "generate_all_qp_trades", "generate_all_pla_trades", "pareto_filter",
-
-
+           "speedpro_energy", "speedmap", "harveille_model",
+           "generate_historical_data"]
 
 try:
     __version__ = _version(__name__)
@@ -17,4 +17,4 @@ except Exception:  # pragma: no cover - best-effort version resolution
 from .processing import generate_all_qp_trades, generate_all_pla_trades, generate_historical_data
 from .utils import pareto_filter
 from .speedpro import speedmap, speedpro_energy
-from . import harville_model, live
+from . import harville_model, live, features
hkjc/features.py
ADDED
@@ -0,0 +1,6 @@
+""" Polars expressions for commonly-used analysis features, subject to frequent changes.
+"""
+import polars as pl
+
+rating_diff = (pl.col('Rtg').max().over('RaceId')-pl.col('Rtg')).alias('RtgDiff')
+frontrunner_pct = (pl.col('FavoriteRunningStyle')=='FrontRunner').mean().over('RaceId').alias('FRPct')
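The two expressions added in features.py are plain Polars window expressions. A minimal usage sketch, assuming a frame that already carries the RaceId, Rtg, and FavoriteRunningStyle columns produced by the historical pipeline (the sample values below are made up):

    import polars as pl
    from hkjc import features

    # Hypothetical race data with the columns the expressions expect
    df = pl.DataFrame({
        'RaceId': ['20240101ST1'] * 3,
        'Rtg': [80, 74, 62],
        'FavoriteRunningStyle': ['FrontRunner', 'Closer', 'FrontRunner'],
    })

    # RtgDiff: rating points below the top-rated horse in the same race
    # FRPct: share of runners in the race whose favourite style is FrontRunner
    out = df.with_columns(features.rating_diff, features.frontrunner_pct)
    print(out)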
hkjc/harville_model.py
CHANGED
@@ -198,10 +198,12 @@ class HarvilleModel:
         lambda_qin (float): Weight for Qin pool loss
         lambda_quinella (float): Weight for Quinella pool loss
         lambda_banker (float): Weight for Banker pool loss
+        takeout_rate (float): House take out rate (e.g., 0.175 = 17.5%)
     """
 
     def __init__(self, n_horses: int, lambda_win: float = LAMBDA_DEFAULTS['WIN'], lambda_qin: float = LAMBDA_DEFAULTS['QIN'],
-                 lambda_quinella: float = LAMBDA_DEFAULTS['QPL'], lambda_banker: float = LAMBDA_DEFAULTS['PLA']
+                 lambda_quinella: float = LAMBDA_DEFAULTS['QPL'], lambda_banker: float = LAMBDA_DEFAULTS['PLA'],
+                 takeout_rate: float = 0.175) -> None:
         """
         Initialize model.
 
@@ -211,6 +213,9 @@ class HarvilleModel:
             lambda_qin: Weight for Qin odds (prob pair finishes 1st-2nd)
             lambda_quinella: Weight for Quinella odds (prob pair in top 3)
             lambda_banker: Weight for Banker odds (prob horse in top 3)
+            takeout_rate: House take out rate as decimal (default 0.175 = 17.5%).
+                The observed odds include the house's take, which makes
+                them higher than true odds. This parameter adjusts for that.
 
         Raises:
             ValueError: If n_horses > 20 (exponential complexity warning)
@@ -223,9 +228,51 @@ class HarvilleModel:
         self.lambda_qin = lambda_qin
         self.lambda_quinella = lambda_quinella
         self.lambda_banker = lambda_banker
+        self.takeout_rate = takeout_rate
         self._eval_count = 0
         self.result = None
 
+    def _adjust_for_takeout(self, probs: Optional[np.ndarray]) -> Optional[np.ndarray]:
+        """
+        Adjust observed probabilities to remove house takeout rate.
+
+        Observed odds from the betting market include the house's take, causing
+        the sum of implied probabilities to exceed 1.0. This method adjusts them
+        to represent true probabilities.
+
+        Args:
+            probs: Observed probabilities (can be 1D or 2D array)
+
+        Returns:
+            Adjusted probabilities with takeout removed, or None if input is None
+        """
+        if probs is None:
+            return None
+
+        # Multiply by (1 - takeout_rate) to remove the house edge
+        adjusted = probs * (1.0 - self.takeout_rate)
+
+        return adjusted
+
+    def _probs_to_market_odds(self, probs: np.ndarray) -> np.ndarray:
+        """
+        Convert fitted probabilities to market odds including takeout rate.
+
+        This converts true probabilities (which sum to 1.0) back to decimal odds
+        as they would appear in the betting market, which includes the house's
+        takeout rate. The resulting odds can be directly compared to observed odds.
+
+        Args:
+            probs: Fitted probabilities (1D or 2D array)
+
+        Returns:
+            Market odds (decimal format) with takeout reintroduced
+        """
+
+        # Convert true probabilities to market odds with takeout
+        # Market odds are worse (higher) than fair odds due to house edge
+        return (1.0 - self.takeout_rate) / probs
+
     def _loss(self, theta: np.ndarray, W_obs: Optional[np.ndarray],
               Qin_obs: Optional[np.ndarray], Q_obs: Optional[np.ndarray],
               b_obs: Optional[np.ndarray]) -> float:
@@ -290,26 +337,34 @@ class HarvilleModel:
 
         Returns:
             Dictionary containing:
-                - theta: Fitted strength parameters (n,)
-                - W_fitted: Fitted Win probabilities (n,)
-                - Qin_fitted: Fitted Qin probabilities (n, n)
-                - Q_fitted: Fitted Quinella probabilities (n, n)
-                - b_fitted: Fitted Banker probabilities (n,)
-                - P_fitted: Full place probability matrix (n, n), P[i,j] =
-                    prob horse i finishes in position j
-                - loss: Final loss value
                 - success: Whether optimization converged
                 - message: Optimizer status message
                 - n_eval: Number of loss function evaluations
+                - loss: Final loss value
+                - prob_fit: Dictionary of fitted probabilities (sum to 1.0)
+                    - theta: Fitted strength parameters (n,)
+                    - W: Win probabilities (n,)
+                    - Qin: Qin probabilities (n, n)
+                    - Q: Quinella probabilities (n, n)
+                    - b: Banker probabilities (n,)
+                    - P: Full place probability matrix (n, n), P[i,j] =
+                        prob horse i finishes in position j
+                - odds_fit: Dictionary of fitted market odds (directly comparable to observed)
+                    - WIN: Win odds (n,)
+                    - QIN: Qin odds (n, n)
+                    - QPL: Quinella Place odds (n, n)
+                    - PLA: Place odds (n,)
 
         Raises:
             ValueError: If no odds provided or shapes don't match n_horses
 
         Example:
-            >>> opt =
+            >>> opt = HarvilleModel(n_horses=10, takeout_rate=0.175)
             >>> results = opt.fit(W_obs=win_probs, Q_obs=quinella_probs)
-            >>> print(f"Fitted strengths: {results['theta']}")
+            >>> print(f"Fitted strengths: {results['prob_fit']['theta']}")
             >>> print(f"Converged: {results['success']}")
+            >>> # Compare fitted odds to observed odds
+            >>> diff = results['odds_fit']['WIN'] - observed_win_odds
         """
         if W_obs is None and Qin_obs is None and Q_obs is None and b_obs is None:
             raise ValueError("At least one type of odds must be provided")
@@ -323,6 +378,12 @@ class HarvilleModel:
         if b_obs is not None and b_obs.shape != (self.n,):
             raise ValueError(f"b_obs must be ({self.n},)")
 
+        # Adjust observed probabilities for house takeout rate
+        W_obs = self._adjust_for_takeout(W_obs)
+        Qin_obs = self._adjust_for_takeout(Qin_obs)
+        Q_obs = self._adjust_for_takeout(Q_obs)
+        b_obs = self._adjust_for_takeout(b_obs)
+
         if theta_init is None:
             if W_obs is not None:
                 theta_init = W_obs / W_obs.sum()
@@ -356,27 +417,41 @@ class HarvilleModel:
 
         W_fitted, Qin_fitted, Q_fitted, b_fitted, P_fitted = _compute_probabilities(theta_opt)
 
+        # Convert fitted probabilities to market odds (with takeout reintroduced)
+        WIN_odds_fitted = self._probs_to_market_odds(W_fitted)
+        PLA_odds_fitted = self._probs_to_market_odds(b_fitted)
+        QIN_odds_fitted = self._probs_to_market_odds(Qin_fitted)
+        QPL_odds_fitted = self._probs_to_market_odds(Q_fitted)
+
         self.result = {
-            'theta': theta_opt,
-            'W_fitted': W_fitted,
-            'Qin_fitted': Qin_fitted,
-            'Q_fitted': Q_fitted,
-            'b_fitted': b_fitted,
-            'P_fitted': P_fitted,
-            'loss': result.fun,
             'success': result.success,
             'message': result.message,
-            'n_eval': self._eval_count
+            'n_eval': self._eval_count,
+            'loss': result.fun,
+            'prob_fit': {
+                'theta': theta_opt,
+                'W': W_fitted,
+                'Qin': Qin_fitted,
+                'Q': Q_fitted,
+                'b': b_fitted,
+                'P': P_fitted
+            },
+            'odds_fit': {
+                'WIN': WIN_odds_fitted,
+                'QIN': QIN_odds_fitted,
+                'QPL': QPL_odds_fitted,
+                'PLA': PLA_odds_fitted
+            }
         }
 
         return self.result
 
-def fit_harville_to_odds(odds : dict[str, np.ndarray], lambdas : dict[str, float] = None) -> dict:
+def fit_harville_to_odds(odds : dict[str, np.ndarray], lambdas : dict[str, float] = None, takeout_rate: float = 0.175) -> dict:
     """
     Fit Harville model to observed betting odds.
 
-    At least one odds type must be provided. All odds should be
-    (not
+    At least one odds type must be provided. All odds should be decimal odds
+    (not probabilities). Matrices should be symmetric where applicable.
 
     Args:
         odds: Dictionary of odds arrays with types as keys.:
@@ -384,20 +459,35 @@ def fit_harville_to_odds(odds : dict[str, np.ndarray], lambdas : dict[str, float
         lambdas: Optional dictionary of lambda weights for each odds type.
             Keys can be 'WIN', 'QIN', 'QPL', 'PLA'. Defaults to
            {'WIN': 1.0, 'QIN': 2.0, 'QPL': 1.5, 'PLA': 0.7}
+        takeout_rate: House take out rate as decimal (default 0.175 = 17.5%).
+            The house keeps this percentage of the betting pool, causing
+            observed odds to be higher than fair odds.
 
     Returns:
         Dictionary containing:
-            - theta: Fitted strength parameters (n,)
-            - W_fitted: Fitted Win probabilities (n,)
-            - Qin_fitted: Fitted Qin probabilities (n, n)
-            - Q_fitted: Fitted Quinella probabilities (n, n)
-            - b_fitted: Fitted Banker probabilities (n,)
-            - P_fitted: Full place probability matrix (n, n), P[i,j] =
-                prob horse i finishes in position j
-            - loss: Final loss value
             - success: Whether optimization converged
             - message: Optimizer status message
            - n_eval: Number of loss function evaluations
+            - loss: Final loss value
+            - prob_fit: Dictionary of fitted probabilities (sum to 1.0)
+                - theta: Fitted strength parameters (n,)
+                - W: Win probabilities (n,)
+                - Qin: Qin probabilities (n, n)
+                - Q: Quinella probabilities (n, n)
+                - b: Banker probabilities (n,)
+                - P: Full place probability matrix (n, n), P[i,j] =
+                    prob horse i finishes in position j
+            - odds_fit: Dictionary of fitted market odds (directly comparable to observed)
+                - WIN: Win odds (n,)
+                - QIN: Qin odds (n, n)
+                - QPL: Quinella Place odds (n, n)
+                - PLA: Place odds (n,)
+
+    Example:
+        >>> odds = {'WIN': np.array([3.5, 4.2, 5.0, 8.5, 12.0])}
+        >>> result = fit_harville_to_odds(odds, takeout_rate=0.175)
+        >>> print(result['prob_fit']['theta'])  # True winning probabilities
+        >>> print(result['odds_fit']['WIN'])  # Fitted market odds (compare to input)
     """
     n_horses = None
     W_obs = None
@@ -443,7 +533,8 @@ def fit_harville_to_odds(odds : dict[str, np.ndarray], lambdas : dict[str, float
         lambda_win=merged_lambdas['WIN'],
         lambda_qin=merged_lambdas['QIN'],
         lambda_quinella=merged_lambdas['QPL'],
-        lambda_banker=merged_lambdas['PLA']
+        lambda_banker=merged_lambdas['PLA'],
+        takeout_rate=takeout_rate
     )
     result = ho.fit(W_obs=W_obs, Qin_obs=Qin_obs, Q_obs=Q_obs, b_obs=b_obs)
     return result
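A rough usage sketch of the reworked return structure and takeout handling, following the docstrings above; the odds values are made up, and the arithmetic in the comments mirrors the _adjust_for_takeout logic:

    import numpy as np
    from hkjc import harville_model

    # Hypothetical decimal win odds for a 5-horse race
    win_odds = np.array([3.5, 4.2, 5.0, 8.5, 12.0])

    # With a 17.5% takeout, the implied "true" win probability of the 3.5 horse is
    # roughly (1 / 3.5) * (1 - 0.175) ≈ 0.236, so the adjusted implied probabilities
    # no longer sum to more than 1.
    result = harville_model.fit_harville_to_odds({'WIN': win_odds}, takeout_rate=0.175)

    print(result['success'], result['loss'])
    print(result['prob_fit']['W'])     # fitted win probabilities (sum to 1.0)
    print(result['odds_fit']['WIN'])   # fitted market odds, directly comparable to win_odds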
hkjc/historical.py
CHANGED
@@ -20,7 +20,7 @@ incidents = ['DISQ', 'DNF', 'FE', 'ML', 'PU', 'TNP', 'TO',
 def _soupify(url: str) -> BeautifulSoup:
     """Fetch and parse a webpage and return BeautifulSoup object
     """
-    response = requests.get(url, timeout=
+    response = requests.get(url, timeout=180)
     response.raise_for_status()
     return BeautifulSoup(response.content, 'html.parser')
 
@@ -52,11 +52,13 @@ def _classify_running_style(df: pl.DataFrame, running_pos_col="RunningPosition")
         .alias("split_data").cast(pl.Int64, strict=False)
     ).unnest("split_data")
 
-    df = df.with_columns(
+    df = df.with_columns(
+        pl.col('FinishPosition').fill_null(pl.col('Position3')))
 
     df = df.with_columns([
         (pl.col("StartPosition")-pl.col("FinishPosition")).alias("PositionChange"),
-        pl.mean_horizontal("StartPosition", "Position2").alias(
+        pl.mean_horizontal("StartPosition", "Position2").alias(
+            "AvgStartPosition"),
     ]).with_columns(pl.when(pl.col("StartPosition").is_null()).then(pl.lit("--"))
                     .when((pl.col("AvgStartPosition") <= 3) & (pl.col("StartPosition") <= 3)).then(pl.lit("FrontRunner"))
                     .when((pl.col("PositionChange") >= 1) & (pl.col("StartPosition") >= 6)).then(pl.lit("Closer"))
@@ -77,35 +79,7 @@ def _extract_horse_data(horse_no: str) -> pl.DataFrame:
         pl.col('Date') != '')  # Remove empty rows
     horse_data = _classify_running_style(horse_data)
 
-
-    table = soup.find_all('table', class_='table_eng_text')
-    profile_data = _parse_html_table(table[0], skip_header=True)
-    profile_data = _parse_html_table(table[1], skip_header=True)
-
-    try:
-        current_rating = int(profile_data.filter(
-            pl.col("column_0").str.starts_with("Current Rating"))['column_2'].item(0))
-        season_start_rating = int(profile_data.filter(pl.col(
-            "column_0").str.starts_with("Start of Season Rating"))['column_2'].item(0))
-    except:
-        current_rating, season_start_rating = 0, 0
-
-    try:
-        last_rating = int(profile_data.filter(
-            pl.col("column_0").str.starts_with("Last Rating"))['column_2'].item(0))
-    except:
-        last_rating = 0
-
-    horse_info = {
-        'HorseID': horse_no,
-        'CurrentRating': current_rating,
-        'SeasonStartRating': season_start_rating,
-        'LastRating': last_rating if current_rating == 0 else current_rating
-    }
-    horse_data = (horse_data.with_columns([
-        pl.lit(value).alias(key) for key, value in horse_info.items()
-    ])
-    )
+    horse_data = horse_data.with_columns(pl.lit(horse_no).alias('HorseNo'))
 
     return horse_data
 
@@ -124,16 +98,16 @@ def _clean_horse_data(df: pl.DataFrame) -> pl.DataFrame:
         pl.col('Dr').cast(pl.Int64, strict=False),
         pl.col('Rtg').cast(pl.Int64, strict=False),
         pl.col('Dist').cast(pl.Int64, strict=False),
-        pl.col('WinOdds').cast(pl.Float64, strict=False)
-        pl.col('RaceIndex').cast(pl.Int64, strict=False)
+        pl.col('WinOdds').cast(pl.Float64, strict=False)
     ])
 
-    df = df.
+    df = (df.filter(~pl.col('FinishTime').str.starts_with('--'))
+          .with_columns(
         (
-            pl.col("FinishTime").str.
-            pl.col("FinishTime").str.
-        ).cast(pl.Float64).alias("FinishTime")
-    )
+            pl.col("FinishTime").str.splitn(".", 2).struct.field("field_0").cast(pl.Int64) * 60 +
+            pl.col("FinishTime").str.splitn(".", 2).struct.field("field_1").cast(pl.Float64)
+        ).cast(pl.Float64).round(2).alias("FinishTime")
+    ))
 
     df = df.with_columns(
         pl.col('RCTrackCourse').str.split_exact(' / ', 2)
@@ -141,12 +115,22 @@ def _clean_horse_data(df: pl.DataFrame) -> pl.DataFrame:
         .alias('RCTrackCourse')
     ).unnest('RCTrackCourse')
 
+    df = df.with_columns(
+        pl.when(pl.col('Date').str.len_chars() <= 8)
+        .then(pl.col('Date').str.strptime(pl.Date, '%d/%m/%y', strict=False))
+        .otherwise(pl.col('Date').str.strptime(pl.Date, '%d/%m/%Y'))
+    ).with_columns(
+        pl.concat_str(pl.col('Date').dt.strftime('%Y%m%d'), pl.col(
+            'Venue'), pl.col('RaceIndex')).alias('RaceId')
+    ).drop("VideoReplay")
     return df
 
+
 def get_horse_data(horse_no: str) -> pl.DataFrame:
     df = _extract_horse_data(horse_no)
     return _clean_horse_data(df)
 
+
 def _clean_race_data(df: pl.DataFrame) -> pl.DataFrame:
     """ Clean and convert horse data to suitable data types
     """
@@ -165,13 +149,14 @@ def _clean_race_data(df: pl.DataFrame) -> pl.DataFrame:
 
     df = df.with_columns(
         (
-            pl.col("FinishTime").str.
-            pl.col("FinishTime").str.
-        ).cast(pl.Float64).alias("FinishTime")
+            pl.col("FinishTime").str.splitn(":", 2).struct.field("field_0").cast(pl.Int64) * 60 +
+            pl.col("FinishTime").str.splitn(":", 2).struct.field("field_1").cast(pl.Float64)
+        ).cast(pl.Float64).round(2).alias("FinishTime")
     )
 
     return df
 
+
 def _extract_race_data(date: str, venue_code: str, race_number: int) -> pl.DataFrame:
     soup = _soupify_race_page(date, venue_code, race_number)
     table = soup.find('div', class_='race_tab').find('table')
@@ -211,5 +196,5 @@ def _extract_race_data(date: str, venue_code: str, race_number: int) -> pl.DataF
 
 
 def get_race_data(date: str, venue_code: str, race_number: int) -> pl.DataFrame:
-    df = _extract_race_data(date,venue_code,race_number)
-    return _clean_race_data(df)
+    df = _extract_race_data(date, venue_code, race_number)
+    return _clean_race_data(df)
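For context, the new FinishTime handling converts a minutes/seconds string to seconds via str.splitn. A standalone sketch with made-up times in the horse-record format (the race-page variant uses ':' as the first separator instead of '.'):

    import polars as pl

    df = pl.DataFrame({'FinishTime': ['1.09.12', '1.10.05', '--']})

    df = (df.filter(~pl.col('FinishTime').str.starts_with('--'))
            .with_columns(
                (
                    pl.col('FinishTime').str.splitn('.', 2).struct.field('field_0').cast(pl.Int64) * 60 +
                    pl.col('FinishTime').str.splitn('.', 2).struct.field('field_1').cast(pl.Float64)
                ).round(2).alias('FinishTime')  # '1.09.12' -> 69.12 seconds
            ))
    print(df)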
hkjc/live.py
CHANGED
@@ -7,8 +7,6 @@ import requests
 from cachetools.func import ttl_cache
 import numpy as np
 
-from .utils import _validate_date, _validate_venue_code
-
 HKJC_LIVEODDS_ENDPOINT = "https://info.cld.hkjc.com/graphql/base/"
 
 RACEMTG_PAYLOAD = {
@@ -245,7 +243,7 @@ query racing($date: String, $venueCode: String, $oddsTypes: [OddsType], $raceNo:
 
 
 @ttl_cache(maxsize=12, ttl=1000)
-def _fetch_live_races(date: str, venue_code: str) -> dict:
+def _fetch_live_races(date: str=None, venue_code: str=None) -> dict:
     """Fetch live race data from HKJC GraphQL endpoint."""
     payload = RACEMTG_PAYLOAD.copy()
     payload["variables"] = payload["variables"].copy()
@@ -265,9 +263,10 @@ def _fetch_live_races(date: str, venue_code: str) -> dict:
     if r.status_code != 200:
         raise RuntimeError(f"Request failed: {r.status_code} - {r.text}")
 
-
+    data = r.json()['data']['raceMeetings'][0]
+    races = data['races']
 
-    race_info = {}
+    race_info = {'Date': data['date'], 'Venue': data['venueCode'], 'Races': {}}
     for race in races:
         race_num = race['no']
         race_name = race['raceName_en']
@@ -277,12 +276,15 @@ def _fetch_live_races(date: str, venue_code: str) -> dict:
         race_class = race['raceClass_en']
         race_course = race['raceCourse']['displayCode']
 
-        runners = [{'
-                    '
-                    '
+        runners = [{'No': runner['no'],
+                    'Name': runner['name_en'],
+                    'Dr': runner['barrierDrawNumber'],
+                    'Rtg': int(runner['currentRating']),
+                    'Wt': int(runner['currentWeight']),
+                    'Handicap': int(runner['handicapWeight']),
                     'HorseNo': runner['horse']['code']
-                    } for runner in race['runners']]
-        race_info[race_num]={
+                    } for runner in race['runners'] if runner['status'] != "Standby"]
+        race_info['Races'][race_num] = {
             'No': race_num,
             'Name': race_name,
             'Class': race_class,
@@ -290,13 +292,13 @@ def _fetch_live_races(date: str, venue_code: str) -> dict:
             'Dist': race_dist,
             'Going': race_going,
             'Track': race_track,
-            'Runners': runners
+            'Runners': runners
         }
     return race_info
 
 
 @ttl_cache(maxsize=12, ttl=30)
-def _fetch_live_odds(date: str, venue_code: str, race_number: int, odds_type: Tuple[str] = ('PLA',
+def _fetch_live_odds(date: str, venue_code: str, race_number: int, odds_type: Tuple[str] = ('PLA', )) -> List[dict]:
     """Fetch live odds data from HKJC GraphQL endpoint."""
     payload = LIVEODDS_PAYLOAD.copy()
     payload["variables"] = payload["variables"].copy()
@@ -329,14 +331,14 @@ def _fetch_live_odds(date: str, venue_code: str, race_number: int, odds_type: Tu
     ]
 
 
-def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str] = ['PLA', 'QPL']) -> dict:
+def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str] = ['WIN', 'PLA', 'QPL', 'QIN']) -> dict:
     """Fetch live odds as numpy arrays.
 
     Args:
         date (str): Date in 'YYYY-MM-DD' format.
         venue_code (str): Venue code, e.g., 'ST' for Shatin, 'HV' for Happy Valley.
        race_number (int): Race number.
-        odds_type (List[str]): Types of odds to fetch. Default is ['PLA', 'QPL']. Currently the following types are supported:
+        odds_type (List[str]): Types of odds to fetch. Default is ['WIN', 'PLA', 'QPL', 'QIN']. Currently the following types are supported:
            - 'WIN': Win odds
            - 'PLA': Place odds
            - 'QIN': Quinella odds
@@ -348,11 +350,13 @@ def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str]
         If odds_type is 'WIN','PLA', returns a 1D array of place odds.
         If odds_type is 'QIN','QPL', returns a 2D array of quinella place odds.
     """
-    _validate_date(date)
-    _validate_venue_code(venue_code)
-
     race_info = _fetch_live_races(date, venue_code)
-    N = len(race_info[race_number]['Runners'])
+    N = len(race_info['Races'][race_number]['Runners'])
+
+    if (race_info['Date'] != date) or (race_info['Venue'] != venue_code):
+        print(f"[WARNING] Requested {date} {venue_code} but server returned {race_info['Date']} {race_info['Venue']}.")
+        date = race_info['Date']
+        venue_code = race_info['Venue']
 
     data = _fetch_live_odds(date, venue_code, race_number,
                             odds_type=tuple(odds_type))
hkjc/processing.py
CHANGED
@@ -42,7 +42,15 @@ def _historical_process_single_date_venue(date: str, venue_code: str) -> List[pl
 
 
 def generate_historical_data(start_date: str, end_date: str) -> pl.DataFrame:
-    """Generate historical race dataset from start_date to end_date
+    """Generate historical race dataset from start_date to end_date (inclusive).
+
+    Args:
+        start_date (str): Date in 'YYYY-MM-DD' format.
+        end_date (str): Date in 'YYYY-MM-DD' format.
+
+    Returns:
+        pl.DataFrame: DataFrame with all records.
+    """
     _validate_date(start_date)
     _validate_date(end_date)
     start_dt = dt.strptime(start_date, '%Y-%m-%d')
@@ -50,7 +58,7 @@ def generate_historical_data(start_date: str, end_date: str) -> pl.DataFrame:
 
     dfs = []
 
-    for date in tqdm(pl.date_range(start_dt, end_dt, interval='1d', eager=True), leave=False, desc='Scanning for horse IDs ...'):
+    for date in tqdm(pl.date_range(start_dt, end_dt, interval='1d', eager=True, closed='both'), leave=False, desc='Scanning for horse IDs ...'):
         for venue_code in ['ST', 'HV']:
             dfs += _historical_process_single_date_venue(date, venue_code)
 
@@ -63,7 +71,12 @@ def generate_historical_data(start_date: str, end_date: str) -> pl.DataFrame:
     # Use horse track records
     dfs = [_extract_horse_data(horse_id) for horse_id in tqdm(horse_ids, desc='Processing horses ...', leave=False)]
     df = pl.concat(dfs)
-
+
+    try:
+        return _clean_horse_data(df).filter(pl.col('Date').is_between(start_dt, end_dt))
+    except:
+        print('Failed to clean data. Returning raw data for debug.')
+        return df
 
 
 # ==========================
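A short sketch of the documented call; the window below is a placeholder, and the selected columns are assumed from the cleaning steps in historical.py (RaceId, HorseNo, FinishTime):

    from hkjc import generate_historical_data

    # Both endpoints are now included (closed='both'), and rows are trimmed
    # back to the requested window after cleaning.
    df = generate_historical_data('2024-01-01', '2024-01-31')
    print(df.select('RaceId', 'HorseNo', 'FinishTime').head())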
hkjc-0.3.21.dist-info/RECORD
ADDED
@@ -0,0 +1,14 @@
+hkjc/__init__.py,sha256=XSm9N6YbZ2SzyxjO9aR26ctB4Z1-VeBImuroSgncUfk,737
+hkjc/features.py,sha256=LicwtKBpMzpz_dSX9bjoCLLaRUu8oeZo1AloTe7v7sI,298
+hkjc/harville_model.py,sha256=WSA_1EcNOHKGraP6WVHJ3FXZPGrDrjKhJc_q70KKx80,20188
+hkjc/historical.py,sha256=aONchf7CMNs2B-WVDS_GWg8g0U0ZEH-FjbfhdJwc_N0,7683
+hkjc/live.py,sha256=CfMeHRQfhKSmhQaexM99sdP0KRbIEqg2DIvNPc1gohk,10696
+hkjc/processing.py,sha256=hQnHxl6HYlFOeSLSOCVsemgTKcwt9_tYUQI-itpvjUg,7188
+hkjc/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+hkjc/speedpro.py,sha256=Y2Z3GYGeePc4sM-ZnCHXCI1N7L-_j9nrMqS3CC5BBSo,2031
+hkjc/utils.py,sha256=4CA_FPf_U3GvzoLkqBX0qDPZgrSvKJKvbP7VWqd5FiA,6323
+hkjc/strategy/place_only.py,sha256=lHPjTSj8PzghxncNBg8FI4T4HJigekB9a3bV7l7VtPA,2079
+hkjc/strategy/qpbanker.py,sha256=MQxjwsfhllKZroKS8w8Q3bi3HMjGc1DAyBIjNZAp3yQ,4805
+hkjc-0.3.21.dist-info/METADATA,sha256=YuIC0EvFVS3Z-8cwdzczMV7qQxMYvIKtO442iUQu5Jg,480
+hkjc-0.3.21.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+hkjc-0.3.21.dist-info/RECORD,,
hkjc/analysis.py
DELETED
hkjc-0.3.18.dist-info/RECORD
DELETED
@@ -1,14 +0,0 @@
-hkjc/__init__.py,sha256=5A9MzcITYJDcA2UbIBpkimZBYSqS4pgRuQJhTagOfpE,753
-hkjc/analysis.py,sha256=0042_NMIkQCl0J6B0P4TFfrBDCnm2B6jsCZKOEO30yI,108
-hkjc/harville_model.py,sha256=MZjPLS-1nbEhp1d4Syuq13DtraKnd7TlNqBmOOCwxgc,15976
-hkjc/historical.py,sha256=v9k_R47Na5en5ftrocjIHofkNAUthE_lp4CyLaCTsQE,8280
-hkjc/live.py,sha256=GqctH-BVdIL6Vi1g8XHe3p8fZBopCQf5KACLAR0meP0,10249
-hkjc/processing.py,sha256=H0chtW_FBMMhK3IzcjYjrryd3fAPYimanc2fWuGiB0M,6807
-hkjc/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-hkjc/speedpro.py,sha256=Y2Z3GYGeePc4sM-ZnCHXCI1N7L-_j9nrMqS3CC5BBSo,2031
-hkjc/utils.py,sha256=4CA_FPf_U3GvzoLkqBX0qDPZgrSvKJKvbP7VWqd5FiA,6323
-hkjc/strategy/place_only.py,sha256=lHPjTSj8PzghxncNBg8FI4T4HJigekB9a3bV7l7VtPA,2079
-hkjc/strategy/qpbanker.py,sha256=MQxjwsfhllKZroKS8w8Q3bi3HMjGc1DAyBIjNZAp3yQ,4805
-hkjc-0.3.18.dist-info/METADATA,sha256=aoXp6Fvn3EkuXyv6p5LClSbZa5XS_bfcUxMKBJXcNvw,480
-hkjc-0.3.18.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-hkjc-0.3.18.dist-info/RECORD,,
{hkjc-0.3.18.dist-info → hkjc-0.3.21.dist-info}/WHEEL
File without changes