PyPI - hkjc - Versions diffs - 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl - Mend

hkjc 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

hkjc/__init__.py +2 -2
hkjc/harville_model.py +98 -12
hkjc/historical.py +13 -0
hkjc/live_odds.py +22 -2
hkjc/processing.py +5 -15
hkjc/speedpro.py +6 -2
{hkjc-0.3.4.dist-info → hkjc-0.3.6.dist-info}/METADATA +1 -1
hkjc-0.3.6.dist-info/RECORD +12 -0
hkjc/analysis.py +0 -0
hkjc/visualization.py +0 -0
hkjc-0.3.4.dist-info/RECORD +0 -13
{hkjc-0.3.4.dist-info → hkjc-0.3.6.dist-info}/WHEEL +0 -0

hkjc/__init__.py CHANGED Viewed

@@ -6,7 +6,7 @@ from importlib.metadata import version as _version
 __all__ = ["live_odds", "qpbanker",
 		    "generate_all_qp_trades", "generate_pareto_qp_trades",
-			"speedpro_df", "speedmap","harveille_model"]
+			"speedpro_energy", "speedmap","harveille_model"]
 try:
 	__version__ = _version(__name__)
@@ -15,5 +15,5 @@ except Exception:  # pragma: no cover - best-effort version resolution
 from .live_odds import live_odds
 from .processing import generate_all_qp_trades, generate_pareto_qp_trades
-from .speedpro import speedmap, speedpro_df
+from .speedpro import speedmap, speedpro_energy
 from . import harville_model

hkjc/harville_model.py CHANGED Viewed

@@ -20,6 +20,12 @@ from scipy.optimize import minimize
 from numba import njit
 from typing import Tuple, Optional
+LAMBDA_DEFAULTS = {
+    'WIN': 1.0,
+    'QIN': 2.0,
+    'QPL': 1.5,
+    'PLA': 0.7
+}
 @njit(cache=True)
 def _popcount(mask: int) -> int:
@@ -173,7 +179,7 @@ def _kl_divergence(p_obs: np.ndarray, p_model: np.ndarray) -> float:
     return np.sum(p_obs_flat * np.log(p_obs_flat / p_model_flat))
-class HarvilleOptimizer:
+class HarvilleModel:
     """
     Fits Harville race model to betting market odds using dynamic programming.
@@ -182,9 +188,9 @@ class HarvilleOptimizer:
     relative strength. This optimizer estimates theta from observed betting odds
     across multiple pool types.
-    Default lambda weights (1.0, 2.0, 1.5, 0.7) reflect that early Win odds are
-    biased by informed traders waiting until closing, while exotic pools provide
-    more stable signals for ensemble estimation.
+    Default lambda weights reflect that early Win odds are biased by informed
+    traders waiting until closing, while exotic pools provide more stable
+    signals for ensemble estimation.
     Attributes:
         n (int): Number of horses
@@ -194,10 +200,10 @@ class HarvilleOptimizer:
         lambda_banker (float): Weight for Banker pool loss
     """
-    def __init__(self, n_horses: int, lambda_win: float = 1.0, lambda_qin: float = 2.0,
-                 lambda_quinella: float = 1.5, lambda_banker: float = 0.7):
+    def __init__(self, n_horses: int, lambda_win: float = LAMBDA_DEFAULTS['WIN'], lambda_qin: float = LAMBDA_DEFAULTS['QIN'],
+                 lambda_quinella: float = LAMBDA_DEFAULTS['QPL'], lambda_banker: float = LAMBDA_DEFAULTS['PLA']) -> None:
         """
-        Initialize optimizer.
+        Initialize model.
         Args:
             n_horses: Number of horses in race (recommend <= 20 for speed)
@@ -218,8 +224,9 @@ class HarvilleOptimizer:
         self.lambda_quinella = lambda_quinella
         self.lambda_banker = lambda_banker
         self._eval_count = 0
+        self.result = None
-    def loss(self, theta: np.ndarray, W_obs: Optional[np.ndarray],
+    def _loss(self, theta: np.ndarray, W_obs: Optional[np.ndarray],
              Qin_obs: Optional[np.ndarray], Q_obs: Optional[np.ndarray],
              b_obs: Optional[np.ndarray]) -> float:
         """
@@ -328,7 +335,7 @@ class HarvilleOptimizer:
         if method == 'L-BFGS-B':
             result = minimize(
-                fun=lambda x: self.loss(x, W_obs, Qin_obs, Q_obs, b_obs),
+                fun=lambda x: self._loss(x, W_obs, Qin_obs, Q_obs, b_obs),
                 x0=theta_init,
                 method='L-BFGS-B',
                 bounds=[(1e-6, 1.0) for _ in range(self.n)],
@@ -336,7 +343,7 @@ class HarvilleOptimizer:
             )
         else:
             result = minimize(
-                fun=lambda x: self.loss(x, W_obs, Qin_obs, Q_obs, b_obs),
+                fun=lambda x: self._loss(x, W_obs, Qin_obs, Q_obs, b_obs),
                 x0=theta_init,
                 method='SLSQP',
                 bounds=[(1e-6, 1.0) for _ in range(self.n)],
@@ -349,7 +356,7 @@ class HarvilleOptimizer:
         W_fitted, Qin_fitted, Q_fitted, b_fitted, P_fitted = _compute_probabilities(theta_opt)
-        return {
+        self.result = {
             'theta': theta_opt,
             'W_fitted': W_fitted,
             'Qin_fitted': Qin_fitted,
@@ -360,4 +367,83 @@ class HarvilleOptimizer:
             'success': result.success,
             'message': result.message,
             'n_eval': self._eval_count
-        }
+        }
+        return self.result
+def fit_harville_to_odds(odds : dict[str, np.ndarray], lambdas : dict[str, float] = None) -> dict:
+    """
+    Fit Harville model to observed betting odds.
+    At least one odds type must be provided. All odds should be probabilities
+    (not decimal/fractional odds). Matrices should be symmetric where applicable.
+    Args:
+        odds: Dictionary of odds arrays with types as keys.:
+                'WIN' (n,), 'QIN' (n,n), 'QPL' (n,n), 'PLA' (n,)
+        lambdas: Optional dictionary of lambda weights for each odds type.
+                    Keys can be 'WIN', 'QIN', 'QPL', 'PLA'. Defaults to
+                    {'WIN': 1.0, 'QIN': 2.0, 'QPL': 1.5, 'PLA': 0.7}
+    Returns:
+        Dictionary containing:
+            - theta: Fitted strength parameters (n,)
+            - W_fitted: Fitted Win probabilities (n,)
+            - Qin_fitted: Fitted Qin probabilities (n, n)
+            - Q_fitted: Fitted Quinella probabilities (n, n)
+            - b_fitted: Fitted Banker probabilities (n,)
+            - P_fitted: Full place probability matrix (n, n), P[i,j] =
+                        prob horse i finishes in position j
+            - loss: Final loss value
+            - success: Whether optimization converged
+            - message: Optimizer status message
+            - n_eval: Number of loss function evaluations
+    """
+    n_horses = None
+    W_obs = None
+    Qin_obs = None
+    Q_obs = None
+    b_obs = None
+    if 'WIN' in odds:
+        W_odds = odds['WIN']
+        if n_horses is None:
+            n_horses = len(W_odds)
+        elif n_horses != len(W_odds):
+            raise ValueError("Inconsistent number of horses in WIN odds")
+        W_obs = np.nan_to_num(1.0 / W_odds, 0)
+    if 'QIN' in odds:
+        Qin_odds = odds['QIN']
+        if n_horses is None:
+            n_horses = Qin_odds.shape[0]
+        elif n_horses != Qin_odds.shape[0]:
+            raise ValueError("Inconsistent number of horses in QIN odds")
+        Qin_obs = np.nan_to_num(1.0 / Qin_odds, 0)
+    if 'QPL' in odds:
+        Q_odds = odds['QPL']
+        if n_horses is None:
+            n_horses = Q_odds.shape[0]
+        elif n_horses != Q_odds.shape[0]:
+            raise ValueError("Inconsistent number of horses in QPL odds")
+        Q_obs = np.nan_to_num(1.0 / Q_odds, 0)
+    if 'PLA' in odds:
+        b_odds = odds['PLA']
+        if n_horses is None:
+            n_horses = len(b_odds)
+        elif n_horses != len(b_odds):
+            raise ValueError("Inconsistent number of horses in PLA odds")
+        b_obs = np.nan_to_num(1.0 / b_odds, 0)
+    merged_lambdas = {**LAMBDA_DEFAULTS, **(lambdas or {})}
+    ho = HarvilleModel(
+        n_horses,
+        lambda_win=merged_lambdas['WIN'],
+        lambda_qin=merged_lambdas['QIN'],
+        lambda_quinella=merged_lambdas['QPL'],
+        lambda_banker=merged_lambdas['PLA']
+    )
+    result = ho.fit(W_obs=W_obs, Qin_obs=Qin_obs, Q_obs=Q_obs, b_obs=b_obs)
+    return result

hkjc/historical.py ADDED Viewed

@@ -0,0 +1,13 @@
+"""Functions to fetch and process historical race and horse data from HKJC
+"""
+from __future__ import annotations
+import requests
+# TODO read and process all races from start date to end date
+# TODO query all basic info and race history for a specific horse
+# TODO classify running style & draw to determine blocking probability
+#

hkjc/live_odds.py CHANGED Viewed

@@ -3,6 +3,8 @@
 from __future__ import annotations
 from typing import Tuple, List
+from .harville_model import fit_harville_to_odds
 import requests
 from cachetools.func import ttl_cache
 import numpy as np
@@ -85,7 +87,7 @@ def _fetch_live_odds(date: str, venue_code: str, race_number: int, odds_type: Tu
     ]
-def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str] = ['PLA', 'QPL']) -> dict:
+def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str] = ['PLA', 'QPL'], fit_harville=False) -> dict:
     """Fetch live odds as numpy arrays.
     Args:
@@ -97,6 +99,7 @@ def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str]
             - 'PLA': Place odds
             - 'QIN': Quinella odds
             - 'QPL': Quinella Place odds
+        fit_harville (bool): Whether to fit the odds using Harville model. Default is False.
     Returns:
         dict: Dictionary with keys as odds types and values as numpy arrays containing the odds.
@@ -109,8 +112,10 @@ def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str]
     except Exception:
         raise ValueError("Date must be in 'YYYY-MM-DD' format")
+    mandatory_types = ['WIN','PLA','QIN','QPL'] if fit_harville else ['PLA']
     data = _fetch_live_odds(date, venue_code, race_number,
-                            odds_type=tuple(set(['PLA']+odds_type)))  # ensure PLA is always fetched
+                            odds_type=tuple(set(mandatory_types+odds_type)))
     # use place odds to determine number of horses
     pla_data = [entry for entry in data if entry["Type"] == "PLA"]
@@ -129,4 +134,19 @@ def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str]
         elif entry["Type"] in ["PLA","WIN"]:
             odds[entry["Type"]][int(entry["HorseID"]) - 1] = entry["Odds"]
+    if fit_harville:
+        fit_res = fit_harville_to_odds(
+            W_obs=odds['WIN'],
+            Qin_obs=odds['QIN'],
+            Q_obs=odds['QPL'],
+            b_obs=odds['PLA']
+        )
+        if fit_res['success']:
+            odds['PLA'] = np.nan_to_num(1/fit_res['b_fitted'], posinf=0)
+            odds['QPL'] = np.nan_to_num(1/fit_res['Q_fitted'], posinf=0)
+            odds['WIN'] = np.nan_to_num(1/fit_res['W_fitted'], posinf=0)
+            odds['QIN'] = np.nan_to_num(1/fit_res['Qin_fitted'], posinf=0)
+        else:
+            print(f"[WARNING] Harville model fitting failed: {fit_res.get('message','')}")
     return {t: odds[t] for t in odds_type}

hkjc/processing.py CHANGED Viewed

@@ -6,7 +6,6 @@ from typing import Tuple, List
 from .live_odds import live_odds
 from .qpbanker import win_probability, expected_value, average_odds
 from .optimization import _pareto_filter
-from .harville_model import HarvilleOptimizer
 import polars as pl
 import numpy as np
@@ -27,7 +26,7 @@ def _process_single_qp_trade(banker: int, covered: List[int], odds_pla: List[flo
     return (banker, covered, win_prob, exp_value, ave_odds)
-def generate_all_qp_trades(date: str, venue_code: str, race_number: int, rebate: float = 0.12, harville_fit=False) -> pl.DataFrame:
+def generate_all_qp_trades(date: str, venue_code: str, race_number: int, rebate: float = 0.12, fit_harville=False) -> pl.DataFrame:
     """Generate all possible qp tickets for the specified race.
     Args:
@@ -35,26 +34,17 @@ def generate_all_qp_trades(date: str, venue_code: str, race_number: int, rebate:
         venue_code (str): Venue code, e.g., 'ST' for Shatin, 'HV' for Happy Valley.
         race_number (int): Race number.
         rebate (float, optional): The rebate percentage. Defaults to 0.12.
-        harville_fit (bool, optional): Whether to fit the odds using Harville model. Defaults to False.
+        fit_harville (bool, optional): Whether to fit the odds using Harville model. Defaults to False.
     Returns:
         pl.DataFrame: DataFrame with all possible trades and their metrics.
     """
     odds = live_odds(date, venue_code, race_number,
-                     odds_type=['PLA', 'QPL', 'WIN', 'QIN'])
+                     odds_type=['PLA', 'QPL'], fit_harville=fit_harville)
     N = len(odds['PLA'])
     candidates = np.arange(1, N+1)
-    if harville_fit:
-        ho = HarvilleOptimizer(N)
-        prob = {k: np.nan_to_num(1/v, 0) for k,v in odds.items()}
-        fit_res = ho.fit(prob['WIN'], prob['QIN'],
-                         prob['QPL'], prob['PLA'])
-        if fit_res['success']:
-            odds['PLA'] = np.nan_to_num(1/fit_res['b_fitted'], posinf=0)
-            odds['QPL'] = np.nan_to_num(1/fit_res['Q_fitted'], posinf=0)
     results = [_process_single_qp_trade(banker, covered, odds['PLA'], odds['QPL'], rebate)
                for banker in tqdm(candidates, desc="Processing bankers")
                for covered in _all_subsets(candidates[candidates != banker])]
@@ -65,7 +55,7 @@ def generate_all_qp_trades(date: str, venue_code: str, race_number: int, rebate:
     return df
-def generate_pareto_qp_trades(date: str, venue_code: str, race_number: int, rebate: float = 0.12, groupby: List[str] = [], harville_fit=False) -> pl.DataFrame:
+def generate_pareto_qp_trades(date: str, venue_code: str, race_number: int, rebate: float = 0.12, groupby: List[str] = [], fit_harville=False) -> pl.DataFrame:
     """Generate qp tickets that are Pareto optimal for the specified race.
     Args:
@@ -79,7 +69,7 @@ def generate_pareto_qp_trades(date: str, venue_code: str, race_number: int, reba
     Returns:
         pl.DataFrame: DataFrame with all Pareto trades and their metrics.
     """
-    df = generate_all_qp_trades(date, venue_code, race_number, rebate, harville_fit=harville_fit)
+    df = generate_all_qp_trades(date, venue_code, race_number, rebate, harville_fit=fit_harville)
     pareto_df = _pareto_filter(df, groupby=groupby, by=[
                                'WinProb', 'ExpValue'], maximize=True)
     return pareto_df

hkjc/speedpro.py CHANGED Viewed

@@ -12,7 +12,7 @@ ENERGY_XLS_TEMPLATE = "https://racing.hkjc.com/racing/speedpro/assets/excel/{dat
 SPEEDMAP_URL_TEMPLATE = "https://racing.hkjc.com/racing/speedpro/assets/json/speedguide/race_{race_num}.json"
-def speedpro_df(race_date: str) -> pl.DataFrame:
+def speedpro_energy(race_date: str) -> pl.DataFrame:
     """Fetch and process SpeedPro scores for a given race date.
     Args:
@@ -29,11 +29,15 @@ def speedpro_df(race_date: str) -> pl.DataFrame:
     df = pl.read_excel(ENERGY_XLS_TEMPLATE.format(
         date=dt.strptime(race_date, "%Y-%m-%d").strftime("%Y%m%d")))
     # Clean column names
     df.columns = [col.strip().replace(" ", "").replace(
         "\n", "_").replace('.', '') for col in df.columns]
+    df = (df.with_columns(pl.col('RunnerNumber').str.to_integer())
+          .with_columns(pl.col('SpeedPRO_Energy_Difference').str.to_integer())
+          .select(['RaceNumber', 'RunnerNumber', 'HorseName', 'FitnessRatings','SpeedPRO_Energy_Difference']))
     return df

{hkjc-0.3.4.dist-info → hkjc-0.3.6.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hkjc
-Version: 0.3.4
+Version: 0.3.6
 Summary: Library for scrapping HKJC data and perform basic analysis
 Requires-Python: >=3.11
 Requires-Dist: cachetools>=6.2.0

hkjc-0.3.6.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,12 @@
+hkjc/__init__.py,sha256=xKkhk53z9aGyGkiwQ_RNc7Qa82lp1Oinyx83m_pBv_E,627
+hkjc/harville_model.py,sha256=MZjPLS-1nbEhp1d4Syuq13DtraKnd7TlNqBmOOCwxgc,15976
+hkjc/historical.py,sha256=wKTJi--0Mx_x0vO0ysOGD37oM8453woQK-cLzPOLgiQ,336
+hkjc/live_odds.py,sha256=m3sO5AIam73Qr2my8aUW4slE7G8xZk6tnMDRPAWS_bs,5447
+hkjc/optimization.py,sha256=OArQ3w9bwcIV_lTNuE5za6AROoa90xk_gwAoGwQ-8RE,3784
+hkjc/processing.py,sha256=1RZi-xPMV_rcfmfDHmd-MVRKL9fDd6XKolD5dzvxaiQ,3307
+hkjc/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+hkjc/qpbanker.py,sha256=vhvYb5_nGrKgYgre9gGF6tgswovca5C9pZVOPGxEP1Q,4804
+hkjc/speedpro.py,sha256=Tb8YqWnD7nnRv0I5onnWxGtsRi3bQZZumWnZncT2n4M,1968
+hkjc-0.3.6.dist-info/METADATA,sha256=I7hzvISiv67h4oCrJY0OpuywtRfk_WKvnBBHNbAQmlE,413
+hkjc-0.3.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+hkjc-0.3.6.dist-info/RECORD,,

hkjc/analysis.py DELETED Viewed

File without changes

hkjc/visualization.py DELETED Viewed

File without changes

hkjc-0.3.4.dist-info/RECORD DELETED Viewed

@@ -1,13 +0,0 @@
-hkjc/__init__.py,sha256=KBbWVwLXPPb93bk_h2Qt9t5OH8y6RrVUeH-ZYNKQAoQ,619
-hkjc/analysis.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-hkjc/harville_model.py,sha256=KoN5AxaAbjDkCCFUrTBSdsfZDkcINrCku9JHguofMHU,12799
-hkjc/live_odds.py,sha256=i_g9ckQKA9GWbwPXNvbmNvm-dPbF9UJoGiWv6_bHzwA,4603
-hkjc/optimization.py,sha256=OArQ3w9bwcIV_lTNuE5za6AROoa90xk_gwAoGwQ-8RE,3784
-hkjc/processing.py,sha256=HgZ0NnjCBb8Z7fX7yTLMlZiXjVIaRr1nELsE3Fs8VFE,3741
-hkjc/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-hkjc/qpbanker.py,sha256=vhvYb5_nGrKgYgre9gGF6tgswovca5C9pZVOPGxEP1Q,4804
-hkjc/speedpro.py,sha256=vKnSz9yY1rfVmRo7GVxXLjsiQN-YgwxSbV0B7yuszS4,1702
-hkjc/visualization.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-hkjc-0.3.4.dist-info/METADATA,sha256=Wh2uEr7WOJej-2NhKXYUlvQmcxGge0rh29Rfc5TqRPU,413
-hkjc-0.3.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-hkjc-0.3.4.dist-info/RECORD,,

{hkjc-0.3.4.dist-info → hkjc-0.3.6.dist-info}/WHEEL RENAMED Viewed

File without changes

hkjc 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl

hkjc 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl