PyPI - hkjc - Versions diffs - 0.3.7__tar.gz → 0.3.9__tar.gz - Mend

hkjc 0.3.7__tar.gz → 0.3.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

{hkjc-0.3.7 → hkjc-0.3.9}/PKG-INFO +1 -1
{hkjc-0.3.7 → hkjc-0.3.9}/pyproject.toml +1 -1
hkjc-0.3.9/src/hkjc/__init__.py +20 -0
{hkjc-0.3.7 → hkjc-0.3.9}/src/hkjc/live_odds.py +7 -22
{hkjc-0.3.7 → hkjc-0.3.9}/src/hkjc/optimization.py +1 -1
hkjc-0.3.9/src/hkjc/processing.py +120 -0
{hkjc-0.3.7 → hkjc-0.3.9}/src/hkjc/speedpro.py +2 -1
hkjc-0.3.9/src/hkjc/strategy/place_only.py +53 -0
{hkjc-0.3.7/src/hkjc → hkjc-0.3.9/src/hkjc/strategy}/qpbanker.py +1 -0
{hkjc-0.3.7 → hkjc-0.3.9}/uv.lock +1 -1
hkjc-0.3.7/src/hkjc/__init__.py +0 -19
hkjc-0.3.7/src/hkjc/processing.py +0 -75
{hkjc-0.3.7 → hkjc-0.3.9}/.python-version +0 -0
{hkjc-0.3.7 → hkjc-0.3.9}/README.md +0 -0
{hkjc-0.3.7 → hkjc-0.3.9}/src/hkjc/harville_model.py +0 -0
{hkjc-0.3.7 → hkjc-0.3.9}/src/hkjc/historical.py +0 -0
{hkjc-0.3.7 → hkjc-0.3.9}/src/hkjc/py.typed +0 -0

{hkjc-0.3.7 → hkjc-0.3.9}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hkjc
-Version: 0.3.7
+Version: 0.3.9
 Summary: Library for scrapping HKJC data and perform basic analysis
 Requires-Python: >=3.11
 Requires-Dist: cachetools>=6.2.0

{hkjc-0.3.7 → hkjc-0.3.9}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "hkjc"
-version = "0.3.7"
+version = "0.3.9"
 description = "Library for scrapping HKJC data and perform basic analysis"
 readme = "README.md"
 requires-python = ">=3.11"

hkjc-0.3.9/src/hkjc/__init__.py ADDED Viewed

@@ -0,0 +1,20 @@
+"""Top-level package for hkjc tools.
+This module re-exports commonly used symbols from the submodules.
+"""
+from importlib.metadata import version as _version
+__all__ = ["live_odds", "qpbanker",
+           "generate_all_qp_trades", "generate_all_pla_trades", "pareto_filter",
+                        "speedpro_energy", "speedmap", "harveille_model"]
+try:
+    __version__ = _version(__name__)
+except Exception:  # pragma: no cover - best-effort version resolution
+    __version__ = "0.0.0"
+from .live_odds import live_odds
+from .processing import generate_all_qp_trades, generate_all_pla_trades
+from .optimization import pareto_filter
+from .speedpro import speedmap, speedpro_energy
+from . import harville_model

{hkjc-0.3.7 → hkjc-0.3.9}/src/hkjc/live_odds.py RENAMED Viewed

@@ -3,8 +3,6 @@
 from __future__ import annotations
 from typing import Tuple, List
-from .harville_model import fit_harville_to_odds
 import requests
 from cachetools.func import ttl_cache
 import numpy as np
@@ -87,7 +85,7 @@ def _fetch_live_odds(date: str, venue_code: str, race_number: int, odds_type: Tu
     ]
-def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str] = ['PLA', 'QPL'], fit_harville=False) -> dict:
+def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str] = ['PLA', 'QPL']) -> dict:
     """Fetch live odds as numpy arrays.
     Args:
@@ -112,7 +110,7 @@ def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str]
     except Exception:
         raise ValueError("Date must be in 'YYYY-MM-DD' format")
-    mandatory_types = ['WIN','PLA','QIN','QPL'] if fit_harville else ['PLA']
+    mandatory_types = ['PLA']
     data = _fetch_live_odds(date, venue_code, race_number,
                             odds_type=tuple(set(mandatory_types+odds_type)))
@@ -129,24 +127,11 @@ def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str]
     for entry in data:
         if entry["Type"] in ["QIN", "QPL"]:
             horse_ids = list(map(int, entry["HorseID"].split(",")))
-            odds[entry["Type"]][horse_ids[0] - 1, horse_ids[1] - 1] = entry["Odds"]
-            odds[entry["Type"]][horse_ids[1] - 1, horse_ids[0] - 1] = entry["Odds"]
-        elif entry["Type"] in ["PLA","WIN"]:
+            odds[entry["Type"]][horse_ids[0] - 1,
+                                horse_ids[1] - 1] = entry["Odds"]
+            odds[entry["Type"]][horse_ids[1] - 1,
+                                horse_ids[0] - 1] = entry["Odds"]
+        elif entry["Type"] in ["PLA", "WIN"]:
             odds[entry["Type"]][int(entry["HorseID"]) - 1] = entry["Odds"]
-    if fit_harville:
-        fit_res = fit_harville_to_odds(
-            W_obs=odds['WIN'],
-            Qin_obs=odds['QIN'],
-            Q_obs=odds['QPL'],
-            b_obs=odds['PLA']
-        )
-        if fit_res['success']:
-            odds['PLA'] = np.nan_to_num(1/fit_res['b_fitted'], posinf=0)
-            odds['QPL'] = np.nan_to_num(1/fit_res['Q_fitted'], posinf=0)
-            odds['WIN'] = np.nan_to_num(1/fit_res['W_fitted'], posinf=0)
-            odds['QIN'] = np.nan_to_num(1/fit_res['Qin_fitted'], posinf=0)
-        else:
-            print(f"[WARNING] Harville model fitting failed: {fit_res.get('message','')}")
     return {t: odds[t] for t in odds_type}

{hkjc-0.3.7 → hkjc-0.3.9}/src/hkjc/optimization.py RENAMED Viewed

@@ -1,7 +1,7 @@
 import polars as pl
 from typing import List, Union
-def _pareto_filter(
+def pareto_filter(
     df: pl.DataFrame,
     groupby: List[str],
     by: List[str],

hkjc-0.3.9/src/hkjc/processing.py ADDED Viewed

@@ -0,0 +1,120 @@
+"""Functions to batch process trades into dataframes for analysis.
+"""
+from __future__ import annotations
+from typing import Tuple, List
+from .live_odds import live_odds
+from .strategy import qpbanker, place_only
+from .harville_model import fit_harville_to_odds
+import polars as pl
+import numpy as np
+from itertools import combinations
+from tqdm import tqdm
+def _all_subsets(lst): return [list(x) for r in range(
+    1, len(lst)+1) for x in combinations(lst, r)]  # list subsets of a list
+def _process_single_qp_trade(banker: int, covered: List[int], pla_odds: np.ndarray, qpl_odds: np.ndarray, rebate: float) -> Tuple[int, List, float, float, float]:
+    """Process a single qp trade.
+    """
+    win_prob = qpbanker.win_probability(pla_odds, banker, covered)
+    exp_value = qpbanker.expected_value(
+        pla_odds, qpl_odds, banker, covered, rebate)
+    ave_odds = qpbanker.average_odds(qpl_odds, banker, covered)
+    return (banker, covered, win_prob, exp_value, ave_odds)
+def generate_all_qp_trades(date: str, venue_code: str, race_number: int, rebate: float = 0.12, fit_harville: bool = False) -> pl.DataFrame:
+    """Generate all possible qp tickets for the specified race.
+    Args:
+        date (str): Date in 'YYYY-MM-DD' format.
+        venue_code (str): Venue code, e.g., 'ST' for Shatin, 'HV' for Happy Valley.
+        race_number (int): Race number.
+        rebate (float, optional): The rebate percentage. Defaults to 0.12.
+        fit_harville (bool, optional): Whether to fit the odds using Harville model. Defaults to False.
+    Returns:
+        pl.DataFrame: DataFrame with all possible trades and their metrics.
+    """
+    odds = live_odds(date, venue_code, race_number,
+                     odds_type=['PLA', 'QPL'] + (['WIN', 'QIN'] if fit_harville else []))
+    N = len(odds['PLA'])
+    candidates = np.arange(1, N+1)
+    if fit_harville:
+        fit_res = fit_harville_to_odds(
+            W_obs=odds['WIN'],
+            Qin_obs=odds['QIN'],
+            Q_obs=odds['QPL'],
+            b_obs=odds['PLA']
+        )
+        if fit_res['success']:
+            odds['PLA'] = np.nan_to_num(1/fit_res['b_fitted'], posinf=0)
+            odds['QPL'] = np.nan_to_num(1/fit_res['Q_fitted'], posinf=0)
+            odds['WIN'] = np.nan_to_num(1/fit_res['W_fitted'], posinf=0)
+            odds['QIN'] = np.nan_to_num(1/fit_res['Qin_fitted'], posinf=0)
+        else:
+            print(
+                f"[WARNING] Harville model fitting failed: {fit_res.get('message','')}")
+    results = [_process_single_qp_trade(banker, covered, odds['PLA'], odds['QPL'], rebate)
+               for banker in tqdm(candidates, desc="Processing bankers")
+               for covered in _all_subsets(candidates[candidates != banker])]
+    df = (pl.DataFrame(results, schema=['Banker', 'Covered', 'WinProb', 'ExpValue', 'AvgOdds'])
+          .with_columns(pl.col('Covered').list.len().alias('NumCovered')))
+    return df
+def _process_single_pla_trade(covered: List[int], pla_odds: np.ndarray, p_matrix: np.ndarray, rebate: float = 0.1) -> Tuple[List, float, float, float]:
+    """Process a single place-only trade.
+    """
+    win_prob = place_only.win_probability(p_matrix, covered)
+    exp_value = place_only.expected_value(pla_odds, p_matrix, covered, rebate)
+    ave_odds = place_only.average_odds(pla_odds, covered)
+    return (covered, win_prob, exp_value, ave_odds)
+def generate_all_pla_trades(date: str, venue_code: str, race_number: int, rebate: float = 0.1) -> pl.DataFrame:
+    """Generate all possible place-only tickets for the specified race.
+    Args:
+        date (str): Date in 'YYYY-MM-DD' format.
+        venue_code (str): Venue code, e.g., 'ST' for Shatin, 'HV' for Happy Valley.
+        race_number (int): Race number.
+        rebate (float, optional): The rebate percentage. Defaults to 0.12.
+    Returns:
+        pl.DataFrame: DataFrame with all possible trades and their metrics.
+    """
+    odds = live_odds(date, venue_code, race_number,
+                     odds_type=['PLA', 'QPL', 'WIN', 'QIN'])
+    N = len(odds['PLA'])
+    candidates = np.arange(1, N+1)
+    fit_res = fit_harville_to_odds(odds)
+    if fit_res['success']:
+        odds['PLA'] = np.nan_to_num(1/fit_res['b_fitted'], posinf=0)
+        odds['QPL'] = np.nan_to_num(1/fit_res['Q_fitted'], posinf=0)
+        odds['WIN'] = np.nan_to_num(1/fit_res['W_fitted'], posinf=0)
+        odds['QIN'] = np.nan_to_num(1/fit_res['Qin_fitted'], posinf=0)
+    else:
+        raise RuntimeError(
+            f"[ERROR] Harville model fitting failed: {fit_res.get('message','')}")
+    p_matrix = fit_res['P_fitted']
+    results = [_process_single_pla_trade(covered, odds['PLA'], p_matrix, rebate)
+               for covered in _all_subsets(candidates)]
+    df = (pl.DataFrame(results, schema=['Covered', 'WinProb', 'ExpValue', 'AvgOdds'])
+          .with_columns(pl.col('Covered').list.len().alias('NumCovered')))
+    return df

{hkjc-0.3.7 → hkjc-0.3.9}/src/hkjc/speedpro.py RENAMED Viewed

@@ -36,7 +36,8 @@ def speedpro_energy(race_date: str) -> pl.DataFrame:
     df = (df.with_columns(pl.col('RunnerNumber').str.to_integer())
           .with_columns(pl.col('SpeedPRO_Energy_Difference').str.to_integer())
-          .select(['RaceNumber', 'RunnerNumber', 'HorseName', 'FitnessRatings','SpeedPRO_Energy_Difference']))
+          .with_columns(pl.col('FitnessRatings').str.to_integer())
+          .select(['RaceNo', 'RunnerNumber', 'HorseName', 'FitnessRatings','SpeedPRO_Energy_Difference']))
     return df

hkjc-0.3.9/src/hkjc/strategy/place_only.py ADDED Viewed

@@ -0,0 +1,53 @@
+"""Functions to perform probability and expectation calculations for the place-only strategy.
+"""
+from __future__ import annotations
+from typing import List
+import numpy as np
+def win_probability(p_matrix: np.ndarray, covered: List[int]) -> float:
+    """Calculate the probability of winning at least one ticket in the place-only strategy.
+    Args:
+        p_matrix (np.ndarray): An array of place probabilities for the horses (0-indexed). p_ij is the probability of horse i placing in position j.
+        covered (List[int]): A list of horse numbers in the cover set (1-indexed).
+    Returns:
+        float: probability
+    """
+    win_prob = 1-np.prod(1-np.sum(p_matrix[covered, :3], axis=1))
+    return win_prob
+def expected_value(pla_odds: np.ndarray, p_matrix: np.ndarray, covered: List[int], rebate: float = 0.10) -> float:
+    """Calculate the expected value (per dollar) of the place-only strategy using constant stake.
+    Args:
+        pla_odds (np.ndarray): An array of place odds for the horses (0-indexed).
+        p_matrix (np.ndarray): An array of place probabilities for the horses (0-indexed). p_ij is the probability of horse i placing in position j.
+        covered (List[int]): A list of horse numbers in the cover set (1-indexed).
+        rebate (float, optional): The rebate percentage. Defaults to 0.10.
+    Returns:
+            float: expected value per dollar staked
+    """
+    true_prob = np.sum(p_matrix[:, :3], axis=1)
+    C = len(covered)
+    ev = np.sum((true_prob*(pla_odds-rebate))[covered])/C - (1-rebate)
+    return ev
+def average_odds(pla_odds: np.ndarray, covered: List[int]) -> float:
+    """Calculate the (harmonic) average odds across the covered set.
+    Args:
+        pla_odds (np.ndarray): An array of place odds for the horses (0-indexed).
+        covered (List[int]): A list of horse numbers in the cover set (1-indexed).
+    Returns:
+            float: average odds
+    """
+    C = len(covered)
+    avg_odds = C / sum([1/pla_odds[c-1] for c in covered])
+    return avg_odds

{hkjc-0.3.7/src/hkjc → hkjc-0.3.9/src/hkjc/strategy}/qpbanker.py RENAMED Viewed

@@ -5,6 +5,7 @@ from __future__ import annotations
 from typing import List
 import numpy as np
 def _pla_odds_partition(pla_odds: np.ndarray, banker: int, covered: List[int]) -> tuple[float, float, float]:
     """Partition the place odds into banker, covered and eliminated sets.

{hkjc-0.3.7 → hkjc-0.3.9}/uv.lock RENAMED Viewed

@@ -97,7 +97,7 @@ wheels = [
 [[package]]
 name = "hkjc"
-version = "0.3.7"
+version = "0.3.9"
 source = { editable = "." }
 dependencies = [
     { name = "cachetools" },

hkjc-0.3.7/src/hkjc/__init__.py DELETED Viewed

@@ -1,19 +0,0 @@
-"""Top-level package for hkjc tools.
-This module re-exports commonly used symbols from the submodules.
-"""
-from importlib.metadata import version as _version
-__all__ = ["live_odds", "qpbanker",
-		    "generate_all_qp_trades", "generate_pareto_qp_trades",
-			"speedpro_energy", "speedmap","harveille_model"]
-try:
-	__version__ = _version(__name__)
-except Exception:  # pragma: no cover - best-effort version resolution
-	__version__ = "0.0.0"
-from .live_odds import live_odds
-from .processing import generate_all_qp_trades, generate_pareto_qp_trades
-from .speedpro import speedmap, speedpro_energy
-from . import harville_model

hkjc-0.3.7/src/hkjc/processing.py DELETED Viewed

@@ -1,75 +0,0 @@
-"""Functions to batch process trades into dataframes for analysis.
-"""
-from __future__ import annotations
-from typing import Tuple, List
-from .live_odds import live_odds
-from .qpbanker import win_probability, expected_value, average_odds
-from .optimization import _pareto_filter
-import polars as pl
-import numpy as np
-from itertools import combinations
-from tqdm import tqdm
-def _all_subsets(lst): return [list(x) for r in range(
-    1, len(lst)+1) for x in combinations(lst, r)]  # list subsets of a list
-def _process_single_qp_trade(banker: int, covered: List[int], odds_pla: List[float], odds_qpl: List[float], rebate: float) -> Tuple[int, List, float, float]:
-    """Process a single qp trade.
-    """
-    win_prob = win_probability(odds_pla, banker, covered)
-    exp_value = expected_value(odds_pla, odds_qpl, banker, covered, rebate)
-    ave_odds = average_odds(odds_qpl, banker, covered)
-    return (banker, covered, win_prob, exp_value, ave_odds)
-def generate_all_qp_trades(date: str, venue_code: str, race_number: int, rebate: float = 0.12, fit_harville=False) -> pl.DataFrame:
-    """Generate all possible qp tickets for the specified race.
-    Args:
-        date (str): Date in 'YYYY-MM-DD' format.
-        venue_code (str): Venue code, e.g., 'ST' for Shatin, 'HV' for Happy Valley.
-        race_number (int): Race number.
-        rebate (float, optional): The rebate percentage. Defaults to 0.12.
-        fit_harville (bool, optional): Whether to fit the odds using Harville model. Defaults to False.
-    Returns:
-        pl.DataFrame: DataFrame with all possible trades and their metrics.
-    """
-    odds = live_odds(date, venue_code, race_number,
-                     odds_type=['PLA', 'QPL'], fit_harville=fit_harville)
-    N = len(odds['PLA'])
-    candidates = np.arange(1, N+1)
-    results = [_process_single_qp_trade(banker, covered, odds['PLA'], odds['QPL'], rebate)
-               for banker in tqdm(candidates, desc="Processing bankers")
-               for covered in _all_subsets(candidates[candidates != banker])]
-    df = (pl.DataFrame(results, schema=['Banker', 'Covered', 'WinProb', 'ExpValue', 'AvgOdds'])
-          .with_columns(pl.col('Covered').list.len().alias('NumCovered')))
-    return df
-def generate_pareto_qp_trades(date: str, venue_code: str, race_number: int, rebate: float = 0.12, groupby: List[str] = [], fit_harville=False) -> pl.DataFrame:
-    """Generate qp tickets that are Pareto optimal for the specified race.
-    Args:
-        date (str): Date in 'YYYY-MM-DD' format.
-        venue_code (str): Venue code, e.g., 'ST' for Shatin, 'HV' for Happy Valley.
-        race_number (int): Race number.
-        rebate (float, optional): The rebate percentage. Defaults to 0.12.
-        groupby (List[str], optional): Columns to group by when determining Pareto optimality. Defaults to [] (global optimal).
-        harville_fit (bool, optional): Whether to fit the odds using Harville model. Defaults to False.
-    Returns:
-        pl.DataFrame: DataFrame with all Pareto trades and their metrics.
-    """
-    df = generate_all_qp_trades(date, venue_code, race_number, rebate, fit_harville=fit_harville)
-    pareto_df = _pareto_filter(df, groupby=groupby, by=[
-                               'WinProb', 'ExpValue'], maximize=True)
-    return pareto_df

{hkjc-0.3.7 → hkjc-0.3.9}/.python-version RENAMED Viewed

File without changes

{hkjc-0.3.7 → hkjc-0.3.9}/README.md RENAMED Viewed

File without changes

{hkjc-0.3.7 → hkjc-0.3.9}/src/hkjc/harville_model.py RENAMED Viewed

File without changes

{hkjc-0.3.7 → hkjc-0.3.9}/src/hkjc/historical.py RENAMED Viewed

File without changes

{hkjc-0.3.7 → hkjc-0.3.9}/src/hkjc/py.typed RENAMED Viewed

File without changes

hkjc 0.3.7__tar.gz → 0.3.9__tar.gz

hkjc 0.3.7__tar.gz → 0.3.9__tar.gz