hkjc-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hkjc/__init__.py ADDED
@@ -0,0 +1,16 @@
+ """Top-level package for hkjc tools.
+
+ This module re-exports commonly used symbols from the submodules.
+ """
+ from importlib.metadata import version as _version
+
+ __all__ = ["live_odds", "qpbanker",
+            "generate_all_qp_trades", "generate_pareto_qp_trades"]
+
+ try:
+     __version__ = _version(__name__)
+ except Exception:  # pragma: no cover - best-effort version resolution
+     __version__ = "0.0.0"
+
+ from .live_odds import live_odds
+ from .processing import generate_all_qp_trades, generate_pareto_qp_trades
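The re-exports above make the common entry points importable from the package root. A minimal sketch, assuming the wheel is installed:

    import hkjc

    print(hkjc.__version__)                   # "0.1.0" when installed; falls back to "0.0.0" otherwise
    fetch = hkjc.live_odds                    # re-exported from hkjc.live_odds
    trades = hkjc.generate_pareto_qp_trades   # re-exported from hkjc.processing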
hkjc/analysis.py ADDED
File without changes
hkjc/live_odds.py ADDED
@@ -0,0 +1,132 @@
+ """Functions to fetch and process live odds data from HKJC.
+ """
+ from __future__ import annotations
+ from typing import Tuple, List
+
+ import requests
+ from cachetools.func import ttl_cache
+ import numpy as np
+ from datetime import datetime as dt
+
+ ENDPOINT = "https://info.cld.hkjc.com/graphql/base/"
+
+ LIVEODDS_PAYLOAD = {
+     "operationName": "racing",
+     "variables": {"date": None, "venueCode": None, "raceNo": None, "oddsTypes": None},
+     "query": """
+ query racing($date: String, $venueCode: String, $oddsTypes: [OddsType], $raceNo: Int) {
+   raceMeetings(date: $date, venueCode: $venueCode) {
+     pmPools(oddsTypes: $oddsTypes, raceNo: $raceNo) {
+       id
+       status
+       sellStatus
+       oddsType
+       lastUpdateTime
+       guarantee
+       minTicketCost
+       name_en
+       name_ch
+       leg {
+         number
+         races
+       }
+       cWinSelections {
+         composite
+         name_ch
+         name_en
+         starters
+       }
+       oddsNodes {
+         combString
+         oddsValue
+         hotFavourite
+         oddsDropValue
+         bankerOdds {
+           combString
+           oddsValue
+         }
+       }
+     }
+   }
+ }""",
+ }
+
+
+ @ttl_cache(maxsize=12, ttl=30)
+ def _fetch_live_odds(date: str, venue_code: str, race_number: int, odds_type: Tuple[str, ...] = ('PLA', 'QPL')) -> List[dict]:
+     """Fetch live odds data from the HKJC GraphQL endpoint."""
+     payload = LIVEODDS_PAYLOAD.copy()
+     payload["variables"] = payload["variables"].copy()
+     payload["variables"]["date"] = date
+     payload["variables"]["venueCode"] = venue_code
+     payload["variables"]["raceNo"] = race_number
+     payload["variables"]["oddsTypes"] = odds_type
+
+     headers = {
+         "Origin": "https://bet.hkjc.com",
+         "Referer": "https://bet.hkjc.com",
+         "Content-Type": "application/json",
+         "Accept": "application/json",
+         "User-Agent": "python-hkjc-fetch/0.1",
+     }
+
+     r = requests.post(ENDPOINT, json=payload, headers=headers, timeout=10)
+     if r.status_code != 200:
+         raise RuntimeError(f"Request failed: {r.status_code} - {r.text}")
+
+     meetings = r.json().get("data", {}).get("raceMeetings", [])
+
+     return [
+         {"HorseID": node["combString"], "Type": pool.get("oddsType"),
+          "Odds": float(node["oddsValue"])}
+         for meeting in meetings
+         for pool in meeting.get("pmPools", [])
+         for node in pool.get("oddsNodes", [])
+     ]
+
+
+ def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str] = ['PLA', 'QPL']) -> dict:
+     """Fetch live odds as numpy arrays.
+
+     Args:
+         date (str): Date in 'YYYY-MM-DD' format.
+         venue_code (str): Venue code, e.g. 'ST' for Sha Tin, 'HV' for Happy Valley.
+         race_number (int): Race number.
+         odds_type (List[str]): Types of odds to fetch. Defaults to ['PLA', 'QPL']. Currently the following types are supported:
+             - 'WIN': Win odds
+             - 'PLA': Place odds
+             - 'QIN': Quinella odds
+             - 'QPL': Quinella Place odds
+
+     Returns:
+         dict: Dictionary keyed by odds type, with numpy arrays of odds as values.
+             For 'WIN' and 'PLA' the value is a 1D array of odds per horse.
+             For 'QIN' and 'QPL' the value is a 2D array indexed by the two horse numbers.
+     """
+     # validate date format
+     try:
+         dt.strptime(date, "%Y-%m-%d")
+     except Exception:
+         raise ValueError("Date must be in 'YYYY-MM-DD' format")
+
+     data = _fetch_live_odds(date, venue_code, race_number,
+                             odds_type=tuple(set(['PLA'] + odds_type)))  # ensure PLA is always fetched
+
+     # use place odds to determine number of horses
+     pla_data = [entry for entry in data if entry["Type"] == "PLA"]
+     N = len(pla_data)
+
+     odds = {'WIN': np.full(N, np.nan, dtype=float),
+             'PLA': np.full(N, np.nan, dtype=float),
+             'QIN': np.full((N, N), np.nan, dtype=float),
+             'QPL': np.full((N, N), np.nan, dtype=float)}
+
+     for entry in data:
+         if entry["Type"] in ["QIN", "QPL"]:
+             horse_ids = list(map(int, entry["HorseID"].split(",")))
+             odds[entry["Type"]][horse_ids[0] - 1, horse_ids[1] - 1] = entry["Odds"]
+             odds[entry["Type"]][horse_ids[1] - 1, horse_ids[0] - 1] = entry["Odds"]
+         elif entry["Type"] in ["PLA", "WIN"]:
+             odds[entry["Type"]][int(entry["HorseID"]) - 1] = entry["Odds"]
+
+     return {t: odds[t] for t in odds_type}
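A hedged usage sketch of live_odds based on the signature and docstring above; the date, venue and race number are placeholders, and a real call needs a meeting that is currently offering odds:

    import numpy as np
    from hkjc.live_odds import live_odds

    # Placeholder meeting details; substitute a real date/venue/race.
    odds = live_odds("2025-01-01", "ST", 1, odds_type=["PLA", "QPL"])

    pla = odds["PLA"]   # shape (N,): place odds per horse, NaN where no odds were returned
    qpl = odds["QPL"]   # shape (N, N): symmetric quinella-place matrix, NaN on the diagonal
    print(pla.shape, qpl.shape)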
hkjc/odds_fitting.py ADDED
@@ -0,0 +1 @@
+ ## TODO: implement odds fitting
hkjc/optimization.py ADDED
@@ -0,0 +1,106 @@
+ import polars as pl
+ from typing import List, Union
+
+ def _pareto_filter(
+     df: pl.DataFrame,
+     groupby: List[str],
+     by: List[str],
+     maximize: Union[bool, List[bool]] = True
+ ) -> pl.DataFrame:
+     """
+     Filter dataframe to only include Pareto optimal rows within each group.
+
+     Args:
+         df: Input dataframe
+         groupby: Columns to group by (empty list for global filter)
+         by: Columns to consider for Pareto optimality
+         maximize: Whether to maximize (True) or minimize (False) each 'by' column
+
+     Returns:
+         DataFrame containing only Pareto optimal rows
+     """
+     if df.is_empty() or not by:
+         return df
+
+     # Normalize maximize to list
+     maximize_list = [maximize] * len(by) if isinstance(maximize, bool) else maximize
+
+     if len(maximize_list) != len(by):
+         raise ValueError(f"Length of 'maximize' ({len(maximize_list)}) must equal length of 'by' ({len(by)})")
+
+     # Single objective: simple min/max filter
+     if len(by) == 1:
+         opt_expr = pl.col(by[0]).max() if maximize_list[0] else pl.col(by[0]).min()
+         if groupby:
+             opt_expr = opt_expr.over(groupby)
+         return df.filter(pl.col(by[0]) == opt_expr)
+
+     # Two objectives: efficient skyline algorithm
+     if len(by) == 2:
+         temp_cols = ["__obj_0", "__obj_1"]
+
+         # Transform to maximization problem
+         df_temp = df.with_columns([
+             (pl.col(by[i]) * (1 if maximize_list[i] else -1)).alias(temp_cols[i])
+             for i in range(2)
+         ])
+
+         # Sort by first objective descending, then second descending (for stability)
+         groupby = groupby or []
+         sort_cols = groupby + temp_cols
+         sorted_df = df_temp.sort(sort_cols, descending=[False] * len(groupby) + [True, True])
+
+         # Keep rows where second objective is not dominated by any previous row in group
+         if groupby:
+             max_so_far = pl.col(temp_cols[1]).cum_max().shift(1, fill_value=float("-inf")).over(groupby)
+         else:
+             max_so_far = pl.col(temp_cols[1]).cum_max().shift(1, fill_value=float("-inf"))
+
+         mask = pl.col(temp_cols[1]) > max_so_far
+         return sorted_df.filter(mask).drop(temp_cols)
+
+     # N objectives (N > 2): pairwise dominance check
+     df_with_id = df.with_row_index("__id")
+
+     # Self-join to compare all pairs
+     left = df_with_id.lazy()
+     right = df_with_id.lazy()
+
+     if groupby:
+         pairs = left.join(right, on=groupby, suffix="_r")
+     else:
+         pairs = left.join(right, how="cross", suffix="_r")
+
+     # Only compare different rows
+     pairs = pairs.filter(pl.col("__id") != pl.col("__id_r"))
+
+     # Build dominance conditions
+     dominance_conditions = []
+     for col, is_max in zip(by, maximize_list):
+         if is_max:
+             # right dominates left if right[col] >= left[col] for all cols
+             dominance_conditions.append(pl.col(f"{col}_r") >= pl.col(col))
+         else:
+             dominance_conditions.append(pl.col(f"{col}_r") <= pl.col(col))
+
+     # Strict dominance: all >= and at least one >
+     strict_conditions = []
+     for col, is_max in zip(by, maximize_list):
+         if is_max:
+             strict_conditions.append(pl.col(f"{col}_r") > pl.col(col))
+         else:
+             strict_conditions.append(pl.col(f"{col}_r") < pl.col(col))
+
+     is_dominated = pl.all_horizontal(dominance_conditions) & pl.any_horizontal(strict_conditions)
+
+     # Find IDs of dominated rows
+     dominated_ids = (
+         pairs.filter(is_dominated)
+         .select("__id")
+         .unique()
+         .collect()
+         .get_column("__id")
+     )
+
+     # Return non-dominated rows
+     return df_with_id.filter(~pl.col("__id").is_in(dominated_ids)).drop("__id")
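To show what the two-objective skyline branch keeps, a small toy run of _pareto_filter with invented numbers (both columns maximized):

    import polars as pl
    from hkjc.optimization import _pareto_filter

    # Invented metrics for four hypothetical tickets.
    df = pl.DataFrame({
        "Banker":   [1, 1, 2, 2],
        "WinProb":  [0.30, 0.25, 0.40, 0.35],
        "ExpValue": [-0.05, 0.02, -0.10, -0.02],
    })

    # (0.30, -0.05) is dominated by (0.35, -0.02) and is dropped;
    # the remaining three rows form the Pareto front.
    front = _pareto_filter(df, groupby=[], by=["WinProb", "ExpValue"], maximize=True)
    print(front.sort("WinProb"))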
hkjc/processing.py ADDED
@@ -0,0 +1,71 @@
+ """Functions to batch process trades into dataframes for analysis.
+ """
+ from __future__ import annotations
+ from typing import Tuple, List
+
+ from .live_odds import live_odds
+ from .qpbanker import win_probability, expected_value, average_odds
+ from .optimization import _pareto_filter
+
+ import polars as pl
+ import numpy as np
+ from itertools import combinations
+ from tqdm import tqdm
+
+
+ def _all_subsets(lst):  # list all non-empty subsets of a list
+     return [list(x) for r in range(1, len(lst) + 1) for x in combinations(lst, r)]
+
+
+ def _process_single_qp_trade(banker: int, covered: List[int], odds_pla: np.ndarray, odds_qpl: np.ndarray, rebate: float) -> Tuple[int, List[int], float, float, float]:
+     """Process a single QP trade and return its metrics.
+     """
+     win_prob = win_probability(odds_pla, banker, covered)
+     exp_value = expected_value(odds_pla, odds_qpl, banker, covered, rebate)
+     ave_odds = average_odds(odds_qpl, banker, covered)
+     return (banker, covered, win_prob, exp_value, ave_odds)
+
+
+ def generate_all_qp_trades(date: str, venue_code: str, race_number: int, rebate: float = 0.12) -> pl.DataFrame:
+     """Generate all possible QP tickets for the specified race.
+
+     Args:
+         date (str): Date in 'YYYY-MM-DD' format.
+         venue_code (str): Venue code, e.g. 'ST' for Sha Tin, 'HV' for Happy Valley.
+         race_number (int): Race number.
+         rebate (float, optional): The rebate percentage. Defaults to 0.12.
+
+     Returns:
+         pl.DataFrame: DataFrame with all possible trades and their metrics.
+     """
+     odds = live_odds(date, venue_code, race_number, odds_type=['PLA', 'QPL'])
+     N = len(odds['PLA'])
+     candidates = np.arange(1, N + 1)
+
+     results = [_process_single_qp_trade(banker, covered, odds['PLA'], odds['QPL'], rebate)
+                for banker in tqdm(candidates, desc="Processing bankers")
+                for covered in _all_subsets(candidates[candidates != banker])]
+
+     df = (pl.DataFrame(results, schema=['Banker', 'Covered', 'WinProb', 'ExpValue', 'AvgOdds'])
+           .with_columns(pl.col('Covered').list.len().alias('NumCovered')))
+
+     return df
+
+
+ def generate_pareto_qp_trades(date: str, venue_code: str, race_number: int, rebate: float = 0.12, groupby: List[str] = []) -> pl.DataFrame:
+     """Generate QP tickets that are Pareto optimal for the specified race.
+
+     Args:
+         date (str): Date in 'YYYY-MM-DD' format.
+         venue_code (str): Venue code, e.g. 'ST' for Sha Tin, 'HV' for Happy Valley.
+         race_number (int): Race number.
+         rebate (float, optional): The rebate percentage. Defaults to 0.12.
+         groupby (List[str], optional): Columns to group by when determining Pareto optimality. Defaults to [] (global optimum).
+
+     Returns:
+         pl.DataFrame: DataFrame with all Pareto optimal trades and their metrics.
+     """
+     df = generate_all_qp_trades(date, venue_code, race_number, rebate)
+     pareto_df = _pareto_filter(df, groupby=groupby,
+                                by=['WinProb', 'ExpValue'], maximize=True)
+     return pareto_df
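For context, a sketch of how the two generators chain together; the meeting details are placeholders, and the run enumerates every banker/cover combination, so it can take a while for large fields:

    from hkjc.processing import generate_all_qp_trades, generate_pareto_qp_trades

    # Placeholder meeting details for illustration only.
    all_trades = generate_all_qp_trades("2025-01-01", "ST", 1)
    print(all_trades.columns)  # ['Banker', 'Covered', 'WinProb', 'ExpValue', 'AvgOdds', 'NumCovered']

    # Pareto-optimal tickets per cover-set size instead of globally:
    best = generate_pareto_qp_trades("2025-01-01", "ST", 1, groupby=["NumCovered"])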
hkjc/py.typed ADDED
File without changes
hkjc/qpbanker.py ADDED
@@ -0,0 +1,119 @@
+ """Functions to perform probability and expectation calculations for the QP Banker strategy.
+ """
+ from __future__ import annotations
+
+ from typing import List
+ import numpy as np
+
+ def _pla_odds_partition(pla_odds: np.ndarray, banker: int, covered: List[int]) -> tuple[float, float, float]:
+     """Partition the place odds into banker, covered and eliminated sets.
+
+     Args:
+         pla_odds (np.ndarray): An array of place odds for the horses (0-indexed).
+         banker (int): The horse number of the banker (1-indexed).
+         covered (List[int]): A list of horse numbers in the cover set (1-indexed).
+
+     Returns:
+         tuple[float, float, float]: A tuple containing the implied probability mass of the banker,
+             the covered set and the eliminated set.
+     """
+     p_banker = 1 / pla_odds[banker - 1]
+     Z_covered = sum([1 / pla_odds[c - 1] for c in covered])
+     Z_total = (1 / pla_odds).sum()
+     Z_elim = Z_total - p_banker - Z_covered
+
+     return p_banker, Z_covered, Z_elim
+
+ def _double_win_probability(pla_odds: np.ndarray, banker: int, covered: List[int]) -> float:
+     """Calculate the probability of winning two tickets in the QP Banker strategy.
+     See the Overleaf document for the derivation.
+
+     Args:
+         pla_odds (np.ndarray): An array of place odds for the horses (0-indexed).
+         banker (int): The horse number of the banker (1-indexed).
+         covered (List[int]): A list of horse numbers in the cover set (1-indexed).
+
+     Returns:
+         float: probability
+     """
+     p_banker, Z_covered, Z_elim = _pla_odds_partition(pla_odds, banker, covered)
+     Z_total = p_banker + Z_covered + Z_elim
+
+     return 3 * p_banker * Z_covered**2 / Z_total**3
+
+
+ def _single_win_probability(pla_odds: np.ndarray, banker: int, covered: List[int]) -> float:
+     """Calculate the probability of winning a single ticket in the QP Banker strategy.
+     See the Overleaf document for the derivation.
+
+     Args:
+         pla_odds (np.ndarray): An array of place odds for the horses (0-indexed).
+         banker (int): The horse number of the banker (1-indexed).
+         covered (List[int]): A list of horse numbers in the cover set (1-indexed).
+
+     Returns:
+         float: probability
+     """
+     p_banker, Z_covered, Z_elim = _pla_odds_partition(pla_odds, banker, covered)
+     Z_total = p_banker + Z_covered + Z_elim
+
+     return 6 * p_banker * Z_covered * Z_elim / Z_total**3
+
+
+ def win_probability(pla_odds: np.ndarray, banker: int, covered: List[int]) -> float:
+     """Calculate the probability of winning at least one ticket in the QP Banker strategy.
+
+     Args:
+         pla_odds (np.ndarray): An array of place odds for the horses (0-indexed).
+         banker (int): The horse number of the banker (1-indexed).
+         covered (List[int]): A list of horse numbers in the cover set (1-indexed).
+
+     Returns:
+         float: probability
+     """
+     p_double = _double_win_probability(pla_odds, banker, covered)
+     p_single = _single_win_probability(pla_odds, banker, covered)
+
+     return p_double + p_single
+
+
+ def expected_value(pla_odds: np.ndarray, qpl_odds: np.ndarray, banker: int, covered: List[int], rebate: float = 0.12) -> float:
+     """Calculate the expected value (per dollar) of the QP Banker strategy using a constant stake.
+
+     Args:
+         pla_odds (np.ndarray): An array of place odds for the horses (0-indexed).
+         qpl_odds (np.ndarray): A 2D array of quinella place odds for horse pairs (0-indexed).
+         banker (int): The horse number of the banker (1-indexed).
+         covered (List[int]): A list of horse numbers in the cover set (1-indexed).
+         rebate (float, optional): The rebate percentage. Defaults to 0.12.
+
+     Returns:
+         float: expected value per dollar staked
+     """
+     p_banker, Z_covered, Z_elim = _pla_odds_partition(pla_odds, banker, covered)
+     Z_total = p_banker + Z_covered + Z_elim
+     pla_prob = 1 / pla_odds
+
+     P_dbl = _double_win_probability(pla_odds, banker, covered)
+     P_single = _single_win_probability(pla_odds, banker, covered)
+     C = len(covered)
+
+     BigSum = sum([qpl_odds[banker - 1][c - 1] * pla_prob[c - 1] * (Z_covered - pla_prob[c - 1] + Z_elim) for c in covered])
+     EV = 6 * p_banker * BigSum / Z_total**3 - (1 - rebate) * C - rebate * (2 * P_dbl + P_single)
+
+     return EV / C
+
+ def average_odds(qpl_odds: np.ndarray, banker: int, covered: List[int]) -> float:
+     """Calculate the (harmonic) average odds across the covered set.
+
+     Args:
+         qpl_odds (np.ndarray): A 2D array of quinella place odds for horse pairs (0-indexed).
+         banker (int): The horse number of the banker (1-indexed).
+         covered (List[int]): A list of horse numbers in the cover set (1-indexed).
+
+     Returns:
+         float: average odds
+     """
+     C = len(covered)
+     avg_odds = C / sum([1 / qpl_odds[banker - 1][c - 1] for c in covered])
+     return avg_odds
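A small numeric sanity check of the probability formulas above, using toy odds rather than real market data:

    import numpy as np
    from hkjc.qpbanker import win_probability, average_odds

    # Toy 4-horse race; every number here is invented for illustration.
    pla_odds = np.array([2.0, 3.0, 4.0, 6.0])   # implied 1/odds sum to Z_total = 1.25
    qpl_odds = np.full((4, 4), 10.0)            # flat quinella-place odds

    # Banker = horse 1, cover set = {2, 3}:
    #   p_banker = 0.5, Z_covered = 1/3 + 1/4, Z_elim = 1/6
    #   P(both tickets) = 3 * 0.5 * Z_covered**2 / 1.25**3 ≈ 0.261
    #   P(one ticket)   = 6 * 0.5 * Z_covered * Z_elim / 1.25**3 ≈ 0.149
    print(win_probability(pla_odds, banker=1, covered=[2, 3]))  # ≈ 0.411
    print(average_odds(qpl_odds, banker=1, covered=[2, 3]))     # 10.0 (harmonic mean)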
hkjc/visualization.py ADDED
File without changes
hkjc-0.1.0.dist-info/METADATA ADDED
@@ -0,0 +1,11 @@
+ Metadata-Version: 2.4
+ Name: hkjc
+ Version: 0.1.0
+ Summary: Library for scraping HKJC data and performing basic analysis
+ Requires-Python: >=3.11
+ Requires-Dist: cachetools>=6.2.0
+ Requires-Dist: numpy>=2.3.3
+ Requires-Dist: polars>=1.33.1
+ Requires-Dist: requests>=2.32.5
+ Requires-Dist: scipy>=1.16.2
+ Requires-Dist: tqdm>=4.67.1
hkjc-0.1.0.dist-info/RECORD ADDED
@@ -0,0 +1,12 @@
+ hkjc/__init__.py,sha256=_boKRSn7A1On4Uwh4Ds-UoS2-dAfbyHoXB9XLp6Efrc,499
+ hkjc/analysis.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ hkjc/live_odds.py,sha256=i_g9ckQKA9GWbwPXNvbmNvm-dPbF9UJoGiWv6_bHzwA,4603
+ hkjc/odds_fitting.py,sha256=abHa19Vv3yAjX4PPFhwoMldmG1DF1tXGXtYVaFszhJI,33
+ hkjc/optimization.py,sha256=OArQ3w9bwcIV_lTNuE5za6AROoa90xk_gwAoGwQ-8RE,3784
+ hkjc/processing.py,sha256=9AiTkjsx51sZtyA4XcfK-werwFWxdea0BeIEuNvGQYQ,2983
+ hkjc/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ hkjc/qpbanker.py,sha256=vhvYb5_nGrKgYgre9gGF6tgswovca5C9pZVOPGxEP1Q,4804
+ hkjc/visualization.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ hkjc-0.1.0.dist-info/METADATA,sha256=fsrs2M07EvjjNyLqXMAQnyRg8Ebk-IcVq415v2QZEBQ,320
+ hkjc-0.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ hkjc-0.1.0.dist-info/RECORD,,
hkjc-0.1.0.dist-info/WHEEL ADDED
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: hatchling 1.27.0
+ Root-Is-Purelib: true
+ Tag: py3-none-any