hkjc-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hkjc/__init__.py ADDED
@@ -0,0 +1,16 @@
+ """Top-level package for hkjc tools.
+
+ This module re-exports commonly used symbols from the submodules.
+ """
+ from importlib.metadata import version as _version
+
+ __all__ = ["live_odds", "qpbanker",
+            "generate_all_qp_trades", "generate_pareto_qp_trades"]
+
+ try:
+     __version__ = _version(__name__)
+ except Exception:  # pragma: no cover - best-effort version resolution
+     __version__ = "0.0.0"
+
+ from .live_odds import live_odds
+ from .processing import generate_all_qp_trades, generate_pareto_qp_trades
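The re-exports above make the common entry points importable from the package root. A minimal sketch, assuming the wheel is installed:

    import hkjc

    print(hkjc.__version__)                   # "0.1.0" when installed; falls back to "0.0.0" otherwise
    fetch = hkjc.live_odds                    # re-exported from hkjc.live_odds
    trades = hkjc.generate_pareto_qp_trades   # re-exported from hkjc.processing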
hkjc/analysis.py ADDED
File without changes
hkjc/live_odds.py ADDED
@@ -0,0 +1,132 @@
+ """Functions to fetch and process live odds data from HKJC.
+ """
+ from __future__ import annotations
+ from typing import Tuple, List
+
+ import requests
+ from cachetools.func import ttl_cache
+ import numpy as np
+ from datetime import datetime as dt
+
+ ENDPOINT = "https://info.cld.hkjc.com/graphql/base/"
+
+ LIVEODDS_PAYLOAD = {
+     "operationName": "racing",
+     "variables": {"date": None, "venueCode": None, "raceNo": None, "oddsTypes": None},
+     "query": """
+ query racing($date: String, $venueCode: String, $oddsTypes: [OddsType], $raceNo: Int) {
+   raceMeetings(date: $date, venueCode: $venueCode) {
+     pmPools(oddsTypes: $oddsTypes, raceNo: $raceNo) {
+       id
+       status
+       sellStatus
+       oddsType
+       lastUpdateTime
+       guarantee
+       minTicketCost
+       name_en
+       name_ch
+       leg {
+         number
+         races
+       }
+       cWinSelections {
+         composite
+         name_ch
+         name_en
+         starters
+       }
+       oddsNodes {
+         combString
+         oddsValue
+         hotFavourite
+         oddsDropValue
+         bankerOdds {
+           combString
+           oddsValue
+         }
+       }
+     }
+   }
+ }""",
+ }
+
+
+ @ttl_cache(maxsize=12, ttl=30)
+ def _fetch_live_odds(date: str, venue_code: str, race_number: int, odds_type: Tuple[str, ...] = ('PLA', 'QPL')) -> List[dict]:
+     """Fetch live odds data from the HKJC GraphQL endpoint."""
+     payload = LIVEODDS_PAYLOAD.copy()
+     payload["variables"] = payload["variables"].copy()
+     payload["variables"]["date"] = date
+     payload["variables"]["venueCode"] = venue_code
+     payload["variables"]["raceNo"] = race_number
+     payload["variables"]["oddsTypes"] = odds_type
+
+     headers = {
+         "Origin": "https://bet.hkjc.com",
+         "Referer": "https://bet.hkjc.com",
+         "Content-Type": "application/json",
+         "Accept": "application/json",
+         "User-Agent": "python-hkjc-fetch/0.1",
+     }
+
+     r = requests.post(ENDPOINT, json=payload, headers=headers, timeout=10)
+     if r.status_code != 200:
+         raise RuntimeError(f"Request failed: {r.status_code} - {r.text}")
+
+     meetings = r.json().get("data", {}).get("raceMeetings", [])
+
+     return [
+         {"HorseID": node["combString"], "Type": pool.get("oddsType"),
+          "Odds": float(node["oddsValue"])}
+         for meeting in meetings
+         for pool in meeting.get("pmPools", [])
+         for node in pool.get("oddsNodes", [])
+     ]
+
+
+ def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str] = ['PLA', 'QPL']) -> dict:
+     """Fetch live odds as numpy arrays.
+
+     Args:
+         date (str): Date in 'YYYY-MM-DD' format.
+         venue_code (str): Venue code, e.g. 'ST' for Sha Tin, 'HV' for Happy Valley.
+         race_number (int): Race number.
+         odds_type (List[str]): Types of odds to fetch. Defaults to ['PLA', 'QPL']. Currently the following types are supported:
+             - 'WIN': Win odds
+             - 'PLA': Place odds
+             - 'QIN': Quinella odds
+             - 'QPL': Quinella Place odds
+
+     Returns:
+         dict: Dictionary keyed by odds type, with numpy arrays of odds as values.
+             For 'WIN' and 'PLA' the value is a 1D array of odds per horse.
+             For 'QIN' and 'QPL' the value is a 2D array indexed by the two horse numbers.
+     """
+     # validate date format
+     try:
+         dt.strptime(date, "%Y-%m-%d")
+     except Exception:
+         raise ValueError("Date must be in 'YYYY-MM-DD' format")
+
+     data = _fetch_live_odds(date, venue_code, race_number,
+                             odds_type=tuple(set(['PLA'] + odds_type)))  # ensure PLA is always fetched
+
+     # use place odds to determine number of horses
+     pla_data = [entry for entry in data if entry["Type"] == "PLA"]
+     N = len(pla_data)
+
+     odds = {'WIN': np.full(N, np.nan, dtype=float),
+             'PLA': np.full(N, np.nan, dtype=float),
+             'QIN': np.full((N, N), np.nan, dtype=float),
+             'QPL': np.full((N, N), np.nan, dtype=float)}
+
+     for entry in data:
+         if entry["Type"] in ["QIN", "QPL"]:
+             horse_ids = list(map(int, entry["HorseID"].split(",")))
+             odds[entry["Type"]][horse_ids[0] - 1, horse_ids[1] - 1] = entry["Odds"]
+             odds[entry["Type"]][horse_ids[1] - 1, horse_ids[0] - 1] = entry["Odds"]
+         elif entry["Type"] in ["PLA", "WIN"]:
+             odds[entry["Type"]][int(entry["HorseID"]) - 1] = entry["Odds"]
+
+     return {t: odds[t] for t in odds_type}
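A hedged usage sketch of live_odds based on the signature and docstring above; the date, venue and race number are placeholders, and a real call needs a meeting that is currently offering odds:

    import numpy as np
    from hkjc.live_odds import live_odds

    # Placeholder meeting details; substitute a real date/venue/race.
    odds = live_odds("2025-01-01", "ST", 1, odds_type=["PLA", "QPL"])

    pla = odds["PLA"]   # shape (N,): place odds per horse, NaN where no odds were returned
    qpl = odds["QPL"]   # shape (N, N): symmetric quinella-place matrix, NaN on the diagonal
    print(pla.shape, qpl.shape)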
hkjc/odds_fitting.py ADDED
@@ -0,0 +1 @@
+ ## TODO: implement odds fitting
hkjc/optimization.py ADDED
@@ -0,0 +1,106 @@
+ import polars as pl
+ from typing import List, Union
+
+ def _pareto_filter(
+     df: pl.DataFrame,
+     groupby: List[str],
+     by: List[str],
+     maximize: Union[bool, List[bool]] = True
+ ) -> pl.DataFrame:
+     """
+     Filter dataframe to only include Pareto optimal rows within each group.
+
+     Args:
+         df: Input dataframe
+         groupby: Columns to group by (empty list for global filter)
+         by: Columns to consider for Pareto optimality
+         maximize: Whether to maximize (True) or minimize (False) each 'by' column
+
+     Returns:
+         DataFrame containing only Pareto optimal rows
+     """
+     if df.is_empty() or not by:
+         return df
+
+     # Normalize maximize to list
+     maximize_list = [maximize] * len(by) if isinstance(maximize, bool) else maximize
+
+     if len(maximize_list) != len(by):
+         raise ValueError(f"Length of 'maximize' ({len(maximize_list)}) must equal length of 'by' ({len(by)})")
+
+     # Single objective: simple min/max filter
+     if len(by) == 1:
+         opt_expr = pl.col(by[0]).max() if maximize_list[0] else pl.col(by[0]).min()
+         if groupby:
+             opt_expr = opt_expr.over(groupby)
+         return df.filter(pl.col(by[0]) == opt_expr)
+
+     # Two objectives: efficient skyline algorithm
+     if len(by) == 2:
+         temp_cols = ["__obj_0", "__obj_1"]
+
+         # Transform to maximization problem
+         df_temp = df.with_columns([
+             (pl.col(by[i]) * (1 if maximize_list[i] else -1)).alias(temp_cols[i])
+             for i in range(2)
+         ])
+
+         # Sort by first objective descending, then second descending (for stability)
+         groupby = groupby or []
+         sort_cols = groupby + temp_cols
+         sorted_df = df_temp.sort(sort_cols, descending=[False] * len(groupby) + [True, True])
+
+         # Keep rows where second objective is not dominated by any previous row in group
+         if groupby:
+             max_so_far = pl.col(temp_cols[1]).cum_max().shift(1, fill_value=float("-inf")).over(groupby)
+         else:
+             max_so_far = pl.col(temp_cols[1]).cum_max().shift(1, fill_value=float("-inf"))
+
+         mask = pl.col(temp_cols[1]) > max_so_far
+         return sorted_df.filter(mask).drop(temp_cols)
+
+     # N objectives (N > 2): pairwise dominance check
+     df_with_id = df.with_row_index("__id")
+
+     # Self-join to compare all pairs
+     left = df_with_id.lazy()
+     right = df_with_id.lazy()
+
+     if groupby:
+         pairs = left.join(right, on=groupby, suffix="_r")
+     else:
+         pairs = left.join(right, how="cross", suffix="_r")
+
+     # Only compare different rows
+     pairs = pairs.filter(pl.col("__id") != pl.col("__id_r"))
+
+     # Build dominance conditions
+     dominance_conditions = []
+     for col, is_max in zip(by, maximize_list):
+         if is_max:
+             # right dominates left if right[col] >= left[col] for all cols
+             dominance_conditions.append(pl.col(f"{col}_r") >= pl.col(col))
+         else:
+             dominance_conditions.append(pl.col(f"{col}_r") <= pl.col(col))
+
+     # Strict dominance: all >= and at least one >
+     strict_conditions = []
+     for col, is_max in zip(by, maximize_list):
+         if is_max:
+             strict_conditions.append(pl.col(f"{col}_r") > pl.col(col))
+         else:
+             strict_conditions.append(pl.col(f"{col}_r") < pl.col(col))
+
+     is_dominated = pl.all_horizontal(dominance_conditions) & pl.any_horizontal(strict_conditions)
+
+     # Find IDs of dominated rows
+     dominated_ids = (
+         pairs.filter(is_dominated)
+         .select("__id")
+         .unique()
+         .collect()
+         .get_column("__id")
+     )
+
+     # Return non-dominated rows
+     return df_with_id.filter(~pl.col("__id").is_in(dominated_ids)).drop("__id")
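To show what the two-objective skyline branch keeps, a small toy run of _pareto_filter with invented numbers (both columns maximized):

    import polars as pl
    from hkjc.optimization import _pareto_filter

    # Invented metrics for four hypothetical tickets.
    df = pl.DataFrame({
        "Banker":   [1, 1, 2, 2],
        "WinProb":  [0.30, 0.25, 0.40, 0.35],
        "ExpValue": [-0.05, 0.02, -0.10, -0.02],
    })

    # (0.30, -0.05) is dominated by (0.35, -0.02) and is dropped;
    # the remaining three rows form the Pareto front.
    front = _pareto_filter(df, groupby=[], by=["WinProb", "ExpValue"], maximize=True)
    print(front.sort("WinProb"))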
hkjc/processing.py ADDED
@@ -0,0 +1,71 @@
+ """Functions to batch process trades into dataframes for analysis.
+ """
+ from __future__ import annotations
+ from typing import Tuple, List
+
+ from .live_odds import live_odds
+ from .qpbanker import win_probability, expected_value, average_odds
+ from .optimization import _pareto_filter
+
+ import polars as pl
+ import numpy as np
+ from itertools import combinations
+ from tqdm import tqdm
+
+
+ def _all_subsets(lst):  # list all non-empty subsets of a list
+     return [list(x) for r in range(1, len(lst) + 1) for x in combinations(lst, r)]
+
+
+ def _process_single_qp_trade(banker: int, covered: List[int], odds_pla: np.ndarray, odds_qpl: np.ndarray, rebate: float) -> Tuple[int, List[int], float, float, float]:
+     """Process a single QP trade and return its metrics.
+     """
+     win_prob = win_probability(odds_pla, banker, covered)
+     exp_value = expected_value(odds_pla, odds_qpl, banker, covered, rebate)
+     ave_odds = average_odds(odds_qpl, banker, covered)
+     return (banker, covered, win_prob, exp_value, ave_odds)
+
+
+ def generate_all_qp_trades(date: str, venue_code: str, race_number: int, rebate: float = 0.12) -> pl.DataFrame:
+     """Generate all possible QP tickets for the specified race.
+
+     Args:
+         date (str): Date in 'YYYY-MM-DD' format.
+         venue_code (str): Venue code, e.g. 'ST' for Sha Tin, 'HV' for Happy Valley.
+         race_number (int): Race number.
+         rebate (float, optional): The rebate percentage. Defaults to 0.12.
+
+     Returns:
+         pl.DataFrame: DataFrame with all possible trades and their metrics.
+     """
+     odds = live_odds(date, venue_code, race_number, odds_type=['PLA', 'QPL'])
+     N = len(odds['PLA'])
+     candidates = np.arange(1, N + 1)
+
+     results = [_process_single_qp_trade(banker, covered, odds['PLA'], odds['QPL'], rebate)
+                for banker in tqdm(candidates, desc="Processing bankers")
+                for covered in _all_subsets(candidates[candidates != banker])]
+
+     df = (pl.DataFrame(results, schema=['Banker', 'Covered', 'WinProb', 'ExpValue', 'AvgOdds'])
+           .with_columns(pl.col('Covered').list.len().alias('NumCovered')))
+
+     return df
+
+
+ def generate_pareto_qp_trades(date: str, venue_code: str, race_number: int, rebate: float = 0.12, groupby: List[str] = []) -> pl.DataFrame:
+     """Generate QP tickets that are Pareto optimal for the specified race.
+
+     Args:
+         date (str): Date in 'YYYY-MM-DD' format.
+         venue_code (str): Venue code, e.g. 'ST' for Sha Tin, 'HV' for Happy Valley.
+         race_number (int): Race number.
+         rebate (float, optional): The rebate percentage. Defaults to 0.12.
+         groupby (List[str], optional): Columns to group by when determining Pareto optimality. Defaults to [] (global optimum).
+
+     Returns:
+         pl.DataFrame: DataFrame with all Pareto optimal trades and their metrics.
+     """
+     df = generate_all_qp_trades(date, venue_code, race_number, rebate)
+     pareto_df = _pareto_filter(df, groupby=groupby,
+                                by=['WinProb', 'ExpValue'], maximize=True)
+     return pareto_df
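For context, a sketch of how the two generators chain together; the meeting details are placeholders, and the run enumerates every banker/cover combination, so it can take a while for large fields:

    from hkjc.processing import generate_all_qp_trades, generate_pareto_qp_trades

    # Placeholder meeting details for illustration only.
    all_trades = generate_all_qp_trades("2025-01-01", "ST", 1)
    print(all_trades.columns)  # ['Banker', 'Covered', 'WinProb', 'ExpValue', 'AvgOdds', 'NumCovered']

    # Pareto-optimal tickets per cover-set size instead of globally:
    best = generate_pareto_qp_trades("2025-01-01", "ST", 1, groupby=["NumCovered"])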
hkjc/py.typed ADDED
File without changes
hkjc/qpbanker.py ADDED
@@ -0,0 +1,119 @@
+ """Functions to perform probability and expectation calculations for the QP Banker strategy.
+ """
+ from __future__ import annotations
+
+ from typing import List
+ import numpy as np
+
+ def _pla_odds_partition(pla_odds: np.ndarray, banker: int, covered: List[int]) -> tuple[float, float, float]:
+     """Partition the place odds into banker, covered and eliminated sets.
+
+     Args:
+         pla_odds (np.ndarray): An array of place odds for the horses (0-indexed).
+         banker (int): The horse number of the banker (1-indexed).
+         covered (List[int]): A list of horse numbers in the cover set (1-indexed).
+
+     Returns:
+         tuple[float, float, float]: A tuple containing the implied probability mass of the banker,
+             the covered set and the eliminated set.
+     """
+     p_banker = 1 / pla_odds[banker - 1]
+     Z_covered = sum([1 / pla_odds[c - 1] for c in covered])
+     Z_total = (1 / pla_odds).sum()
+     Z_elim = Z_total - p_banker - Z_covered
+
+     return p_banker, Z_covered, Z_elim
+
+ def _double_win_probability(pla_odds: np.ndarray, banker: int, covered: List[int]) -> float:
+     """Calculate the probability of winning two tickets in the QP Banker strategy.
+     See the Overleaf document for the derivation.
+
+     Args:
+         pla_odds (np.ndarray): An array of place odds for the horses (0-indexed).
+         banker (int): The horse number of the banker (1-indexed).
+         covered (List[int]): A list of horse numbers in the cover set (1-indexed).
+
+     Returns:
+         float: probability
+     """
+     p_banker, Z_covered, Z_elim = _pla_odds_partition(pla_odds, banker, covered)
+     Z_total = p_banker + Z_covered + Z_elim
+
+     return 3 * p_banker * Z_covered**2 / Z_total**3
+
+
+ def _single_win_probability(pla_odds: np.ndarray, banker: int, covered: List[int]) -> float:
+     """Calculate the probability of winning a single ticket in the QP Banker strategy.
+     See the Overleaf document for the derivation.
+
+     Args:
+         pla_odds (np.ndarray): An array of place odds for the horses (0-indexed).
+         banker (int): The horse number of the banker (1-indexed).
+         covered (List[int]): A list of horse numbers in the cover set (1-indexed).
+
+     Returns:
+         float: probability
+     """
+     p_banker, Z_covered, Z_elim = _pla_odds_partition(pla_odds, banker, covered)
+     Z_total = p_banker + Z_covered + Z_elim
+
+     return 6 * p_banker * Z_covered * Z_elim / Z_total**3
+
+
+ def win_probability(pla_odds: np.ndarray, banker: int, covered: List[int]) -> float:
+     """Calculate the probability of winning at least one ticket in the QP Banker strategy.
+
+     Args:
+         pla_odds (np.ndarray): An array of place odds for the horses (0-indexed).
+         banker (int): The horse number of the banker (1-indexed).
+         covered (List[int]): A list of horse numbers in the cover set (1-indexed).
+
+     Returns:
+         float: probability
+     """
+     p_double = _double_win_probability(pla_odds, banker, covered)
+     p_single = _single_win_probability(pla_odds, banker, covered)
+
+     return p_double + p_single
+
+
+ def expected_value(pla_odds: np.ndarray, qpl_odds: np.ndarray, banker: int, covered: List[int], rebate: float = 0.12) -> float:
+     """Calculate the expected value (per dollar) of the QP Banker strategy using a constant stake.
+
+     Args:
+         pla_odds (np.ndarray): An array of place odds for the horses (0-indexed).
+         qpl_odds (np.ndarray): A 2D array of quinella place odds for horse pairs (0-indexed).
+         banker (int): The horse number of the banker (1-indexed).
+         covered (List[int]): A list of horse numbers in the cover set (1-indexed).
+         rebate (float, optional): The rebate percentage. Defaults to 0.12.
+
+     Returns:
+         float: expected value per dollar staked
+     """
+     p_banker, Z_covered, Z_elim = _pla_odds_partition(pla_odds, banker, covered)
+     Z_total = p_banker + Z_covered + Z_elim
+     pla_prob = 1 / pla_odds
+
+     P_dbl = _double_win_probability(pla_odds, banker, covered)
+     P_single = _single_win_probability(pla_odds, banker, covered)
+     C = len(covered)
+
+     BigSum = sum([qpl_odds[banker - 1][c - 1] * pla_prob[c - 1] * (Z_covered - pla_prob[c - 1] + Z_elim) for c in covered])
+     EV = 6 * p_banker * BigSum / Z_total**3 - (1 - rebate) * C - rebate * (2 * P_dbl + P_single)
+
+     return EV / C
+
+ def average_odds(qpl_odds: np.ndarray, banker: int, covered: List[int]) -> float:
+     """Calculate the (harmonic) average odds across the covered set.
+
+     Args:
+         qpl_odds (np.ndarray): A 2D array of quinella place odds for horse pairs (0-indexed).
+         banker (int): The horse number of the banker (1-indexed).
+         covered (List[int]): A list of horse numbers in the cover set (1-indexed).
+
+     Returns:
+         float: average odds
+     """
+     C = len(covered)
+     avg_odds = C / sum([1 / qpl_odds[banker - 1][c - 1] for c in covered])
+     return avg_odds
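A small numeric sanity check of the probability formulas above, using toy odds rather than real market data:

    import numpy as np
    from hkjc.qpbanker import win_probability, average_odds

    # Toy 4-horse race; every number here is invented for illustration.
    pla_odds = np.array([2.0, 3.0, 4.0, 6.0])   # implied 1/odds sum to Z_total = 1.25
    qpl_odds = np.full((4, 4), 10.0)            # flat quinella-place odds

    # Banker = horse 1, cover set = {2, 3}:
    #   p_banker = 0.5, Z_covered = 1/3 + 1/4, Z_elim = 1/6
    #   P(both tickets) = 3 * 0.5 * Z_covered**2 / 1.25**3 ≈ 0.261
    #   P(one ticket)   = 6 * 0.5 * Z_covered * Z_elim / 1.25**3 ≈ 0.149
    print(win_probability(pla_odds, banker=1, covered=[2, 3]))  # ≈ 0.411
    print(average_odds(qpl_odds, banker=1, covered=[2, 3]))     # 10.0 (harmonic mean)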
hkjc/visualization.py ADDED
File without changes
hkjc-0.1.0.dist-info/METADATA ADDED
@@ -0,0 +1,11 @@
+ Metadata-Version: 2.4
+ Name: hkjc
+ Version: 0.1.0
+ Summary: Library for scraping HKJC data and performing basic analysis
+ Requires-Python: >=3.11
+ Requires-Dist: cachetools>=6.2.0
+ Requires-Dist: numpy>=2.3.3
+ Requires-Dist: polars>=1.33.1
+ Requires-Dist: requests>=2.32.5
+ Requires-Dist: scipy>=1.16.2
+ Requires-Dist: tqdm>=4.67.1
hkjc-0.1.0.dist-info/RECORD ADDED
@@ -0,0 +1,12 @@
+ hkjc/__init__.py,sha256=_boKRSn7A1On4Uwh4Ds-UoS2-dAfbyHoXB9XLp6Efrc,499
+ hkjc/analysis.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ hkjc/live_odds.py,sha256=i_g9ckQKA9GWbwPXNvbmNvm-dPbF9UJoGiWv6_bHzwA,4603
+ hkjc/odds_fitting.py,sha256=abHa19Vv3yAjX4PPFhwoMldmG1DF1tXGXtYVaFszhJI,33
+ hkjc/optimization.py,sha256=OArQ3w9bwcIV_lTNuE5za6AROoa90xk_gwAoGwQ-8RE,3784
+ hkjc/processing.py,sha256=9AiTkjsx51sZtyA4XcfK-werwFWxdea0BeIEuNvGQYQ,2983
+ hkjc/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ hkjc/qpbanker.py,sha256=vhvYb5_nGrKgYgre9gGF6tgswovca5C9pZVOPGxEP1Q,4804
+ hkjc/visualization.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ hkjc-0.1.0.dist-info/METADATA,sha256=fsrs2M07EvjjNyLqXMAQnyRg8Ebk-IcVq415v2QZEBQ,320
+ hkjc-0.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ hkjc-0.1.0.dist-info/RECORD,,
hkjc-0.1.0.dist-info/WHEEL ADDED
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: hatchling 1.27.0
+ Root-Is-Purelib: true
+ Tag: py3-none-any