hkjc 0.3.7__tar.gz → 0.3.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hkjc
3
- Version: 0.3.7
3
+ Version: 0.3.9
4
4
  Summary: Library for scraping HKJC data and performing basic analysis
5
5
  Requires-Python: >=3.11
6
6
  Requires-Dist: cachetools>=6.2.0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "hkjc"
3
- version = "0.3.7"
3
+ version = "0.3.9"
4
4
  description = "Library for scraping HKJC data and performing basic analysis"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"
@@ -0,0 +1,20 @@
1
+ """Top-level package for hkjc tools.
2
+
3
+ This module re-exports commonly used symbols from the submodules.
4
+ """
5
+ from importlib.metadata import version as _version
6
+
7
# Public API re-exported at package level. Every name listed here must be
# bound further down in this module; otherwise ``from hkjc import *`` fails
# while resolving ``__all__``.
__all__ = [
    "live_odds",
    "qpbanker",
    "generate_all_qp_trades",
    "generate_all_pla_trades",
    "pareto_filter",
    "speedpro_energy",
    "speedmap",
    "harville_model",
]

try:
    __version__ = _version(__name__)
except Exception:  # pragma: no cover - best-effort version resolution
    __version__ = "0.0.0"

from .live_odds import live_odds
from .processing import generate_all_qp_trades, generate_all_pla_trades
from .optimization import pareto_filter
from .speedpro import speedmap, speedpro_energy
# ``qpbanker`` is advertised in ``__all__``; it is imported from ``.strategy``
# the same way ``processing`` imports it.
from .strategy import qpbanker
from . import harville_model
@@ -3,8 +3,6 @@
3
3
  from __future__ import annotations
4
4
  from typing import Tuple, List
5
5
 
6
- from .harville_model import fit_harville_to_odds
7
-
8
6
  import requests
9
7
  from cachetools.func import ttl_cache
10
8
  import numpy as np
@@ -87,7 +85,7 @@ def _fetch_live_odds(date: str, venue_code: str, race_number: int, odds_type: Tu
87
85
  ]
88
86
 
89
87
 
90
- def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str] = ['PLA', 'QPL'], fit_harville=False) -> dict:
88
+ def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str] = ['PLA', 'QPL']) -> dict:
91
89
  """Fetch live odds as numpy arrays.
92
90
 
93
91
  Args:
@@ -112,7 +110,7 @@ def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str]
112
110
  except Exception:
113
111
  raise ValueError("Date must be in 'YYYY-MM-DD' format")
114
112
 
115
- mandatory_types = ['WIN','PLA','QIN','QPL'] if fit_harville else ['PLA']
113
+ mandatory_types = ['PLA']
116
114
 
117
115
  data = _fetch_live_odds(date, venue_code, race_number,
118
116
  odds_type=tuple(set(mandatory_types+odds_type)))
@@ -129,24 +127,11 @@ def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str]
129
127
  for entry in data:
130
128
  if entry["Type"] in ["QIN", "QPL"]:
131
129
  horse_ids = list(map(int, entry["HorseID"].split(",")))
132
- odds[entry["Type"]][horse_ids[0] - 1, horse_ids[1] - 1] = entry["Odds"]
133
- odds[entry["Type"]][horse_ids[1] - 1, horse_ids[0] - 1] = entry["Odds"]
134
- elif entry["Type"] in ["PLA","WIN"]:
130
+ odds[entry["Type"]][horse_ids[0] - 1,
131
+ horse_ids[1] - 1] = entry["Odds"]
132
+ odds[entry["Type"]][horse_ids[1] - 1,
133
+ horse_ids[0] - 1] = entry["Odds"]
134
+ elif entry["Type"] in ["PLA", "WIN"]:
135
135
  odds[entry["Type"]][int(entry["HorseID"]) - 1] = entry["Odds"]
136
136
 
137
- if fit_harville:
138
- fit_res = fit_harville_to_odds(
139
- W_obs=odds['WIN'],
140
- Qin_obs=odds['QIN'],
141
- Q_obs=odds['QPL'],
142
- b_obs=odds['PLA']
143
- )
144
- if fit_res['success']:
145
- odds['PLA'] = np.nan_to_num(1/fit_res['b_fitted'], posinf=0)
146
- odds['QPL'] = np.nan_to_num(1/fit_res['Q_fitted'], posinf=0)
147
- odds['WIN'] = np.nan_to_num(1/fit_res['W_fitted'], posinf=0)
148
- odds['QIN'] = np.nan_to_num(1/fit_res['Qin_fitted'], posinf=0)
149
- else:
150
- print(f"[WARNING] Harville model fitting failed: {fit_res.get('message','')}")
151
-
152
137
  return {t: odds[t] for t in odds_type}
@@ -1,7 +1,7 @@
1
1
  import polars as pl
2
2
  from typing import List, Union
3
3
 
4
- def _pareto_filter(
4
+ def pareto_filter(
5
5
  df: pl.DataFrame,
6
6
  groupby: List[str],
7
7
  by: List[str],
@@ -0,0 +1,120 @@
1
+ """Functions to batch process trades into dataframes for analysis.
2
+ """
3
+ from __future__ import annotations
4
+ from typing import Tuple, List
5
+
6
+ from .live_odds import live_odds
7
+ from .strategy import qpbanker, place_only
8
+ from .harville_model import fit_harville_to_odds
9
+
10
+ import polars as pl
11
+ import numpy as np
12
+ from itertools import combinations
13
+ from tqdm import tqdm
14
+
15
+
16
+ def _all_subsets(lst): return [list(x) for r in range(
17
+ 1, len(lst)+1) for x in combinations(lst, r)] # list subsets of a list
18
+
19
+
20
def _process_single_qp_trade(banker: int, covered: List[int], pla_odds: np.ndarray, qpl_odds: np.ndarray, rebate: float) -> Tuple[int, List, float, float, float]:
    """Evaluate one banker/covered quinella-place ticket.

    The three metrics are delegated to the ``qpbanker`` strategy module and
    returned together with the inputs that identify the ticket.

    Returns:
        Tuple: (banker, covered, win probability, expected value, average odds).
    """
    return (
        banker,
        covered,
        qpbanker.win_probability(pla_odds, banker, covered),
        qpbanker.expected_value(pla_odds, qpl_odds, banker, covered, rebate),
        qpbanker.average_odds(qpl_odds, banker, covered),
    )
28
+
29
+
30
def generate_all_qp_trades(date: str, venue_code: str, race_number: int, rebate: float = 0.12, fit_harville: bool = False) -> pl.DataFrame:
    """Generate all possible qp tickets for the specified race.

    Every horse is tried as banker, combined with every non-empty subset of
    the remaining horses as the covered set, so the number of rows grows
    exponentially with the field size.

    Args:
        date (str): Date in 'YYYY-MM-DD' format.
        venue_code (str): Venue code, e.g., 'ST' for Shatin, 'HV' for Happy Valley.
        race_number (int): Race number.
        rebate (float, optional): The rebate percentage. Defaults to 0.12.
        fit_harville (bool, optional): Whether to fit the odds using Harville model. Defaults to False.

    Returns:
        pl.DataFrame: One row per ticket with columns 'Banker', 'Covered',
            'WinProb', 'ExpValue', 'AvgOdds' and 'NumCovered'.
    """
    # WIN and QIN odds are only fetched when they are needed for the fit.
    odds = live_odds(date, venue_code, race_number,
                     odds_type=['PLA', 'QPL'] + (['WIN', 'QIN'] if fit_harville else []))
    N = len(odds['PLA'])
    candidates = np.arange(1, N+1)  # horse numbers are 1-indexed

    if fit_harville:
        fit_res = fit_harville_to_odds(
            W_obs=odds['WIN'],
            Qin_obs=odds['QIN'],
            Q_obs=odds['QPL'],
            b_obs=odds['PLA']
        )
        if fit_res['success']:
            # Replace the market odds by the reciprocal of the fitted values
            # (presumably fitted probabilities - confirm against
            # harville_model). Infinite reciprocals are mapped to 0.
            odds['PLA'] = np.nan_to_num(1/fit_res['b_fitted'], posinf=0)
            odds['QPL'] = np.nan_to_num(1/fit_res['Q_fitted'], posinf=0)
            odds['WIN'] = np.nan_to_num(1/fit_res['W_fitted'], posinf=0)
            odds['QIN'] = np.nan_to_num(1/fit_res['Qin_fitted'], posinf=0)
        else:
            # Best effort: keep going with the unfitted market odds.
            print(
                f"[WARNING] Harville model fitting failed: {fit_res.get('message','')}")

    results = [_process_single_qp_trade(banker, covered, odds['PLA'], odds['QPL'], rebate)
               for banker in tqdm(candidates, desc="Processing bankers")
               for covered in _all_subsets(candidates[candidates != banker])]

    # Each tuple in ``results`` is one row. State the orientation explicitly
    # instead of relying on polars' inference from the data/schema dimensions.
    df = (pl.DataFrame(results, schema=['Banker', 'Covered', 'WinProb', 'ExpValue', 'AvgOdds'], orient='row')
          .with_columns(pl.col('Covered').list.len().alias('NumCovered')))

    return df
73
+
74
+
75
def _process_single_pla_trade(covered: List[int], pla_odds: np.ndarray, p_matrix: np.ndarray, rebate: float = 0.1) -> Tuple[List, float, float, float]:
    """Evaluate one place-only ticket.

    The three metrics are delegated to the ``place_only`` strategy module and
    returned together with the covered set that identifies the ticket.

    Returns:
        Tuple: (covered, win probability, expected value, average odds).
    """
    return (
        covered,
        place_only.win_probability(p_matrix, covered),
        place_only.expected_value(pla_odds, p_matrix, covered, rebate),
        place_only.average_odds(pla_odds, covered),
    )
82
+
83
+
84
def generate_all_pla_trades(date: str, venue_code: str, race_number: int, rebate: float = 0.1) -> pl.DataFrame:
    """Generate all possible place-only tickets for the specified race.

    Every non-empty subset of the field is evaluated as a covered set, so the
    number of rows grows exponentially with the field size. The Harville fit
    is mandatory here because the metrics need its fitted matrix.

    Args:
        date (str): Date in 'YYYY-MM-DD' format.
        venue_code (str): Venue code, e.g., 'ST' for Shatin, 'HV' for Happy Valley.
        race_number (int): Race number.
        rebate (float, optional): The rebate percentage. Defaults to 0.1.

    Returns:
        pl.DataFrame: One row per ticket with columns 'Covered', 'WinProb',
            'ExpValue', 'AvgOdds' and 'NumCovered'.

    Raises:
        RuntimeError: If the Harville model fit does not succeed.
    """
    odds = live_odds(date, venue_code, race_number,
                     odds_type=['PLA', 'QPL', 'WIN', 'QIN'])
    N = len(odds['PLA'])
    candidates = np.arange(1, N+1)  # horse numbers are 1-indexed

    # Pass each odds array by keyword, matching the call in
    # generate_all_qp_trades, instead of handing over the whole dict.
    fit_res = fit_harville_to_odds(
        W_obs=odds['WIN'],
        Qin_obs=odds['QIN'],
        Q_obs=odds['QPL'],
        b_obs=odds['PLA']
    )

    if not fit_res['success']:
        raise RuntimeError(
            f"[ERROR] Harville model fitting failed: {fit_res.get('message','')}")

    # Only the place odds and the fitted matrix are used below. The place
    # odds are replaced by the reciprocal of the fitted values (presumably
    # fitted probabilities - confirm against harville_model); infinite
    # reciprocals are mapped to 0.
    pla_odds = np.nan_to_num(1/fit_res['b_fitted'], posinf=0)
    p_matrix = fit_res['P_fitted']

    results = [_process_single_pla_trade(covered, pla_odds, p_matrix, rebate)
               for covered in _all_subsets(candidates)]

    # Each tuple in ``results`` is one row. State the orientation explicitly
    # instead of relying on polars' inference from the data/schema dimensions.
    df = (pl.DataFrame(results, schema=['Covered', 'WinProb', 'ExpValue', 'AvgOdds'], orient='row')
          .with_columns(pl.col('Covered').list.len().alias('NumCovered')))

    return df
@@ -36,7 +36,8 @@ def speedpro_energy(race_date: str) -> pl.DataFrame:
36
36
 
37
37
  df = (df.with_columns(pl.col('RunnerNumber').str.to_integer())
38
38
  .with_columns(pl.col('SpeedPRO_Energy_Difference').str.to_integer())
39
- .select(['RaceNumber', 'RunnerNumber', 'HorseName', 'FitnessRatings','SpeedPRO_Energy_Difference']))
39
+ .with_columns(pl.col('FitnessRatings').str.to_integer())
40
+ .select(['RaceNo', 'RunnerNumber', 'HorseName', 'FitnessRatings','SpeedPRO_Energy_Difference']))
40
41
 
41
42
  return df
42
43
 
@@ -0,0 +1,53 @@
1
+ """Functions to perform probability and expectation calculations for the place-only strategy.
2
+ """
3
+ from __future__ import annotations
4
+
5
+ from typing import List
6
+ import numpy as np
7
+
8
+
9
def win_probability(p_matrix: np.ndarray, covered: List[int]) -> float:
    """Calculate the probability of winning at least one ticket in the place-only strategy.

    Args:
        p_matrix (np.ndarray): An array of place probabilities for the horses (0-indexed). p_ij is the probability of horse i placing in position j.
        covered (List[int]): A list of horse numbers in the cover set (1-indexed).

    Returns:
        float: probability
    """
    # Horse numbers are 1-indexed while the rows of p_matrix are 0-indexed.
    rows = np.asarray(covered, dtype=int) - 1
    # Probability of each covered horse finishing in one of the first three
    # positions.
    place_prob = np.sum(p_matrix[rows, :3], axis=1)
    # Complement of "no covered horse places"; the product treats the
    # individual place events as independent.
    win_prob = 1 - np.prod(1 - place_prob)
    return win_prob
22
+
23
+
24
def expected_value(pla_odds: np.ndarray, p_matrix: np.ndarray, covered: List[int], rebate: float = 0.10) -> float:
    """Calculate the expected value (per dollar) of the place-only strategy using constant stake.

    Args:
        pla_odds (np.ndarray): An array of place odds for the horses (0-indexed).
        p_matrix (np.ndarray): An array of place probabilities for the horses (0-indexed). p_ij is the probability of horse i placing in position j.
        covered (List[int]): A list of horse numbers in the cover set (1-indexed).
        rebate (float, optional): The rebate percentage. Defaults to 0.10.

    Returns:
        float: expected value per dollar staked
    """
    # Horse numbers are 1-indexed while the arrays are 0-indexed.
    rows = np.asarray(covered, dtype=int) - 1
    # Probability of each horse finishing in one of the first three positions.
    true_prob = np.sum(p_matrix[:, :3], axis=1)
    C = len(covered)
    # Average the per-ticket term p * (odds - rebate) over the covered
    # horses, then subtract the (1 - rebate) term.
    ev = np.sum((true_prob*(pla_odds-rebate))[rows])/C - (1-rebate)
    return ev
40
+
41
def average_odds(pla_odds: np.ndarray, covered: List[int]) -> float:
    """Return the harmonic mean of the place odds over the covered horses.

    Args:
        pla_odds (np.ndarray): Place odds per horse (0-indexed).
        covered (List[int]): Horse numbers in the cover set (1-indexed).

    Returns:
        float: number of covered horses divided by the sum of their
            reciprocal place odds.
    """
    # Shift each 1-indexed horse number to its 0-indexed array position.
    inverse_total = sum(1 / pla_odds[horse - 1] for horse in covered)
    return len(covered) / inverse_total
@@ -5,6 +5,7 @@ from __future__ import annotations
5
5
  from typing import List
6
6
  import numpy as np
7
7
 
8
+
8
9
  def _pla_odds_partition(pla_odds: np.ndarray, banker: int, covered: List[int]) -> tuple[float, float, float]:
9
10
  """Partition the place odds into banker, covered and eliminated sets.
10
11
 
@@ -97,7 +97,7 @@ wheels = [
97
97
 
98
98
  [[package]]
99
99
  name = "hkjc"
100
- version = "0.3.7"
100
+ version = "0.3.9"
101
101
  source = { editable = "." }
102
102
  dependencies = [
103
103
  { name = "cachetools" },
@@ -1,19 +0,0 @@
1
- """Top-level package for hkjc tools.
2
-
3
- This module re-exports commonly used symbols from the submodules.
4
- """
5
- from importlib.metadata import version as _version
6
-
7
- __all__ = ["live_odds", "qpbanker",
8
- "generate_all_qp_trades", "generate_pareto_qp_trades",
9
- "speedpro_energy", "speedmap","harveille_model"]
10
-
11
- try:
12
- __version__ = _version(__name__)
13
- except Exception: # pragma: no cover - best-effort version resolution
14
- __version__ = "0.0.0"
15
-
16
- from .live_odds import live_odds
17
- from .processing import generate_all_qp_trades, generate_pareto_qp_trades
18
- from .speedpro import speedmap, speedpro_energy
19
- from . import harville_model
@@ -1,75 +0,0 @@
1
- """Functions to batch process trades into dataframes for analysis.
2
- """
3
- from __future__ import annotations
4
- from typing import Tuple, List
5
-
6
- from .live_odds import live_odds
7
- from .qpbanker import win_probability, expected_value, average_odds
8
- from .optimization import _pareto_filter
9
-
10
- import polars as pl
11
- import numpy as np
12
- from itertools import combinations
13
- from tqdm import tqdm
14
-
15
-
16
- def _all_subsets(lst): return [list(x) for r in range(
17
- 1, len(lst)+1) for x in combinations(lst, r)] # list subsets of a list
18
-
19
-
20
- def _process_single_qp_trade(banker: int, covered: List[int], odds_pla: List[float], odds_qpl: List[float], rebate: float) -> Tuple[int, List, float, float]:
21
- """Process a single qp trade.
22
- """
23
- win_prob = win_probability(odds_pla, banker, covered)
24
- exp_value = expected_value(odds_pla, odds_qpl, banker, covered, rebate)
25
- ave_odds = average_odds(odds_qpl, banker, covered)
26
- return (banker, covered, win_prob, exp_value, ave_odds)
27
-
28
-
29
- def generate_all_qp_trades(date: str, venue_code: str, race_number: int, rebate: float = 0.12, fit_harville=False) -> pl.DataFrame:
30
- """Generate all possible qp tickets for the specified race.
31
-
32
- Args:
33
- date (str): Date in 'YYYY-MM-DD' format.
34
- venue_code (str): Venue code, e.g., 'ST' for Shatin, 'HV' for Happy Valley.
35
- race_number (int): Race number.
36
- rebate (float, optional): The rebate percentage. Defaults to 0.12.
37
- fit_harville (bool, optional): Whether to fit the odds using Harville model. Defaults to False.
38
-
39
- Returns:
40
- pl.DataFrame: DataFrame with all possible trades and their metrics.
41
- """
42
-
43
- odds = live_odds(date, venue_code, race_number,
44
- odds_type=['PLA', 'QPL'], fit_harville=fit_harville)
45
- N = len(odds['PLA'])
46
- candidates = np.arange(1, N+1)
47
-
48
- results = [_process_single_qp_trade(banker, covered, odds['PLA'], odds['QPL'], rebate)
49
- for banker in tqdm(candidates, desc="Processing bankers")
50
- for covered in _all_subsets(candidates[candidates != banker])]
51
-
52
- df = (pl.DataFrame(results, schema=['Banker', 'Covered', 'WinProb', 'ExpValue', 'AvgOdds'])
53
- .with_columns(pl.col('Covered').list.len().alias('NumCovered')))
54
-
55
- return df
56
-
57
-
58
- def generate_pareto_qp_trades(date: str, venue_code: str, race_number: int, rebate: float = 0.12, groupby: List[str] = [], fit_harville=False) -> pl.DataFrame:
59
- """Generate qp tickets that are Pareto optimal for the specified race.
60
-
61
- Args:
62
- date (str): Date in 'YYYY-MM-DD' format.
63
- venue_code (str): Venue code, e.g., 'ST' for Shatin, 'HV' for Happy Valley.
64
- race_number (int): Race number.
65
- rebate (float, optional): The rebate percentage. Defaults to 0.12.
66
- groupby (List[str], optional): Columns to group by when determining Pareto optimality. Defaults to [] (global optimal).
67
- harville_fit (bool, optional): Whether to fit the odds using Harville model. Defaults to False.
68
-
69
- Returns:
70
- pl.DataFrame: DataFrame with all Pareto trades and their metrics.
71
- """
72
- df = generate_all_qp_trades(date, venue_code, race_number, rebate, fit_harville=fit_harville)
73
- pareto_df = _pareto_filter(df, groupby=groupby, by=[
74
- 'WinProb', 'ExpValue'], maximize=True)
75
- return pareto_df
File without changes
File without changes
File without changes
File without changes
File without changes