hkjc 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hkjc/__init__.py CHANGED
@@ -5,15 +5,16 @@ This module re-exports commonly used symbols from the submodules.
5
5
  from importlib.metadata import version as _version
6
6
 
7
7
  __all__ = ["live_odds", "qpbanker",
8
- "generate_all_qp_trades", "generate_pareto_qp_trades",
9
- "speedpro_energy", "speedmap","harveille_model"]
8
+ "generate_all_qp_trades", "generate_all_pla_trades", "pareto_filter",
9
+ "speedpro_energy", "speedmap", "harveille_model"]
10
10
 
11
11
  try:
12
- __version__ = _version(__name__)
12
+ __version__ = _version(__name__)
13
13
  except Exception: # pragma: no cover - best-effort version resolution
14
- __version__ = "0.0.0"
14
+ __version__ = "0.0.0"
15
15
 
16
16
  from .live_odds import live_odds
17
- from .processing import generate_all_qp_trades, generate_pareto_qp_trades
17
+ from .processing import generate_all_qp_trades, generate_all_pla_trades
18
+ from .optimization import pareto_filter
18
19
  from .speedpro import speedmap, speedpro_energy
19
- from . import harville_model
20
+ from . import harville_model
hkjc/live_odds.py CHANGED
@@ -3,8 +3,6 @@
3
3
  from __future__ import annotations
4
4
  from typing import Tuple, List
5
5
 
6
- from .harville_model import fit_harville_to_odds
7
-
8
6
  import requests
9
7
  from cachetools.func import ttl_cache
10
8
  import numpy as np
@@ -87,7 +85,7 @@ def _fetch_live_odds(date: str, venue_code: str, race_number: int, odds_type: Tu
87
85
  ]
88
86
 
89
87
 
90
- def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str] = ['PLA', 'QPL'], fit_harville=False) -> dict:
88
+ def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str] = ['PLA', 'QPL']) -> dict:
91
89
  """Fetch live odds as numpy arrays.
92
90
 
93
91
  Args:
@@ -112,7 +110,7 @@ def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str]
112
110
  except Exception:
113
111
  raise ValueError("Date must be in 'YYYY-MM-DD' format")
114
112
 
115
- mandatory_types = ['WIN','PLA','QIN','QPL'] if fit_harville else ['PLA']
113
+ mandatory_types = ['PLA']
116
114
 
117
115
  data = _fetch_live_odds(date, venue_code, race_number,
118
116
  odds_type=tuple(set(mandatory_types+odds_type)))
@@ -129,24 +127,11 @@ def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str]
129
127
  for entry in data:
130
128
  if entry["Type"] in ["QIN", "QPL"]:
131
129
  horse_ids = list(map(int, entry["HorseID"].split(",")))
132
- odds[entry["Type"]][horse_ids[0] - 1, horse_ids[1] - 1] = entry["Odds"]
133
- odds[entry["Type"]][horse_ids[1] - 1, horse_ids[0] - 1] = entry["Odds"]
134
- elif entry["Type"] in ["PLA","WIN"]:
130
+ odds[entry["Type"]][horse_ids[0] - 1,
131
+ horse_ids[1] - 1] = entry["Odds"]
132
+ odds[entry["Type"]][horse_ids[1] - 1,
133
+ horse_ids[0] - 1] = entry["Odds"]
134
+ elif entry["Type"] in ["PLA", "WIN"]:
135
135
  odds[entry["Type"]][int(entry["HorseID"]) - 1] = entry["Odds"]
136
136
 
137
- if fit_harville:
138
- fit_res = fit_harville_to_odds(
139
- W_obs=odds['WIN'],
140
- Qin_obs=odds['QIN'],
141
- Q_obs=odds['QPL'],
142
- b_obs=odds['PLA']
143
- )
144
- if fit_res['success']:
145
- odds['PLA'] = np.nan_to_num(1/fit_res['b_fitted'], posinf=0)
146
- odds['QPL'] = np.nan_to_num(1/fit_res['Q_fitted'], posinf=0)
147
- odds['WIN'] = np.nan_to_num(1/fit_res['W_fitted'], posinf=0)
148
- odds['QIN'] = np.nan_to_num(1/fit_res['Qin_fitted'], posinf=0)
149
- else:
150
- print(f"[WARNING] Harville model fitting failed: {fit_res.get('message','')}")
151
-
152
137
  return {t: odds[t] for t in odds_type}
hkjc/optimization.py CHANGED
@@ -1,7 +1,7 @@
1
1
  import polars as pl
2
2
  from typing import List, Union
3
3
 
4
- def _pareto_filter(
4
+ def pareto_filter(
5
5
  df: pl.DataFrame,
6
6
  groupby: List[str],
7
7
  by: List[str],
hkjc/processing.py CHANGED
@@ -4,8 +4,8 @@ from __future__ import annotations
4
4
  from typing import Tuple, List
5
5
 
6
6
  from .live_odds import live_odds
7
- from .qpbanker import win_probability, expected_value, average_odds
8
- from .optimization import _pareto_filter
7
+ from .strategy import qpbanker, place_only
8
+ from .harville_model import fit_harville_to_odds
9
9
 
10
10
  import polars as pl
11
11
  import numpy as np
@@ -17,16 +17,17 @@ def _all_subsets(lst): return [list(x) for r in range(
17
17
  1, len(lst)+1) for x in combinations(lst, r)] # list subsets of a list
18
18
 
19
19
 
20
- def _process_single_qp_trade(banker: int, covered: List[int], odds_pla: List[float], odds_qpl: List[float], rebate: float) -> Tuple[int, List, float, float]:
20
+ def _process_single_qp_trade(banker: int, covered: List[int], pla_odds: np.ndarray, qpl_odds: np.ndarray, rebate: float) -> Tuple[int, List, float, float, float]:
21
21
  """Process a single qp trade.
22
22
  """
23
- win_prob = win_probability(odds_pla, banker, covered)
24
- exp_value = expected_value(odds_pla, odds_qpl, banker, covered, rebate)
25
- ave_odds = average_odds(odds_qpl, banker, covered)
23
+ win_prob = qpbanker.win_probability(pla_odds, banker, covered)
24
+ exp_value = qpbanker.expected_value(
25
+ pla_odds, qpl_odds, banker, covered, rebate)
26
+ ave_odds = qpbanker.average_odds(qpl_odds, banker, covered)
26
27
  return (banker, covered, win_prob, exp_value, ave_odds)
27
28
 
28
29
 
29
- def generate_all_qp_trades(date: str, venue_code: str, race_number: int, rebate: float = 0.12, fit_harville=False) -> pl.DataFrame:
30
+ def generate_all_qp_trades(date: str, venue_code: str, race_number: int, rebate: float = 0.12, fit_harville: bool = False) -> pl.DataFrame:
30
31
  """Generate all possible qp tickets for the specified race.
31
32
 
32
33
  Args:
@@ -41,10 +42,26 @@ def generate_all_qp_trades(date: str, venue_code: str, race_number: int, rebate:
41
42
  """
42
43
 
43
44
  odds = live_odds(date, venue_code, race_number,
44
- odds_type=['PLA', 'QPL'], fit_harville=fit_harville)
45
+ odds_type=['PLA', 'QPL'] + (['WIN', 'QIN'] if fit_harville else []))
45
46
  N = len(odds['PLA'])
46
47
  candidates = np.arange(1, N+1)
47
48
 
49
+ if fit_harville:
50
+ fit_res = fit_harville_to_odds(
51
+ W_obs=odds['WIN'],
52
+ Qin_obs=odds['QIN'],
53
+ Q_obs=odds['QPL'],
54
+ b_obs=odds['PLA']
55
+ )
56
+ if fit_res['success']:
57
+ odds['PLA'] = np.nan_to_num(1/fit_res['b_fitted'], posinf=0)
58
+ odds['QPL'] = np.nan_to_num(1/fit_res['Q_fitted'], posinf=0)
59
+ odds['WIN'] = np.nan_to_num(1/fit_res['W_fitted'], posinf=0)
60
+ odds['QIN'] = np.nan_to_num(1/fit_res['Qin_fitted'], posinf=0)
61
+ else:
62
+ print(
63
+ f"[WARNING] Harville model fitting failed: {fit_res.get('message','')}")
64
+
48
65
  results = [_process_single_qp_trade(banker, covered, odds['PLA'], odds['QPL'], rebate)
49
66
  for banker in tqdm(candidates, desc="Processing bankers")
50
67
  for covered in _all_subsets(candidates[candidates != banker])]
@@ -55,21 +72,53 @@ def generate_all_qp_trades(date: str, venue_code: str, race_number: int, rebate:
55
72
  return df
56
73
 
57
74
 
58
- def generate_pareto_qp_trades(date: str, venue_code: str, race_number: int, rebate: float = 0.12, groupby: List[str] = [], fit_harville=False) -> pl.DataFrame:
59
- """Generate qp tickets that are Pareto optimal for the specified race.
75
+ def _process_single_pla_trade(covered: List[int], pla_odds: np.ndarray, p_matrix: np.ndarray, rebate: float = 0.1) -> Tuple[List, float, float, float]:
76
+ """Process a single place-only trade.
77
+ """
78
+ win_prob = place_only.win_probability(p_matrix, covered)
79
+ exp_value = place_only.expected_value(pla_odds, p_matrix, covered, rebate)
80
+ ave_odds = place_only.average_odds(pla_odds, covered)
81
+ return (covered, win_prob, exp_value, ave_odds)
82
+
83
+
84
+ def generate_all_pla_trades(date: str, venue_code: str, race_number: int, rebate: float = 0.1) -> pl.DataFrame:
85
+ """Generate all possible place-only tickets for the specified race.
60
86
 
61
87
  Args:
62
88
  date (str): Date in 'YYYY-MM-DD' format.
63
89
  venue_code (str): Venue code, e.g., 'ST' for Shatin, 'HV' for Happy Valley.
64
90
  race_number (int): Race number.
65
91
  rebate (float, optional): The rebate percentage. Defaults to 0.1.
66
- groupby (List[str], optional): Columns to group by when determining Pareto optimality. Defaults to [] (global optimal).
67
- harville_fit (bool, optional): Whether to fit the odds using Harville model. Defaults to False.
68
92
 
69
93
  Returns:
70
- pl.DataFrame: DataFrame with all Pareto trades and their metrics.
94
+ pl.DataFrame: DataFrame with all possible trades and their metrics.
71
95
  """
72
- df = generate_all_qp_trades(date, venue_code, race_number, rebate, harville_fit=fit_harville)
73
- pareto_df = _pareto_filter(df, groupby=groupby, by=[
74
- 'WinProb', 'ExpValue'], maximize=True)
75
- return pareto_df
96
+
97
+ odds = live_odds(date, venue_code, race_number,
98
+ odds_type=['PLA', 'QPL', 'WIN', 'QIN'])
99
+ N = len(odds['PLA'])
100
+ candidates = np.arange(1, N+1)
101
+
102
+ fit_res = fit_harville_to_odds(
103
+ W_obs=odds['WIN'],
104
+ Qin_obs=odds['QIN'],
105
+ Q_obs=odds['QPL'],
106
+ b_obs=odds['PLA']
107
+ )
108
+ if fit_res['success']:
109
+ odds['PLA'] = np.nan_to_num(1/fit_res['b_fitted'], posinf=0)
110
+ odds['QPL'] = np.nan_to_num(1/fit_res['Q_fitted'], posinf=0)
111
+ odds['WIN'] = np.nan_to_num(1/fit_res['W_fitted'], posinf=0)
112
+ odds['QIN'] = np.nan_to_num(1/fit_res['Qin_fitted'], posinf=0)
113
+ else:
114
+ raise RuntimeError(
115
+ f"[ERROR] Harville model fitting failed: {fit_res.get('message','')}")
116
+ p_matrix = fit_res['P_fitted']
117
+
118
+ results = [_process_single_pla_trade(covered, odds['PLA'], p_matrix, rebate)
119
+ for covered in _all_subsets(candidates)]
120
+
121
+ df = (pl.DataFrame(results, schema=['Covered', 'WinProb', 'ExpValue', 'AvgOdds'])
122
+ .with_columns(pl.col('Covered').list.len().alias('NumCovered')))
123
+
124
+ return df
hkjc/speedpro.py CHANGED
@@ -36,7 +36,7 @@ def speedpro_energy(race_date: str) -> pl.DataFrame:
36
36
 
37
37
  df = (df.with_columns(pl.col('RunnerNumber').str.to_integer())
38
38
  .with_columns(pl.col('SpeedPRO_Energy_Difference').str.to_integer())
39
- .select(['RaceNumber', 'RunnerNumber', 'HorseName', 'FitnessRatings','SpeedPRO_Energy_Difference']))
39
+ .select(['RaceNo', 'RunnerNumber', 'HorseName', 'FitnessRatings','SpeedPRO_Energy_Difference']))
40
40
 
41
41
  return df
42
42
 
@@ -0,0 +1,53 @@
1
+ """Functions to perform probability and expectation calculations for the place-only strategy.
2
+ """
3
+ from __future__ import annotations
4
+
5
+ from typing import List
6
+ import numpy as np
7
+
8
+
9
+ def win_probability(p_matrix: np.ndarray, covered: List[int]) -> float:
10
+ """Calculate the probability of winning at least one ticket in the place-only strategy.
11
+
12
+ Args:
13
+ p_matrix (np.ndarray): An array of place probabilities for the horses (0-indexed). p_ij is the probability of horse i placing in position j.
14
+ covered (List[int]): A list of horse numbers in the cover set (1-indexed).
15
+
16
+ Returns:
17
+ float: probability
18
+ """
19
+
20
+ win_prob = 1-np.prod(1-np.sum(p_matrix[covered, :3], axis=1))
21
+ return win_prob
22
+
23
+
24
+ def expected_value(pla_odds: np.ndarray, p_matrix: np.ndarray, covered: List[int], rebate: float = 0.10) -> float:
25
+ """Calculate the expected value (per dollar) of the place-only strategy using constant stake.
26
+
27
+ Args:
28
+ pla_odds (np.ndarray): An array of place odds for the horses (0-indexed).
29
+ p_matrix (np.ndarray): An array of place probabilities for the horses (0-indexed). p_ij is the probability of horse i placing in position j.
30
+ covered (List[int]): A list of horse numbers in the cover set (1-indexed).
31
+ rebate (float, optional): The rebate percentage. Defaults to 0.10.
32
+
33
+ Returns:
34
+ float: expected value per dollar staked
35
+ """
36
+ true_prob = np.sum(p_matrix[:, :3], axis=1)
37
+ C = len(covered)
38
+ ev = np.sum((true_prob*(pla_odds-rebate))[covered])/C - (1-rebate)
39
+ return ev
40
+
41
+ def average_odds(pla_odds: np.ndarray, covered: List[int]) -> float:
42
+ """Calculate the (harmonic) average odds across the covered set.
43
+
44
+ Args:
45
+ pla_odds (np.ndarray): An array of place odds for the horses (0-indexed).
46
+ covered (List[int]): A list of horse numbers in the cover set (1-indexed).
47
+
48
+ Returns:
49
+ float: average odds
50
+ """
51
+ C = len(covered)
52
+ avg_odds = C / sum([1/pla_odds[c-1] for c in covered])
53
+ return avg_odds
@@ -5,6 +5,7 @@ from __future__ import annotations
5
5
  from typing import List
6
6
  import numpy as np
7
7
 
8
+
8
9
  def _pla_odds_partition(pla_odds: np.ndarray, banker: int, covered: List[int]) -> tuple[float, float, float]:
9
10
  """Partition the place odds into banker, covered and eliminated sets.
10
11
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hkjc
3
- Version: 0.3.6
3
+ Version: 0.3.8
4
4
  Summary: Library for scraping HKJC data and performing basic analysis
5
5
  Requires-Python: >=3.11
6
6
  Requires-Dist: cachetools>=6.2.0
@@ -0,0 +1,13 @@
1
+ hkjc/__init__.py,sha256=jgA3OiBaRifvNd5b5qR7VqdBTFfY1t9zQwhiQYh-Q4o,714
2
+ hkjc/harville_model.py,sha256=MZjPLS-1nbEhp1d4Syuq13DtraKnd7TlNqBmOOCwxgc,15976
3
+ hkjc/historical.py,sha256=wKTJi--0Mx_x0vO0ysOGD37oM8453woQK-cLzPOLgiQ,336
4
+ hkjc/live_odds.py,sha256=HQZCvEMUG4YNVj2IaFshU5HD0j5mfBSSDhksNla-ERk,4768
5
+ hkjc/optimization.py,sha256=p_NwPfl8qrcg2XWfHX4D7_jSRT819oVcctK-4VuvtME,3783
6
+ hkjc/processing.py,sha256=MCP42v-DgQBVUx7g2yhlfbZusj7Q60N2sqzFBdIUb94,5028
7
+ hkjc/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ hkjc/speedpro.py,sha256=5KFcVwxcblSaP8_G6W15h1Nkt-yJHJ7gVKwo3kpumS4,1964
9
+ hkjc/strategy/place_only.py,sha256=Dfzqr1PmWd9xHpylXO0Zlww9xMoIFPQ_gMHvRunw_1Q,2049
10
+ hkjc/strategy/qpbanker.py,sha256=MQxjwsfhllKZroKS8w8Q3bi3HMjGc1DAyBIjNZAp3yQ,4805
11
+ hkjc-0.3.8.dist-info/METADATA,sha256=VNe1lDBAfidKWBTMaBd5al2srXFCLWBOegOzh49xfE0,413
12
+ hkjc-0.3.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
13
+ hkjc-0.3.8.dist-info/RECORD,,
@@ -1,12 +0,0 @@
1
- hkjc/__init__.py,sha256=xKkhk53z9aGyGkiwQ_RNc7Qa82lp1Oinyx83m_pBv_E,627
2
- hkjc/harville_model.py,sha256=MZjPLS-1nbEhp1d4Syuq13DtraKnd7TlNqBmOOCwxgc,15976
3
- hkjc/historical.py,sha256=wKTJi--0Mx_x0vO0ysOGD37oM8453woQK-cLzPOLgiQ,336
4
- hkjc/live_odds.py,sha256=m3sO5AIam73Qr2my8aUW4slE7G8xZk6tnMDRPAWS_bs,5447
5
- hkjc/optimization.py,sha256=OArQ3w9bwcIV_lTNuE5za6AROoa90xk_gwAoGwQ-8RE,3784
6
- hkjc/processing.py,sha256=1RZi-xPMV_rcfmfDHmd-MVRKL9fDd6XKolD5dzvxaiQ,3307
7
- hkjc/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- hkjc/qpbanker.py,sha256=vhvYb5_nGrKgYgre9gGF6tgswovca5C9pZVOPGxEP1Q,4804
9
- hkjc/speedpro.py,sha256=Tb8YqWnD7nnRv0I5onnWxGtsRi3bQZZumWnZncT2n4M,1968
10
- hkjc-0.3.6.dist-info/METADATA,sha256=I7hzvISiv67h4oCrJY0OpuywtRfk_WKvnBBHNbAQmlE,413
11
- hkjc-0.3.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
12
- hkjc-0.3.6.dist-info/RECORD,,
File without changes