hkjc 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hkjc/__init__.py CHANGED
@@ -6,7 +6,7 @@ from importlib.metadata import version as _version
6
6
 
7
7
# Names exported by ``from hkjc import *``.
# The last entry must match the submodule actually imported by this package
# (``from . import harville_model``); the previous spelling "harveille_model"
# named a non-existent attribute, which makes a star-import raise
# AttributeError.
__all__ = ["live_odds", "qpbanker",
           "generate_all_qp_trades", "generate_pareto_qp_trades",
           "speedpro_energy", "speedmap", "harville_model"]
10
10
 
11
11
  try:
12
12
  __version__ = _version(__name__)
@@ -15,5 +15,5 @@ except Exception: # pragma: no cover - best-effort version resolution
15
15
 
16
16
  from .live_odds import live_odds
17
17
  from .processing import generate_all_qp_trades, generate_pareto_qp_trades
18
- from .speedpro import speedmap, speedpro_df
18
+ from .speedpro import speedmap, speedpro_energy
19
19
  from . import harville_model
hkjc/harville_model.py CHANGED
@@ -20,6 +20,12 @@ from scipy.optimize import minimize
20
20
  from numba import njit
21
21
  from typing import Tuple, Optional
22
22
 
23
# Default loss weights, keyed by betting pool type. They supply the default
# values of HarvilleModel.__init__'s lambda_* parameters and are the base that
# fit_harville_to_odds merges caller-supplied weights into.
LAMBDA_DEFAULTS = {
    'WIN': 1.0,  # default for lambda_win
    'QIN': 2.0,  # default for lambda_qin
    'QPL': 1.5,  # default for lambda_quinella
    'PLA': 0.7  # default for lambda_banker
}
23
29
 
24
30
  @njit(cache=True)
25
31
  def _popcount(mask: int) -> int:
@@ -173,7 +179,7 @@ def _kl_divergence(p_obs: np.ndarray, p_model: np.ndarray) -> float:
173
179
  return np.sum(p_obs_flat * np.log(p_obs_flat / p_model_flat))
174
180
 
175
181
 
176
- class HarvilleOptimizer:
182
+ class HarvilleModel:
177
183
  """
178
184
  Fits Harville race model to betting market odds using dynamic programming.
179
185
 
@@ -182,9 +188,9 @@ class HarvilleOptimizer:
182
188
  relative strength. This optimizer estimates theta from observed betting odds
183
189
  across multiple pool types.
184
190
 
185
- Default lambda weights (1.0, 2.0, 1.5, 0.7) reflect that early Win odds are
186
- biased by informed traders waiting until closing, while exotic pools provide
187
- more stable signals for ensemble estimation.
191
+ Default lambda weights reflect that early Win odds are biased by informed
192
+ traders waiting until closing, while exotic pools provide more stable
193
+ signals for ensemble estimation.
188
194
 
189
195
  Attributes:
190
196
  n (int): Number of horses
@@ -194,10 +200,10 @@ class HarvilleOptimizer:
194
200
  lambda_banker (float): Weight for Banker pool loss
195
201
  """
196
202
 
197
- def __init__(self, n_horses: int, lambda_win: float = 1.0, lambda_qin: float = 2.0,
198
- lambda_quinella: float = 1.5, lambda_banker: float = 0.7):
203
+ def __init__(self, n_horses: int, lambda_win: float = LAMBDA_DEFAULTS['WIN'], lambda_qin: float = LAMBDA_DEFAULTS['QIN'],
204
+ lambda_quinella: float = LAMBDA_DEFAULTS['QPL'], lambda_banker: float = LAMBDA_DEFAULTS['PLA']) -> None:
199
205
  """
200
- Initialize optimizer.
206
+ Initialize model.
201
207
 
202
208
  Args:
203
209
  n_horses: Number of horses in race (recommend <= 20 for speed)
@@ -218,8 +224,9 @@ class HarvilleOptimizer:
218
224
  self.lambda_quinella = lambda_quinella
219
225
  self.lambda_banker = lambda_banker
220
226
  self._eval_count = 0
227
+ self.result = None
221
228
 
222
- def loss(self, theta: np.ndarray, W_obs: Optional[np.ndarray],
229
+ def _loss(self, theta: np.ndarray, W_obs: Optional[np.ndarray],
223
230
  Qin_obs: Optional[np.ndarray], Q_obs: Optional[np.ndarray],
224
231
  b_obs: Optional[np.ndarray]) -> float:
225
232
  """
@@ -328,7 +335,7 @@ class HarvilleOptimizer:
328
335
 
329
336
  if method == 'L-BFGS-B':
330
337
  result = minimize(
331
- fun=lambda x: self.loss(x, W_obs, Qin_obs, Q_obs, b_obs),
338
+ fun=lambda x: self._loss(x, W_obs, Qin_obs, Q_obs, b_obs),
332
339
  x0=theta_init,
333
340
  method='L-BFGS-B',
334
341
  bounds=[(1e-6, 1.0) for _ in range(self.n)],
@@ -336,7 +343,7 @@ class HarvilleOptimizer:
336
343
  )
337
344
  else:
338
345
  result = minimize(
339
- fun=lambda x: self.loss(x, W_obs, Qin_obs, Q_obs, b_obs),
346
+ fun=lambda x: self._loss(x, W_obs, Qin_obs, Q_obs, b_obs),
340
347
  x0=theta_init,
341
348
  method='SLSQP',
342
349
  bounds=[(1e-6, 1.0) for _ in range(self.n)],
@@ -349,7 +356,7 @@ class HarvilleOptimizer:
349
356
 
350
357
  W_fitted, Qin_fitted, Q_fitted, b_fitted, P_fitted = _compute_probabilities(theta_opt)
351
358
 
352
- return {
359
+ self.result = {
353
360
  'theta': theta_opt,
354
361
  'W_fitted': W_fitted,
355
362
  'Qin_fitted': Qin_fitted,
@@ -360,4 +367,83 @@ class HarvilleOptimizer:
360
367
  'success': result.success,
361
368
  'message': result.message,
362
369
  'n_eval': self._eval_count
363
- }
370
+ }
371
+
372
+ return self.result
373
+
374
def fit_harville_to_odds(odds: dict[str, np.ndarray], lambdas: Optional[dict[str, float]] = None) -> dict:
    """
    Fit Harville model to observed betting odds.

    At least one odds type must be provided. Inputs are odds, not
    probabilities: every array is inverted (1 / odds) here to obtain the
    observed probabilities handed to the model. Entries whose odds are NaN
    or zero contribute an observed probability of 0.

    Args:
        odds: Dictionary of odds arrays with pool types as keys:
            'WIN' (n,), 'QIN' (n,n), 'QPL' (n,n), 'PLA' (n,).
            Keys that are missing or map to None are treated as not provided.
        lambdas: Optional dictionary of lambda weights for each odds type.
            Keys can be 'WIN', 'QIN', 'QPL', 'PLA'. Any key left out falls
            back to the value in LAMBDA_DEFAULTS.

    Returns:
        Dictionary containing:
            - theta: Fitted strength parameters (n,)
            - W_fitted: Fitted Win probabilities (n,)
            - Qin_fitted: Fitted Qin probabilities (n, n)
            - Q_fitted: Fitted Quinella probabilities (n, n)
            - b_fitted: Fitted Banker probabilities (n,)
            - P_fitted: Full place probability matrix (n, n), P[i,j] =
                       prob horse i finishes in position j
            - loss: Final loss value
            - success: Whether optimization converged
            - message: Optimizer status message
            - n_eval: Number of loss function evaluations

    Raises:
        ValueError: If none of the supported odds types is provided, or if
            the provided arrays disagree on the number of horses.
    """
    n_horses = None
    observed = {}

    # Pools are checked in a fixed order so that a size mismatch is always
    # reported against the later pool, as before.
    for pool in ('WIN', 'QIN', 'QPL', 'PLA'):
        raw = odds.get(pool)
        if raw is None:
            continue
        pool_odds = np.asarray(raw, dtype=float)

        # The leading dimension is the horse count for both the (n,) vectors
        # and the (n, n) pair matrices.
        if n_horses is None:
            n_horses = pool_odds.shape[0]
        elif n_horses != pool_odds.shape[0]:
            raise ValueError(f"Inconsistent number of horses in {pool} odds")

        # The second positional argument of np.nan_to_num is ``copy``, not the
        # fill value, so the replacements have to be passed by keyword. Zero
        # odds invert to inf; map that to probability 0 instead of the huge
        # finite number nan_to_num would otherwise substitute.
        with np.errstate(divide='ignore', invalid='ignore'):
            implied = 1.0 / pool_odds
        observed[pool] = np.nan_to_num(implied, nan=0.0, posinf=0.0, neginf=0.0)

    if n_horses is None:
        raise ValueError(
            "At least one of 'WIN', 'QIN', 'QPL', 'PLA' odds must be provided")

    merged_lambdas = {**LAMBDA_DEFAULTS, **(lambdas or {})}
    model = HarvilleModel(
        n_horses,
        lambda_win=merged_lambdas['WIN'],
        lambda_qin=merged_lambdas['QIN'],
        lambda_quinella=merged_lambdas['QPL'],
        lambda_banker=merged_lambdas['PLA']
    )
    # Pools that were not supplied are passed as None.
    return model.fit(
        W_obs=observed.get('WIN'),
        Qin_obs=observed.get('QIN'),
        Q_obs=observed.get('QPL'),
        b_obs=observed.get('PLA')
    )
hkjc/historical.py ADDED
@@ -0,0 +1,13 @@
1
+ """Functions to fetch and process historical race and horse data from HKJC
2
+ """
3
+ from __future__ import annotations
4
+
5
+ import requests
6
+
7
+ # TODO read and process all races from start date to end date
8
+
9
+ # TODO query all basic info and race history for a specific horse
10
+
11
+ # TODO classify running style & draw to determine blocking probability
12
+
13
+ #
hkjc/live_odds.py CHANGED
@@ -3,6 +3,8 @@
3
3
  from __future__ import annotations
4
4
  from typing import Tuple, List
5
5
 
6
+ from .harville_model import fit_harville_to_odds
7
+
6
8
  import requests
7
9
  from cachetools.func import ttl_cache
8
10
  import numpy as np
@@ -85,7 +87,7 @@ def _fetch_live_odds(date: str, venue_code: str, race_number: int, odds_type: Tu
85
87
  ]
86
88
 
87
89
 
88
- def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str] = ['PLA', 'QPL']) -> dict:
90
+ def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str] = ['PLA', 'QPL'], fit_harville=False) -> dict:
89
91
  """Fetch live odds as numpy arrays.
90
92
 
91
93
  Args:
@@ -97,6 +99,7 @@ def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str]
97
99
  - 'PLA': Place odds
98
100
  - 'QIN': Quinella odds
99
101
  - 'QPL': Quinella Place odds
102
+ fit_harville (bool): Whether to fit the odds using Harville model. Default is False.
100
103
 
101
104
  Returns:
102
105
  dict: Dictionary with keys as odds types and values as numpy arrays containing the odds.
@@ -109,8 +112,10 @@ def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str]
109
112
  except Exception:
110
113
  raise ValueError("Date must be in 'YYYY-MM-DD' format")
111
114
 
115
+ mandatory_types = ['WIN','PLA','QIN','QPL'] if fit_harville else ['PLA']
116
+
112
117
  data = _fetch_live_odds(date, venue_code, race_number,
113
- odds_type=tuple(set(['PLA']+odds_type))) # ensure PLA is always fetched
118
+ odds_type=tuple(set(mandatory_types+odds_type)))
114
119
 
115
120
  # use place odds to determine number of horses
116
121
  pla_data = [entry for entry in data if entry["Type"] == "PLA"]
@@ -129,4 +134,19 @@ def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str]
129
134
  elif entry["Type"] in ["PLA","WIN"]:
130
135
  odds[entry["Type"]][int(entry["HorseID"]) - 1] = entry["Odds"]
131
136
 
137
if fit_harville:
    # fit_harville_to_odds takes a single dict of raw odds keyed by pool type
    # and inverts them itself. The W_obs/Qin_obs/Q_obs/b_obs keywords belong
    # to HarvilleModel.fit, so passing them here raised TypeError.
    fit_res = fit_harville_to_odds(
        {pool: odds[pool] for pool in ('WIN', 'QIN', 'QPL', 'PLA')}
    )
    if fit_res['success']:
        # Replace the fetched odds with the model-implied ones. A fitted
        # probability of 0 inverts to inf, which is reported as odds of 0.
        fitted_key = {
            'PLA': 'b_fitted',
            'QPL': 'Q_fitted',
            'WIN': 'W_fitted',
            'QIN': 'Qin_fitted',
        }
        for pool, key in fitted_key.items():
            odds[pool] = np.nan_to_num(1/fit_res[key], posinf=0)
    else:
        # Best effort: keep the fetched odds when the optimizer did not converge.
        print(f"[WARNING] Harville model fitting failed: {fit_res.get('message','')}")
151
+
132
152
  return {t: odds[t] for t in odds_type}
hkjc/processing.py CHANGED
@@ -6,7 +6,6 @@ from typing import Tuple, List
6
6
  from .live_odds import live_odds
7
7
  from .qpbanker import win_probability, expected_value, average_odds
8
8
  from .optimization import _pareto_filter
9
- from .harville_model import HarvilleOptimizer
10
9
 
11
10
  import polars as pl
12
11
  import numpy as np
@@ -27,7 +26,7 @@ def _process_single_qp_trade(banker: int, covered: List[int], odds_pla: List[flo
27
26
  return (banker, covered, win_prob, exp_value, ave_odds)
28
27
 
29
28
 
30
- def generate_all_qp_trades(date: str, venue_code: str, race_number: int, rebate: float = 0.12, harville_fit=False) -> pl.DataFrame:
29
+ def generate_all_qp_trades(date: str, venue_code: str, race_number: int, rebate: float = 0.12, fit_harville=False) -> pl.DataFrame:
31
30
  """Generate all possible qp tickets for the specified race.
32
31
 
33
32
  Args:
@@ -35,26 +34,17 @@ def generate_all_qp_trades(date: str, venue_code: str, race_number: int, rebate:
35
34
  venue_code (str): Venue code, e.g., 'ST' for Shatin, 'HV' for Happy Valley.
36
35
  race_number (int): Race number.
37
36
  rebate (float, optional): The rebate percentage. Defaults to 0.12.
38
- harville_fit (bool, optional): Whether to fit the odds using Harville model. Defaults to False.
37
+ fit_harville (bool, optional): Whether to fit the odds using Harville model. Defaults to False.
39
38
 
40
39
  Returns:
41
40
  pl.DataFrame: DataFrame with all possible trades and their metrics.
42
41
  """
43
42
 
44
43
  odds = live_odds(date, venue_code, race_number,
45
- odds_type=['PLA', 'QPL', 'WIN', 'QIN'])
44
+ odds_type=['PLA', 'QPL'], fit_harville=fit_harville)
46
45
  N = len(odds['PLA'])
47
46
  candidates = np.arange(1, N+1)
48
47
 
49
- if harville_fit:
50
- ho = HarvilleOptimizer(N)
51
- prob = {k: np.nan_to_num(1/v, 0) for k,v in odds.items()}
52
- fit_res = ho.fit(prob['WIN'], prob['QIN'],
53
- prob['QPL'], prob['PLA'])
54
- if fit_res['success']:
55
- odds['PLA'] = np.nan_to_num(1/fit_res['b_fitted'], posinf=0)
56
- odds['QPL'] = np.nan_to_num(1/fit_res['Q_fitted'], posinf=0)
57
-
58
48
  results = [_process_single_qp_trade(banker, covered, odds['PLA'], odds['QPL'], rebate)
59
49
  for banker in tqdm(candidates, desc="Processing bankers")
60
50
  for covered in _all_subsets(candidates[candidates != banker])]
@@ -65,7 +55,7 @@ def generate_all_qp_trades(date: str, venue_code: str, race_number: int, rebate:
65
55
  return df
66
56
 
67
57
 
68
- def generate_pareto_qp_trades(date: str, venue_code: str, race_number: int, rebate: float = 0.12, groupby: List[str] = [], harville_fit=False) -> pl.DataFrame:
58
def generate_pareto_qp_trades(date: str, venue_code: str, race_number: int, rebate: float = 0.12, groupby: List[str] = [], fit_harville=False) -> pl.DataFrame:
    """Generate qp tickets that are Pareto optimal for the specified race.

    Args:
        date (str): Race date in 'YYYY-MM-DD' format.
        venue_code (str): Venue code, e.g., 'ST' for Shatin, 'HV' for Happy Valley.
        race_number (int): Race number.
        rebate (float, optional): The rebate percentage. Defaults to 0.12.
        groupby (List[str], optional): Column names forwarded to the Pareto
            filter as its ``groupby`` argument. Defaults to [].
        fit_harville (bool, optional): Whether to fit the odds using Harville model. Defaults to False.

    Returns:
        pl.DataFrame: DataFrame with all Pareto trades and their metrics.
    """
    # generate_all_qp_trades names this flag ``fit_harville``; forwarding it
    # under the old ``harville_fit`` keyword raised TypeError.
    df = generate_all_qp_trades(
        date, venue_code, race_number, rebate, fit_harville=fit_harville)
    # Keep only trades not dominated on both WinProb and ExpValue (higher is better).
    pareto_df = _pareto_filter(df, groupby=groupby, by=[
                               'WinProb', 'ExpValue'], maximize=True)
    return pareto_df
hkjc/speedpro.py CHANGED
@@ -12,7 +12,7 @@ ENERGY_XLS_TEMPLATE = "https://racing.hkjc.com/racing/speedpro/assets/excel/{dat
12
12
  SPEEDMAP_URL_TEMPLATE = "https://racing.hkjc.com/racing/speedpro/assets/json/speedguide/race_{race_num}.json"
13
13
 
14
14
 
15
- def speedpro_df(race_date: str) -> pl.DataFrame:
15
+ def speedpro_energy(race_date: str) -> pl.DataFrame:
16
16
  """Fetch and process SpeedPro scores for a given race date.
17
17
 
18
18
  Args:
@@ -29,11 +29,15 @@ def speedpro_df(race_date: str) -> pl.DataFrame:
29
29
 
30
30
  df = pl.read_excel(ENERGY_XLS_TEMPLATE.format(
31
31
  date=dt.strptime(race_date, "%Y-%m-%d").strftime("%Y%m%d")))
32
-
32
+
33
33
  # Clean column names
34
34
  df.columns = [col.strip().replace(" ", "").replace(
35
35
  "\n", "_").replace('.', '') for col in df.columns]
36
36
 
37
+ df = (df.with_columns(pl.col('RunnerNumber').str.to_integer())
38
+ .with_columns(pl.col('SpeedPRO_Energy_Difference').str.to_integer())
39
+ .select(['RaceNumber', 'RunnerNumber', 'HorseName', 'FitnessRatings','SpeedPRO_Energy_Difference']))
40
+
37
41
  return df
38
42
 
39
43
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hkjc
3
- Version: 0.3.4
3
+ Version: 0.3.6
4
4
  Summary: Library for scrapping HKJC data and perform basic analysis
5
5
  Requires-Python: >=3.11
6
6
  Requires-Dist: cachetools>=6.2.0
@@ -0,0 +1,12 @@
1
+ hkjc/__init__.py,sha256=xKkhk53z9aGyGkiwQ_RNc7Qa82lp1Oinyx83m_pBv_E,627
2
+ hkjc/harville_model.py,sha256=MZjPLS-1nbEhp1d4Syuq13DtraKnd7TlNqBmOOCwxgc,15976
3
+ hkjc/historical.py,sha256=wKTJi--0Mx_x0vO0ysOGD37oM8453woQK-cLzPOLgiQ,336
4
+ hkjc/live_odds.py,sha256=m3sO5AIam73Qr2my8aUW4slE7G8xZk6tnMDRPAWS_bs,5447
5
+ hkjc/optimization.py,sha256=OArQ3w9bwcIV_lTNuE5za6AROoa90xk_gwAoGwQ-8RE,3784
6
+ hkjc/processing.py,sha256=1RZi-xPMV_rcfmfDHmd-MVRKL9fDd6XKolD5dzvxaiQ,3307
7
+ hkjc/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ hkjc/qpbanker.py,sha256=vhvYb5_nGrKgYgre9gGF6tgswovca5C9pZVOPGxEP1Q,4804
9
+ hkjc/speedpro.py,sha256=Tb8YqWnD7nnRv0I5onnWxGtsRi3bQZZumWnZncT2n4M,1968
10
+ hkjc-0.3.6.dist-info/METADATA,sha256=I7hzvISiv67h4oCrJY0OpuywtRfk_WKvnBBHNbAQmlE,413
11
+ hkjc-0.3.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
12
+ hkjc-0.3.6.dist-info/RECORD,,
hkjc/analysis.py DELETED
File without changes
hkjc/visualization.py DELETED
File without changes
@@ -1,13 +0,0 @@
1
- hkjc/__init__.py,sha256=KBbWVwLXPPb93bk_h2Qt9t5OH8y6RrVUeH-ZYNKQAoQ,619
2
- hkjc/analysis.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- hkjc/harville_model.py,sha256=KoN5AxaAbjDkCCFUrTBSdsfZDkcINrCku9JHguofMHU,12799
4
- hkjc/live_odds.py,sha256=i_g9ckQKA9GWbwPXNvbmNvm-dPbF9UJoGiWv6_bHzwA,4603
5
- hkjc/optimization.py,sha256=OArQ3w9bwcIV_lTNuE5za6AROoa90xk_gwAoGwQ-8RE,3784
6
- hkjc/processing.py,sha256=HgZ0NnjCBb8Z7fX7yTLMlZiXjVIaRr1nELsE3Fs8VFE,3741
7
- hkjc/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- hkjc/qpbanker.py,sha256=vhvYb5_nGrKgYgre9gGF6tgswovca5C9pZVOPGxEP1Q,4804
9
- hkjc/speedpro.py,sha256=vKnSz9yY1rfVmRo7GVxXLjsiQN-YgwxSbV0B7yuszS4,1702
10
- hkjc/visualization.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
- hkjc-0.3.4.dist-info/METADATA,sha256=Wh2uEr7WOJej-2NhKXYUlvQmcxGge0rh29Rfc5TqRPU,413
12
- hkjc-0.3.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
13
- hkjc-0.3.4.dist-info/RECORD,,
File without changes