hkjc 0.3.18__py3-none-any.whl → 0.3.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hkjc/__init__.py CHANGED
@@ -4,10 +4,10 @@ This module re-exports commonly used symbols from the submodules.
4
4
  """
5
5
  from importlib.metadata import version as _version
6
6
 
7
- __all__ = ["live", "qpbanker",
7
+ __all__ = ["live", "features",
8
8
  "generate_all_qp_trades", "generate_all_pla_trades", "pareto_filter",
9
- "speedpro_energy", "speedmap", "harveille_model",
10
- "generate_historical_data"]
9
+ "speedpro_energy", "speedmap", "harveille_model",
10
+ "generate_historical_data"]
11
11
 
12
12
  try:
13
13
  __version__ = _version(__name__)
@@ -17,4 +17,4 @@ except Exception: # pragma: no cover - best-effort version resolution
17
17
  from .processing import generate_all_qp_trades, generate_all_pla_trades, generate_historical_data
18
18
  from .utils import pareto_filter
19
19
  from .speedpro import speedmap, speedpro_energy
20
- from . import harville_model, live
20
+ from . import harville_model, live, features
hkjc/features.py ADDED
@@ -0,0 +1,6 @@
1
+ """ Polars expressions for commonly-used analysis features, subject to frequent changes.
2
+ """
3
+ import polars as pl
4
+
5
+ rating_diff = (pl.col('Rtg').max().over('RaceId')-pl.col('Rtg')).alias('RtgDiff')
6
+ frontrunner_pct = (pl.col('FavoriteRunningStyle')=='FrontRunner').mean().over('RaceId').alias('FRPct')
hkjc/harville_model.py CHANGED
@@ -198,10 +198,12 @@ class HarvilleModel:
198
198
  lambda_qin (float): Weight for Qin pool loss
199
199
  lambda_quinella (float): Weight for Quinella pool loss
200
200
  lambda_banker (float): Weight for Banker pool loss
201
+ takeout_rate (float): House take out rate (e.g., 0.175 = 17.5%)
201
202
  """
202
203
 
203
204
  def __init__(self, n_horses: int, lambda_win: float = LAMBDA_DEFAULTS['WIN'], lambda_qin: float = LAMBDA_DEFAULTS['QIN'],
204
- lambda_quinella: float = LAMBDA_DEFAULTS['QPL'], lambda_banker: float = LAMBDA_DEFAULTS['PLA']) -> None:
205
+ lambda_quinella: float = LAMBDA_DEFAULTS['QPL'], lambda_banker: float = LAMBDA_DEFAULTS['PLA'],
206
+ takeout_rate: float = 0.175) -> None:
205
207
  """
206
208
  Initialize model.
207
209
 
@@ -211,6 +213,9 @@ class HarvilleModel:
211
213
  lambda_qin: Weight for Qin odds (prob pair finishes 1st-2nd)
212
214
  lambda_quinella: Weight for Quinella odds (prob pair in top 3)
213
215
  lambda_banker: Weight for Banker odds (prob horse in top 3)
216
+ takeout_rate: House take out rate as decimal (default 0.175 = 17.5%).
217
+ The observed odds include the house's take, which makes
218
+ them lower than fair odds. This parameter adjusts for that.
214
219
 
215
220
  Raises:
216
221
  ValueError: If n_horses > 20 (exponential complexity warning)
@@ -223,9 +228,51 @@ class HarvilleModel:
223
228
  self.lambda_qin = lambda_qin
224
229
  self.lambda_quinella = lambda_quinella
225
230
  self.lambda_banker = lambda_banker
231
+ self.takeout_rate = takeout_rate
226
232
  self._eval_count = 0
227
233
  self.result = None
228
234
 
235
+ def _adjust_for_takeout(self, probs: Optional[np.ndarray]) -> Optional[np.ndarray]:
236
+ """
237
+ Adjust observed probabilities to remove house takeout rate.
238
+
239
+ Observed odds from the betting market include the house's take, causing
240
+ the sum of implied probabilities to exceed 1.0. This method adjusts them
241
+ to represent true probabilities.
242
+
243
+ Args:
244
+ probs: Observed probabilities (can be 1D or 2D array)
245
+
246
+ Returns:
247
+ Adjusted probabilities with takeout removed, or None if input is None
248
+ """
249
+ if probs is None:
250
+ return None
251
+
252
+ # Multiply by (1 - takeout_rate) to remove the house edge
253
+ adjusted = probs * (1.0 - self.takeout_rate)
254
+
255
+ return adjusted
256
+
257
+ def _probs_to_market_odds(self, probs: np.ndarray) -> np.ndarray:
258
+ """
259
+ Convert fitted probabilities to market odds including takeout rate.
260
+
261
+ This converts true probabilities (which sum to 1.0) back to decimal odds
262
+ as they would appear in the betting market, which includes the house's
263
+ takeout rate. The resulting odds can be directly compared to observed odds.
264
+
265
+ Args:
266
+ probs: Fitted probabilities (1D or 2D array)
267
+
268
+ Returns:
269
+ Market odds (decimal format) with takeout reintroduced
270
+ """
271
+
272
+ # Convert true probabilities to market odds with takeout
273
+ # Market odds are worse (lower) than fair odds due to house edge
274
+ return (1.0 - self.takeout_rate) / probs
275
+
229
276
  def _loss(self, theta: np.ndarray, W_obs: Optional[np.ndarray],
230
277
  Qin_obs: Optional[np.ndarray], Q_obs: Optional[np.ndarray],
231
278
  b_obs: Optional[np.ndarray]) -> float:
@@ -290,26 +337,34 @@ class HarvilleModel:
290
337
 
291
338
  Returns:
292
339
  Dictionary containing:
293
- - theta: Fitted strength parameters (n,)
294
- - W_fitted: Fitted Win probabilities (n,)
295
- - Qin_fitted: Fitted Qin probabilities (n, n)
296
- - Q_fitted: Fitted Quinella probabilities (n, n)
297
- - b_fitted: Fitted Banker probabilities (n,)
298
- - P_fitted: Full place probability matrix (n, n), P[i,j] =
299
- prob horse i finishes in position j
300
- - loss: Final loss value
301
340
  - success: Whether optimization converged
302
341
  - message: Optimizer status message
303
342
  - n_eval: Number of loss function evaluations
343
+ - loss: Final loss value
344
+ - prob_fit: Dictionary of fitted probabilities (sum to 1.0)
345
+ - theta: Fitted strength parameters (n,)
346
+ - W: Win probabilities (n,)
347
+ - Qin: Qin probabilities (n, n)
348
+ - Q: Quinella probabilities (n, n)
349
+ - b: Banker probabilities (n,)
350
+ - P: Full place probability matrix (n, n), P[i,j] =
351
+ prob horse i finishes in position j
352
+ - odds_fit: Dictionary of fitted market odds (directly comparable to observed)
353
+ - WIN: Win odds (n,)
354
+ - QIN: Qin odds (n, n)
355
+ - QPL: Quinella Place odds (n, n)
356
+ - PLA: Place odds (n,)
304
357
 
305
358
  Raises:
306
359
  ValueError: If no odds provided or shapes don't match n_horses
307
360
 
308
361
  Example:
309
- >>> opt = HarvilleOptimizer(n_horses=10)
362
+ >>> opt = HarvilleModel(n_horses=10, takeout_rate=0.175)
310
363
  >>> results = opt.fit(W_obs=win_probs, Q_obs=quinella_probs)
311
- >>> print(f"Fitted strengths: {results['theta']}")
364
+ >>> print(f"Fitted strengths: {results['prob_fit']['theta']}")
312
365
  >>> print(f"Converged: {results['success']}")
366
+ >>> # Compare fitted odds to observed odds
367
+ >>> diff = results['odds_fit']['WIN'] - observed_win_odds
313
368
  """
314
369
  if W_obs is None and Qin_obs is None and Q_obs is None and b_obs is None:
315
370
  raise ValueError("At least one type of odds must be provided")
@@ -323,6 +378,12 @@ class HarvilleModel:
323
378
  if b_obs is not None and b_obs.shape != (self.n,):
324
379
  raise ValueError(f"b_obs must be ({self.n},)")
325
380
 
381
+ # Adjust observed probabilities for house takeout rate
382
+ W_obs = self._adjust_for_takeout(W_obs)
383
+ Qin_obs = self._adjust_for_takeout(Qin_obs)
384
+ Q_obs = self._adjust_for_takeout(Q_obs)
385
+ b_obs = self._adjust_for_takeout(b_obs)
386
+
326
387
  if theta_init is None:
327
388
  if W_obs is not None:
328
389
  theta_init = W_obs / W_obs.sum()
@@ -356,27 +417,41 @@ class HarvilleModel:
356
417
 
357
418
  W_fitted, Qin_fitted, Q_fitted, b_fitted, P_fitted = _compute_probabilities(theta_opt)
358
419
 
420
+ # Convert fitted probabilities to market odds (with takeout reintroduced)
421
+ WIN_odds_fitted = self._probs_to_market_odds(W_fitted)
422
+ PLA_odds_fitted = self._probs_to_market_odds(b_fitted)
423
+ QIN_odds_fitted = self._probs_to_market_odds(Qin_fitted)
424
+ QPL_odds_fitted = self._probs_to_market_odds(Q_fitted)
425
+
359
426
  self.result = {
360
- 'theta': theta_opt,
361
- 'W_fitted': W_fitted,
362
- 'Qin_fitted': Qin_fitted,
363
- 'Q_fitted': Q_fitted,
364
- 'b_fitted': b_fitted,
365
- 'P_fitted': P_fitted,
366
- 'loss': result.fun,
367
427
  'success': result.success,
368
428
  'message': result.message,
369
- 'n_eval': self._eval_count
429
+ 'n_eval': self._eval_count,
430
+ 'loss': result.fun,
431
+ 'prob_fit': {
432
+ 'theta': theta_opt,
433
+ 'W': W_fitted,
434
+ 'Qin': Qin_fitted,
435
+ 'Q': Q_fitted,
436
+ 'b': b_fitted,
437
+ 'P': P_fitted
438
+ },
439
+ 'odds_fit': {
440
+ 'WIN': WIN_odds_fitted,
441
+ 'QIN': QIN_odds_fitted,
442
+ 'QPL': QPL_odds_fitted,
443
+ 'PLA': PLA_odds_fitted
444
+ }
370
445
  }
371
446
 
372
447
  return self.result
373
448
 
374
- def fit_harville_to_odds(odds : dict[str, np.ndarray], lambdas : dict[str, float] = None) -> dict:
449
+ def fit_harville_to_odds(odds : dict[str, np.ndarray], lambdas : dict[str, float] = None, takeout_rate: float = 0.175) -> dict:
375
450
  """
376
451
  Fit Harville model to observed betting odds.
377
452
 
378
- At least one odds type must be provided. All odds should be probabilities
379
- (not decimal/fractional odds). Matrices should be symmetric where applicable.
453
+ At least one odds type must be provided. All odds should be decimal odds
454
+ (not probabilities). Matrices should be symmetric where applicable.
380
455
 
381
456
  Args:
382
457
  odds: Dictionary of odds arrays with types as keys:
@@ -384,20 +459,35 @@ def fit_harville_to_odds(odds : dict[str, np.ndarray], lambdas : dict[str, float
384
459
  lambdas: Optional dictionary of lambda weights for each odds type.
385
460
  Keys can be 'WIN', 'QIN', 'QPL', 'PLA'. Defaults to
386
461
  {'WIN': 1.0, 'QIN': 2.0, 'QPL': 1.5, 'PLA': 0.7}
462
+ takeout_rate: House take out rate as decimal (default 0.175 = 17.5%).
463
+ The house keeps this percentage of the betting pool, causing
464
+ observed odds to be lower than fair odds.
387
465
 
388
466
  Returns:
389
467
  Dictionary containing:
390
- - theta: Fitted strength parameters (n,)
391
- - W_fitted: Fitted Win probabilities (n,)
392
- - Qin_fitted: Fitted Qin probabilities (n, n)
393
- - Q_fitted: Fitted Quinella probabilities (n, n)
394
- - b_fitted: Fitted Banker probabilities (n,)
395
- - P_fitted: Full place probability matrix (n, n), P[i,j] =
396
- prob horse i finishes in position j
397
- - loss: Final loss value
398
468
  - success: Whether optimization converged
399
469
  - message: Optimizer status message
400
470
  - n_eval: Number of loss function evaluations
471
+ - loss: Final loss value
472
+ - prob_fit: Dictionary of fitted probabilities (sum to 1.0)
473
+ - theta: Fitted strength parameters (n,)
474
+ - W: Win probabilities (n,)
475
+ - Qin: Qin probabilities (n, n)
476
+ - Q: Quinella probabilities (n, n)
477
+ - b: Banker probabilities (n,)
478
+ - P: Full place probability matrix (n, n), P[i,j] =
479
+ prob horse i finishes in position j
480
+ - odds_fit: Dictionary of fitted market odds (directly comparable to observed)
481
+ - WIN: Win odds (n,)
482
+ - QIN: Qin odds (n, n)
483
+ - QPL: Quinella Place odds (n, n)
484
+ - PLA: Place odds (n,)
485
+
486
+ Example:
487
+ >>> odds = {'WIN': np.array([3.5, 4.2, 5.0, 8.5, 12.0])}
488
+ >>> result = fit_harville_to_odds(odds, takeout_rate=0.175)
489
+ >>> print(result['prob_fit']['theta']) # True winning probabilities
490
+ >>> print(result['odds_fit']['WIN']) # Fitted market odds (compare to input)
401
491
  """
402
492
  n_horses = None
403
493
  W_obs = None
@@ -443,7 +533,8 @@ def fit_harville_to_odds(odds : dict[str, np.ndarray], lambdas : dict[str, float
443
533
  lambda_win=merged_lambdas['WIN'],
444
534
  lambda_qin=merged_lambdas['QIN'],
445
535
  lambda_quinella=merged_lambdas['QPL'],
446
- lambda_banker=merged_lambdas['PLA']
536
+ lambda_banker=merged_lambdas['PLA'],
537
+ takeout_rate=takeout_rate
447
538
  )
448
539
  result = ho.fit(W_obs=W_obs, Qin_obs=Qin_obs, Q_obs=Q_obs, b_obs=b_obs)
449
540
  return result
hkjc/historical.py CHANGED
@@ -20,7 +20,7 @@ incidents = ['DISQ', 'DNF', 'FE', 'ML', 'PU', 'TNP', 'TO',
20
20
  def _soupify(url: str) -> BeautifulSoup:
21
21
  """Fetch and parse a webpage and return BeautifulSoup object
22
22
  """
23
- response = requests.get(url, timeout=30)
23
+ response = requests.get(url, timeout=180)
24
24
  response.raise_for_status()
25
25
  return BeautifulSoup(response.content, 'html.parser')
26
26
 
@@ -52,11 +52,13 @@ def _classify_running_style(df: pl.DataFrame, running_pos_col="RunningPosition")
52
52
  .alias("split_data").cast(pl.Int64, strict=False)
53
53
  ).unnest("split_data")
54
54
 
55
- df = df.with_columns(pl.col('FinishPosition').fill_null(pl.col('Position3')))
55
+ df = df.with_columns(
56
+ pl.col('FinishPosition').fill_null(pl.col('Position3')))
56
57
 
57
58
  df = df.with_columns([
58
59
  (pl.col("StartPosition")-pl.col("FinishPosition")).alias("PositionChange"),
59
- pl.mean_horizontal("StartPosition", "Position2").alias("AvgStartPosition"),
60
+ pl.mean_horizontal("StartPosition", "Position2").alias(
61
+ "AvgStartPosition"),
60
62
  ]).with_columns(pl.when(pl.col("StartPosition").is_null()).then(pl.lit("--"))
61
63
  .when((pl.col("AvgStartPosition") <= 3) & (pl.col("StartPosition") <= 3)).then(pl.lit("FrontRunner"))
62
64
  .when((pl.col("PositionChange") >= 1) & (pl.col("StartPosition") >= 6)).then(pl.lit("Closer"))
@@ -77,35 +79,7 @@ def _extract_horse_data(horse_no: str) -> pl.DataFrame:
77
79
  pl.col('Date') != '') # Remove empty rows
78
80
  horse_data = _classify_running_style(horse_data)
79
81
 
80
- # Extract horse profile info
81
- table = soup.find_all('table', class_='table_eng_text')
82
- profile_data = _parse_html_table(table[0], skip_header=True)
83
- profile_data = _parse_html_table(table[1], skip_header=True)
84
-
85
- try:
86
- current_rating = int(profile_data.filter(
87
- pl.col("column_0").str.starts_with("Current Rating"))['column_2'].item(0))
88
- season_start_rating = int(profile_data.filter(pl.col(
89
- "column_0").str.starts_with("Start of Season Rating"))['column_2'].item(0))
90
- except:
91
- current_rating, season_start_rating = 0, 0
92
-
93
- try:
94
- last_rating = int(profile_data.filter(
95
- pl.col("column_0").str.starts_with("Last Rating"))['column_2'].item(0))
96
- except:
97
- last_rating = 0
98
-
99
- horse_info = {
100
- 'HorseID': horse_no,
101
- 'CurrentRating': current_rating,
102
- 'SeasonStartRating': season_start_rating,
103
- 'LastRating': last_rating if current_rating == 0 else current_rating
104
- }
105
- horse_data = (horse_data.with_columns([
106
- pl.lit(value).alias(key) for key, value in horse_info.items()
107
- ])
108
- )
82
+ horse_data = horse_data.with_columns(pl.lit(horse_no).alias('HorseNo'))
109
83
 
110
84
  return horse_data
111
85
 
@@ -124,16 +98,16 @@ def _clean_horse_data(df: pl.DataFrame) -> pl.DataFrame:
124
98
  pl.col('Dr').cast(pl.Int64, strict=False),
125
99
  pl.col('Rtg').cast(pl.Int64, strict=False),
126
100
  pl.col('Dist').cast(pl.Int64, strict=False),
127
- pl.col('WinOdds').cast(pl.Float64, strict=False),
128
- pl.col('RaceIndex').cast(pl.Int64, strict=False)
101
+ pl.col('WinOdds').cast(pl.Float64, strict=False)
129
102
  ])
130
103
 
131
- df = df.with_columns(
104
+ df = (df.filter(~pl.col('FinishTime').str.starts_with('--'))
105
+ .with_columns(
132
106
  (
133
- pl.col("FinishTime").str.split_exact(".", 1).struct.field("field_0").cast(pl.Int64) * 60 +
134
- pl.col("FinishTime").str.split_exact(".", 1).struct.field("field_1").cast(pl.Int64)
135
- ).cast(pl.Float64).alias("FinishTime")
136
- )
107
+ pl.col("FinishTime").str.splitn(".", 2).struct.field("field_0").cast(pl.Int64) * 60 +
108
+ pl.col("FinishTime").str.splitn(".", 2).struct.field("field_1").cast(pl.Float64)
109
+ ).cast(pl.Float64).round(2).alias("FinishTime")
110
+ ))
137
111
 
138
112
  df = df.with_columns(
139
113
  pl.col('RCTrackCourse').str.split_exact(' / ', 2)
@@ -141,12 +115,22 @@ def _clean_horse_data(df: pl.DataFrame) -> pl.DataFrame:
141
115
  .alias('RCTrackCourse')
142
116
  ).unnest('RCTrackCourse')
143
117
 
118
+ df = df.with_columns(
119
+ pl.when(pl.col('Date').str.len_chars() <= 8)
120
+ .then(pl.col('Date').str.strptime(pl.Date, '%d/%m/%y', strict=False))
121
+ .otherwise(pl.col('Date').str.strptime(pl.Date, '%d/%m/%Y'))
122
+ ).with_columns(
123
+ pl.concat_str(pl.col('Date').dt.strftime('%Y%m%d'), pl.col(
124
+ 'Venue'), pl.col('RaceIndex')).alias('RaceId')
125
+ ).drop("VideoReplay")
144
126
  return df
145
127
 
128
+
146
129
  def get_horse_data(horse_no: str) -> pl.DataFrame:
147
130
  df = _extract_horse_data(horse_no)
148
131
  return _clean_horse_data(df)
149
132
 
133
+
150
134
  def _clean_race_data(df: pl.DataFrame) -> pl.DataFrame:
151
135
  """ Clean and convert horse data to suitable data types
152
136
  """
@@ -165,13 +149,14 @@ def _clean_race_data(df: pl.DataFrame) -> pl.DataFrame:
165
149
 
166
150
  df = df.with_columns(
167
151
  (
168
- pl.col("FinishTime").str.split_exact(":", 1).struct.field("field_0").cast(pl.Int64) * 60 +
169
- pl.col("FinishTime").str.split_exact(":", 1).struct.field("field_1").cast(pl.Int64)
170
- ).cast(pl.Float64).alias("FinishTime")
152
+ pl.col("FinishTime").str.splitn(":", 2).struct.field("field_0").cast(pl.Int64) * 60 +
153
+ pl.col("FinishTime").str.splitn(":", 2).struct.field("field_1").cast(pl.Float64)
154
+ ).cast(pl.Float64).round(2).alias("FinishTime")
171
155
  )
172
156
 
173
157
  return df
174
158
 
159
+
175
160
  def _extract_race_data(date: str, venue_code: str, race_number: int) -> pl.DataFrame:
176
161
  soup = _soupify_race_page(date, venue_code, race_number)
177
162
  table = soup.find('div', class_='race_tab').find('table')
@@ -211,5 +196,5 @@ def _extract_race_data(date: str, venue_code: str, race_number: int) -> pl.DataF
211
196
 
212
197
 
213
198
  def get_race_data(date: str, venue_code: str, race_number: int) -> pl.DataFrame:
214
- df = _extract_race_data(date,venue_code,race_number)
215
- return _clean_race_data(df)
199
+ df = _extract_race_data(date, venue_code, race_number)
200
+ return _clean_race_data(df)
hkjc/live.py CHANGED
@@ -7,8 +7,6 @@ import requests
7
7
  from cachetools.func import ttl_cache
8
8
  import numpy as np
9
9
 
10
- from .utils import _validate_date, _validate_venue_code
11
-
12
10
  HKJC_LIVEODDS_ENDPOINT = "https://info.cld.hkjc.com/graphql/base/"
13
11
 
14
12
  RACEMTG_PAYLOAD = {
@@ -245,7 +243,7 @@ query racing($date: String, $venueCode: String, $oddsTypes: [OddsType], $raceNo:
245
243
 
246
244
 
247
245
  @ttl_cache(maxsize=12, ttl=1000)
248
- def _fetch_live_races(date: str, venue_code: str) -> dict:
246
+ def _fetch_live_races(date: str=None, venue_code: str=None) -> dict:
249
247
  """Fetch live race data from HKJC GraphQL endpoint."""
250
248
  payload = RACEMTG_PAYLOAD.copy()
251
249
  payload["variables"] = payload["variables"].copy()
@@ -265,9 +263,10 @@ def _fetch_live_races(date: str, venue_code: str) -> dict:
265
263
  if r.status_code != 200:
266
264
  raise RuntimeError(f"Request failed: {r.status_code} - {r.text}")
267
265
 
268
- races = r.json()['data']['raceMeetings'][0]['races']
266
+ data = r.json()['data']['raceMeetings'][0]
267
+ races = data['races']
269
268
 
270
- race_info = {}
269
+ race_info = {'Date': data['date'], 'Venue': data['venueCode'], 'Races': {}}
271
270
  for race in races:
272
271
  race_num = race['no']
273
272
  race_name = race['raceName_en']
@@ -277,12 +276,15 @@ def _fetch_live_races(date: str, venue_code: str) -> dict:
277
276
  race_class = race['raceClass_en']
278
277
  race_course = race['raceCourse']['displayCode']
279
278
 
280
- runners = [{'Dr': runner['barrierDrawNumber'],
281
- 'Rtg' : int(runner['currentRating']),
282
- 'Wt' : int(runner['currentWeight']),
279
+ runners = [{'No': runner['no'],
280
+ 'Name': runner['name_en'],
281
+ 'Dr': runner['barrierDrawNumber'],
282
+ 'Rtg': int(runner['currentRating']),
283
+ 'Wt': int(runner['currentWeight']),
284
+ 'Handicap': int(runner['handicapWeight']),
283
285
  'HorseNo': runner['horse']['code']
284
- } for runner in race['runners']]
285
- race_info[race_num]={
286
+ } for runner in race['runners'] if runner['status'] != "Standby"]
287
+ race_info['Races'][race_num] = {
286
288
  'No': race_num,
287
289
  'Name': race_name,
288
290
  'Class': race_class,
@@ -290,13 +292,13 @@ def _fetch_live_races(date: str, venue_code: str) -> dict:
290
292
  'Dist': race_dist,
291
293
  'Going': race_going,
292
294
  'Track': race_track,
293
- 'Runners': runners
295
+ 'Runners': runners
294
296
  }
295
297
  return race_info
296
298
 
297
299
 
298
300
  @ttl_cache(maxsize=12, ttl=30)
299
- def _fetch_live_odds(date: str, venue_code: str, race_number: int, odds_type: Tuple[str] = ('PLA', 'QPL')) -> List[dict]:
301
+ def _fetch_live_odds(date: str, venue_code: str, race_number: int, odds_type: Tuple[str] = ('PLA', )) -> List[dict]:
300
302
  """Fetch live odds data from HKJC GraphQL endpoint."""
301
303
  payload = LIVEODDS_PAYLOAD.copy()
302
304
  payload["variables"] = payload["variables"].copy()
@@ -329,14 +331,14 @@ def _fetch_live_odds(date: str, venue_code: str, race_number: int, odds_type: Tu
329
331
  ]
330
332
 
331
333
 
332
- def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str] = ['PLA', 'QPL']) -> dict:
334
+ def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str] = ['WIN', 'PLA', 'QPL', 'QIN']) -> dict:
333
335
  """Fetch live odds as numpy arrays.
334
336
 
335
337
  Args:
336
338
  date (str): Date in 'YYYY-MM-DD' format.
337
339
  venue_code (str): Venue code, e.g., 'ST' for Shatin, 'HV' for Happy Valley.
338
340
  race_number (int): Race number.
339
- odds_type (List[str]): Types of odds to fetch. Default is ['PLA', 'QPL']. Currently the following types are supported:
341
+ odds_type (List[str]): Types of odds to fetch. Default is ['WIN', 'PLA', 'QPL', 'QIN']. Currently the following types are supported:
340
342
  - 'WIN': Win odds
341
343
  - 'PLA': Place odds
342
344
  - 'QIN': Quinella odds
@@ -348,11 +350,13 @@ def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str]
348
350
  If odds_type is 'WIN','PLA', returns a 1D array of place odds.
349
351
  If odds_type is 'QIN','QPL', returns a 2D array of quinella place odds.
350
352
  """
351
- _validate_date(date)
352
- _validate_venue_code(venue_code)
353
-
354
353
  race_info = _fetch_live_races(date, venue_code)
355
- N = len(race_info[race_number]['Runners'])
354
+ N = len(race_info['Races'][race_number]['Runners'])
355
+
356
+ if (race_info['Date'] != date) or (race_info['Venue'] != venue_code):
357
+ print(f"[WARNING] Requested {date} {venue_code} but server returned {race_info['Date']} {race_info['Venue']}.")
358
+ date = race_info['Date']
359
+ venue_code = race_info['Venue']
356
360
 
357
361
  data = _fetch_live_odds(date, venue_code, race_number,
358
362
  odds_type=tuple(odds_type))
hkjc/processing.py CHANGED
@@ -42,7 +42,15 @@ def _historical_process_single_date_venue(date: str, venue_code: str) -> List[pl
42
42
 
43
43
 
44
44
  def generate_historical_data(start_date: str, end_date: str) -> pl.DataFrame:
45
- """Generate historical race dataset from start_date to end_date"""
45
+ """Generate historical race dataset from start_date to end_date (inclusive).
46
+
47
+ Args:
48
+ start_date (str): Date in 'YYYY-MM-DD' format.
49
+ end_date (str): Date in 'YYYY-MM-DD' format.
50
+
51
+ Returns:
52
+ pl.DataFrame: DataFrame with all records.
53
+ """
46
54
  _validate_date(start_date)
47
55
  _validate_date(end_date)
48
56
  start_dt = dt.strptime(start_date, '%Y-%m-%d')
@@ -50,7 +58,7 @@ def generate_historical_data(start_date: str, end_date: str) -> pl.DataFrame:
50
58
 
51
59
  dfs = []
52
60
 
53
- for date in tqdm(pl.date_range(start_dt, end_dt, interval='1d', eager=True), leave=False, desc='Scanning for horse IDs ...'):
61
+ for date in tqdm(pl.date_range(start_dt, end_dt, interval='1d', eager=True, closed='both'), leave=False, desc='Scanning for horse IDs ...'):
54
62
  for venue_code in ['ST', 'HV']:
55
63
  dfs += _historical_process_single_date_venue(date, venue_code)
56
64
 
@@ -63,7 +71,12 @@ def generate_historical_data(start_date: str, end_date: str) -> pl.DataFrame:
63
71
  # Use horse track records
64
72
  dfs = [_extract_horse_data(horse_id) for horse_id in tqdm(horse_ids, desc='Processing horses ...', leave=False)]
65
73
  df = pl.concat(dfs)
66
- return _clean_horse_data(df)
74
+
75
+ try:
76
+ return _clean_horse_data(df).filter(pl.col('Date').is_between(start_dt, end_dt))
77
+ except:
78
+ print('Failed to clean data. Returning raw data for debug.')
79
+ return df
67
80
 
68
81
 
69
82
  # ==========================
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hkjc
3
- Version: 0.3.18
3
+ Version: 0.3.21
4
4
  Summary: Library for scrapping HKJC data and perform basic analysis
5
5
  Requires-Python: >=3.11
6
6
  Requires-Dist: beautifulsoup4>=4.14.2
@@ -0,0 +1,14 @@
1
+ hkjc/__init__.py,sha256=XSm9N6YbZ2SzyxjO9aR26ctB4Z1-VeBImuroSgncUfk,737
2
+ hkjc/features.py,sha256=LicwtKBpMzpz_dSX9bjoCLLaRUu8oeZo1AloTe7v7sI,298
3
+ hkjc/harville_model.py,sha256=WSA_1EcNOHKGraP6WVHJ3FXZPGrDrjKhJc_q70KKx80,20188
4
+ hkjc/historical.py,sha256=aONchf7CMNs2B-WVDS_GWg8g0U0ZEH-FjbfhdJwc_N0,7683
5
+ hkjc/live.py,sha256=CfMeHRQfhKSmhQaexM99sdP0KRbIEqg2DIvNPc1gohk,10696
6
+ hkjc/processing.py,sha256=hQnHxl6HYlFOeSLSOCVsemgTKcwt9_tYUQI-itpvjUg,7188
7
+ hkjc/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ hkjc/speedpro.py,sha256=Y2Z3GYGeePc4sM-ZnCHXCI1N7L-_j9nrMqS3CC5BBSo,2031
9
+ hkjc/utils.py,sha256=4CA_FPf_U3GvzoLkqBX0qDPZgrSvKJKvbP7VWqd5FiA,6323
10
+ hkjc/strategy/place_only.py,sha256=lHPjTSj8PzghxncNBg8FI4T4HJigekB9a3bV7l7VtPA,2079
11
+ hkjc/strategy/qpbanker.py,sha256=MQxjwsfhllKZroKS8w8Q3bi3HMjGc1DAyBIjNZAp3yQ,4805
12
+ hkjc-0.3.21.dist-info/METADATA,sha256=YuIC0EvFVS3Z-8cwdzczMV7qQxMYvIKtO442iUQu5Jg,480
13
+ hkjc-0.3.21.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
14
+ hkjc-0.3.21.dist-info/RECORD,,
hkjc/analysis.py DELETED
@@ -1,3 +0,0 @@
1
- # TODO:
2
-
3
- # Generate filtered live odds, fav run style, dr, current rating, season start rating, track record
@@ -1,14 +0,0 @@
1
- hkjc/__init__.py,sha256=5A9MzcITYJDcA2UbIBpkimZBYSqS4pgRuQJhTagOfpE,753
2
- hkjc/analysis.py,sha256=0042_NMIkQCl0J6B0P4TFfrBDCnm2B6jsCZKOEO30yI,108
3
- hkjc/harville_model.py,sha256=MZjPLS-1nbEhp1d4Syuq13DtraKnd7TlNqBmOOCwxgc,15976
4
- hkjc/historical.py,sha256=v9k_R47Na5en5ftrocjIHofkNAUthE_lp4CyLaCTsQE,8280
5
- hkjc/live.py,sha256=GqctH-BVdIL6Vi1g8XHe3p8fZBopCQf5KACLAR0meP0,10249
6
- hkjc/processing.py,sha256=H0chtW_FBMMhK3IzcjYjrryd3fAPYimanc2fWuGiB0M,6807
7
- hkjc/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- hkjc/speedpro.py,sha256=Y2Z3GYGeePc4sM-ZnCHXCI1N7L-_j9nrMqS3CC5BBSo,2031
9
- hkjc/utils.py,sha256=4CA_FPf_U3GvzoLkqBX0qDPZgrSvKJKvbP7VWqd5FiA,6323
10
- hkjc/strategy/place_only.py,sha256=lHPjTSj8PzghxncNBg8FI4T4HJigekB9a3bV7l7VtPA,2079
11
- hkjc/strategy/qpbanker.py,sha256=MQxjwsfhllKZroKS8w8Q3bi3HMjGc1DAyBIjNZAp3yQ,4805
12
- hkjc-0.3.18.dist-info/METADATA,sha256=aoXp6Fvn3EkuXyv6p5LClSbZa5XS_bfcUxMKBJXcNvw,480
13
- hkjc-0.3.18.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
14
- hkjc-0.3.18.dist-info/RECORD,,
File without changes