hkjc 0.3.21__py3-none-any.whl → 0.3.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hkjc/historical.py CHANGED
@@ -15,12 +15,22 @@ HKJC_HORSE_URL_TEMPLATE = "https://racing.hkjc.com/racing/information/English/Ho
15
15
  incidents = ['DISQ', 'DNF', 'FE', 'ML', 'PU', 'TNP', 'TO',
16
16
  'UR', 'VOID', 'WR', 'WV', 'WV-A', 'WX', 'WX-A', 'WXNR']
17
17
 
18
+ REQUEST_TIMEOUT = 10
19
+
20
+ HTML_HEADERS = {
21
+ "Origin": "https://racing.hkjc.com",
22
+ "Referer": "https://racing.hkjc.com",
23
+ "Content-Type": "text/plain",
24
+ "Accept": "*/*",
25
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36",
26
+ }
27
+
18
28
 
19
29
  @ttl_cache(maxsize=100, ttl=3600)
20
30
  def _soupify(url: str) -> BeautifulSoup:
21
31
  """Fetch and parse a webpage and return BeautifulSoup object
22
32
  """
23
- response = requests.get(url, timeout=180)
33
+ response = requests.get(url, timeout=REQUEST_TIMEOUT, headers=HTML_HEADERS)
24
34
  response.raise_for_status()
25
35
  return BeautifulSoup(response.content, 'html.parser')
26
36
 
@@ -43,6 +53,9 @@ def _soupify_horse_page(horse_no: str) -> BeautifulSoup:
43
53
  def _classify_running_style(df: pl.DataFrame, running_pos_col="RunningPosition") -> pl.DataFrame:
44
54
  """Classify running style based on RunningPosition column
45
55
  """
56
+ if df.height == 0:
57
+ return df
58
+
46
59
  # Split the RunningPosition column into separate columns and convert to integers
47
60
  df = df.with_columns(
48
61
  pl.col(running_pos_col)
@@ -74,12 +87,16 @@ def _extract_horse_data(horse_no: str) -> pl.DataFrame:
74
87
  """Extract horse info and history from horse page
75
88
  """
76
89
  soup = _soupify_horse_page(horse_no)
90
+ horse_name = soup.find('title').get_text().split('- Horses -')[0].strip()
77
91
  table = soup.find('table', class_='bigborder')
78
92
  horse_data = _parse_html_table(table).filter(
79
93
  pl.col('Date') != '') # Remove empty rows
80
- horse_data = _classify_running_style(horse_data)
81
-
82
- horse_data = horse_data.with_columns(pl.lit(horse_no).alias('HorseNo'))
94
+ if horse_data.height > 0:
95
+ horse_data = _classify_running_style(horse_data)
96
+ horse_data = horse_data.with_columns([
97
+ pl.lit(horse_no).alias('HorseNo'),
98
+ pl.lit(horse_name).alias('HorseName')
99
+ ])
83
100
 
84
101
  return horse_data
85
102
 
@@ -87,6 +104,9 @@ def _extract_horse_data(horse_no: str) -> pl.DataFrame:
87
104
  def _clean_horse_data(df: pl.DataFrame) -> pl.DataFrame:
88
105
  """ Clean and convert horse data to suitable data types
89
106
  """
107
+ if df.height == 0:
108
+ return df
109
+
90
110
  df = df.with_columns(
91
111
  pl.col('Pla').str.split(' ').list.first().alias('Pla')
92
112
  ).filter(~pl.col('Pla').is_in(incidents))
@@ -105,7 +125,8 @@ def _clean_horse_data(df: pl.DataFrame) -> pl.DataFrame:
105
125
  .with_columns(
106
126
  (
107
127
  pl.col("FinishTime").str.splitn(".", 2).struct.field("field_0").cast(pl.Int64) * 60 +
108
- pl.col("FinishTime").str.splitn(".", 2).struct.field("field_1").cast(pl.Float64)
128
+ pl.col("FinishTime").str.splitn(
129
+ ".", 2).struct.field("field_1").cast(pl.Float64)
109
130
  ).cast(pl.Float64).round(2).alias("FinishTime")
110
131
  ))
111
132
 
@@ -134,6 +155,9 @@ def get_horse_data(horse_no: str) -> pl.DataFrame:
134
155
  def _clean_race_data(df: pl.DataFrame) -> pl.DataFrame:
135
156
  """ Clean and convert horse data to suitable data types
136
157
  """
158
+ if df.height == 0:
159
+ return df
160
+
137
161
  df = df.with_columns(
138
162
  pl.col('Pla').str.split(' ').list.first().alias('Pla')
139
163
  ).filter(~pl.col('Pla').is_in(incidents))
@@ -150,7 +174,8 @@ def _clean_race_data(df: pl.DataFrame) -> pl.DataFrame:
150
174
  df = df.with_columns(
151
175
  (
152
176
  pl.col("FinishTime").str.splitn(":", 2).struct.field("field_0").cast(pl.Int64) * 60 +
153
- pl.col("FinishTime").str.splitn(":", 2).struct.field("field_1").cast(pl.Float64)
177
+ pl.col("FinishTime").str.splitn(
178
+ ":", 2).struct.field("field_1").cast(pl.Float64)
154
179
  ).cast(pl.Float64).round(2).alias("FinishTime")
155
180
  )
156
181
 
hkjc/live.py CHANGED
@@ -7,6 +7,8 @@ import requests
7
7
  from cachetools.func import ttl_cache
8
8
  import numpy as np
9
9
 
10
+ from .utils import _try_int
11
+
10
12
  HKJC_LIVEODDS_ENDPOINT = "https://info.cld.hkjc.com/graphql/base/"
11
13
 
12
14
  RACEMTG_PAYLOAD = {
@@ -241,29 +243,42 @@ query racing($date: String, $venueCode: String, $oddsTypes: [OddsType], $raceNo:
241
243
  }""",
242
244
  }
243
245
 
246
+ JSON_HEADERS = {
247
+ "Origin": "https://bet.hkjc.com",
248
+ "Referer": "https://bet.hkjc.com",
249
+ "Content-Type": "application/json",
250
+ "Accept": "application/json",
251
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36",
252
+ }
253
+
254
+ REQUEST_TIMEOUT = 30
255
+
244
256
 
245
257
  @ttl_cache(maxsize=12, ttl=1000)
246
- def _fetch_live_races(date: str=None, venue_code: str=None) -> dict:
258
+ def _fetch_live_races(date: str = None, venue_code: str = None) -> dict:
247
259
  """Fetch live race data from HKJC GraphQL endpoint."""
248
260
  payload = RACEMTG_PAYLOAD.copy()
249
261
  payload["variables"] = payload["variables"].copy()
250
262
  payload["variables"]["date"] = date
251
263
  payload["variables"]["venueCode"] = venue_code
252
264
 
253
- headers = {
254
- "Origin": "https://bet.hkjc.com",
255
- "Referer": "https://bet.hkjc.com",
256
- "Content-Type": "application/json",
257
- "Accept": "application/json",
258
- "User-Agent": "python-hkjc-fetch/0.1",
259
- }
265
+ headers = JSON_HEADERS
260
266
 
261
267
  r = requests.post(HKJC_LIVEODDS_ENDPOINT, json=payload,
262
- headers=headers, timeout=10)
268
+ headers=headers, timeout=REQUEST_TIMEOUT)
263
269
  if r.status_code != 200:
264
270
  raise RuntimeError(f"Request failed: {r.status_code} - {r.text}")
265
271
 
266
- data = r.json()['data']['raceMeetings'][0]
272
+ data = r.json()['data']['raceMeetings'] # list of all meetings
273
+
274
+ # Prioritize first local race, if not continue with the first race (default 0)
275
+ index = 0
276
+ for i, entry in enumerate(data):
277
+ if entry['venueCode'] in ['HV', 'ST']:
278
+ index = i
279
+ break
280
+
281
+ data = data[index]
267
282
  races = data['races']
268
283
 
269
284
  race_info = {'Date': data['date'], 'Venue': data['venueCode'], 'Races': {}}
@@ -279,9 +294,9 @@ def _fetch_live_races(date: str=None, venue_code: str=None) -> dict:
279
294
  runners = [{'No': runner['no'],
280
295
  'Name': runner['name_en'],
281
296
  'Dr': runner['barrierDrawNumber'],
282
- 'Rtg': int(runner['currentRating']),
283
- 'Wt': int(runner['currentWeight']),
284
- 'Handicap': int(runner['handicapWeight']),
297
+ 'Rtg': _try_int(runner['currentRating']),
298
+ 'Wt': _try_int(runner['currentWeight']),
299
+ 'Handicap': _try_int(runner['handicapWeight']),
285
300
  'HorseNo': runner['horse']['code']
286
301
  } for runner in race['runners'] if runner['status'] != "Standby"]
287
302
  race_info['Races'][race_num] = {
@@ -307,16 +322,10 @@ def _fetch_live_odds(date: str, venue_code: str, race_number: int, odds_type: Tu
307
322
  payload["variables"]["raceNo"] = race_number
308
323
  payload["variables"]["oddsTypes"] = odds_type
309
324
 
310
- headers = {
311
- "Origin": "https://bet.hkjc.com",
312
- "Referer": "https://bet.hkjc.com",
313
- "Content-Type": "application/json",
314
- "Accept": "application/json",
315
- "User-Agent": "python-hkjc-fetch/0.1",
316
- }
325
+ headers = JSON_HEADERS
317
326
 
318
327
  r = requests.post(HKJC_LIVEODDS_ENDPOINT, json=payload,
319
- headers=headers, timeout=10)
328
+ headers=headers, timeout=REQUEST_TIMEOUT)
320
329
  if r.status_code != 200:
321
330
  raise RuntimeError(f"Request failed: {r.status_code} - {r.text}")
322
331
 
@@ -354,7 +363,8 @@ def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str]
354
363
  N = len(race_info['Races'][race_number]['Runners'])
355
364
 
356
365
  if (race_info['Date'] != date) or (race_info['Venue'] != venue_code):
357
- print(f"[WARNING] Requested {date} {venue_code} but server returned {race_info['Date']} {race_info['Venue']}.")
366
+ print(
367
+ f"[WARNING] Requested {date} {venue_code} but server returned {race_info['Date']} {race_info['Venue']}.")
358
368
  date = race_info['Date']
359
369
  venue_code = race_info['Venue']
360
370
 
hkjc/utils.py CHANGED
@@ -4,6 +4,12 @@ from datetime import datetime as dt
4
4
  import bs4
5
5
  import re
6
6
 
7
+ def _try_int(value: str) -> int:
8
+ try:
9
+ return int(value)
10
+ except:
11
+ return 0
12
+
7
13
 
8
14
  def _validate_date(date_str: str) -> bool:
9
15
  # validate date format
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hkjc
3
- Version: 0.3.21
3
+ Version: 0.3.23
4
4
  Summary: Library for scrapping HKJC data and perform basic analysis
5
5
  Requires-Python: >=3.11
6
6
  Requires-Dist: beautifulsoup4>=4.14.2
@@ -12,5 +12,4 @@ Requires-Dist: numpy>=2.3.3
12
12
  Requires-Dist: polars>=1.33.1
13
13
  Requires-Dist: pyarrow>=21.0.0
14
14
  Requires-Dist: requests>=2.32.5
15
- Requires-Dist: scipy>=1.16.2
16
15
  Requires-Dist: tqdm>=4.67.1
@@ -1,14 +1,14 @@
1
1
  hkjc/__init__.py,sha256=XSm9N6YbZ2SzyxjO9aR26ctB4Z1-VeBImuroSgncUfk,737
2
2
  hkjc/features.py,sha256=LicwtKBpMzpz_dSX9bjoCLLaRUu8oeZo1AloTe7v7sI,298
3
3
  hkjc/harville_model.py,sha256=WSA_1EcNOHKGraP6WVHJ3FXZPGrDrjKhJc_q70KKx80,20188
4
- hkjc/historical.py,sha256=aONchf7CMNs2B-WVDS_GWg8g0U0ZEH-FjbfhdJwc_N0,7683
5
- hkjc/live.py,sha256=CfMeHRQfhKSmhQaexM99sdP0KRbIEqg2DIvNPc1gohk,10696
4
+ hkjc/historical.py,sha256=88z3DiWuj1L0sJw5EXnEkg4L_xx7-UH6UI6x9duDMvI,8380
5
+ hkjc/live.py,sha256=DgCjqd-QHdUk2ReSQoxIcUhcChCqtUG60p8r-iHnk-k,10958
6
6
  hkjc/processing.py,sha256=hQnHxl6HYlFOeSLSOCVsemgTKcwt9_tYUQI-itpvjUg,7188
7
7
  hkjc/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  hkjc/speedpro.py,sha256=Y2Z3GYGeePc4sM-ZnCHXCI1N7L-_j9nrMqS3CC5BBSo,2031
9
- hkjc/utils.py,sha256=4CA_FPf_U3GvzoLkqBX0qDPZgrSvKJKvbP7VWqd5FiA,6323
9
+ hkjc/utils.py,sha256=uAiFmy5NXsADUiD1-MCPgs1hs4N3e7tVYtSREkxwKSQ,6425
10
10
  hkjc/strategy/place_only.py,sha256=lHPjTSj8PzghxncNBg8FI4T4HJigekB9a3bV7l7VtPA,2079
11
11
  hkjc/strategy/qpbanker.py,sha256=MQxjwsfhllKZroKS8w8Q3bi3HMjGc1DAyBIjNZAp3yQ,4805
12
- hkjc-0.3.21.dist-info/METADATA,sha256=YuIC0EvFVS3Z-8cwdzczMV7qQxMYvIKtO442iUQu5Jg,480
13
- hkjc-0.3.21.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
14
- hkjc-0.3.21.dist-info/RECORD,,
12
+ hkjc-0.3.23.dist-info/METADATA,sha256=_BEaF2r7sXrq2lhdFj-qunHEKL4koB8-vJtbDs0ZeVw,451
13
+ hkjc-0.3.23.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
14
+ hkjc-0.3.23.dist-info/RECORD,,
File without changes