hkjc 0.3.21__py3-none-any.whl → 0.3.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hkjc/historical.py
CHANGED
@@ -15,12 +15,22 @@ HKJC_HORSE_URL_TEMPLATE = "https://racing.hkjc.com/racing/information/English/Ho
|
|
15
15
|
incidents = ['DISQ', 'DNF', 'FE', 'ML', 'PU', 'TNP', 'TO',
|
16
16
|
'UR', 'VOID', 'WR', 'WV', 'WV-A', 'WX', 'WX-A', 'WXNR']
|
17
17
|
|
18
|
+
REQUEST_TIMEOUT = 10
|
19
|
+
|
20
|
+
HTML_HEADERS = {
|
21
|
+
"Origin": "https://racing.hkjc.com",
|
22
|
+
"Referer": "https://racing.hkjc.com",
|
23
|
+
"Content-Type": "text/plain",
|
24
|
+
"Accept": "*/*",
|
25
|
+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36",
|
26
|
+
}
|
27
|
+
|
18
28
|
|
19
29
|
@ttl_cache(maxsize=100, ttl=3600)
|
20
30
|
def _soupify(url: str) -> BeautifulSoup:
|
21
31
|
"""Fetch and parse a webpage and return a BeautifulSoup object
|
22
32
|
"""
|
23
|
-
response = requests.get(url, timeout=
|
33
|
+
response = requests.get(url, timeout=REQUEST_TIMEOUT, headers=HTML_HEADERS)
|
24
34
|
response.raise_for_status()
|
25
35
|
return BeautifulSoup(response.content, 'html.parser')
|
26
36
|
|
@@ -43,6 +53,9 @@ def _soupify_horse_page(horse_no: str) -> BeautifulSoup:
|
|
43
53
|
def _classify_running_style(df: pl.DataFrame, running_pos_col="RunningPosition") -> pl.DataFrame:
|
44
54
|
"""Classify running style based on RunningPosition column
|
45
55
|
"""
|
56
|
+
if df.height == 0:
|
57
|
+
return df
|
58
|
+
|
46
59
|
# Split the RunningPosition column into separate columns and convert to integers
|
47
60
|
df = df.with_columns(
|
48
61
|
pl.col(running_pos_col)
|
@@ -74,12 +87,16 @@ def _extract_horse_data(horse_no: str) -> pl.DataFrame:
|
|
74
87
|
"""Extract horse info and history from horse page
|
75
88
|
"""
|
76
89
|
soup = _soupify_horse_page(horse_no)
|
90
|
+
horse_name = soup.find('title').get_text().split('- Horses -')[0].strip()
|
77
91
|
table = soup.find('table', class_='bigborder')
|
78
92
|
horse_data = _parse_html_table(table).filter(
|
79
93
|
pl.col('Date') != '') # Remove empty rows
|
80
|
-
horse_data
|
81
|
-
|
82
|
-
|
94
|
+
if horse_data.height > 0:
|
95
|
+
horse_data = _classify_running_style(horse_data)
|
96
|
+
horse_data = horse_data.with_columns([
|
97
|
+
pl.lit(horse_no).alias('HorseNo'),
|
98
|
+
pl.lit(horse_name).alias('HorseName')
|
99
|
+
])
|
83
100
|
|
84
101
|
return horse_data
|
85
102
|
|
@@ -87,6 +104,9 @@ def _extract_horse_data(horse_no: str) -> pl.DataFrame:
|
|
87
104
|
def _clean_horse_data(df: pl.DataFrame) -> pl.DataFrame:
|
88
105
|
""" Clean and convert horse data to suitable data types
|
89
106
|
"""
|
107
|
+
if df.height == 0:
|
108
|
+
return df
|
109
|
+
|
90
110
|
df = df.with_columns(
|
91
111
|
pl.col('Pla').str.split(' ').list.first().alias('Pla')
|
92
112
|
).filter(~pl.col('Pla').is_in(incidents))
|
@@ -105,7 +125,8 @@ def _clean_horse_data(df: pl.DataFrame) -> pl.DataFrame:
|
|
105
125
|
.with_columns(
|
106
126
|
(
|
107
127
|
pl.col("FinishTime").str.splitn(".", 2).struct.field("field_0").cast(pl.Int64) * 60 +
|
108
|
-
pl.col("FinishTime").str.splitn(
|
128
|
+
pl.col("FinishTime").str.splitn(
|
129
|
+
".", 2).struct.field("field_1").cast(pl.Float64)
|
109
130
|
).cast(pl.Float64).round(2).alias("FinishTime")
|
110
131
|
))
|
111
132
|
|
@@ -134,6 +155,9 @@ def get_horse_data(horse_no: str) -> pl.DataFrame:
|
|
134
155
|
def _clean_race_data(df: pl.DataFrame) -> pl.DataFrame:
|
135
156
|
""" Clean and convert horse data to suitable data types
|
136
157
|
"""
|
158
|
+
if df.height == 0:
|
159
|
+
return df
|
160
|
+
|
137
161
|
df = df.with_columns(
|
138
162
|
pl.col('Pla').str.split(' ').list.first().alias('Pla')
|
139
163
|
).filter(~pl.col('Pla').is_in(incidents))
|
@@ -150,7 +174,8 @@ def _clean_race_data(df: pl.DataFrame) -> pl.DataFrame:
|
|
150
174
|
df = df.with_columns(
|
151
175
|
(
|
152
176
|
pl.col("FinishTime").str.splitn(":", 2).struct.field("field_0").cast(pl.Int64) * 60 +
|
153
|
-
pl.col("FinishTime").str.splitn(
|
177
|
+
pl.col("FinishTime").str.splitn(
|
178
|
+
":", 2).struct.field("field_1").cast(pl.Float64)
|
154
179
|
).cast(pl.Float64).round(2).alias("FinishTime")
|
155
180
|
)
|
156
181
|
|
hkjc/live.py
CHANGED
@@ -7,6 +7,8 @@ import requests
|
|
7
7
|
from cachetools.func import ttl_cache
|
8
8
|
import numpy as np
|
9
9
|
|
10
|
+
from .utils import _try_int
|
11
|
+
|
10
12
|
HKJC_LIVEODDS_ENDPOINT = "https://info.cld.hkjc.com/graphql/base/"
|
11
13
|
|
12
14
|
RACEMTG_PAYLOAD = {
|
@@ -241,29 +243,42 @@ query racing($date: String, $venueCode: String, $oddsTypes: [OddsType], $raceNo:
|
|
241
243
|
}""",
|
242
244
|
}
|
243
245
|
|
246
|
+
JSON_HEADERS = {
|
247
|
+
"Origin": "https://bet.hkjc.com",
|
248
|
+
"Referer": "https://bet.hkjc.com",
|
249
|
+
"Content-Type": "application/json",
|
250
|
+
"Accept": "application/json",
|
251
|
+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36",
|
252
|
+
}
|
253
|
+
|
254
|
+
REQUEST_TIMEOUT = 30
|
255
|
+
|
244
256
|
|
245
257
|
@ttl_cache(maxsize=12, ttl=1000)
|
246
|
-
def _fetch_live_races(date: str=None, venue_code: str=None) -> dict:
|
258
|
+
def _fetch_live_races(date: str = None, venue_code: str = None) -> dict:
|
247
259
|
"""Fetch live race data from HKJC GraphQL endpoint."""
|
248
260
|
payload = RACEMTG_PAYLOAD.copy()
|
249
261
|
payload["variables"] = payload["variables"].copy()
|
250
262
|
payload["variables"]["date"] = date
|
251
263
|
payload["variables"]["venueCode"] = venue_code
|
252
264
|
|
253
|
-
headers =
|
254
|
-
"Origin": "https://bet.hkjc.com",
|
255
|
-
"Referer": "https://bet.hkjc.com",
|
256
|
-
"Content-Type": "application/json",
|
257
|
-
"Accept": "application/json",
|
258
|
-
"User-Agent": "python-hkjc-fetch/0.1",
|
259
|
-
}
|
265
|
+
headers = JSON_HEADERS
|
260
266
|
|
261
267
|
r = requests.post(HKJC_LIVEODDS_ENDPOINT, json=payload,
|
262
|
-
headers=headers, timeout=
|
268
|
+
headers=headers, timeout=REQUEST_TIMEOUT)
|
263
269
|
if r.status_code != 200:
|
264
270
|
raise RuntimeError(f"Request failed: {r.status_code} - {r.text}")
|
265
271
|
|
266
|
-
data = r.json()['data']['raceMeetings']
|
272
|
+
data = r.json()['data']['raceMeetings'] # list of all meetings
|
273
|
+
|
274
|
+
# Prefer the first meeting whose venueCode is 'HV' or 'ST'; if there is none, fall back to the first meeting (index 0)
|
275
|
+
index = 0
|
276
|
+
for i, entry in enumerate(data):
|
277
|
+
if entry['venueCode'] in ['HV', 'ST']:
|
278
|
+
index = i
|
279
|
+
break
|
280
|
+
|
281
|
+
data = data[index]
|
267
282
|
races = data['races']
|
268
283
|
|
269
284
|
race_info = {'Date': data['date'], 'Venue': data['venueCode'], 'Races': {}}
|
@@ -279,9 +294,9 @@ def _fetch_live_races(date: str=None, venue_code: str=None) -> dict:
|
|
279
294
|
runners = [{'No': runner['no'],
|
280
295
|
'Name': runner['name_en'],
|
281
296
|
'Dr': runner['barrierDrawNumber'],
|
282
|
-
'Rtg':
|
283
|
-
'Wt':
|
284
|
-
'Handicap':
|
297
|
+
'Rtg': _try_int(runner['currentRating']),
|
298
|
+
'Wt': _try_int(runner['currentWeight']),
|
299
|
+
'Handicap': _try_int(runner['handicapWeight']),
|
285
300
|
'HorseNo': runner['horse']['code']
|
286
301
|
} for runner in race['runners'] if runner['status'] != "Standby"]
|
287
302
|
race_info['Races'][race_num] = {
|
@@ -307,16 +322,10 @@ def _fetch_live_odds(date: str, venue_code: str, race_number: int, odds_type: Tu
|
|
307
322
|
payload["variables"]["raceNo"] = race_number
|
308
323
|
payload["variables"]["oddsTypes"] = odds_type
|
309
324
|
|
310
|
-
headers =
|
311
|
-
"Origin": "https://bet.hkjc.com",
|
312
|
-
"Referer": "https://bet.hkjc.com",
|
313
|
-
"Content-Type": "application/json",
|
314
|
-
"Accept": "application/json",
|
315
|
-
"User-Agent": "python-hkjc-fetch/0.1",
|
316
|
-
}
|
325
|
+
headers = JSON_HEADERS
|
317
326
|
|
318
327
|
r = requests.post(HKJC_LIVEODDS_ENDPOINT, json=payload,
|
319
|
-
headers=headers, timeout=
|
328
|
+
headers=headers, timeout=REQUEST_TIMEOUT)
|
320
329
|
if r.status_code != 200:
|
321
330
|
raise RuntimeError(f"Request failed: {r.status_code} - {r.text}")
|
322
331
|
|
@@ -354,7 +363,8 @@ def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str]
|
|
354
363
|
N = len(race_info['Races'][race_number]['Runners'])
|
355
364
|
|
356
365
|
if (race_info['Date'] != date) or (race_info['Venue'] != venue_code):
|
357
|
-
print(
|
366
|
+
print(
|
367
|
+
f"[WARNING] Requested {date} {venue_code} but server returned {race_info['Date']} {race_info['Venue']}.")
|
358
368
|
date = race_info['Date']
|
359
369
|
venue_code = race_info['Venue']
|
360
370
|
|
hkjc/utils.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: hkjc
|
3
|
-
Version: 0.3.
|
3
|
+
Version: 0.3.23
|
4
4
|
Summary: Library for scrapping HKJC data and perform basic analysis
|
5
5
|
Requires-Python: >=3.11
|
6
6
|
Requires-Dist: beautifulsoup4>=4.14.2
|
@@ -12,5 +12,4 @@ Requires-Dist: numpy>=2.3.3
|
|
12
12
|
Requires-Dist: polars>=1.33.1
|
13
13
|
Requires-Dist: pyarrow>=21.0.0
|
14
14
|
Requires-Dist: requests>=2.32.5
|
15
|
-
Requires-Dist: scipy>=1.16.2
|
16
15
|
Requires-Dist: tqdm>=4.67.1
|
@@ -1,14 +1,14 @@
|
|
1
1
|
hkjc/__init__.py,sha256=XSm9N6YbZ2SzyxjO9aR26ctB4Z1-VeBImuroSgncUfk,737
|
2
2
|
hkjc/features.py,sha256=LicwtKBpMzpz_dSX9bjoCLLaRUu8oeZo1AloTe7v7sI,298
|
3
3
|
hkjc/harville_model.py,sha256=WSA_1EcNOHKGraP6WVHJ3FXZPGrDrjKhJc_q70KKx80,20188
|
4
|
-
hkjc/historical.py,sha256=
|
5
|
-
hkjc/live.py,sha256=
|
4
|
+
hkjc/historical.py,sha256=88z3DiWuj1L0sJw5EXnEkg4L_xx7-UH6UI6x9duDMvI,8380
|
5
|
+
hkjc/live.py,sha256=DgCjqd-QHdUk2ReSQoxIcUhcChCqtUG60p8r-iHnk-k,10958
|
6
6
|
hkjc/processing.py,sha256=hQnHxl6HYlFOeSLSOCVsemgTKcwt9_tYUQI-itpvjUg,7188
|
7
7
|
hkjc/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
8
|
hkjc/speedpro.py,sha256=Y2Z3GYGeePc4sM-ZnCHXCI1N7L-_j9nrMqS3CC5BBSo,2031
|
9
|
-
hkjc/utils.py,sha256=
|
9
|
+
hkjc/utils.py,sha256=uAiFmy5NXsADUiD1-MCPgs1hs4N3e7tVYtSREkxwKSQ,6425
|
10
10
|
hkjc/strategy/place_only.py,sha256=lHPjTSj8PzghxncNBg8FI4T4HJigekB9a3bV7l7VtPA,2079
|
11
11
|
hkjc/strategy/qpbanker.py,sha256=MQxjwsfhllKZroKS8w8Q3bi3HMjGc1DAyBIjNZAp3yQ,4805
|
12
|
-
hkjc-0.3.
|
13
|
-
hkjc-0.3.
|
14
|
-
hkjc-0.3.
|
12
|
+
hkjc-0.3.23.dist-info/METADATA,sha256=_BEaF2r7sXrq2lhdFj-qunHEKL4koB8-vJtbDs0ZeVw,451
|
13
|
+
hkjc-0.3.23.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
14
|
+
hkjc-0.3.23.dist-info/RECORD,,
|
File without changes
|