hkjc 0.3.16__py3-none-any.whl → 0.3.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hkjc/__init__.py +2 -3
- hkjc/historical.py +72 -25
- hkjc/live.py +375 -0
- hkjc/processing.py +8 -35
- {hkjc-0.3.16.dist-info → hkjc-0.3.18.dist-info}/METADATA +2 -2
- {hkjc-0.3.16.dist-info → hkjc-0.3.18.dist-info}/RECORD +7 -7
- hkjc/live_odds.py +0 -136
- {hkjc-0.3.16.dist-info → hkjc-0.3.18.dist-info}/WHEEL +0 -0
hkjc/__init__.py
CHANGED
@@ -4,7 +4,7 @@ This module re-exports commonly used symbols from the submodules.
|
|
4
4
|
"""
|
5
5
|
from importlib.metadata import version as _version
|
6
6
|
|
7
|
-
__all__ = ["
|
7
|
+
# Public names re-exported by the package.
# NOTE: the module is spelled ``harville_model`` (see the
# ``from . import harville_model, live`` import below); the previous
# ``"harveille_model"`` entry named nothing that is imported here, which makes
# ``from hkjc import *`` fail with AttributeError.
__all__ = ["live", "qpbanker",
           "generate_all_qp_trades", "generate_all_pla_trades", "pareto_filter",
           "speedpro_energy", "speedmap", "harville_model",
           "generate_historical_data"]
|
@@ -14,8 +14,7 @@ try:
|
|
14
14
|
except Exception: # pragma: no cover - best-effort version resolution
|
15
15
|
__version__ = "0.0.0"
|
16
16
|
|
17
|
-
from .live_odds import live_odds
|
18
17
|
from .processing import generate_all_qp_trades, generate_all_pla_trades, generate_historical_data
|
19
18
|
from .utils import pareto_filter
|
20
19
|
from .speedpro import speedmap, speedpro_energy
|
21
|
-
from . import harville_model
|
20
|
+
from . import harville_model, live
|
hkjc/historical.py
CHANGED
@@ -12,6 +12,9 @@ from .utils import _parse_html_table
|
|
12
12
|
HKJC_RACE_URL_TEMPLATE = "https://racing.hkjc.com/racing/information/English/Racing/LocalResults.aspx?RaceDate={date}&Racecourse={venue_code}&RaceNo={race_number}"
|
13
13
|
HKJC_HORSE_URL_TEMPLATE = "https://racing.hkjc.com/racing/information/English/Horse/Horse.aspx?HorseNo={horse_no}"
|
14
14
|
|
15
|
+
incidents = ['DISQ', 'DNF', 'FE', 'ML', 'PU', 'TNP', 'TO',
|
16
|
+
'UR', 'VOID', 'WR', 'WV', 'WV-A', 'WX', 'WX-A', 'WXNR']
|
17
|
+
|
15
18
|
|
16
19
|
@ttl_cache(maxsize=100, ttl=3600)
|
17
20
|
def _soupify(url: str) -> BeautifulSoup:
|
@@ -37,7 +40,6 @@ def _soupify_horse_page(horse_no: str) -> BeautifulSoup:
|
|
37
40
|
return _soupify(url)
|
38
41
|
|
39
42
|
|
40
|
-
|
41
43
|
def _classify_running_style(df: pl.DataFrame, running_pos_col="RunningPosition") -> pl.DataFrame:
|
42
44
|
"""Classify running style based on RunningPosition column
|
43
45
|
"""
|
@@ -50,14 +52,13 @@ def _classify_running_style(df: pl.DataFrame, running_pos_col="RunningPosition")
|
|
50
52
|
.alias("split_data").cast(pl.Int64, strict=False)
|
51
53
|
).unnest("split_data")
|
52
54
|
|
53
|
-
df.with_columns(pl.col('FinishPosition').fill_null(pl.col('Position3')))
|
55
|
+
df = df.with_columns(pl.col('FinishPosition').fill_null(pl.col('Position3')))
|
54
56
|
|
55
57
|
df = df.with_columns([
|
56
58
|
(pl.col("StartPosition")-pl.col("FinishPosition")).alias("PositionChange"),
|
57
|
-
pl.mean_horizontal("StartPosition", "Position2",
|
58
|
-
"Position3", "FinishPosition").alias("AvgPosition"),
|
59
|
+
pl.mean_horizontal("StartPosition", "Position2").alias("AvgStartPosition"),
|
59
60
|
]).with_columns(pl.when(pl.col("StartPosition").is_null()).then(pl.lit("--"))
|
60
|
-
.when((pl.col("
|
61
|
+
.when((pl.col("AvgStartPosition") <= 3) & (pl.col("StartPosition") <= 3)).then(pl.lit("FrontRunner"))
|
61
62
|
.when((pl.col("PositionChange") >= 1) & (pl.col("StartPosition") >= 6)).then(pl.lit("Closer"))
|
62
63
|
.otherwise(pl.lit("Pacer")).alias("RunningStyle"))
|
63
64
|
|
@@ -67,7 +68,7 @@ def _classify_running_style(df: pl.DataFrame, running_pos_col="RunningPosition")
|
|
67
68
|
return df
|
68
69
|
|
69
70
|
|
70
|
-
def
|
71
|
+
def _extract_horse_data(horse_no: str) -> pl.DataFrame:
|
71
72
|
"""Extract horse info and history from horse page
|
72
73
|
"""
|
73
74
|
soup = _soupify_horse_page(horse_no)
|
@@ -82,13 +83,16 @@ def get_horse_data(horse_no: str) -> pl.DataFrame:
|
|
82
83
|
profile_data = _parse_html_table(table[1], skip_header=True)
|
83
84
|
|
84
85
|
try:
|
85
|
-
current_rating = int(profile_data.filter(
|
86
|
-
|
86
|
+
current_rating = int(profile_data.filter(
|
87
|
+
pl.col("column_0").str.starts_with("Current Rating"))['column_2'].item(0))
|
88
|
+
season_start_rating = int(profile_data.filter(pl.col(
|
89
|
+
"column_0").str.starts_with("Start of Season Rating"))['column_2'].item(0))
|
87
90
|
except:
|
88
91
|
current_rating, season_start_rating = 0, 0
|
89
|
-
|
92
|
+
|
90
93
|
try:
|
91
|
-
last_rating = int(profile_data.filter(
|
94
|
+
last_rating = int(profile_data.filter(
|
95
|
+
pl.col("column_0").str.starts_with("Last Rating"))['column_2'].item(0))
|
92
96
|
except:
|
93
97
|
last_rating = 0
|
94
98
|
|
@@ -96,47 +100,85 @@ def get_horse_data(horse_no: str) -> pl.DataFrame:
|
|
96
100
|
'HorseID': horse_no,
|
97
101
|
'CurrentRating': current_rating,
|
98
102
|
'SeasonStartRating': season_start_rating,
|
99
|
-
'LastRating'
|
103
|
+
'LastRating': last_rating if current_rating == 0 else current_rating
|
100
104
|
}
|
101
105
|
horse_data = (horse_data.with_columns([
|
102
106
|
pl.lit(value).alias(key) for key, value in horse_info.items()
|
103
107
|
])
|
104
108
|
)
|
105
109
|
|
106
|
-
|
110
|
+
return horse_data
|
111
|
+
|
112
|
+
|
113
|
+
def _clean_horse_data(df: pl.DataFrame) -> pl.DataFrame:
    """Clean a horse's race-record table and convert columns to numeric types.

    Steps, in order:

    1. Reduce ``Pla`` to its first space-separated token and drop every row
       whose token is one of the module-level ``incidents`` codes.
    2. Cast ``Pla``, ``ActWt``, ``DeclarHorseWt``, ``Dr``, ``Rtg``, ``Dist``
       and ``RaceIndex`` to ``Int64`` and ``WinOdds`` to ``Float64``; values
       that cannot be parsed become null (``strict=False``).
    3. Convert ``FinishTime`` to a number of seconds stored as ``Float64``.
    4. Split ``RCTrackCourse`` on ``' / '`` into the three columns ``Venue``,
       ``Track`` and ``Course``.

    Args:
        df: Raw table with the string columns listed above.

    Returns:
        The cleaned DataFrame.
    """
    df = df.with_columns(
        pl.col('Pla').str.split(' ').list.first().alias('Pla')
    ).filter(~pl.col('Pla').is_in(incidents))

    df = df.with_columns([
        pl.col('Pla').cast(pl.Int64, strict=False),
        pl.col('ActWt').cast(pl.Int64, strict=False),
        pl.col('DeclarHorseWt').cast(pl.Int64, strict=False),
        pl.col('Dr').cast(pl.Int64, strict=False),
        pl.col('Rtg').cast(pl.Int64, strict=False),
        pl.col('Dist').cast(pl.Int64, strict=False),
        pl.col('WinOdds').cast(pl.Float64, strict=False),
        pl.col('RaceIndex').cast(pl.Int64, strict=False)
    ])

    # NOTE(review): presumably FinishTime looks like "M.SS.hh" on the horse
    # page -- confirm. ``splitn(".", 2)`` splits only at the first dot so the
    # second field keeps the whole "SS.hh" remainder; parsing it as a float
    # preserves the fractional seconds, which the earlier
    # ``split_exact(".", 1)`` + integer cast discarded.
    finish_parts = pl.col("FinishTime").str.splitn(".", 2)
    df = df.with_columns(
        (
            finish_parts.struct.field("field_0").cast(pl.Int64) * 60 +
            finish_parts.struct.field("field_1").cast(pl.Float64)
        ).cast(pl.Float64).alias("FinishTime")
    )

    df = df.with_columns(
        pl.col('RCTrackCourse').str.split_exact(' / ', 2)
        .struct.rename_fields(['Venue', 'Track', 'Course'])
        .alias('RCTrackCourse')
    ).unnest('RCTrackCourse')

    return df
|
131
145
|
|
146
|
+
def get_horse_data(horse_no: str) -> pl.DataFrame:
    """Return the cleaned data table for one horse.

    Runs ``_extract_horse_data`` for ``horse_no`` and passes its result
    through ``_clean_horse_data``.

    Args:
        horse_no: Horse identifier forwarded to ``_extract_horse_data``.

    Returns:
        The DataFrame produced by ``_clean_horse_data``.
    """
    return _clean_horse_data(_extract_horse_data(horse_no))
|
132
149
|
|
133
|
-
def
|
150
|
+
def _clean_race_data(df: pl.DataFrame) -> pl.DataFrame:
    """Clean a race-result table and convert columns to numeric types.

    Steps, in order:

    1. Reduce ``Pla`` to its first space-separated token and drop every row
       whose token is one of the module-level ``incidents`` codes.
    2. Cast ``Pla``, ``HorseNo``, ``ActWt``, ``DeclarHorseWt`` and ``Dr`` to
       ``Int64`` and ``WinOdds`` to ``Float64``; values that cannot be parsed
       become null (``strict=False``).
    3. Convert ``FinishTime`` from ``minutes:seconds`` text to a number of
       seconds stored as ``Float64``.

    Args:
        df: Raw table with the string columns listed above.

    Returns:
        The cleaned DataFrame.
    """
    df = df.with_columns(
        pl.col('Pla').str.split(' ').list.first().alias('Pla')
    ).filter(~pl.col('Pla').is_in(incidents))

    df = df.with_columns([
        pl.col('Pla').cast(pl.Int64, strict=False),
        pl.col('HorseNo').cast(pl.Int64, strict=False),
        pl.col('ActWt').cast(pl.Int64, strict=False),
        pl.col('DeclarHorseWt').cast(pl.Int64, strict=False),
        pl.col('Dr').cast(pl.Int64, strict=False),
        pl.col('WinOdds').cast(pl.Float64, strict=False)
    ])

    # The part after ":" is parsed as a float rather than an integer: a
    # seconds value with a fractional part (e.g. "09.45") cannot be cast to
    # Int64, and a float keeps the sub-second precision.
    finish_parts = pl.col("FinishTime").str.split_exact(":", 1)
    df = df.with_columns(
        (
            finish_parts.struct.field("field_0").cast(pl.Int64) * 60 +
            finish_parts.struct.field("field_1").cast(pl.Float64)
        ).cast(pl.Float64).alias("FinishTime")
    )

    return df
|
174
|
+
|
175
|
+
def _extract_race_data(date: str, venue_code: str, race_number: int) -> pl.DataFrame:
|
134
176
|
soup = _soupify_race_page(date, venue_code, race_number)
|
135
177
|
table = soup.find('div', class_='race_tab').find('table')
|
136
178
|
race_data = _parse_html_table(table)
|
137
179
|
|
138
180
|
# Extract the relevant race information
|
139
|
-
race_id = race_data.columns[0].replace(f'RACE{race_number}','')
|
181
|
+
race_id = race_data.columns[0].replace(f'RACE{race_number}', '')
|
140
182
|
race_class = race_data.item(1, 0).split('-')[0].strip()
|
141
183
|
race_dist = race_data.item(1, 0).split('-')[1].strip().rstrip('M')
|
142
184
|
race_name = race_data.item(2, 0).strip()
|
@@ -162,7 +204,12 @@ def get_race_data(date: str, venue_code: str, race_number: int) -> pl.DataFrame:
|
|
162
204
|
.with_columns(
|
163
205
|
pl.col("Horse").str.extract(r"\((.*?)\)")
|
164
206
|
.alias("HorseID")
|
165
|
-
|
166
|
-
|
207
|
+
)
|
208
|
+
)
|
209
|
+
|
210
|
+
return race_data
|
167
211
|
|
168
|
-
|
212
|
+
|
213
|
+
def get_race_data(date: str, venue_code: str, race_number: int) -> pl.DataFrame:
    """Return the cleaned result table for one race.

    Runs ``_extract_race_data`` with the given arguments and passes its
    result through ``_clean_race_data``.

    Args:
        date: Race date, forwarded unchanged to ``_extract_race_data``.
        venue_code: Venue code, forwarded unchanged.
        race_number: Race number, forwarded unchanged.

    Returns:
        The DataFrame produced by ``_clean_race_data``.
    """
    raw_race = _extract_race_data(date, venue_code, race_number)
    cleaned_race = _clean_race_data(raw_race)
    return cleaned_race
|
hkjc/live.py
ADDED
@@ -0,0 +1,375 @@
|
|
1
|
+
"""Functions to fetch and process data from HKJC
|
2
|
+
"""
|
3
|
+
from __future__ import annotations
|
4
|
+
from typing import Tuple, List
|
5
|
+
|
6
|
+
import requests
|
7
|
+
from cachetools.func import ttl_cache
|
8
|
+
import numpy as np
|
9
|
+
|
10
|
+
from .utils import _validate_date, _validate_venue_code
|
11
|
+
|
12
|
+
HKJC_LIVEODDS_ENDPOINT = "https://info.cld.hkjc.com/graphql/base/"
|
13
|
+
|
14
|
+
RACEMTG_PAYLOAD = {
|
15
|
+
"operationName": "raceMeetings",
|
16
|
+
"variables": {"date": None, "venueCode": None},
|
17
|
+
"query": """
|
18
|
+
fragment raceFragment on Race {
|
19
|
+
id
|
20
|
+
no
|
21
|
+
status
|
22
|
+
raceName_en
|
23
|
+
raceName_ch
|
24
|
+
postTime
|
25
|
+
country_en
|
26
|
+
country_ch
|
27
|
+
distance
|
28
|
+
wageringFieldSize
|
29
|
+
go_en
|
30
|
+
go_ch
|
31
|
+
ratingType
|
32
|
+
raceTrack {
|
33
|
+
description_en
|
34
|
+
description_ch
|
35
|
+
}
|
36
|
+
raceCourse {
|
37
|
+
description_en
|
38
|
+
description_ch
|
39
|
+
displayCode
|
40
|
+
}
|
41
|
+
claCode
|
42
|
+
raceClass_en
|
43
|
+
raceClass_ch
|
44
|
+
judgeSigns {
|
45
|
+
value_en
|
46
|
+
}
|
47
|
+
}
|
48
|
+
|
49
|
+
fragment racingBlockFragment on RaceMeeting {
|
50
|
+
jpEsts: pmPools(
|
51
|
+
oddsTypes: [WIN, PLA, TCE, TRI, FF, QTT, DT, TT, SixUP]
|
52
|
+
filters: ["jackpot", "estimatedDividend"]
|
53
|
+
) {
|
54
|
+
leg {
|
55
|
+
number
|
56
|
+
races
|
57
|
+
}
|
58
|
+
oddsType
|
59
|
+
jackpot
|
60
|
+
estimatedDividend
|
61
|
+
mergedPoolId
|
62
|
+
}
|
63
|
+
poolInvs: pmPools(
|
64
|
+
oddsTypes: [WIN, PLA, QIN, QPL, CWA, CWB, CWC, IWN, FCT, TCE, TRI, FF, QTT, DBL, TBL, DT, TT, SixUP]
|
65
|
+
) {
|
66
|
+
id
|
67
|
+
leg {
|
68
|
+
races
|
69
|
+
}
|
70
|
+
}
|
71
|
+
penetrometerReadings(filters: ["first"]) {
|
72
|
+
reading
|
73
|
+
readingTime
|
74
|
+
}
|
75
|
+
hammerReadings(filters: ["first"]) {
|
76
|
+
reading
|
77
|
+
readingTime
|
78
|
+
}
|
79
|
+
changeHistories(filters: ["top3"]) {
|
80
|
+
type
|
81
|
+
time
|
82
|
+
raceNo
|
83
|
+
runnerNo
|
84
|
+
horseName_ch
|
85
|
+
horseName_en
|
86
|
+
jockeyName_ch
|
87
|
+
jockeyName_en
|
88
|
+
scratchHorseName_ch
|
89
|
+
scratchHorseName_en
|
90
|
+
handicapWeight
|
91
|
+
scrResvIndicator
|
92
|
+
}
|
93
|
+
}
|
94
|
+
|
95
|
+
query raceMeetings($date: String, $venueCode: String) {
|
96
|
+
timeOffset {
|
97
|
+
rc
|
98
|
+
}
|
99
|
+
activeMeetings: raceMeetings {
|
100
|
+
id
|
101
|
+
venueCode
|
102
|
+
date
|
103
|
+
status
|
104
|
+
races {
|
105
|
+
no
|
106
|
+
postTime
|
107
|
+
status
|
108
|
+
wageringFieldSize
|
109
|
+
}
|
110
|
+
}
|
111
|
+
raceMeetings(date: $date, venueCode: $venueCode) {
|
112
|
+
id
|
113
|
+
status
|
114
|
+
venueCode
|
115
|
+
date
|
116
|
+
totalNumberOfRace
|
117
|
+
currentNumberOfRace
|
118
|
+
dateOfWeek
|
119
|
+
meetingType
|
120
|
+
totalInvestment
|
121
|
+
country {
|
122
|
+
code
|
123
|
+
namech
|
124
|
+
nameen
|
125
|
+
seq
|
126
|
+
}
|
127
|
+
races {
|
128
|
+
...raceFragment
|
129
|
+
runners {
|
130
|
+
id
|
131
|
+
no
|
132
|
+
standbyNo
|
133
|
+
status
|
134
|
+
name_ch
|
135
|
+
name_en
|
136
|
+
horse {
|
137
|
+
id
|
138
|
+
code
|
139
|
+
}
|
140
|
+
color
|
141
|
+
barrierDrawNumber
|
142
|
+
handicapWeight
|
143
|
+
currentWeight
|
144
|
+
currentRating
|
145
|
+
internationalRating
|
146
|
+
gearInfo
|
147
|
+
racingColorFileName
|
148
|
+
allowance
|
149
|
+
trainerPreference
|
150
|
+
last6run
|
151
|
+
saddleClothNo
|
152
|
+
trumpCard
|
153
|
+
priority
|
154
|
+
finalPosition
|
155
|
+
deadHeat
|
156
|
+
winOdds
|
157
|
+
jockey {
|
158
|
+
code
|
159
|
+
name_en
|
160
|
+
name_ch
|
161
|
+
}
|
162
|
+
trainer {
|
163
|
+
code
|
164
|
+
name_en
|
165
|
+
name_ch
|
166
|
+
}
|
167
|
+
}
|
168
|
+
}
|
169
|
+
obSt: pmPools(oddsTypes: [WIN, PLA]) {
|
170
|
+
leg {
|
171
|
+
races
|
172
|
+
}
|
173
|
+
oddsType
|
174
|
+
comingleStatus
|
175
|
+
}
|
176
|
+
poolInvs: pmPools(
|
177
|
+
oddsTypes: [WIN, PLA, QIN, QPL, CWA, CWB, CWC, IWN, FCT, TCE, TRI, FF, QTT, DBL, TBL, DT, TT, SixUP]
|
178
|
+
) {
|
179
|
+
id
|
180
|
+
leg {
|
181
|
+
number
|
182
|
+
races
|
183
|
+
}
|
184
|
+
status
|
185
|
+
sellStatus
|
186
|
+
oddsType
|
187
|
+
investment
|
188
|
+
mergedPoolId
|
189
|
+
lastUpdateTime
|
190
|
+
}
|
191
|
+
...racingBlockFragment
|
192
|
+
pmPools(oddsTypes: []) {
|
193
|
+
id
|
194
|
+
}
|
195
|
+
jkcInstNo: foPools(oddsTypes: [JKC], filters: ["top"]) {
|
196
|
+
instNo
|
197
|
+
}
|
198
|
+
tncInstNo: foPools(oddsTypes: [TNC], filters: ["top"]) {
|
199
|
+
instNo
|
200
|
+
}
|
201
|
+
}
|
202
|
+
}
|
203
|
+
"""}
|
204
|
+
|
205
|
+
LIVEODDS_PAYLOAD = {
|
206
|
+
"operationName": "racing",
|
207
|
+
"variables": {"date": None, "venueCode": None, "raceNo": None, "oddsTypes": None},
|
208
|
+
"query": """
|
209
|
+
query racing($date: String, $venueCode: String, $oddsTypes: [OddsType], $raceNo: Int) {
|
210
|
+
raceMeetings(date: $date, venueCode: $venueCode) {
|
211
|
+
pmPools(oddsTypes: $oddsTypes, raceNo: $raceNo) {
|
212
|
+
id
|
213
|
+
status
|
214
|
+
sellStatus
|
215
|
+
oddsType
|
216
|
+
lastUpdateTime
|
217
|
+
guarantee
|
218
|
+
minTicketCost
|
219
|
+
name_en
|
220
|
+
name_ch
|
221
|
+
leg {
|
222
|
+
number
|
223
|
+
races
|
224
|
+
}
|
225
|
+
cWinSelections {
|
226
|
+
composite
|
227
|
+
name_ch
|
228
|
+
name_en
|
229
|
+
starters
|
230
|
+
}
|
231
|
+
oddsNodes {
|
232
|
+
combString
|
233
|
+
oddsValue
|
234
|
+
hotFavourite
|
235
|
+
oddsDropValue
|
236
|
+
bankerOdds {
|
237
|
+
combString
|
238
|
+
oddsValue
|
239
|
+
}
|
240
|
+
}
|
241
|
+
}
|
242
|
+
}
|
243
|
+
}""",
|
244
|
+
}
|
245
|
+
|
246
|
+
|
247
|
+
@ttl_cache(maxsize=12, ttl=1000)
def _fetch_live_races(date: str, venue_code: str) -> dict:
    """Query the HKJC GraphQL endpoint for the races of one meeting.

    Posts ``RACEMTG_PAYLOAD`` with ``date`` and ``venueCode`` filled in and
    summarises the races of the first meeting in the response.

    Args:
        date: Meeting date sent as the ``date`` query variable.
        venue_code: Venue code sent as the ``venueCode`` query variable.

    Returns:
        A dict keyed by race number. Each value holds ``No``, ``Name``,
        ``Class``, ``Course``, ``Dist``, ``Going``, ``Track`` and ``Runners``
        (a list of dicts with ``Dr``, ``Rtg``, ``Wt`` and ``HorseNo``).

    Raises:
        RuntimeError: If the HTTP status code is not 200.
    """
    # Build a per-call payload so the module-level template is never mutated.
    request_body = {
        **RACEMTG_PAYLOAD,
        "variables": {
            **RACEMTG_PAYLOAD["variables"],
            "date": date,
            "venueCode": venue_code,
        },
    }

    response = requests.post(
        HKJC_LIVEODDS_ENDPOINT,
        json=request_body,
        headers={
            "Origin": "https://bet.hkjc.com",
            "Referer": "https://bet.hkjc.com",
            "Content-Type": "application/json",
            "Accept": "application/json",
            "User-Agent": "python-hkjc-fetch/0.1",
        },
        timeout=10,
    )
    if response.status_code != 200:
        raise RuntimeError(f"Request failed: {response.status_code} - {response.text}")

    def _runner_row(runner: dict) -> dict:
        # Rating and weight are converted to int; draw and horse code are
        # passed through as received.
        return {
            'Dr': runner['barrierDrawNumber'],
            'Rtg': int(runner['currentRating']),
            'Wt': int(runner['currentWeight']),
            'HorseNo': runner['horse']['code'],
        }

    summary = {}
    for race in response.json()['data']['raceMeetings'][0]['races']:
        number = race['no']
        name = race['raceName_en']
        distance = race['distance']
        going = race['go_en']
        track = race['raceTrack']['description_en']
        klass = race['raceClass_en']
        course = race['raceCourse']['displayCode']
        entrants = [_runner_row(runner) for runner in race['runners']]

        summary[number] = {
            'No': number,
            'Name': name,
            'Class': klass,
            'Course': course,
            'Dist': distance,
            'Going': going,
            'Track': track,
            'Runners': entrants,
        }
    return summary
|
296
|
+
|
297
|
+
|
298
|
+
def _parse_odds_value(raw) -> float:
    """Convert a raw ``oddsValue`` to float, returning NaN when it is not numeric."""
    try:
        return float(raw)
    except (TypeError, ValueError):
        # NOTE(review): presumably non-numeric markers (e.g. for scratched
        # runners) can appear here -- confirm against the live API. NaN keeps
        # one bad node from aborting the whole fetch.
        return float("nan")


@ttl_cache(maxsize=12, ttl=30)
def _fetch_live_odds(date: str, venue_code: str, race_number: int, odds_type: Tuple[str] = ('PLA', 'QPL')) -> List[dict]:
    """Fetch live odds data from HKJC GraphQL endpoint.

    Args:
        date: Meeting date sent as the ``date`` query variable.
        venue_code: Venue code sent as the ``venueCode`` query variable.
        race_number: Race number sent as the ``raceNo`` query variable.
        odds_type: Odds types sent as the ``oddsTypes`` query variable. It is
            a tuple so the arguments stay hashable for ``ttl_cache``.

    Returns:
        One dict per odds node, with keys ``HorseID`` (the node's
        ``combString``), ``Type`` (the pool's ``oddsType``) and ``Odds``
        (float; NaN when the reported value is not numeric).

    Raises:
        RuntimeError: If the HTTP status code is not 200.
    """
    # Copy the template (and its nested dict) so the module constant is
    # never mutated between calls.
    payload = LIVEODDS_PAYLOAD.copy()
    payload["variables"] = payload["variables"].copy()
    payload["variables"]["date"] = date
    payload["variables"]["venueCode"] = venue_code
    payload["variables"]["raceNo"] = race_number
    payload["variables"]["oddsTypes"] = odds_type

    headers = {
        "Origin": "https://bet.hkjc.com",
        "Referer": "https://bet.hkjc.com",
        "Content-Type": "application/json",
        "Accept": "application/json",
        "User-Agent": "python-hkjc-fetch/0.1",
    }

    r = requests.post(HKJC_LIVEODDS_ENDPOINT, json=payload,
                      headers=headers, timeout=10)
    if r.status_code != 200:
        raise RuntimeError(f"Request failed: {r.status_code} - {r.text}")

    # ``dict.get(key, default)`` only applies the default when the key is
    # absent; a response may carry an explicit null instead, so fall back
    # with ``or`` at every level.
    data = r.json().get("data") or {}
    meetings = data.get("raceMeetings") or []

    return [
        {"HorseID": node["combString"], "Type": pool.get(
            "oddsType"), "Odds": _parse_odds_value(node["oddsValue"])}
        for meeting in meetings
        for pool in (meeting.get("pmPools") or [])
        for node in (pool.get("oddsNodes") or [])
    ]
|
330
|
+
|
331
|
+
|
332
|
+
def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str] | None = None) -> dict:
    """Fetch live odds as numpy arrays.

    Args:
        date (str): Date in 'YYYY-MM-DD' format.
        venue_code (str): Venue code, e.g., 'ST' for Shatin, 'HV' for Happy Valley.
        race_number (int): Race number.
        odds_type (List[str]): Types of odds to fetch. Defaults to
            ['PLA', 'QPL'] when omitted or None. Currently the following
            types are supported:
            - 'WIN': Win odds
            - 'PLA': Place odds
            - 'QIN': Quinella odds
            - 'QPL': Quinella Place odds

    Returns:
        dict: Dictionary with the requested odds types as keys and numpy
        arrays as values, where N is the number of runners listed for the race.
            'WIN' and 'PLA' map to a 1D array of length N.
            'QIN' and 'QPL' map to a symmetric 2D array of shape (N, N).
        Positions are the numbers in each entry's ``HorseID`` minus one;
        positions with no reported odds are NaN.

    Raises:
        KeyError: If ``race_number`` is not among the fetched races, or if
            ``odds_type`` contains a type other than the four listed above.
    """
    # A None sentinel replaces the former mutable list default.
    if odds_type is None:
        odds_type = ['PLA', 'QPL']

    _validate_date(date)
    _validate_venue_code(venue_code)

    race_info = _fetch_live_races(date, venue_code)
    N = len(race_info[race_number]['Runners'])

    # The cached fetcher needs hashable arguments, hence the tuple.
    data = _fetch_live_odds(date, venue_code, race_number,
                            odds_type=tuple(odds_type))

    odds = {'WIN': np.full(N, np.nan, dtype=float),
            'PLA': np.full(N, np.nan, dtype=float),
            'QIN': np.full((N, N), np.nan, dtype=float),
            'QPL': np.full((N, N), np.nan, dtype=float)}

    for entry in data:
        if entry["Type"] in ["QIN", "QPL"]:
            # Pair odds are order-independent, so fill both halves.
            first, second = list(map(int, entry["HorseID"].split(",")))[:2]
            odds[entry["Type"]][first - 1, second - 1] = entry["Odds"]
            odds[entry["Type"]][second - 1, first - 1] = entry["Odds"]
        elif entry["Type"] in ["PLA", "WIN"]:
            odds[entry["Type"]][int(entry["HorseID"]) - 1] = entry["Odds"]

    return {t: odds[t] for t in odds_type}
|
hkjc/processing.py
CHANGED
@@ -3,10 +3,10 @@
|
|
3
3
|
from __future__ import annotations
|
4
4
|
from typing import Tuple, List, Union
|
5
5
|
|
6
|
-
from .
|
6
|
+
from .live import live_odds
|
7
7
|
from .strategy import qpbanker, place_only
|
8
8
|
from .harville_model import fit_harville_to_odds
|
9
|
-
from .historical import
|
9
|
+
from .historical import _extract_horse_data, _extract_race_data, _clean_horse_data
|
10
10
|
from .utils import _validate_date
|
11
11
|
|
12
12
|
import polars as pl
|
@@ -23,8 +23,7 @@ def _all_subsets(lst): return [list(x) for r in range(
|
|
23
23
|
# ======================================
|
24
24
|
# Historical data processing functions
|
25
25
|
# ======================================
|
26
|
-
|
27
|
-
'UR', 'VOID', 'WR', 'WV', 'WV-A', 'WX', 'WX-A', 'WXNR']
|
26
|
+
|
28
27
|
|
29
28
|
|
30
29
|
def _historical_process_single_date_venue(date: str, venue_code: str) -> List[pl.DataFrame]:
|
@@ -33,7 +32,7 @@ def _historical_process_single_date_venue(date: str, venue_code: str) -> List[pl
|
|
33
32
|
range(1, 12), desc=f"Processing {date} {venue_code} ...", leave=False)
|
34
33
|
for race_number in iter_date:
|
35
34
|
try:
|
36
|
-
dfs.append(
|
35
|
+
dfs.append(_extract_race_data(date.strftime('%Y/%m/%d'),
|
37
36
|
venue_code, race_number))
|
38
37
|
except:
|
39
38
|
if race_number == 1:
|
@@ -51,7 +50,7 @@ def generate_historical_data(start_date: str, end_date: str) -> pl.DataFrame:
|
|
51
50
|
|
52
51
|
dfs = []
|
53
52
|
|
54
|
-
for date in tqdm(pl.date_range(start_dt, end_dt, interval='1d', eager=True)):
|
53
|
+
for date in tqdm(pl.date_range(start_dt, end_dt, interval='1d', eager=True), leave=False, desc='Scanning for horse IDs ...'):
|
55
54
|
for venue_code in ['ST', 'HV']:
|
56
55
|
dfs += _historical_process_single_date_venue(date, venue_code)
|
57
56
|
|
@@ -62,35 +61,9 @@ def generate_historical_data(start_date: str, end_date: str) -> pl.DataFrame:
|
|
62
61
|
horse_ids = pl.concat(dfs)['HorseID'].unique()
|
63
62
|
|
64
63
|
# Use horse track records
|
65
|
-
dfs = [
|
66
|
-
df = (
|
67
|
-
|
68
|
-
pl.col('Date').str.strptime(pl.Date, '%m/%d/%y')
|
69
|
-
).filter(pl.col('Date').is_between(start_dt, end_dt))
|
70
|
-
.filter(~pl.col('Pla').is_in(incidents))
|
71
|
-
.with_columns(
|
72
|
-
pl.col('Pla').str.split(' ').list.first().alias('Pla')
|
73
|
-
)
|
74
|
-
)
|
75
|
-
|
76
|
-
df = df.with_columns([
|
77
|
-
pl.col('Pla').cast(pl.Int64, strict=False),
|
78
|
-
pl.col('HorseNo').cast(pl.Int64, strict=False),
|
79
|
-
pl.col('ActWt').cast(pl.Int64, strict=False),
|
80
|
-
pl.col('DeclarHorseWt').cast(pl.Int64, strict=False),
|
81
|
-
pl.col('Dr').cast(pl.Int64, strict=False),
|
82
|
-
pl.col('RaceDistance').cast(pl.Int64, strict=False),
|
83
|
-
pl.col('WinOdds').cast(pl.Float64, strict=False)
|
84
|
-
])
|
85
|
-
|
86
|
-
df = df.with_columns(
|
87
|
-
(
|
88
|
-
pl.col("FinishTime").str.split(":").list.get(0).cast(pl.Int64) * 60 +
|
89
|
-
pl.col("FinishTime").str.split(":").list.get(1).cast(pl.Float64)
|
90
|
-
).cast(pl.Float64).alias("FinishTime")
|
91
|
-
)
|
92
|
-
|
93
|
-
return df
|
64
|
+
dfs = [_extract_horse_data(horse_id) for horse_id in tqdm(horse_ids, desc='Processing horses ...', leave=False)]
|
65
|
+
df = pl.concat(dfs)
|
66
|
+
return _clean_horse_data(df)
|
94
67
|
|
95
68
|
|
96
69
|
# ==========================
|
@@ -1,12 +1,12 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: hkjc
|
3
|
-
Version: 0.3.
|
3
|
+
Version: 0.3.18
|
4
4
|
Summary: Library for scrapping HKJC data and perform basic analysis
|
5
5
|
Requires-Python: >=3.11
|
6
6
|
Requires-Dist: beautifulsoup4>=4.14.2
|
7
7
|
Requires-Dist: cachetools>=6.2.0
|
8
8
|
Requires-Dist: fastexcel>=0.16.0
|
9
|
-
Requires-Dist:
|
9
|
+
Requires-Dist: flask>=3.1.2
|
10
10
|
Requires-Dist: numba>=0.62.1
|
11
11
|
Requires-Dist: numpy>=2.3.3
|
12
12
|
Requires-Dist: polars>=1.33.1
|
@@ -1,14 +1,14 @@
|
|
1
|
-
hkjc/__init__.py,sha256=
|
1
|
+
hkjc/__init__.py,sha256=5A9MzcITYJDcA2UbIBpkimZBYSqS4pgRuQJhTagOfpE,753
|
2
2
|
hkjc/analysis.py,sha256=0042_NMIkQCl0J6B0P4TFfrBDCnm2B6jsCZKOEO30yI,108
|
3
3
|
hkjc/harville_model.py,sha256=MZjPLS-1nbEhp1d4Syuq13DtraKnd7TlNqBmOOCwxgc,15976
|
4
|
-
hkjc/historical.py,sha256=
|
5
|
-
hkjc/
|
6
|
-
hkjc/processing.py,sha256=
|
4
|
+
hkjc/historical.py,sha256=v9k_R47Na5en5ftrocjIHofkNAUthE_lp4CyLaCTsQE,8280
|
5
|
+
hkjc/live.py,sha256=GqctH-BVdIL6Vi1g8XHe3p8fZBopCQf5KACLAR0meP0,10249
|
6
|
+
hkjc/processing.py,sha256=H0chtW_FBMMhK3IzcjYjrryd3fAPYimanc2fWuGiB0M,6807
|
7
7
|
hkjc/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
8
|
hkjc/speedpro.py,sha256=Y2Z3GYGeePc4sM-ZnCHXCI1N7L-_j9nrMqS3CC5BBSo,2031
|
9
9
|
hkjc/utils.py,sha256=4CA_FPf_U3GvzoLkqBX0qDPZgrSvKJKvbP7VWqd5FiA,6323
|
10
10
|
hkjc/strategy/place_only.py,sha256=lHPjTSj8PzghxncNBg8FI4T4HJigekB9a3bV7l7VtPA,2079
|
11
11
|
hkjc/strategy/qpbanker.py,sha256=MQxjwsfhllKZroKS8w8Q3bi3HMjGc1DAyBIjNZAp3yQ,4805
|
12
|
-
hkjc-0.3.
|
13
|
-
hkjc-0.3.
|
14
|
-
hkjc-0.3.
|
12
|
+
hkjc-0.3.18.dist-info/METADATA,sha256=aoXp6Fvn3EkuXyv6p5LClSbZa5XS_bfcUxMKBJXcNvw,480
|
13
|
+
hkjc-0.3.18.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
14
|
+
hkjc-0.3.18.dist-info/RECORD,,
|
hkjc/live_odds.py
DELETED
@@ -1,136 +0,0 @@
|
|
1
|
-
"""Functions to fetch and process data from HKJC
|
2
|
-
"""
|
3
|
-
from __future__ import annotations
|
4
|
-
from typing import Tuple, List
|
5
|
-
|
6
|
-
import requests
|
7
|
-
from cachetools.func import ttl_cache
|
8
|
-
import numpy as np
|
9
|
-
|
10
|
-
from .utils import _validate_date, _validate_venue_code
|
11
|
-
|
12
|
-
HKJC_LIVEODDS_ENDPOINT = "https://info.cld.hkjc.com/graphql/base/"
|
13
|
-
|
14
|
-
LIVEODDS_PAYLOAD = {
|
15
|
-
"operationName": "racing",
|
16
|
-
"variables": {"date": None, "venueCode": None, "raceNo": None, "oddsTypes": None},
|
17
|
-
"query": """
|
18
|
-
query racing($date: String, $venueCode: String, $oddsTypes: [OddsType], $raceNo: Int) {
|
19
|
-
raceMeetings(date: $date, venueCode: $venueCode) {
|
20
|
-
pmPools(oddsTypes: $oddsTypes, raceNo: $raceNo) {
|
21
|
-
id
|
22
|
-
status
|
23
|
-
sellStatus
|
24
|
-
oddsType
|
25
|
-
lastUpdateTime
|
26
|
-
guarantee
|
27
|
-
minTicketCost
|
28
|
-
name_en
|
29
|
-
name_ch
|
30
|
-
leg {
|
31
|
-
number
|
32
|
-
races
|
33
|
-
}
|
34
|
-
cWinSelections {
|
35
|
-
composite
|
36
|
-
name_ch
|
37
|
-
name_en
|
38
|
-
starters
|
39
|
-
}
|
40
|
-
oddsNodes {
|
41
|
-
combString
|
42
|
-
oddsValue
|
43
|
-
hotFavourite
|
44
|
-
oddsDropValue
|
45
|
-
bankerOdds {
|
46
|
-
combString
|
47
|
-
oddsValue
|
48
|
-
}
|
49
|
-
}
|
50
|
-
}
|
51
|
-
}
|
52
|
-
}""",
|
53
|
-
}
|
54
|
-
|
55
|
-
|
56
|
-
@ttl_cache(maxsize=12, ttl=30)
|
57
|
-
def _fetch_live_odds(date: str, venue_code: str, race_number: int, odds_type: Tuple[str] = ('PLA', 'QPL')) -> Tuple[dict]:
|
58
|
-
"""Fetch live odds data from HKJC GraphQL endpoint."""
|
59
|
-
payload = LIVEODDS_PAYLOAD.copy()
|
60
|
-
payload["variables"] = payload["variables"].copy()
|
61
|
-
payload["variables"]["date"] = date
|
62
|
-
payload["variables"]["venueCode"] = venue_code
|
63
|
-
payload["variables"]["raceNo"] = race_number
|
64
|
-
payload["variables"]["oddsTypes"] = odds_type
|
65
|
-
|
66
|
-
headers = {
|
67
|
-
"Origin": "https://bet.hkjc.com",
|
68
|
-
"Referer": "https://bet.hkjc.com",
|
69
|
-
"Content-Type": "application/json",
|
70
|
-
"Accept": "application/json",
|
71
|
-
"User-Agent": "python-hkjc-fetch/0.1",
|
72
|
-
}
|
73
|
-
|
74
|
-
r = requests.post(HKJC_LIVEODDS_ENDPOINT, json=payload,
|
75
|
-
headers=headers, timeout=10)
|
76
|
-
if r.status_code != 200:
|
77
|
-
raise RuntimeError(f"Request failed: {r.status_code} - {r.text}")
|
78
|
-
|
79
|
-
meetings = r.json().get("data", {}).get("raceMeetings", [])
|
80
|
-
|
81
|
-
return [
|
82
|
-
{"HorseID": node["combString"], "Type": pool.get(
|
83
|
-
"oddsType"), "Odds": float(node["oddsValue"])}
|
84
|
-
for meeting in meetings
|
85
|
-
for pool in meeting.get("pmPools", [])
|
86
|
-
for node in pool.get("oddsNodes", [])
|
87
|
-
]
|
88
|
-
|
89
|
-
|
90
|
-
def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str] = ['PLA', 'QPL']) -> dict:
|
91
|
-
"""Fetch live odds as numpy arrays.
|
92
|
-
|
93
|
-
Args:
|
94
|
-
date (str): Date in 'YYYY-MM-DD' format.
|
95
|
-
venue_code (str): Venue code, e.g., 'ST' for Shatin, 'HV' for Happy Valley.
|
96
|
-
race_number (int): Race number.
|
97
|
-
odds_type (List[str]): Types of odds to fetch. Default is ['PLA', 'QPL']. Currently the following types are supported:
|
98
|
-
- 'WIN': Win odds
|
99
|
-
- 'PLA': Place odds
|
100
|
-
- 'QIN': Quinella odds
|
101
|
-
- 'QPL': Quinella Place odds
|
102
|
-
fit_harville (bool): Whether to fit the odds using Harville model. Default is False.
|
103
|
-
|
104
|
-
Returns:
|
105
|
-
dict: Dictionary with keys as odds types and values as numpy arrays containing the odds.
|
106
|
-
If odds_type is 'WIN','PLA', returns a 1D array of place odds.
|
107
|
-
If odds_type is 'QIN','QPL', returns a 2D array of quinella place odds.
|
108
|
-
"""
|
109
|
-
_validate_date(date)
|
110
|
-
_validate_venue_code(venue_code)
|
111
|
-
|
112
|
-
mandatory_types = ['PLA']
|
113
|
-
|
114
|
-
data = _fetch_live_odds(date, venue_code, race_number,
|
115
|
-
odds_type=tuple(set(mandatory_types+odds_type)))
|
116
|
-
|
117
|
-
# use place odds to determine number of horses
|
118
|
-
pla_data = [entry for entry in data if entry["Type"] == "PLA"]
|
119
|
-
N = len(pla_data)
|
120
|
-
|
121
|
-
odds = {'WIN': np.full(N, np.nan, dtype=float),
|
122
|
-
'PLA': np.full(N, np.nan, dtype=float),
|
123
|
-
'QIN': np.full((N, N), np.nan, dtype=float),
|
124
|
-
'QPL': np.full((N, N), np.nan, dtype=float)}
|
125
|
-
|
126
|
-
for entry in data:
|
127
|
-
if entry["Type"] in ["QIN", "QPL"]:
|
128
|
-
horse_ids = list(map(int, entry["HorseID"].split(",")))
|
129
|
-
odds[entry["Type"]][horse_ids[0] - 1,
|
130
|
-
horse_ids[1] - 1] = entry["Odds"]
|
131
|
-
odds[entry["Type"]][horse_ids[1] - 1,
|
132
|
-
horse_ids[0] - 1] = entry["Odds"]
|
133
|
-
elif entry["Type"] in ["PLA", "WIN"]:
|
134
|
-
odds[entry["Type"]][int(entry["HorseID"]) - 1] = entry["Odds"]
|
135
|
-
|
136
|
-
return {t: odds[t] for t in odds_type}
|
File without changes
|