hkjc 0.3.17__py3-none-any.whl → 0.3.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hkjc/__init__.py +4 -5
- hkjc/features.py +6 -0
- hkjc/historical.py +29 -45
- hkjc/live.py +379 -0
- hkjc/processing.py +7 -2
- {hkjc-0.3.17.dist-info → hkjc-0.3.19.dist-info}/METADATA +2 -2
- hkjc-0.3.19.dist-info/RECORD +14 -0
- hkjc/analysis.py +0 -3
- hkjc/live_odds.py +0 -136
- hkjc-0.3.17.dist-info/RECORD +0 -14
- {hkjc-0.3.17.dist-info → hkjc-0.3.19.dist-info}/WHEEL +0 -0
hkjc/__init__.py
CHANGED
@@ -4,18 +4,17 @@ This module re-exports commonly used symbols from the submodules.
|
|
4
4
|
"""
|
5
5
|
from importlib.metadata import version as _version
|
6
6
|
|
7
|
-
__all__ = ["
|
7
|
+
__all__ = ["live", "features",
|
8
8
|
"generate_all_qp_trades", "generate_all_pla_trades", "pareto_filter",
|
9
|
-
|
10
|
-
|
9
|
+
"speedpro_energy", "speedmap", "harveille_model",
|
10
|
+
"generate_historical_data"]
|
11
11
|
|
12
12
|
try:
|
13
13
|
__version__ = _version(__name__)
|
14
14
|
except Exception: # pragma: no cover - best-effort version resolution
|
15
15
|
__version__ = "0.0.0"
|
16
16
|
|
17
|
-
from .live_odds import live_odds
|
18
17
|
from .processing import generate_all_qp_trades, generate_all_pla_trades, generate_historical_data
|
19
18
|
from .utils import pareto_filter
|
20
19
|
from .speedpro import speedmap, speedpro_energy
|
21
|
-
from . import harville_model
|
20
|
+
from . import harville_model, live, features
|
hkjc/features.py
ADDED
@@ -0,0 +1,6 @@
|
|
1
|
+
""" Polars expressions to commonly-used analysis features, subject to frequent changes.
|
2
|
+
"""
|
3
|
+
import polars as pl
|
4
|
+
|
5
|
+
rating_diff = (pl.col('Rtg').max().over('RaceId')-pl.col('Rtg')).alias('RtgDiff')
|
6
|
+
frontrunner_pct = (pl.col('FavoriteRunningStyle')=='FrontRunner').mean().over('RaceId').alias('FRPct')
|
hkjc/historical.py
CHANGED
@@ -52,14 +52,15 @@ def _classify_running_style(df: pl.DataFrame, running_pos_col="RunningPosition")
|
|
52
52
|
.alias("split_data").cast(pl.Int64, strict=False)
|
53
53
|
).unnest("split_data")
|
54
54
|
|
55
|
-
df = df.with_columns(
|
55
|
+
df = df.with_columns(
|
56
|
+
pl.col('FinishPosition').fill_null(pl.col('Position3')))
|
56
57
|
|
57
58
|
df = df.with_columns([
|
58
59
|
(pl.col("StartPosition")-pl.col("FinishPosition")).alias("PositionChange"),
|
59
|
-
pl.mean_horizontal("StartPosition", "Position2"
|
60
|
-
|
60
|
+
pl.mean_horizontal("StartPosition", "Position2").alias(
|
61
|
+
"AvgStartPosition"),
|
61
62
|
]).with_columns(pl.when(pl.col("StartPosition").is_null()).then(pl.lit("--"))
|
62
|
-
.when((pl.col("
|
63
|
+
.when((pl.col("AvgStartPosition") <= 3) & (pl.col("StartPosition") <= 3)).then(pl.lit("FrontRunner"))
|
63
64
|
.when((pl.col("PositionChange") >= 1) & (pl.col("StartPosition") >= 6)).then(pl.lit("Closer"))
|
64
65
|
.otherwise(pl.lit("Pacer")).alias("RunningStyle"))
|
65
66
|
|
@@ -78,35 +79,7 @@ def _extract_horse_data(horse_no: str) -> pl.DataFrame:
|
|
78
79
|
pl.col('Date') != '') # Remove empty rows
|
79
80
|
horse_data = _classify_running_style(horse_data)
|
80
81
|
|
81
|
-
|
82
|
-
table = soup.find_all('table', class_='table_eng_text')
|
83
|
-
profile_data = _parse_html_table(table[0], skip_header=True)
|
84
|
-
profile_data = _parse_html_table(table[1], skip_header=True)
|
85
|
-
|
86
|
-
try:
|
87
|
-
current_rating = int(profile_data.filter(
|
88
|
-
pl.col("column_0").str.starts_with("Current Rating"))['column_2'].item(0))
|
89
|
-
season_start_rating = int(profile_data.filter(pl.col(
|
90
|
-
"column_0").str.starts_with("Start of Season Rating"))['column_2'].item(0))
|
91
|
-
except:
|
92
|
-
current_rating, season_start_rating = 0, 0
|
93
|
-
|
94
|
-
try:
|
95
|
-
last_rating = int(profile_data.filter(
|
96
|
-
pl.col("column_0").str.starts_with("Last Rating"))['column_2'].item(0))
|
97
|
-
except:
|
98
|
-
last_rating = 0
|
99
|
-
|
100
|
-
horse_info = {
|
101
|
-
'HorseID': horse_no,
|
102
|
-
'CurrentRating': current_rating,
|
103
|
-
'SeasonStartRating': season_start_rating,
|
104
|
-
'LastRating': last_rating if current_rating == 0 else current_rating
|
105
|
-
}
|
106
|
-
horse_data = (horse_data.with_columns([
|
107
|
-
pl.lit(value).alias(key) for key, value in horse_info.items()
|
108
|
-
])
|
109
|
-
)
|
82
|
+
horse_data = horse_data.with_columns(pl.lit(horse_no).alias('HorseNo'))
|
110
83
|
|
111
84
|
return horse_data
|
112
85
|
|
@@ -125,16 +98,16 @@ def _clean_horse_data(df: pl.DataFrame) -> pl.DataFrame:
|
|
125
98
|
pl.col('Dr').cast(pl.Int64, strict=False),
|
126
99
|
pl.col('Rtg').cast(pl.Int64, strict=False),
|
127
100
|
pl.col('Dist').cast(pl.Int64, strict=False),
|
128
|
-
pl.col('WinOdds').cast(pl.Float64, strict=False)
|
129
|
-
pl.col('RaceIndex').cast(pl.Int64, strict=False)
|
101
|
+
pl.col('WinOdds').cast(pl.Float64, strict=False)
|
130
102
|
])
|
131
103
|
|
132
|
-
df = df.
|
104
|
+
df = (df.filter(~pl.col('FinishTime').str.starts_with('--'))
|
105
|
+
.with_columns(
|
133
106
|
(
|
134
|
-
pl.col("FinishTime").str.
|
135
|
-
pl.col("FinishTime").str.
|
136
|
-
).cast(pl.Float64).alias("FinishTime")
|
137
|
-
)
|
107
|
+
pl.col("FinishTime").str.splitn(".", 2).struct.field("field_0").cast(pl.Int64) * 60 +
|
108
|
+
pl.col("FinishTime").str.splitn(".", 2).struct.field("field_1").cast(pl.Float64)
|
109
|
+
).cast(pl.Float64).round(2).alias("FinishTime")
|
110
|
+
))
|
138
111
|
|
139
112
|
df = df.with_columns(
|
140
113
|
pl.col('RCTrackCourse').str.split_exact(' / ', 2)
|
@@ -142,12 +115,22 @@ def _clean_horse_data(df: pl.DataFrame) -> pl.DataFrame:
|
|
142
115
|
.alias('RCTrackCourse')
|
143
116
|
).unnest('RCTrackCourse')
|
144
117
|
|
118
|
+
df = df.with_columns(
|
119
|
+
pl.when(pl.col('Date').str.len_chars() <= 8)
|
120
|
+
.then(pl.col('Date').str.strptime(pl.Date, '%d/%m/%y', strict=False))
|
121
|
+
.otherwise(pl.col('Date').str.strptime(pl.Date, '%d/%m/%Y'))
|
122
|
+
).with_columns(
|
123
|
+
pl.concat_str(pl.col('Date').dt.strftime('%Y%m%d'), pl.col(
|
124
|
+
'Venue'), pl.col('RaceIndex')).alias('RaceId')
|
125
|
+
).drop("VideoReplay")
|
145
126
|
return df
|
146
127
|
|
128
|
+
|
147
129
|
def get_horse_data(horse_no: str) -> pl.DataFrame:
|
148
130
|
df = _extract_horse_data(horse_no)
|
149
131
|
return _clean_horse_data(df)
|
150
132
|
|
133
|
+
|
151
134
|
def _clean_race_data(df: pl.DataFrame) -> pl.DataFrame:
|
152
135
|
""" Clean and convert horse data to suitable data types
|
153
136
|
"""
|
@@ -166,13 +149,14 @@ def _clean_race_data(df: pl.DataFrame) -> pl.DataFrame:
|
|
166
149
|
|
167
150
|
df = df.with_columns(
|
168
151
|
(
|
169
|
-
pl.col("FinishTime").str.
|
170
|
-
pl.col("FinishTime").str.
|
171
|
-
).cast(pl.Float64).alias("FinishTime")
|
152
|
+
pl.col("FinishTime").str.splitn(":", 2).struct.field("field_0").cast(pl.Int64) * 60 +
|
153
|
+
pl.col("FinishTime").str.splitn(":", 2).struct.field("field_1").cast(pl.Float64)
|
154
|
+
).cast(pl.Float64).round(2).alias("FinishTime")
|
172
155
|
)
|
173
156
|
|
174
157
|
return df
|
175
158
|
|
159
|
+
|
176
160
|
def _extract_race_data(date: str, venue_code: str, race_number: int) -> pl.DataFrame:
|
177
161
|
soup = _soupify_race_page(date, venue_code, race_number)
|
178
162
|
table = soup.find('div', class_='race_tab').find('table')
|
@@ -212,5 +196,5 @@ def _extract_race_data(date: str, venue_code: str, race_number: int) -> pl.DataF
|
|
212
196
|
|
213
197
|
|
214
198
|
def get_race_data(date: str, venue_code: str, race_number: int) -> pl.DataFrame:
|
215
|
-
df = _extract_race_data(date,venue_code,race_number)
|
216
|
-
return _clean_race_data(df)
|
199
|
+
df = _extract_race_data(date, venue_code, race_number)
|
200
|
+
return _clean_race_data(df)
|
hkjc/live.py
ADDED
@@ -0,0 +1,379 @@
|
|
1
|
+
"""Functions to fetch and process data from HKJC
|
2
|
+
"""
|
3
|
+
from __future__ import annotations
|
4
|
+
from typing import Tuple, List
|
5
|
+
|
6
|
+
import requests
|
7
|
+
from cachetools.func import ttl_cache
|
8
|
+
import numpy as np
|
9
|
+
|
10
|
+
HKJC_LIVEODDS_ENDPOINT = "https://info.cld.hkjc.com/graphql/base/"
|
11
|
+
|
12
|
+
RACEMTG_PAYLOAD = {
|
13
|
+
"operationName": "raceMeetings",
|
14
|
+
"variables": {"date": None, "venueCode": None},
|
15
|
+
"query": """
|
16
|
+
fragment raceFragment on Race {
|
17
|
+
id
|
18
|
+
no
|
19
|
+
status
|
20
|
+
raceName_en
|
21
|
+
raceName_ch
|
22
|
+
postTime
|
23
|
+
country_en
|
24
|
+
country_ch
|
25
|
+
distance
|
26
|
+
wageringFieldSize
|
27
|
+
go_en
|
28
|
+
go_ch
|
29
|
+
ratingType
|
30
|
+
raceTrack {
|
31
|
+
description_en
|
32
|
+
description_ch
|
33
|
+
}
|
34
|
+
raceCourse {
|
35
|
+
description_en
|
36
|
+
description_ch
|
37
|
+
displayCode
|
38
|
+
}
|
39
|
+
claCode
|
40
|
+
raceClass_en
|
41
|
+
raceClass_ch
|
42
|
+
judgeSigns {
|
43
|
+
value_en
|
44
|
+
}
|
45
|
+
}
|
46
|
+
|
47
|
+
fragment racingBlockFragment on RaceMeeting {
|
48
|
+
jpEsts: pmPools(
|
49
|
+
oddsTypes: [WIN, PLA, TCE, TRI, FF, QTT, DT, TT, SixUP]
|
50
|
+
filters: ["jackpot", "estimatedDividend"]
|
51
|
+
) {
|
52
|
+
leg {
|
53
|
+
number
|
54
|
+
races
|
55
|
+
}
|
56
|
+
oddsType
|
57
|
+
jackpot
|
58
|
+
estimatedDividend
|
59
|
+
mergedPoolId
|
60
|
+
}
|
61
|
+
poolInvs: pmPools(
|
62
|
+
oddsTypes: [WIN, PLA, QIN, QPL, CWA, CWB, CWC, IWN, FCT, TCE, TRI, FF, QTT, DBL, TBL, DT, TT, SixUP]
|
63
|
+
) {
|
64
|
+
id
|
65
|
+
leg {
|
66
|
+
races
|
67
|
+
}
|
68
|
+
}
|
69
|
+
penetrometerReadings(filters: ["first"]) {
|
70
|
+
reading
|
71
|
+
readingTime
|
72
|
+
}
|
73
|
+
hammerReadings(filters: ["first"]) {
|
74
|
+
reading
|
75
|
+
readingTime
|
76
|
+
}
|
77
|
+
changeHistories(filters: ["top3"]) {
|
78
|
+
type
|
79
|
+
time
|
80
|
+
raceNo
|
81
|
+
runnerNo
|
82
|
+
horseName_ch
|
83
|
+
horseName_en
|
84
|
+
jockeyName_ch
|
85
|
+
jockeyName_en
|
86
|
+
scratchHorseName_ch
|
87
|
+
scratchHorseName_en
|
88
|
+
handicapWeight
|
89
|
+
scrResvIndicator
|
90
|
+
}
|
91
|
+
}
|
92
|
+
|
93
|
+
query raceMeetings($date: String, $venueCode: String) {
|
94
|
+
timeOffset {
|
95
|
+
rc
|
96
|
+
}
|
97
|
+
activeMeetings: raceMeetings {
|
98
|
+
id
|
99
|
+
venueCode
|
100
|
+
date
|
101
|
+
status
|
102
|
+
races {
|
103
|
+
no
|
104
|
+
postTime
|
105
|
+
status
|
106
|
+
wageringFieldSize
|
107
|
+
}
|
108
|
+
}
|
109
|
+
raceMeetings(date: $date, venueCode: $venueCode) {
|
110
|
+
id
|
111
|
+
status
|
112
|
+
venueCode
|
113
|
+
date
|
114
|
+
totalNumberOfRace
|
115
|
+
currentNumberOfRace
|
116
|
+
dateOfWeek
|
117
|
+
meetingType
|
118
|
+
totalInvestment
|
119
|
+
country {
|
120
|
+
code
|
121
|
+
namech
|
122
|
+
nameen
|
123
|
+
seq
|
124
|
+
}
|
125
|
+
races {
|
126
|
+
...raceFragment
|
127
|
+
runners {
|
128
|
+
id
|
129
|
+
no
|
130
|
+
standbyNo
|
131
|
+
status
|
132
|
+
name_ch
|
133
|
+
name_en
|
134
|
+
horse {
|
135
|
+
id
|
136
|
+
code
|
137
|
+
}
|
138
|
+
color
|
139
|
+
barrierDrawNumber
|
140
|
+
handicapWeight
|
141
|
+
currentWeight
|
142
|
+
currentRating
|
143
|
+
internationalRating
|
144
|
+
gearInfo
|
145
|
+
racingColorFileName
|
146
|
+
allowance
|
147
|
+
trainerPreference
|
148
|
+
last6run
|
149
|
+
saddleClothNo
|
150
|
+
trumpCard
|
151
|
+
priority
|
152
|
+
finalPosition
|
153
|
+
deadHeat
|
154
|
+
winOdds
|
155
|
+
jockey {
|
156
|
+
code
|
157
|
+
name_en
|
158
|
+
name_ch
|
159
|
+
}
|
160
|
+
trainer {
|
161
|
+
code
|
162
|
+
name_en
|
163
|
+
name_ch
|
164
|
+
}
|
165
|
+
}
|
166
|
+
}
|
167
|
+
obSt: pmPools(oddsTypes: [WIN, PLA]) {
|
168
|
+
leg {
|
169
|
+
races
|
170
|
+
}
|
171
|
+
oddsType
|
172
|
+
comingleStatus
|
173
|
+
}
|
174
|
+
poolInvs: pmPools(
|
175
|
+
oddsTypes: [WIN, PLA, QIN, QPL, CWA, CWB, CWC, IWN, FCT, TCE, TRI, FF, QTT, DBL, TBL, DT, TT, SixUP]
|
176
|
+
) {
|
177
|
+
id
|
178
|
+
leg {
|
179
|
+
number
|
180
|
+
races
|
181
|
+
}
|
182
|
+
status
|
183
|
+
sellStatus
|
184
|
+
oddsType
|
185
|
+
investment
|
186
|
+
mergedPoolId
|
187
|
+
lastUpdateTime
|
188
|
+
}
|
189
|
+
...racingBlockFragment
|
190
|
+
pmPools(oddsTypes: []) {
|
191
|
+
id
|
192
|
+
}
|
193
|
+
jkcInstNo: foPools(oddsTypes: [JKC], filters: ["top"]) {
|
194
|
+
instNo
|
195
|
+
}
|
196
|
+
tncInstNo: foPools(oddsTypes: [TNC], filters: ["top"]) {
|
197
|
+
instNo
|
198
|
+
}
|
199
|
+
}
|
200
|
+
}
|
201
|
+
"""}
|
202
|
+
|
203
|
+
LIVEODDS_PAYLOAD = {
|
204
|
+
"operationName": "racing",
|
205
|
+
"variables": {"date": None, "venueCode": None, "raceNo": None, "oddsTypes": None},
|
206
|
+
"query": """
|
207
|
+
query racing($date: String, $venueCode: String, $oddsTypes: [OddsType], $raceNo: Int) {
|
208
|
+
raceMeetings(date: $date, venueCode: $venueCode) {
|
209
|
+
pmPools(oddsTypes: $oddsTypes, raceNo: $raceNo) {
|
210
|
+
id
|
211
|
+
status
|
212
|
+
sellStatus
|
213
|
+
oddsType
|
214
|
+
lastUpdateTime
|
215
|
+
guarantee
|
216
|
+
minTicketCost
|
217
|
+
name_en
|
218
|
+
name_ch
|
219
|
+
leg {
|
220
|
+
number
|
221
|
+
races
|
222
|
+
}
|
223
|
+
cWinSelections {
|
224
|
+
composite
|
225
|
+
name_ch
|
226
|
+
name_en
|
227
|
+
starters
|
228
|
+
}
|
229
|
+
oddsNodes {
|
230
|
+
combString
|
231
|
+
oddsValue
|
232
|
+
hotFavourite
|
233
|
+
oddsDropValue
|
234
|
+
bankerOdds {
|
235
|
+
combString
|
236
|
+
oddsValue
|
237
|
+
}
|
238
|
+
}
|
239
|
+
}
|
240
|
+
}
|
241
|
+
}""",
|
242
|
+
}
|
243
|
+
|
244
|
+
|
245
|
+
@ttl_cache(maxsize=12, ttl=1000)
|
246
|
+
def _fetch_live_races(date: str=None, venue_code: str=None) -> dict:
|
247
|
+
"""Fetch live race data from HKJC GraphQL endpoint."""
|
248
|
+
payload = RACEMTG_PAYLOAD.copy()
|
249
|
+
payload["variables"] = payload["variables"].copy()
|
250
|
+
payload["variables"]["date"] = date
|
251
|
+
payload["variables"]["venueCode"] = venue_code
|
252
|
+
|
253
|
+
headers = {
|
254
|
+
"Origin": "https://bet.hkjc.com",
|
255
|
+
"Referer": "https://bet.hkjc.com",
|
256
|
+
"Content-Type": "application/json",
|
257
|
+
"Accept": "application/json",
|
258
|
+
"User-Agent": "python-hkjc-fetch/0.1",
|
259
|
+
}
|
260
|
+
|
261
|
+
r = requests.post(HKJC_LIVEODDS_ENDPOINT, json=payload,
|
262
|
+
headers=headers, timeout=10)
|
263
|
+
if r.status_code != 200:
|
264
|
+
raise RuntimeError(f"Request failed: {r.status_code} - {r.text}")
|
265
|
+
|
266
|
+
data = r.json()['data']['raceMeetings'][0]
|
267
|
+
races = data['races']
|
268
|
+
|
269
|
+
race_info = {'Date': data['date'], 'Venue': data['venueCode'], 'Races': {}}
|
270
|
+
for race in races:
|
271
|
+
race_num = race['no']
|
272
|
+
race_name = race['raceName_en']
|
273
|
+
race_dist = race['distance']
|
274
|
+
race_going = race['go_en']
|
275
|
+
race_track = race['raceTrack']['description_en']
|
276
|
+
race_class = race['raceClass_en']
|
277
|
+
race_course = race['raceCourse']['displayCode']
|
278
|
+
|
279
|
+
runners = [{'No': runner['no'],
|
280
|
+
'Name': runner['name_en'],
|
281
|
+
'Dr': runner['barrierDrawNumber'],
|
282
|
+
'Rtg': int(runner['currentRating']),
|
283
|
+
'Wt': int(runner['currentWeight']),
|
284
|
+
'Handicap': int(runner['handicapWeight']),
|
285
|
+
'HorseNo': runner['horse']['code']
|
286
|
+
} for runner in race['runners'] if runner['status'] != "Standby"]
|
287
|
+
race_info['Races'][race_num] = {
|
288
|
+
'No': race_num,
|
289
|
+
'Name': race_name,
|
290
|
+
'Class': race_class,
|
291
|
+
'Course': race_course,
|
292
|
+
'Dist': race_dist,
|
293
|
+
'Going': race_going,
|
294
|
+
'Track': race_track,
|
295
|
+
'Runners': runners
|
296
|
+
}
|
297
|
+
return race_info
|
298
|
+
|
299
|
+
|
300
|
+
@ttl_cache(maxsize=12, ttl=30)
|
301
|
+
def _fetch_live_odds(date: str, venue_code: str, race_number: int, odds_type: Tuple[str] = ('PLA', )) -> List[dict]:
|
302
|
+
"""Fetch live odds data from HKJC GraphQL endpoint."""
|
303
|
+
payload = LIVEODDS_PAYLOAD.copy()
|
304
|
+
payload["variables"] = payload["variables"].copy()
|
305
|
+
payload["variables"]["date"] = date
|
306
|
+
payload["variables"]["venueCode"] = venue_code
|
307
|
+
payload["variables"]["raceNo"] = race_number
|
308
|
+
payload["variables"]["oddsTypes"] = odds_type
|
309
|
+
|
310
|
+
headers = {
|
311
|
+
"Origin": "https://bet.hkjc.com",
|
312
|
+
"Referer": "https://bet.hkjc.com",
|
313
|
+
"Content-Type": "application/json",
|
314
|
+
"Accept": "application/json",
|
315
|
+
"User-Agent": "python-hkjc-fetch/0.1",
|
316
|
+
}
|
317
|
+
|
318
|
+
r = requests.post(HKJC_LIVEODDS_ENDPOINT, json=payload,
|
319
|
+
headers=headers, timeout=10)
|
320
|
+
if r.status_code != 200:
|
321
|
+
raise RuntimeError(f"Request failed: {r.status_code} - {r.text}")
|
322
|
+
|
323
|
+
meetings = r.json().get("data", {}).get("raceMeetings", [])
|
324
|
+
|
325
|
+
return [
|
326
|
+
{"HorseID": node["combString"], "Type": pool.get(
|
327
|
+
"oddsType"), "Odds": float(node["oddsValue"])}
|
328
|
+
for meeting in meetings
|
329
|
+
for pool in meeting.get("pmPools", [])
|
330
|
+
for node in pool.get("oddsNodes", [])
|
331
|
+
]
|
332
|
+
|
333
|
+
|
334
|
+
def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str] = ['WIN', 'PLA', 'QPL', 'QIN']) -> dict:
|
335
|
+
"""Fetch live odds as numpy arrays.
|
336
|
+
|
337
|
+
Args:
|
338
|
+
date (str): Date in 'YYYY-MM-DD' format.
|
339
|
+
venue_code (str): Venue code, e.g., 'ST' for Shatin, 'HV' for Happy Valley.
|
340
|
+
race_number (int): Race number.
|
341
|
+
odds_type (List[str]): Types of odds to fetch. Default is ['WIN', 'PLA', 'QPL', 'QIN']. Currently the following types are supported:
|
342
|
+
- 'WIN': Win odds
|
343
|
+
- 'PLA': Place odds
|
344
|
+
- 'QIN': Quinella odds
|
345
|
+
- 'QPL': Quinella Place odds
|
346
|
+
fit_harville (bool): Whether to fit the odds using Harville model. Default is False.
|
347
|
+
|
348
|
+
Returns:
|
349
|
+
dict: Dictionary with keys as odds types and values as numpy arrays containing the odds.
|
350
|
+
For 'WIN' and 'PLA', the value is a 1D array of win / place odds, indexed by (runner number - 1).
|
351
|
+
For 'QIN' and 'QPL', the value is a symmetric 2D array of quinella / quinella place odds, indexed by (runner number - 1) on both axes.
|
352
|
+
"""
|
353
|
+
race_info = _fetch_live_races(date, venue_code)
|
354
|
+
N = len(race_info['Races'][race_number]['Runners'])
|
355
|
+
|
356
|
+
if (race_info['Date'] != date) or (race_info['Venue'] != venue_code):
|
357
|
+
print(f"[WARNING] Requested {date} {venue_code} but server returned {race_info['Date']} {race_info['Venue']}.")
|
358
|
+
date = race_info['Date']
|
359
|
+
venue_code = race_info['Venue']
|
360
|
+
|
361
|
+
data = _fetch_live_odds(date, venue_code, race_number,
|
362
|
+
odds_type=tuple(odds_type))
|
363
|
+
|
364
|
+
odds = {'WIN': np.full(N, np.nan, dtype=float),
|
365
|
+
'PLA': np.full(N, np.nan, dtype=float),
|
366
|
+
'QIN': np.full((N, N), np.nan, dtype=float),
|
367
|
+
'QPL': np.full((N, N), np.nan, dtype=float)}
|
368
|
+
|
369
|
+
for entry in data:
|
370
|
+
if entry["Type"] in ["QIN", "QPL"]:
|
371
|
+
horse_ids = list(map(int, entry["HorseID"].split(",")))
|
372
|
+
odds[entry["Type"]][horse_ids[0] - 1,
|
373
|
+
horse_ids[1] - 1] = entry["Odds"]
|
374
|
+
odds[entry["Type"]][horse_ids[1] - 1,
|
375
|
+
horse_ids[0] - 1] = entry["Odds"]
|
376
|
+
elif entry["Type"] in ["PLA", "WIN"]:
|
377
|
+
odds[entry["Type"]][int(entry["HorseID"]) - 1] = entry["Odds"]
|
378
|
+
|
379
|
+
return {t: odds[t] for t in odds_type}
|
hkjc/processing.py
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
from __future__ import annotations
|
4
4
|
from typing import Tuple, List, Union
|
5
5
|
|
6
|
-
from .live_odds import live_odds
|
6
|
+
from .live import live_odds
|
7
7
|
from .strategy import qpbanker, place_only
|
8
8
|
from .harville_model import fit_harville_to_odds
|
9
9
|
from .historical import _extract_horse_data, _extract_race_data, _clean_horse_data
|
@@ -63,7 +63,12 @@ def generate_historical_data(start_date: str, end_date: str) -> pl.DataFrame:
|
|
63
63
|
# Use horse track records
|
64
64
|
dfs = [_extract_horse_data(horse_id) for horse_id in tqdm(horse_ids, desc='Processing horses ...', leave=False)]
|
65
65
|
df = pl.concat(dfs)
|
66
|
-
|
66
|
+
|
67
|
+
try:
|
68
|
+
return _clean_horse_data(df).filter(pl.col('Date').is_between(start_dt, end_dt))
|
69
|
+
except:
|
70
|
+
print('Failed to clean data. Returning raw data for debug.')
|
71
|
+
return df
|
67
72
|
|
68
73
|
|
69
74
|
# ==========================
|
@@ -1,12 +1,12 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: hkjc
|
3
|
-
Version: 0.3.17
|
3
|
+
Version: 0.3.19
|
4
4
|
Summary: Library for scrapping HKJC data and perform basic analysis
|
5
5
|
Requires-Python: >=3.11
|
6
6
|
Requires-Dist: beautifulsoup4>=4.14.2
|
7
7
|
Requires-Dist: cachetools>=6.2.0
|
8
8
|
Requires-Dist: fastexcel>=0.16.0
|
9
|
-
Requires-Dist:
|
9
|
+
Requires-Dist: flask>=3.1.2
|
10
10
|
Requires-Dist: numba>=0.62.1
|
11
11
|
Requires-Dist: numpy>=2.3.3
|
12
12
|
Requires-Dist: polars>=1.33.1
|
@@ -0,0 +1,14 @@
|
|
1
|
+
hkjc/__init__.py,sha256=XSm9N6YbZ2SzyxjO9aR26ctB4Z1-VeBImuroSgncUfk,737
|
2
|
+
hkjc/features.py,sha256=1mcF9Pq2LsnQ8yIv2F8Uyg1HXz7LdNcPf9RDzFN3sbA,297
|
3
|
+
hkjc/harville_model.py,sha256=MZjPLS-1nbEhp1d4Syuq13DtraKnd7TlNqBmOOCwxgc,15976
|
4
|
+
hkjc/historical.py,sha256=Yujb4Q2cTkvVvvZxVaSoawbwwxfzq03lprG_s_4H7Dk,7682
|
5
|
+
hkjc/live.py,sha256=CfMeHRQfhKSmhQaexM99sdP0KRbIEqg2DIvNPc1gohk,10696
|
6
|
+
hkjc/processing.py,sha256=uNjM5eeH9Mj8Dg9-9K7z-7xeufaXJT42F49zUHzj0h0,6968
|
7
|
+
hkjc/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
|
+
hkjc/speedpro.py,sha256=Y2Z3GYGeePc4sM-ZnCHXCI1N7L-_j9nrMqS3CC5BBSo,2031
|
9
|
+
hkjc/utils.py,sha256=4CA_FPf_U3GvzoLkqBX0qDPZgrSvKJKvbP7VWqd5FiA,6323
|
10
|
+
hkjc/strategy/place_only.py,sha256=lHPjTSj8PzghxncNBg8FI4T4HJigekB9a3bV7l7VtPA,2079
|
11
|
+
hkjc/strategy/qpbanker.py,sha256=MQxjwsfhllKZroKS8w8Q3bi3HMjGc1DAyBIjNZAp3yQ,4805
|
12
|
+
hkjc-0.3.19.dist-info/METADATA,sha256=npklDb_gSAZHliXMNDZYOlRIuV_Klkzgw4ELdW9zzjc,480
|
13
|
+
hkjc-0.3.19.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
14
|
+
hkjc-0.3.19.dist-info/RECORD,,
|
hkjc/analysis.py
DELETED
hkjc/live_odds.py
DELETED
@@ -1,136 +0,0 @@
|
|
1
|
-
"""Functions to fetch and process data from HKJC
|
2
|
-
"""
|
3
|
-
from __future__ import annotations
|
4
|
-
from typing import Tuple, List
|
5
|
-
|
6
|
-
import requests
|
7
|
-
from cachetools.func import ttl_cache
|
8
|
-
import numpy as np
|
9
|
-
|
10
|
-
from .utils import _validate_date, _validate_venue_code
|
11
|
-
|
12
|
-
HKJC_LIVEODDS_ENDPOINT = "https://info.cld.hkjc.com/graphql/base/"
|
13
|
-
|
14
|
-
LIVEODDS_PAYLOAD = {
|
15
|
-
"operationName": "racing",
|
16
|
-
"variables": {"date": None, "venueCode": None, "raceNo": None, "oddsTypes": None},
|
17
|
-
"query": """
|
18
|
-
query racing($date: String, $venueCode: String, $oddsTypes: [OddsType], $raceNo: Int) {
|
19
|
-
raceMeetings(date: $date, venueCode: $venueCode) {
|
20
|
-
pmPools(oddsTypes: $oddsTypes, raceNo: $raceNo) {
|
21
|
-
id
|
22
|
-
status
|
23
|
-
sellStatus
|
24
|
-
oddsType
|
25
|
-
lastUpdateTime
|
26
|
-
guarantee
|
27
|
-
minTicketCost
|
28
|
-
name_en
|
29
|
-
name_ch
|
30
|
-
leg {
|
31
|
-
number
|
32
|
-
races
|
33
|
-
}
|
34
|
-
cWinSelections {
|
35
|
-
composite
|
36
|
-
name_ch
|
37
|
-
name_en
|
38
|
-
starters
|
39
|
-
}
|
40
|
-
oddsNodes {
|
41
|
-
combString
|
42
|
-
oddsValue
|
43
|
-
hotFavourite
|
44
|
-
oddsDropValue
|
45
|
-
bankerOdds {
|
46
|
-
combString
|
47
|
-
oddsValue
|
48
|
-
}
|
49
|
-
}
|
50
|
-
}
|
51
|
-
}
|
52
|
-
}""",
|
53
|
-
}
|
54
|
-
|
55
|
-
|
56
|
-
@ttl_cache(maxsize=12, ttl=30)
|
57
|
-
def _fetch_live_odds(date: str, venue_code: str, race_number: int, odds_type: Tuple[str] = ('PLA', 'QPL')) -> Tuple[dict]:
|
58
|
-
"""Fetch live odds data from HKJC GraphQL endpoint."""
|
59
|
-
payload = LIVEODDS_PAYLOAD.copy()
|
60
|
-
payload["variables"] = payload["variables"].copy()
|
61
|
-
payload["variables"]["date"] = date
|
62
|
-
payload["variables"]["venueCode"] = venue_code
|
63
|
-
payload["variables"]["raceNo"] = race_number
|
64
|
-
payload["variables"]["oddsTypes"] = odds_type
|
65
|
-
|
66
|
-
headers = {
|
67
|
-
"Origin": "https://bet.hkjc.com",
|
68
|
-
"Referer": "https://bet.hkjc.com",
|
69
|
-
"Content-Type": "application/json",
|
70
|
-
"Accept": "application/json",
|
71
|
-
"User-Agent": "python-hkjc-fetch/0.1",
|
72
|
-
}
|
73
|
-
|
74
|
-
r = requests.post(HKJC_LIVEODDS_ENDPOINT, json=payload,
|
75
|
-
headers=headers, timeout=10)
|
76
|
-
if r.status_code != 200:
|
77
|
-
raise RuntimeError(f"Request failed: {r.status_code} - {r.text}")
|
78
|
-
|
79
|
-
meetings = r.json().get("data", {}).get("raceMeetings", [])
|
80
|
-
|
81
|
-
return [
|
82
|
-
{"HorseID": node["combString"], "Type": pool.get(
|
83
|
-
"oddsType"), "Odds": float(node["oddsValue"])}
|
84
|
-
for meeting in meetings
|
85
|
-
for pool in meeting.get("pmPools", [])
|
86
|
-
for node in pool.get("oddsNodes", [])
|
87
|
-
]
|
88
|
-
|
89
|
-
|
90
|
-
def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str] = ['PLA', 'QPL']) -> dict:
|
91
|
-
"""Fetch live odds as numpy arrays.
|
92
|
-
|
93
|
-
Args:
|
94
|
-
date (str): Date in 'YYYY-MM-DD' format.
|
95
|
-
venue_code (str): Venue code, e.g., 'ST' for Shatin, 'HV' for Happy Valley.
|
96
|
-
race_number (int): Race number.
|
97
|
-
odds_type (List[str]): Types of odds to fetch. Default is ['PLA', 'QPL']. Currently the following types are supported:
|
98
|
-
- 'WIN': Win odds
|
99
|
-
- 'PLA': Place odds
|
100
|
-
- 'QIN': Quinella odds
|
101
|
-
- 'QPL': Quinella Place odds
|
102
|
-
fit_harville (bool): Whether to fit the odds using Harville model. Default is False.
|
103
|
-
|
104
|
-
Returns:
|
105
|
-
dict: Dictionary with keys as odds types and values as numpy arrays containing the odds.
|
106
|
-
If odds_type is 'WIN','PLA', returns a 1D array of place odds.
|
107
|
-
If odds_type is 'QIN','QPL', returns a 2D array of quinella place odds.
|
108
|
-
"""
|
109
|
-
_validate_date(date)
|
110
|
-
_validate_venue_code(venue_code)
|
111
|
-
|
112
|
-
mandatory_types = ['PLA']
|
113
|
-
|
114
|
-
data = _fetch_live_odds(date, venue_code, race_number,
|
115
|
-
odds_type=tuple(set(mandatory_types+odds_type)))
|
116
|
-
|
117
|
-
# use place odds to determine number of horses
|
118
|
-
pla_data = [entry for entry in data if entry["Type"] == "PLA"]
|
119
|
-
N = len(pla_data)
|
120
|
-
|
121
|
-
odds = {'WIN': np.full(N, np.nan, dtype=float),
|
122
|
-
'PLA': np.full(N, np.nan, dtype=float),
|
123
|
-
'QIN': np.full((N, N), np.nan, dtype=float),
|
124
|
-
'QPL': np.full((N, N), np.nan, dtype=float)}
|
125
|
-
|
126
|
-
for entry in data:
|
127
|
-
if entry["Type"] in ["QIN", "QPL"]:
|
128
|
-
horse_ids = list(map(int, entry["HorseID"].split(",")))
|
129
|
-
odds[entry["Type"]][horse_ids[0] - 1,
|
130
|
-
horse_ids[1] - 1] = entry["Odds"]
|
131
|
-
odds[entry["Type"]][horse_ids[1] - 1,
|
132
|
-
horse_ids[0] - 1] = entry["Odds"]
|
133
|
-
elif entry["Type"] in ["PLA", "WIN"]:
|
134
|
-
odds[entry["Type"]][int(entry["HorseID"]) - 1] = entry["Odds"]
|
135
|
-
|
136
|
-
return {t: odds[t] for t in odds_type}
|
hkjc-0.3.17.dist-info/RECORD
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
hkjc/__init__.py,sha256=TI7PVhmoWSvYX-xdTEdaT3jfY99LiYQFRQZaIwBhJd8,785
|
2
|
-
hkjc/analysis.py,sha256=0042_NMIkQCl0J6B0P4TFfrBDCnm2B6jsCZKOEO30yI,108
|
3
|
-
hkjc/harville_model.py,sha256=MZjPLS-1nbEhp1d4Syuq13DtraKnd7TlNqBmOOCwxgc,15976
|
4
|
-
hkjc/historical.py,sha256=yQQAx8vlr2EqcPazpYp1x2ku7dy3imQoDWImHCRv1QA,8330
|
5
|
-
hkjc/live_odds.py,sha256=G4ELBBp1d2prxye9kKzu2pwtS4vSfRPOmEuT7-Nd-3A,4741
|
6
|
-
hkjc/processing.py,sha256=xrvEUgu_jz8ZxevOsRsYz0T7pWyNtSCMI6LUYByOLOw,6812
|
7
|
-
hkjc/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
|
-
hkjc/speedpro.py,sha256=Y2Z3GYGeePc4sM-ZnCHXCI1N7L-_j9nrMqS3CC5BBSo,2031
|
9
|
-
hkjc/utils.py,sha256=4CA_FPf_U3GvzoLkqBX0qDPZgrSvKJKvbP7VWqd5FiA,6323
|
10
|
-
hkjc/strategy/place_only.py,sha256=lHPjTSj8PzghxncNBg8FI4T4HJigekB9a3bV7l7VtPA,2079
|
11
|
-
hkjc/strategy/qpbanker.py,sha256=MQxjwsfhllKZroKS8w8Q3bi3HMjGc1DAyBIjNZAp3yQ,4805
|
12
|
-
hkjc-0.3.17.dist-info/METADATA,sha256=gKSkXKYo_HCg2S4ZeAjnqZniWV0V2kGpRH_g25K9Rmo,481
|
13
|
-
hkjc-0.3.17.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
14
|
-
hkjc-0.3.17.dist-info/RECORD,,
|
File without changes
|