hkjc 0.3.21__py3-none-any.whl → 0.3.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hkjc/historical.py CHANGED
@@ -15,12 +15,21 @@ HKJC_HORSE_URL_TEMPLATE = "https://racing.hkjc.com/racing/information/English/Ho
15
15
  incidents = ['DISQ', 'DNF', 'FE', 'ML', 'PU', 'TNP', 'TO',
16
16
  'UR', 'VOID', 'WR', 'WV', 'WV-A', 'WX', 'WX-A', 'WXNR']
17
17
 
18
+ REQUEST_TIMEOUT = 10
19
+
20
+ HTML_HEADERS = {
21
+ "Origin": "https://racing.hkjc.com",
22
+ "Referer": "https://racing.hkjc.com",
23
+ "Content-Type": "text/plain",
24
+ "Accept": "*/*",
25
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36",
26
+ }
18
27
 
19
28
  @ttl_cache(maxsize=100, ttl=3600)
20
29
  def _soupify(url: str) -> BeautifulSoup:
21
30
  """Fetch and parse a webpage and return BeautifulSoup object
22
31
  """
23
- response = requests.get(url, timeout=180)
32
+ response = requests.get(url, timeout=REQUEST_TIMEOUT, headers=HTML_HEADERS)
24
33
  response.raise_for_status()
25
34
  return BeautifulSoup(response.content, 'html.parser')
26
35
 
@@ -43,6 +52,9 @@ def _soupify_horse_page(horse_no: str) -> BeautifulSoup:
43
52
  def _classify_running_style(df: pl.DataFrame, running_pos_col="RunningPosition") -> pl.DataFrame:
44
53
  """Classify running style based on RunningPosition column
45
54
  """
55
+ if df.height == 0:
56
+ return df
57
+
46
58
  # Split the RunningPosition column into separate columns and convert to integers
47
59
  df = df.with_columns(
48
60
  pl.col(running_pos_col)
@@ -77,9 +89,9 @@ def _extract_horse_data(horse_no: str) -> pl.DataFrame:
77
89
  table = soup.find('table', class_='bigborder')
78
90
  horse_data = _parse_html_table(table).filter(
79
91
  pl.col('Date') != '') # Remove empty rows
80
- horse_data = _classify_running_style(horse_data)
81
-
82
- horse_data = horse_data.with_columns(pl.lit(horse_no).alias('HorseNo'))
92
+ if horse_data.height > 0:
93
+ horse_data = _classify_running_style(horse_data)
94
+ horse_data = horse_data.with_columns(pl.lit(horse_no).alias('HorseNo'))
83
95
 
84
96
  return horse_data
85
97
 
@@ -87,6 +99,9 @@ def _extract_horse_data(horse_no: str) -> pl.DataFrame:
87
99
  def _clean_horse_data(df: pl.DataFrame) -> pl.DataFrame:
88
100
  """ Clean and convert horse data to suitable data types
89
101
  """
102
+ if df.height == 0:
103
+ return df
104
+
90
105
  df = df.with_columns(
91
106
  pl.col('Pla').str.split(' ').list.first().alias('Pla')
92
107
  ).filter(~pl.col('Pla').is_in(incidents))
@@ -105,7 +120,8 @@ def _clean_horse_data(df: pl.DataFrame) -> pl.DataFrame:
105
120
  .with_columns(
106
121
  (
107
122
  pl.col("FinishTime").str.splitn(".", 2).struct.field("field_0").cast(pl.Int64) * 60 +
108
- pl.col("FinishTime").str.splitn(".", 2).struct.field("field_1").cast(pl.Float64)
123
+ pl.col("FinishTime").str.splitn(
124
+ ".", 2).struct.field("field_1").cast(pl.Float64)
109
125
  ).cast(pl.Float64).round(2).alias("FinishTime")
110
126
  ))
111
127
 
@@ -134,6 +150,9 @@ def get_horse_data(horse_no: str) -> pl.DataFrame:
134
150
  def _clean_race_data(df: pl.DataFrame) -> pl.DataFrame:
135
151
  """ Clean and convert horse data to suitable data types
136
152
  """
153
+ if df.height == 0:
154
+ return df
155
+
137
156
  df = df.with_columns(
138
157
  pl.col('Pla').str.split(' ').list.first().alias('Pla')
139
158
  ).filter(~pl.col('Pla').is_in(incidents))
@@ -150,7 +169,8 @@ def _clean_race_data(df: pl.DataFrame) -> pl.DataFrame:
150
169
  df = df.with_columns(
151
170
  (
152
171
  pl.col("FinishTime").str.splitn(":", 2).struct.field("field_0").cast(pl.Int64) * 60 +
153
- pl.col("FinishTime").str.splitn(":", 2).struct.field("field_1").cast(pl.Float64)
172
+ pl.col("FinishTime").str.splitn(
173
+ ":", 2).struct.field("field_1").cast(pl.Float64)
154
174
  ).cast(pl.Float64).round(2).alias("FinishTime")
155
175
  )
156
176
 
hkjc/live.py CHANGED
@@ -7,6 +7,8 @@ import requests
7
7
  from cachetools.func import ttl_cache
8
8
  import numpy as np
9
9
 
10
+ from .utils import _try_int
11
+
10
12
  HKJC_LIVEODDS_ENDPOINT = "https://info.cld.hkjc.com/graphql/base/"
11
13
 
12
14
  RACEMTG_PAYLOAD = {
@@ -241,25 +243,29 @@ query racing($date: String, $venueCode: String, $oddsTypes: [OddsType], $raceNo:
241
243
  }""",
242
244
  }
243
245
 
246
+ JSON_HEADERS = {
247
+ "Origin": "https://bet.hkjc.com",
248
+ "Referer": "https://bet.hkjc.com",
249
+ "Content-Type": "application/json",
250
+ "Accept": "application/json",
251
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36",
252
+ }
253
+
254
+ REQUEST_TIMEOUT = 30
255
+
244
256
 
245
257
  @ttl_cache(maxsize=12, ttl=1000)
246
- def _fetch_live_races(date: str=None, venue_code: str=None) -> dict:
258
+ def _fetch_live_races(date: str = None, venue_code: str = None) -> dict:
247
259
  """Fetch live race data from HKJC GraphQL endpoint."""
248
260
  payload = RACEMTG_PAYLOAD.copy()
249
261
  payload["variables"] = payload["variables"].copy()
250
262
  payload["variables"]["date"] = date
251
263
  payload["variables"]["venueCode"] = venue_code
252
264
 
253
- headers = {
254
- "Origin": "https://bet.hkjc.com",
255
- "Referer": "https://bet.hkjc.com",
256
- "Content-Type": "application/json",
257
- "Accept": "application/json",
258
- "User-Agent": "python-hkjc-fetch/0.1",
259
- }
265
+ headers = JSON_HEADERS
260
266
 
261
267
  r = requests.post(HKJC_LIVEODDS_ENDPOINT, json=payload,
262
- headers=headers, timeout=10)
268
+ headers=headers, timeout=REQUEST_TIMEOUT)
263
269
  if r.status_code != 200:
264
270
  raise RuntimeError(f"Request failed: {r.status_code} - {r.text}")
265
271
 
@@ -279,9 +285,9 @@ def _fetch_live_races(date: str=None, venue_code: str=None) -> dict:
279
285
  runners = [{'No': runner['no'],
280
286
  'Name': runner['name_en'],
281
287
  'Dr': runner['barrierDrawNumber'],
282
- 'Rtg': int(runner['currentRating']),
283
- 'Wt': int(runner['currentWeight']),
284
- 'Handicap': int(runner['handicapWeight']),
288
+ 'Rtg': _try_int(runner['currentRating']),
289
+ 'Wt': _try_int(runner['currentWeight']),
290
+ 'Handicap': _try_int(runner['handicapWeight']),
285
291
  'HorseNo': runner['horse']['code']
286
292
  } for runner in race['runners'] if runner['status'] != "Standby"]
287
293
  race_info['Races'][race_num] = {
@@ -307,16 +313,10 @@ def _fetch_live_odds(date: str, venue_code: str, race_number: int, odds_type: Tu
307
313
  payload["variables"]["raceNo"] = race_number
308
314
  payload["variables"]["oddsTypes"] = odds_type
309
315
 
310
- headers = {
311
- "Origin": "https://bet.hkjc.com",
312
- "Referer": "https://bet.hkjc.com",
313
- "Content-Type": "application/json",
314
- "Accept": "application/json",
315
- "User-Agent": "python-hkjc-fetch/0.1",
316
- }
316
+ headers = JSON_HEADERS
317
317
 
318
318
  r = requests.post(HKJC_LIVEODDS_ENDPOINT, json=payload,
319
- headers=headers, timeout=10)
319
+ headers=headers, timeout=REQUEST_TIMEOUT)
320
320
  if r.status_code != 200:
321
321
  raise RuntimeError(f"Request failed: {r.status_code} - {r.text}")
322
322
 
@@ -354,7 +354,8 @@ def live_odds(date: str, venue_code: str, race_number: int, odds_type: List[str]
354
354
  N = len(race_info['Races'][race_number]['Runners'])
355
355
 
356
356
  if (race_info['Date'] != date) or (race_info['Venue'] != venue_code):
357
- print(f"[WARNING] Requested {date} {venue_code} but server returned {race_info['Date']} {race_info['Venue']}.")
357
+ print(
358
+ f"[WARNING] Requested {date} {venue_code} but server returned {race_info['Date']} {race_info['Venue']}.")
358
359
  date = race_info['Date']
359
360
  venue_code = race_info['Venue']
360
361
 
hkjc/utils.py CHANGED
@@ -4,6 +4,12 @@ from datetime import datetime as dt
4
4
  import bs4
5
5
  import re
6
6
 
7
+ def _try_int(value: str) -> int:
8
+ try:
9
+ return int(value)
10
+ except:
11
+ return 0
12
+
7
13
 
8
14
  def _validate_date(date_str: str) -> bool:
9
15
  # validate date format
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hkjc
3
- Version: 0.3.21
3
+ Version: 0.3.22
4
4
  Summary: Library for scrapping HKJC data and perform basic analysis
5
5
  Requires-Python: >=3.11
6
6
  Requires-Dist: beautifulsoup4>=4.14.2
@@ -1,14 +1,14 @@
1
1
  hkjc/__init__.py,sha256=XSm9N6YbZ2SzyxjO9aR26ctB4Z1-VeBImuroSgncUfk,737
2
2
  hkjc/features.py,sha256=LicwtKBpMzpz_dSX9bjoCLLaRUu8oeZo1AloTe7v7sI,298
3
3
  hkjc/harville_model.py,sha256=WSA_1EcNOHKGraP6WVHJ3FXZPGrDrjKhJc_q70KKx80,20188
4
- hkjc/historical.py,sha256=aONchf7CMNs2B-WVDS_GWg8g0U0ZEH-FjbfhdJwc_N0,7683
5
- hkjc/live.py,sha256=CfMeHRQfhKSmhQaexM99sdP0KRbIEqg2DIvNPc1gohk,10696
4
+ hkjc/historical.py,sha256=22qpMFI7IQ5bfXuGma5jJAtx8chRp4FE-XCIF4RqwTo,8238
5
+ hkjc/live.py,sha256=YZgwSLDFq5v1yxNwjTtQxgU2ru4yvooxpjHksDar1TA,10691
6
6
  hkjc/processing.py,sha256=hQnHxl6HYlFOeSLSOCVsemgTKcwt9_tYUQI-itpvjUg,7188
7
7
  hkjc/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  hkjc/speedpro.py,sha256=Y2Z3GYGeePc4sM-ZnCHXCI1N7L-_j9nrMqS3CC5BBSo,2031
9
- hkjc/utils.py,sha256=4CA_FPf_U3GvzoLkqBX0qDPZgrSvKJKvbP7VWqd5FiA,6323
9
+ hkjc/utils.py,sha256=uAiFmy5NXsADUiD1-MCPgs1hs4N3e7tVYtSREkxwKSQ,6425
10
10
  hkjc/strategy/place_only.py,sha256=lHPjTSj8PzghxncNBg8FI4T4HJigekB9a3bV7l7VtPA,2079
11
11
  hkjc/strategy/qpbanker.py,sha256=MQxjwsfhllKZroKS8w8Q3bi3HMjGc1DAyBIjNZAp3yQ,4805
12
- hkjc-0.3.21.dist-info/METADATA,sha256=YuIC0EvFVS3Z-8cwdzczMV7qQxMYvIKtO442iUQu5Jg,480
13
- hkjc-0.3.21.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
14
- hkjc-0.3.21.dist-info/RECORD,,
12
+ hkjc-0.3.22.dist-info/METADATA,sha256=gM8crCYg0Wq-W8GFa0Jyk6RYdT-3MUXrAXpl6ID7VcM,480
13
+ hkjc-0.3.22.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
14
+ hkjc-0.3.22.dist-info/RECORD,,
File without changes