hkjc 0.3.14__py3-none-any.whl → 0.3.15__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
hkjc/historical.py CHANGED
@@ -7,7 +7,7 @@ import polars as pl
7
7
  from bs4 import BeautifulSoup
8
8
  from cachetools.func import ttl_cache
9
9
 
10
- from .utils import _validate_date, _validate_venue_code, _parse_html_table
10
+ from .utils import _parse_html_table
11
11
 
12
12
  HKJC_RACE_URL_TEMPLATE = "https://racing.hkjc.com/racing/information/English/Racing/LocalResults.aspx?RaceDate={date}&Racecourse={venue_code}&RaceNo={race_number}"
13
13
  HKJC_HORSE_URL_TEMPLATE = "https://racing.hkjc.com/racing/information/English/Horse/Horse.aspx?HorseId={horse_id}"
@@ -77,15 +77,12 @@ def _extract_horse_data(horse_id: str) -> pl.DataFrame:
77
77
  # Extract horse profile info
78
78
  table = soup.find_all('table', class_='table_eng_text')
79
79
  profile_data = _parse_html_table(table[0], skip_header=True)
80
- country, age = profile_data.filter(pl.col("column_0").str.starts_with("Country"))['column_2'].item(0).split('/')
81
80
  profile_data = _parse_html_table(table[1], skip_header=True)
82
81
  current_rating = profile_data.filter(pl.col("column_0").str.starts_with("Current Rating"))['column_2'].item(0)
83
82
  season_start_rating = profile_data.filter(pl.col("column_0").str.starts_with("Start of Season Rating"))['column_2'].item(0)
84
83
 
85
84
  horse_info = {
86
85
  'HorseID': horse_id,
87
- 'OriginCountry': country.strip(),
88
- 'Age': int(age),
89
86
  'CurrentRating': int(current_rating),
90
87
  'SeasonStartRating': int(season_start_rating)
91
88
  }
hkjc/processing.py CHANGED
@@ -14,6 +14,7 @@ import numpy as np
14
14
  from itertools import combinations
15
15
  from tqdm import tqdm
16
16
  from datetime import datetime as dt
17
+ from joblib import delayed, Parallel
17
18
 
18
19
 
19
20
  def _all_subsets(lst): return [list(x) for r in range(
@@ -27,13 +28,18 @@ incidents = ['DISQ', 'DNF', 'FE', 'ML', 'PU', 'TNP', 'TO',
27
28
  'UR', 'VOID', 'WR', 'WV', 'WV-A', 'WX', 'WX-A', 'WXNR']
28
29
 
29
30
 
30
- def _historical_process_single_date_venue(date: str, venue_code: str) -> Union[pl.DataFrame, None]:
31
- for race_number in range(1, 12):
31
+ def _historical_process_single_date_venue(date: str, venue_code: str) -> List[pl.DataFrame]:
32
+ dfs = []
33
+ iter_date = tqdm(range(1, 12), desc=f"Processing {date} {venue_code} ...", leave=False)
34
+ for race_number in iter_date:
32
35
  try:
33
- _extract_race_data(date.strftime('%Y/%m/%d'),
34
- venue_code, race_number)
36
+ dfs.append(_extract_race_data(date.strftime('%Y/%m/%d'),
37
+ venue_code, race_number))
35
38
  except:
36
- return None
39
+ if race_number == 1:
40
+ iter_date.close()
41
+ return []
42
+ return dfs
37
43
 
38
44
 
39
45
  def generate_historical_data(start_date: str, end_date: str) -> pl.DataFrame:
@@ -47,10 +53,10 @@ def generate_historical_data(start_date: str, end_date: str) -> pl.DataFrame:
47
53
 
48
54
  for date in tqdm(pl.date_range(start_dt, end_dt, interval='1d', eager=True)):
49
55
  for venue_code in ['ST', 'HV']:
50
- df = _historical_process_single_date_venue(date, venue_code)
51
- if df is None:
52
- continue
53
- dfs.append(df)
56
+ dfs += _historical_process_single_date_venue(date, venue_code)
57
+
58
+ if dfs == []:
59
+ raise ValueError("Failed to obtain any race data. This could be due to invalid date range, or server requests limit. Please try again later.")
54
60
 
55
61
  df = (pl.concat(dfs)
56
62
  .filter(~pl.col('Pla').is_in(incidents))
@@ -69,10 +75,11 @@ def generate_historical_data(start_date: str, end_date: str) -> pl.DataFrame:
69
75
  pl.col('WinOdds').cast(pl.Float64, strict=False)
70
76
  ])
71
77
 
72
- df = df.with_columns(pl.col('Finish Time')
73
- .str.strptime(pl.Duration, format='%M:%S.%f', strict=False)
74
- .dt.total_seconds()
75
- .alias('Finish Time')
78
+ df = df.with_columns(
79
+ (
80
+ pl.col("FinishTime").str.split(":").list.get(0).cast(pl.Int64) * 60 +
81
+ pl.col("FinishTime").str.split(":").list.get(1).cast(pl.Float64)
82
+ ).cast(pl.Float64).alias("FinishTime")
76
83
  )
77
84
 
78
85
  return df
@@ -1,11 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hkjc
3
- Version: 0.3.14
3
+ Version: 0.3.15
4
4
  Summary: Library for scrapping HKJC data and perform basic analysis
5
5
  Requires-Python: >=3.11
6
6
  Requires-Dist: beautifulsoup4>=4.14.2
7
7
  Requires-Dist: cachetools>=6.2.0
8
8
  Requires-Dist: fastexcel>=0.16.0
9
+ Requires-Dist: joblib>=1.5.2
9
10
  Requires-Dist: numba>=0.62.1
10
11
  Requires-Dist: numpy>=2.3.3
11
12
  Requires-Dist: polars>=1.33.1
@@ -1,13 +1,13 @@
1
1
  hkjc/__init__.py,sha256=TI7PVhmoWSvYX-xdTEdaT3jfY99LiYQFRQZaIwBhJd8,785
2
2
  hkjc/harville_model.py,sha256=MZjPLS-1nbEhp1d4Syuq13DtraKnd7TlNqBmOOCwxgc,15976
3
- hkjc/historical.py,sha256=HRsy8O2NqJQ5Ljcs1ySppngL7kO0rqC49vkIKIDp1Do,6027
3
+ hkjc/historical.py,sha256=FRECc4pmozjFKkFeWN0vTzECF9QOS7URyJoCfTt5hlw,5805
4
4
  hkjc/live_odds.py,sha256=G4ELBBp1d2prxye9kKzu2pwtS4vSfRPOmEuT7-Nd-3A,4741
5
- hkjc/processing.py,sha256=K3mlPiGaE5PlVcbjLpn0QWNpMNOFiaLXFqWGdIBe2xw,7082
5
+ hkjc/processing.py,sha256=XeVrF5KKkU3Oy-vqPvMgM22QHVTCVCuml2IsIGdRbYw,7483
6
6
  hkjc/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
7
  hkjc/speedpro.py,sha256=Y2Z3GYGeePc4sM-ZnCHXCI1N7L-_j9nrMqS3CC5BBSo,2031
8
8
  hkjc/utils.py,sha256=4CA_FPf_U3GvzoLkqBX0qDPZgrSvKJKvbP7VWqd5FiA,6323
9
9
  hkjc/strategy/place_only.py,sha256=lHPjTSj8PzghxncNBg8FI4T4HJigekB9a3bV7l7VtPA,2079
10
10
  hkjc/strategy/qpbanker.py,sha256=MQxjwsfhllKZroKS8w8Q3bi3HMjGc1DAyBIjNZAp3yQ,4805
11
- hkjc-0.3.14.dist-info/METADATA,sha256=u-6OgmWRvNgS_RySOBRWzowDULmKE7Q0TNPzAQCIPg8,452
12
- hkjc-0.3.14.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
13
- hkjc-0.3.14.dist-info/RECORD,,
11
+ hkjc-0.3.15.dist-info/METADATA,sha256=2nQL1EImJ0hXnWFdGnpORIknMdaaFham-Pw9cgjUiO4,481
12
+ hkjc-0.3.15.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
13
+ hkjc-0.3.15.dist-info/RECORD,,
File without changes