hkjc-0.3.14-py3-none-any.whl → hkjc-0.3.15-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hkjc/historical.py
CHANGED
@@ -7,7 +7,7 @@ import polars as pl
|
|
7
7
|
from bs4 import BeautifulSoup
|
8
8
|
from cachetools.func import ttl_cache
|
9
9
|
|
10
|
-
from .utils import
|
10
|
+
from .utils import _parse_html_table
|
11
11
|
|
12
12
|
HKJC_RACE_URL_TEMPLATE = "https://racing.hkjc.com/racing/information/English/Racing/LocalResults.aspx?RaceDate={date}&Racecourse={venue_code}&RaceNo={race_number}"
|
13
13
|
HKJC_HORSE_URL_TEMPLATE = "https://racing.hkjc.com/racing/information/English/Horse/Horse.aspx?HorseId={horse_id}"
|
@@ -77,15 +77,12 @@ def _extract_horse_data(horse_id: str) -> pl.DataFrame:
|
|
77
77
|
# Extract horse profile info
|
78
78
|
table = soup.find_all('table', class_='table_eng_text')
|
79
79
|
profile_data = _parse_html_table(table[0], skip_header=True)
|
80
|
-
country, age = profile_data.filter(pl.col("column_0").str.starts_with("Country"))['column_2'].item(0).split('/')
|
81
80
|
profile_data = _parse_html_table(table[1], skip_header=True)
|
82
81
|
current_rating = profile_data.filter(pl.col("column_0").str.starts_with("Current Rating"))['column_2'].item(0)
|
83
82
|
season_start_rating = profile_data.filter(pl.col("column_0").str.starts_with("Start of Season Rating"))['column_2'].item(0)
|
84
83
|
|
85
84
|
horse_info = {
|
86
85
|
'HorseID': horse_id,
|
87
|
-
'OriginCountry': country.strip(),
|
88
|
-
'Age': int(age),
|
89
86
|
'CurrentRating': int(current_rating),
|
90
87
|
'SeasonStartRating': int(season_start_rating)
|
91
88
|
}
|
hkjc/processing.py
CHANGED
@@ -14,6 +14,7 @@ import numpy as np
|
|
14
14
|
from itertools import combinations
|
15
15
|
from tqdm import tqdm
|
16
16
|
from datetime import datetime as dt
|
17
|
+
from joblib import delayed, Parallel
|
17
18
|
|
18
19
|
|
19
20
|
def _all_subsets(lst): return [list(x) for r in range(
|
@@ -27,13 +28,18 @@ incidents = ['DISQ', 'DNF', 'FE', 'ML', 'PU', 'TNP', 'TO',
|
|
27
28
|
'UR', 'VOID', 'WR', 'WV', 'WV-A', 'WX', 'WX-A', 'WXNR']
|
28
29
|
|
29
30
|
|
30
|
-
def _historical_process_single_date_venue(date: str, venue_code: str) ->
|
31
|
-
|
31
|
+
def _historical_process_single_date_venue(date: str, venue_code: str) -> List[pl.DataFrame]:
|
32
|
+
dfs = []
|
33
|
+
iter_date = tqdm(range(1, 12), desc=f"Processing {date} {venue_code} ...", leave=False)
|
34
|
+
for race_number in iter_date:
|
32
35
|
try:
|
33
|
-
_extract_race_data(date.strftime('%Y/%m/%d'),
|
34
|
-
|
36
|
+
dfs.append(_extract_race_data(date.strftime('%Y/%m/%d'),
|
37
|
+
venue_code, race_number))
|
35
38
|
except:
|
36
|
-
|
39
|
+
if race_number == 1:
|
40
|
+
iter_date.close()
|
41
|
+
return []
|
42
|
+
return dfs
|
37
43
|
|
38
44
|
|
39
45
|
def generate_historical_data(start_date: str, end_date: str) -> pl.DataFrame:
|
@@ -47,10 +53,10 @@ def generate_historical_data(start_date: str, end_date: str) -> pl.DataFrame:
|
|
47
53
|
|
48
54
|
for date in tqdm(pl.date_range(start_dt, end_dt, interval='1d', eager=True)):
|
49
55
|
for venue_code in ['ST', 'HV']:
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
56
|
+
dfs += _historical_process_single_date_venue(date, venue_code)
|
57
|
+
|
58
|
+
if dfs == []:
|
59
|
+
raise ValueError("Failed to obtain any race data. This could be due to invalid date range, or server requests limit. Please try again later.")
|
54
60
|
|
55
61
|
df = (pl.concat(dfs)
|
56
62
|
.filter(~pl.col('Pla').is_in(incidents))
|
@@ -69,10 +75,11 @@ def generate_historical_data(start_date: str, end_date: str) -> pl.DataFrame:
|
|
69
75
|
pl.col('WinOdds').cast(pl.Float64, strict=False)
|
70
76
|
])
|
71
77
|
|
72
|
-
df = df.with_columns(
|
73
|
-
|
74
|
-
|
75
|
-
|
78
|
+
df = df.with_columns(
|
79
|
+
(
|
80
|
+
pl.col("FinishTime").str.split(":").list.get(0).cast(pl.Int64) * 60 +
|
81
|
+
pl.col("FinishTime").str.split(":").list.get(1).cast(pl.Float64)
|
82
|
+
).cast(pl.Float64).alias("FinishTime")
|
76
83
|
)
|
77
84
|
|
78
85
|
return df
|
@@ -1,11 +1,12 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: hkjc
|
3
|
-
Version: 0.3.14
|
3
|
+
Version: 0.3.15
|
4
4
|
Summary: Library for scrapping HKJC data and perform basic analysis
|
5
5
|
Requires-Python: >=3.11
|
6
6
|
Requires-Dist: beautifulsoup4>=4.14.2
|
7
7
|
Requires-Dist: cachetools>=6.2.0
|
8
8
|
Requires-Dist: fastexcel>=0.16.0
|
9
|
+
Requires-Dist: joblib>=1.5.2
|
9
10
|
Requires-Dist: numba>=0.62.1
|
10
11
|
Requires-Dist: numpy>=2.3.3
|
11
12
|
Requires-Dist: polars>=1.33.1
|
@@ -1,13 +1,13 @@
|
|
1
1
|
hkjc/__init__.py,sha256=TI7PVhmoWSvYX-xdTEdaT3jfY99LiYQFRQZaIwBhJd8,785
|
2
2
|
hkjc/harville_model.py,sha256=MZjPLS-1nbEhp1d4Syuq13DtraKnd7TlNqBmOOCwxgc,15976
|
3
|
-
hkjc/historical.py,sha256=
|
3
|
+
hkjc/historical.py,sha256=FRECc4pmozjFKkFeWN0vTzECF9QOS7URyJoCfTt5hlw,5805
|
4
4
|
hkjc/live_odds.py,sha256=G4ELBBp1d2prxye9kKzu2pwtS4vSfRPOmEuT7-Nd-3A,4741
|
5
|
-
hkjc/processing.py,sha256=
|
5
|
+
hkjc/processing.py,sha256=XeVrF5KKkU3Oy-vqPvMgM22QHVTCVCuml2IsIGdRbYw,7483
|
6
6
|
hkjc/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
7
|
hkjc/speedpro.py,sha256=Y2Z3GYGeePc4sM-ZnCHXCI1N7L-_j9nrMqS3CC5BBSo,2031
|
8
8
|
hkjc/utils.py,sha256=4CA_FPf_U3GvzoLkqBX0qDPZgrSvKJKvbP7VWqd5FiA,6323
|
9
9
|
hkjc/strategy/place_only.py,sha256=lHPjTSj8PzghxncNBg8FI4T4HJigekB9a3bV7l7VtPA,2079
|
10
10
|
hkjc/strategy/qpbanker.py,sha256=MQxjwsfhllKZroKS8w8Q3bi3HMjGc1DAyBIjNZAp3yQ,4805
|
11
|
-
hkjc-0.3.
|
12
|
-
hkjc-0.3.
|
13
|
-
hkjc-0.3.14.dist-info/RECORD,,
|
11
|
+
hkjc-0.3.15.dist-info/METADATA,sha256=2nQL1EImJ0hXnWFdGnpORIknMdaaFham-Pw9cgjUiO4,481
|
12
|
+
hkjc-0.3.15.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
13
|
+
hkjc-0.3.15.dist-info/RECORD,,
|
File without changes
|