jse-tools 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jse_tools-0.2.1/PKG-INFO +11 -0
- jse_tools-0.2.1/jse_tools/__init__.py +16 -0
- jse_tools-0.2.1/jse_tools/core.py +137 -0
- jse_tools-0.2.1/jse_tools.egg-info/PKG-INFO +11 -0
- jse_tools-0.2.1/jse_tools.egg-info/SOURCES.txt +8 -0
- jse_tools-0.2.1/jse_tools.egg-info/dependency_links.txt +1 -0
- jse_tools-0.2.1/jse_tools.egg-info/requires.txt +5 -0
- jse_tools-0.2.1/jse_tools.egg-info/top_level.txt +1 -0
- jse_tools-0.2.1/pyproject.toml +20 -0
- jse_tools-0.2.1/setup.cfg +4 -0
jse_tools-0.2.1/PKG-INFO
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: jse-tools
|
|
3
|
+
Version: 0.2.1
|
|
4
|
+
Summary: Utilities for downloading and processing JSE stock data
|
|
5
|
+
Author: Francois Helmie
|
|
6
|
+
Requires-Python: >=3.9
|
|
7
|
+
Requires-Dist: pandas
|
|
8
|
+
Requires-Dist: numpy
|
|
9
|
+
Requires-Dist: requests
|
|
10
|
+
Requires-Dist: yfinance
|
|
11
|
+
Requires-Dist: scipy
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
|
|
2
|
+
from .core import (
|
|
3
|
+
get_tickers,
|
|
4
|
+
safe_request,
|
|
5
|
+
verify_dataset_integrity,
|
|
6
|
+
download_and_process,
|
|
7
|
+
save_tickers
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"get_tickers",
|
|
12
|
+
"safe_request",
|
|
13
|
+
"verify_dataset_integrity",
|
|
14
|
+
"download_and_process",
|
|
15
|
+
"save_tickers"
|
|
16
|
+
]
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
|
|
2
|
+
import os
|
|
3
|
+
import time
|
|
4
|
+
import requests
|
|
5
|
+
import pandas as pd
|
|
6
|
+
import yfinance as yf
|
|
7
|
+
import numpy as np
|
|
8
|
+
from scipy import stats
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# ================================
|
|
12
|
+
# Resilience Layer
|
|
13
|
+
# ================================
|
|
14
|
+
|
|
15
|
+
def safe_request(url, retries=3, delay=2):
    """
    Network resilience wrapper.

    Issues a GET request for ``url`` (10 second timeout) and retries on
    failure. An attempt fails when ``requests`` raises a
    ``RequestException`` or when the response status is not 200.

    Args:
        url: Address to fetch.
        retries: Maximum number of attempts. With ``0`` no request is made
            and the call fails immediately.
        delay: Seconds to wait between two consecutive attempts.

    Returns:
        The first response whose status code is 200.

    Raises:
        RuntimeError: When no attempt succeeded. The message names the last
            failure (exception or HTTP status); when that failure was a
            network exception it is attached as ``__cause__``.
    """
    last_error = None
    last_exception = None

    for attempt in range(retries):
        try:
            response = requests.get(url, timeout=10)
        except requests.exceptions.RequestException as exc:
            last_error = exc
            last_exception = exc
        else:
            if response.status_code == 200:
                return response
            # Remember the status so the final message is not "None" when
            # the server answered but refused the request.
            last_error = f"HTTP {response.status_code}"
            last_exception = None

        # Pause only when another attempt follows; waiting after the final
        # attempt would just postpone the error.
        if attempt < retries - 1:
            time.sleep(delay)

    raise RuntimeError(
        f"API request failed after retries. Last error: {last_error}"
    ) from last_exception
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# ================================
|
|
37
|
+
# Reliability Layer
|
|
38
|
+
# ================================
|
|
39
|
+
|
|
40
|
+
def verify_dataset_integrity(df, required_columns=None, max_null_ratio=0.5):
    """
    Dataset integrity guard.

    Checks, in this order:
    - Dataset not empty
    - Required columns exist
    - Missing data ratio threshold

    Args:
        df: DataFrame to validate; ``None`` is treated like an empty frame.
        required_columns: Optional iterable of column labels that must be
            present. The check is skipped when it is ``None`` or empty.
        max_null_ratio: Largest tolerated fraction of missing values per
            column. A column fails only when its ratio is strictly greater
            than this value.

    Returns:
        ``True`` when every check passes.

    Raises:
        ValueError: If the frame is empty, a required column is absent, or
            at least one column exceeds ``max_null_ratio``.
    """
    if df is None or df.empty:
        raise ValueError("Dataset integrity failure: DataFrame is empty")

    if required_columns:
        missing = [c for c in required_columns if c not in df.columns]
        if missing:
            raise ValueError(f"Missing required columns: {missing}")

    # Detect pathological missing data: per-column fraction of NaN cells.
    null_ratio = df.isna().mean()
    too_sparse = null_ratio > max_null_ratio

    if too_sparse.any():
        problematic = null_ratio[too_sparse].index.tolist()
        raise ValueError(f"High missing data ratio detected in: {problematic}")

    return True
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# ================================
|
|
69
|
+
# Main Functions
|
|
70
|
+
# ================================
|
|
71
|
+
|
|
72
|
+
def get_tickers(api_token):
    """
    Fetch the JSE symbol list from EODHD and build ``.JO`` tickers.

    The exchange symbol list is requested through ``safe_request``. Only
    entries whose ``Code`` is at most three characters long are kept, and
    ``".JO"`` is appended to each of them.

    Args:
        api_token: EODHD API token, interpolated into the URL as given.

    Returns:
        A list of ticker strings such as ``"ABC.JO"``. An empty list is
        returned (after printing a warning) when the API payload is empty.

    Raises:
        Exception: Propagated from ``safe_request`` when the request fails.
        KeyError: If the payload carries no ``Code`` field.
    """
    url = f"https://eodhd.com/api/exchange-symbol-list/JSE?api_token={api_token}&fmt=json"

    response = safe_request(url)

    data = response.json()

    if not data:
        print("Warning: Empty API response")
        return []

    codes = pd.json_normalize(data)["Code"]

    # Build the result from the filtered Series instead of assigning a new
    # column into a filtered DataFrame slice, which triggers pandas'
    # chained-assignment (SettingWithCopy) warning.
    short_codes = codes[codes.str.len() <= 3]

    return (short_codes.astype(str) + ".JO").tolist()
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _clean_ohlcv(df, z_threshold=3):
    """Fill gaps in *df*, then drop rows holding an outlier in any column."""
    # Fill first: with scipy's default nan_policy a single NaN turns the
    # whole column's z-scores into NaN, which would discard every row.
    filled = df.ffill().bfill()

    with np.errstate(divide="ignore", invalid="ignore"):
        z_scores = np.abs(
            np.asarray(stats.zscore(filled.to_numpy(dtype=float)))
        )

    # A zero-variance column yields NaN z-scores; an undefined score is not
    # evidence of an outlier, so it must not disqualify the row.
    keep = ((z_scores < z_threshold) | np.isnan(z_scores)).all(axis=1)
    return filled[keep]


def download_and_process(tickers, start_date, end_date, output_dir="Stocks"):
    """
    Download price history for each ticker and store it as a cleaned CSV.

    For every ticker the data is fetched with ``yf.download``
    (``auto_adjust=True``), reduced to the Open/High/Low/Close/Volume
    columns, validated with ``verify_dataset_integrity``, gap-filled
    (forward then backward), stripped of rows whose absolute z-score
    reaches 3 in any column, and written to ``<output_dir>/<ticker>.csv``.
    Tickers with no data are skipped with a printed notice.

    Args:
        tickers: Iterable of ticker symbols passed to ``yf.download``.
        start_date: Passed to ``yf.download`` as ``start``.
        end_date: Passed to ``yf.download`` as ``end``.
        output_dir: Directory for the CSV files; created when missing.

    Returns:
        None.

    Raises:
        ValueError: From ``verify_dataset_integrity`` when a downloaded
            frame fails validation; processing stops at that ticker.
        KeyError: If a downloaded frame lacks one of the five columns.
    """
    os.makedirs(output_dir, exist_ok=True)

    columns = ["Open", "High", "Low", "Close", "Volume"]

    for ticker in tickers:
        raw = yf.download(
            ticker,
            start=start_date,
            end=end_date,
            auto_adjust=True,
            progress=False,
        )

        if raw is None or raw.empty:
            print(f"Skipping {ticker} (no data)")
            continue

        prices = raw[columns]

        # Reliability check (runs on the raw data, before any gap filling,
        # so the missing-data ratio reflects what was actually downloaded).
        verify_dataset_integrity(prices, required_columns=columns)

        cleaned = _clean_ohlcv(prices)
        cleaned.to_csv(os.path.join(output_dir, f"{ticker}.csv"))

    print(f"Saved data in '{output_dir}'")
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def save_tickers(folder_path):
    """
    Write an index of the ticker CSV files found in ``folder_path``.

    Every ``*.csv`` entry in the folder contributes its name without the
    ``.csv`` suffix. The index file itself (``tickers.csv``) is ignored so
    that calling the function again does not list a ticker named
    ``"tickers"``. Names are sorted because ``os.listdir`` returns entries
    in arbitrary order.

    Args:
        folder_path: Directory to scan; ``tickers.csv`` is written into it.

    Returns:
        A DataFrame with a single ``tickers`` column, identical to what was
        written to ``<folder_path>/tickers.csv``.
    """
    index_name = "tickers.csv"

    tickers = sorted(
        f[:-4]
        for f in os.listdir(folder_path)
        if f.endswith(".csv") and f != index_name
    )

    df = pd.DataFrame(tickers, columns=["tickers"])
    df.to_csv(os.path.join(folder_path, index_name), index=False)

    return df
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: jse-tools
|
|
3
|
+
Version: 0.2.1
|
|
4
|
+
Summary: Utilities for downloading and processing JSE stock data
|
|
5
|
+
Author: Francois Helmie
|
|
6
|
+
Requires-Python: >=3.9
|
|
7
|
+
Requires-Dist: pandas
|
|
8
|
+
Requires-Dist: numpy
|
|
9
|
+
Requires-Dist: requests
|
|
10
|
+
Requires-Dist: yfinance
|
|
11
|
+
Requires-Dist: scipy
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
jse_tools
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
|
|
2
|
+
[build-system]
|
|
3
|
+
requires = ["setuptools>=61"]
|
|
4
|
+
build-backend = "setuptools.build_meta"
|
|
5
|
+
|
|
6
|
+
[project]
|
|
7
|
+
name = "jse-tools"
|
|
8
|
+
version = "0.2.1"
|
|
9
|
+
description = "Utilities for downloading and processing JSE stock data"
|
|
10
|
+
authors = [{name = "Francois Helmie"}]
|
|
11
|
+
|
|
12
|
+
dependencies = [
|
|
13
|
+
"pandas",
|
|
14
|
+
"numpy",
|
|
15
|
+
"requests",
|
|
16
|
+
"yfinance",
|
|
17
|
+
"scipy"
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
requires-python = ">=3.9"
|