jse-tools 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,11 @@
1
+ Metadata-Version: 2.4
2
+ Name: jse-tools
3
+ Version: 0.2.1
4
+ Summary: Utilities for downloading and processing JSE stock data
5
+ Author: Francois Helmie
6
+ Requires-Python: >=3.9
7
+ Requires-Dist: pandas
8
+ Requires-Dist: numpy
9
+ Requires-Dist: requests
10
+ Requires-Dist: yfinance
11
+ Requires-Dist: scipy
@@ -0,0 +1,16 @@
1
+
2
+ from .core import (
3
+ get_tickers,
4
+ safe_request,
5
+ verify_dataset_integrity,
6
+ download_and_process,
7
+ save_tickers
8
+ )
9
+
10
+ __all__ = [
11
+ "get_tickers",
12
+ "safe_request",
13
+ "verify_dataset_integrity",
14
+ "download_and_process",
15
+ "save_tickers"
16
+ ]
@@ -0,0 +1,137 @@
1
+
2
+ import os
3
+ import time
4
+ import requests
5
+ import pandas as pd
6
+ import yfinance as yf
7
+ import numpy as np
8
+ from scipy import stats
9
+
10
+
11
+ # ================================
12
+ # Resilience Layer
13
+ # ================================
14
+
15
def safe_request(url, retries=3, delay=2):
    """
    Network resilience wrapper.

    Issues ``requests.get(url, timeout=10)`` up to ``retries`` times and
    returns the first response whose status code is 200.

    Args:
        url: Address to fetch.
        retries: Maximum number of attempts.
        delay: Seconds to sleep between two consecutive attempts.

    Returns:
        The response object of the first attempt answered with HTTP 200.

    Raises:
        Exception: If no attempt produced an HTTP 200 response. The message
            names the last failure (exception text or HTTP status code).
    """
    last_error = None
    last_exception = None

    for attempt in range(retries):
        try:
            response = requests.get(url, timeout=10)
        except requests.exceptions.RequestException as exc:
            last_error = exc
            last_exception = exc
        else:
            if response.status_code == 200:
                return response
            # Record non-200 replies as well; otherwise the final message
            # would read "Last error: None" for plain HTTP failures.
            last_error = f"HTTP {response.status_code}"
            last_exception = None

        # Pause only when another attempt follows.
        if attempt < retries - 1:
            time.sleep(delay)

    raise Exception(
        f"API request failed after retries. Last error: {last_error}"
    ) from last_exception
34
+
35
+
36
+ # ================================
37
+ # Reliability Layer
38
+ # ================================
39
+
40
def verify_dataset_integrity(df, required_columns=None, max_null_ratio=0.5):
    """
    Dataset integrity guard.

    Checks, in order:
    - Dataset not empty
    - Required columns exist
    - No column exceeds the missing data ratio threshold

    Args:
        df: DataFrame to validate (``None`` is treated as empty).
        required_columns: Optional iterable of column labels that must be
            present in ``df.columns``.
        max_null_ratio: Largest tolerated fraction of missing values per
            column; a column fails when its ratio is strictly greater.

    Returns:
        ``True`` when every check passes.

    Raises:
        ValueError: If the frame is ``None``/empty, a required column is
            absent, or any column exceeds ``max_null_ratio``.
    """
    if df is None or df.empty:
        raise ValueError("Dataset integrity failure: DataFrame is empty")

    if required_columns:
        missing = [c for c in required_columns if c not in df.columns]
        if missing:
            raise ValueError(f"Missing required columns: {missing}")

    # Detect pathological missing data: per-column fraction of NaN cells.
    null_ratio = df.isna().mean()
    too_sparse = null_ratio > max_null_ratio

    if too_sparse.any():
        problematic = null_ratio[too_sparse].index.tolist()
        raise ValueError(f"High missing data ratio detected in: {problematic}")

    return True
66
+
67
+
68
+ # ================================
69
+ # Main Functions
70
+ # ================================
71
+
72
def get_tickers(api_token):
    """
    Fetch the JSE symbol list from the EODHD API and build ticker names.

    Only symbols whose ``Code`` is at most three characters long are kept;
    each kept code gets the suffix ``.JO``.

    Args:
        api_token: EODHD API token, sent as the ``api_token`` query value.

    Returns:
        A list of ticker strings such as ``"ABC.JO"``; an empty list when
        the API returns an empty payload.

    Raises:
        Exception: Propagated from ``safe_request`` when the request fails.
        KeyError: If the payload has no ``Code`` field.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote

    # Percent-encode the token so reserved characters cannot corrupt the
    # query string.
    url = (
        "https://eodhd.com/api/exchange-symbol-list/JSE"
        f"?api_token={quote(str(api_token), safe='')}&fmt=json"
    )

    response = safe_request(url)
    data = response.json()

    if not data:
        print("Warning: Empty API response")
        return []

    codes = pd.json_normalize(data)["Code"]

    # Work on the filtered Series directly instead of assigning a new
    # column onto a filtered frame, which triggers SettingWithCopyWarning.
    short_codes = codes[codes.str.len() <= 3]

    return (short_codes.astype(str) + ".JO").tolist()
89
+
90
+
91
def download_and_process(tickers, start_date, end_date, output_dir="Stocks",
                         z_threshold=3):
    """
    Download price history per ticker, clean it, and write one CSV each.

    For every ticker the frame returned by ``yf.download`` is reduced to
    the Open/High/Low/Close/Volume columns, validated with
    ``verify_dataset_integrity``, gap-filled (forward, then backward),
    stripped of outlier rows, and saved as ``<output_dir>/<ticker>.csv``.

    Args:
        tickers: Iterable of ticker symbols passed to ``yf.download``.
        start_date: Forwarded as ``start`` to ``yf.download``.
        end_date: Forwarded as ``end`` to ``yf.download``.
        output_dir: Directory for the CSV files; created if missing.
        z_threshold: A row is dropped when the absolute z-score of any of
            its columns reaches this value.

    Raises:
        ValueError: Propagated from ``verify_dataset_integrity``; this
            aborts processing of the remaining tickers.
    """
    columns = ["Open", "High", "Low", "Close", "Volume"]

    os.makedirs(output_dir, exist_ok=True)

    for ticker in tickers:
        df = yf.download(
            ticker,
            start=start_date,
            end=end_date,
            auto_adjust=True,
            progress=False,
        )

        if df is None or df.empty:
            print(f"Skipping {ticker} (no data)")
            continue

        # Copy so later operations act on an independent frame, not a
        # slice of the downloaded one.
        df = df[columns].copy()

        # Reliability check
        verify_dataset_integrity(df, required_columns=columns)

        # Fill gaps BEFORE scoring: a single NaN makes the z-score of its
        # whole column NaN, which previously discarded every row.
        df = df.ffill().bfill()

        # Outlier removal. A zero-variance column yields undefined (NaN)
        # z-scores; NaN >= threshold is False, so such columns never flag
        # a row instead of wiping the whole frame.
        with np.errstate(divide="ignore", invalid="ignore"):
            z_scores = np.abs(
                np.asarray(stats.zscore(df.to_numpy(dtype=float)))
            )
        is_outlier = (z_scores >= z_threshold).any(axis=1)
        df = df.loc[~is_outlier]

        df.to_csv(os.path.join(output_dir, f"{ticker}.csv"))

    print(f"Saved data in '{output_dir}'")
124
+
125
+
126
def save_tickers(folder_path):
    """
    Write an index of the per-ticker CSV files found in ``folder_path``.

    Every ``*.csv`` file name (minus the extension) is taken as a ticker.
    The index file ``tickers.csv`` itself is skipped, so running the
    function again on the same folder does not add a bogus "tickers"
    entry. Names are sorted because ``os.listdir`` returns them in
    arbitrary order.

    Args:
        folder_path: Directory holding the per-ticker CSV files.

    Returns:
        A DataFrame with a single ``tickers`` column; the same data is
        written to ``<folder_path>/tickers.csv`` without the index.
    """
    index_name = "tickers.csv"

    tickers = sorted(
        entry[:-4]
        for entry in os.listdir(folder_path)
        if entry.endswith(".csv") and entry != index_name
    )

    df = pd.DataFrame(tickers, columns=["tickers"])
    df.to_csv(os.path.join(folder_path, index_name), index=False)

    return df
@@ -0,0 +1,11 @@
1
+ Metadata-Version: 2.4
2
+ Name: jse-tools
3
+ Version: 0.2.1
4
+ Summary: Utilities for downloading and processing JSE stock data
5
+ Author: Francois Helmie
6
+ Requires-Python: >=3.9
7
+ Requires-Dist: pandas
8
+ Requires-Dist: numpy
9
+ Requires-Dist: requests
10
+ Requires-Dist: yfinance
11
+ Requires-Dist: scipy
@@ -0,0 +1,8 @@
1
+ pyproject.toml
2
+ jse_tools/__init__.py
3
+ jse_tools/core.py
4
+ jse_tools.egg-info/PKG-INFO
5
+ jse_tools.egg-info/SOURCES.txt
6
+ jse_tools.egg-info/dependency_links.txt
7
+ jse_tools.egg-info/requires.txt
8
+ jse_tools.egg-info/top_level.txt
@@ -0,0 +1,5 @@
1
+ pandas
2
+ numpy
3
+ requests
4
+ yfinance
5
+ scipy
@@ -0,0 +1 @@
1
+ jse_tools
@@ -0,0 +1,20 @@
1
+
2
+ [build-system]
3
+ requires = ["setuptools>=61"]
4
+ build-backend = "setuptools.build_meta"
5
+
6
+ [project]
7
+ name = "jse-tools"
8
+ version = "0.2.1"
9
+ description = "Utilities for downloading and processing JSE stock data"
10
+ authors = [{name = "Francois Helmie"}]
11
+
12
+ dependencies = [
13
+ "pandas",
14
+ "numpy",
15
+ "requests",
16
+ "yfinance",
17
+ "scipy"
18
+ ]
19
+
20
+ requires-python = ">=3.9"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+