pwb-toolbox 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pwb_toolbox/datasets/__init__.py +100 -75
- {pwb_toolbox-0.1.4.dist-info → pwb_toolbox-0.1.6.dist-info}/METADATA +1 -1
- pwb_toolbox-0.1.6.dist-info/RECORD +7 -0
- pwb_toolbox-0.1.4.dist-info/RECORD +0 -7
- {pwb_toolbox-0.1.4.dist-info → pwb_toolbox-0.1.6.dist-info}/WHEEL +0 -0
- {pwb_toolbox-0.1.4.dist-info → pwb_toolbox-0.1.6.dist-info}/licenses/LICENSE.txt +0 -0
- {pwb_toolbox-0.1.4.dist-info → pwb_toolbox-0.1.6.dist-info}/top_level.txt +0 -0
pwb_toolbox/datasets/__init__.py
CHANGED
@@ -627,7 +627,9 @@ def load_dataset(
|
|
627
627
|
return df
|
628
628
|
|
629
629
|
|
630
|
-
def __convert_indices_to_usd(
|
630
|
+
def __convert_indices_to_usd(
|
631
|
+
df_indices: pd.DataFrame, df_forex: pd.DataFrame
|
632
|
+
) -> pd.DataFrame:
|
631
633
|
mapping = {
|
632
634
|
"ADSMI": "AED", # United Arab Emirates
|
633
635
|
"AEX": "EUR", # Netherlands
|
@@ -727,32 +729,40 @@ def __convert_indices_to_usd(df_indices, df_forex):
|
|
727
729
|
"SX5E": "EUR", # Europe
|
728
730
|
"TA125": "ILS", # Israel
|
729
731
|
}
|
730
|
-
symbols = df_indices.symbol.unique()
|
731
|
-
mapping = {k: v for k, v in mapping.items() if k in symbols}
|
732
732
|
frames = []
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
733
|
+
|
734
|
+
# iterate over the symbols that actually exist in df_indices
|
735
|
+
for symbol in df_indices["symbol"].unique():
|
736
|
+
df_idx = df_indices[df_indices["symbol"] == symbol].copy()
|
737
|
+
|
738
|
+
# 1) Figure out what currency the index is quoted in.
|
739
|
+
ccy = mapping.get(symbol) # None if not mapped
|
740
|
+
if ccy is None or ccy == "USD":
|
741
|
+
# Unknown or already USD – just keep the original rows
|
742
|
+
frames.append(df_idx)
|
737
743
|
continue
|
738
|
-
|
739
|
-
|
744
|
+
|
745
|
+
# 2) Find the matching FX rate (home-ccy → USD)
|
746
|
+
pair = ccy + "USD"
|
747
|
+
df_fx = df_forex[df_forex["symbol"] == pair].copy()
|
748
|
+
|
749
|
+
if df_idx.empty or df_fx.empty:
|
750
|
+
# No FX data – keep raw index levels instead of dropping them
|
751
|
+
frames.append(df_idx)
|
740
752
|
continue
|
741
|
-
# Merge dataframes on the date column
|
742
|
-
merged_df = pd.merge(
|
743
|
-
df_index, df_forex_currency, on="date", suffixes=("", "_forex")
|
744
|
-
)
|
745
753
|
|
746
|
-
#
|
747
|
-
|
748
|
-
|
749
|
-
|
750
|
-
merged_df["close"] = merged_df["close"] * merged_df["close_forex"]
|
754
|
+
# 3) Merge on date and convert OHLC
|
755
|
+
merged = pd.merge(df_idx, df_fx, on="date", suffixes=("", "_fx"))
|
756
|
+
for col in ("open", "high", "low", "close"):
|
757
|
+
merged[col] = merged[col] * merged[f"{col}_fx"]
|
751
758
|
|
752
|
-
frames.append(
|
759
|
+
frames.append(merged[["symbol", "date", "open", "high", "low", "close"]])
|
753
760
|
|
754
|
-
|
755
|
-
|
761
|
+
if not frames:
|
762
|
+
return pd.DataFrame(columns=df_indices.columns)
|
763
|
+
|
764
|
+
# Combine everything back into one DataFrame
|
765
|
+
return pd.concat(frames, ignore_index=True)
|
756
766
|
|
757
767
|
|
758
768
|
def __extract_years_to_maturity(bond_symbol):
|
@@ -803,62 +813,68 @@ def __extend_etfs(df_etfs):
|
|
803
813
|
symbols = df_etfs.symbol.unique()
|
804
814
|
mapping = {k: v for k, v in mapping.items() if k in symbols}
|
805
815
|
|
806
|
-
|
807
|
-
|
808
|
-
|
809
|
-
|
810
|
-
|
811
|
-
|
812
|
-
|
813
|
-
|
814
|
-
|
815
|
-
|
816
|
+
# Nothing to extend → just return the input
|
817
|
+
if not mapping:
|
818
|
+
return df_etfs.copy()
|
819
|
+
|
820
|
+
# ------------------------------------------------------------------ step 2
|
821
|
+
grouped = defaultdict(list) # {path: [proxy1, proxy2, ...]}
|
822
|
+
for _, (path, proxy) in mapping.items():
|
823
|
+
grouped[path].append(proxy)
|
824
|
+
|
825
|
+
# Load each dataset only if there's at least one proxy symbol
|
826
|
+
other_frames = []
|
827
|
+
for path, proxies in grouped.items():
|
828
|
+
if proxies: # skip empty lists
|
829
|
+
other_frames.append(load_dataset(path, proxies, to_usd=True))
|
816
830
|
|
831
|
+
# If no proxy data could be loaded, fall back to raw ETF data
|
832
|
+
if not other_frames:
|
833
|
+
return df_etfs.copy()
|
834
|
+
|
835
|
+
df_others = pd.concat(other_frames, ignore_index=True)
|
836
|
+
|
837
|
+
# ------------------------------------------------------------------ step 3
|
817
838
|
frames = []
|
818
|
-
for etf,
|
819
|
-
other_symbol = other[1]
|
820
|
-
# Get the ETF & Index data
|
839
|
+
for etf, (__, proxy) in mapping.items():
|
821
840
|
etf_data = df_etfs[df_etfs["symbol"] == etf]
|
822
|
-
|
823
|
-
continue
|
824
|
-
other_data = df_others[df_others["symbol"] == other_symbol]
|
825
|
-
if other_data.empty:
|
826
|
-
continue
|
827
|
-
|
828
|
-
# Find the first overlapping date
|
829
|
-
common_dates = etf_data["date"].isin(other_data["date"])
|
830
|
-
first_common_date = etf_data.loc[common_dates, "date"].min()
|
841
|
+
proxy_data = df_others[df_others["symbol"] == proxy]
|
831
842
|
|
832
|
-
if
|
833
|
-
|
843
|
+
if etf_data.empty or proxy_data.empty:
|
844
|
+
frames.append(etf_data) # keep raw ETF if proxy missing
|
834
845
|
continue
|
835
846
|
|
836
|
-
|
837
|
-
|
847
|
+
# Find first overlapping date
|
848
|
+
first_common = etf_data.loc[
|
849
|
+
etf_data["date"].isin(proxy_data["date"]), "date"
|
850
|
+
].min()
|
851
|
+
if pd.isna(first_common):
|
852
|
+
frames.append(etf_data) # no overlap → keep raw ETF
|
853
|
+
continue
|
838
854
|
|
839
|
-
# Compute
|
840
|
-
|
841
|
-
|
855
|
+
# Compute adjustment factor on that date
|
856
|
+
k = (
|
857
|
+
etf_data.loc[etf_data["date"] == first_common, "close"].iloc[0]
|
858
|
+
/ proxy_data.loc[proxy_data["date"] == first_common, "close"].iloc[0]
|
842
859
|
)
|
843
860
|
|
844
|
-
#
|
845
|
-
|
846
|
-
|
847
|
-
]
|
848
|
-
for column in ["open", "high", "low", "close"]:
|
849
|
-
index_data_before_common.loc[:, column] *= adjustment_factor
|
850
|
-
index_data_before_common.loc[:, "symbol"] = etf
|
861
|
+
# Scale proxy history before the overlap
|
862
|
+
hist = proxy_data[proxy_data["date"] < first_common].copy()
|
863
|
+
hist[["open", "high", "low", "close"]] *= k
|
864
|
+
hist["symbol"] = etf
|
851
865
|
|
852
|
-
# Combine
|
853
|
-
|
854
|
-
frames.append(combined_data)
|
866
|
+
# Combine proxy history + actual ETF data
|
867
|
+
frames.append(pd.concat([hist, etf_data]))
|
855
868
|
|
856
|
-
|
857
|
-
|
869
|
+
# Add ETFs that were never in the mapping
|
870
|
+
untouched = set(symbols) - set(mapping)
|
871
|
+
frames.append(df_etfs[df_etfs["symbol"].isin(untouched)])
|
858
872
|
|
859
|
-
|
860
|
-
|
861
|
-
|
873
|
+
return (
|
874
|
+
pd.concat(frames, ignore_index=True)
|
875
|
+
.sort_values(["date", "symbol"])
|
876
|
+
.reset_index(drop=True)
|
877
|
+
)
|
862
878
|
|
863
879
|
|
864
880
|
ALLOWED_FIELDS = {"open", "high", "low", "close"}
|
@@ -909,16 +925,25 @@ def get_pricing(
|
|
909
925
|
raise ValueError(f"Invalid field(s): {bad}. Allowed: {sorted(ALLOWED_FIELDS)}")
|
910
926
|
|
911
927
|
# --------------------------------------------------------------- download
|
912
|
-
|
913
|
-
|
914
|
-
|
915
|
-
|
916
|
-
|
917
|
-
|
918
|
-
|
919
|
-
|
920
|
-
|
921
|
-
)
|
928
|
+
DATASETS = [
|
929
|
+
("Stocks-Daily-Price", extend),
|
930
|
+
("ETFs-Daily-Price", extend),
|
931
|
+
("Cryptocurrencies-Daily-Price", extend),
|
932
|
+
("Bonds-Daily-Price", extend),
|
933
|
+
("Commodities-Daily-Price", extend),
|
934
|
+
("Forex-Daily-Price", extend),
|
935
|
+
("Indices-Daily-Price", False), # indices generally have no proxy data
|
936
|
+
]
|
937
|
+
remaining = set(symbol_list) # symbols still to fetch
|
938
|
+
frames = []
|
939
|
+
for dataset_name, ext_flag in DATASETS:
|
940
|
+
if not remaining: # all symbols resolved → stop early
|
941
|
+
break
|
942
|
+
df_part = load_dataset(dataset_name, list(remaining), extend=ext_flag)
|
943
|
+
if not df_part.empty:
|
944
|
+
frames.append(df_part)
|
945
|
+
remaining -= set(df_part["symbol"].unique())
|
946
|
+
df = pd.concat(frames, ignore_index=True)
|
922
947
|
|
923
948
|
df["date"] = pd.to_datetime(df["date"])
|
924
949
|
df.set_index("date", inplace=True)
|
@@ -0,0 +1,7 @@
|
|
1
|
+
pwb_toolbox/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
+
pwb_toolbox/datasets/__init__.py,sha256=8ruFquxyz5_6D9zImecPmTXruHClkoV0vNX5H0eR4Fw,22249
|
3
|
+
pwb_toolbox-0.1.6.dist-info/licenses/LICENSE.txt,sha256=_Wjz7o7St3iVSPBRzE0keS8XSqSJ03A3NZ6cMlTaSK8,1079
|
4
|
+
pwb_toolbox-0.1.6.dist-info/METADATA,sha256=nao3Zw_tNUmsNxm9tql9HfYc3NAOWc_wyJIaYcuuHBA,4617
|
5
|
+
pwb_toolbox-0.1.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
6
|
+
pwb_toolbox-0.1.6.dist-info/top_level.txt,sha256=TZcXcF2AMkKkibZOuq6AYsHjajPgddHAGjQUT64OYGY,12
|
7
|
+
pwb_toolbox-0.1.6.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
pwb_toolbox/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
pwb_toolbox/datasets/__init__.py,sha256=drj-jY3HSxYp8o1X8-hYbDvaHzuVR4JRumrXq-_CQFk,21668
|
3
|
-
pwb_toolbox-0.1.4.dist-info/licenses/LICENSE.txt,sha256=_Wjz7o7St3iVSPBRzE0keS8XSqSJ03A3NZ6cMlTaSK8,1079
|
4
|
-
pwb_toolbox-0.1.4.dist-info/METADATA,sha256=ai7OJKVjtSVmZbjZFH_dbqSD5pebe57j_xCw1yhAt20,4617
|
5
|
-
pwb_toolbox-0.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
6
|
-
pwb_toolbox-0.1.4.dist-info/top_level.txt,sha256=TZcXcF2AMkKkibZOuq6AYsHjajPgddHAGjQUT64OYGY,12
|
7
|
-
pwb_toolbox-0.1.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|