pwb-toolbox 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -627,7 +627,9 @@ def load_dataset(
627
627
  return df
628
628
 
629
629
 
630
- def __convert_indices_to_usd(df_indices, df_forex):
630
+ def __convert_indices_to_usd(
631
+ df_indices: pd.DataFrame, df_forex: pd.DataFrame
632
+ ) -> pd.DataFrame:
631
633
  mapping = {
632
634
  "ADSMI": "AED", # United Arab Emirates
633
635
  "AEX": "EUR", # Netherlands
@@ -727,32 +729,40 @@ def __convert_indices_to_usd(df_indices, df_forex):
727
729
  "SX5E": "EUR", # Europe
728
730
  "TA125": "ILS", # Israel
729
731
  }
730
- symbols = df_indices.symbol.unique()
731
- mapping = {k: v for k, v in mapping.items() if k in symbols}
732
732
  frames = []
733
- for symbol, currency in mapping.items():
734
- df_index = df_indices[df_indices["symbol"] == symbol].copy()
735
- if currency == "USD":
736
- frames.append(df_index)
733
+
734
+ # iterate over the symbols that actually exist in df_indices
735
+ for symbol in df_indices["symbol"].unique():
736
+ df_idx = df_indices[df_indices["symbol"] == symbol].copy()
737
+
738
+ # 1) Figure out what currency the index is quoted in.
739
+ ccy = mapping.get(symbol) # None if not mapped
740
+ if ccy is None or ccy == "USD":
741
+ # Unknown or already USD – just keep the original rows
742
+ frames.append(df_idx)
737
743
  continue
738
- df_forex_currency = df_forex[df_forex["symbol"] == currency + "USD"].copy()
739
- if df_index.empty or df_forex_currency.empty:
744
+
745
+ # 2) Find the matching FX rate (home-ccy → USD)
746
+ pair = ccy + "USD"
747
+ df_fx = df_forex[df_forex["symbol"] == pair].copy()
748
+
749
+ if df_idx.empty or df_fx.empty:
750
+ # No FX data – keep raw index levels instead of dropping them
751
+ frames.append(df_idx)
740
752
  continue
741
- # Merge dataframes on the date column
742
- merged_df = pd.merge(
743
- df_index, df_forex_currency, on="date", suffixes=("", "_forex")
744
- )
745
753
 
746
- # Multiply the index prices by the corresponding forex rates
747
- merged_df["open"] = merged_df["open"] * merged_df["open_forex"]
748
- merged_df["high"] = merged_df["high"] * merged_df["high_forex"]
749
- merged_df["low"] = merged_df["low"] * merged_df["low_forex"]
750
- merged_df["close"] = merged_df["close"] * merged_df["close_forex"]
754
+ # 3) Merge on date and convert OHLC
755
+ merged = pd.merge(df_idx, df_fx, on="date", suffixes=("", "_fx"))
756
+ for col in ("open", "high", "low", "close"):
757
+ merged[col] = merged[col] * merged[f"{col}_fx"]
751
758
 
752
- frames.append(merged_df[["symbol", "date", "open", "high", "low", "close"]])
759
+ frames.append(merged[["symbol", "date", "open", "high", "low", "close"]])
753
760
 
754
- df = pd.concat(frames, ignore_index=True)
755
- return df
761
+ if not frames:
762
+ return pd.DataFrame(columns=df_indices.columns)
763
+
764
+ # Combine everything back into one DataFrame
765
+ return pd.concat(frames, ignore_index=True)
756
766
 
757
767
 
758
768
  def __extract_years_to_maturity(bond_symbol):
@@ -803,62 +813,68 @@ def __extend_etfs(df_etfs):
803
813
  symbols = df_etfs.symbol.unique()
804
814
  mapping = {k: v for k, v in mapping.items() if k in symbols}
805
815
 
806
- grouped_path_symbols = defaultdict(list)
807
- for value in mapping.values():
808
- grouped_path_symbols[value[0]].append(value[1])
809
- grouped_path_symbols = dict(grouped_path_symbols)
810
- df_others = pd.concat(
811
- [
812
- load_dataset(path, symbols, to_usd=True)
813
- for path, symbols in grouped_path_symbols.items()
814
- ]
815
- )
816
+ # Nothing to extend → just return the input
817
+ if not mapping:
818
+ return df_etfs.copy()
819
+
820
+ # ------------------------------------------------------------------ step 2
821
+ grouped = defaultdict(list) # {path: [proxy1, proxy2, ...]}
822
+ for _, (path, proxy) in mapping.items():
823
+ grouped[path].append(proxy)
824
+
825
+ # Load each dataset only if there's at least one proxy symbol
826
+ other_frames = []
827
+ for path, proxies in grouped.items():
828
+ if proxies: # skip empty lists
829
+ other_frames.append(load_dataset(path, proxies, to_usd=True))
816
830
 
831
+ # If no proxy data could be loaded, fall back to raw ETF data
832
+ if not other_frames:
833
+ return df_etfs.copy()
834
+
835
+ df_others = pd.concat(other_frames, ignore_index=True)
836
+
837
+ # ------------------------------------------------------------------ step 3
817
838
  frames = []
818
- for etf, other in mapping.items():
819
- other_symbol = other[1]
820
- # Get the ETF & Index data
839
+ for etf, (__, proxy) in mapping.items():
821
840
  etf_data = df_etfs[df_etfs["symbol"] == etf]
822
- if etf_data.empty:
823
- continue
824
- other_data = df_others[df_others["symbol"] == other_symbol]
825
- if other_data.empty:
826
- continue
827
-
828
- # Find the first overlapping date
829
- common_dates = etf_data["date"].isin(other_data["date"])
830
- first_common_date = etf_data.loc[common_dates, "date"].min()
841
+ proxy_data = df_others[df_others["symbol"] == proxy]
831
842
 
832
- if pd.isnull(first_common_date):
833
- print(f"No common date found for {etf} and {other_symbol}")
843
+ if etf_data.empty or proxy_data.empty:
844
+ frames.append(etf_data) # keep raw ETF if proxy missing
834
845
  continue
835
846
 
836
- etf_first_common = etf_data[etf_data["date"] == first_common_date]
837
- other_first_common = other_data[other_data["date"] == first_common_date]
847
+ # Find first overlapping date
848
+ first_common = etf_data.loc[
849
+ etf_data["date"].isin(proxy_data["date"]), "date"
850
+ ].min()
851
+ if pd.isna(first_common):
852
+ frames.append(etf_data) # no overlap → keep raw ETF
853
+ continue
838
854
 
839
- # Compute the adjustment factor (using closing prices for simplicity)
840
- adjustment_factor = (
841
- etf_first_common["close"].values[0] / other_first_common["close"].values[0]
855
+ # Compute adjustment factor on that date
856
+ k = (
857
+ etf_data.loc[etf_data["date"] == first_common, "close"].iloc[0]
858
+ / proxy_data.loc[proxy_data["date"] == first_common, "close"].iloc[0]
842
859
  )
843
860
 
844
- # Adjust index data before the first common date
845
- index_data_before_common = other_data[
846
- other_data["date"] < first_common_date
847
- ].copy()
848
- for column in ["open", "high", "low", "close"]:
849
- index_data_before_common.loc[:, column] *= adjustment_factor
850
- index_data_before_common.loc[:, "symbol"] = etf
861
+ # Scale proxy history before the overlap
862
+ hist = proxy_data[proxy_data["date"] < first_common].copy()
863
+ hist[["open", "high", "low", "close"]] *= k
864
+ hist["symbol"] = etf
851
865
 
852
- # Combine adjusted index data with ETF data
853
- combined_data = pd.concat([index_data_before_common, etf_data])
854
- frames.append(combined_data)
866
+ # Combine proxy history + actual ETF data
867
+ frames.append(pd.concat([hist, etf_data]))
855
868
 
856
- symbols_not_in_mapping = set(symbols) - set(mapping.keys())
857
- frames.append(df_etfs[df_etfs["symbol"].isin(symbols_not_in_mapping)])
869
+ # Add ETFs that were never in the mapping
870
+ untouched = set(symbols) - set(mapping)
871
+ frames.append(df_etfs[df_etfs["symbol"].isin(untouched)])
858
872
 
859
- # Concatenate all frames to form the final dataframe
860
- df = pd.concat(frames).sort_values(by=["date", "symbol"]).reset_index(drop=True)
861
- return df
873
+ return (
874
+ pd.concat(frames, ignore_index=True)
875
+ .sort_values(["date", "symbol"])
876
+ .reset_index(drop=True)
877
+ )
862
878
 
863
879
 
864
880
  ALLOWED_FIELDS = {"open", "high", "low", "close"}
@@ -909,16 +925,25 @@ def get_pricing(
909
925
  raise ValueError(f"Invalid field(s): {bad}. Allowed: {sorted(ALLOWED_FIELDS)}")
910
926
 
911
927
  # --------------------------------------------------------------- download
912
- df = pd.concat(
913
- [
914
- load_dataset("Stocks-Daily-Price", symbol_list, extend=extend),
915
- load_dataset("ETFs-Daily-Price", symbol_list, extend=extend),
916
- load_dataset("Cryptocurrencies-Daily-Price", symbol_list, extend=extend),
917
- load_dataset("Bonds-Daily-Price", symbol_list, extend=extend),
918
- load_dataset("Commodities-Daily-Price", symbol_list, extend=extend),
919
- ],
920
- ignore_index=True,
921
- )
928
+ DATASETS = [
929
+ ("Stocks-Daily-Price", extend),
930
+ ("ETFs-Daily-Price", extend),
931
+ ("Cryptocurrencies-Daily-Price", extend),
932
+ ("Bonds-Daily-Price", extend),
933
+ ("Commodities-Daily-Price", extend),
934
+ ("Forex-Daily-Price", extend),
935
+ ("Indices-Daily-Price", False), # indices generally have no proxy data
936
+ ]
937
+ remaining = set(symbol_list) # symbols still to fetch
938
+ frames = []
939
+ for dataset_name, ext_flag in DATASETS:
940
+ if not remaining: # all symbols resolved → stop early
941
+ break
942
+ df_part = load_dataset(dataset_name, list(remaining), extend=ext_flag)
943
+ if not df_part.empty:
944
+ frames.append(df_part)
945
+ remaining -= set(df_part["symbol"].unique())
946
+ df = pd.concat(frames, ignore_index=True)
922
947
 
923
948
  df["date"] = pd.to_datetime(df["date"])
924
949
  df.set_index("date", inplace=True)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pwb-toolbox
3
- Version: 0.1.4
3
+ Version: 0.1.6
4
4
  Summary: A toolbox library for quant traders
5
5
  Home-page: https://github.com/paperswithbacktest/pwb-toolbox
6
6
  Author: Your Name
@@ -0,0 +1,7 @@
1
+ pwb_toolbox/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ pwb_toolbox/datasets/__init__.py,sha256=8ruFquxyz5_6D9zImecPmTXruHClkoV0vNX5H0eR4Fw,22249
3
+ pwb_toolbox-0.1.6.dist-info/licenses/LICENSE.txt,sha256=_Wjz7o7St3iVSPBRzE0keS8XSqSJ03A3NZ6cMlTaSK8,1079
4
+ pwb_toolbox-0.1.6.dist-info/METADATA,sha256=nao3Zw_tNUmsNxm9tql9HfYc3NAOWc_wyJIaYcuuHBA,4617
5
+ pwb_toolbox-0.1.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
+ pwb_toolbox-0.1.6.dist-info/top_level.txt,sha256=TZcXcF2AMkKkibZOuq6AYsHjajPgddHAGjQUT64OYGY,12
7
+ pwb_toolbox-0.1.6.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- pwb_toolbox/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- pwb_toolbox/datasets/__init__.py,sha256=drj-jY3HSxYp8o1X8-hYbDvaHzuVR4JRumrXq-_CQFk,21668
3
- pwb_toolbox-0.1.4.dist-info/licenses/LICENSE.txt,sha256=_Wjz7o7St3iVSPBRzE0keS8XSqSJ03A3NZ6cMlTaSK8,1079
4
- pwb_toolbox-0.1.4.dist-info/METADATA,sha256=ai7OJKVjtSVmZbjZFH_dbqSD5pebe57j_xCw1yhAt20,4617
5
- pwb_toolbox-0.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
- pwb_toolbox-0.1.4.dist-info/top_level.txt,sha256=TZcXcF2AMkKkibZOuq6AYsHjajPgddHAGjQUT64OYGY,12
7
- pwb_toolbox-0.1.4.dist-info/RECORD,,