pwb-toolbox 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
@@ -0,0 +1,836 @@
1
+ from collections import defaultdict
2
+ import os
3
+ import re
4
+
5
+ import datasets as ds
6
+ import pandas as pd
7
+
8
# Token passed to the Hugging Face `datasets` loader by load_dataset().
# Read it with .get() so that a missing variable reaches the explicit
# ValueError below instead of surfacing as a bare KeyError from os.environ[...].
HF_ACCESS_TOKEN = os.environ.get("HF_ACCESS_TOKEN")
if not HF_ACCESS_TOKEN:
    raise ValueError("Hugging Face access token not found in environment variables")
11
+
12
+
13
# Dataset families recognised by load_dataset(). Membership in one of these
# lists decides which timestamp column is parsed and which post-processing
# steps are applied.

# Daily price datasets: the "date" column is parsed and rows are filtered on
# the "symbol" column.
DAILY_PRICE_DATASETS = [
    "Bonds-Daily-Price",
    "Commodities-Daily-Price",
    "Cryptocurrencies-Daily-Price",
    "ETFs-Daily-Price",
    "Forex-Daily-Price",
    "Indices-Daily-Price",
    "Stocks-Daily-Price",
]

# Quarterly financial-statement datasets: handled like the daily datasets
# ("date" column parsed, filtered on "symbol").
DAILY_FINANCIAL_DATASETS = [
    "Stocks-Quarterly-BalanceSheet",
    "Stocks-Quarterly-CashFlow",
    "Stocks-Quarterly-Earnings",
    "Stocks-Quarterly-IncomeStatement",
]

# Intraday price datasets: the "datetime" column is parsed and rows are
# filtered on the "symbol" column.
INTRADAY_PRICE_DATASETS = [
    "Stocks-1Min-Price",
]

# News datasets: the "datetime" column is parsed and rows are filtered on the
# list-valued "symbols" column.
INTRADAY_NEWS = [
    "All-Daily-News",
]
37
+
38
+
39
+ SP500_SYMBOLS = [
40
+ "MMM",
41
+ "AOS",
42
+ "ABT",
43
+ "ABBV",
44
+ "ACN",
45
+ "ADBE",
46
+ "AMD",
47
+ "AES",
48
+ "AFL",
49
+ "A",
50
+ "APD",
51
+ "ABNB",
52
+ "AKAM",
53
+ "ALB",
54
+ "ARE",
55
+ "ALGN",
56
+ "ALLE",
57
+ "LNT",
58
+ "ALL",
59
+ "GOOGL",
60
+ "GOOG",
61
+ "MO",
62
+ "AMZN",
63
+ "AMCR",
64
+ "AEE",
65
+ "AEP",
66
+ "AXP",
67
+ "AIG",
68
+ "AMT",
69
+ "AWK",
70
+ "AMP",
71
+ "AME",
72
+ "AMGN",
73
+ "APH",
74
+ "ADI",
75
+ "ANSS",
76
+ "AON",
77
+ "APA",
78
+ "AAPL",
79
+ "AMAT",
80
+ "APTV",
81
+ "ACGL",
82
+ "ADM",
83
+ "ANET",
84
+ "AJG",
85
+ "AIZ",
86
+ "T",
87
+ "ATO",
88
+ "ADSK",
89
+ "ADP",
90
+ "AZO",
91
+ "AVB",
92
+ "AVY",
93
+ "AXON",
94
+ "BKR",
95
+ "BALL",
96
+ "BAC",
97
+ "BK",
98
+ "BBWI",
99
+ "BAX",
100
+ "BDX",
101
+ "BRK-B",
102
+ "BBY",
103
+ "TECH",
104
+ "BIIB",
105
+ "BLK",
106
+ "BX",
107
+ "BA",
108
+ "BKNG",
109
+ "BWA",
110
+ "BSX",
111
+ "BMY",
112
+ "AVGO",
113
+ "BR",
114
+ "BRO",
115
+ "BF-B",
116
+ "BLDR",
117
+ "BG",
118
+ "BXP",
119
+ "CHRW",
120
+ "CDNS",
121
+ "CZR",
122
+ "CPT",
123
+ "CPB",
124
+ "COF",
125
+ "CAH",
126
+ "KMX",
127
+ "CCL",
128
+ "CARR",
129
+ "CTLT",
130
+ "CAT",
131
+ "CBOE",
132
+ "CBRE",
133
+ "CDW",
134
+ "CE",
135
+ "COR",
136
+ "CNC",
137
+ "CNP",
138
+ "CF",
139
+ "CRL",
140
+ "SCHW",
141
+ "CHTR",
142
+ "CVX",
143
+ "CMG",
144
+ "CB",
145
+ "CHD",
146
+ "CI",
147
+ "CINF",
148
+ "CTAS",
149
+ "CSCO",
150
+ "C",
151
+ "CFG",
152
+ "CLX",
153
+ "CME",
154
+ "CMS",
155
+ "KO",
156
+ "CTSH",
157
+ "CL",
158
+ "CMCSA",
159
+ "CAG",
160
+ "COP",
161
+ "ED",
162
+ "STZ",
163
+ "CEG",
164
+ "COO",
165
+ "CPRT",
166
+ "GLW",
167
+ "CPAY",
168
+ "CTVA",
169
+ "CSGP",
170
+ "COST",
171
+ "CTRA",
172
+ "CRWD",
173
+ "CCI",
174
+ "CSX",
175
+ "CMI",
176
+ "CVS",
177
+ "DHR",
178
+ "DRI",
179
+ "DVA",
180
+ "DAY",
181
+ "DECK",
182
+ "DE",
183
+ "DELL",
184
+ "DAL",
185
+ "DVN",
186
+ "DXCM",
187
+ "FANG",
188
+ "DLR",
189
+ "DFS",
190
+ "DG",
191
+ "DLTR",
192
+ "D",
193
+ "DPZ",
194
+ "DOV",
195
+ "DOW",
196
+ "DHI",
197
+ "DTE",
198
+ "DUK",
199
+ "DD",
200
+ "EMN",
201
+ "ETN",
202
+ "EBAY",
203
+ "ECL",
204
+ "EIX",
205
+ "EW",
206
+ "EA",
207
+ "ELV",
208
+ "EMR",
209
+ "ENPH",
210
+ "ETR",
211
+ "EOG",
212
+ "EPAM",
213
+ "EQT",
214
+ "EFX",
215
+ "EQIX",
216
+ "EQR",
217
+ "ERIE",
218
+ "ESS",
219
+ "EL",
220
+ "EG",
221
+ "EVRG",
222
+ "ES",
223
+ "EXC",
224
+ "EXPE",
225
+ "EXPD",
226
+ "EXR",
227
+ "XOM",
228
+ "FFIV",
229
+ "FDS",
230
+ "FICO",
231
+ "FAST",
232
+ "FRT",
233
+ "FDX",
234
+ "FIS",
235
+ "FITB",
236
+ "FSLR",
237
+ "FE",
238
+ "FI",
239
+ "FMC",
240
+ "F",
241
+ "FTNT",
242
+ "FTV",
243
+ "FOXA",
244
+ "FOX",
245
+ "BEN",
246
+ "FCX",
247
+ "GRMN",
248
+ "IT",
249
+ "GE",
250
+ "GEHC",
251
+ "GEV",
252
+ "GEN",
253
+ "GNRC",
254
+ "GD",
255
+ "GIS",
256
+ "GM",
257
+ "GPC",
258
+ "GILD",
259
+ "GPN",
260
+ "GL",
261
+ "GDDY",
262
+ "GS",
263
+ "HAL",
264
+ "HIG",
265
+ "HAS",
266
+ "HCA",
267
+ "DOC",
268
+ "HSIC",
269
+ "HSY",
270
+ "HES",
271
+ "HPE",
272
+ "HLT",
273
+ "HOLX",
274
+ "HD",
275
+ "HON",
276
+ "HRL",
277
+ "HST",
278
+ "HWM",
279
+ "HPQ",
280
+ "HUBB",
281
+ "HUM",
282
+ "HBAN",
283
+ "HII",
284
+ "IBM",
285
+ "IEX",
286
+ "IDXX",
287
+ "ITW",
288
+ "INCY",
289
+ "IR",
290
+ "PODD",
291
+ "INTC",
292
+ "ICE",
293
+ "IFF",
294
+ "IP",
295
+ "IPG",
296
+ "INTU",
297
+ "ISRG",
298
+ "IVZ",
299
+ "INVH",
300
+ "IQV",
301
+ "IRM",
302
+ "JBHT",
303
+ "JBL",
304
+ "JKHY",
305
+ "J",
306
+ "JNJ",
307
+ "JCI",
308
+ "JPM",
309
+ "JNPR",
310
+ "K",
311
+ "KVUE",
312
+ "KDP",
313
+ "KEY",
314
+ "KEYS",
315
+ "KMB",
316
+ "KIM",
317
+ "KMI",
318
+ "KKR",
319
+ "KLAC",
320
+ "KHC",
321
+ "KR",
322
+ "LHX",
323
+ "LH",
324
+ "LRCX",
325
+ "LW",
326
+ "LVS",
327
+ "LDOS",
328
+ "LEN",
329
+ "LLY",
330
+ "LIN",
331
+ "LYV",
332
+ "LKQ",
333
+ "LMT",
334
+ "L",
335
+ "LOW",
336
+ "LULU",
337
+ "LYB",
338
+ "MTB",
339
+ "MRO",
340
+ "MPC",
341
+ "MKTX",
342
+ "MAR",
343
+ "MMC",
344
+ "MLM",
345
+ "MAS",
346
+ "MA",
347
+ "MTCH",
348
+ "MKC",
349
+ "MCD",
350
+ "MCK",
351
+ "MDT",
352
+ "MRK",
353
+ "META",
354
+ "MET",
355
+ "MTD",
356
+ "MGM",
357
+ "MCHP",
358
+ "MU",
359
+ "MSFT",
360
+ "MAA",
361
+ "MRNA",
362
+ "MHK",
363
+ "MOH",
364
+ "TAP",
365
+ "MDLZ",
366
+ "MPWR",
367
+ "MNST",
368
+ "MCO",
369
+ "MS",
370
+ "MOS",
371
+ "MSI",
372
+ "MSCI",
373
+ "NDAQ",
374
+ "NTAP",
375
+ "NFLX",
376
+ "NEM",
377
+ "NWSA",
378
+ "NWS",
379
+ "NEE",
380
+ "NKE",
381
+ "NI",
382
+ "NDSN",
383
+ "NSC",
384
+ "NTRS",
385
+ "NOC",
386
+ "NCLH",
387
+ "NRG",
388
+ "NUE",
389
+ "NVDA",
390
+ "NVR",
391
+ "NXPI",
392
+ "ORLY",
393
+ "OXY",
394
+ "ODFL",
395
+ "OMC",
396
+ "ON",
397
+ "OKE",
398
+ "ORCL",
399
+ "OTIS",
400
+ "PCAR",
401
+ "PKG",
402
+ "PLTR",
403
+ "PANW",
404
+ "PARA",
405
+ "PH",
406
+ "PAYX",
407
+ "PAYC",
408
+ "PYPL",
409
+ "PNR",
410
+ "PEP",
411
+ "PFE",
412
+ "PCG",
413
+ "PM",
414
+ "PSX",
415
+ "PNW",
416
+ "PNC",
417
+ "POOL",
418
+ "PPG",
419
+ "PPL",
420
+ "PFG",
421
+ "PG",
422
+ "PGR",
423
+ "PLD",
424
+ "PRU",
425
+ "PEG",
426
+ "PTC",
427
+ "PSA",
428
+ "PHM",
429
+ "QRVO",
430
+ "PWR",
431
+ "QCOM",
432
+ "DGX",
433
+ "RL",
434
+ "RJF",
435
+ "RTX",
436
+ "O",
437
+ "REG",
438
+ "REGN",
439
+ "RF",
440
+ "RSG",
441
+ "RMD",
442
+ "RVTY",
443
+ "ROK",
444
+ "ROL",
445
+ "ROP",
446
+ "ROST",
447
+ "RCL",
448
+ "SPGI",
449
+ "CRM",
450
+ "SBAC",
451
+ "SLB",
452
+ "STX",
453
+ "SRE",
454
+ "NOW",
455
+ "SHW",
456
+ "SPG",
457
+ "SWKS",
458
+ "SJM",
459
+ "SW",
460
+ "SNA",
461
+ "SOLV",
462
+ "SO",
463
+ "LUV",
464
+ "SWK",
465
+ "SBUX",
466
+ "STT",
467
+ "STLD",
468
+ "STE",
469
+ "SYK",
470
+ "SMCI",
471
+ "SYF",
472
+ "SNPS",
473
+ "SYY",
474
+ "TMUS",
475
+ "TROW",
476
+ "TTWO",
477
+ "TPR",
478
+ "TRGP",
479
+ "TGT",
480
+ "TEL",
481
+ "TDY",
482
+ "TFX",
483
+ "TER",
484
+ "TSLA",
485
+ "TXN",
486
+ "TXT",
487
+ "TMO",
488
+ "TJX",
489
+ "TSCO",
490
+ "TT",
491
+ "TDG",
492
+ "TRV",
493
+ "TRMB",
494
+ "TFC",
495
+ "TYL",
496
+ "TSN",
497
+ "USB",
498
+ "UBER",
499
+ "UDR",
500
+ "ULTA",
501
+ "UNP",
502
+ "UAL",
503
+ "UPS",
504
+ "URI",
505
+ "UNH",
506
+ "UHS",
507
+ "VLO",
508
+ "VTR",
509
+ "VLTO",
510
+ "VRSN",
511
+ "VRSK",
512
+ "VZ",
513
+ "VRTX",
514
+ "VTRS",
515
+ "VICI",
516
+ "V",
517
+ "VST",
518
+ "VMC",
519
+ "WRB",
520
+ "GWW",
521
+ "WAB",
522
+ "WBA",
523
+ "WMT",
524
+ "DIS",
525
+ "WBD",
526
+ "WM",
527
+ "WAT",
528
+ "WEC",
529
+ "WFC",
530
+ "WELL",
531
+ "WST",
532
+ "WDC",
533
+ "WY",
534
+ "WMB",
535
+ "WTW",
536
+ "WYNN",
537
+ "XEL",
538
+ "XYL",
539
+ "YUM",
540
+ "ZBRA",
541
+ "ZBH",
542
+ "ZTS",
543
+ ]
544
+
545
+
546
def load_dataset(
    path,
    symbols=None,
    adjust=True,
    extend=False,
    to_usd=True,
    rate_to_price=True,
):
    """Load a ``paperswithbacktest`` dataset and return it as a pandas DataFrame.

    The ``train`` split of ``paperswithbacktest/<path>`` is fetched through the
    ``datasets`` library (authenticated with ``HF_ACCESS_TOKEN``) and then
    post-processed according to the dataset family ``path`` belongs to.

    Args:
        path: Dataset name, e.g. ``"Stocks-Daily-Price"``.
        symbols: Optional list of symbols to keep. The special entry
            ``"sp500"`` is replaced by every ticker in ``SP500_SYMBOLS``.
            Filtering only happens when a ``list`` is given. The caller's
            list is never modified.
        adjust: Daily price datasets with an ``adj_close`` column only. When
            true, open/high/low are rescaled by ``adj_close / close`` and
            ``adj_close`` is exposed as ``close``. When false, the raw prices
            are kept and ``adj_close`` is dropped.
        extend: ``"ETFs-Daily-Price"`` only. When true, the frame is passed
            through ``__extend_etfs``.
        to_usd: ``"Forex-Daily-Price"``: pairs whose symbol does not end in
            ``"USD"`` are inverted and renamed to ``<quote>USD``.
            ``"Indices-Daily-Price"``: prices are converted with
            ``__convert_indices_to_usd``. No effect on other datasets.
        rate_to_price: ``"Bonds-Daily-Price"`` only. When true, open/high/low/
            close are treated as percentage rates and replaced by
            ``100 / (1 + rate / 100) ** years_to_maturity`` for every symbol
            whose maturity can be parsed from its name.

    Returns:
        The processed ``pandas.DataFrame``.
    """
    price_columns = ["open", "high", "low", "close"]

    dataset = ds.load_dataset(f"paperswithbacktest/{path}", token=HF_ACCESS_TOKEN)
    df = dataset["train"].to_pandas()

    is_daily_price = path in DAILY_PRICE_DATASETS
    is_daily_financial = path in DAILY_FINANCIAL_DATASETS
    is_intraday_price = path in INTRADAY_PRICE_DATASETS
    is_news = path in INTRADAY_NEWS

    if is_daily_price or is_daily_financial:
        df["date"] = pd.to_datetime(df["date"])

    if is_intraday_price or is_news:
        df["datetime"] = pd.to_datetime(df["datetime"])

    if isinstance(symbols, list):
        # Work on a copy: expanding "sp500" must not alter the caller's list.
        symbols = list(symbols)
        if "sp500" in symbols:
            symbols.remove("sp500")
            symbols += SP500_SYMBOLS

        if is_daily_price or is_intraday_price or is_daily_financial:
            df = df[df["symbol"].isin(symbols)].copy()

        if is_news:
            # News rows carry a collection of symbols; keep a row when any of
            # them was requested.
            wanted = set(symbols)
            df = df[
                df["symbols"].apply(lambda x: any(symbol in wanted for symbol in x))
            ].copy()

    if is_daily_price and "adj_close" in df.columns:
        if adjust:
            adj_factor = df["adj_close"] / df["close"]
            df["adj_open"] = df["open"] * adj_factor
            df["adj_high"] = df["high"] * adj_factor
            df["adj_low"] = df["low"] * adj_factor
            df.drop(columns=price_columns, inplace=True)
            df.rename(
                columns={
                    "adj_open": "open",
                    "adj_high": "high",
                    "adj_low": "low",
                    "adj_close": "close",
                },
                inplace=True,
            )
        else:
            # DataFrame.drop returns a new frame; the result must be kept,
            # otherwise the column silently survives.
            df = df.drop(columns=["adj_close"])

    if extend and path == "ETFs-Daily-Price":
        df = __extend_etfs(df)

    if to_usd and path == "Forex-Daily-Price":
        needs_inversion = ~df["symbol"].str.endswith("USD")
        if needs_inversion.any():
            # Inverting a rate swaps the extremes: the new high is 1 / old low
            # and the new low is 1 / old high. Assign positionally so the
            # reordered columns land where intended.
            inverted = 1 / df.loc[needs_inversion, ["open", "low", "high", "close"]]
            df.loc[needs_inversion, price_columns] = inverted.to_numpy()
            df.loc[needs_inversion, "symbol"] = (
                df.loc[needs_inversion, "symbol"].str[3:] + "USD"
            )
    elif to_usd and path == "Indices-Daily-Price":
        df_forex = load_dataset("Forex-Daily-Price", to_usd=True)
        df = __convert_indices_to_usd(df, df_forex)

    if rate_to_price and path == "Bonds-Daily-Price":
        face_value = 100
        # Parse the maturity once per symbol rather than once per row.
        years_by_symbol = {
            symbol: __extract_years_to_maturity(symbol)
            for symbol in df["symbol"].unique()
        }
        years = df["symbol"].map(years_by_symbol).astype(float)
        # Rows without a parseable (or with a zero) maturity keep their rates.
        convertible = years.notna() & (years != 0)
        if convertible.any():
            for col in price_columns:
                rate = df.loc[convertible, col]
                df.loc[convertible, col] = (
                    face_value / (1 + rate / 100) ** years[convertible]
                )

    return df
627
+
628
+
629
def __convert_indices_to_usd(df_indices, df_forex):
    """Convert index OHLC prices from their local currency to U.S. dollars.

    Each index symbol known to the internal symbol-to-currency table is
    handled separately:

    * USD-denominated indices are passed through with all their columns.
    * Other indices are inner-joined on ``date`` with the ``<currency>USD``
      rows of ``df_forex``, and every price column is multiplied by the
      matching forex column. Only ``symbol``, ``date`` and the four price
      columns are kept for these rows.

    Index symbols that are absent from the table, or whose currency has no
    rows in ``df_forex``, are left out of the result.

    Args:
        df_indices: Frame with ``symbol``, ``date``, ``open``, ``high``,
            ``low`` and ``close`` columns.
        df_forex: Frame with the same columns holding ``<currency>USD`` rates.

    Returns:
        A new frame with a fresh integer index. When nothing could be
        converted, an empty frame with the columns of ``df_indices`` is
        returned instead of letting ``pd.concat`` fail on an empty list.
    """
    mapping = {
        "ADSMI": "AED",  # United Arab Emirates
        "AEX": "EUR",  # Netherlands
        "AS30": "AUD",  # Australia
        "AS51": "AUD",  # Australia
        "AS52": "AUD",  # Australia
        "ASE": "EUR",  # Greece
        "ATX": "EUR",  # Austria
        "BEL20": "EUR",  # Belgium
        "BELEX15": "RSD",  # Serbia
        "BGSMDC": "BWP",  # Botswana
        "BHSEEI": "BHD",  # Bahrain
        "BKA": "BAM",  # Bosnia and Herzegovina
        "BLOM": "LBP",  # Lebanon
        "BSX": "BMD",  # Bermuda
        "BUX": "HUF",  # Hungary
        "BVLX": "BOB",  # Bolivia
        "BVPSBVPS": "PAB",  # Panama
        "BVQA": "USD",  # Ecuador
        "CAC": "EUR",  # France
        "CASE": "EGP",  # Egypt
        "CCMP": "USD",  # United States
        "COLCAP": "COP",  # Colombia
        "CRSMBCT": "CRC",  # Costa Rica
        "CSEALL": "LKR",  # Sri Lanka
        "CYSMMAPA": "EUR",  # Cyprus
        "DARSDSEI": "TZS",  # Tanzania
        "DAX": "EUR",  # Germany
        "DFMGI": "AED",  # United Arab Emirates
        "DSEX": "BDT",  # Bangladesh
        "DSM": "QAR",  # Qatar
        "ECU": "USD",  # Ecuador
        "FBMKLCI": "MYR",  # Malaysia
        "FSSTI": "SGD",  # Singapore
        "FTN098": "NAD",  # Namibia
        "FTSEMIB": "EUR",  # Italy
        "GGSECI": "GHS",  # Ghana
        "HEX": "EUR",  # Finland
        "HEX25": "EUR",  # Finland
        "HSI": "HKD",  # Hong Kong
        "IBEX": "EUR",  # Spain
        "IBOV": "BRL",  # Brazil
        "IBVC": "VES",  # Venezuela
        "ICEXI": "ISK",  # Iceland
        "IGPA": "CLP",  # Chile
        "INDEXCF": "RUB",  # Russia
        "INDU": "USD",  # United States
        "INDZI": "IDR",  # Indonesia
        "ISEQ": "EUR",  # Ireland
        "JALSH": "ZAR",  # South Africa
        "JCI": "IDR",  # Indonesia
        "JMSMX": "JMD",  # Jamaica
        "JOSMGNFF": "JOD",  # Jordan
        "KFX": "DKK",  # Denmark
        "KNSMIDX": "KES",  # Kenya
        "KSE100": "PKR",  # Pakistan
        "KZKAK": "KZT",  # Kazakhstan
        "LSXC": "LAK",  # Laos
        "LUXXX": "EUR",  # Luxembourg
        "MALTEX": "EUR",  # Malta
        "MBI": "MKD",  # North Macedonia
        "MERVAL": "ARS",  # Argentina
        "MEXBOL": "MXN",  # Mexico
        "MONEX": "EUR",  # Montenegro
        "MOSENEW": "MAD",  # Morocco
        "MSETOP": "MKD",  # North Macedonia
        "MSM30": "OMR",  # Oman
        "NDX": "USD",  # United States
        "NGSEINDX": "NGN",  # Nigeria
        "NIFTY": "INR",  # India
        "NKY": "JPY",  # Japan
        "NSEASI": "KES",  # Kenya
        "NZSE50FG": "NZD",  # New Zealand
        "OMX": "SEK",  # Sweden
        "OSEAX": "NOK",  # Norway
        "PCOMP": "PHP",  # Philippines
        "PFTS": "UAH",  # Ukraine
        "PSI20": "EUR",  # Portugal
        "PX": "CZK",  # Czech Republic
        "RIGSE": "EUR",  # Latvia
        "RTY": "USD",  # United States
        "SASEIDX": "SAR",  # Saudi Arabia
        "SASX10": "BAM",  # Bosnia and Herzegovina
        "SBITOP": "EUR",  # Slovenia
        "SEMDEX": "MUR",  # Mauritius
        "SENSEX": "INR",  # India
        "SET50": "THB",  # Thailand
        "SHCOMP": "CNY",  # China
        "SHSZ300": "CNY",  # China
        "SKSM": "EUR",  # Slovakia
        "SMI": "CHF",  # Switzerland
        "SOFIX": "BGN",  # Bulgaria
        "SPBLPGPT": "PEN",  # Peru
        "SPTSX": "CAD",  # Canada
        "SPX": "USD",  # United States
        "SSE50": "CNY",  # China
        "SX5E": "EUR",  # Europe
        "TA125": "ILS",  # Israel
    }
    price_columns = ["open", "high", "low", "close"]
    present_symbols = set(df_indices["symbol"].unique())

    frames = []
    for symbol, currency in mapping.items():
        if symbol not in present_symbols:
            continue
        df_index = df_indices[df_indices["symbol"] == symbol].copy()
        if currency == "USD":
            # Already quoted in dollars: nothing to convert.
            frames.append(df_index)
            continue
        df_forex_currency = df_forex[df_forex["symbol"] == currency + "USD"]
        if df_forex_currency.empty:
            continue

        # Inner join: only dates that have both an index quote and a rate.
        merged_df = pd.merge(
            df_index, df_forex_currency, on="date", suffixes=("", "_forex")
        )
        for column in price_columns:
            merged_df[column] = merged_df[column] * merged_df[f"{column}_forex"]

        frames.append(merged_df[["symbol", "date"] + price_columns])

    if not frames:
        # pd.concat raises ValueError on an empty list; return an empty frame
        # with the input schema instead.
        return df_indices.iloc[0:0].copy()
    return pd.concat(frames, ignore_index=True)
755
+
756
+
757
def __extract_years_to_maturity(bond_symbol):
    """Return the maturity, in years, encoded at the end of a bond symbol.

    The symbol must end in digits followed by ``Y`` (years) or ``M`` (months),
    e.g. ``"US10Y"`` or ``"US6M"``.

    Returns:
        The integer number of years for a ``Y`` suffix, the number of months
        divided by 12 for an ``M`` suffix, or ``None`` when the symbol does
        not end in such a suffix.
    """
    tenor = re.search(r"(\d+)([YM])$", bond_symbol)
    if tenor is None:
        return None
    amount = int(tenor.group(1))
    unit = tenor.group(2)
    # Year tenors are returned unchanged; month tenors are scaled to years.
    return amount if unit == "Y" else amount / 12
766
+
767
+
768
def __extend_etfs(df_etfs):
    """Prepend proxy history to ETFs that have a known proxy instrument.

    For every ETF in ``df_etfs`` listed in the internal ETF-to-proxy table,
    the proxy series is loaded with ``load_dataset``. Its rows dated before
    the first date shared with the ETF are rescaled so that both closes match
    on that date, relabelled with the ETF symbol, and placed in front of the
    ETF's own rows.

    ETFs without a proxy, without proxy data, or without a common date are
    returned unchanged rather than being dropped.

    Args:
        df_etfs: Frame with ``symbol``, ``date``, ``open``, ``high``, ``low``
            and ``close`` columns.

    Returns:
        A frame sorted by ``date`` then ``symbol`` with a fresh integer index.
        An input with no rows yields an empty copy of itself.
    """
    mapping = {
        "AGG": ["Bonds-Daily-Price", "US10Y"],
        "EPP": ["Indices-Daily-Price", "HSI"],
        "EWJ": ["Indices-Daily-Price", "NKY"],
        "GLD": ["Commodities-Daily-Price", "GC1"],
        "IEF": ["Bonds-Daily-Price", "US10Y"],
        "IEV": ["Indices-Daily-Price", "SX5E"],
        "IWB": ["Indices-Daily-Price", "SPX"],
        "SHY": ["Bonds-Daily-Price", "US1Y"],
        "SPY": ["Indices-Daily-Price", "SPX"],
    }
    etf_symbols = list(df_etfs["symbol"].unique())
    mapping = {k: v for k, v in mapping.items() if k in etf_symbols}

    # Load each proxy dataset once, with all the symbols needed from it.
    grouped_path_symbols = defaultdict(list)
    for proxy_path, proxy_symbol in mapping.values():
        grouped_path_symbols[proxy_path].append(proxy_symbol)
    proxy_frames = [
        load_dataset(proxy_path, proxy_symbols, to_usd=True)
        for proxy_path, proxy_symbols in grouped_path_symbols.items()
    ]
    # pd.concat fails on an empty list, which happens when no ETF has a proxy.
    df_others = pd.concat(proxy_frames) if proxy_frames else None

    frames = []
    for etf in etf_symbols:
        etf_data = df_etfs[df_etfs["symbol"] == etf]
        if etf_data.empty:
            continue
        if etf in mapping:
            other_symbol = mapping[etf][1]
            other_data = df_others[df_others["symbol"] == other_symbol]
            history = _scaled_proxy_history(etf, etf_data, other_symbol, other_data)
            if history is not None:
                frames.append(history)
        # The ETF's own rows are always kept, extended or not.
        frames.append(etf_data)

    if not frames:
        return df_etfs.copy()
    return pd.concat(frames).sort_values(by=["date", "symbol"]).reset_index(drop=True)


def _scaled_proxy_history(etf, etf_data, other_symbol, other_data):
    """Return proxy rows preceding the first shared date, rescaled to the ETF.

    Returns ``None`` when the proxy has no rows or shares no date with the ETF.
    """
    if other_data.empty:
        return None

    # Find the first overlapping date
    common_dates = etf_data["date"].isin(other_data["date"])
    first_common_date = etf_data.loc[common_dates, "date"].min()
    if pd.isnull(first_common_date):
        print(f"No common date found for {etf} and {other_symbol}")
        return None

    etf_first_common = etf_data[etf_data["date"] == first_common_date]
    other_first_common = other_data[other_data["date"] == first_common_date]

    # Scale so that both closes coincide on the first common date.
    adjustment_factor = (
        etf_first_common["close"].values[0] / other_first_common["close"].values[0]
    )

    history = other_data[other_data["date"] < first_common_date].copy()
    for column in ["open", "high", "low", "close"]:
        history[column] = history[column] * adjustment_factor
    history["symbol"] = etf
    return history
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2022-2023 Papers With Backtest and others
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,150 @@
1
+ Metadata-Version: 2.1
2
+ Name: pwb-toolbox
3
+ Version: 0.1.0
4
+ Summary: A toolbox library for quant traders
5
+ Home-page: https://github.com/paperswithbacktest/pwb-toolbox
6
+ Author: Your Name
7
+ Author-email: hello@paperswithbacktest.com
8
+ License: MIT
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Requires-Python: >=3.7
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE.txt
15
+ Requires-Dist: datasets
16
+ Requires-Dist: pandas
17
+
18
+ <div align="center">
19
+ <img src="static/images/systematic-trading.jpeg" height=200 alt=""/>
20
+ <h1>Papers With Backtest Toolbox</h1>
21
+ </div>
22
+
23
+ The `pwb-toolbox` package is designed to provide tools and resources for systematic trading strategies. It includes datasets and strategy ideas to assist in developing and backtesting trading algorithms. For detailed instructions on how to use this package effectively, please refer to the associated Substack publication by visiting: https://blog.paperswithbacktest.com/.
24
+
25
+
26
+ ## Installation
27
+
28
+ To install the pwb-toolbox package:
29
+
30
+ ```bash
31
+ pip install pwb-toolbox
32
+ ```
33
+
34
+ To log in to the Hugging Face Hub with an access token (the package also expects the token in the `HF_ACCESS_TOKEN` environment variable):
35
+
36
+ ```bash
37
+ huggingface-cli login
38
+ ```
39
+
40
+ ## Usage
41
+
42
+ The `pwb-toolbox` package offers a range of functionalities for systematic trading analysis. Here are some examples of how to utilize the package:
43
+
44
+ - Import `pwb_toolbox.datasets` and sequentially load datasets for different asset classes, such as bonds, commodities, cryptocurrencies, ETFs, forex, indices, and stocks, using the `load_dataset` function:
45
+
46
+ ```python
47
+ import pwb_toolbox.datasets as pwb_ds
48
+
49
+ df = pwb_ds.load_dataset("Bonds-Daily-Price")
50
+ df = pwb_ds.load_dataset("Commodities-Daily-Price")
51
+ df = pwb_ds.load_dataset("Cryptocurrencies-Daily-Price")
52
+ df = pwb_ds.load_dataset("ETFs-Daily-Price")
53
+ df = pwb_ds.load_dataset("Forex-Daily-Price")
54
+ df = pwb_ds.load_dataset("Indices-Daily-Price")
55
+ df = pwb_ds.load_dataset("Stocks-Daily-Price")
56
+ ```
57
+
58
+ - Load daily stock price data for specific symbols using the `load_dataset` function. The first call retrieves data for Apple and Microsoft. The second call retrieves the same stocks but without price adjustments (`adjust=False`). The third call loads daily price data for all constituents of the S&P 500 index:
59
+
60
+ ```python
61
+ import pwb_toolbox.datasets as pwb_ds
62
+
63
+ df = pwb_ds.load_dataset(
64
+ "Stocks-Daily-Price",
65
+ ["AAPL", "MSFT"],
66
+ )
67
+
68
+ df = pwb_ds.load_dataset(
69
+ "Stocks-Daily-Price",
70
+ ["AAPL", "MSFT"],
71
+ adjust=False,
72
+ )
73
+
74
+ df = pwb_ds.load_dataset(
75
+ "Stocks-Daily-Price",
76
+ ["sp500"],
77
+ )
78
+ ```
79
+
80
+ - The `extend=True` argument instructs the function to return extended historical data for ETFs, built from indices, commodities, and bonds data:
81
+
82
+ ```python
83
+ import pwb_toolbox.datasets as pwb_ds
84
+
85
+ df = pwb_ds.load_dataset(
86
+ "ETFs-Daily-Price",
87
+ ["SPY", "IEF"],
88
+ extend=True,
89
+ )
90
+ ```
91
+
92
+ - The argument `rate_to_price=False` specifies that bond yield rates should not be converted to price values in the returned data:
93
+
94
+ ```python
95
+ import pwb_toolbox.datasets as pwb_ds
96
+
97
+ df = pwb_ds.load_dataset(
98
+ "Bonds-Daily-Price",
99
+ ["US10Y"],
100
+ rate_to_price=False,
101
+ )
102
+ ```
103
+
104
+ - The argument `to_usd=False` indicates that the data should not be converted to U.S. dollars, so prices are returned in the currency in which they are originally quoted:
105
+
106
+ ```python
107
+ import pwb_toolbox.datasets as pwb_ds
108
+
109
+ df = pwb_ds.load_dataset(
110
+ "Indices-Daily-Price",
111
+ ["NKY"],
112
+ to_usd=False,
113
+ )
114
+ ```
115
+
116
+ ## Contributing
117
+
118
+ Contributions to the `pwb-toolbox` package are welcome! If you have any improvements, new datasets, or strategy ideas to share, please follow these guidelines:
119
+
120
+ 1. Fork the repository and create a new branch for your feature.
121
+ 2. Make your changes and ensure they adhere to the package's coding style.
122
+ 3. Write tests to validate the functionality or provide sample usage examples.
123
+ 4. Submit a pull request, clearly explaining the purpose and benefits of your contribution.
124
+
125
+ Please note that all contributions are subject to review and approval by the maintainers.
126
+
127
+ ## Build the Package
128
+
129
+ To build the package, run:
130
+
131
+ ```bash
132
+ python -m pip install --upgrade build
133
+ python -m build
134
+ ```
135
+
136
+ To upload the package to PyPI, run:
137
+
138
+ ```bash
139
+ twine upload dist/*
140
+ ```
141
+
142
+ ## License
143
+
144
+ The `pwb-toolbox` package is released under the MIT license. See the LICENSE file for more details.
145
+
146
+ ## Contact
147
+
148
+ For any questions, issues, or suggestions regarding the `pwb-toolbox` package, please contact the maintainers or create an issue on the repository. We appreciate your feedback and involvement in improving the package.
149
+
150
+ Happy trading!
@@ -0,0 +1,7 @@
1
+ pwb_toolbox/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ pwb_toolbox/datasets/__init__.py,sha256=kaCalJIJABdHBD3QQUx2lHWNkbJDeOszYSRDKuTdBIo,17240
3
+ pwb_toolbox-0.1.0.dist-info/LICENSE.txt,sha256=_Wjz7o7St3iVSPBRzE0keS8XSqSJ03A3NZ6cMlTaSK8,1079
4
+ pwb_toolbox-0.1.0.dist-info/METADATA,sha256=z8vk281UpCyvf3VhsGfJukXflKleNZDgF5E7H0qZ3G4,4504
5
+ pwb_toolbox-0.1.0.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
6
+ pwb_toolbox-0.1.0.dist-info/top_level.txt,sha256=TZcXcF2AMkKkibZOuq6AYsHjajPgddHAGjQUT64OYGY,12
7
+ pwb_toolbox-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (75.1.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ pwb_toolbox