pwb-toolbox 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pwb_toolbox/__init__.py
ADDED
File without changes
|
@@ -0,0 +1,836 @@
|
|
1
|
+
from collections import defaultdict
|
2
|
+
import os
|
3
|
+
import re
|
4
|
+
|
5
|
+
import datasets as ds
|
6
|
+
import pandas as pd
|
7
|
+
|
8
|
+
# Hugging Face access token used to authenticate every dataset download.
# ``os.environ.get`` is used instead of ``os.environ[...]`` so that an unset
# variable reaches the descriptive ``ValueError`` below rather than failing
# earlier with a bare ``KeyError``; an empty value is rejected the same way.
HF_ACCESS_TOKEN = os.environ.get("HF_ACCESS_TOKEN")
if not HF_ACCESS_TOKEN:
    raise ValueError("Hugging Face access token not found in environment variables")
|
11
|
+
|
12
|
+
|
13
|
+
DAILY_PRICE_DATASETS = [
|
14
|
+
"Bonds-Daily-Price",
|
15
|
+
"Commodities-Daily-Price",
|
16
|
+
"Cryptocurrencies-Daily-Price",
|
17
|
+
"ETFs-Daily-Price",
|
18
|
+
"Forex-Daily-Price",
|
19
|
+
"Indices-Daily-Price",
|
20
|
+
"Stocks-Daily-Price",
|
21
|
+
]
|
22
|
+
|
23
|
+
DAILY_FINANCIAL_DATASETS = [
|
24
|
+
"Stocks-Quarterly-BalanceSheet",
|
25
|
+
"Stocks-Quarterly-CashFlow",
|
26
|
+
"Stocks-Quarterly-Earnings",
|
27
|
+
"Stocks-Quarterly-IncomeStatement",
|
28
|
+
]
|
29
|
+
|
30
|
+
INTRADAY_PRICE_DATASETS = [
|
31
|
+
"Stocks-1Min-Price",
|
32
|
+
]
|
33
|
+
|
34
|
+
INTRADAY_NEWS = [
|
35
|
+
"All-Daily-News",
|
36
|
+
]
|
37
|
+
|
38
|
+
|
39
|
+
SP500_SYMBOLS = [
|
40
|
+
"MMM",
|
41
|
+
"AOS",
|
42
|
+
"ABT",
|
43
|
+
"ABBV",
|
44
|
+
"ACN",
|
45
|
+
"ADBE",
|
46
|
+
"AMD",
|
47
|
+
"AES",
|
48
|
+
"AFL",
|
49
|
+
"A",
|
50
|
+
"APD",
|
51
|
+
"ABNB",
|
52
|
+
"AKAM",
|
53
|
+
"ALB",
|
54
|
+
"ARE",
|
55
|
+
"ALGN",
|
56
|
+
"ALLE",
|
57
|
+
"LNT",
|
58
|
+
"ALL",
|
59
|
+
"GOOGL",
|
60
|
+
"GOOG",
|
61
|
+
"MO",
|
62
|
+
"AMZN",
|
63
|
+
"AMCR",
|
64
|
+
"AEE",
|
65
|
+
"AEP",
|
66
|
+
"AXP",
|
67
|
+
"AIG",
|
68
|
+
"AMT",
|
69
|
+
"AWK",
|
70
|
+
"AMP",
|
71
|
+
"AME",
|
72
|
+
"AMGN",
|
73
|
+
"APH",
|
74
|
+
"ADI",
|
75
|
+
"ANSS",
|
76
|
+
"AON",
|
77
|
+
"APA",
|
78
|
+
"AAPL",
|
79
|
+
"AMAT",
|
80
|
+
"APTV",
|
81
|
+
"ACGL",
|
82
|
+
"ADM",
|
83
|
+
"ANET",
|
84
|
+
"AJG",
|
85
|
+
"AIZ",
|
86
|
+
"T",
|
87
|
+
"ATO",
|
88
|
+
"ADSK",
|
89
|
+
"ADP",
|
90
|
+
"AZO",
|
91
|
+
"AVB",
|
92
|
+
"AVY",
|
93
|
+
"AXON",
|
94
|
+
"BKR",
|
95
|
+
"BALL",
|
96
|
+
"BAC",
|
97
|
+
"BK",
|
98
|
+
"BBWI",
|
99
|
+
"BAX",
|
100
|
+
"BDX",
|
101
|
+
"BRK-B",
|
102
|
+
"BBY",
|
103
|
+
"TECH",
|
104
|
+
"BIIB",
|
105
|
+
"BLK",
|
106
|
+
"BX",
|
107
|
+
"BA",
|
108
|
+
"BKNG",
|
109
|
+
"BWA",
|
110
|
+
"BSX",
|
111
|
+
"BMY",
|
112
|
+
"AVGO",
|
113
|
+
"BR",
|
114
|
+
"BRO",
|
115
|
+
"BF-B",
|
116
|
+
"BLDR",
|
117
|
+
"BG",
|
118
|
+
"BXP",
|
119
|
+
"CHRW",
|
120
|
+
"CDNS",
|
121
|
+
"CZR",
|
122
|
+
"CPT",
|
123
|
+
"CPB",
|
124
|
+
"COF",
|
125
|
+
"CAH",
|
126
|
+
"KMX",
|
127
|
+
"CCL",
|
128
|
+
"CARR",
|
129
|
+
"CTLT",
|
130
|
+
"CAT",
|
131
|
+
"CBOE",
|
132
|
+
"CBRE",
|
133
|
+
"CDW",
|
134
|
+
"CE",
|
135
|
+
"COR",
|
136
|
+
"CNC",
|
137
|
+
"CNP",
|
138
|
+
"CF",
|
139
|
+
"CRL",
|
140
|
+
"SCHW",
|
141
|
+
"CHTR",
|
142
|
+
"CVX",
|
143
|
+
"CMG",
|
144
|
+
"CB",
|
145
|
+
"CHD",
|
146
|
+
"CI",
|
147
|
+
"CINF",
|
148
|
+
"CTAS",
|
149
|
+
"CSCO",
|
150
|
+
"C",
|
151
|
+
"CFG",
|
152
|
+
"CLX",
|
153
|
+
"CME",
|
154
|
+
"CMS",
|
155
|
+
"KO",
|
156
|
+
"CTSH",
|
157
|
+
"CL",
|
158
|
+
"CMCSA",
|
159
|
+
"CAG",
|
160
|
+
"COP",
|
161
|
+
"ED",
|
162
|
+
"STZ",
|
163
|
+
"CEG",
|
164
|
+
"COO",
|
165
|
+
"CPRT",
|
166
|
+
"GLW",
|
167
|
+
"CPAY",
|
168
|
+
"CTVA",
|
169
|
+
"CSGP",
|
170
|
+
"COST",
|
171
|
+
"CTRA",
|
172
|
+
"CRWD",
|
173
|
+
"CCI",
|
174
|
+
"CSX",
|
175
|
+
"CMI",
|
176
|
+
"CVS",
|
177
|
+
"DHR",
|
178
|
+
"DRI",
|
179
|
+
"DVA",
|
180
|
+
"DAY",
|
181
|
+
"DECK",
|
182
|
+
"DE",
|
183
|
+
"DELL",
|
184
|
+
"DAL",
|
185
|
+
"DVN",
|
186
|
+
"DXCM",
|
187
|
+
"FANG",
|
188
|
+
"DLR",
|
189
|
+
"DFS",
|
190
|
+
"DG",
|
191
|
+
"DLTR",
|
192
|
+
"D",
|
193
|
+
"DPZ",
|
194
|
+
"DOV",
|
195
|
+
"DOW",
|
196
|
+
"DHI",
|
197
|
+
"DTE",
|
198
|
+
"DUK",
|
199
|
+
"DD",
|
200
|
+
"EMN",
|
201
|
+
"ETN",
|
202
|
+
"EBAY",
|
203
|
+
"ECL",
|
204
|
+
"EIX",
|
205
|
+
"EW",
|
206
|
+
"EA",
|
207
|
+
"ELV",
|
208
|
+
"EMR",
|
209
|
+
"ENPH",
|
210
|
+
"ETR",
|
211
|
+
"EOG",
|
212
|
+
"EPAM",
|
213
|
+
"EQT",
|
214
|
+
"EFX",
|
215
|
+
"EQIX",
|
216
|
+
"EQR",
|
217
|
+
"ERIE",
|
218
|
+
"ESS",
|
219
|
+
"EL",
|
220
|
+
"EG",
|
221
|
+
"EVRG",
|
222
|
+
"ES",
|
223
|
+
"EXC",
|
224
|
+
"EXPE",
|
225
|
+
"EXPD",
|
226
|
+
"EXR",
|
227
|
+
"XOM",
|
228
|
+
"FFIV",
|
229
|
+
"FDS",
|
230
|
+
"FICO",
|
231
|
+
"FAST",
|
232
|
+
"FRT",
|
233
|
+
"FDX",
|
234
|
+
"FIS",
|
235
|
+
"FITB",
|
236
|
+
"FSLR",
|
237
|
+
"FE",
|
238
|
+
"FI",
|
239
|
+
"FMC",
|
240
|
+
"F",
|
241
|
+
"FTNT",
|
242
|
+
"FTV",
|
243
|
+
"FOXA",
|
244
|
+
"FOX",
|
245
|
+
"BEN",
|
246
|
+
"FCX",
|
247
|
+
"GRMN",
|
248
|
+
"IT",
|
249
|
+
"GE",
|
250
|
+
"GEHC",
|
251
|
+
"GEV",
|
252
|
+
"GEN",
|
253
|
+
"GNRC",
|
254
|
+
"GD",
|
255
|
+
"GIS",
|
256
|
+
"GM",
|
257
|
+
"GPC",
|
258
|
+
"GILD",
|
259
|
+
"GPN",
|
260
|
+
"GL",
|
261
|
+
"GDDY",
|
262
|
+
"GS",
|
263
|
+
"HAL",
|
264
|
+
"HIG",
|
265
|
+
"HAS",
|
266
|
+
"HCA",
|
267
|
+
"DOC",
|
268
|
+
"HSIC",
|
269
|
+
"HSY",
|
270
|
+
"HES",
|
271
|
+
"HPE",
|
272
|
+
"HLT",
|
273
|
+
"HOLX",
|
274
|
+
"HD",
|
275
|
+
"HON",
|
276
|
+
"HRL",
|
277
|
+
"HST",
|
278
|
+
"HWM",
|
279
|
+
"HPQ",
|
280
|
+
"HUBB",
|
281
|
+
"HUM",
|
282
|
+
"HBAN",
|
283
|
+
"HII",
|
284
|
+
"IBM",
|
285
|
+
"IEX",
|
286
|
+
"IDXX",
|
287
|
+
"ITW",
|
288
|
+
"INCY",
|
289
|
+
"IR",
|
290
|
+
"PODD",
|
291
|
+
"INTC",
|
292
|
+
"ICE",
|
293
|
+
"IFF",
|
294
|
+
"IP",
|
295
|
+
"IPG",
|
296
|
+
"INTU",
|
297
|
+
"ISRG",
|
298
|
+
"IVZ",
|
299
|
+
"INVH",
|
300
|
+
"IQV",
|
301
|
+
"IRM",
|
302
|
+
"JBHT",
|
303
|
+
"JBL",
|
304
|
+
"JKHY",
|
305
|
+
"J",
|
306
|
+
"JNJ",
|
307
|
+
"JCI",
|
308
|
+
"JPM",
|
309
|
+
"JNPR",
|
310
|
+
"K",
|
311
|
+
"KVUE",
|
312
|
+
"KDP",
|
313
|
+
"KEY",
|
314
|
+
"KEYS",
|
315
|
+
"KMB",
|
316
|
+
"KIM",
|
317
|
+
"KMI",
|
318
|
+
"KKR",
|
319
|
+
"KLAC",
|
320
|
+
"KHC",
|
321
|
+
"KR",
|
322
|
+
"LHX",
|
323
|
+
"LH",
|
324
|
+
"LRCX",
|
325
|
+
"LW",
|
326
|
+
"LVS",
|
327
|
+
"LDOS",
|
328
|
+
"LEN",
|
329
|
+
"LLY",
|
330
|
+
"LIN",
|
331
|
+
"LYV",
|
332
|
+
"LKQ",
|
333
|
+
"LMT",
|
334
|
+
"L",
|
335
|
+
"LOW",
|
336
|
+
"LULU",
|
337
|
+
"LYB",
|
338
|
+
"MTB",
|
339
|
+
"MRO",
|
340
|
+
"MPC",
|
341
|
+
"MKTX",
|
342
|
+
"MAR",
|
343
|
+
"MMC",
|
344
|
+
"MLM",
|
345
|
+
"MAS",
|
346
|
+
"MA",
|
347
|
+
"MTCH",
|
348
|
+
"MKC",
|
349
|
+
"MCD",
|
350
|
+
"MCK",
|
351
|
+
"MDT",
|
352
|
+
"MRK",
|
353
|
+
"META",
|
354
|
+
"MET",
|
355
|
+
"MTD",
|
356
|
+
"MGM",
|
357
|
+
"MCHP",
|
358
|
+
"MU",
|
359
|
+
"MSFT",
|
360
|
+
"MAA",
|
361
|
+
"MRNA",
|
362
|
+
"MHK",
|
363
|
+
"MOH",
|
364
|
+
"TAP",
|
365
|
+
"MDLZ",
|
366
|
+
"MPWR",
|
367
|
+
"MNST",
|
368
|
+
"MCO",
|
369
|
+
"MS",
|
370
|
+
"MOS",
|
371
|
+
"MSI",
|
372
|
+
"MSCI",
|
373
|
+
"NDAQ",
|
374
|
+
"NTAP",
|
375
|
+
"NFLX",
|
376
|
+
"NEM",
|
377
|
+
"NWSA",
|
378
|
+
"NWS",
|
379
|
+
"NEE",
|
380
|
+
"NKE",
|
381
|
+
"NI",
|
382
|
+
"NDSN",
|
383
|
+
"NSC",
|
384
|
+
"NTRS",
|
385
|
+
"NOC",
|
386
|
+
"NCLH",
|
387
|
+
"NRG",
|
388
|
+
"NUE",
|
389
|
+
"NVDA",
|
390
|
+
"NVR",
|
391
|
+
"NXPI",
|
392
|
+
"ORLY",
|
393
|
+
"OXY",
|
394
|
+
"ODFL",
|
395
|
+
"OMC",
|
396
|
+
"ON",
|
397
|
+
"OKE",
|
398
|
+
"ORCL",
|
399
|
+
"OTIS",
|
400
|
+
"PCAR",
|
401
|
+
"PKG",
|
402
|
+
"PLTR",
|
403
|
+
"PANW",
|
404
|
+
"PARA",
|
405
|
+
"PH",
|
406
|
+
"PAYX",
|
407
|
+
"PAYC",
|
408
|
+
"PYPL",
|
409
|
+
"PNR",
|
410
|
+
"PEP",
|
411
|
+
"PFE",
|
412
|
+
"PCG",
|
413
|
+
"PM",
|
414
|
+
"PSX",
|
415
|
+
"PNW",
|
416
|
+
"PNC",
|
417
|
+
"POOL",
|
418
|
+
"PPG",
|
419
|
+
"PPL",
|
420
|
+
"PFG",
|
421
|
+
"PG",
|
422
|
+
"PGR",
|
423
|
+
"PLD",
|
424
|
+
"PRU",
|
425
|
+
"PEG",
|
426
|
+
"PTC",
|
427
|
+
"PSA",
|
428
|
+
"PHM",
|
429
|
+
"QRVO",
|
430
|
+
"PWR",
|
431
|
+
"QCOM",
|
432
|
+
"DGX",
|
433
|
+
"RL",
|
434
|
+
"RJF",
|
435
|
+
"RTX",
|
436
|
+
"O",
|
437
|
+
"REG",
|
438
|
+
"REGN",
|
439
|
+
"RF",
|
440
|
+
"RSG",
|
441
|
+
"RMD",
|
442
|
+
"RVTY",
|
443
|
+
"ROK",
|
444
|
+
"ROL",
|
445
|
+
"ROP",
|
446
|
+
"ROST",
|
447
|
+
"RCL",
|
448
|
+
"SPGI",
|
449
|
+
"CRM",
|
450
|
+
"SBAC",
|
451
|
+
"SLB",
|
452
|
+
"STX",
|
453
|
+
"SRE",
|
454
|
+
"NOW",
|
455
|
+
"SHW",
|
456
|
+
"SPG",
|
457
|
+
"SWKS",
|
458
|
+
"SJM",
|
459
|
+
"SW",
|
460
|
+
"SNA",
|
461
|
+
"SOLV",
|
462
|
+
"SO",
|
463
|
+
"LUV",
|
464
|
+
"SWK",
|
465
|
+
"SBUX",
|
466
|
+
"STT",
|
467
|
+
"STLD",
|
468
|
+
"STE",
|
469
|
+
"SYK",
|
470
|
+
"SMCI",
|
471
|
+
"SYF",
|
472
|
+
"SNPS",
|
473
|
+
"SYY",
|
474
|
+
"TMUS",
|
475
|
+
"TROW",
|
476
|
+
"TTWO",
|
477
|
+
"TPR",
|
478
|
+
"TRGP",
|
479
|
+
"TGT",
|
480
|
+
"TEL",
|
481
|
+
"TDY",
|
482
|
+
"TFX",
|
483
|
+
"TER",
|
484
|
+
"TSLA",
|
485
|
+
"TXN",
|
486
|
+
"TXT",
|
487
|
+
"TMO",
|
488
|
+
"TJX",
|
489
|
+
"TSCO",
|
490
|
+
"TT",
|
491
|
+
"TDG",
|
492
|
+
"TRV",
|
493
|
+
"TRMB",
|
494
|
+
"TFC",
|
495
|
+
"TYL",
|
496
|
+
"TSN",
|
497
|
+
"USB",
|
498
|
+
"UBER",
|
499
|
+
"UDR",
|
500
|
+
"ULTA",
|
501
|
+
"UNP",
|
502
|
+
"UAL",
|
503
|
+
"UPS",
|
504
|
+
"URI",
|
505
|
+
"UNH",
|
506
|
+
"UHS",
|
507
|
+
"VLO",
|
508
|
+
"VTR",
|
509
|
+
"VLTO",
|
510
|
+
"VRSN",
|
511
|
+
"VRSK",
|
512
|
+
"VZ",
|
513
|
+
"VRTX",
|
514
|
+
"VTRS",
|
515
|
+
"VICI",
|
516
|
+
"V",
|
517
|
+
"VST",
|
518
|
+
"VMC",
|
519
|
+
"WRB",
|
520
|
+
"GWW",
|
521
|
+
"WAB",
|
522
|
+
"WBA",
|
523
|
+
"WMT",
|
524
|
+
"DIS",
|
525
|
+
"WBD",
|
526
|
+
"WM",
|
527
|
+
"WAT",
|
528
|
+
"WEC",
|
529
|
+
"WFC",
|
530
|
+
"WELL",
|
531
|
+
"WST",
|
532
|
+
"WDC",
|
533
|
+
"WY",
|
534
|
+
"WMB",
|
535
|
+
"WTW",
|
536
|
+
"WYNN",
|
537
|
+
"XEL",
|
538
|
+
"XYL",
|
539
|
+
"YUM",
|
540
|
+
"ZBRA",
|
541
|
+
"ZBH",
|
542
|
+
"ZTS",
|
543
|
+
]
|
544
|
+
|
545
|
+
|
546
|
+
def load_dataset(
    path,
    symbols=None,
    adjust=True,
    extend=False,
    to_usd=True,
    rate_to_price=True,
):
    """Load a ``paperswithbacktest`` dataset from the Hugging Face Hub.

    The ``train`` split of ``paperswithbacktest/{path}`` is downloaded with
    ``HF_ACCESS_TOKEN`` and converted to a pandas DataFrame, then
    post-processed according to the dataset family ``path`` belongs to.

    Args:
        path: Dataset name, e.g. ``"Stocks-Daily-Price"``.
        symbols: Optional ``list`` of symbols to keep. The pseudo-symbol
            ``"sp500"`` is replaced by the contents of ``SP500_SYMBOLS``.
            Filtering only happens when a ``list`` is passed; any other value
            leaves all symbols in place. The caller's list is not modified.
        adjust: Daily price datasets that have an ``adj_close`` column only.
            When true, open/high/low are scaled by ``adj_close / close`` and
            the adjusted values are returned under the plain
            ``open/high/low/close`` names. When false, ``adj_close`` is
            dropped and the raw prices are returned.
        extend: ``"ETFs-Daily-Price"`` only. When true the frame is passed
            through ``__extend_etfs``.
        to_usd: ``"Forex-Daily-Price"``: rows whose symbol does not end in
            ``"USD"`` get their prices inverted and their symbol rewritten to
            ``<symbol[3:]>USD``. ``"Indices-Daily-Price"``: prices are
            converted with ``__convert_indices_to_usd``.
        rate_to_price: ``"Bonds-Daily-Price"`` only. When true, each
            open/high/low/close value ``r`` of a symbol with a recognised
            maturity ``T`` (in years) becomes ``100 / (1 + r / 100) ** T``.

    Returns:
        pandas.DataFrame: The loaded and post-processed data.
    """
    dataset = ds.load_dataset(f"paperswithbacktest/{path}", token=HF_ACCESS_TOKEN)
    df = dataset["train"].to_pandas()

    is_daily_price = path in DAILY_PRICE_DATASETS
    is_financial = path in DAILY_FINANCIAL_DATASETS
    is_intraday_price = path in INTRADAY_PRICE_DATASETS
    is_news = path in INTRADAY_NEWS
    price_columns = ["open", "high", "low", "close"]

    if is_daily_price or is_financial:
        df["date"] = pd.to_datetime(df["date"])

    if is_intraday_price or is_news:
        df["datetime"] = pd.to_datetime(df["datetime"])

    if isinstance(symbols, list):
        if "sp500" in symbols:
            # Build a new list: the caller's argument must not be mutated.
            symbols = [s for s in symbols if s != "sp500"] + SP500_SYMBOLS

        if is_daily_price or is_intraday_price or is_financial:
            df = df[df["symbol"].isin(symbols)].copy()

        if is_news:
            # News rows carry a collection of symbols; keep a row as soon as
            # one of them is requested.
            wanted = set(symbols)
            df = df[
                df["symbols"].apply(lambda tagged: any(s in wanted for s in tagged))
            ].copy()

    if is_daily_price and "adj_close" in df.columns:
        if adjust:
            adj_factor = df["adj_close"] / df["close"]
            for col in ("open", "high", "low"):
                df[f"adj_{col}"] = df[col] * adj_factor
            # Replace the raw prices by their adjusted counterparts.
            df = df.drop(columns=price_columns).rename(
                columns={f"adj_{col}": col for col in price_columns}
            )
        else:
            # ``drop`` returns a new frame; the result has to be kept.
            df = df.drop(columns=["adj_close"])

    if is_daily_price and extend and path == "ETFs-Daily-Price":
        df = __extend_etfs(df)

    if is_daily_price and to_usd:
        if path == "Forex-Daily-Price":
            # Vectorised form of the per-row inversion: same arithmetic,
            # without a Python-level loop over every row.
            needs_inversion = ~df["symbol"].str.endswith("USD").astype(bool)
            df.loc[needs_inversion, price_columns] = (
                1 / df.loc[needs_inversion, price_columns]
            )
            df.loc[needs_inversion, "symbol"] = (
                df.loc[needs_inversion, "symbol"].str[3:] + "USD"
            )
        elif path == "Indices-Daily-Price":
            df_forex = load_dataset("Forex-Daily-Price", to_usd=True)
            df = __convert_indices_to_usd(df, df_forex)

    if is_daily_price and rate_to_price and path == "Bonds-Daily-Price":
        face_value = 100
        # The maturity depends only on the symbol, so parse it once per
        # symbol and convert all of that symbol's rows in one operation.
        for symbol in df["symbol"].unique():
            years_to_maturity = __extract_years_to_maturity(symbol)
            if not years_to_maturity:
                continue
            rows = df["symbol"] == symbol
            df.loc[rows, price_columns] = (
                face_value
                / (1 + df.loc[rows, price_columns] / 100) ** years_to_maturity
            )

    return df
|
627
|
+
|
628
|
+
|
629
|
+
def __convert_indices_to_usd(df_indices, df_forex):
    """Convert index prices to U.S. dollars using daily forex rates.

    Args:
        df_indices: DataFrame with ``symbol``, ``date``, ``open``, ``high``,
            ``low`` and ``close`` columns, one row per index and day.
        df_forex: DataFrame with the same columns whose ``symbol`` values are
            currency pairs such as ``"EURUSD"``.

    Returns:
        pandas.DataFrame: The converted rows, concatenated in the order of
        the mapping below with a fresh integer index. USD-denominated indices
        are returned unchanged. For the others, each price column is
        multiplied by the same column of the ``<currency>USD`` pair on the
        same date, and only ``symbol``, ``date``, ``open``, ``high``, ``low``
        and ``close`` are kept. Indices missing from the mapping, indices
        without any ``<currency>USD`` rows, and dates without a forex quote
        are left out. When nothing can be converted an empty frame with the
        columns of ``df_indices`` is returned.
    """
    mapping = {
        "ADSMI": "AED",  # United Arab Emirates
        "AEX": "EUR",  # Netherlands
        "AS30": "AUD",  # Australia
        "AS51": "AUD",  # Australia
        "AS52": "AUD",  # Australia
        "ASE": "EUR",  # Greece
        "ATX": "EUR",  # Austria
        "BEL20": "EUR",  # Belgium
        "BELEX15": "RSD",  # Serbia
        "BGSMDC": "BWP",  # Botswana
        "BHSEEI": "BHD",  # Bahrain
        "BKA": "BAM",  # Bosnia and Herzegovina
        "BLOM": "LBP",  # Lebanon
        "BSX": "BMD",  # Bermuda
        "BUX": "HUF",  # Hungary
        "BVLX": "BOB",  # Bolivia
        "BVPSBVPS": "PAB",  # Panama
        "BVQA": "USD",  # Ecuador
        "CAC": "EUR",  # France
        "CASE": "EGP",  # Egypt
        "CCMP": "USD",  # United States
        "COLCAP": "COP",  # Colombia
        "CRSMBCT": "CRC",  # Costa Rica
        "CSEALL": "LKR",  # Sri Lanka
        "CYSMMAPA": "EUR",  # Cyprus
        "DARSDSEI": "TZS",  # Tanzania
        "DAX": "EUR",  # Germany
        "DFMGI": "AED",  # United Arab Emirates
        "DSEX": "BDT",  # Bangladesh
        "DSM": "QAR",  # Qatar
        "ECU": "USD",  # Ecuador
        "FBMKLCI": "MYR",  # Malaysia
        "FSSTI": "SGD",  # Singapore
        "FTN098": "NAD",  # Namibia
        "FTSEMIB": "EUR",  # Italy
        "GGSECI": "GHS",  # Ghana
        "HEX": "EUR",  # Finland
        "HEX25": "EUR",  # Finland
        "HSI": "HKD",  # Hong Kong
        "IBEX": "EUR",  # Spain
        "IBOV": "BRL",  # Brazil
        "IBVC": "VES",  # Venezuela
        "ICEXI": "ISK",  # Iceland
        "IGPA": "CLP",  # Chile
        "INDEXCF": "RUB",  # Russia
        "INDU": "USD",  # United States
        "INDZI": "IDR",  # Indonesia
        "ISEQ": "EUR",  # Ireland
        "JALSH": "ZAR",  # South Africa
        "JCI": "IDR",  # Indonesia
        "JMSMX": "JMD",  # Jamaica
        "JOSMGNFF": "JOD",  # Jordan
        "KFX": "DKK",  # Denmark
        "KNSMIDX": "KES",  # Kenya
        "KSE100": "PKR",  # Pakistan
        "KZKAK": "KZT",  # Kazakhstan
        "LSXC": "LAK",  # Laos
        "LUXXX": "EUR",  # Luxembourg
        "MALTEX": "EUR",  # Malta
        "MBI": "MKD",  # North Macedonia
        "MERVAL": "ARS",  # Argentina
        "MEXBOL": "MXN",  # Mexico
        "MONEX": "EUR",  # Montenegro
        "MOSENEW": "MAD",  # Morocco
        "MSETOP": "MKD",  # North Macedonia
        "MSM30": "OMR",  # Oman
        "NDX": "USD",  # United States
        "NGSEINDX": "NGN",  # Nigeria
        "NIFTY": "INR",  # India
        "NKY": "JPY",  # Japan
        "NSEASI": "KES",  # Kenya
        "NZSE50FG": "NZD",  # New Zealand
        "OMX": "SEK",  # Sweden
        "OSEAX": "NOK",  # Norway
        "PCOMP": "PHP",  # Philippines
        "PFTS": "UAH",  # Ukraine
        "PSI20": "EUR",  # Portugal
        "PX": "CZK",  # Czech Republic
        "RIGSE": "EUR",  # Latvia
        "RTY": "USD",  # United States
        "SASEIDX": "SAR",  # Saudi Arabia
        "SASX10": "BAM",  # Bosnia and Herzegovina
        "SBITOP": "EUR",  # Slovenia
        "SEMDEX": "MUR",  # Mauritius
        "SENSEX": "INR",  # India
        "SET50": "THB",  # Thailand
        "SHCOMP": "CNY",  # China
        "SHSZ300": "CNY",  # China
        "SKSM": "EUR",  # Slovakia
        "SMI": "CHF",  # Switzerland
        "SOFIX": "BGN",  # Bulgaria
        "SPBLPGPT": "PEN",  # Peru
        "SPTSX": "CAD",  # Canada
        "SPX": "USD",  # United States
        "SSE50": "CNY",  # China
        "SX5E": "EUR",  # Europe
        "TA125": "ILS",  # Israel
    }
    price_columns = ["open", "high", "low", "close"]
    present = set(df_indices["symbol"].unique())

    frames = []
    for symbol, currency in mapping.items():
        if symbol not in present:
            continue
        df_index = df_indices[df_indices["symbol"] == symbol].copy()
        if currency == "USD":
            # Already quoted in dollars: nothing to convert.
            frames.append(df_index)
            continue

        df_forex_currency = df_forex[df_forex["symbol"] == currency + "USD"]
        if df_forex_currency.empty:
            continue

        # Inner join on the date: days without a forex quote are dropped.
        merged_df = pd.merge(
            df_index, df_forex_currency, on="date", suffixes=("", "_forex")
        )
        for col in price_columns:
            merged_df[col] = merged_df[col] * merged_df[f"{col}_forex"]

        frames.append(merged_df[["symbol", "date", *price_columns]])

    if not frames:
        # ``pd.concat`` raises ValueError on an empty list.
        return df_indices.iloc[0:0].copy()

    return pd.concat(frames, ignore_index=True)
|
755
|
+
|
756
|
+
|
757
|
+
def __extract_years_to_maturity(bond_symbol):
    """Return the maturity encoded at the end of a bond symbol, in years.

    The symbol must end with digits followed by an upper-case ``Y`` (years)
    or ``M`` (months), e.g. ``"US10Y"`` or ``"US6M"``.

    Args:
        bond_symbol: Bond symbol string.

    Returns:
        The number of years as an ``int`` for a ``Y`` suffix, the number of
        months divided by 12 as a ``float`` for an ``M`` suffix, or ``None``
        when the symbol does not end with such a suffix.
    """
    match = re.search(r"(\d+)([YM])$", bond_symbol)
    if match is None:
        return None

    time_value = int(match.group(1))
    if match.group(2) == "M":
        return time_value / 12  # Convert months to years
    return time_value  # Already expressed in years
|
766
|
+
|
767
|
+
|
768
|
+
def __extend_etfs(df_etfs):
    """Prepend rescaled proxy history to ETFs that have a known proxy.

    For each ETF listed in the mapping below, the proxy series (a bond, an
    index or a commodity future) is loaded with ``load_dataset``. The proxy
    rows dated before the first date both series share are multiplied by
    ``etf_close / proxy_close`` on that date, relabelled with the ETF symbol
    and placed in front of the ETF's own rows.

    ETFs that are not in the mapping, whose proxy has no rows, or that share
    no date with their proxy are returned unchanged instead of being dropped.

    Args:
        df_etfs: DataFrame with ``symbol``, ``date``, ``open``, ``high``,
            ``low`` and ``close`` columns.

    Returns:
        pandas.DataFrame: All ETFs of ``df_etfs``, extended where possible,
        sorted by ``date`` then ``symbol`` with a fresh integer index.
    """
    mapping = {
        "AGG": ["Bonds-Daily-Price", "US10Y"],
        "EPP": ["Indices-Daily-Price", "HSI"],
        "EWJ": ["Indices-Daily-Price", "NKY"],
        "GLD": ["Commodities-Daily-Price", "GC1"],
        "IEF": ["Bonds-Daily-Price", "US10Y"],
        "IEV": ["Indices-Daily-Price", "SX5E"],
        "IWB": ["Indices-Daily-Price", "SPX"],
        "SHY": ["Bonds-Daily-Price", "US1Y"],
        "SPY": ["Indices-Daily-Price", "SPX"],
    }
    price_columns = ["open", "high", "low", "close"]
    present = set(df_etfs["symbol"].unique())
    mapping = {etf: proxy for etf, proxy in mapping.items() if etf in present}

    # ETFs without a proxy cannot be extended; keep their data as it is.
    passthrough = df_etfs[~df_etfs["symbol"].isin(list(mapping))]
    frames = [] if passthrough.empty else [passthrough]

    if mapping:
        # Load every proxy dataset once, asking only for the needed symbols.
        grouped_path_symbols = defaultdict(list)
        for dataset_path, proxy_symbol in mapping.values():
            if proxy_symbol not in grouped_path_symbols[dataset_path]:
                grouped_path_symbols[dataset_path].append(proxy_symbol)
        df_others = pd.concat(
            [
                load_dataset(dataset_path, proxy_symbols, to_usd=True)
                for dataset_path, proxy_symbols in grouped_path_symbols.items()
            ]
        )

    for etf, (_, other_symbol) in mapping.items():
        etf_data = df_etfs[df_etfs["symbol"] == etf]
        other_data = df_others[df_others["symbol"] == other_symbol]
        if other_data.empty:
            frames.append(etf_data)
            continue

        # Find the first overlapping date
        common_dates = etf_data["date"].isin(other_data["date"])
        first_common_date = etf_data.loc[common_dates, "date"].min()
        if pd.isnull(first_common_date):
            print(f"No common date found for {etf} and {other_symbol}")
            frames.append(etf_data)
            continue

        # Scale the proxy so both series agree on the first common close.
        etf_close = etf_data.loc[
            etf_data["date"] == first_common_date, "close"
        ].values[0]
        other_close = other_data.loc[
            other_data["date"] == first_common_date, "close"
        ].values[0]
        adjustment_factor = etf_close / other_close

        # Only the proxy rows preceding the overlap are used.
        history = other_data[other_data["date"] < first_common_date].copy()
        history[price_columns] = history[price_columns] * adjustment_factor
        history["symbol"] = etf

        frames.append(pd.concat([history, etf_data]))

    if not frames:
        # Only reachable for an empty input; ``pd.concat([])`` would raise.
        return df_etfs.reset_index(drop=True)

    return pd.concat(frames).sort_values(by=["date", "symbol"]).reset_index(drop=True)
|
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2022-2023 Papers With Backtest and others
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
@@ -0,0 +1,150 @@
|
|
1
|
+
Metadata-Version: 2.1
|
2
|
+
Name: pwb-toolbox
|
3
|
+
Version: 0.1.0
|
4
|
+
Summary: A toolbox library for quant traders
|
5
|
+
Home-page: https://github.com/paperswithbacktest/pwb-toolbox
|
6
|
+
Author: Your Name
|
7
|
+
Author-email: hello@paperswithbacktest.com
|
8
|
+
License: MIT
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
11
|
+
Classifier: Operating System :: OS Independent
|
12
|
+
Requires-Python: >=3.7
|
13
|
+
Description-Content-Type: text/markdown
|
14
|
+
License-File: LICENSE.txt
|
15
|
+
Requires-Dist: datasets
|
16
|
+
Requires-Dist: pandas
|
17
|
+
|
18
|
+
<div align="center">
|
19
|
+
<img src="static/images/systematic-trading.jpeg" height=200 alt=""/>
|
20
|
+
<h1>Papers With Backtest Toolbox</h1>
|
21
|
+
</div>
|
22
|
+
|
23
|
+
The `pwb-toolbox` package is designed to provide tools and resources for systematic trading strategies. It includes datasets and strategy ideas to assist in developing and backtesting trading algorithms. For detailed instructions on how to use this package effectively, please refer to the associated Substack publication by visiting: https://blog.paperswithbacktest.com/.
|
24
|
+
|
25
|
+
|
26
|
+
## Installation
|
27
|
+
|
28
|
+
To install the pwb-toolbox package:
|
29
|
+
|
30
|
+
```bash
|
31
|
+
pip install pwb-toolbox
|
32
|
+
```
|
33
|
+
|
34
|
+
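To log in to the Hugging Face Hub with an access token (note that `pwb_toolbox.datasets` also reads the token from the `HF_ACCESS_TOKEN` environment variable when it is imported, so that variable must be set as well):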
|
35
|
+
|
36
|
+
```bash
|
37
|
+
huggingface-cli login
|
38
|
+
```
|
39
|
+
|
40
|
+
## Usage
|
41
|
+
|
42
|
+
The `pwb-toolbox` package offers a range of functionalities for systematic trading analysis. Here are some examples of how to utilize the package:
|
43
|
+
|
44
|
+
- Import `pwb_toolbox.datasets` and sequentially load datasets for different asset classes, such as bonds, commodities, cryptocurrencies, ETFs, forex, indices, and stocks, using the `load_dataset` function:
|
45
|
+
|
46
|
+
```python
|
47
|
+
import pwb_toolbox.datasets as pwb_ds
|
48
|
+
|
49
|
+
df = pwb_ds.load_dataset("Bonds-Daily-Price")
|
50
|
+
df = pwb_ds.load_dataset("Commodities-Daily-Price")
|
51
|
+
df = pwb_ds.load_dataset("Cryptocurrencies-Daily-Price")
|
52
|
+
df = pwb_ds.load_dataset("ETFs-Daily-Price")
|
53
|
+
df = pwb_ds.load_dataset("Forex-Daily-Price")
|
54
|
+
df = pwb_ds.load_dataset("Indices-Daily-Price")
|
55
|
+
df = pwb_ds.load_dataset("Stocks-Daily-Price")
|
56
|
+
```
|
57
|
+
|
58
|
+
- Load daily stock price data for specific symbols using the `load_dataset` function. The first call retrieves data for Apple and Microsoft. The second call retrieves the same stocks but without price adjustments (`adjust=False`). The third call uses the special symbol `"sp500"`, which is expanded to the package's built-in list of S&P 500 constituents, and loads daily price data for each of those stocks:
|
59
|
+
|
60
|
+
```python
|
61
|
+
import pwb_toolbox.datasets as pwb_ds
|
62
|
+
|
63
|
+
df = pwb_ds.load_dataset(
|
64
|
+
"Stocks-Daily-Price",
|
65
|
+
["AAPL", "MSFT"],
|
66
|
+
)
|
67
|
+
|
68
|
+
df = pwb_ds.load_dataset(
|
69
|
+
"Stocks-Daily-Price",
|
70
|
+
["AAPL", "MSFT"],
|
71
|
+
adjust=False,
|
72
|
+
)
|
73
|
+
|
74
|
+
df = pwb_ds.load_dataset(
|
75
|
+
"Stocks-Daily-Price",
|
76
|
+
["sp500"],
|
77
|
+
)
|
78
|
+
```
|
79
|
+
|
80
|
+
- The `extend=True` argument instructs the function to return extended historical data for ETFs, built from index, commodity, and bond data:
|
81
|
+
|
82
|
+
```python
|
83
|
+
import pwb_toolbox.datasets as pwb_ds
|
84
|
+
|
85
|
+
df = pwb_ds.load_dataset(
|
86
|
+
"ETFs-Daily-Price",
|
87
|
+
["SPY", "IEF"],
|
88
|
+
extend=True,
|
89
|
+
)
|
90
|
+
```
|
91
|
+
|
92
|
+
- The argument `rate_to_price=False` specifies that bond yield rates should not be converted to price values in the returned data:
|
93
|
+
|
94
|
+
```python
|
95
|
+
import pwb_toolbox.datasets as pwb_ds
|
96
|
+
|
97
|
+
df = pwb_ds.load_dataset(
|
98
|
+
"Bonds-Daily-Price",
|
99
|
+
["US10Y"],
|
100
|
+
rate_to_price=False,
|
101
|
+
)
|
102
|
+
```
|
103
|
+
|
104
|
+
- The argument `to_usd=False` indicates that the data should not be converted to U.S. dollars, implying that it might be available in another currency.
|
105
|
+
|
106
|
+
```python
|
107
|
+
import pwb_toolbox.datasets as pwb_ds
|
108
|
+
|
109
|
+
df = pwb_ds.load_dataset(
|
110
|
+
"Indices-Daily-Price",
|
111
|
+
["NKY"],
|
112
|
+
to_usd=False,
|
113
|
+
)
|
114
|
+
```
|
115
|
+
|
116
|
+
## Contributing
|
117
|
+
|
118
|
+
Contributions to the `pwb-toolbox` package are welcome! If you have any improvements, new datasets, or strategy ideas to share, please follow these guidelines:
|
119
|
+
|
120
|
+
1. Fork the repository and create a new branch for your feature.
|
121
|
+
2. Make your changes and ensure they adhere to the package's coding style.
|
122
|
+
3. Write tests to validate the functionality or provide sample usage examples.
|
123
|
+
4. Submit a pull request, clearly explaining the purpose and benefits of your contribution.
|
124
|
+
|
125
|
+
Please note that all contributions are subject to review and approval by the maintainers.
|
126
|
+
|
127
|
+
## Build the Package
|
128
|
+
|
129
|
+
To build the package, run:
|
130
|
+
|
131
|
+
```bash
|
132
|
+
python -m pip install --upgrade build
|
133
|
+
python -m build
|
134
|
+
```
|
135
|
+
|
136
|
+
To upload the package to PyPI, run:
|
137
|
+
|
138
|
+
```bash
|
139
|
+
twine upload dist/*
|
140
|
+
```
|
141
|
+
|
142
|
+
## License
|
143
|
+
|
144
|
+
The `pwb-toolbox` package is released under the MIT license. See the LICENSE file for more details.
|
145
|
+
|
146
|
+
## Contact
|
147
|
+
|
148
|
+
For any questions, issues, or suggestions regarding the `pwb-toolbox` package, please contact the maintainers or create an issue on the repository. We appreciate your feedback and involvement in improving the package.
|
149
|
+
|
150
|
+
Happy trading!
|
@@ -0,0 +1,7 @@
|
|
1
|
+
pwb_toolbox/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
+
pwb_toolbox/datasets/__init__.py,sha256=kaCalJIJABdHBD3QQUx2lHWNkbJDeOszYSRDKuTdBIo,17240
|
3
|
+
pwb_toolbox-0.1.0.dist-info/LICENSE.txt,sha256=_Wjz7o7St3iVSPBRzE0keS8XSqSJ03A3NZ6cMlTaSK8,1079
|
4
|
+
pwb_toolbox-0.1.0.dist-info/METADATA,sha256=z8vk281UpCyvf3VhsGfJukXflKleNZDgF5E7H0qZ3G4,4504
|
5
|
+
pwb_toolbox-0.1.0.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
6
|
+
pwb_toolbox-0.1.0.dist-info/top_level.txt,sha256=TZcXcF2AMkKkibZOuq6AYsHjajPgddHAGjQUT64OYGY,12
|
7
|
+
pwb_toolbox-0.1.0.dist-info/RECORD,,
|
@@ -0,0 +1 @@
|
|
1
|
+
pwb_toolbox
|