pandas-market-calendars 4.3.3__py3-none-any.whl → 4.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pandas_market_calendars/__init__.py +39 -38
- pandas_market_calendars/calendar_registry.py +57 -53
- pandas_market_calendars/calendar_utils.py +1200 -261
- pandas_market_calendars/calendars/asx.py +66 -66
- pandas_market_calendars/calendars/bmf.py +223 -206
- pandas_market_calendars/calendars/bse.py +421 -407
- pandas_market_calendars/calendars/cboe.py +145 -145
- pandas_market_calendars/calendars/cme.py +405 -402
- pandas_market_calendars/calendars/cme_globex_agriculture.py +172 -126
- pandas_market_calendars/calendars/cme_globex_base.py +119 -119
- pandas_market_calendars/calendars/cme_globex_crypto.py +160 -160
- pandas_market_calendars/calendars/cme_globex_energy_and_metals.py +216 -216
- pandas_market_calendars/calendars/cme_globex_equities.py +123 -123
- pandas_market_calendars/calendars/cme_globex_fixed_income.py +136 -136
- pandas_market_calendars/calendars/cme_globex_fx.py +101 -101
- pandas_market_calendars/calendars/eurex.py +131 -139
- pandas_market_calendars/calendars/eurex_fixed_income.py +98 -98
- pandas_market_calendars/calendars/hkex.py +429 -426
- pandas_market_calendars/calendars/ice.py +81 -81
- pandas_market_calendars/calendars/iex.py +151 -112
- pandas_market_calendars/calendars/jpx.py +113 -109
- pandas_market_calendars/calendars/lse.py +114 -114
- pandas_market_calendars/calendars/mirror.py +149 -130
- pandas_market_calendars/calendars/nyse.py +1466 -1324
- pandas_market_calendars/calendars/ose.py +116 -116
- pandas_market_calendars/calendars/sifma.py +354 -350
- pandas_market_calendars/calendars/six.py +132 -132
- pandas_market_calendars/calendars/sse.py +311 -311
- pandas_market_calendars/calendars/tase.py +220 -197
- pandas_market_calendars/calendars/tsx.py +181 -181
- pandas_market_calendars/holidays/cme.py +385 -385
- pandas_market_calendars/holidays/cme_globex.py +214 -214
- pandas_market_calendars/holidays/cn.py +1476 -1455
- pandas_market_calendars/holidays/jp.py +401 -398
- pandas_market_calendars/holidays/jpx_equinox.py +1 -0
- pandas_market_calendars/holidays/nyse.py +1536 -1531
- pandas_market_calendars/holidays/oz.py +63 -63
- pandas_market_calendars/holidays/sifma.py +350 -338
- pandas_market_calendars/holidays/us.py +376 -376
- pandas_market_calendars/market_calendar.py +1057 -895
- {pandas_market_calendars-4.3.3.dist-info → pandas_market_calendars-4.6.0.dist-info}/METADATA +13 -9
- pandas_market_calendars-4.6.0.dist-info/RECORD +50 -0
- {pandas_market_calendars-4.3.3.dist-info → pandas_market_calendars-4.6.0.dist-info}/WHEEL +1 -1
- pandas_market_calendars-4.3.3.dist-info/RECORD +0 -50
- {pandas_market_calendars-4.3.3.dist-info → pandas_market_calendars-4.6.0.dist-info}/LICENSE +0 -0
- {pandas_market_calendars-4.3.3.dist-info → pandas_market_calendars-4.6.0.dist-info}/NOTICE +0 -0
- {pandas_market_calendars-4.3.3.dist-info → pandas_market_calendars-4.6.0.dist-info}/top_level.txt +0 -0
@@ -1,261 +1,1200 @@
|
|
1
|
-
"""
|
2
|
-
Utilities to use with market_calendars
|
3
|
-
"""
|
4
|
-
|
5
|
-
import
|
6
|
-
|
7
|
-
import
|
8
|
-
import
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
:
|
21
|
-
:
|
22
|
-
:
|
23
|
-
"""
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
Only
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
)
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
1
|
+
"""
|
2
|
+
Utilities to use with market_calendars
|
3
|
+
"""
|
4
|
+
|
5
|
+
import itertools
|
6
|
+
from math import ceil, floor
|
7
|
+
from typing import TYPE_CHECKING, Any, Dict, Iterable, Literal, Tuple, Union
|
8
|
+
import warnings
|
9
|
+
|
10
|
+
from re import finditer, split
|
11
|
+
import numpy as np
|
12
|
+
import pandas as pd
|
13
|
+
|
14
|
+
if TYPE_CHECKING:
|
15
|
+
from pandas.tseries.offsets import CustomBusinessDay
|
16
|
+
from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday
|
17
|
+
|
18
|
+
# Default session-name -> returned-label mapping used by mark_session().
# Keys are the internal session names; values are what appears in the result.
DEFAULT_LABEL_MAP = {
    "pre": "pre",
    "rth_pre_break": "rth",
    "rth": "rth",
    "break": "break",
    "rth_post_break": "rth",
    "post": "post",
    "closed": "closed",
}


def mark_session(
    schedule: pd.DataFrame,
    timestamps: pd.DatetimeIndex,
    label_map: Union[Dict[str, Any], None] = None,
    *,
    closed: Literal["left", "right"] = "right",
) -> pd.Series:
    """
    Return a Series that denotes the trading session of each timestamp in a DatetimeIndex.
    The returned Series's Index is the provided Datetime Index, the Series's values
    are the timestamps' corresponding session.

    PARAMETERS:

    :param schedule: The market schedule to check the timestamps against. This Schedule must
        include all of the trading days that are in the provided DatetimeIndex of timestamps.
        Note: The columns need to be sorted into ascending order, if not, then an error will be
        raised saying the bins must be in ascending order.

    :param timestamps: A DatetimeIndex of Timestamps to check. Must be sorted in ascending order.

    :param label_map: Optional mapping of Dict[str, Any] to change the values returned in the
        series. Defaults to None (use the default labels below). The keys of the given mapping
        should match the keys of the default dict, but the values can be anything. A subset of
        mappings may also be provided, e.g. {'closed':-1} will only change the label of the
        'closed' session. All others will remain the default label.

        >>> Default Mapping == {
            "pre": "pre",
            "rth_pre_break": "rth",  # When the Schedule has a break
            "rth": "rth",            # When the Schedule doesn't have a break
            "break": "break",        # When the Schedule has a break
            "rth_post_break": "rth", # When the Schedule has a break
            "post": "post",
            "closed": "closed",
        }

    :param closed: Which side of each interval should be closed (inclusive)
        left: == [start, end)
        right: == (start, end]

    :raises ValueError: When the schedule does not cover the full span of the timestamps.
    """
    # A None default avoids the shared-mutable-default-argument pitfall; the dict is
    # only read below, but the sentinel keeps the signature safe against future edits.
    label_map = {} if label_map is None else label_map

    # ---- ---- ---- Determine which columns need to be dropped ---- ---- ----
    # 'closed' is always first: the first bin of every trading day (midnight -> first
    # market time) is a closed session.
    session_labels = ["closed"]
    columns = set(schedule.columns)
    needed_cols = set()

    def _extend_statement(session: str, parts: set):
        # Record the session (and the columns that bound it) only when the schedule
        # actually provides both of its boundary columns.
        if parts.issubset(columns):
            needed_cols.update(parts)
            session_labels.append(session)

    # Session order here must match the left-to-right column order of the schedule,
    # since labels are later zipped against the flattened schedule rows.
    _extend_statement("pre", {"pre", "market_open"})
    if {"break_start", "break_end"}.issubset(columns):
        _extend_statement("rth_pre_break", {"market_open", "break_start"})
        _extend_statement("break", {"break_start", "break_end"})
        _extend_statement("rth_post_break", {"break_end", "market_close"})
    else:
        _extend_statement("rth", {"market_open", "market_close"})
    _extend_statement("post", {"market_close", "post"})

    # ---- ---- ---- Error Check ---- ---- ----
    if len(extra_cols := columns - needed_cols) > 0:
        schedule = schedule.drop(columns=[*extra_cols])
        warnings.warn(
            f"Attempting to mark trading sessions and the schedule ({columns = }) contains the "
            f"extra columns: {extra_cols}. Returned sessions may not be labeled as desired."
        )

    start = timestamps[0]
    end = timestamps[-1]
    if start < schedule.iloc[0, 0]:  # type: ignore
        raise ValueError(
            f"Insufficient Schedule. Needed Start-Time: {start.normalize().tz_localize(None)}. "
            f"Schedule starts at: {schedule.iloc[0, 0]}"
        )
    if end > schedule.iloc[-1, -1]:  # type: ignore
        raise ValueError(
            f"Insufficient Schedule. Needed End-Time: {end.normalize().tz_localize(None)}. "
            f"Schedule ends at: {schedule.iloc[-1, -1]}"
        )

    # Trim the schedule to match the timeframe covered by the given timeseries
    schedule = schedule[
        (schedule.index >= start.normalize().tz_localize(None))
        & (schedule.index <= end.normalize().tz_localize(None))
    ]

    # Overlay any user-supplied labels on top of the defaults, then build one label
    # per bin: the per-day session labels repeated per row, plus a trailing 'closed'.
    backfilled_map = DEFAULT_LABEL_MAP | label_map
    mapped_labels = [backfilled_map[label] for label in session_labels]
    labels = pd.Series([mapped_labels]).repeat(len(schedule)).explode()
    labels = pd.concat([labels, pd.Series([backfilled_map["closed"]])])

    # Append on additional Edge-Case Bins so result doesn't include NaNs
    bins = schedule.to_numpy().flatten()
    bins = np.insert(bins, 0, bins[0].normalize())
    bins = np.append(bins, bins[-1].normalize() + pd.Timedelta("1D"))

    # np.unique also sorts the bins, which pd.cut requires.
    bins, _ind, _counts = np.unique(bins, return_index=True, return_counts=True)

    if len(bins) - 1 != len(labels):
        # np.unique dropped some duplicate bins, need to drop the associated labels
        label_inds = (_ind + _counts - 1)[:-1]
        labels = labels.iloc[label_inds]

    return pd.Series(
        pd.cut(timestamps, bins, closed != "left", labels=labels, ordered=False),  # type: ignore
        index=timestamps,
    )
|
136
|
+
|
137
|
+
|
138
|
+
def merge_schedules(schedules, how="outer"):
    """
    Combine a list of market schedules into a single schedule.

    The merge method (how) either returns the superset of any datetime when any schedule
    is open ("outer") or only the datetimes where all markets are open ("inner").

    CAVEATS:
    * This does not work for schedules with breaks, the break information will be lost.
    * Only "market_open" and "market_close" are considered, other market times are not yet supported.

    :param schedules: list of schedules
    :param how: outer or inner
    :return: schedule DataFrame
    """
    has_break_cols = any(
        column in ("break_start", "break_end")
        for frame in schedules
        for column in frame.columns
    )
    if has_break_cols:
        warnings.warn(
            "Merge schedules will drop the break_start and break_end from result."
        )

    merged = schedules[0]
    for other in schedules[1:]:
        merged = merged.merge(other, how=how, right_index=True, left_index=True)
        # "outer" widens each day's window, "inner" narrows it to the overlap.
        if how == "outer":
            pick_open, pick_close = min, max
        elif how == "inner":
            pick_open, pick_close = max, min
        else:
            raise ValueError('how argument must be "inner" or "outer"')
        merged["market_open"] = merged.apply(
            lambda row: pick_open(row.market_open_x, row.market_open_y), axis=1
        )
        merged["market_close"] = merged.apply(
            lambda row: pick_close(row.market_close_x, row.market_close_y), axis=1
        )
    return merged[["market_open", "market_close"]]
|
179
|
+
|
180
|
+
|
181
|
+
def is_single_observance(holiday: "Holiday"):
    """Return the holiday's date when it is observed exactly once, otherwise None."""
    # A rule observed exactly once has identical start and end dates.
    if holiday.start_date == holiday.end_date:  # type: ignore ??
        return holiday.start_date
    return None
|
184
|
+
|
185
|
+
|
186
|
+
def all_single_observance_rules(calendar: "AbstractHolidayCalendar"):
    """Return a list of timestamps if every rule of the Calendar is a single-observance holiday, otherwise None."""
    dates = [is_single_observance(rule) for rule in calendar.rules]
    if not all(dates):
        return None
    return dates
|
190
|
+
|
191
|
+
|
192
|
+
def convert_freq(index, frequency):
    """
    Convert a DateTimeIndex to a new lower frequency.

    :param index: DateTimeIndex
    :param frequency: frequency string
    :return: DateTimeIndex
    """
    # An empty frame is used purely so pandas' asfreq performs the resampling.
    empty_frame = pd.DataFrame(index=index)
    return empty_frame.asfreq(frequency).index
|
201
|
+
|
202
|
+
|
203
|
+
# Session names accepted by date_range()'s `session` parameter.
# NOTE: date_range's helpers introspect SESSIONS.__args__ to validate input,
# so the membership of this Literal is part of the public contract.
SESSIONS = Literal[
    "pre",
    "post",
    "RTH",
    "pre_break",
    "post_break",
    "ETH",
    "break",
    "closed",
    "closed_masked",
]
# Schedule column names (market times) recognized by the date_range machinery.
MKT_TIMES = Literal[
    "pre", "post", "market_open", "market_close", "break_start", "break_end"
]
|
217
|
+
|
218
|
+
|
219
|
+
# region ---- ---- ---- Date Range Warning Types ---- ---- ----
|
220
|
+
class DateRangeWarning(UserWarning):
    """Super class to all date_range() warning types; filter on this to affect all of them."""
|
222
|
+
|
223
|
+
|
224
|
+
class OverlappingSessionWarning(DateRangeWarning):
    """
    Warning thrown when date_range is called with a timedelta that is larger than the
    gap between two sessions, leading to them overlapping.
    This is only an issue when closed='right'/'both'/None and force_close=None

    For example, the following raises a warning because the 10:00 Timestamp that is from the 'pre'
    session comes after the start of the 9:30 'RTH' session, but belongs to the 'pre' session
    >>> date_range(NYSE, '2h', 'right', None, {'pre', 'RTH'}, merge_adjacent = False)
    >>> ['2020-01-02 06:00:00', '2020-01-02 08:00:00',
         '2020-01-02 10:00:00', '2020-01-02 11:30:00',
         '2020-01-02 13:30:00', '2020-01-02 15:30:00',
         '2020-01-02 17:30:00'],
    This is particularly convoluted when closed='both'/None
    >>> date_range(NYSE, '2h', 'both', None, {'pre', 'RTH'}, merge_adjacent = False)
    >>> ['2020-01-02 04:00:00' (pre), '2020-01-02 06:00:00' (pre),
         '2020-01-02 08:00:00' (pre), '2020-01-02 09:30:00' (rth),
         '2020-01-02 10:00:00' (pre), '2020-01-02 11:30:00' (rth),
         '2020-01-02 13:30:00' (rth), '2020-01-02 15:30:00' (rth),
         '2020-01-02 17:30:00' (rth)],
    """
|
245
|
+
|
246
|
+
|
247
|
+
class DisappearingSessionWarning(DateRangeWarning):
    """
    Warning thrown when date_range is called with a timedelta that is larger than an entire
    session, resulting in the session disappearing from the returned DatetimeIndex.

    Only an issue when closed='right' and force_close = False
    """
|
254
|
+
|
255
|
+
|
256
|
+
class MissingSessionWarning(DateRangeWarning):
    """
    Warning thrown when a date_range() call is made with a requested session,
    but the schedule lacks the necessary columns. When this warning is ignored the returned
    DatetimeIndex will simply lack the relevant sessions.

    e.g. 'pre' Session requested and schedule lacks 'pre' and/or 'market_open' column
    """
|
264
|
+
|
265
|
+
|
266
|
+
class InsufficientScheduleWarning(DateRangeWarning):
    """
    Warning thrown when a date_range() call is made with a requested number of periods,
    or start-date / end-date, that exceed what was provided in the given schedule.

    If a Schedule has an insufficient start and end date then this warning is thrown twice.

    If this warning is thrown when date_range is called with a number of desired periods, then
    the desired start/end date is an approximate value. This 'approximation' is biased to
    overestimate the needed start/end time by about 1 week. This is done to limit the edge
    cases where this warning could get thrown multiple times in a row.
    """
|
278
|
+
|
279
|
+
|
280
|
+
def filter_date_range_warnings(
    action: Literal["error", "ignore", "always", "default", "once"],
    source: Union[
        Iterable[type[DateRangeWarning]], type[DateRangeWarning]
    ] = DateRangeWarning,
):
    """
    Adjust the behavior of the date_range() warnings to the desired action.

    :param action: - The desired change to the warning behavior
        'error': Escalate Warnings into Errors
        'ignore': Silence Warning Messages
        'once': Only display a message of the given category once
        'default': Reset the behavior of the given warning category
        'always': Always show the Warning of a given category

    :param source: - The Category/Categories to apply the action to. Can be a single Warning
        or a list of warnings. default: DateRangeWarning (All Warnings)
        Warning Types: MissingSessionWarning, OverlappingSessionWarning,
        DisappearingSessionWarning, InsufficientScheduleWarning
    """
    # A lone warning class is wrapped so both input forms funnel through one loop.
    categories = (source,) if not isinstance(source, Iterable) else source
    for category in categories:
        warnings.filterwarnings(action, category=category)
|
307
|
+
|
308
|
+
|
309
|
+
def parse_missing_session_warning(
    err: MissingSessionWarning,
) -> Tuple[set[SESSIONS], set[MKT_TIMES]]:
    """
    Parse a MissingSessionWarning's message into its component sets.

    :returns Tuple[set[str], set[str]]:
        Set #1: The Missing Sessions
        Set #2: The Missing Schedule Columns
    """
    # The warning message embeds both sets in braces: strip the quotes,
    # then split on the brace characters to isolate each set's contents.
    pieces = split(r"[{|}]", err.args[0].replace("'", ""))
    missing_sessions = set(pieces[1].split(", "))
    missing_columns = set(pieces[3].split(", "))
    return missing_sessions, missing_columns  # type: ignore
|
320
|
+
|
321
|
+
|
322
|
+
def parse_insufficient_schedule_warning(
    err: InsufficientScheduleWarning,
) -> Tuple[bool, pd.Timestamp, pd.Timestamp]:
    """
    Parse the information from an InsufficientScheduleWarning.

    :returns Tuple[bool, pd.Timestamp, pd.Timestamp]:
        bool: True == Range is missing from the start, False == Range missing from the end
        Timestamp 1: Start of missing range
        Timestamp 2: End of the missing range.
        Note: The Timestamps are always ordered (t1 <= t2) and do not overlap with the
        original schedule. If a supplemental schedule is generated it can be concatenated
        on without any overlapping indices.
    """
    message = err.args[0]
    missing_from_start = "Start-Time" in message
    dates = finditer(r"\d{4}-\d{2}-\d{2}", message)
    first = pd.Timestamp(next(dates).group())
    second = pd.Timestamp(next(dates).group())

    # Step the schedule-boundary date off by one day so the returned range
    # does not overlap the schedule that raised the warning.
    second += pd.Timedelta("-1D") if missing_from_start else pd.Timedelta("1D")

    if first <= second:
        return missing_from_start, first, second
    return missing_from_start, second, first
|
346
|
+
|
347
|
+
|
348
|
+
# endregion
|
349
|
+
|
350
|
+
|
351
|
+
def date_range(
    schedule: pd.DataFrame,
    frequency: Union[str, pd.Timedelta, int, float],
    closed: Union[Literal["left", "right", "both"], None] = "right",
    force_close: Union[bool, None] = True,
    session: Union[SESSIONS, Iterable[SESSIONS]] = {"RTH"},
    merge_adjacent: bool = True,
    start: Union[str, pd.Timestamp, int, float, None] = None,
    end: Union[str, pd.Timestamp, int, float, None] = None,
    periods: Union[int, None] = None,
) -> pd.DatetimeIndex:
    """
    Interpolates a Market's Schedule at the desired frequency and returns the result as a DatetimeIndex.
    This function is only valid for periods less than 1 Day, for longer periods use date_range_htf().

    Note: The slowest part of this function is by far generating the necessary schedule (which in
    turn is limited by pandas' date_range() function). If speed is a concern, store and update the
    schedule as needed instead of generating it every time.

    WARNINGS SYSTEM:
    *There are multiple edge-case warnings that are thrown by this function. See the Docstrings
    of each warning for more info. (DateRangeWarning, InsufficientScheduleWarning,
    MissingSessionWarning, OverlappingSessionWarning, DisappearingSessionWarning)

    *The thrown warnings can be ignored or escalated into catchable errors by using the
    filter_date_range_warnings() function.

    parse_missing_session_warning() & parse_insufficient_schedule_warning() exist to easily
    process those warnings if they are escalated into errors.

    PARAMETERS:

    :param schedule: Schedule of a calendar which includes all the columns necessary
        for the desired sessions.

    :param frequency: String, Int/float (seconds) or pd.Timedelta that represents the desired
        interval of the date_range. Intervals larger than 1D are not supported.

    :param closed: the way the intervals are labeled
        'right': use the end of the interval
        'left': use the start of the interval
        None / 'both': use the end of the interval but include the start of the first interval

    :param force_close: How the last value of a trading session is handled
        True: guarantee that the close of the trading session is the last value
        False: guarantee that there is no value greater than the close of the trading session
        None: leave the last value as it is calculated based on the closed parameter

    :param session: A str representing a single session or an Iterable of the following Sessions.
        RTH: The Default Option. This is [Market_open, Market_close], if the schedule includes a
            break then the break is excluded from the returned datetime index.
        ETH: [pre, market_open] & [market_close, post]
        pre: [pre, market_open]
        post: [market_close, post]
        break: [break_start, break_end]
        pre_break: [market_open, break_start]
        post_break: [break_end, market_close]
        closed: [market_close, market_open (of the next day)] If ETH market times are given then
            this will be [post, pre (of the next day)] instead. The last session will end at
            Midnight of the timezone the schedule is given in.
        closed_masked: Same as closed, but Weekends & Holidays are ignored. Instead, the Datetime
            index stops at Midnight on the trading day before the break and resumes at midnight
            prior to the next trading day. **Note: This is Midnight of the Timezone the schedule is
            given in, not Midnight of the exchange's tz since the exchange's tz is not known.

    :param merge_adjacent: Bool representing if adjacent sessions should be merged into a single session.
        For Example, NYSE w/ session={'RTH', 'ETH'}, frequency=2h, closed=left, force_close=False
        merge_adjacent == True => [pre, post]
        >>> ['2020-01-02 04:00:00', '2020-01-02 06:00:00',
             '2020-01-02 08:00:00', '2020-01-02 10:00:00',
             '2020-01-02 12:00:00', '2020-01-02 14:00:00',
             '2020-01-02 16:00:00', '2020-01-02 18:00:00']
        merge_adjacent == False => [pre, market_open] & [market_open, market_close] & [market_close, post]
        >>> ['2020-01-02 04:00:00', '2020-01-02 06:00:00',
             '2020-01-02 08:00:00', '2020-01-02 09:30:00',
             '2020-01-02 11:30:00', '2020-01-02 13:30:00',
             '2020-01-02 15:30:00', '2020-01-02 16:00:00',
             '2020-01-02 18:00:00']
        merge_adjacent=False re-aligns the timestamps to the session, but this results in
        the difference between timestamps not always equaling the desired frequency.

    :param start: Optional [String, Int/float (POSIX seconds) or pd.Timestamp] of the desired start time.
        :If left as None then the start-time of the Schedule is used.
        :If no TZ info is given it will be interpreted in the same timezone as the first column
            of the schedule
        :Start can be a Day and Time, but the returned index will still be aligned to the underlying
            schedule. e.g. Session = [9:30am, 12pm], frequency=7min, start=9:45am. Underlying session
            = [9:30, 9:37, 9:44, 9:51, ...] => returned DatetimeIndex = [9:51, ...]

    :param end: Optional [String, Int/float (POSIX seconds) or pd.Timestamp] of the desired end time.
        :If left as None then the end-time of the Schedule is used.
        :If no TZ info is given it will be interpreted in the same timezone as the first column
        **Note: The time given is an absolute value. i.e. end="2020-01-01" == "2020-01-01 00:00"
            returning times prior to Midnight of "2019-12-31", not to the EOD of "2020-01-01"

    :param periods: Optional Integer number of periods to return. If a Period count, Start time,
        and End time are given the period count is ignored.
        None: Period count is ignored. Returned index is all periods in [Start, End]
        Int: # of periods to return. By default, this is the first N periods following the start.
            If an end time is given then this is the N periods prior to the End Time (inclusive).
        CAVEAT: When Force_close == False & closed == 'right'/'both' the number of periods returned
            may be less than the parameter given.

    :return: pd.DatetimeIndex of datetime64[ns, TZ-Aware]
    """
    # ---- ---- Error Check Inputs ---- ----
    if closed not in ("left", "right", "both", None):
        raise ValueError("closed must be 'left', 'right', 'both' or None.")
    if force_close not in (True, False, None):
        raise ValueError("force_close must be True, False or None.")
    if merge_adjacent not in (True, False):
        raise ValueError("merge_adjacent must be True or False")

    # ---- ---- Standardize Frequency Param ---- ----
    # Numeric frequencies are documented as seconds; pd.Timedelta(int) expects
    # nanoseconds, hence the 1e9 scaling before conversion.
    if isinstance(frequency, (int, float)):
        frequency = int(frequency * 1_000_000_000)
    try:
        frequency = pd.Timedelta(frequency)
    except ValueError as e:
        raise ValueError(f"Market Calendar Date_range Timeframe Error: {e}") from e
    if frequency <= pd.Timedelta("0s"):
        raise ValueError("Market Calendar Date_Range Frequency must be Positive.")
    if frequency > pd.Timedelta("1D"):
        raise ValueError(
            "Market Calendar Date_Range Frequency Cannot Be longer than '1D'."
        )

    # Resolve the requested sessions into (start_col, end_col) column pairs.
    # _make_session_list warns (MissingSessionWarning) when columns are absent.
    session_list, mask = _make_session_list(
        set(schedule.columns), session, merge_adjacent
    )
    if len(session_list) == 0:
        # Nothing to generate: every requested session was missing from the schedule.
        return pd.DatetimeIndex([], dtype="datetime64[ns, UTC]")

    session_times = _reconfigure_schedule(schedule, session_list, mask)
    # Trim off all 0 length sessions
    session_times = session_times[session_times.start.ne(session_times.end)]
    # NOTE(review): _error_check_sessions is defined elsewhere in this module;
    # it appears to emit the Overlapping/Disappearing session warnings — confirm there.
    _error_check_sessions(session_times, frequency, closed, force_close)

    tz = schedule[session_list[0][0]].dt.tz  # copy tz info from schedule
    dtype = schedule[session_list[0][0]].dtype  # copy dtype info from schedule
    start, end, periods = _standardize_times(schedule, start, end, periods, tz)

    time_series = _calc_time_series(
        session_times, frequency, closed, force_close, start, end, periods
    )
    # Drop the Series name so the resulting DatetimeIndex is unnamed.
    time_series.name = None

    return pd.DatetimeIndex(time_series, tz=tz, dtype=dtype)
|
499
|
+
|
500
|
+
|
501
|
+
# region ------------------ Date Range LTF Subroutines ------------------
|
502
|
+
|
503
|
+
|
504
|
+
def _make_session_list(
    columns: set, sessions: Union[str, Iterable], merge_adjacent: bool
) -> Tuple[list, bool]:
    """
    Create a flat-then-paired list of (Session Start Column, Session End Column) tuples.

    :param columns: set of column names present in the schedule. NOTE: this set is
        mutated in place — '_wrap' pseudo-columns are added to it below.
    :param sessions: session name or iterable of session names (must be members of
        the module-level SESSIONS Literal).
    :param merge_adjacent: when True, adjacent sessions that share a boundary column
        are merged into a single (start, end) pair.
    :returns: (list of (start_col, end_col) tuples, whether 'closed_masked' was requested)
    :raises ValueError: if an unknown session name is given.
    """
    session_times = []
    missing_cols = set()
    missing_sess = set()
    # Normalize 'sessions' to a set; a bare string is a single session name.
    sessions = {sessions} if isinstance(sessions, str) else set(sessions)

    if len(extras := sessions.difference(set(SESSIONS.__args__))) > 0:  # type: ignore
        raise ValueError(f"Unknown Date_Range Market Session: {extras}")

    if "ETH" in sessions:  # Standardize ETH to 'pre' and 'post'
        sessions = sessions - {"ETH"} | {"pre", "post"}
    if "closed_masked" in sessions:  # closed_masked == 'closed' for this step
        sessions |= {"closed"}
    if "pre" in columns:  # Add wrap-around sessions
        columns |= {"pre_wrap"}
    if "market_open" in columns:
        columns |= {"market_open_wrap"}

    def _extend_statement(session, parts):
        # Append the session's boundary columns only when the session was requested
        # AND the schedule actually has all the needed columns; otherwise record
        # what is missing so a single warning can be issued later.
        if session not in sessions:
            return
        if columns.issuperset(parts):
            session_times.extend(parts)
        else:
            missing_sess.update({session})
            missing_cols.update(set(parts) - columns)

    # Append session_start, session_end for each desired session *in session order*
    # (the order of these calls determines the chronological order of the pairs).
    _extend_statement("pre", ("pre", "market_open"))
    if {"break_start", "break_end"}.issubset(columns):
        # If the schedule has breaks then sub-divide RTH into pre & post break sessions
        if "RTH" in sessions:
            sessions = sessions - {"RTH"} | {"pre_break", "post_break"}
        _extend_statement("pre_break", ("market_open", "break_start"))
        _extend_statement("break", ("break_start", "break_end"))
        _extend_statement("post_break", ("break_end", "market_close"))
    else:
        _extend_statement("RTH", ("market_open", "market_close"))
    _extend_statement("post", ("market_close", "post"))

    # Closed can mean [close, open], [close, pre], [pre, post], or [post, open] Adjust accordingly
    s_start = "post" if "post" in columns else "market_close"
    s_end = "pre_wrap" if "pre" in columns else "market_open_wrap"
    _extend_statement("closed", (s_start, s_end))

    if len(missing_sess) > 0:
        warnings.warn(
            f"Requested Sessions: {missing_sess}, but schedule is missing columns: {missing_cols}."
            "\nResulting DatetimeIndex will lack those sessions. ",
            category=MissingSessionWarning,
        )

    if merge_adjacent:
        # Where one session's end column equals the next session's start column,
        # drop both occurrences so the two sessions fuse into one pair.
        drop_set = set()
        for i in range(1, len(session_times) - 1, 2):
            if session_times[i] == session_times[i + 1]:
                drop_set |= {session_times[i]}

        # Guaranteed to drop in pairs => no check needed before zipping
        session_times = [t for t in session_times if t not in drop_set]

    # Zip the flat list into a list of pairs
    session_pairs = list(zip(*(iter(session_times),) * 2))

    return session_pairs, "closed_masked" in sessions
|
572
|
+
|
573
|
+
|
574
|
+
def _standardize_times(
    schedule, start, end, periods, tz
) -> Tuple[pd.Timestamp, pd.Timestamp, Union[int, None]]:
    """
    Standardize start and end into a timestamp of the relevant timezone.

    :param schedule: schedule DataFrame; only its (assumed tz-naive) index is read here.
    :param start: str / numeric (POSIX seconds) / pd.Timestamp / None.
    :param end: str / numeric (POSIX seconds) / pd.Timestamp / None.
    :param periods: desired period count; dropped when start, end AND periods are all given.
    :param tz: timezone to localize naive inputs to.
    :returns: (start, end, periods) with start/end as tz-aware Timestamps (or None).
    :raises ValueError: on an unparseable time, or when start > end.
    """
    if all((start, end, periods)):
        periods = None  # Ignore Periods if all 3 params are given.

    if start is not None:
        if isinstance(start, (int, float)):
            # Numeric input is POSIX seconds; pd.Timestamp expects nanoseconds.
            start *= 1_000_000_000
        try:
            start = pd.Timestamp(start)
            if start.tz is None:
                start = start.tz_localize(tz)
        except ValueError as e:
            raise ValueError(f"Invalid Time ({start = }) given to date_range()") from e

        # Warn (not raise) when the schedule does not reach back far enough.
        if start < schedule.index[0].tz_localize(tz):
            warnings.warn(
                f"Insufficient Schedule. Requested Start-Time: {start.normalize().tz_localize(None)}. "
                f"Schedule starts at: {schedule.index[0].normalize().tz_localize(None)}",
                category=InsufficientScheduleWarning,
            )

    if end is not None:
        if isinstance(end, (int, float)):
            # Numeric input is POSIX seconds; pd.Timestamp expects nanoseconds.
            end *= 1_000_000_000
        try:
            end = pd.Timestamp(end)
            if end.tz is None and tz is not None:
                end = end.tz_localize(tz)
        except ValueError as e:
            raise ValueError(f"Invalid Time ({end = }) given to date_range()") from e

        if end > schedule.index[-1].tz_localize(tz) + pd.Timedelta("1D"):
            # Check against the last *day* (not the last session) so that requesting a time
            # after the last session's close but before the next day doesn't throw a warning.
            requested_end = end.normalize().tz_localize(None) - pd.Timedelta("1D")
            warnings.warn(
                f"Insufficient Schedule. Requested End-Time: {requested_end}. "
                f"Schedule ends at: {schedule.index[-1].normalize().tz_localize(None)}",
                category=InsufficientScheduleWarning,
            )

    if start is not None and end is not None and start > end:
        raise ValueError(
            "Date_range() given a start-date that occurs after the given end-date. "
            f"{start = }, {end = }"
        )

    return start, end, periods
|
625
|
+
|
626
|
+
|
627
|
+
def _reconfigure_schedule(schedule, session_list, mask_close) -> pd.DataFrame:
|
628
|
+
"Reconfigure a schedule into a sorted dataframe of [start, end] times for each session"
|
629
|
+
|
630
|
+
sessions = []
|
631
|
+
|
632
|
+
for start, end in session_list:
|
633
|
+
if not end.endswith("_wrap"):
|
634
|
+
# Simple Session where 'start' occurs before 'end'
|
635
|
+
sessions.append(
|
636
|
+
schedule[[start, end]]
|
637
|
+
.rename(columns={start: "start", end: "end"})
|
638
|
+
.set_index("start", drop=False)
|
639
|
+
)
|
640
|
+
continue
|
641
|
+
|
642
|
+
# 'closed' Session that wraps around midnight. Shift the 'end' col by 1 Day
|
643
|
+
end = end.rstrip("_wrap")
|
644
|
+
tmp = pd.DataFrame(
|
645
|
+
{
|
646
|
+
"start": schedule[start],
|
647
|
+
"end": schedule[end].shift(-1),
|
648
|
+
}
|
649
|
+
).set_index("start", drop=False)
|
650
|
+
|
651
|
+
# Shift(-1) leaves last index of 'end' as 'NaT'
|
652
|
+
# Set the [-1, 'end' ('end' === 1)] cell to Midnight of the 'start' time of that row.
|
653
|
+
tmp.iloc[-1, 1] = tmp.iloc[-1, 0].normalize() + pd.Timedelta("1D") # type: ignore
|
654
|
+
|
655
|
+
if mask_close:
|
656
|
+
# Do some additional work to split 'closed' sessions that span weekends/holidays
|
657
|
+
sessions_to_split = tmp["end"] - tmp["start"] > pd.Timedelta("1D")
|
658
|
+
|
659
|
+
split_strt = tmp[sessions_to_split]["start"]
|
660
|
+
split_end = tmp[sessions_to_split]["end"]
|
661
|
+
|
662
|
+
sessions.append(
|
663
|
+
pd.DataFrame( # From start of the long close to Midnight
|
664
|
+
{
|
665
|
+
"start": split_strt,
|
666
|
+
"end": split_strt.dt.normalize() + pd.Timedelta("1D"),
|
667
|
+
}
|
668
|
+
).set_index("start", drop=False)
|
669
|
+
)
|
670
|
+
sessions.append(
|
671
|
+
pd.DataFrame( # From Midnight to the end of the long close
|
672
|
+
{
|
673
|
+
"start": split_end.dt.normalize(),
|
674
|
+
"end": split_end,
|
675
|
+
}
|
676
|
+
).set_index("start", drop=False)
|
677
|
+
)
|
678
|
+
|
679
|
+
# leave tmp as all the sessions that were not split
|
680
|
+
tmp = tmp[~sessions_to_split]
|
681
|
+
|
682
|
+
sessions.append(tmp)
|
683
|
+
|
684
|
+
return pd.concat(sessions).sort_index()
|
685
|
+
|
686
|
+
|
687
|
+
def _error_check_sessions(session_times, timestep, closed, force_close):
|
688
|
+
if session_times.start.gt(session_times.end).any():
|
689
|
+
raise ValueError(
|
690
|
+
"Desired Sessions from the Schedule contain rows where session start < session end, "
|
691
|
+
"please correct the schedule"
|
692
|
+
)
|
693
|
+
|
694
|
+
# Disappearing Session
|
695
|
+
if force_close is False and closed == "right":
|
696
|
+
# only check if needed
|
697
|
+
if (session_times.end - session_times.start).lt(timestep).any():
|
698
|
+
warnings.warn(
|
699
|
+
"An interval of the chosen frequency is larger than some of the trading sessions, "
|
700
|
+
"while closed='right' and force_close=False. This will make those trading sessions "
|
701
|
+
"disappear. Use a higher frequency or change the values of closed/force_close, to "
|
702
|
+
"keep this from happening.",
|
703
|
+
category=DisappearingSessionWarning,
|
704
|
+
)
|
705
|
+
|
706
|
+
# Overlapping Session
|
707
|
+
if force_close is None and closed != "left":
|
708
|
+
num_bars = _num_bars_ltf(session_times, timestep, closed)
|
709
|
+
end_times = session_times.start + num_bars * timestep
|
710
|
+
|
711
|
+
if end_times.gt(session_times.start.shift(-1)).any():
|
712
|
+
warnings.warn(
|
713
|
+
"The desired frequency results in date_range() generating overlapping sessions. "
|
714
|
+
"This can happen when the timestep is larger than a session, or when "
|
715
|
+
"merge_session = False and a session is not evenly divisible by the timestep. "
|
716
|
+
"The overlapping timestep can be deleted with force_close = True or False",
|
717
|
+
category=OverlappingSessionWarning,
|
718
|
+
)
|
719
|
+
|
720
|
+
|
721
|
+
def _num_bars_ltf(session_times, timestep, closed) -> pd.Series:
|
722
|
+
"Calculate the number of timestamps needed for each trading session."
|
723
|
+
if closed in ("both", None):
|
724
|
+
return np.ceil((session_times.end - session_times.start) / timestep) + 1
|
725
|
+
else:
|
726
|
+
return np.ceil((session_times.end - session_times.start) / timestep)
|
727
|
+
|
728
|
+
|
729
|
+
def _course_trim_to_period_count(num_bars, periods, reverse) -> pd.Series:
    """
    Course (coarse) Trim the Session times to the desired period count.
    Large enough of a sub-routine to merit its own function call.

    :param num_bars: Series (indexed by session start time) of bar counts per session.
    :param periods: desired total number of periods.
    :param reverse: True when an end-date was given, so sessions are counted backwards.
    :returns: boolean Series (same index order as num_bars) of sessions to keep.
    """
    if reverse:
        # An end-date was given: accumulate the bar counts from the end backwards.
        num_bars = num_bars[::-1]

    _sum = num_bars.cumsum()

    if _sum.iloc[-1] < periods:
        # Insufficient Number of Periods. Try to estimate an ending time from the data given.
        # delta = (end_date - start_date) / (cumulative # of periods) * (periods still needed) * fudge factor
        delta = abs(
            # (end_date - start_date) / (cumulative # of periods)
            ((_sum.index[-1] - _sum.index[0]) / _sum.iloc[-1])
            * (periods - _sum.iloc[-1])  # (periods still needed)
            * 1.05  # (Fudge Factor for weekends/holidays)
        )
        # delta = math.ceil(delta) + '1W'
        delta = (delta // pd.Timedelta("1D") + 8) * pd.Timedelta("1D")
        # The 1.05 Factor handles when the schedule is short by a few months, the + '1W' handles
        # when the schedule is short by only a few periods. While 1 Week is absolute overkill,
        # generating the extra few days is very little extra cost compared to throwing this error
        # a second or even third time.

        if reverse:
            approx_start = _sum.index[-1] - delta
            warnings.warn(
                f"Insufficient Schedule. Requested Approx Start-Time: {approx_start}. "
                f"Schedule starts at: {_sum.index[-1].normalize().tz_localize(None)}",
                category=InsufficientScheduleWarning,
            )
        else:
            approx_end = _sum.index[-1] + delta
            warnings.warn(
                f"Insufficient Schedule. Requested Approx End-Time: {approx_end}. "
                f"Schedule ends at: {_sum.index[-1].normalize().tz_localize(None)}",
                category=InsufficientScheduleWarning,
            )

    # Keep every session whose cumulative count is still below the target...
    sessions_to_keep = _sum < periods
    # ...plus one more: shifting Ensures the number of needed periods are generated, but no more.
    sessions_to_keep = sessions_to_keep.shift(1, fill_value=True)

    if reverse:
        # Restore the original (chronological) order of the mask.
        sessions_to_keep = sessions_to_keep[::-1]

    return sessions_to_keep
|
780
|
+
|
781
|
+
|
782
|
+
def _calc_time_series(
    session_times, timestep, closed, force_close, start, end, periods
) -> pd.Series:
    """
    Interpolate each session into a datetime series at the desired frequency.

    :param session_times: DataFrame of 'start'/'end' session times (indexed by start).
    :param timestep: pd.Timedelta bar interval.
    :param closed: 'left' / 'right' / 'both' / None — which interval edges to emit.
    :param force_close: True / False / None — whether to clamp / append session ends.
    :param start, end: optional tz-aware Timestamps bounding the output.
    :param periods: optional maximum number of timestamps to produce.
    :returns: sorted, de-duplicated Series of timestamps.
    """
    # region ---- ---- ---- Trim the Sessions ---- ---- ----
    # Compare 'start' to the session *end* times so that if 'start' is in the middle of a
    # session that session remains in session_times. Vice-versa for 'end'.
    if start is not None:
        session_times = session_times[session_times.end > start]
    if end is not None:
        session_times = session_times[session_times.start < end]
    if len(session_times) == 0:
        return pd.Series([])

    # Override the First Session's Start and Last Session's End times if needed
    if start is not None and start > session_times.loc[session_times.index[0], "start"]:
        # Align the start to a multiple of the timestep after the session's beginning.
        # This is to make the returned DTIndex consistent across all start/end/period settings.
        session_start = session_times.loc[session_times.index[0], "start"]
        start_aligned = session_start + (
            ceil((start - session_start) / timestep) * timestep
        )
        session_times.loc[session_times.index[0], "start"] = start_aligned
    if end is not None and end < session_times.loc[session_times.index[-1], "end"]:
        # NOTE: the end is aligned relative to the *first* session's start — this keeps
        # the grid of timestamps identical regardless of which bounds were given.
        session_start = session_times.loc[session_times.index[0], "start"]
        end_aligned = session_start + (
            floor((end - session_start) / timestep) * timestep
        )
        session_times.loc[session_times.index[-1], "end"] = end_aligned

    num_bars = _num_bars_ltf(session_times, timestep, closed)

    if periods is not None:
        # Coarse trim: drop whole sessions beyond the requested period count.
        sessions_to_keep = _course_trim_to_period_count(
            num_bars, periods, end is not None
        )
        num_bars = num_bars[sessions_to_keep]
        session_times = session_times[sessions_to_keep]

    # endregion

    # Repeat each session's start once per bar; the group-wise cumcount below then
    # turns each repetition into successive multiples of the timestep.
    starts = session_times.start.repeat(num_bars)  # type: ignore

    if closed == "right":
        # Right side of addition is cumulative time since session start in multiples of timestep
        time_series = starts + (starts.groupby(starts.index).cumcount() + 1) * timestep
    else:
        time_series = starts + (starts.groupby(starts.index).cumcount()) * timestep

    if force_close is not None:
        # Trim off all timestamps that stretched beyond their intended session
        time_series = time_series[time_series.le(session_times.end.repeat(num_bars))]

        if force_close:
            # Explicitly append each session's end time.
            time_series = pd.concat([time_series, session_times.end])

    time_series = time_series.drop_duplicates().sort_values()  # type: ignore

    if periods is not None and len(time_series) > 0:
        # Although likely redundant, Fine Trim to desired period count.
        if end is not None:
            s_len = len(time_series)
            time_series = time_series[max(s_len - periods, 0) : s_len]
        else:
            time_series = time_series[0:periods]

    return time_series
|
849
|
+
|
850
|
+
|
851
|
+
# endregion
|
852
|
+
|
853
|
+
|
854
|
+
# Literal aliases for the higher-timeframe (HTF) date_range machinery.
PeriodCode = Literal["D", "W", "M", "Q", "Y"]
Day_Anchor = Literal["SUN", "MON", "TUE", "WED", "THU", "FRI", "SAT"]
Month_Anchor = Literal[
    "JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
]

# These maps are needed because the pandas Period object is not consistent with date_range:
# pd.date_range(s, e, freq='W-SUN') == [DatetimeIndex of all Sundays] (as expected)
# but pd.Timestamp([A Sunday]).to_period('W-SUN').start_time == [The Monday prior???]
# Rolling each anchor list right by one maps every anchor to its predecessor, so that
# grouping periods line up with the anchored date_range frequencies.
days_rolled = list(Day_Anchor.__args__)
days_rolled.insert(0, days_rolled.pop())
# e.g. weekly_roll_map["SUN"] == "SAT": a week *anchored* on Sunday is *grouped* ending Saturday.
weekly_roll_map = dict(zip(Day_Anchor.__args__, days_rolled))

months_rolled = list(Month_Anchor.__args__)
months_rolled.insert(0, months_rolled.pop())
# e.g. yearly_roll_map["JAN"] == "DEC": a year anchored on January is grouped ending December.
yearly_roll_map = dict(zip(Month_Anchor.__args__, months_rolled))
|
870
|
+
|
871
|
+
|
872
|
+
def date_range_htf(
    cal: "CustomBusinessDay",
    frequency: Union[str, pd.Timedelta, int, float],
    start: Union[str, pd.Timestamp, int, float, None] = None,
    end: Union[str, pd.Timestamp, int, float, None] = None,
    periods: Union[int, None] = None,
    closed: Union[Literal["left", "right"], None] = "right",
    *,
    day_anchor: Day_Anchor = "SUN",
    month_anchor: Month_Anchor = "JAN",
) -> pd.DatetimeIndex:
    """
    Returns a Normalized DatetimeIndex from the start-date to End-Date for Time periods of 1D and Higher.

    Unless using a custom calendar, it is advised to call the date_range_htf() method of the desired calendar.
    This is because default_anchors may change, or a single calendar may not be sufficient to model a market.

    For example, NYSE has two calendars: The first covers pre-1952 where saturdays were trading days. The second
    covers post-1952 where saturdays are closed.

    PARAMETERS:

    :param cal: CustomBusinessDay Calendar associated with a MarketCalendar. This can be retrieved by
        calling the holidays() method of a MarketCalendar.

    :param frequency: String, Int/float (POSIX seconds) or pd.Timedelta of the desired frequency.
        :Must be Greater than '1D' and an integer multiple of the base frequency (D, W, M, Q, or Y)
        :Important Note: Ints/Floats & Timedeltas are always considered as 'Open Business Days',
            '2D' == Every Other Business Day, '3D' == Every 3rd B.Day, '7D' == Every 7th B.Day
        :Higher periods (passed as strings) align to the beginning or end of the relevant period
        :i.e. '1W' == First/[Last] Trading Day of each Week, '1Q' == First/[Last] Day of every Quarter

    :param start: String, Int/float (POSIX seconds) or pd.Timestamp of the desired start time.
        :The Time & Timezone information is ignored. Only the Normalized Day is considered.

    :param end: String, Int/float (POSIX seconds) or pd.Timestamp of the desired start time.
        :The Time & Timezone information is ignored. Only the Normalized Day is considered.

    :param periods: Optional Integer number of periods to return. If a Period count, Start time,
        and End time are given the period count is ignored.

    :param closed: Literal['left', 'right']. Method used to close each range.
        :Left: First open trading day of the Session is returned (e.g. First Open Day of The Month)
        :right: Last open trading day of the Session is returned (e.g. Last Open Day of The Month)
        :Note, This has no effect when the desired frequency is a number of days.

    :param day_anchor: Day to Anchor the start of the Weekly timeframes to. Default 'SUN'.
        : To get the First/Last Days of the trading Week then the Anchor needs to be on a day the relevant
            market is closed.
        : This can be set so that a specific day each week is returned.
        : freq='1W' & day_anchor='WED' Will return Every 'WED' when the market is open, and nearest day
            to the left or right (based on 'closed') when the market is closed.
        Options: ["SUN", "MON", "TUE", "WED", "THU", "FRI", "SAT"]

    :param month_anchor: Month to Anchor the start of the year to for Quarter and yearly timeframes.
        : Default 'JAN' for Calendar Quarters/Years. Can be set to 'JUL' to return Fiscal Years
        Options: ["JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"]
    """

    start, end, periods = _error_check_htf_range(start, end, periods)
    mult, _period_code = _standardize_htf_freq(frequency)

    if _period_code == "D":
        if mult == 1:
            # When desiring a frequency of '1D' default to pd.date_range. It will give the same
            # answer but it is more performant than the method in _cal_day_range.
            return pd.date_range(start, end, periods, freq=cal)
        else:
            return _cal_day_range(cal, start, end, periods, mult)

    elif _period_code == "W":
        # Anchored weekly frequency, grouped by the week ending on the anchor's predecessor.
        freq = str(mult) + "W-" + day_anchor.upper()
        grouping_period = "W-" + weekly_roll_map[day_anchor.upper()]

        return _cal_WMQY_range(cal, start, end, periods, freq, grouping_period, closed)

    elif _period_code == "M":
        # Month-Start ('MS') when closed='left', otherwise Month-End ('ME').
        freq = str(mult) + "M" + ("S" if closed == "left" else "E")
        return _cal_WMQY_range(cal, start, end, periods, freq, "M", closed)

    else:  # Yearly & Quarterly Period
        freq = str(mult) + _period_code
        freq += (
            "S-" + month_anchor.upper()
            if closed == "left"  # *Insert Angry Tom Meme Here*
            else "E-" + yearly_roll_map[month_anchor.upper()]
        )
        grouping_period = _period_code + "-" + yearly_roll_map[month_anchor.upper()]

        return _cal_WMQY_range(cal, start, end, periods, freq, grouping_period, closed)
|
962
|
+
|
963
|
+
|
964
|
+
# region ---- ---- ---- Date Range HTF Subroutines ---- ---- ----
|
965
|
+
|
966
|
+
|
967
|
+
def _error_check_htf_range(
|
968
|
+
start, end, periods: Union[int, None]
|
969
|
+
) -> Tuple[Union[pd.Timestamp, None], Union[pd.Timestamp, None], Union[int, None]]:
|
970
|
+
"Standardize and Error Check Start, End, and period params"
|
971
|
+
if periods is not None:
|
972
|
+
if not isinstance(periods, int):
|
973
|
+
raise ValueError(
|
974
|
+
f"Date_Range_HTF Must be either an int or None. Given {type(periods)}"
|
975
|
+
)
|
976
|
+
if periods < 0:
|
977
|
+
raise ValueError("Date_range_HTF Periods must be Positive.")
|
978
|
+
|
979
|
+
if isinstance(start, (int, float)):
|
980
|
+
start = int(start * 1_000_000_000)
|
981
|
+
if isinstance(end, (int, float)):
|
982
|
+
end = int(end * 1_000_000_000)
|
983
|
+
|
984
|
+
if start is not None:
|
985
|
+
start = pd.Timestamp(start).normalize().tz_localize(None)
|
986
|
+
if end is not None:
|
987
|
+
end = pd.Timestamp(end).normalize().tz_localize(None)
|
988
|
+
|
989
|
+
if all((start, end, periods)):
|
990
|
+
periods = None # Ignore Periods if passed too many params
|
991
|
+
if len([param for param in (start, end, periods) if param is not None]) < 2:
|
992
|
+
raise ValueError(
|
993
|
+
"Date_Range_HTF must be given two of the three following params: (start, end, periods)"
|
994
|
+
)
|
995
|
+
|
996
|
+
if start is not None and end is not None and end < start:
|
997
|
+
raise ValueError("Date_Range_HTF() Start-Date must be before the End-Date")
|
998
|
+
|
999
|
+
return start, end, periods
|
1000
|
+
|
1001
|
+
|
1002
|
+
def _standardize_htf_freq(
|
1003
|
+
frequency: Union[str, pd.Timedelta, int, float]
|
1004
|
+
) -> Tuple[int, PeriodCode]:
|
1005
|
+
"Standardize the frequency multiplier and Code, throwing errors as needed."
|
1006
|
+
if isinstance(frequency, str):
|
1007
|
+
if len(frequency) == 0:
|
1008
|
+
raise ValueError("Date_Range_HTF Frequency is an empty string.")
|
1009
|
+
if len(frequency) == 1:
|
1010
|
+
frequency = "1" + frequency # Turn 'D' into '1D' for all period codes
|
1011
|
+
if frequency[-1].upper() in {"W", "M", "Q", "Y"}:
|
1012
|
+
try:
|
1013
|
+
if (mult := int(frequency[0:-1])) <= 0:
|
1014
|
+
raise ValueError()
|
1015
|
+
return mult, frequency[-1].upper() # type: ignore
|
1016
|
+
except ValueError as e:
|
1017
|
+
raise ValueError(
|
1018
|
+
"Date_Range_HTF() Week, Month, Quarter and Year frequency must "
|
1019
|
+
"have a positive integer multiplier"
|
1020
|
+
) from e
|
1021
|
+
|
1022
|
+
# All remaining frequencies (int, float, strs, & Timedeltas) are parsed as business days.
|
1023
|
+
if isinstance(frequency, (int, float)): # Convert To Seconds
|
1024
|
+
frequency = int(frequency * 1_000_000_000)
|
1025
|
+
|
1026
|
+
frequency = pd.Timedelta(frequency)
|
1027
|
+
if frequency < pd.Timedelta("1D"):
|
1028
|
+
raise ValueError("Date_Range_HTF() Frequency must be '1D' or Higher.")
|
1029
|
+
if frequency % pd.Timedelta("1D") != pd.Timedelta(0):
|
1030
|
+
raise ValueError(
|
1031
|
+
"Date_Range_HTF() Week and Day frequency must be an integer multiple of Days"
|
1032
|
+
)
|
1033
|
+
|
1034
|
+
return frequency.days, "D"
|
1035
|
+
|
1036
|
+
|
1037
|
+
def _days_per_week(weekmask: Union[Iterable, str]) -> int:
|
1038
|
+
"Used to get a more accurate estimate of the number of days per week"
|
1039
|
+
# Return any 'Array Like' Representation
|
1040
|
+
if not isinstance(weekmask, str):
|
1041
|
+
return len([day for day in weekmask if bool(day)])
|
1042
|
+
|
1043
|
+
if len(weekmask) == 0:
|
1044
|
+
raise ValueError("Weekmask cannot be blank")
|
1045
|
+
|
1046
|
+
weekmask = weekmask.upper()
|
1047
|
+
day_abbrs = {day for day in weekly_roll_map.values() if day in weekmask}
|
1048
|
+
if len(day_abbrs) != 0:
|
1049
|
+
return len(day_abbrs)
|
1050
|
+
|
1051
|
+
# Weekmask Something like '0111110'
|
1052
|
+
return len([day for day in weekmask if bool(day)])
|
1053
|
+
|
1054
|
+
|
1055
|
+
def _cal_day_range(
    cb_day: "CustomBusinessDay", start, end, periods, mult
) -> pd.DatetimeIndex:
    """
    Returns a Normalized DateTimeIndex of Open Business Days.
    Exactly two of the (start, end, periods) arguments must be given.

    ** Arguments should be Type/Error Checked before calling this function **

    :param cb_day: CustomBusinessDay Object from the respective calendar
    :param start: Optional Start-Date. Must be a Normalized, TZ-Naive pd.Timestamp
    :param end: Optional End-Date. Must be a Normalized, TZ-Naive pd.Timestamp
    :param periods: Optional Number of periods to return
    :param mult: Integer Multiple of business days between data-points.
        e.g: 1 == Every Business Day, 2 == Every Other B.Day, 3 == Every Third B.Day, etc.
    :returns: DateRangeIndex[datetime64[ns]]
    """

    # Ensure Start and End are open Business days in the desired range
    if start is not None:
        start = cb_day.rollforward(start)
    if end is not None:
        end = cb_day.rollback(end)

    # ---- Start-Date to End-Date ----
    if isinstance(start, pd.Timestamp) and isinstance(end, pd.Timestamp):
        num_days = (end - start) / mult
        # Get a better estimate of the number of open days since date_range calc is slow
        est_open_days = (
            (num_days // 7) * _days_per_week(cb_day.weekmask)
        ) + num_days % pd.Timedelta("1W")

        # Should always produce a small overestimate since Holidays aren't accounted for.
        est_open_days = ceil(est_open_days / pd.Timedelta("1D"))
        _range = pd.RangeIndex(0, est_open_days * mult, mult)

        # Adding N * cb_day steps forward N open business days at a time.
        dt_index = pd.DatetimeIndex(start + _range * cb_day, dtype="datetime64[ns]")
        return dt_index[dt_index <= end]  # trim the overestimate back to 'end'

    # ---- Periods from Start-Date ----
    elif isinstance(start, pd.Timestamp):
        _range = pd.RangeIndex(0, periods * mult, mult)
        return pd.DatetimeIndex(start + _range * cb_day, dtype="datetime64[ns]")

    # ---- Periods from End-Date ----
    else:
        # Ensure the end-date is the first valid Trading Day <= given end-date
        # NOTE(review): this rollback is redundant — 'end' was already rolled back
        # above when not None. Harmless, since rollback is idempotent.
        end = cb_day.rollback(end)
        # Step backwards from 'end', then reverse so the index is ascending.
        _range = pd.RangeIndex(0, -1 * periods * mult, -1 * mult)

        return pd.DatetimeIndex(end + _range * cb_day, dtype="datetime64[ns]")[::-1]
|
1106
|
+
|
1107
|
+
|
1108
|
+
def _cal_WMQY_range(
    cb_day: "CustomBusinessDay",
    start: Union[pd.Timestamp, None],
    end: Union[pd.Timestamp, None],
    periods: Union[int, None],
    freq: str,
    grouping_period: str,
    closed: Union[Literal["left", "right"], None] = "right",
):
    """
    Return A DateRangeIndex of the Weekdays that mark either the start or end of each
    business week/month/quarter/year based on the 'closed' parameter.

    ** Arguments should be Type/Error Checked before calling this function **

    :param cb_day: CustomBusinessDay Object from the respective calendar
    :param start: Optional Start-Date. Must be a Normalized, TZ-Naive pd.Timestamp
    :param end: Optional End-Date. Must be a Normalized, TZ-Naive pd.Timestamp
    :param periods: Optional Number of periods to return
    :param freq: Formatted frequency of '1W' and Higher with desired multiple, S/E Chars,
        and Anchoring code.
    :param grouping_period: Period_Code with anchor that matches the given period Code.
        i.e. 'W-[DAY]', 'M', 'Q-[MONTH]', 'Y-[MONTH]'
    :param closed: Union['left', Any].
        'left': The normalized start-day of the relative period is returned
        Everything else: The normalized last-day of the relative period is returned
    :returns: DateRangeIndex[datetime64[ns]]
    """

    # Need to Adjust the Start/End Dates given to pandas since Rolling forward or backward can shift
    # the calculated date range out of the desired [start, end] range adding or ignoring desired values.

    # For Example, say we want NYSE-Month-Starts between [2020-01-02, 2020-02-02]. W/O Adjusting dates
    # we call pd.date_range('2020-01-02, '2020-02-02', 'MS') => ['2020-02-01'] Rolled to ['2020-02-03'].
    # '02-03' date is then trimmed off returning an empty Index. despite '2020-01-02' being a valid Month Start
    # By Adjusting the Dates we call pd.date_range('2020-01-01, '2020-02-02') => ['2020-01-01, '2020-02-01']
    # That's then Rolled into [2020-01-02, 2020-02-03] & Trimmed to [2020-01-02] as desired.

    _dr_start, _dr_end = None, None

    if closed == "left":
        # 'left': each anchor date rolls forward to the first open day of its period.
        roll_func = cb_day.rollforward
        if start is not None:
            normalized_start = start.to_period(grouping_period).start_time
            # Widen to the period start only if 'start' still precedes its rolled open day.
            _dr_start = (
                normalized_start if start <= roll_func(normalized_start) else start
            )

        if end is not None:
            if periods is not None:
                normalized_end = end.to_period(grouping_period).start_time
                _dr_end = (
                    normalized_end - pd.Timedelta("1D")  # Shift into preceding group
                    if end < roll_func(normalized_end)
                    else cb_day.rollback(end)
                )
            else:
                _dr_end = cb_day.rollback(end)

    else:
        # 'right' (default): each anchor date rolls back to the last open day of its period.
        roll_func = cb_day.rollback
        if start is not None:
            if periods is not None:
                normalized_start = start.to_period(grouping_period).end_time.normalize()
                _dr_start = (
                    normalized_start + pd.Timedelta("1D")  # Shift into trailing group
                    if start > roll_func(normalized_start)
                    else cb_day.rollforward(start)
                )
            else:
                _dr_start = cb_day.rollforward(start)

        if end is not None:
            normalized_end = end.to_period(grouping_period).end_time.normalize()
            # Widen to the period end only if 'end' is at/after its rolled close day.
            _dr_end = normalized_end if end >= roll_func(normalized_end) else end

    # Generate the anchored range, then roll every anchor onto an open trading day.
    _range = (
        pd.date_range(_dr_start, _dr_end, periods, freq).to_series().apply(roll_func)
    )

    # Ensure that Rolled Timestamps are in the desired range When given both Start and End
    if start is not None and end is not None:
        if len(_range) > 0 and _range.iloc[0] < start:
            # Trims off the first 'WMQY End' that might have been Rolled before start
            _range = _range[1:]
        if len(_range) > 0 and _range.iloc[-1] > end:
            # Trims off the last 'WMQY Start' that might have been Rolled after end
            _range = _range[0:-1]

    return pd.DatetimeIndex(_range, dtype="datetime64[ns]")
|
1198
|
+
|
1199
|
+
|
1200
|
+
# endregion
|