pandas-market-calendars 4.3.3__py3-none-any.whl → 4.6.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- pandas_market_calendars/__init__.py +39 -38
- pandas_market_calendars/calendar_registry.py +57 -53
- pandas_market_calendars/calendar_utils.py +1200 -261
- pandas_market_calendars/calendars/asx.py +66 -66
- pandas_market_calendars/calendars/bmf.py +223 -206
- pandas_market_calendars/calendars/bse.py +421 -407
- pandas_market_calendars/calendars/cboe.py +145 -145
- pandas_market_calendars/calendars/cme.py +405 -402
- pandas_market_calendars/calendars/cme_globex_agriculture.py +172 -126
- pandas_market_calendars/calendars/cme_globex_base.py +119 -119
- pandas_market_calendars/calendars/cme_globex_crypto.py +160 -160
- pandas_market_calendars/calendars/cme_globex_energy_and_metals.py +216 -216
- pandas_market_calendars/calendars/cme_globex_equities.py +123 -123
- pandas_market_calendars/calendars/cme_globex_fixed_income.py +136 -136
- pandas_market_calendars/calendars/cme_globex_fx.py +101 -101
- pandas_market_calendars/calendars/eurex.py +131 -139
- pandas_market_calendars/calendars/eurex_fixed_income.py +98 -98
- pandas_market_calendars/calendars/hkex.py +429 -426
- pandas_market_calendars/calendars/ice.py +81 -81
- pandas_market_calendars/calendars/iex.py +151 -112
- pandas_market_calendars/calendars/jpx.py +113 -109
- pandas_market_calendars/calendars/lse.py +114 -114
- pandas_market_calendars/calendars/mirror.py +149 -130
- pandas_market_calendars/calendars/nyse.py +1466 -1324
- pandas_market_calendars/calendars/ose.py +116 -116
- pandas_market_calendars/calendars/sifma.py +354 -350
- pandas_market_calendars/calendars/six.py +132 -132
- pandas_market_calendars/calendars/sse.py +311 -311
- pandas_market_calendars/calendars/tase.py +220 -197
- pandas_market_calendars/calendars/tsx.py +181 -181
- pandas_market_calendars/holidays/cme.py +385 -385
- pandas_market_calendars/holidays/cme_globex.py +214 -214
- pandas_market_calendars/holidays/cn.py +1476 -1455
- pandas_market_calendars/holidays/jp.py +401 -398
- pandas_market_calendars/holidays/jpx_equinox.py +1 -0
- pandas_market_calendars/holidays/nyse.py +1536 -1531
- pandas_market_calendars/holidays/oz.py +63 -63
- pandas_market_calendars/holidays/sifma.py +350 -338
- pandas_market_calendars/holidays/us.py +376 -376
- pandas_market_calendars/market_calendar.py +1057 -895
- {pandas_market_calendars-4.3.3.dist-info → pandas_market_calendars-4.6.0.dist-info}/METADATA +13 -9
- pandas_market_calendars-4.6.0.dist-info/RECORD +50 -0
- {pandas_market_calendars-4.3.3.dist-info → pandas_market_calendars-4.6.0.dist-info}/WHEEL +1 -1
- pandas_market_calendars-4.3.3.dist-info/RECORD +0 -50
- {pandas_market_calendars-4.3.3.dist-info → pandas_market_calendars-4.6.0.dist-info}/LICENSE +0 -0
- {pandas_market_calendars-4.3.3.dist-info → pandas_market_calendars-4.6.0.dist-info}/NOTICE +0 -0
- {pandas_market_calendars-4.3.3.dist-info → pandas_market_calendars-4.6.0.dist-info}/top_level.txt +0 -0
@@ -1,261 +1,1200 @@
|
|
1
|
-
"""
|
2
|
-
Utilities to use with market_calendars
|
3
|
-
"""
|
4
|
-
|
5
|
-
import
|
6
|
-
|
7
|
-
import
|
8
|
-
import
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
:
|
21
|
-
:
|
22
|
-
:
|
23
|
-
"""
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
Only
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
)
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
1
|
+
"""
|
2
|
+
Utilities to use with market_calendars
|
3
|
+
"""
|
4
|
+
|
5
|
+
import itertools
|
6
|
+
from math import ceil, floor
|
7
|
+
from typing import TYPE_CHECKING, Any, Dict, Iterable, Literal, Tuple, Union
|
8
|
+
import warnings
|
9
|
+
|
10
|
+
from re import finditer, split
|
11
|
+
import numpy as np
|
12
|
+
import pandas as pd
|
13
|
+
|
14
|
+
if TYPE_CHECKING:
|
15
|
+
from pandas.tseries.offsets import CustomBusinessDay
|
16
|
+
from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday
|
17
|
+
|
18
|
+
# Default session-name -> label mapping used by mark_session().
# Keys are the internal session names detected from the schedule's columns;
# values are the labels written into the returned Series. Callers may override
# any subset of these via mark_session's `label_map` argument.
DEFAULT_LABEL_MAP = {
    "pre": "pre",
    "rth_pre_break": "rth",
    "rth": "rth",
    "break": "break",
    "rth_post_break": "rth",
    "post": "post",
    "closed": "closed",
}
|
27
|
+
|
28
|
+
|
29
|
+
def mark_session(
    schedule: pd.DataFrame,
    timestamps: pd.DatetimeIndex,
    label_map: Dict[str, Any] = {},  # NOTE: mutable default is safe here — only read, never mutated
    *,
    closed: Literal["left", "right"] = "right",
) -> pd.Series:
    """
    Return a Series that denotes the trading session of each timestamp in a DatetimeIndex.
    The returned Series's Index is the provided Datetime Index, the Series's values
    are the timestamps' corresponding session.

    PARAMETERS:

    :param schedule: The market schedule to check the timestamps against. This Schedule must include
        all of the trading days that are in the provided DatetimeIndex of timestamps.
        Note: The columns need to be sorted into ascending order, if not, then an error will be
        raised saying the bins must be in ascending order.

    :param timestamps: A DatetimeIndex of Timestamps to check. Must be sorted in ascending order.

    :param label_map: Optional mapping of Dict[str, Any] to change the values returned in the
        series. The keys of the given mapping should match the keys of the default dict, but the
        values can be anything. A subset of mappings may also be provided, e.g. {'closed':-1} will
        only change the label of the 'closed' session. All others will remain the default label.

        >>> Default Mapping == {
            "pre": "pre",
            "rth_pre_break": "rth",  # When the Schedule has a break
            "rth": "rth",  # When the Schedule doesn't have a break
            "break": "break",  # When the Schedule has a break
            "rth_post_break": "rth",  # When the Schedule has a break
            "post": "post",
            "closed": "closed",
        }

    :param closed: Which side of each interval should be closed (inclusive)
        left: == [start, end)
        right: == (start, end]
    """
    # ---- ---- ---- Determine which columns need to be dropped ---- ---- ----
    # 'closed' leads the list so each schedule row's labels line up with the
    # gap before that row's first market time when the bins are flattened below.
    session_labels = ["closed"]
    columns = set(schedule.columns)
    needed_cols = set()

    def _extend_statement(session: str, parts: set):
        # Register `session` only when every column it spans is present.
        if parts.issubset(columns):
            needed_cols.update(parts)
            session_labels.append(session)

    _extend_statement("pre", {"pre", "market_open"})
    if {"break_start", "break_end"}.issubset(columns):
        _extend_statement("rth_pre_break", {"market_open", "break_start"})
        _extend_statement("break", {"break_start", "break_end"})
        _extend_statement("rth_post_break", {"break_end", "market_close"})
    else:
        _extend_statement("rth", {"market_open", "market_close"})
    _extend_statement("post", {"market_close", "post"})

    # ---- ---- ---- Error Check ---- ---- ----
    # Unknown columns would desynchronize bins and labels, so drop them and warn.
    if len(extra_cols := columns - needed_cols) > 0:
        schedule = schedule.drop(columns=[*extra_cols])
        warnings.warn(
            f"Attempting to mark trading sessions and the schedule ({columns = }) contains the "
            f"extra columns: {extra_cols}. Returned sessions may not be labeled as desired."
        )

    start = timestamps[0]
    end = timestamps[-1]
    if start < schedule.iloc[0, 0]:  # type: ignore
        raise ValueError(
            f"Insufficient Schedule. Needed Start-Time: {start.normalize().tz_localize(None)}. "
            f"Schedule starts at: {schedule.iloc[0, 0]}"
        )
    if end > schedule.iloc[-1, -1]:  # type: ignore
        raise ValueError(
            f"Insufficient Schedule. Needed End-Time: {end.normalize().tz_localize(None)}. "
            f"Schedule ends at: {schedule.iloc[-1, -1]}"
        )

    # Trim the schedule to match the timeframe covered by the given timeseries
    schedule = schedule[
        (schedule.index >= start.normalize().tz_localize(None))
        & (schedule.index <= end.normalize().tz_localize(None))
    ]

    # Overlay user overrides on the defaults, then build one label per bin:
    # the per-row session labels repeated for every schedule row, plus a final
    # 'closed' label for the trailing bin appended below.
    backfilled_map = DEFAULT_LABEL_MAP | label_map
    mapped_labels = [backfilled_map[label] for label in session_labels]
    labels = pd.Series([mapped_labels]).repeat(len(schedule)).explode()
    labels = pd.concat([labels, pd.Series([backfilled_map["closed"]])])

    # Append on additional Edge-Case Bins so result doesn't include NaNs
    bins = schedule.to_numpy().flatten()
    bins = np.insert(bins, 0, bins[0].normalize())
    bins = np.append(bins, bins[-1].normalize() + pd.Timedelta("1D"))

    # np.unique also sorts the bin edges, which pd.cut requires.
    bins, _ind, _counts = np.unique(bins, return_index=True, return_counts=True)

    if len(bins) - 1 != len(labels):
        # np.Unique Dropped some bins, need to drop the associated labels
        # (keep the label of the *last* duplicate of each collapsed edge).
        label_inds = (_ind + _counts - 1)[:-1]
        labels = labels.iloc[label_inds]

    return pd.Series(
        pd.cut(timestamps, bins, closed != "left", labels=labels, ordered=False),  # type: ignore
        index=timestamps,
    )
|
136
|
+
|
137
|
+
|
138
|
+
def merge_schedules(schedules, how="outer"):
    """
    Given a list of schedules will return a merged schedule. The merge method (how) will either return the superset
    of any datetime when any schedule is open (outer) or only the datetime where all markets are open (inner)

    CAVEATS:
        * This does not work for schedules with breaks, the break information will be lost.
        * Only "market_open" and "market_close" are considered, other market times are not yet supported.

    :param schedules: list of schedules
    :param how: outer or inner
    :return: schedule DataFrame
    :raises ValueError: if `how` is not "inner" or "outer"
    """
    # Validate up front: previously an invalid `how` was only detected inside
    # the merge loop, so a single-schedule input silently returned unvalidated.
    if how not in ("outer", "inner"):
        raise ValueError('how argument must be "inner" or "outer"')

    all_cols = [x.columns for x in schedules]
    all_cols = list(itertools.chain(*all_cols))
    if ("break_start" in all_cols) or ("break_end" in all_cols):
        warnings.warn(
            "Merge schedules will drop the break_start and break_end from result."
        )

    result = schedules[0]
    for schedule in schedules[1:]:
        result = result.merge(schedule, how=how, right_index=True, left_index=True)
        # Vectorized column-wise min/max instead of a per-row apply(): same
        # result on aligned rows, and NaT from an outer join is skipped rather
        # than propagated, so a day open in only one schedule keeps its times.
        if how == "outer":
            result["market_open"] = result[["market_open_x", "market_open_y"]].min(axis=1)
            result["market_close"] = result[["market_close_x", "market_close_y"]].max(axis=1)
        else:
            result["market_open"] = result[["market_open_x", "market_open_y"]].max(axis=1)
            result["market_close"] = result[["market_close_x", "market_close_y"]].min(axis=1)
    result = result[["market_open", "market_close"]]
    return result
|
179
|
+
|
180
|
+
|
181
|
+
def is_single_observance(holiday: "Holiday"):
    "Returns the Date of the Holiday if it is only observed once, None otherwise."
    # A holiday whose validity window collapses to a single day is observed once.
    if holiday.start_date == holiday.end_date:  # type: ignore ??
        return holiday.start_date
    return None
|
184
|
+
|
185
|
+
|
186
|
+
def all_single_observance_rules(calendar: "AbstractHolidayCalendar"):
    "Returns a list of timestamps if the Calendar's Rules are all single observance holidays, None Otherwise"
    # Bail out as soon as any rule is not a single observance.
    dates = []
    for rule in calendar.rules:
        observed = is_single_observance(rule)
        if not observed:
            return None
        dates.append(observed)
    return dates
|
190
|
+
|
191
|
+
|
192
|
+
def convert_freq(index, frequency):
    """
    Converts a DateTimeIndex to a new lower frequency

    :param index: DateTimeIndex
    :param frequency: frequency string
    :return: DateTimeIndex
    """
    # Wrap the index in an empty frame so pandas' asfreq does the resampling,
    # then hand back only the resulting index.
    frame = pd.DataFrame(index=index)
    resampled = frame.asfreq(frequency)
    return resampled.index
|
201
|
+
|
202
|
+
|
203
|
+
# Session names accepted by date_range()'s `session` parameter.
SESSIONS = Literal[
    "pre",
    "post",
    "RTH",
    "pre_break",
    "post_break",
    "ETH",
    "break",
    "closed",
    "closed_masked",
]
# Schedule column names (market times) recognized by these utilities.
MKT_TIMES = Literal[
    "pre", "post", "market_open", "market_close", "break_start", "break_end"
]
|
217
|
+
|
218
|
+
|
219
|
+
# region ---- ---- ---- Date Range Warning Types ---- ---- ----
|
220
|
+
class DateRangeWarning(UserWarning):
    """Super class of all date_range() warning types.

    Filter or catch this class to handle every date_range warning at once.
    """
|
222
|
+
|
223
|
+
|
224
|
+
class OverlappingSessionWarning(DateRangeWarning):
    """
    Warning thrown when date_range is called with a timedelta that is larger than the
    gap between two sessions, leading to them overlapping.
    This is only an issue when closed='right'/'both'/None and force_close=None

    For Example, the following raises a warning because the 10:00 Timestamp that is from the 'pre'
    session comes after the start of the 9:30 'RTH' session, but belongs to the 'pre' session
    >>> date_range(NYSE, '2h', 'right', None, {'pre', 'RTH'}, merge_adjacent = False)
    >>> ['2020-01-02 06:00:00', '2020-01-02 08:00:00',
         '2020-01-02 10:00:00', '2020-01-02 11:30:00',
         '2020-01-02 13:30:00', '2020-01-02 15:30:00',
         '2020-01-02 17:30:00'],
    This is particularly convoluted when closed='both'/None
    >>> date_range(NYSE, '2h', 'both', None, {'pre', 'RTH'}, merge_adjacent = False)
    >>> ['2020-01-02 04:00:00' (pre), '2020-01-02 06:00:00' (pre),
         '2020-01-02 08:00:00' (pre), '2020-01-02 09:30:00' (rth),
         '2020-01-02 10:00:00' (pre), '2020-01-02 11:30:00' (rth),
         '2020-01-02 13:30:00' (rth), '2020-01-02 15:30:00' (rth),
         '2020-01-02 17:30:00' (rth)],
    """
|
245
|
+
|
246
|
+
|
247
|
+
class DisappearingSessionWarning(DateRangeWarning):
    """
    Warning thrown when date_range is called with a timedelta that is larger than an entire
    session, resulting in that session disappearing from the returned DatetimeIndex.

    Only an issue when closed='right' and force_close=False
    """
|
254
|
+
|
255
|
+
|
256
|
+
class MissingSessionWarning(DateRangeWarning):
    """
    Warning thrown when a date_range() call is made with a requested session,
    but the schedule lacks the necessary columns. When this warning is ignored the
    returned DatetimeIndex will simply lack the relevant sessions.

    e.g. 'pre' Session requested and schedule lacks 'pre' and/or 'market_open' column
    """
|
264
|
+
|
265
|
+
|
266
|
+
class InsufficientScheduleWarning(DateRangeWarning):
    """
    Warning thrown when a date_range() call is made with a requested number of periods,
    or start-date / end-date, that exceed what was provided in the given schedule.

    If a Schedule has an insufficient start and end date then this warning is thrown twice.

    If this warning is thrown when date_range is called with a number of desired periods, then
    the desired start/end date is an approximate value. This 'approximation' is biased to
    overestimate the needed start/end time by about 1 week. This is done to limit the edge
    cases where this warning could get thrown multiple times in a row.
    """
|
278
|
+
|
279
|
+
|
280
|
+
def filter_date_range_warnings(
    action: Literal["error", "ignore", "always", "default", "once"],
    source: Union[
        Iterable[type[DateRangeWarning]], type[DateRangeWarning]
    ] = DateRangeWarning,
):
    """
    Adjust the behavior of the date_range() warnings to the desired action.

    :param action: - The desired change to the warning behavior
        'error': Escalate Warnings into Errors
        'ignore': Silence Warning Messages
        'once': Only display a message of the given category once
        'default': Reset the behavior of the given warning category
        'always': Always show the Warning of a given category

    :param source: - The Category/Categories to apply the action to. Can be a single Warning or a list of warnings
        default: DateRangeWarning (All Warnings)
        Warning Types: MissingSessionWarning, OverlappingSessionWarning,
        DisappearingSessionWarning, InsufficientScheduleWarning
    """
    # Normalize a lone warning class into a one-element list, then apply the
    # filter action uniformly to every requested category.
    categories = [source] if not isinstance(source, Iterable) else list(source)
    for category in categories:
        warnings.filterwarnings(action, category=category)
|
307
|
+
|
308
|
+
|
309
|
+
def parse_missing_session_warning(
    err: MissingSessionWarning,
) -> Tuple[set[SESSIONS], set[MKT_TIMES]]:
    """
    Parses a Missing Session Warning's Error Message.
    :returns Tuple[set[str], set[str]]:
        Set #1: The Missing Sessions
        Set #2: The Missing Schedule Columns
    """
    # The message embeds two brace-delimited sets; strip quotes, cut the text
    # on braces/pipes, then split each captured group on ", ".
    message = err.args[0].replace("'", "")
    pieces = split(r"[{|}]", message)
    missing_sessions = set(pieces[1].split(", "))
    missing_columns = set(pieces[3].split(", "))
    return (missing_sessions, missing_columns)  # type: ignore
|
320
|
+
|
321
|
+
|
322
|
+
def parse_insufficient_schedule_warning(
    err: InsufficientScheduleWarning,
) -> Tuple[bool, pd.Timestamp, pd.Timestamp]:
    """
    Parses the information from an Insufficient Schedule Warning.
    :returns Tuple[bool, pd.Timestamp, pd.Timestamp]:
        bool: True == Range is missing from the start, False == Range missing from the end
        Timestamp 1: Start of missing range
        Timestamp 2: End of the missing range.
    Note: The Timestamps are always ordered (t1 <= t2) and do not overlap with the original schedule.
    If a supplemental schedule is generated it can be concatenated on without any overlapping indices.
    """
    message = err.args[0]
    # The warning message always contains two ISO dates: needed time, then the
    # schedule's actual boundary.
    dates = finditer(r"\d{4}-\d{2}-\d{2}", message)
    missing_at_start = "Start-Time" in message
    needed = pd.Timestamp(next(dates).group())
    boundary = pd.Timestamp(next(dates).group())

    # Step the schedule boundary one day inward so the returned range does not
    # overlap the schedule's existing coverage.
    boundary = boundary - pd.Timedelta("1D") if missing_at_start else boundary + pd.Timedelta("1D")

    if needed <= boundary:
        return (missing_at_start, needed, boundary)
    return (missing_at_start, boundary, needed)
|
346
|
+
|
347
|
+
|
348
|
+
# endregion
|
349
|
+
|
350
|
+
|
351
|
+
def date_range(
    schedule: pd.DataFrame,
    frequency: Union[str, pd.Timedelta, int, float],
    closed: Union[Literal["left", "right", "both"], None] = "right",
    force_close: Union[bool, None] = True,
    session: Union[SESSIONS, Iterable[SESSIONS]] = {"RTH"},
    merge_adjacent: bool = True,
    start: Union[str, pd.Timestamp, int, float, None] = None,
    end: Union[str, pd.Timestamp, int, float, None] = None,
    periods: Union[int, None] = None,
) -> pd.DatetimeIndex:
    """
    Interpolates a Market's Schedule at the desired frequency and returns the result as a DatetimeIndex.
    This function is only valid for periods less than 1 Day, for longer periods use date_range_htf().

    Note: The slowest part of this function is by far generating the necessary schedule (which in
    turn is limited by pandas' date_range() function). If speed is a concern, store and update the
    schedule as needed instead of generating it every time.

    WARNINGS SYSTEM:
        *There are multiple edge-case warnings that are thrown by this function. See the Docstrings
        of each warning for more info. (DateRangeWarning, InsufficientScheduleWarning,
        MissingSessionWarning, OverlappingSessionWarning, DisappearingSessionWarning)

        *The thrown warnings can be ignored or escalated into catchable errors by using the
        filter_date_range_warnings() function.

        parse_missing_session_warning() & parse_insufficient_schedule_warning() exist to easily
        process those warnings if they are escalated into errors.

    PARAMETERS:

    :param schedule: Schedule of a calendar which includes all the columns necessary
        for the desired sessions.

    :param frequency: String, Int/float (seconds) or pd.Timedelta that represents the desired
        interval of the date_range. Intervals larger than 1D are not supported.

    :param closed: the way the intervals are labeled
        'right': use the end of the interval
        'left': use the start of the interval
        None / 'both': use the end of the interval but include the start of the first interval

    :param force_close: How the last value of a trading session is handled
        True: guarantee that the close of the trading session is the last value
        False: guarantee that there is no value greater than the close of the trading session
        None: leave the last value as it is calculated based on the closed parameter

    :param session: A str representing a single session or an Iterable of the following Sessions.
        RTH: The Default Option. This is [Market_open, Market_close], if the schedule includes a
            break then the break is excluded from the returned datetime index.
        ETH: [pre, market_open] & [market_close, post]
        pre: [pre, market_open]
        post: [market_close, post]
        break: [break_start, break_end]
        pre_break: [market_open, break_start]
        post_break: [break_end, market_close]
        closed: [market_close, market_open (of the next day)] If ETH market times are given then
            this will be [post, pre (of the next day)] instead. The last session will end at
            Midnight of the timezone the schedule is given in.
        closed_masked: Same as closed, but Weekends & Holidays are ignored. Instead, the Datetime
            index stops at Midnight on the trading day before the break and resumes at midnight
            prior to the next trading day. **Note: This is Midnight of the Timezone the schedule is
            given in, not Midnight of the exchange's tz since the exchange's tz is not known.

    :param merge_adjacent: Bool representing if adjacent sessions should be merged into a single session.
        For Example, NYSE w/ session={'RTH', 'ETH'}, frequency=2h, closed=left, force_close=False
        merge_adjacent == True => [pre, post]
        >>> ['2020-01-02 04:00:00', '2020-01-02 06:00:00',
             '2020-01-02 08:00:00', '2020-01-02 10:00:00',
             '2020-01-02 12:00:00', '2020-01-02 14:00:00',
             '2020-01-02 16:00:00', '2020-01-02 18:00:00']
        merge_adjacent == False => [pre, market_open] & [market_open, market_close] & [market_close, post]
        >>> ['2020-01-02 04:00:00', '2020-01-02 06:00:00',
             '2020-01-02 08:00:00', '2020-01-02 09:30:00',
             '2020-01-02 11:30:00', '2020-01-02 13:30:00',
             '2020-01-02 15:30:00', '2020-01-02 16:00:00',
             '2020-01-02 18:00:00']
        merge_adjacent=False re-aligns the timestamps to the session, but this results in
        the difference between timestamps not always equaling the desired frequency.

    :param start: Optional [String, Int/float (POSIX seconds) or pd.Timestamp] of the desired start time.
        :If left as None then the start-time of the the Schedule is used.
        :If no TZ info is given it will be interpreted in the same timezone as the first column
            of the schedule
        :Start can be a Day and Time, but the returned index will still be aligned to the underlying
            schedule. e.g. Session = [9:30am, 12pm], frequency=7min, start=9:45am. Underlying session
            = [9:30, 9:37, 9:44, 9:51, ...] => returned DatetimeIndex = [9:51, ...]

    :param end: Optional [String, Int/float (POSIX seconds) or pd.Timestamp] of the desired end time.
        :If left as None then the end-time of the the Schedule is used.
        :If no TZ info is given it will be interpreted in the same timezone as the first column
        **Note: The time given is an absolute value. i.e. end="2020-01-01" == "2020-01-01 00:00"
            returning times prior to Midnight of "2019-12-31", not to the EOD of "2020-01-01"

    :param periods: Optional Integer number of periods to return. If a Period count, Start time,
        and End time are given the period count is ignored.
        None: Period count is ignored. Returned index is all periods in [Start, End]
        Int: # of periods to return. By default, this is the first N periods following the start.
            If an end time is given then this is the N periods prior to the End Time (inclusive).
        CAVEAT: When Force_close == False & closed == 'right'/'both' the number of periods returned
            may be less than the parameter given.

    :return: pd.DatetimeIndex of datetime64[ns, TZ-Aware]
    """
    # ---- ---- Error Check Inputs ---- ----
    if closed not in ("left", "right", "both", None):
        raise ValueError("closed must be 'left', 'right', 'both' or None.")
    if force_close not in (True, False, None):
        raise ValueError("force_close must be True, False or None.")
    if merge_adjacent not in (True, False):
        raise ValueError("merge_adjacent must be True or False")

    # ---- ---- Standardize Frequency Param ---- ----
    # Numeric frequencies are POSIX seconds; convert to nanoseconds for pd.Timedelta.
    if isinstance(frequency, (int, float)):
        frequency = int(frequency * 1_000_000_000)
    try:
        frequency = pd.Timedelta(frequency)
    except ValueError as e:
        raise ValueError(f"Market Calendar Date_range Timeframe Error: {e}") from e
    if frequency <= pd.Timedelta("0s"):
        raise ValueError("Market Calendar Date_Range Frequency must be Positive.")
    if frequency > pd.Timedelta("1D"):
        raise ValueError(
            "Market Calendar Date_Range Frequency Cannot Be longer than '1D'."
        )

    # Resolve the requested sessions into (start_col, end_col) pairs; may warn
    # (MissingSessionWarning) and return an empty list if nothing is available.
    session_list, mask = _make_session_list(
        set(schedule.columns), session, merge_adjacent
    )
    if len(session_list) == 0:
        return pd.DatetimeIndex([], dtype="datetime64[ns, UTC]")

    session_times = _reconfigure_schedule(schedule, session_list, mask)
    # Trim off all 0 length sessions
    session_times = session_times[session_times.start.ne(session_times.end)]
    _error_check_sessions(session_times, frequency, closed, force_close)

    tz = schedule[session_list[0][0]].dt.tz  # copy tz info from schedule
    dtype = schedule[session_list[0][0]].dtype  # copy dtype info from schedule
    start, end, periods = _standardize_times(schedule, start, end, periods, tz)

    time_series = _calc_time_series(
        session_times, frequency, closed, force_close, start, end, periods
    )
    time_series.name = None

    return pd.DatetimeIndex(time_series, tz=tz, dtype=dtype)
|
499
|
+
|
500
|
+
|
501
|
+
# region ------------------ Date Range LTF Subroutines ------------------
|
502
|
+
|
503
|
+
|
504
|
+
def _make_session_list(
    columns: set, sessions: Union[str, Iterable], merge_adjacent: bool
) -> Tuple[list, bool]:
    """
    Create a flat list of (Session Start Column, Session End Column) boundary names.

    :param columns: set of column names present in the schedule (mutated: wrap-around
        aliases are added when 'pre'/'market_open' exist).
    :param sessions: session name or iterable of session names to include. Must be a
        subset of the module-level SESSIONS Literal; otherwise a ValueError is raised.
    :param merge_adjacent: when True, back-to-back sessions sharing a boundary column
        are fused into one longer session.
    :returns: (list of (start_col, end_col) tuples, bool of whether 'closed_masked'
        was requested).
    :raises ValueError: on unknown session names.
    """
    session_times = []
    missing_cols = set()  # schedule columns a requested session needed but lacks
    missing_sess = set()  # requested sessions that could not be built
    sessions = {sessions} if isinstance(sessions, str) else set(sessions)

    if len(extras := sessions.difference(set(SESSIONS.__args__))) > 0:  # type: ignore
        raise ValueError(f"Unknown Date_Range Market Session: {extras}")

    if "ETH" in sessions:  # Standardize ETH to 'pre' and 'post'
        sessions = sessions - {"ETH"} | {"pre", "post"}
    if "closed_masked" in sessions:  # closed_masked == 'closed' for this step
        sessions |= {"closed"}
    if "pre" in columns:  # Add wrap-around sessions
        columns |= {"pre_wrap"}
    if "market_open" in columns:
        columns |= {"market_open_wrap"}

    def _extend_statement(session, parts):
        # Append the boundary pair only when the session is requested AND the
        # schedule actually has both columns; otherwise record what's missing.
        if session not in sessions:
            return
        if columns.issuperset(parts):
            session_times.extend(parts)
        else:
            missing_sess.update({session})
            missing_cols.update(set(parts) - columns)

    # Append session_start, session_end for each desired session *in session order*
    _extend_statement("pre", ("pre", "market_open"))
    if {"break_start", "break_end"}.issubset(columns):
        # If the schedule has breaks then sub-divide RTH into pre & post break sessions
        if "RTH" in sessions:
            sessions = sessions - {"RTH"} | {"pre_break", "post_break"}
        _extend_statement("pre_break", ("market_open", "break_start"))
        _extend_statement("break", ("break_start", "break_end"))
        _extend_statement("post_break", ("break_end", "market_close"))
    else:
        _extend_statement("RTH", ("market_open", "market_close"))
    _extend_statement("post", ("market_close", "post"))

    # Closed can mean [close, open], [close, pre], [pre, post], or [post, open] Adjust accordingly
    s_start = "post" if "post" in columns else "market_close"
    s_end = "pre_wrap" if "pre" in columns else "market_open_wrap"
    _extend_statement("closed", (s_start, s_end))

    if len(missing_sess) > 0:
        warnings.warn(
            f"Requested Sessions: {missing_sess}, but schedule is missing columns: {missing_cols}."
            "\nResulting DatetimeIndex will lack those sessions. ",
            category=MissingSessionWarning,
        )

    if merge_adjacent:
        # Adjacent sessions share a boundary column: the end of one pair equals the
        # start of the next. Dropping both occurrences fuses the two sessions.
        drop_set = set()
        for i in range(1, len(session_times) - 1, 2):
            if session_times[i] == session_times[i + 1]:
                drop_set |= {session_times[i]}

        # Guaranteed to drop in pairs => no check needed before zipping
        session_times = [t for t in session_times if t not in drop_set]

    # Zip the flat list into a list of pairs
    session_pairs = list(zip(*(iter(session_times),) * 2))

    return session_pairs, "closed_masked" in sessions
|
572
|
+
|
573
|
+
|
574
|
+
def _standardize_times(
    schedule, start, end, periods, tz
) -> Tuple[pd.Timestamp, pd.Timestamp, Union[int, None]]:
    """
    Standardize start and end into a Timestamp of the relevant timezone.

    :param schedule: schedule DataFrame; only its (assumed tz-naive) index is read
        to warn when the requested range falls outside the schedule.
    :param start/end: str, int/float (POSIX seconds) or pd.Timestamp, or None.
    :param periods: ignored (set to None) when start, end and periods are all given.
    :param tz: timezone to localize naive inputs to.
    :raises ValueError: on unparseable times or start > end.
    """
    if all((start, end, periods)):
        periods = None  # Ignore Periods if all 3 params are given.

    if start is not None:
        if isinstance(start, (int, float)):
            # POSIX seconds -> nanoseconds, the unit pd.Timestamp(int) expects.
            start *= 1_000_000_000
        try:
            start = pd.Timestamp(start)
            if start.tz is None:
                start = start.tz_localize(tz)
        except ValueError as e:
            raise ValueError(f"Invalid Time ({start = }) given to date_range()") from e

        if start < schedule.index[0].tz_localize(tz):
            warnings.warn(
                f"Insufficient Schedule. Requested Start-Time: {start.normalize().tz_localize(None)}. "
                f"Schedule starts at: {schedule.index[0].normalize().tz_localize(None)}",
                category=InsufficientScheduleWarning,
            )

    if end is not None:
        if isinstance(end, (int, float)):
            # POSIX seconds -> nanoseconds, the unit pd.Timestamp(int) expects.
            end *= 1_000_000_000
        try:
            end = pd.Timestamp(end)
            # NOTE(review): the start branch above localizes without checking
            # 'tz is not None'; this branch checks it — asymmetric, confirm intent.
            if end.tz is None and tz is not None:
                end = end.tz_localize(tz)
        except ValueError as e:
            raise ValueError(f"Invalid Time ({end = }) given to date_range()") from e

        if end > schedule.index[-1].tz_localize(tz) + pd.Timedelta("1D"):
            # Checking against the day and not the specific session since so requesting a time
            # after the last session's close but before the next day doesn't throw a warning.
            requested_end = end.normalize().tz_localize(None) - pd.Timedelta("1D")
            warnings.warn(
                f"Insufficient Schedule. Requested End-Time: {requested_end}. "
                f"Schedule ends at: {schedule.index[-1].normalize().tz_localize(None)}",
                category=InsufficientScheduleWarning,
            )

    if start is not None and end is not None and start > end:
        raise ValueError(
            "Date_range() given a start-date that occurs after the given end-date. "
            f"{start = }, {end = }"
        )

    return start, end, periods
|
625
|
+
|
626
|
+
|
627
|
+
def _reconfigure_schedule(schedule, session_list, mask_close) -> pd.DataFrame:
|
628
|
+
"Reconfigure a schedule into a sorted dataframe of [start, end] times for each session"
|
629
|
+
|
630
|
+
sessions = []
|
631
|
+
|
632
|
+
for start, end in session_list:
|
633
|
+
if not end.endswith("_wrap"):
|
634
|
+
# Simple Session where 'start' occurs before 'end'
|
635
|
+
sessions.append(
|
636
|
+
schedule[[start, end]]
|
637
|
+
.rename(columns={start: "start", end: "end"})
|
638
|
+
.set_index("start", drop=False)
|
639
|
+
)
|
640
|
+
continue
|
641
|
+
|
642
|
+
# 'closed' Session that wraps around midnight. Shift the 'end' col by 1 Day
|
643
|
+
end = end.rstrip("_wrap")
|
644
|
+
tmp = pd.DataFrame(
|
645
|
+
{
|
646
|
+
"start": schedule[start],
|
647
|
+
"end": schedule[end].shift(-1),
|
648
|
+
}
|
649
|
+
).set_index("start", drop=False)
|
650
|
+
|
651
|
+
# Shift(-1) leaves last index of 'end' as 'NaT'
|
652
|
+
# Set the [-1, 'end' ('end' === 1)] cell to Midnight of the 'start' time of that row.
|
653
|
+
tmp.iloc[-1, 1] = tmp.iloc[-1, 0].normalize() + pd.Timedelta("1D") # type: ignore
|
654
|
+
|
655
|
+
if mask_close:
|
656
|
+
# Do some additional work to split 'closed' sessions that span weekends/holidays
|
657
|
+
sessions_to_split = tmp["end"] - tmp["start"] > pd.Timedelta("1D")
|
658
|
+
|
659
|
+
split_strt = tmp[sessions_to_split]["start"]
|
660
|
+
split_end = tmp[sessions_to_split]["end"]
|
661
|
+
|
662
|
+
sessions.append(
|
663
|
+
pd.DataFrame( # From start of the long close to Midnight
|
664
|
+
{
|
665
|
+
"start": split_strt,
|
666
|
+
"end": split_strt.dt.normalize() + pd.Timedelta("1D"),
|
667
|
+
}
|
668
|
+
).set_index("start", drop=False)
|
669
|
+
)
|
670
|
+
sessions.append(
|
671
|
+
pd.DataFrame( # From Midnight to the end of the long close
|
672
|
+
{
|
673
|
+
"start": split_end.dt.normalize(),
|
674
|
+
"end": split_end,
|
675
|
+
}
|
676
|
+
).set_index("start", drop=False)
|
677
|
+
)
|
678
|
+
|
679
|
+
# leave tmp as all the sessions that were not split
|
680
|
+
tmp = tmp[~sessions_to_split]
|
681
|
+
|
682
|
+
sessions.append(tmp)
|
683
|
+
|
684
|
+
return pd.concat(sessions).sort_index()
|
685
|
+
|
686
|
+
|
687
|
+
def _error_check_sessions(session_times, timestep, closed, force_close):
|
688
|
+
if session_times.start.gt(session_times.end).any():
|
689
|
+
raise ValueError(
|
690
|
+
"Desired Sessions from the Schedule contain rows where session start < session end, "
|
691
|
+
"please correct the schedule"
|
692
|
+
)
|
693
|
+
|
694
|
+
# Disappearing Session
|
695
|
+
if force_close is False and closed == "right":
|
696
|
+
# only check if needed
|
697
|
+
if (session_times.end - session_times.start).lt(timestep).any():
|
698
|
+
warnings.warn(
|
699
|
+
"An interval of the chosen frequency is larger than some of the trading sessions, "
|
700
|
+
"while closed='right' and force_close=False. This will make those trading sessions "
|
701
|
+
"disappear. Use a higher frequency or change the values of closed/force_close, to "
|
702
|
+
"keep this from happening.",
|
703
|
+
category=DisappearingSessionWarning,
|
704
|
+
)
|
705
|
+
|
706
|
+
# Overlapping Session
|
707
|
+
if force_close is None and closed != "left":
|
708
|
+
num_bars = _num_bars_ltf(session_times, timestep, closed)
|
709
|
+
end_times = session_times.start + num_bars * timestep
|
710
|
+
|
711
|
+
if end_times.gt(session_times.start.shift(-1)).any():
|
712
|
+
warnings.warn(
|
713
|
+
"The desired frequency results in date_range() generating overlapping sessions. "
|
714
|
+
"This can happen when the timestep is larger than a session, or when "
|
715
|
+
"merge_session = False and a session is not evenly divisible by the timestep. "
|
716
|
+
"The overlapping timestep can be deleted with force_close = True or False",
|
717
|
+
category=OverlappingSessionWarning,
|
718
|
+
)
|
719
|
+
|
720
|
+
|
721
|
+
def _num_bars_ltf(session_times, timestep, closed) -> pd.Series:
|
722
|
+
"Calculate the number of timestamps needed for each trading session."
|
723
|
+
if closed in ("both", None):
|
724
|
+
return np.ceil((session_times.end - session_times.start) / timestep) + 1
|
725
|
+
else:
|
726
|
+
return np.ceil((session_times.end - session_times.start) / timestep)
|
727
|
+
|
728
|
+
|
729
|
+
def _course_trim_to_period_count(num_bars, periods, reverse) -> pd.Series:
    """
    Course Trim the Session times to the desired period count.
    Large enough of a sub-routine to merit its own function call.

    :param num_bars: Series of bar counts per session, indexed by session start time.
    :param periods: desired total number of periods.
    :param reverse: True when an end-date was given, so sessions are consumed
        from the end of the schedule backwards.
    :returns: boolean Series (same index order as num_bars) of sessions to keep.
    """
    if reverse:
        # If end-date is given calculate sum in reverse order
        num_bars = num_bars[::-1]

    _sum = num_bars.cumsum()

    if _sum.iloc[-1] < periods:
        # Insufficient Number of Periods. Try to estimate an ending time from the data given.
        # delta = (end_date - start_date) / (cumulative # of periods) * (periods still needed) * fudge factor
        delta = abs(
            # (end_date - start_date) / (cumulative # of periods)
            ((_sum.index[-1] - _sum.index[0]) / _sum.iloc[-1])
            * (periods - _sum.iloc[-1])  # (periods still needed)
            * 1.05  # (Fudge Factor for weekends/holidays)
        )
        # Round delta up to whole days and pad by ~1 week (8 days).
        delta = (delta // pd.Timedelta("1D") + 8) * pd.Timedelta("1D")
        # The 1.05 Factor handles when the schedule is short by a few months, the + '1W' handles
        # when the schedule is short by only a few periods. While 1 Week is absolute overkill,
        # generating the extra few days is very little extra cost compared to throwing this error
        # a second or even third time.

        if reverse:
            approx_start = _sum.index[-1] - delta
            warnings.warn(
                f"Insufficient Schedule. Requested Approx Start-Time: {approx_start}. "
                f"Schedule starts at: {_sum.index[-1].normalize().tz_localize(None)}",
                category=InsufficientScheduleWarning,
            )
        else:
            approx_end = _sum.index[-1] + delta
            warnings.warn(
                f"Insufficient Schedule. Requested Approx End-Time: {approx_end}. "
                f"Schedule ends at: {_sum.index[-1].normalize().tz_localize(None)}",
                category=InsufficientScheduleWarning,
            )

    # Keep sessions whose *preceding* cumulative count is still below 'periods'.
    sessions_to_keep = _sum < periods
    # Shifting Ensures the number of needed periods are generated, but no more.
    sessions_to_keep = sessions_to_keep.shift(1, fill_value=True)

    if reverse:
        # If end-date is given calculate un-reverse the order of the series
        sessions_to_keep = sessions_to_keep[::-1]

    return sessions_to_keep
|
780
|
+
|
781
|
+
|
782
|
+
def _calc_time_series(
    session_times, timestep, closed, force_close, start, end, periods
) -> pd.Series:
    """
    Interpolate each session into a datetime series at the desired frequency.

    :param session_times: DataFrame of 'start'/'end' Timestamps, indexed by 'start'.
    :param timestep: pd.Timedelta interval between timestamps.
    :param closed: 'left', 'right', 'both', or None (edge-inclusion rule).
    :param force_close: True/False/None; trims (and optionally appends) session ends.
    :param start/end: optional range bounds; :param periods: optional count cap.
    """
    # region ---- ---- ---- Trim the Sessions ---- ---- ----
    # Compare 'start' to the session end times so that if 'start' is in the middle of a session
    # that session remains in session_times. Vise-versa for End.
    if start is not None:
        session_times = session_times[session_times.end > start]
    if end is not None:
        session_times = session_times[session_times.start < end]
    if len(session_times) == 0:
        # NOTE(review): dtype of this empty Series is object by default — the caller
        # wraps it in a DatetimeIndex, so this appears intentional; confirm on upgrade.
        return pd.Series([])

    # Override the First Session's Start and Last Session's End times if needed
    if start is not None and start > session_times.loc[session_times.index[0], "start"]:
        # Align the start to a multiple of the timestep after the session's beginning.
        # This is to make the returned DTIndex consistent across all start/end/period settings.
        session_start = session_times.loc[session_times.index[0], "start"]
        start_aligned = session_start + (
            ceil((start - session_start) / timestep) * timestep
        )
        session_times.loc[session_times.index[0], "start"] = start_aligned
    if end is not None and end < session_times.loc[session_times.index[-1], "end"]:
        # End is aligned to the same grid, anchored at the *first* session's start.
        session_start = session_times.loc[session_times.index[0], "start"]
        end_aligned = session_start + (
            floor((end - session_start) / timestep) * timestep
        )
        session_times.loc[session_times.index[-1], "end"] = end_aligned

    num_bars = _num_bars_ltf(session_times, timestep, closed)

    if periods is not None:
        sessions_to_keep = _course_trim_to_period_count(
            num_bars, periods, end is not None
        )
        num_bars = num_bars[sessions_to_keep]
        session_times = session_times[sessions_to_keep]

    # endregion

    # Each session start repeated once per bar of that session.
    starts = session_times.start.repeat(num_bars)  # type: ignore

    if closed == "right":
        # Right side of addition is cumulative time since session start in multiples of timestep
        time_series = starts + (starts.groupby(starts.index).cumcount() + 1) * timestep
    else:
        time_series = starts + (starts.groupby(starts.index).cumcount()) * timestep

    if force_close is not None:
        # Trim off all timestamps that stretched beyond their intended session
        time_series = time_series[time_series.le(session_times.end.repeat(num_bars))]

        if force_close:
            # Re-append the exact session end times.
            time_series = pd.concat([time_series, session_times.end])

    time_series = time_series.drop_duplicates().sort_values()  # type: ignore

    if periods is not None and len(time_series) > 0:
        # Although likely redundant, Fine Trim to desired period count.
        if end is not None:
            s_len = len(time_series)
            time_series = time_series[max(s_len - periods, 0) : s_len]
        else:
            time_series = time_series[0:periods]

    return time_series
|
849
|
+
|
850
|
+
|
851
|
+
# endregion
|
852
|
+
|
853
|
+
|
854
|
+
PeriodCode = Literal["D", "W", "M", "Q", "Y"]
Day_Anchor = Literal["SUN", "MON", "TUE", "WED", "THU", "FRI", "SAT"]
Month_Anchor = Literal[
    "JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
]

# These maps are needed because the pandas Period object is not consistent with date_range:
# pd.date_range(s, e, freq='W-SUN') == [DatetimeIndex of all Sundays] (as expected)
# but pd.Timestamp([A Sunday]).to_period('W-SUN').start_time == [The Monday prior???]
# Each map pairs an anchor with the name preceding it in the cycle
# (a right-rotation by one: 'SUN' -> 'SAT', 'JAN' -> 'DEC', etc.)
days_rolled = [Day_Anchor.__args__[i - 1] for i in range(len(Day_Anchor.__args__))]
weekly_roll_map = dict(zip(Day_Anchor.__args__, days_rolled))

months_rolled = [Month_Anchor.__args__[i - 1] for i in range(len(Month_Anchor.__args__))]
yearly_roll_map = dict(zip(Month_Anchor.__args__, months_rolled))
|
870
|
+
|
871
|
+
|
872
|
+
def date_range_htf(
    cal: "CustomBusinessDay",
    frequency: Union[str, pd.Timedelta, int, float],
    start: Union[str, pd.Timestamp, int, float, None] = None,
    end: Union[str, pd.Timestamp, int, float, None] = None,
    periods: Union[int, None] = None,
    closed: Union[Literal["left", "right"], None] = "right",
    *,
    day_anchor: Day_Anchor = "SUN",
    month_anchor: Month_Anchor = "JAN",
) -> pd.DatetimeIndex:
    """
    Returns a Normalized DatetimeIndex from the start-date to End-Date for Time periods of 1D and Higher.

    Unless using a custom calendar, it is advised to call the date_range_htf() method of the desired calendar.
    This is because default_anchors may change, or a single calendar may not be sufficient to model a market.

    For example, NYSE has two calendars: The first covers pre-1952 where Saturdays were trading days. The second
    covers post-1952 where Saturdays are closed.

    PARAMETERS:

    :param cal: CustomBusinessDay Calendar associated with a MarketCalendar. This can be retrieved by
        calling the holidays() method of a MarketCalendar.

    :param frequency: String, Int/float (POSIX seconds) or pd.Timedelta of the desired frequency.
        :Must be Greater than '1D' and an integer multiple of the base frequency (D, W, M, Q, or Y)
        :Important Note: Ints/Floats & Timedeltas are always considered as 'Open Business Days',
            '2D' == Every Other Business Day, '3D' == Every 3rd B.Day, '7D' == Every 7th B.Day
        :Higher periods (passed as strings) align to the beginning or end of the relevant period
        :i.e. '1W' == First/[Last] Trading Day of each Week, '1Q' == First/[Last] Day of every Quarter

    :param start: String, Int/float (POSIX seconds) or pd.Timestamp of the desired start time.
        :The Time & Timezone information is ignored. Only the Normalized Day is considered.

    :param end: String, Int/float (POSIX seconds) or pd.Timestamp of the desired end time.
        :The Time & Timezone information is ignored. Only the Normalized Day is considered.

    :param periods: Optional Integer number of periods to return. If a Period count, Start time,
        and End time are given the period count is ignored.

    :param closed: Literal['left', 'right']. Method used to close each range.
        :left: First open trading day of the Session is returned (e.g. First Open Day of The Month)
        :right: Last open trading day of the Session is returned (e.g. Last Open Day of The Month)
        :Note, This has no effect when the desired frequency is a number of days.

    :param day_anchor: Day to Anchor the start of the Weekly timeframes to. Default 'SUN'.
        : To get the First/Last Days of the trading Week then the Anchor needs to be on a day the relevant
            market is closed.
        : This can be set so that a specific day each week is returned.
        : freq='1W' & day_anchor='WED' Will return Every 'WED' when the market is open, and nearest day
            to the left or right (based on 'closed') when the market is closed.
        Options: ["SUN", "MON", "TUE", "WED", "THU", "FRI", "SAT"]

    :param month_anchor: Month to Anchor the start of the year to for Quarter and yearly timeframes.
        : Default 'JAN' for Calendar Quarters/Years. Can be set to 'JUL' to return Fiscal Years
        Options: ["JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"]
    """

    start, end, periods = _error_check_htf_range(start, end, periods)
    mult, _period_code = _standardize_htf_freq(frequency)

    if _period_code == "D":
        if mult == 1:
            # When desiring a frequency of '1D' default to pd.date_range. It will give the same
            # answer but it is more performant than the method in _cal_day_range.
            return pd.date_range(start, end, periods, freq=cal)
        else:
            return _cal_day_range(cal, start, end, periods, mult)

    elif _period_code == "W":
        freq = str(mult) + "W-" + day_anchor.upper()
        grouping_period = "W-" + weekly_roll_map[day_anchor.upper()]

        return _cal_WMQY_range(cal, start, end, periods, freq, grouping_period, closed)

    elif _period_code == "M":
        # 'MS' anchors to Month Start, 'ME' to Month End.
        freq = str(mult) + "M" + ("S" if closed == "left" else "E")
        return _cal_WMQY_range(cal, start, end, periods, freq, "M", closed)

    else:  # Yearly & Quarterly Period
        freq = str(mult) + _period_code
        freq += (
            "S-" + month_anchor.upper()
            if closed == "left"  # *Insert Angry Tom Meme Here*
            else "E-" + yearly_roll_map[month_anchor.upper()]
        )
        grouping_period = _period_code + "-" + yearly_roll_map[month_anchor.upper()]

        return _cal_WMQY_range(cal, start, end, periods, freq, grouping_period, closed)
|
962
|
+
|
963
|
+
|
964
|
+
# region ---- ---- ---- Date Range HTF Subroutines ---- ---- ----
|
965
|
+
|
966
|
+
|
967
|
+
def _error_check_htf_range(
|
968
|
+
start, end, periods: Union[int, None]
|
969
|
+
) -> Tuple[Union[pd.Timestamp, None], Union[pd.Timestamp, None], Union[int, None]]:
|
970
|
+
"Standardize and Error Check Start, End, and period params"
|
971
|
+
if periods is not None:
|
972
|
+
if not isinstance(periods, int):
|
973
|
+
raise ValueError(
|
974
|
+
f"Date_Range_HTF Must be either an int or None. Given {type(periods)}"
|
975
|
+
)
|
976
|
+
if periods < 0:
|
977
|
+
raise ValueError("Date_range_HTF Periods must be Positive.")
|
978
|
+
|
979
|
+
if isinstance(start, (int, float)):
|
980
|
+
start = int(start * 1_000_000_000)
|
981
|
+
if isinstance(end, (int, float)):
|
982
|
+
end = int(end * 1_000_000_000)
|
983
|
+
|
984
|
+
if start is not None:
|
985
|
+
start = pd.Timestamp(start).normalize().tz_localize(None)
|
986
|
+
if end is not None:
|
987
|
+
end = pd.Timestamp(end).normalize().tz_localize(None)
|
988
|
+
|
989
|
+
if all((start, end, periods)):
|
990
|
+
periods = None # Ignore Periods if passed too many params
|
991
|
+
if len([param for param in (start, end, periods) if param is not None]) < 2:
|
992
|
+
raise ValueError(
|
993
|
+
"Date_Range_HTF must be given two of the three following params: (start, end, periods)"
|
994
|
+
)
|
995
|
+
|
996
|
+
if start is not None and end is not None and end < start:
|
997
|
+
raise ValueError("Date_Range_HTF() Start-Date must be before the End-Date")
|
998
|
+
|
999
|
+
return start, end, periods
|
1000
|
+
|
1001
|
+
|
1002
|
+
def _standardize_htf_freq(
    frequency: Union[str, pd.Timedelta, int, float]
) -> Tuple[int, PeriodCode]:
    "Standardize the frequency multiplier and Code, throwing errors as needed."
    if isinstance(frequency, str):
        if not frequency:
            raise ValueError("Date_Range_HTF Frequency is an empty string.")
        if len(frequency) == 1:
            frequency = "1" + frequency  # Turn 'D' into '1D' for all period codes
        code = frequency[-1].upper()
        if code in ("W", "M", "Q", "Y"):
            try:
                if (mult := int(frequency[0:-1])) <= 0:
                    raise ValueError()
                return mult, code  # type: ignore
            except ValueError as e:
                raise ValueError(
                    "Date_Range_HTF() Week, Month, Quarter and Year frequency must "
                    "have a positive integer multiplier"
                ) from e

    # All remaining frequencies (int, float, strs, & Timedeltas) are parsed as business days.
    if isinstance(frequency, (int, float)):
        # POSIX seconds -> nanoseconds for the Timedelta constructor.
        frequency = int(frequency * 1_000_000_000)

    delta = pd.Timedelta(frequency)
    if delta < pd.Timedelta("1D"):
        raise ValueError("Date_Range_HTF() Frequency must be '1D' or Higher.")
    if delta % pd.Timedelta("1D") != pd.Timedelta(0):
        raise ValueError(
            "Date_Range_HTF() Week and Day frequency must be an integer multiple of Days"
        )

    return delta.days, "D"
|
1035
|
+
|
1036
|
+
|
1037
|
+
def _days_per_week(weekmask: Union[Iterable, str]) -> int:
|
1038
|
+
"Used to get a more accurate estimate of the number of days per week"
|
1039
|
+
# Return any 'Array Like' Representation
|
1040
|
+
if not isinstance(weekmask, str):
|
1041
|
+
return len([day for day in weekmask if bool(day)])
|
1042
|
+
|
1043
|
+
if len(weekmask) == 0:
|
1044
|
+
raise ValueError("Weekmask cannot be blank")
|
1045
|
+
|
1046
|
+
weekmask = weekmask.upper()
|
1047
|
+
day_abbrs = {day for day in weekly_roll_map.values() if day in weekmask}
|
1048
|
+
if len(day_abbrs) != 0:
|
1049
|
+
return len(day_abbrs)
|
1050
|
+
|
1051
|
+
# Weekmask Something like '0111110'
|
1052
|
+
return len([day for day in weekmask if bool(day)])
|
1053
|
+
|
1054
|
+
|
1055
|
+
def _cal_day_range(
    cb_day: "CustomBusinessDay", start, end, periods, mult
) -> pd.DatetimeIndex:
    """
    Returns a Normalized DateTimeIndex of Open Business Days.
    Exactly two of the (start, end, periods) arguments must be given.

    ** Arguments should be Type/Error Checked before calling this function **

    :param cb_day: CustomBusinessDay Object from the respective calendar
    :param start: Optional Start-Date. Must be a Normalized, TZ-Naive pd.Timestamp
    :param end: Optional End-Date. Must be a Normalized, TZ-Naive pd.Timestamp
    :param periods: Optional Number of periods to return
    :param mult: Integer Multiple of business days between data-points.
        e.g: 1 == Every Business Day, 2 == Every Other B.Day, 3 == Every Third B.Day, etc.
    :returns: DateRangeIndex[datetime64[ns]]
    """

    # Ensure Start and End are open Business days in the desired range
    if start is not None:
        start = cb_day.rollforward(start)
    if end is not None:
        end = cb_day.rollback(end)

    # ---- Start-Date to End-Date ----
    if isinstance(start, pd.Timestamp) and isinstance(end, pd.Timestamp):
        num_days = (end - start) / mult
        # Get a better estimate of the number of open days since date_range calc is slow
        # NOTE(review): this mixes a week count (Timedelta // 7) scaled by open-days-per-
        # week with a sub-week Timedelta remainder; the result is divided back into days
        # below. It appears to intentionally overestimate — confirm before changing.
        est_open_days = (
            (num_days // 7) * _days_per_week(cb_day.weekmask)
        ) + num_days % pd.Timedelta("1W")

        # Should always produce a small overestimate since Holidays aren't accounted for.
        est_open_days = ceil(est_open_days / pd.Timedelta("1D"))
        _range = pd.RangeIndex(0, est_open_days * mult, mult)

        # Adding N * cb_day steps N open business days forward from 'start'.
        dt_index = pd.DatetimeIndex(start + _range * cb_day, dtype="datetime64[ns]")
        return dt_index[dt_index <= end]  # Trim the overestimate back to 'end'

    # ---- Periods from Start-Date ----
    elif isinstance(start, pd.Timestamp):
        _range = pd.RangeIndex(0, periods * mult, mult)
        return pd.DatetimeIndex(start + _range * cb_day, dtype="datetime64[ns]")

    # ---- Periods from End-Date ----
    else:
        # Ensure the end-date is the first valid Trading Day <= given end-date
        end = cb_day.rollback(end)
        # Step backwards from 'end', then reverse into ascending order.
        _range = pd.RangeIndex(0, -1 * periods * mult, -1 * mult)

        return pd.DatetimeIndex(end + _range * cb_day, dtype="datetime64[ns]")[::-1]
|
1106
|
+
|
1107
|
+
|
1108
|
+
def _cal_WMQY_range(
    cb_day: "CustomBusinessDay",
    start: Union[pd.Timestamp, None],
    end: Union[pd.Timestamp, None],
    periods: Union[int, None],
    freq: str,
    grouping_period: str,
    closed: Union[Literal["left", "right"], None] = "right",
):
    """
    Return A DateRangeIndex of the Weekdays that mark either the start or end of each
    business week based on the 'closed' parameter.

    ** Arguments should be Type/Error Checked before calling this function **

    :param cb_day: CustomBusinessDay Object from the respective calendar
    :param start: Optional Start-Date. Must be a Normalized, TZ-Naive pd.Timestamp
    :param end: Optional End-Date. Must be a Normalized, TZ-Naive pd.Timestamp
    :param periods: Optional Number of periods to return
    :param freq: Formatted frequency of '1W' and Higher with desired multiple, S/E Chars,
        and Anchoring code.
    :param grouping_period: Period_Code with anchor that matches the given period Code.
        i.e. 'W-[DAY]', 'M', 'Q-[MONTH]', 'Y-[MONTH]'
    :param closed: Union['left', Any].
        'left': The normalized start-day of the relative period is returned
        Everything else: The normalized last-day of the relative period is returned
    :returns: DateRangeIndex[datetime64[ns]]
    """

    # Need to Adjust the Start/End Dates given to pandas since Rolling forward or backward can shift
    # the calculated date range out of the desired [start, end] range adding or ignoring desired values.

    # For Example, say we want NYSE-Month-Starts between [2020-01-02, 2020-02-02]. W/O Adjusting dates
    # we call pd.date_range('2020-01-02, '2020-02-02', 'MS') => ['2020-02-01'] Rolled to ['2020-02-03'].
    # '02-03' date is then trimmed off returning an empty Index. despite '2020-01-02' being a valid Month Start
    # By Adjusting the Dates we call pd.date_range('2020-01-01, '2020-02-02') => ['2020-01-01, '2020-02-01']
    # That's then Rolled into [2020-01-02, 2020-02-03] & Trimmed to [2020-01-02] as desired.

    _dr_start, _dr_end = None, None

    if closed == "left":
        roll_func = cb_day.rollforward
        if start is not None:
            # Snap to the period start if 'start' still precedes its rolled open day.
            normalized_start = start.to_period(grouping_period).start_time
            _dr_start = (
                normalized_start if start <= roll_func(normalized_start) else start
            )

        if end is not None:
            if periods is not None:
                normalized_end = end.to_period(grouping_period).start_time
                _dr_end = (
                    normalized_end - pd.Timedelta("1D")  # Shift into preceding group
                    if end < roll_func(normalized_end)
                    else cb_day.rollback(end)
                )
            else:
                _dr_end = cb_day.rollback(end)

    else:
        roll_func = cb_day.rollback
        if start is not None:
            if periods is not None:
                normalized_start = start.to_period(grouping_period).end_time.normalize()
                _dr_start = (
                    normalized_start + pd.Timedelta("1D")  # Shift into trailing group
                    if start > roll_func(normalized_start)
                    else cb_day.rollforward(start)
                )
            else:
                _dr_start = cb_day.rollforward(start)

        if end is not None:
            # Snap to the period end if 'end' still follows its rolled close day.
            normalized_end = end.to_period(grouping_period).end_time.normalize()
            _dr_end = normalized_end if end >= roll_func(normalized_end) else end

    # Generate anchored period boundaries, then roll each onto an open trading day.
    _range = (
        pd.date_range(_dr_start, _dr_end, periods, freq).to_series().apply(roll_func)
    )

    # Ensure that Rolled Timestamps are in the desired range When given both Start and End
    if start is not None and end is not None:
        if len(_range) > 0 and _range.iloc[0] < start:
            # Trims off the first 'WMQY End' that might have been Rolled before start
            _range = _range[1:]
        if len(_range) > 0 and _range.iloc[-1] > end:
            # Trims off the last 'WMQY Start' that might have been Rolled after end
            _range = _range[0:-1]

    return pd.DatetimeIndex(_range, dtype="datetime64[ns]")
|
1198
|
+
|
1199
|
+
|
1200
|
+
# endregion
|