pandas-market-calendars 4.3.3__py3-none-any.whl → 4.6.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. pandas_market_calendars/__init__.py +39 -38
  2. pandas_market_calendars/calendar_registry.py +57 -53
  3. pandas_market_calendars/calendar_utils.py +1200 -261
  4. pandas_market_calendars/calendars/asx.py +66 -66
  5. pandas_market_calendars/calendars/bmf.py +223 -206
  6. pandas_market_calendars/calendars/bse.py +421 -407
  7. pandas_market_calendars/calendars/cboe.py +145 -145
  8. pandas_market_calendars/calendars/cme.py +405 -402
  9. pandas_market_calendars/calendars/cme_globex_agriculture.py +172 -126
  10. pandas_market_calendars/calendars/cme_globex_base.py +119 -119
  11. pandas_market_calendars/calendars/cme_globex_crypto.py +160 -160
  12. pandas_market_calendars/calendars/cme_globex_energy_and_metals.py +216 -216
  13. pandas_market_calendars/calendars/cme_globex_equities.py +123 -123
  14. pandas_market_calendars/calendars/cme_globex_fixed_income.py +136 -136
  15. pandas_market_calendars/calendars/cme_globex_fx.py +101 -101
  16. pandas_market_calendars/calendars/eurex.py +131 -139
  17. pandas_market_calendars/calendars/eurex_fixed_income.py +98 -98
  18. pandas_market_calendars/calendars/hkex.py +429 -426
  19. pandas_market_calendars/calendars/ice.py +81 -81
  20. pandas_market_calendars/calendars/iex.py +151 -112
  21. pandas_market_calendars/calendars/jpx.py +113 -109
  22. pandas_market_calendars/calendars/lse.py +114 -114
  23. pandas_market_calendars/calendars/mirror.py +149 -130
  24. pandas_market_calendars/calendars/nyse.py +1466 -1324
  25. pandas_market_calendars/calendars/ose.py +116 -116
  26. pandas_market_calendars/calendars/sifma.py +354 -350
  27. pandas_market_calendars/calendars/six.py +132 -132
  28. pandas_market_calendars/calendars/sse.py +311 -311
  29. pandas_market_calendars/calendars/tase.py +220 -197
  30. pandas_market_calendars/calendars/tsx.py +181 -181
  31. pandas_market_calendars/holidays/cme.py +385 -385
  32. pandas_market_calendars/holidays/cme_globex.py +214 -214
  33. pandas_market_calendars/holidays/cn.py +1476 -1455
  34. pandas_market_calendars/holidays/jp.py +401 -398
  35. pandas_market_calendars/holidays/jpx_equinox.py +1 -0
  36. pandas_market_calendars/holidays/nyse.py +1536 -1531
  37. pandas_market_calendars/holidays/oz.py +63 -63
  38. pandas_market_calendars/holidays/sifma.py +350 -338
  39. pandas_market_calendars/holidays/us.py +376 -376
  40. pandas_market_calendars/market_calendar.py +1057 -895
  41. {pandas_market_calendars-4.3.3.dist-info → pandas_market_calendars-4.6.0.dist-info}/METADATA +13 -9
  42. pandas_market_calendars-4.6.0.dist-info/RECORD +50 -0
  43. {pandas_market_calendars-4.3.3.dist-info → pandas_market_calendars-4.6.0.dist-info}/WHEEL +1 -1
  44. pandas_market_calendars-4.3.3.dist-info/RECORD +0 -50
  45. {pandas_market_calendars-4.3.3.dist-info → pandas_market_calendars-4.6.0.dist-info}/LICENSE +0 -0
  46. {pandas_market_calendars-4.3.3.dist-info → pandas_market_calendars-4.6.0.dist-info}/NOTICE +0 -0
  47. {pandas_market_calendars-4.3.3.dist-info → pandas_market_calendars-4.6.0.dist-info}/top_level.txt +0 -0
@@ -1,261 +1,1200 @@
1
- """
2
- Utilities to use with market_calendars
3
- """
4
- import itertools
5
- import warnings
6
-
7
- import numpy as np
8
- import pandas as pd
9
-
10
-
11
- def merge_schedules(schedules, how="outer"):
12
- """
13
- Given a list of schedules will return a merged schedule. The merge method (how) will either return the superset
14
- of any datetime when any schedule is open (outer) or only the datetime where all markets are open (inner)
15
-
16
- CAVEATS:
17
- * This does not work for schedules with breaks, the break information will be lost.
18
- * Only "market_open" and "market_close" are considered, other market times are not yet supported.
19
-
20
- :param schedules: list of schedules
21
- :param how: outer or inner
22
- :return: schedule DataFrame
23
- """
24
- all_cols = [x.columns for x in schedules]
25
- all_cols = list(itertools.chain(*all_cols))
26
- if ("break_start" in all_cols) or ("break_end" in all_cols):
27
- warnings.warn(
28
- "Merge schedules will drop the break_start and break_end from result."
29
- )
30
-
31
- result = schedules[0]
32
- for schedule in schedules[1:]:
33
- result = result.merge(schedule, how=how, right_index=True, left_index=True)
34
- if how == "outer":
35
- result["market_open"] = result.apply(
36
- lambda x: min(x.market_open_x, x.market_open_y), axis=1
37
- )
38
- result["market_close"] = result.apply(
39
- lambda x: max(x.market_close_x, x.market_close_y), axis=1
40
- )
41
- elif how == "inner":
42
- result["market_open"] = result.apply(
43
- lambda x: max(x.market_open_x, x.market_open_y), axis=1
44
- )
45
- result["market_close"] = result.apply(
46
- lambda x: min(x.market_close_x, x.market_close_y), axis=1
47
- )
48
- else:
49
- raise ValueError('how argument must be "inner" or "outer"')
50
- result = result[["market_open", "market_close"]]
51
- return result
52
-
53
-
54
- def convert_freq(index, frequency):
55
- """
56
- Converts a DateTimeIndex to a new lower frequency
57
-
58
- :param index: DateTimeIndex
59
- :param frequency: frequency string
60
- :return: DateTimeIndex
61
- """
62
- return pd.DataFrame(index=index).asfreq(frequency).index
63
-
64
-
65
- class _date_range:
66
- """
67
- This is a callable class that should be used by calling the already initiated instance: `date_range`.
68
- Given a schedule, it will return a DatetimeIndex with all of the valid datetimes at the frequency given.
69
-
70
- The schedule columns should all have the same time zone.
71
-
72
- The calculations will be made for each trading session. If the passed schedule-DataFrame doesn't have
73
- breaks, there is one trading session per day going from market_open to market_close, otherwise there are two,
74
- the first one going from market_open to break_start and the second one from break_end to market_close.
75
-
76
- *Any trading session where start == end is considered a 'no-trading session' and will always be dropped*
77
-
78
- CAVEATS:
79
- * Only "market_open", "market_close" (and, optionally, "breaak_start" and "break_end")
80
- are considered, other market times are not yet supported by this class.
81
-
82
- * If the difference between start and end of a trading session is smaller than an interval of the
83
- frequency, and closed= "right" and force_close = False, the whole session will disappear.
84
- This will also raise a warning.
85
-
86
-
87
- Signature:
88
- .__call__(self, schedule, frequency, closed='right', force_close=True, **kwargs)
89
-
90
- :param schedule: schedule of a calendar, which may or may not include break_start and break_end columns
91
- :param frequency: frequency string that is used by pd.Timedelta to calculate the timestamps
92
- this must be "1D" or higher frequency
93
- :param closed: the way the intervals are labeled
94
- 'right': use the end of the interval
95
- 'left': use the start of the interval
96
- None: (or 'both') use the end of the interval but include the start of the first interval (the open)
97
- :param force_close: how the last value of a trading session is handled
98
- True: guarantee that the close of the trading session is the last value
99
- False: guarantee that there is no value greater than the close of the trading session
100
- None: leave the last value as it is calculated based on the closed parameter
101
- :param kwargs: unused. Solely for compatibility.
102
-
103
-
104
- """
105
-
106
- def __init__(self, schedule=None, frequency=None, closed="right", force_close=True):
107
- if closed not in ("left", "right", "both", None):
108
- raise ValueError("closed must be 'left', 'right', 'both' or None.")
109
- elif force_close not in (True, False, None):
110
- raise ValueError("force_close must be True, False or None.")
111
-
112
- self.closed = closed
113
- self.force_close = force_close
114
- self.has_breaks = False
115
- if frequency is None:
116
- self.frequency = None
117
- else:
118
- self.frequency = pd.Timedelta(frequency)
119
- if self.frequency > pd.Timedelta("1D"):
120
- raise ValueError("Frequency must be 1D or higher frequency.")
121
-
122
- elif schedule.market_close.lt(schedule.market_open).any():
123
- raise ValueError(
124
- "Schedule contains rows where market_close < market_open,"
125
- " please correct the schedule"
126
- )
127
-
128
- if "break_start" in schedule:
129
- if not all(
130
- [
131
- schedule.market_open.le(schedule.break_start).all(),
132
- schedule.break_start.le(schedule.break_end).all(),
133
- schedule.break_end.le(schedule.market_close).all(),
134
- ]
135
- ):
136
- raise ValueError(
137
- "Not all rows match the condition: "
138
- "market_open <= break_start <= break_end <= market_close, "
139
- "please correct the schedule"
140
- )
141
- self.has_breaks = True
142
-
143
- def _check_overlap(self, schedule):
144
- """checks if calculated end times would overlap with the next start times.
145
- Only an issue when force_close is None and closed != left.
146
-
147
- :param schedule: pd.DataFrame with first column: 'start' and second column: 'end'
148
- :raises ValueError:"""
149
- if self.force_close is None and self.closed != "left":
150
- num_bars = self._calc_num_bars(schedule)
151
- end_times = schedule.start + num_bars * self.frequency
152
-
153
- if end_times.gt(schedule.start.shift(-1)).any():
154
- raise ValueError(
155
- "The chosen frequency will lead to overlaps in the calculated index. "
156
- "Either choose a higher frequency or avoid setting force_close to None "
157
- "when setting closed to 'right', 'both' or None."
158
- )
159
-
160
- def _check_disappearing_session(self, schedule):
161
- """checks if requested frequency and schedule would lead to lost trading sessions.
162
- Only necessary when force_close = False and closed = "right".
163
-
164
- :param schedule: pd.DataFrame with first column: 'start' and second column: 'end'
165
- :raises UserWarning:"""
166
- if self.force_close is False and self.closed == "right":
167
- if (schedule.end - schedule.start).lt(self.frequency).any():
168
- warnings.warn(
169
- "An interval of the chosen frequency is larger than some of the trading sessions, "
170
- "while closed== 'right' and force_close is False. This will make those trading sessions "
171
- "disappear. Use a higher frequency or change the values of closed/force_close, to "
172
- "keep this from happening."
173
- )
174
-
175
- def _calc_num_bars(self, schedule):
176
- """calculate the number of timestamps needed for each trading session.
177
-
178
- :param schedule: pd.DataFrame with first column: 'start' and second column: 'end'
179
- :return: pd.Series of float64"""
180
- return np.ceil((schedule.end - schedule.start) / self.frequency)
181
-
182
- def _calc_time_series(self, schedule):
183
- """Method used by date_range to calculate the trading index.
184
-
185
- :param schedule: pd.DataFrame with first column: 'start' and second column: 'end'
186
- :return: pd.Series of datetime64[ns, UTC]"""
187
- num_bars = self._calc_num_bars(schedule)
188
-
189
- # ---> calculate the desired timeseries:
190
- if self.closed == "left":
191
- opens = schedule.start.repeat(num_bars) # keep as is
192
- time_series = (
193
- opens.groupby(opens.index).cumcount()
194
- ) * self.frequency + opens
195
- elif self.closed == "right":
196
- opens = schedule.start.repeat(num_bars) # dont add row but shift up
197
- time_series = (
198
- opens.groupby(opens.index).cumcount() + 1
199
- ) * self.frequency + opens
200
- else:
201
- num_bars += 1
202
- opens = schedule.start.repeat(num_bars) # add row but dont shift up
203
- time_series = (
204
- opens.groupby(opens.index).cumcount()
205
- ) * self.frequency + opens
206
-
207
- if self.force_close is not None:
208
- time_series = time_series[time_series.le(schedule.end.repeat(num_bars))]
209
- if self.force_close:
210
- time_series = pd.concat([time_series, schedule.end]).sort_values()
211
-
212
- return time_series
213
-
214
- def __call__(self, schedule, frequency, closed="right", force_close=True, **kwargs):
215
- """
216
- See class docstring for more information.
217
-
218
- :param schedule: schedule of a calendar, which may or may not include break_start and break_end columns
219
- :param frequency: frequency string that is used by pd.Timedelta to calculate the timestamps
220
- this must be "1D" or higher frequency
221
- :param closed: the way the intervals are labeled
222
- 'right': use the end of the interval
223
- 'left': use the start of the interval
224
- None: (or 'both') use the end of the interval but include the start of the first interval
225
- :param force_close: how the last value of a trading session is handled
226
- True: guarantee that the close of the trading session is the last value
227
- False: guarantee that there is no value greater than the close of the trading session
228
- None: leave the last value as it is calculated based on the closed parameter
229
- :param kwargs: unused. Solely for compatibility.
230
- :return: pd.DatetimeIndex of datetime64[ns, UTC]
231
- """
232
- self.__init__(schedule, frequency, closed, force_close)
233
- if self.has_breaks:
234
- # rearrange the schedule, to make every row one session
235
- before = schedule[["market_open", "break_start"]].set_index(
236
- schedule["market_open"]
237
- )
238
- after = schedule[["break_end", "market_close"]].set_index(
239
- schedule["break_end"]
240
- )
241
- before.columns = after.columns = ["start", "end"]
242
- schedule = pd.concat([before, after]).sort_index()
243
-
244
- else:
245
- schedule = schedule.rename(
246
- columns={"market_open": "start", "market_close": "end"}
247
- )
248
-
249
- schedule = schedule[
250
- schedule.start.ne(schedule.end)
251
- ] # drop the 'no-trading sessions'
252
- self._check_overlap(schedule)
253
- self._check_disappearing_session(schedule)
254
-
255
- time_series = self._calc_time_series(schedule)
256
-
257
- time_series.name = None
258
- return pd.DatetimeIndex(time_series.drop_duplicates())
259
-
260
-
261
- date_range = _date_range()
1
+ """
2
+ Utilities to use with market_calendars
3
+ """
4
+
5
+ import itertools
6
+ from math import ceil, floor
7
+ from typing import TYPE_CHECKING, Any, Dict, Iterable, Literal, Tuple, Union
8
+ import warnings
9
+
10
+ from re import finditer, split
11
+ import numpy as np
12
+ import pandas as pd
13
+
14
+ if TYPE_CHECKING:
15
+ from pandas.tseries.offsets import CustomBusinessDay
16
+ from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday
17
+
18
+ DEFAULT_LABEL_MAP = {
19
+ "pre": "pre",
20
+ "rth_pre_break": "rth",
21
+ "rth": "rth",
22
+ "break": "break",
23
+ "rth_post_break": "rth",
24
+ "post": "post",
25
+ "closed": "closed",
26
+ }
27
+
28
+
29
+ def mark_session(
30
+ schedule: pd.DataFrame,
31
+ timestamps: pd.DatetimeIndex,
32
+ label_map: Dict[str, Any] = {},
33
+ *,
34
+ closed: Literal["left", "right"] = "right",
35
+ ) -> pd.Series:
36
+ """
37
+ Return a Series that denotes the trading session of each timestamp in a DatetimeIndex.
38
+ The returned Series's Index is the provided Datetime Index, the Series's values
39
+ are the timestamps' corresponding session.
40
+
41
+ PARAMETERS:
42
+
43
+ :param schedule: The market schedule to check the timestamps against. This Schedule must include
44
+ all of the trading days that are in the provided DatetimeIndex of timestamps.
45
+ Note: The columns need to be sorted into ascending order, if not, then an error will be
46
+ raised saying the bins must be in ascending order.
47
+
48
+ :param timestamps: A DatetimeIndex of Timestamps to check. Must be sorted in ascending order.
49
+
50
+ :param label_map: Optional mapping of Dict[str, Any] to change the values returned in the
51
+ series. The keys of the given mapping should match the keys of the default dict, but the
52
+ values can be anything. A subset of mappings may also be provided, e.g. {'closed':-1} will
53
+ only change the label of the 'closed' session. All others will remain the default label.
54
+
55
+ >>> Default Mapping == {
56
+ "pre": "pre",
57
+ "rth_pre_break": "rth", # When the Schedule has a break
58
+ "rth": "rth", # When the Schedule doesn't have a break
59
+ "break": "break", # When the Schedule has a break
60
+ "rth_post_break": "rth", # When the Schedule has a break
61
+ "post": "post",
62
+ "closed": "closed",
63
+ }
64
+
65
+ :param closed: Which side of each interval should be closed (inclusive)
66
+ left: == [start, end)
67
+ right: == (start, end]
68
+ """
69
+ # ---- ---- ---- Determine which columns need to be dropped ---- ---- ----
70
+ session_labels = ["closed"]
71
+ columns = set(schedule.columns)
72
+ needed_cols = set()
73
+
74
+ def _extend_statement(session: str, parts: set):
75
+ if parts.issubset(columns):
76
+ needed_cols.update(parts)
77
+ session_labels.append(session)
78
+
79
+ _extend_statement("pre", {"pre", "market_open"})
80
+ if {"break_start", "break_end"}.issubset(columns):
81
+ _extend_statement("rth_pre_break", {"market_open", "break_start"})
82
+ _extend_statement("break", {"break_start", "break_end"})
83
+ _extend_statement("rth_post_break", {"break_end", "market_close"})
84
+ else:
85
+ _extend_statement("rth", {"market_open", "market_close"})
86
+ _extend_statement("post", {"market_close", "post"})
87
+
88
+ # ---- ---- ---- Error Check ---- ---- ----
89
+ if len(extra_cols := columns - needed_cols) > 0:
90
+ schedule = schedule.drop(columns=[*extra_cols])
91
+ warnings.warn(
92
+ f"Attempting to mark trading sessions and the schedule ({columns = }) contains the "
93
+ f"extra columns: {extra_cols}. Returned sessions may not be labeled as desired."
94
+ )
95
+
96
+ start = timestamps[0]
97
+ end = timestamps[-1]
98
+ if start < schedule.iloc[0, 0]: # type: ignore
99
+ raise ValueError(
100
+ f"Insufficient Schedule. Needed Start-Time: {start.normalize().tz_localize(None)}. "
101
+ f"Schedule starts at: {schedule.iloc[0, 0]}"
102
+ )
103
+ if end > schedule.iloc[-1, -1]: # type: ignore
104
+ raise ValueError(
105
+ f"Insufficient Schedule. Needed End-Time: {end.normalize().tz_localize(None)}. "
106
+ f"Schedule ends at: {schedule.iloc[-1, -1]}"
107
+ )
108
+
109
+ # Trim the schedule to match the timeframe covered by the given timeseries
110
+ schedule = schedule[
111
+ (schedule.index >= start.normalize().tz_localize(None))
112
+ & (schedule.index <= end.normalize().tz_localize(None))
113
+ ]
114
+
115
+ backfilled_map = DEFAULT_LABEL_MAP | label_map
116
+ mapped_labels = [backfilled_map[label] for label in session_labels]
117
+ labels = pd.Series([mapped_labels]).repeat(len(schedule)).explode()
118
+ labels = pd.concat([labels, pd.Series([backfilled_map["closed"]])])
119
+
120
+ # Append on additional Edge-Case Bins so result doesn't include NaNs
121
+ bins = schedule.to_numpy().flatten()
122
+ bins = np.insert(bins, 0, bins[0].normalize())
123
+ bins = np.append(bins, bins[-1].normalize() + pd.Timedelta("1D"))
124
+
125
+ bins, _ind, _counts = np.unique(bins, return_index=True, return_counts=True)
126
+
127
+ if len(bins) - 1 != len(labels):
128
+ # np.Unique Dropped some bins, need to drop the associated labels
129
+ label_inds = (_ind + _counts - 1)[:-1]
130
+ labels = labels.iloc[label_inds]
131
+
132
+ return pd.Series(
133
+ pd.cut(timestamps, bins, closed != "left", labels=labels, ordered=False), # type: ignore
134
+ index=timestamps,
135
+ )
136
+
137
+
138
+ def merge_schedules(schedules, how="outer"):
139
+ """
140
+ Given a list of schedules will return a merged schedule. The merge method (how) will either return the superset
141
+ of any datetime when any schedule is open (outer) or only the datetime where all markets are open (inner)
142
+
143
+ CAVEATS:
144
+ * This does not work for schedules with breaks, the break information will be lost.
145
+ * Only "market_open" and "market_close" are considered, other market times are not yet supported.
146
+
147
+ :param schedules: list of schedules
148
+ :param how: outer or inner
149
+ :return: schedule DataFrame
150
+ """
151
+ all_cols = [x.columns for x in schedules]
152
+ all_cols = list(itertools.chain(*all_cols))
153
+ if ("break_start" in all_cols) or ("break_end" in all_cols):
154
+ warnings.warn(
155
+ "Merge schedules will drop the break_start and break_end from result."
156
+ )
157
+
158
+ result = schedules[0]
159
+ for schedule in schedules[1:]:
160
+ result = result.merge(schedule, how=how, right_index=True, left_index=True)
161
+ if how == "outer":
162
+ result["market_open"] = result.apply(
163
+ lambda x: min(x.market_open_x, x.market_open_y), axis=1
164
+ )
165
+ result["market_close"] = result.apply(
166
+ lambda x: max(x.market_close_x, x.market_close_y), axis=1
167
+ )
168
+ elif how == "inner":
169
+ result["market_open"] = result.apply(
170
+ lambda x: max(x.market_open_x, x.market_open_y), axis=1
171
+ )
172
+ result["market_close"] = result.apply(
173
+ lambda x: min(x.market_close_x, x.market_close_y), axis=1
174
+ )
175
+ else:
176
+ raise ValueError('how argument must be "inner" or "outer"')
177
+ result = result[["market_open", "market_close"]]
178
+ return result
179
+
180
+
181
+ def is_single_observance(holiday: "Holiday"):
182
+ "Returns the Date of the Holiday if it is only observed once, None otherwise."
183
+ return holiday.start_date if holiday.start_date == holiday.end_date else None # type: ignore ??
184
+
185
+
186
+ def all_single_observance_rules(calendar: "AbstractHolidayCalendar"):
187
+ "Returns a list of timestamps if the Calendar's Rules are all single observance holidays, None Otherwise"
188
+ observances = [is_single_observance(rule) for rule in calendar.rules]
189
+ return observances if all(observances) else None
190
+
191
+
192
+ def convert_freq(index, frequency):
193
+ """
194
+ Converts a DateTimeIndex to a new lower frequency
195
+
196
+ :param index: DateTimeIndex
197
+ :param frequency: frequency string
198
+ :return: DateTimeIndex
199
+ """
200
+ return pd.DataFrame(index=index).asfreq(frequency).index
201
+
202
+
203
+ SESSIONS = Literal[
204
+ "pre",
205
+ "post",
206
+ "RTH",
207
+ "pre_break",
208
+ "post_break",
209
+ "ETH",
210
+ "break",
211
+ "closed",
212
+ "closed_masked",
213
+ ]
214
+ MKT_TIMES = Literal[
215
+ "pre", "post", "market_open", "market_close", "break_start", "break_end"
216
+ ]
217
+
218
+
219
+ # region ---- ---- ---- Date Range Warning Types ---- ---- ----
220
+ class DateRangeWarning(UserWarning):
221
+ "Super Class to all Date_range Warning Types"
222
+
223
+
224
+ class OverlappingSessionWarning(DateRangeWarning):
225
+ """
226
+ Warning thrown when date_range is called with a timedelta that is larger than the
227
+ gap between two sessions leading to them overlapping.
228
+ This is only an issue when closed='right'/'both'/None and force_close=None
229
+
230
+ For Example, the following raises a warning because the 10:00 Timestamp that is from the 'pre'
231
+ session comes after the start of the 9:30 'RTH' session, but belongs to the 'pre' session
232
+ >>> date_range(NYSE, '2h', 'right', None, {'pre', 'RTH'}, merge_adjacent = False)
233
+ >>> ['2020-01-02 06:00:00', '2020-01-02 08:00:00',
234
+ '2020-01-02 10:00:00', '2020-01-02 11:30:00',
235
+ '2020-01-02 13:30:00', '2020-01-02 15:30:00',
236
+ '2020-01-02 17:30:00'],
237
+ This is particularly convoluted when closed='both'/None
238
+ >>> date_range(NYSE, '2h', 'both', None, {'pre', 'RTH'}, merge_adjacent = False)
239
+ >>> ['2020-01-02 04:00:00' (pre), '2020-01-02 06:00:00' (pre),
240
+ '2020-01-02 08:00:00' (pre), '2020-01-02 09:30:00' (rth),
241
+ '2020-01-02 10:00:00' (pre), '2020-01-02 11:30:00' (rth),
242
+ '2020-01-02 13:30:00' (rth), '2020-01-02 15:30:00' (rth),
243
+ '2020-01-02 17:30:00' (rth)],
244
+ """
245
+
246
+
247
+ class DisappearingSessionWarning(DateRangeWarning):
248
+ """
249
+ Warning thrown when date_range is called with a timedelta that is larger than an entire session
250
+ resulting in the session disappearing from the DatetimeIndex.
251
+
252
+ Only an issue when closed='right' and force_close = False
253
+ """
254
+
255
+
256
+ class MissingSessionWarning(DateRangeWarning):
257
+ """
258
+ Warning thrown when a date_range() call is made with a requested session,
259
+ but lacks the necessary columns. When this warning is ignored the returned
260
+ datetimeindex will simply lack the relevant sessions
261
+
262
+ e.g. 'pre' Session requested and schedule lacks 'pre' and/or 'market_open' column
263
+ """
264
+
265
+
266
+ class InsufficientScheduleWarning(DateRangeWarning):
267
+ """
268
+ Warning thrown when a date_range() call is made with a requested number of periods,
269
+ or start-date / end-date that exceed what was provided in the given schedule.
270
+
271
+ If a Schedule has an insufficient start and end date then this warning is thrown twice.
272
+
273
+ If this warning is thrown when date_range is called with a number of desired periods, then
274
+ the desired start/end date is an approximate value. This 'approximation' is biased to
275
+ overestimate the needed start/end time by about 1 week. This is done to limit the edge
276
+ cases where this warning could get thrown multiple times in a row.
277
+ """
278
+
279
+
280
+ def filter_date_range_warnings(
281
+ action: Literal["error", "ignore", "always", "default", "once"],
282
+ source: Union[
283
+ Iterable[type[DateRangeWarning]], type[DateRangeWarning]
284
+ ] = DateRangeWarning,
285
+ ):
286
+ """
287
+ Adjust the behavior of the date_range() warnings to the desired action.
288
+
289
+ :param action: - The desired change to the warning behavior
290
+ 'error': Escalate Warnings into Errors
291
+ 'ignore': Silence Warning Messages
292
+ 'once': Only display a message of the given category once
293
+ 'default': Reset the behavior of the given warning category
294
+ 'always': Always show the Warning of a given category
295
+
296
+ :param source: - The Category/Categories to apply the action to. Can be a single Warning or a list of warnings
297
+ default: DateRangeWarning (All Warnings)
298
+ Warning Types: MissingSessionWarning, OverlappingSessionWarning,
299
+ DisappearingSessionWarning, InsufficientScheduleWarning
300
+ """
301
+ if not isinstance(source, Iterable):
302
+ warnings.filterwarnings(action, category=source)
303
+ return
304
+
305
+ for src in source:
306
+ warnings.filterwarnings(action, category=src)
307
+
308
+
309
+ def parse_missing_session_warning(
310
+ err: MissingSessionWarning,
311
+ ) -> Tuple[set[SESSIONS], set[MKT_TIMES]]:
312
+ """
313
+ Parses a Missing Session Warning's Error Message.
314
+ :returns Tuple[set[str], set[str]]:
315
+ Set #1: The Missing Sessions
316
+ Set #2: The Missing Schedule Columns
317
+ """
318
+ splits = split(r"[{|}]", err.args[0].replace("'", ""))
319
+ return (set(splits[1].split(", ")), set(splits[3].split(", "))) # type: ignore
320
+
321
+
322
+ def parse_insufficient_schedule_warning(
323
+ err: InsufficientScheduleWarning,
324
+ ) -> Tuple[bool, pd.Timestamp, pd.Timestamp]:
325
+ """
326
+ Parses the information from an Insufficient Schedule Warning.
327
+ :returns Tuple[bool, pd.Timestamp, pd.Timestamp]:
328
+ bool: True == Range is missing from the start, False == Range missing from the end
329
+ Timestamp 1: Start of missing range
330
+ Timestamp 2: End of the missing range.
331
+ Note: The Timestamps are always ordered (t1 <= t2) and do not overlap with the original schedule.
332
+ If a supplemental schedule is generated it can be concatenated on without any overlapping indices.
333
+
334
+ """
335
+ matcher = finditer(r"\d{4}-\d{2}-\d{2}", err.args[0])
336
+ b = "Start-Time" in err.args[0]
337
+ t1 = pd.Timestamp(next(matcher).group())
338
+ t2 = pd.Timestamp(next(matcher).group())
339
+
340
+ if b:
341
+ t2 -= pd.Timedelta("1D")
342
+ else:
343
+ t2 += pd.Timedelta("1D")
344
+
345
+ return (b, t1, t2) if t1 <= t2 else (b, t2, t1)
346
+
347
+
348
+ # endregion
349
+
350
+
351
+ def date_range(
352
+ schedule: pd.DataFrame,
353
+ frequency: Union[str, pd.Timedelta, int, float],
354
+ closed: Union[Literal["left", "right", "both"], None] = "right",
355
+ force_close: Union[bool, None] = True,
356
+ session: Union[SESSIONS, Iterable[SESSIONS]] = {"RTH"},
357
+ merge_adjacent: bool = True,
358
+ start: Union[str, pd.Timestamp, int, float, None] = None,
359
+ end: Union[str, pd.Timestamp, int, float, None] = None,
360
+ periods: Union[int, None] = None,
361
+ ) -> pd.DatetimeIndex:
362
+ """
363
+ Interpolates a Market's Schedule at the desired frequency and returns the result as a DatetimeIndex.
364
+ This function is only valid for periods less than 1 Day, for longer periods use date_range_htf().
365
+
366
+ Note: The slowest part of this function is by far generating the necessary schedule (which in
367
+ turn is limited by pandas' date_range() function). If speed is a concern, store and update the
368
+ schedule as needed instead of generating it every time.
369
+
370
+ WARNINGS SYSTEM:
371
+ *There are multiple edge-case warnings that are thrown by this function. See the Docstrings
372
+ of each warning for more info. (DateRangeWarning, InsufficientScheduleWarning,
373
+ MissingSessionWarning, OverlappingSessionWarning, DisappearingSessionWarning)
374
+
375
+ *The thrown warnings can be ignored or escalated into catchable errors by using the
376
+ filter_date_range_warnings() function.
377
+
378
+ parse_missing_session_warning() & parse_insufficient_schedule_warning() exist to easily
379
+ process those warnings if they are escalated into errors.
380
+
381
+ PARAMETERS:
382
+
383
+ :param schedule: Schedule of a calendar which includes all the columns necessary
384
+ for the desired sessions.
385
+
386
+ :param frequency: String, Int/float (seconds) or pd.Timedelta that represents the desired
387
+ interval of the date_range. Intervals larger than 1D are not supported.
388
+
389
+ :param closed: the way the intervals are labeled
390
+ 'right': use the end of the interval
391
+ 'left': use the start of the interval
392
+ None / 'both': use the end of the interval but include the start of the first interval
393
+
394
+ :param force_close: How the last value of a trading session is handled
395
+ True: guarantee that the close of the trading session is the last value
396
+ False: guarantee that there is no value greater than the close of the trading session
397
+ None: leave the last value as it is calculated based on the closed parameter
398
+
399
+ :param session: A str representing a single session or an Iterable of the following Sessions.
400
+ RTH: The Default Option. This is [Market_open, Market_close], if the schedule includes a
401
+ break then the break is excluded from the returned datetime index.
402
+ ETH: [pre, market_open] & [market_close, post]
403
+ pre: [pre, market_open]
404
+ post: [market_close, post]
405
+ break: [break_start, break_end]
406
+ pre_break: [market_open, break_start]
407
+ post_break: [break_end, market_close]
408
+ closed: [market_close, market_open (of the next day)] If ETH market times are given then
409
+ this will be [post, pre (of the next day)] instead. The last session will end at
410
+ Midnight of the timezone the schedule is given in.
411
+ closed_masked: Same as closed, but Weekends & Holidays are ignored. Instead, the Datetime
412
+ index stops at Midnight on the trading day before the break and resumes at midnight
413
+ prior to the next trading day. **Note: This is Midnight of the Timezone the schedule is
414
+ given in, not Midnight of the exchange's tz since the exchange's tz is not known.
415
+
416
+ :param merge_adjacent: Bool representing if adjacent sessions should be merged into a single session.
417
+ For Example, NYSE w/ session={'RTH', 'ETH'}, frequency=2h, closed=left, force_close=False
418
+ merge_adjacent == True => [pre, post]
419
+ >>> ['2020-01-02 04:00:00', '2020-01-02 06:00:00',
420
+ '2020-01-02 08:00:00', '2020-01-02 10:00:00',
421
+ '2020-01-02 12:00:00', '2020-01-02 14:00:00',
422
+ '2020-01-02 16:00:00', '2020-01-02 18:00:00']
423
+ merge_adjacent == False => [pre, market_open] & [market_open, market_close] & [market_close, post]
424
+ >>> ['2020-01-02 04:00:00', '2020-01-02 06:00:00',
425
+ '2020-01-02 08:00:00', '2020-01-02 09:30:00',
426
+ '2020-01-02 11:30:00', '2020-01-02 13:30:00',
427
+ '2020-01-02 15:30:00', '2020-01-02 16:00:00',
428
+ '2020-01-02 18:00:00']
429
+ merge_adjacent=False re-aligns the timestamps to the session, but this results in
430
+ the difference between timestamps not always equaling the desired frequency.
431
+
432
+ :param start: Optional [String, Int/float (POSIX seconds) or pd.Timestamp] of the desired start time.
433
+ :If left as None then the start-time of the the Schedule is used.
434
+ :If no TZ info is given it will be interpreted in the same timezone as the first column
435
+ of the schedule
436
+ :Start can be a Day and Time, but the returned index will still be aligned to the underlying
437
+ schedule. e.g. Session = [9:30am, 12pm], frequency=7min, start=9:45am. Underlying session
438
+ = [9:30, 9:37, 9:44, 9:51, ...] => returned DatetimeIndex = [9:51, ...]
439
+
440
+ :param end: Optional [String, Int/float (POSIX seconds) or pd.Timestamp] of the desired end time.
441
+ :If left as None then the end-time of the the Schedule is used.
442
+ :If no TZ info is given it will be interpreted in the same timezone as the first column
443
+ **Note: The time given is an absolute value. i.e. end="2020-01-01" == "2020-01-01 00:00"
444
+ returning times prior to Midnight of "2019-12-31", not to the EOD of "2020-01-01"
445
+
446
+ :param periods: Optional Integer number of periods to return. If a Period count, Start time,
447
+ and End time are given the period count is ignored.
448
+ None: Period count is ignored. Returned index is all periods in [Start, End]
449
+ Int: # of periods to return. By default, this is the first N periods following the start.
450
+ If an end time is given then this is the N periods prior to the End Time (inclusive).
451
+ CAVEAT: When Force_close == False & closed == 'right'/'both' the number of periods returned
452
+ may be less than the parameter given.
453
+
454
+ :return: pd.DatetimeIndex of datetime64[ns, TZ-Aware]
455
+ """
456
+ # ---- ---- Error Check Inputs ---- ----
457
+ if closed not in ("left", "right", "both", None):
458
+ raise ValueError("closed must be 'left', 'right', 'both' or None.")
459
+ if force_close not in (True, False, None):
460
+ raise ValueError("force_close must be True, False or None.")
461
+ if merge_adjacent not in (True, False):
462
+ raise ValueError("merge_adjacent must be True or False")
463
+
464
+ # ---- ---- Standardize Frequency Param ---- ----
465
+ if isinstance(frequency, (int, float)):
466
+ frequency = int(frequency * 1_000_000_000)
467
+ try:
468
+ frequency = pd.Timedelta(frequency)
469
+ except ValueError as e:
470
+ raise ValueError(f"Market Calendar Date_range Timeframe Error: {e}") from e
471
+ if frequency <= pd.Timedelta("0s"):
472
+ raise ValueError("Market Calendar Date_Range Frequency must be Positive.")
473
+ if frequency > pd.Timedelta("1D"):
474
+ raise ValueError(
475
+ "Market Calendar Date_Range Frequency Cannot Be longer than '1D'."
476
+ )
477
+
478
+ session_list, mask = _make_session_list(
479
+ set(schedule.columns), session, merge_adjacent
480
+ )
481
+ if len(session_list) == 0:
482
+ return pd.DatetimeIndex([], dtype="datetime64[ns, UTC]")
483
+
484
+ session_times = _reconfigure_schedule(schedule, session_list, mask)
485
+ # Trim off all 0 length sessions
486
+ session_times = session_times[session_times.start.ne(session_times.end)]
487
+ _error_check_sessions(session_times, frequency, closed, force_close)
488
+
489
+ tz = schedule[session_list[0][0]].dt.tz # copy tz info from schedule
490
+ dtype = schedule[session_list[0][0]].dtype # copy dtype info from schedule
491
+ start, end, periods = _standardize_times(schedule, start, end, periods, tz)
492
+
493
+ time_series = _calc_time_series(
494
+ session_times, frequency, closed, force_close, start, end, periods
495
+ )
496
+ time_series.name = None
497
+
498
+ return pd.DatetimeIndex(time_series, tz=tz, dtype=dtype)
499
+
500
+
501
+ # region ------------------ Date Range LTF Subroutines ------------------
502
+
503
+
504
def _make_session_list(
    columns: set, sessions: Union[str, Iterable], merge_adjacent: bool
) -> Tuple[list, bool]:
    """Create a list of (Session Start, Session End) column-name tuples.

    :param columns: set of the schedule's column names. Mutated in place: '*_wrap'
        marker names are added for sessions that wrap around midnight.
    :param sessions: a single session name or an Iterable of names from SESSIONS
    :param merge_adjacent: when True, a column that ends one session and starts the
        next is dropped so back-to-back sessions merge into one [start, end] pair
    :return: ([(start_col, end_col), ...], bool: whether 'closed_masked' was requested)
    :raises ValueError: if a requested session name is not a member of SESSIONS
    """
    session_times = []
    missing_cols = set()  # schedule columns that were required but absent
    missing_sess = set()  # requested sessions skipped because of missing columns
    sessions = {sessions} if isinstance(sessions, str) else set(sessions)

    if len(extras := sessions.difference(set(SESSIONS.__args__))) > 0:  # type: ignore
        raise ValueError(f"Unknown Date_Range Market Session: {extras}")

    if "ETH" in sessions:  # Standardize ETH to 'pre' and 'post'
        sessions = sessions - {"ETH"} | {"pre", "post"}
    if "closed_masked" in sessions:  # closed_masked == 'closed' for this step
        sessions |= {"closed"}
    if "pre" in columns:  # Add wrap-around sessions
        columns |= {"pre_wrap"}
    if "market_open" in columns:
        columns |= {"market_open_wrap"}

    def _extend_statement(session, parts):
        # Append the column pair for 'session' when the schedule provides every
        # needed column; otherwise record what is missing for the warning below.
        # NOTE: closes over 'sessions', which may be rebound (ETH/RTH expansion)
        # before some calls — the binding current at call time is the one read.
        if session not in sessions:
            return
        if columns.issuperset(parts):
            session_times.extend(parts)
        else:
            missing_sess.update({session})
            missing_cols.update(set(parts) - columns)

    # Append session_start, session_end for each desired session *in session order*
    _extend_statement("pre", ("pre", "market_open"))
    if {"break_start", "break_end"}.issubset(columns):
        # If the schedule has breaks then sub-divide RTH into pre & post break sessions
        if "RTH" in sessions:
            sessions = sessions - {"RTH"} | {"pre_break", "post_break"}
        _extend_statement("pre_break", ("market_open", "break_start"))
        _extend_statement("break", ("break_start", "break_end"))
        _extend_statement("post_break", ("break_end", "market_close"))
    else:
        _extend_statement("RTH", ("market_open", "market_close"))
    _extend_statement("post", ("market_close", "post"))

    # Closed can mean [close, open], [close, pre], [pre, post], or [post, open]. Adjust accordingly.
    s_start = "post" if "post" in columns else "market_close"
    s_end = "pre_wrap" if "pre" in columns else "market_open_wrap"
    _extend_statement("closed", (s_start, s_end))

    if len(missing_sess) > 0:
        warnings.warn(
            f"Requested Sessions: {missing_sess}, but schedule is missing columns: {missing_cols}."
            "\nResulting DatetimeIndex will lack those sessions. ",
            category=MissingSessionWarning,
        )

    if merge_adjacent:
        drop_set = set()
        for i in range(1, len(session_times) - 1, 2):
            # A column that ends one session and immediately starts the next is a
            # shared boundary; mark it so both occurrences are removed below.
            if session_times[i] == session_times[i + 1]:
                drop_set |= {session_times[i]}

        # Guaranteed to drop in pairs => no check needed before zipping
        session_times = [t for t in session_times if t not in drop_set]

    # Zip the flat list into a list of pairs
    session_pairs = list(zip(*(iter(session_times),) * 2))

    return session_pairs, "closed_masked" in sessions
572
+
573
+
574
+ def _standardize_times(
575
+ schedule, start, end, periods, tz
576
+ ) -> Tuple[pd.Timestamp, pd.Timestamp, Union[int, None]]:
577
+ "Standardize start and end into a timestamp of the relevant timezone"
578
+ if all((start, end, periods)):
579
+ periods = None # Ignore Periods if all 3 params are given.
580
+
581
+ if start is not None:
582
+ if isinstance(start, (int, float)):
583
+ start *= 1_000_000_000
584
+ try:
585
+ start = pd.Timestamp(start)
586
+ if start.tz is None:
587
+ start = start.tz_localize(tz)
588
+ except ValueError as e:
589
+ raise ValueError(f"Invalid Time ({start = }) given to date_range()") from e
590
+
591
+ if start < schedule.index[0].tz_localize(tz):
592
+ warnings.warn(
593
+ f"Insufficient Schedule. Requested Start-Time: {start.normalize().tz_localize(None)}. "
594
+ f"Schedule starts at: {schedule.index[0].normalize().tz_localize(None)}",
595
+ category=InsufficientScheduleWarning,
596
+ )
597
+
598
+ if end is not None:
599
+ if isinstance(end, (int, float)):
600
+ end *= 1_000_000_000
601
+ try:
602
+ end = pd.Timestamp(end)
603
+ if end.tz is None and tz is not None:
604
+ end = end.tz_localize(tz)
605
+ except ValueError as e:
606
+ raise ValueError(f"Invalid Time ({end = }) given to date_range()") from e
607
+
608
+ if end > schedule.index[-1].tz_localize(tz) + pd.Timedelta("1D"):
609
+ # Checking against the day and not the specific session since so requesting a time
610
+ # after the last session's close but before the next day doesn't throw a warning.
611
+ requested_end = end.normalize().tz_localize(None) - pd.Timedelta("1D")
612
+ warnings.warn(
613
+ f"Insufficient Schedule. Requested End-Time: {requested_end}. "
614
+ f"Schedule ends at: {schedule.index[-1].normalize().tz_localize(None)}",
615
+ category=InsufficientScheduleWarning,
616
+ )
617
+
618
+ if start is not None and end is not None and start > end:
619
+ raise ValueError(
620
+ "Date_range() given a start-date that occurs after the given end-date. "
621
+ f"{start = }, {end = }"
622
+ )
623
+
624
+ return start, end, periods
625
+
626
+
627
+ def _reconfigure_schedule(schedule, session_list, mask_close) -> pd.DataFrame:
628
+ "Reconfigure a schedule into a sorted dataframe of [start, end] times for each session"
629
+
630
+ sessions = []
631
+
632
+ for start, end in session_list:
633
+ if not end.endswith("_wrap"):
634
+ # Simple Session where 'start' occurs before 'end'
635
+ sessions.append(
636
+ schedule[[start, end]]
637
+ .rename(columns={start: "start", end: "end"})
638
+ .set_index("start", drop=False)
639
+ )
640
+ continue
641
+
642
+ # 'closed' Session that wraps around midnight. Shift the 'end' col by 1 Day
643
+ end = end.rstrip("_wrap")
644
+ tmp = pd.DataFrame(
645
+ {
646
+ "start": schedule[start],
647
+ "end": schedule[end].shift(-1),
648
+ }
649
+ ).set_index("start", drop=False)
650
+
651
+ # Shift(-1) leaves last index of 'end' as 'NaT'
652
+ # Set the [-1, 'end' ('end' === 1)] cell to Midnight of the 'start' time of that row.
653
+ tmp.iloc[-1, 1] = tmp.iloc[-1, 0].normalize() + pd.Timedelta("1D") # type: ignore
654
+
655
+ if mask_close:
656
+ # Do some additional work to split 'closed' sessions that span weekends/holidays
657
+ sessions_to_split = tmp["end"] - tmp["start"] > pd.Timedelta("1D")
658
+
659
+ split_strt = tmp[sessions_to_split]["start"]
660
+ split_end = tmp[sessions_to_split]["end"]
661
+
662
+ sessions.append(
663
+ pd.DataFrame( # From start of the long close to Midnight
664
+ {
665
+ "start": split_strt,
666
+ "end": split_strt.dt.normalize() + pd.Timedelta("1D"),
667
+ }
668
+ ).set_index("start", drop=False)
669
+ )
670
+ sessions.append(
671
+ pd.DataFrame( # From Midnight to the end of the long close
672
+ {
673
+ "start": split_end.dt.normalize(),
674
+ "end": split_end,
675
+ }
676
+ ).set_index("start", drop=False)
677
+ )
678
+
679
+ # leave tmp as all the sessions that were not split
680
+ tmp = tmp[~sessions_to_split]
681
+
682
+ sessions.append(tmp)
683
+
684
+ return pd.concat(sessions).sort_index()
685
+
686
+
687
+ def _error_check_sessions(session_times, timestep, closed, force_close):
688
+ if session_times.start.gt(session_times.end).any():
689
+ raise ValueError(
690
+ "Desired Sessions from the Schedule contain rows where session start < session end, "
691
+ "please correct the schedule"
692
+ )
693
+
694
+ # Disappearing Session
695
+ if force_close is False and closed == "right":
696
+ # only check if needed
697
+ if (session_times.end - session_times.start).lt(timestep).any():
698
+ warnings.warn(
699
+ "An interval of the chosen frequency is larger than some of the trading sessions, "
700
+ "while closed='right' and force_close=False. This will make those trading sessions "
701
+ "disappear. Use a higher frequency or change the values of closed/force_close, to "
702
+ "keep this from happening.",
703
+ category=DisappearingSessionWarning,
704
+ )
705
+
706
+ # Overlapping Session
707
+ if force_close is None and closed != "left":
708
+ num_bars = _num_bars_ltf(session_times, timestep, closed)
709
+ end_times = session_times.start + num_bars * timestep
710
+
711
+ if end_times.gt(session_times.start.shift(-1)).any():
712
+ warnings.warn(
713
+ "The desired frequency results in date_range() generating overlapping sessions. "
714
+ "This can happen when the timestep is larger than a session, or when "
715
+ "merge_session = False and a session is not evenly divisible by the timestep. "
716
+ "The overlapping timestep can be deleted with force_close = True or False",
717
+ category=OverlappingSessionWarning,
718
+ )
719
+
720
+
721
+ def _num_bars_ltf(session_times, timestep, closed) -> pd.Series:
722
+ "Calculate the number of timestamps needed for each trading session."
723
+ if closed in ("both", None):
724
+ return np.ceil((session_times.end - session_times.start) / timestep) + 1
725
+ else:
726
+ return np.ceil((session_times.end - session_times.start) / timestep)
727
+
728
+
729
+ def _course_trim_to_period_count(num_bars, periods, reverse) -> pd.Series:
730
+ """
731
+ Course Trim the Session times to the desired period count.
732
+ Large enough of a sub-routine to merit its own function call.
733
+ """
734
+ if reverse:
735
+ # If end-date is given calculate sum in reverse order
736
+ num_bars = num_bars[::-1]
737
+
738
+ _sum = num_bars.cumsum()
739
+
740
+ if _sum.iloc[-1] < periods:
741
+ # Insufficient Number of Periods. Try to estimate an ending time from the data given.
742
+ # delta = (end_date - start_date) / (cumulative # of periods) * (periods still needed) * fudge factor
743
+ delta = abs(
744
+ # (end_date - start_date) / (cumulative # of periods)
745
+ ((_sum.index[-1] - _sum.index[0]) / _sum.iloc[-1])
746
+ * (periods - _sum.iloc[-1]) # (periods still needed)
747
+ * 1.05 # (Fudge Factor for weekends/holidays)
748
+ )
749
+ # delta = math.ceil(delta) + '1W'
750
+ delta = (delta // pd.Timedelta("1D") + 8) * pd.Timedelta("1D")
751
+ # The 1.05 Factor handles when the schedule is short by a few months, the + '1W' handles
752
+ # when the schedule is short by only a few periods. While 1 Week is absolute overkill,
753
+ # generating the extra few days is very little extra cost compared to throwing this error
754
+ # a second or even third time.
755
+
756
+ if reverse:
757
+ approx_start = _sum.index[-1] - delta
758
+ warnings.warn(
759
+ f"Insufficient Schedule. Requested Approx Start-Time: {approx_start}. "
760
+ f"Schedule starts at: {_sum.index[-1].normalize().tz_localize(None)}",
761
+ category=InsufficientScheduleWarning,
762
+ )
763
+ else:
764
+ approx_end = _sum.index[-1] + delta
765
+ warnings.warn(
766
+ f"Insufficient Schedule. Requested Approx End-Time: {approx_end}. "
767
+ f"Schedule ends at: {_sum.index[-1].normalize().tz_localize(None)}",
768
+ category=InsufficientScheduleWarning,
769
+ )
770
+
771
+ sessions_to_keep = _sum < periods
772
+ # Shifting Ensures the number of needed periods are generated, but no more.
773
+ sessions_to_keep = sessions_to_keep.shift(1, fill_value=True)
774
+
775
+ if reverse:
776
+ # If end-date is given calculate un-reverse the order of the series
777
+ sessions_to_keep = sessions_to_keep[::-1]
778
+
779
+ return sessions_to_keep
780
+
781
+
782
def _calc_time_series(
    session_times, timestep, closed, force_close, start, end, periods
) -> pd.Series:
    """Interpolate each session into a datetime series at the desired frequency.

    :param session_times: pd.DataFrame of 'start'/'end' session times (from _reconfigure_schedule)
    :param timestep: pd.Timedelta interval between generated timestamps
    :param closed: 'left', 'right', 'both' or None, see date_range()
    :param force_close: True, False or None, see date_range()
    :param start: Optional Timestamp lower bound, already standardized
    :param end: Optional Timestamp upper bound, already standardized
    :param periods: Optional int period count, see date_range()
    :return: pd.Series of Timestamps (tz/dtype re-applied by the caller)
    """
    # region ---- ---- ---- Trim the Sessions ---- ---- ----
    # Compare 'start' to the session end times so that if 'start' is in the middle of a session
    # that session remains in session_times. Vice-versa for 'end'.
    if start is not None:
        session_times = session_times[session_times.end > start]
    if end is not None:
        session_times = session_times[session_times.start < end]
    if len(session_times) == 0:
        return pd.Series([])

    # Override the First Session's Start and Last Session's End times if needed
    if start is not None and start > session_times.loc[session_times.index[0], "start"]:
        # Align the start to a multiple of the timestep after the session's beginning.
        # This is to make the returned DTIndex consistent across all start/end/period settings.
        session_start = session_times.loc[session_times.index[0], "start"]
        start_aligned = session_start + (
            ceil((start - session_start) / timestep) * timestep
        )
        session_times.loc[session_times.index[0], "start"] = start_aligned
    if end is not None and end < session_times.loc[session_times.index[-1], "end"]:
        # NOTE(review): the end-time is aligned against the *first* session's start,
        # presumably so the whole index shares one grid — confirm this anchoring is
        # intended when sessions are not evenly spaced.
        session_start = session_times.loc[session_times.index[0], "start"]
        end_aligned = session_start + (
            floor((end - session_start) / timestep) * timestep
        )
        session_times.loc[session_times.index[-1], "end"] = end_aligned

    num_bars = _num_bars_ltf(session_times, timestep, closed)

    if periods is not None:
        sessions_to_keep = _course_trim_to_period_count(
            num_bars, periods, end is not None
        )
        num_bars = num_bars[sessions_to_keep]
        session_times = session_times[sessions_to_keep]

    # endregion

    # Repeat each session's start once per bar; the cumcount below then walks
    # forward through each session in multiples of the timestep.
    starts = session_times.start.repeat(num_bars)  # type: ignore

    if closed == "right":
        # Right side of addition is cumulative time since session start in multiples of timestep
        time_series = starts + (starts.groupby(starts.index).cumcount() + 1) * timestep
    else:
        time_series = starts + (starts.groupby(starts.index).cumcount()) * timestep

    if force_close is not None:
        # Trim off all timestamps that stretched beyond their intended session
        time_series = time_series[time_series.le(session_times.end.repeat(num_bars))]

        if force_close:
            # Guarantee every session's close appears as its last value.
            time_series = pd.concat([time_series, session_times.end])

    time_series = time_series.drop_duplicates().sort_values()  # type: ignore

    if periods is not None and len(time_series) > 0:
        # Although likely redundant, Fine Trim to desired period count.
        if end is not None:
            # Anchored at the end: keep the last 'periods' values.
            s_len = len(time_series)
            time_series = time_series[max(s_len - periods, 0) : s_len]
        else:
            time_series = time_series[0:periods]

    return time_series
849
+
850
+
851
+ # endregion
852
+
853
+
854
# Type aliases for the date_range_htf() parameters.
PeriodCode = Literal["D", "W", "M", "Q", "Y"]
Day_Anchor = Literal["SUN", "MON", "TUE", "WED", "THU", "FRI", "SAT"]
Month_Anchor = Literal[
    "JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
]

# These are needed because the pandas Period object is not consistent with date_range.
# pd.date_range(s, e, freq='W-SUN') == [DatetimeIndex of all Sundays] (as expected)
# but pd.Timestamp([A Sunday]).to_period('W-SUN').start_time == [The Monday Prior???]
# The maps below rotate the anchors one step backwards (each day/month maps to the
# one preceding it) so grouping periods line up with date_range's anchoring.
days_rolled = list(Day_Anchor.__args__)
days_rolled.insert(0, days_rolled.pop())
weekly_roll_map = dict(zip(Day_Anchor.__args__, days_rolled))

months_rolled = list(Month_Anchor.__args__)
months_rolled.insert(0, months_rolled.pop())
yearly_roll_map = dict(zip(Month_Anchor.__args__, months_rolled))
870
+
871
+
872
def date_range_htf(
    cal: "CustomBusinessDay",
    frequency: Union[str, pd.Timedelta, int, float],
    start: Union[str, pd.Timestamp, int, float, None] = None,
    end: Union[str, pd.Timestamp, int, float, None] = None,
    periods: Union[int, None] = None,
    closed: Union[Literal["left", "right"], None] = "right",
    *,
    day_anchor: Day_Anchor = "SUN",
    month_anchor: Month_Anchor = "JAN",
) -> pd.DatetimeIndex:
    """
    Returns a Normalized DatetimeIndex from the start-date to End-Date for Time periods of 1D and Higher.

    Unless using a custom calendar, it is advised to call the date_range_htf() method of the desired calendar.
    This is because default anchors may change, or a single calendar may not be sufficient to model a market.

    For example, NYSE has two calendars: The first covers pre-1952 where Saturdays were trading days. The second
    covers post-1952 where Saturdays are closed.

    PARAMETERS:

    :param cal: CustomBusinessDay Calendar associated with a MarketCalendar. This can be retrieved by
        calling the holidays() method of a MarketCalendar.

    :param frequency: String, Int/float (POSIX seconds) or pd.Timedelta of the desired frequency.
        :Must be Greater than '1D' and an integer multiple of the base frequency (D, W, M, Q, or Y)
        :Important Note: Ints/Floats & Timedeltas are always considered as 'Open Business Days',
            '2D' == Every Other Business Day, '3D' == Every 3rd B.Day, '7D' == Every 7th B.Day
        :Higher periods (passed as strings) align to the beginning or end of the relevant period
        :i.e. '1W' == First/[Last] Trading Day of each Week, '1Q' == First/[Last] Day of every Quarter

    :param start: String, Int/float (POSIX seconds) or pd.Timestamp of the desired start time.
        :The Time & Timezone information is ignored. Only the Normalized Day is considered.

    :param end: String, Int/float (POSIX seconds) or pd.Timestamp of the desired end time.
        :The Time & Timezone information is ignored. Only the Normalized Day is considered.

    :param periods: Optional Integer number of periods to return. If a Period count, Start time,
        and End time are given the period count is ignored.

    :param closed: Literal['left', 'right']. Method used to close each range.
        :left: First open trading day of the Session is returned (e.g. First Open Day of The Month)
        :right: Last open trading day of the Session is returned (e.g. Last Open Day of The Month)
        :Note, This has no effect when the desired frequency is a number of days.

    :param day_anchor: Day to Anchor the start of the Weekly timeframes to. Default 'SUN'.
        :To get the First/Last Days of the trading Week then the Anchor needs to be on a day the
            relevant market is closed.
        :This can be set so that a specific day each week is returned.
        :freq='1W' & day_anchor='WED' Will return Every 'WED' when the market is open, and nearest day
            to the left or right (based on 'closed') when the market is closed.
        Options: ["SUN", "MON", "TUE", "WED", "THU", "FRI", "SAT"]

    :param month_anchor: Month to Anchor the start of the year to for Quarter and Yearly timeframes.
        :Default 'JAN' for Calendar Quarters/Years. Can be set to 'JUL' to return Fiscal Years
        Options: ["JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"]
    """

    start, end, periods = _error_check_htf_range(start, end, periods)
    mult, _period_code = _standardize_htf_freq(frequency)

    if _period_code == "D":
        if mult == 1:
            # When desiring a frequency of '1D' default to pd.date_range. It will give the same
            # answer but it is more performant than the method in _cal_day_range.
            return pd.date_range(start, end, periods, freq=cal)
        else:
            return _cal_day_range(cal, start, end, periods, mult)

    elif _period_code == "W":
        # Weekly: anchor the range on 'day_anchor', group by the rolled-back anchor.
        freq = str(mult) + "W-" + day_anchor.upper()
        grouping_period = "W-" + weekly_roll_map[day_anchor.upper()]

        return _cal_WMQY_range(cal, start, end, periods, freq, grouping_period, closed)

    elif _period_code == "M":
        # Monthly: 'S' (month-start) when closed='left', otherwise 'E' (month-end).
        freq = str(mult) + "M" + ("S" if closed == "left" else "E")
        return _cal_WMQY_range(cal, start, end, periods, freq, "M", closed)

    else:  # Yearly & Quarterly Period
        freq = str(mult) + _period_code
        # Start-anchored ranges use the anchor month directly; end-anchored ranges
        # must use the rolled-back month so period boundaries line up.
        freq += (
            "S-" + month_anchor.upper()
            if closed == "left"
            else "E-" + yearly_roll_map[month_anchor.upper()]
        )
        grouping_period = _period_code + "-" + yearly_roll_map[month_anchor.upper()]

        return _cal_WMQY_range(cal, start, end, periods, freq, grouping_period, closed)
962
+
963
+
964
+ # region ---- ---- ---- Date Range HTF Subroutines ---- ---- ----
965
+
966
+
967
+ def _error_check_htf_range(
968
+ start, end, periods: Union[int, None]
969
+ ) -> Tuple[Union[pd.Timestamp, None], Union[pd.Timestamp, None], Union[int, None]]:
970
+ "Standardize and Error Check Start, End, and period params"
971
+ if periods is not None:
972
+ if not isinstance(periods, int):
973
+ raise ValueError(
974
+ f"Date_Range_HTF Must be either an int or None. Given {type(periods)}"
975
+ )
976
+ if periods < 0:
977
+ raise ValueError("Date_range_HTF Periods must be Positive.")
978
+
979
+ if isinstance(start, (int, float)):
980
+ start = int(start * 1_000_000_000)
981
+ if isinstance(end, (int, float)):
982
+ end = int(end * 1_000_000_000)
983
+
984
+ if start is not None:
985
+ start = pd.Timestamp(start).normalize().tz_localize(None)
986
+ if end is not None:
987
+ end = pd.Timestamp(end).normalize().tz_localize(None)
988
+
989
+ if all((start, end, periods)):
990
+ periods = None # Ignore Periods if passed too many params
991
+ if len([param for param in (start, end, periods) if param is not None]) < 2:
992
+ raise ValueError(
993
+ "Date_Range_HTF must be given two of the three following params: (start, end, periods)"
994
+ )
995
+
996
+ if start is not None and end is not None and end < start:
997
+ raise ValueError("Date_Range_HTF() Start-Date must be before the End-Date")
998
+
999
+ return start, end, periods
1000
+
1001
+
1002
+ def _standardize_htf_freq(
1003
+ frequency: Union[str, pd.Timedelta, int, float]
1004
+ ) -> Tuple[int, PeriodCode]:
1005
+ "Standardize the frequency multiplier and Code, throwing errors as needed."
1006
+ if isinstance(frequency, str):
1007
+ if len(frequency) == 0:
1008
+ raise ValueError("Date_Range_HTF Frequency is an empty string.")
1009
+ if len(frequency) == 1:
1010
+ frequency = "1" + frequency # Turn 'D' into '1D' for all period codes
1011
+ if frequency[-1].upper() in {"W", "M", "Q", "Y"}:
1012
+ try:
1013
+ if (mult := int(frequency[0:-1])) <= 0:
1014
+ raise ValueError()
1015
+ return mult, frequency[-1].upper() # type: ignore
1016
+ except ValueError as e:
1017
+ raise ValueError(
1018
+ "Date_Range_HTF() Week, Month, Quarter and Year frequency must "
1019
+ "have a positive integer multiplier"
1020
+ ) from e
1021
+
1022
+ # All remaining frequencies (int, float, strs, & Timedeltas) are parsed as business days.
1023
+ if isinstance(frequency, (int, float)): # Convert To Seconds
1024
+ frequency = int(frequency * 1_000_000_000)
1025
+
1026
+ frequency = pd.Timedelta(frequency)
1027
+ if frequency < pd.Timedelta("1D"):
1028
+ raise ValueError("Date_Range_HTF() Frequency must be '1D' or Higher.")
1029
+ if frequency % pd.Timedelta("1D") != pd.Timedelta(0):
1030
+ raise ValueError(
1031
+ "Date_Range_HTF() Week and Day frequency must be an integer multiple of Days"
1032
+ )
1033
+
1034
+ return frequency.days, "D"
1035
+
1036
+
1037
+ def _days_per_week(weekmask: Union[Iterable, str]) -> int:
1038
+ "Used to get a more accurate estimate of the number of days per week"
1039
+ # Return any 'Array Like' Representation
1040
+ if not isinstance(weekmask, str):
1041
+ return len([day for day in weekmask if bool(day)])
1042
+
1043
+ if len(weekmask) == 0:
1044
+ raise ValueError("Weekmask cannot be blank")
1045
+
1046
+ weekmask = weekmask.upper()
1047
+ day_abbrs = {day for day in weekly_roll_map.values() if day in weekmask}
1048
+ if len(day_abbrs) != 0:
1049
+ return len(day_abbrs)
1050
+
1051
+ # Weekmask Something like '0111110'
1052
+ return len([day for day in weekmask if bool(day)])
1053
+
1054
+
1055
+ def _cal_day_range(
1056
+ cb_day: "CustomBusinessDay", start, end, periods, mult
1057
+ ) -> pd.DatetimeIndex:
1058
+ """
1059
+ Returns a Normalized DateTimeIndex of Open Buisness Days.
1060
+ Exactly two of the (start, end, periods) arguments must be given.
1061
+
1062
+ ** Arguments should be Type/Error Checked before calling this function **
1063
+
1064
+ :param cb_day: CustomBusinessDay Object from the respective calendar
1065
+ :param start: Optional Start-Date. Must be a Normalized, TZ-Naive pd.Timestamp
1066
+ :param end: Optional End-Date. Must be a Normalized, TZ-Naive pd.Timestamp
1067
+ :param periods: Optional Number of periods to return
1068
+ :param mult: Integer Multiple of buisness days between data-points.
1069
+ e.g: 1 == Every Business Day, 2 == Every Other B.Day, 3 == Every Third B.Day, etc.
1070
+ :returns: DateRangeIndex[datetime64[ns]]
1071
+ """
1072
+
1073
+ # Ensure Start and End are open Business days in the desired range
1074
+ if start is not None:
1075
+ start = cb_day.rollforward(start)
1076
+ if end is not None:
1077
+ end = cb_day.rollback(end)
1078
+
1079
+ # ---- Start-Date to End-Date ----
1080
+ if isinstance(start, pd.Timestamp) and isinstance(end, pd.Timestamp):
1081
+ num_days = (end - start) / mult
1082
+ # Get a better estimate of the number of open days since date_range calc is slow
1083
+ est_open_days = (
1084
+ (num_days // 7) * _days_per_week(cb_day.weekmask)
1085
+ ) + num_days % pd.Timedelta("1W")
1086
+
1087
+ # Should always produce a small overestimate since Holidays aren't accounted for.
1088
+ est_open_days = ceil(est_open_days / pd.Timedelta("1D"))
1089
+ _range = pd.RangeIndex(0, est_open_days * mult, mult)
1090
+
1091
+ dt_index = pd.DatetimeIndex(start + _range * cb_day, dtype="datetime64[ns]")
1092
+ return dt_index[dt_index <= end]
1093
+
1094
+ # ---- Periods from Start-Date ----
1095
+ elif isinstance(start, pd.Timestamp):
1096
+ _range = pd.RangeIndex(0, periods * mult, mult)
1097
+ return pd.DatetimeIndex(start + _range * cb_day, dtype="datetime64[ns]")
1098
+
1099
+ # ---- Periods from End-Date ----
1100
+ else:
1101
+ # Ensure the end-date is the first valid Trading Day <= given end-date
1102
+ end = cb_day.rollback(end)
1103
+ _range = pd.RangeIndex(0, -1 * periods * mult, -1 * mult)
1104
+
1105
+ return pd.DatetimeIndex(end + _range * cb_day, dtype="datetime64[ns]")[::-1]
1106
+
1107
+
1108
+ def _cal_WMQY_range(
1109
+ cb_day: "CustomBusinessDay",
1110
+ start: Union[pd.Timestamp, None],
1111
+ end: Union[pd.Timestamp, None],
1112
+ periods: Union[int, None],
1113
+ freq: str,
1114
+ grouping_period: str,
1115
+ closed: Union[Literal["left", "right"], None] = "right",
1116
+ ):
1117
+ """
1118
+ Return A DateRangeIndex of the Weekdays that mark either the start or end of each
1119
+ buisness week based on the 'closed' parameter.
1120
+
1121
+ ** Arguments should be Type/Error Checked before calling this function **
1122
+
1123
+ :param cb_day: CustomBusinessDay Object from the respective calendar
1124
+ :param start: Optional Start-Date. Must be a Normalized, TZ-Naive pd.Timestamp
1125
+ :param end: Optional End-Date. Must be a Normalized, TZ-Naive pd.Timestamp
1126
+ :param periods: Optional Number of periods to return
1127
+ :param freq: Formatted frequency of '1W' and Higher with desired multiple, S/E Chars,
1128
+ and Anchoring code.
1129
+ :param grouping_period: Period_Code with anchor that matches the given period Code.
1130
+ i.e. 'W-[DAY]', 'M', 'Q-[MONTH]', 'Y-[MONTH]'
1131
+ :param closed: Union['left', Any].
1132
+ 'left': The normalized start-day of the relative period is returned
1133
+ Everything else: The normalized last-day of the relative period is returned
1134
+ :returns: DateRangeIndex[datetime64[ns]]
1135
+ """
1136
+
1137
+ # Need to Adjust the Start/End Dates given to pandas since Rolling forward or backward can shift
1138
+ # the calculated date range out of the desired [start, end] range adding or ignoring desired values.
1139
+
1140
+ # For Example, say we want NYSE-Month-Starts between [2020-01-02, 2020-02-02]. W/O Adjusting dates
1141
+ # we call pd.date_range('2020-01-02, '2020-02-02', 'MS') => ['2020-02-01'] Rolled to ['2020-02-03'].
1142
+ # '02-03' date is then trimmed off returning an empty Index. despite '2020-01-02' being a valid Month Start
1143
+ # By Adjusting the Dates we call pd.date_range('2020-01-01, '2020-02-02') => ['2020-01-01, '2020-02-01']
1144
+ # That's then Rolled into [2020-01-02, 2020-02-03] & Trimmed to [2020-01-02] as desired.
1145
+
1146
+ _dr_start, _dr_end = None, None
1147
+
1148
+ if closed == "left":
1149
+ roll_func = cb_day.rollforward
1150
+ if start is not None:
1151
+ normalized_start = start.to_period(grouping_period).start_time
1152
+ _dr_start = (
1153
+ normalized_start if start <= roll_func(normalized_start) else start
1154
+ )
1155
+
1156
+ if end is not None:
1157
+ if periods is not None:
1158
+ normalized_end = end.to_period(grouping_period).start_time
1159
+ _dr_end = (
1160
+ normalized_end - pd.Timedelta("1D") # Shift into preceding group
1161
+ if end < roll_func(normalized_end)
1162
+ else cb_day.rollback(end)
1163
+ )
1164
+ else:
1165
+ _dr_end = cb_day.rollback(end)
1166
+
1167
+ else:
1168
+ roll_func = cb_day.rollback
1169
+ if start is not None:
1170
+ if periods is not None:
1171
+ normalized_start = start.to_period(grouping_period).end_time.normalize()
1172
+ _dr_start = (
1173
+ normalized_start + pd.Timedelta("1D") # Shift into trailing group
1174
+ if start > roll_func(normalized_start)
1175
+ else cb_day.rollforward(start)
1176
+ )
1177
+ else:
1178
+ _dr_start = cb_day.rollforward(start)
1179
+
1180
+ if end is not None:
1181
+ normalized_end = end.to_period(grouping_period).end_time.normalize()
1182
+ _dr_end = normalized_end if end >= roll_func(normalized_end) else end
1183
+
1184
+ _range = (
1185
+ pd.date_range(_dr_start, _dr_end, periods, freq).to_series().apply(roll_func)
1186
+ )
1187
+
1188
+ # Ensure that Rolled Timestamps are in the desired range When given both Start and End
1189
+ if start is not None and end is not None:
1190
+ if len(_range) > 0 and _range.iloc[0] < start:
1191
+ # Trims off the first 'WMQY End' that might have been Rolled before start
1192
+ _range = _range[1:]
1193
+ if len(_range) > 0 and _range.iloc[-1] > end:
1194
+ # Trims off the last 'WMQY Start' the might have been Rolled after end
1195
+ _range = _range[0:-1]
1196
+
1197
+ return pd.DatetimeIndex(_range, dtype="datetime64[ns]")
1198
+
1199
+
1200
+ # endregion