pandas-market-calendars 4.3.3__py3-none-any.whl → 4.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. pandas_market_calendars/__init__.py +39 -38
  2. pandas_market_calendars/calendar_registry.py +57 -53
  3. pandas_market_calendars/calendar_utils.py +1200 -261
  4. pandas_market_calendars/calendars/asx.py +66 -66
  5. pandas_market_calendars/calendars/bmf.py +223 -206
  6. pandas_market_calendars/calendars/bse.py +421 -407
  7. pandas_market_calendars/calendars/cboe.py +145 -145
  8. pandas_market_calendars/calendars/cme.py +405 -402
  9. pandas_market_calendars/calendars/cme_globex_agriculture.py +172 -126
  10. pandas_market_calendars/calendars/cme_globex_base.py +119 -119
  11. pandas_market_calendars/calendars/cme_globex_crypto.py +160 -160
  12. pandas_market_calendars/calendars/cme_globex_energy_and_metals.py +216 -216
  13. pandas_market_calendars/calendars/cme_globex_equities.py +123 -123
  14. pandas_market_calendars/calendars/cme_globex_fixed_income.py +136 -136
  15. pandas_market_calendars/calendars/cme_globex_fx.py +101 -101
  16. pandas_market_calendars/calendars/eurex.py +131 -139
  17. pandas_market_calendars/calendars/eurex_fixed_income.py +98 -98
  18. pandas_market_calendars/calendars/hkex.py +429 -426
  19. pandas_market_calendars/calendars/ice.py +81 -81
  20. pandas_market_calendars/calendars/iex.py +151 -112
  21. pandas_market_calendars/calendars/jpx.py +113 -109
  22. pandas_market_calendars/calendars/lse.py +114 -114
  23. pandas_market_calendars/calendars/mirror.py +149 -130
  24. pandas_market_calendars/calendars/nyse.py +1466 -1324
  25. pandas_market_calendars/calendars/ose.py +116 -116
  26. pandas_market_calendars/calendars/sifma.py +354 -350
  27. pandas_market_calendars/calendars/six.py +132 -132
  28. pandas_market_calendars/calendars/sse.py +311 -311
  29. pandas_market_calendars/calendars/tase.py +220 -197
  30. pandas_market_calendars/calendars/tsx.py +181 -181
  31. pandas_market_calendars/holidays/cme.py +385 -385
  32. pandas_market_calendars/holidays/cme_globex.py +214 -214
  33. pandas_market_calendars/holidays/cn.py +1476 -1455
  34. pandas_market_calendars/holidays/jp.py +401 -398
  35. pandas_market_calendars/holidays/jpx_equinox.py +1 -0
  36. pandas_market_calendars/holidays/nyse.py +1536 -1531
  37. pandas_market_calendars/holidays/oz.py +63 -63
  38. pandas_market_calendars/holidays/sifma.py +350 -338
  39. pandas_market_calendars/holidays/us.py +376 -376
  40. pandas_market_calendars/market_calendar.py +1057 -895
  41. {pandas_market_calendars-4.3.3.dist-info → pandas_market_calendars-4.6.0.dist-info}/METADATA +13 -9
  42. pandas_market_calendars-4.6.0.dist-info/RECORD +50 -0
  43. {pandas_market_calendars-4.3.3.dist-info → pandas_market_calendars-4.6.0.dist-info}/WHEEL +1 -1
  44. pandas_market_calendars-4.3.3.dist-info/RECORD +0 -50
  45. {pandas_market_calendars-4.3.3.dist-info → pandas_market_calendars-4.6.0.dist-info}/LICENSE +0 -0
  46. {pandas_market_calendars-4.3.3.dist-info → pandas_market_calendars-4.6.0.dist-info}/NOTICE +0 -0
  47. {pandas_market_calendars-4.3.3.dist-info → pandas_market_calendars-4.6.0.dist-info}/top_level.txt +0 -0
@@ -1,261 +1,1200 @@
1
- """
2
- Utilities to use with market_calendars
3
- """
4
- import itertools
5
- import warnings
6
-
7
- import numpy as np
8
- import pandas as pd
9
-
10
-
11
- def merge_schedules(schedules, how="outer"):
12
- """
13
- Given a list of schedules will return a merged schedule. The merge method (how) will either return the superset
14
- of any datetime when any schedule is open (outer) or only the datetime where all markets are open (inner)
15
-
16
- CAVEATS:
17
- * This does not work for schedules with breaks, the break information will be lost.
18
- * Only "market_open" and "market_close" are considered, other market times are not yet supported.
19
-
20
- :param schedules: list of schedules
21
- :param how: outer or inner
22
- :return: schedule DataFrame
23
- """
24
- all_cols = [x.columns for x in schedules]
25
- all_cols = list(itertools.chain(*all_cols))
26
- if ("break_start" in all_cols) or ("break_end" in all_cols):
27
- warnings.warn(
28
- "Merge schedules will drop the break_start and break_end from result."
29
- )
30
-
31
- result = schedules[0]
32
- for schedule in schedules[1:]:
33
- result = result.merge(schedule, how=how, right_index=True, left_index=True)
34
- if how == "outer":
35
- result["market_open"] = result.apply(
36
- lambda x: min(x.market_open_x, x.market_open_y), axis=1
37
- )
38
- result["market_close"] = result.apply(
39
- lambda x: max(x.market_close_x, x.market_close_y), axis=1
40
- )
41
- elif how == "inner":
42
- result["market_open"] = result.apply(
43
- lambda x: max(x.market_open_x, x.market_open_y), axis=1
44
- )
45
- result["market_close"] = result.apply(
46
- lambda x: min(x.market_close_x, x.market_close_y), axis=1
47
- )
48
- else:
49
- raise ValueError('how argument must be "inner" or "outer"')
50
- result = result[["market_open", "market_close"]]
51
- return result
52
-
53
-
54
- def convert_freq(index, frequency):
55
- """
56
- Converts a DateTimeIndex to a new lower frequency
57
-
58
- :param index: DateTimeIndex
59
- :param frequency: frequency string
60
- :return: DateTimeIndex
61
- """
62
- return pd.DataFrame(index=index).asfreq(frequency).index
63
-
64
-
65
- class _date_range:
66
- """
67
- This is a callable class that should be used by calling the already initiated instance: `date_range`.
68
- Given a schedule, it will return a DatetimeIndex with all of the valid datetimes at the frequency given.
69
-
70
- The schedule columns should all have the same time zone.
71
-
72
- The calculations will be made for each trading session. If the passed schedule-DataFrame doesn't have
73
- breaks, there is one trading session per day going from market_open to market_close, otherwise there are two,
74
- the first one going from market_open to break_start and the second one from break_end to market_close.
75
-
76
- *Any trading session where start == end is considered a 'no-trading session' and will always be dropped*
77
-
78
- CAVEATS:
79
- * Only "market_open", "market_close" (and, optionally, "breaak_start" and "break_end")
80
- are considered, other market times are not yet supported by this class.
81
-
82
- * If the difference between start and end of a trading session is smaller than an interval of the
83
- frequency, and closed= "right" and force_close = False, the whole session will disappear.
84
- This will also raise a warning.
85
-
86
-
87
- Signature:
88
- .__call__(self, schedule, frequency, closed='right', force_close=True, **kwargs)
89
-
90
- :param schedule: schedule of a calendar, which may or may not include break_start and break_end columns
91
- :param frequency: frequency string that is used by pd.Timedelta to calculate the timestamps
92
- this must be "1D" or higher frequency
93
- :param closed: the way the intervals are labeled
94
- 'right': use the end of the interval
95
- 'left': use the start of the interval
96
- None: (or 'both') use the end of the interval but include the start of the first interval (the open)
97
- :param force_close: how the last value of a trading session is handled
98
- True: guarantee that the close of the trading session is the last value
99
- False: guarantee that there is no value greater than the close of the trading session
100
- None: leave the last value as it is calculated based on the closed parameter
101
- :param kwargs: unused. Solely for compatibility.
102
-
103
-
104
- """
105
-
106
- def __init__(self, schedule=None, frequency=None, closed="right", force_close=True):
107
- if closed not in ("left", "right", "both", None):
108
- raise ValueError("closed must be 'left', 'right', 'both' or None.")
109
- elif force_close not in (True, False, None):
110
- raise ValueError("force_close must be True, False or None.")
111
-
112
- self.closed = closed
113
- self.force_close = force_close
114
- self.has_breaks = False
115
- if frequency is None:
116
- self.frequency = None
117
- else:
118
- self.frequency = pd.Timedelta(frequency)
119
- if self.frequency > pd.Timedelta("1D"):
120
- raise ValueError("Frequency must be 1D or higher frequency.")
121
-
122
- elif schedule.market_close.lt(schedule.market_open).any():
123
- raise ValueError(
124
- "Schedule contains rows where market_close < market_open,"
125
- " please correct the schedule"
126
- )
127
-
128
- if "break_start" in schedule:
129
- if not all(
130
- [
131
- schedule.market_open.le(schedule.break_start).all(),
132
- schedule.break_start.le(schedule.break_end).all(),
133
- schedule.break_end.le(schedule.market_close).all(),
134
- ]
135
- ):
136
- raise ValueError(
137
- "Not all rows match the condition: "
138
- "market_open <= break_start <= break_end <= market_close, "
139
- "please correct the schedule"
140
- )
141
- self.has_breaks = True
142
-
143
- def _check_overlap(self, schedule):
144
- """checks if calculated end times would overlap with the next start times.
145
- Only an issue when force_close is None and closed != left.
146
-
147
- :param schedule: pd.DataFrame with first column: 'start' and second column: 'end'
148
- :raises ValueError:"""
149
- if self.force_close is None and self.closed != "left":
150
- num_bars = self._calc_num_bars(schedule)
151
- end_times = schedule.start + num_bars * self.frequency
152
-
153
- if end_times.gt(schedule.start.shift(-1)).any():
154
- raise ValueError(
155
- "The chosen frequency will lead to overlaps in the calculated index. "
156
- "Either choose a higher frequency or avoid setting force_close to None "
157
- "when setting closed to 'right', 'both' or None."
158
- )
159
-
160
- def _check_disappearing_session(self, schedule):
161
- """checks if requested frequency and schedule would lead to lost trading sessions.
162
- Only necessary when force_close = False and closed = "right".
163
-
164
- :param schedule: pd.DataFrame with first column: 'start' and second column: 'end'
165
- :raises UserWarning:"""
166
- if self.force_close is False and self.closed == "right":
167
- if (schedule.end - schedule.start).lt(self.frequency).any():
168
- warnings.warn(
169
- "An interval of the chosen frequency is larger than some of the trading sessions, "
170
- "while closed== 'right' and force_close is False. This will make those trading sessions "
171
- "disappear. Use a higher frequency or change the values of closed/force_close, to "
172
- "keep this from happening."
173
- )
174
-
175
- def _calc_num_bars(self, schedule):
176
- """calculate the number of timestamps needed for each trading session.
177
-
178
- :param schedule: pd.DataFrame with first column: 'start' and second column: 'end'
179
- :return: pd.Series of float64"""
180
- return np.ceil((schedule.end - schedule.start) / self.frequency)
181
-
182
- def _calc_time_series(self, schedule):
183
- """Method used by date_range to calculate the trading index.
184
-
185
- :param schedule: pd.DataFrame with first column: 'start' and second column: 'end'
186
- :return: pd.Series of datetime64[ns, UTC]"""
187
- num_bars = self._calc_num_bars(schedule)
188
-
189
- # ---> calculate the desired timeseries:
190
- if self.closed == "left":
191
- opens = schedule.start.repeat(num_bars) # keep as is
192
- time_series = (
193
- opens.groupby(opens.index).cumcount()
194
- ) * self.frequency + opens
195
- elif self.closed == "right":
196
- opens = schedule.start.repeat(num_bars) # dont add row but shift up
197
- time_series = (
198
- opens.groupby(opens.index).cumcount() + 1
199
- ) * self.frequency + opens
200
- else:
201
- num_bars += 1
202
- opens = schedule.start.repeat(num_bars) # add row but dont shift up
203
- time_series = (
204
- opens.groupby(opens.index).cumcount()
205
- ) * self.frequency + opens
206
-
207
- if self.force_close is not None:
208
- time_series = time_series[time_series.le(schedule.end.repeat(num_bars))]
209
- if self.force_close:
210
- time_series = pd.concat([time_series, schedule.end]).sort_values()
211
-
212
- return time_series
213
-
214
- def __call__(self, schedule, frequency, closed="right", force_close=True, **kwargs):
215
- """
216
- See class docstring for more information.
217
-
218
- :param schedule: schedule of a calendar, which may or may not include break_start and break_end columns
219
- :param frequency: frequency string that is used by pd.Timedelta to calculate the timestamps
220
- this must be "1D" or higher frequency
221
- :param closed: the way the intervals are labeled
222
- 'right': use the end of the interval
223
- 'left': use the start of the interval
224
- None: (or 'both') use the end of the interval but include the start of the first interval
225
- :param force_close: how the last value of a trading session is handled
226
- True: guarantee that the close of the trading session is the last value
227
- False: guarantee that there is no value greater than the close of the trading session
228
- None: leave the last value as it is calculated based on the closed parameter
229
- :param kwargs: unused. Solely for compatibility.
230
- :return: pd.DatetimeIndex of datetime64[ns, UTC]
231
- """
232
- self.__init__(schedule, frequency, closed, force_close)
233
- if self.has_breaks:
234
- # rearrange the schedule, to make every row one session
235
- before = schedule[["market_open", "break_start"]].set_index(
236
- schedule["market_open"]
237
- )
238
- after = schedule[["break_end", "market_close"]].set_index(
239
- schedule["break_end"]
240
- )
241
- before.columns = after.columns = ["start", "end"]
242
- schedule = pd.concat([before, after]).sort_index()
243
-
244
- else:
245
- schedule = schedule.rename(
246
- columns={"market_open": "start", "market_close": "end"}
247
- )
248
-
249
- schedule = schedule[
250
- schedule.start.ne(schedule.end)
251
- ] # drop the 'no-trading sessions'
252
- self._check_overlap(schedule)
253
- self._check_disappearing_session(schedule)
254
-
255
- time_series = self._calc_time_series(schedule)
256
-
257
- time_series.name = None
258
- return pd.DatetimeIndex(time_series.drop_duplicates())
259
-
260
-
261
- date_range = _date_range()
1
+ """
2
+ Utilities to use with market_calendars
3
+ """
4
+
5
+ import itertools
6
+ from math import ceil, floor
7
+ from typing import TYPE_CHECKING, Any, Dict, Iterable, Literal, Tuple, Union
8
+ import warnings
9
+
10
+ from re import finditer, split
11
+ import numpy as np
12
+ import pandas as pd
13
+
14
+ if TYPE_CHECKING:
15
+ from pandas.tseries.offsets import CustomBusinessDay
16
+ from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday
17
+
18
+ DEFAULT_LABEL_MAP = {
19
+ "pre": "pre",
20
+ "rth_pre_break": "rth",
21
+ "rth": "rth",
22
+ "break": "break",
23
+ "rth_post_break": "rth",
24
+ "post": "post",
25
+ "closed": "closed",
26
+ }
27
+
28
+
29
+ def mark_session(
30
+ schedule: pd.DataFrame,
31
+ timestamps: pd.DatetimeIndex,
32
+ label_map: Dict[str, Any] = {},
33
+ *,
34
+ closed: Literal["left", "right"] = "right",
35
+ ) -> pd.Series:
36
+ """
37
+ Return a Series that denotes the trading session of each timestamp in a DatetimeIndex.
38
+ The returned Series's Index is the provided Datetime Index, the Series's values
39
+ are the timestamps' corresponding session.
40
+
41
+ PARAMETERS:
42
+
43
+ :param schedule: The market schedule to check the timestamps against. This Schedule must include
44
+ all of the trading days that are in the provided DatetimeIndex of timestamps.
45
+ Note: The columns need to be sorted into ascending order, if not, then an error will be
46
+ raised saying the bins must be in ascending order.
47
+
48
+ :param timestamps: A DatetimeIndex of Timestamps to check. Must be sorted in ascending order.
49
+
50
+ :param label_map: Optional mapping of Dict[str, Any] to change the values returned in the
51
+ series. The keys of the given mapping should match the keys of the default dict, but the
52
+ values can be anything. A subset of mappings may also be provided, e.g. {'closed':-1} will
53
+ only change the label of the 'closed' session. All others will remain the default label.
54
+
55
+ >>> Default Mapping == {
56
+ "pre": "pre",
57
+ "rth_pre_break": "rth", # When the Schedule has a break
58
+ "rth": "rth", # When the Schedule doesn't have a break
59
+ "break": "break", # When the Schedule has a break
60
+ "rth_post_break": "rth", # When the Schedule has a break
61
+ "post": "post",
62
+ "closed": "closed",
63
+ }
64
+
65
+ :param closed: Which side of each interval should be closed (inclusive)
66
+ left: == [start, end)
67
+ right: == (start, end]
68
+ """
69
+ # ---- ---- ---- Determine which columns need to be dropped ---- ---- ----
70
+ session_labels = ["closed"]
71
+ columns = set(schedule.columns)
72
+ needed_cols = set()
73
+
74
+ def _extend_statement(session: str, parts: set):
75
+ if parts.issubset(columns):
76
+ needed_cols.update(parts)
77
+ session_labels.append(session)
78
+
79
+ _extend_statement("pre", {"pre", "market_open"})
80
+ if {"break_start", "break_end"}.issubset(columns):
81
+ _extend_statement("rth_pre_break", {"market_open", "break_start"})
82
+ _extend_statement("break", {"break_start", "break_end"})
83
+ _extend_statement("rth_post_break", {"break_end", "market_close"})
84
+ else:
85
+ _extend_statement("rth", {"market_open", "market_close"})
86
+ _extend_statement("post", {"market_close", "post"})
87
+
88
+ # ---- ---- ---- Error Check ---- ---- ----
89
+ if len(extra_cols := columns - needed_cols) > 0:
90
+ schedule = schedule.drop(columns=[*extra_cols])
91
+ warnings.warn(
92
+ f"Attempting to mark trading sessions and the schedule ({columns = }) contains the "
93
+ f"extra columns: {extra_cols}. Returned sessions may not be labeled as desired."
94
+ )
95
+
96
+ start = timestamps[0]
97
+ end = timestamps[-1]
98
+ if start < schedule.iloc[0, 0]: # type: ignore
99
+ raise ValueError(
100
+ f"Insufficient Schedule. Needed Start-Time: {start.normalize().tz_localize(None)}. "
101
+ f"Schedule starts at: {schedule.iloc[0, 0]}"
102
+ )
103
+ if end > schedule.iloc[-1, -1]: # type: ignore
104
+ raise ValueError(
105
+ f"Insufficient Schedule. Needed End-Time: {end.normalize().tz_localize(None)}. "
106
+ f"Schedule ends at: {schedule.iloc[-1, -1]}"
107
+ )
108
+
109
+ # Trim the schedule to match the timeframe covered by the given timeseries
110
+ schedule = schedule[
111
+ (schedule.index >= start.normalize().tz_localize(None))
112
+ & (schedule.index <= end.normalize().tz_localize(None))
113
+ ]
114
+
115
+ backfilled_map = DEFAULT_LABEL_MAP | label_map
116
+ mapped_labels = [backfilled_map[label] for label in session_labels]
117
+ labels = pd.Series([mapped_labels]).repeat(len(schedule)).explode()
118
+ labels = pd.concat([labels, pd.Series([backfilled_map["closed"]])])
119
+
120
+ # Append on additional Edge-Case Bins so result doesn't include NaNs
121
+ bins = schedule.to_numpy().flatten()
122
+ bins = np.insert(bins, 0, bins[0].normalize())
123
+ bins = np.append(bins, bins[-1].normalize() + pd.Timedelta("1D"))
124
+
125
+ bins, _ind, _counts = np.unique(bins, return_index=True, return_counts=True)
126
+
127
+ if len(bins) - 1 != len(labels):
128
+ # np.Unique Dropped some bins, need to drop the associated labels
129
+ label_inds = (_ind + _counts - 1)[:-1]
130
+ labels = labels.iloc[label_inds]
131
+
132
+ return pd.Series(
133
+ pd.cut(timestamps, bins, closed != "left", labels=labels, ordered=False), # type: ignore
134
+ index=timestamps,
135
+ )
136
+
137
+
138
+ def merge_schedules(schedules, how="outer"):
139
+ """
140
+ Given a list of schedules will return a merged schedule. The merge method (how) will either return the superset
141
+ of any datetime when any schedule is open (outer) or only the datetime where all markets are open (inner)
142
+
143
+ CAVEATS:
144
+ * This does not work for schedules with breaks, the break information will be lost.
145
+ * Only "market_open" and "market_close" are considered, other market times are not yet supported.
146
+
147
+ :param schedules: list of schedules
148
+ :param how: outer or inner
149
+ :return: schedule DataFrame
150
+ """
151
+ all_cols = [x.columns for x in schedules]
152
+ all_cols = list(itertools.chain(*all_cols))
153
+ if ("break_start" in all_cols) or ("break_end" in all_cols):
154
+ warnings.warn(
155
+ "Merge schedules will drop the break_start and break_end from result."
156
+ )
157
+
158
+ result = schedules[0]
159
+ for schedule in schedules[1:]:
160
+ result = result.merge(schedule, how=how, right_index=True, left_index=True)
161
+ if how == "outer":
162
+ result["market_open"] = result.apply(
163
+ lambda x: min(x.market_open_x, x.market_open_y), axis=1
164
+ )
165
+ result["market_close"] = result.apply(
166
+ lambda x: max(x.market_close_x, x.market_close_y), axis=1
167
+ )
168
+ elif how == "inner":
169
+ result["market_open"] = result.apply(
170
+ lambda x: max(x.market_open_x, x.market_open_y), axis=1
171
+ )
172
+ result["market_close"] = result.apply(
173
+ lambda x: min(x.market_close_x, x.market_close_y), axis=1
174
+ )
175
+ else:
176
+ raise ValueError('how argument must be "inner" or "outer"')
177
+ result = result[["market_open", "market_close"]]
178
+ return result
179
+
180
+
181
+ def is_single_observance(holiday: "Holiday"):
182
+ "Returns the Date of the Holiday if it is only observed once, None otherwise."
183
+ return holiday.start_date if holiday.start_date == holiday.end_date else None # type: ignore ??
184
+
185
+
186
+ def all_single_observance_rules(calendar: "AbstractHolidayCalendar"):
187
+ "Returns a list of timestamps if the Calendar's Rules are all single observance holidays, None Otherwise"
188
+ observances = [is_single_observance(rule) for rule in calendar.rules]
189
+ return observances if all(observances) else None
190
+
191
+
192
+ def convert_freq(index, frequency):
193
+ """
194
+ Converts a DateTimeIndex to a new lower frequency
195
+
196
+ :param index: DateTimeIndex
197
+ :param frequency: frequency string
198
+ :return: DateTimeIndex
199
+ """
200
+ return pd.DataFrame(index=index).asfreq(frequency).index
201
+
202
+
203
+ SESSIONS = Literal[
204
+ "pre",
205
+ "post",
206
+ "RTH",
207
+ "pre_break",
208
+ "post_break",
209
+ "ETH",
210
+ "break",
211
+ "closed",
212
+ "closed_masked",
213
+ ]
214
+ MKT_TIMES = Literal[
215
+ "pre", "post", "market_open", "market_close", "break_start", "break_end"
216
+ ]
217
+
218
+
219
+ # region ---- ---- ---- Date Range Warning Types ---- ---- ----
220
+ class DateRangeWarning(UserWarning):
221
+ "Super Class to all Date_range Warning Types"
222
+
223
+
224
+ class OverlappingSessionWarning(DateRangeWarning):
225
+ """
226
+ Warning thrown when date_range is called with a timedelta that is larger than the
227
+ gap between two sessions leading to them overlapping.
228
+ This is only an issue when closed='right'/'both'/None and force_close=None
229
+
230
+ For Example, the following raises a warning because the 10:00 Timestamp that is from the 'pre'
231
+ session comes after the start of the 9:30 'RTH' session, but belongs to the 'pre' session
232
+ >>> date_range(NYSE, '2h', 'right', None, {'pre', 'RTH'}, merge_adjacent = False)
233
+ >>> ['2020-01-02 06:00:00', '2020-01-02 08:00:00',
234
+ '2020-01-02 10:00:00', '2020-01-02 11:30:00',
235
+ '2020-01-02 13:30:00', '2020-01-02 15:30:00',
236
+ '2020-01-02 17:30:00'],
237
+ This is particularly convoluted when close='both'/None
238
+ >>> date_range(NYSE, '2h', 'both', None, {'pre', 'RTH'}, merge_adjacent = False)
239
+ >>> ['2020-01-02 04:00:00' (pre), '2020-01-02 06:00:00' (pre),
240
+ '2020-01-02 08:00:00' (pre), '2020-01-02 09:30:00' (rth),
241
+ '2020-01-02 10:00:00' (pre), '2020-01-02 11:30:00' (rth),
242
+ '2020-01-02 13:30:00' (rth), '2020-01-02 15:30:00' (rth),
243
+ '2020-01-02 17:30:00' (rth)],
244
+ """
245
+
246
+
247
+ class DisappearingSessionWarning(DateRangeWarning):
248
+ """
249
+ Warning thrown when date_range is called with a timedelta that is larger than an entire session
250
+ resulting in the session disappearing from the DatetimeIndex.
251
+
252
+ Only an issue when closed='right' and force_close = False
253
+ """
254
+
255
+
256
+ class MissingSessionWarning(DateRangeWarning):
257
+ """
258
+ Warning thrown when a date_range() call is made with a requested session,
259
+ but lacks the necessary columns. When this warning is ignored the returned
260
+ datetimeindex will simply lack the relevant sessions
261
+
262
+ e.g. 'pre' Session requested and schedule lacks 'pre' and/or 'market_open' column
263
+ """
264
+
265
+
266
+ class InsufficientScheduleWarning(DateRangeWarning):
267
+ """
268
+ Warning thrown when a date_range() call is made with a requested number of periods,
269
+ or start-date / end-date that exceed what was provided in the given schedule.
270
+
271
+ If a Schedule has an insufficient start and end date then this warning is thrown twice.
272
+
273
+ If this warning is thrown when date_range is called with a number of desired periods, then
274
+ the desired start/end date is an approximate value. This 'approximation' is biased to
275
+ overestimate the needed start/end time by about 1 week. This is done to limit the edge
276
+ cases where this warning could get thrown multiple times in a row.
277
+ """
278
+
279
+
280
+ def filter_date_range_warnings(
281
+ action: Literal["error", "ignore", "always", "default", "once"],
282
+ source: Union[
283
+ Iterable[type[DateRangeWarning]], type[DateRangeWarning]
284
+ ] = DateRangeWarning,
285
+ ):
286
+ """
287
+ Adjust the behavior of the date_range() warnings to the desired action.
288
+
289
+ :param action: - The desired change to the warning behavior
290
+ 'error': Escalate Warnings into Errors
291
+ 'ignore': Silence Warning Messages
292
+ 'once': Only display a message of the given category once
293
+ 'default': Reset the behavior of the given warning category
294
+ 'always': Always show the Warning of a given category
295
+
296
+ :param source: - The Category/Categories to apply the action to. Can be a single Warning or a list of warnings
297
+ default: DateRangeWarning (All Warnings)
298
+ Warning Types: MissingSessionWarning, OverlappingSessionWarning,
299
+ DisappearingSessionWarning, InsufficientScheduleWarning
300
+ """
301
+ if not isinstance(source, Iterable):
302
+ warnings.filterwarnings(action, category=source)
303
+ return
304
+
305
+ for src in source:
306
+ warnings.filterwarnings(action, category=src)
307
+
308
+
309
+ def parse_missing_session_warning(
310
+ err: MissingSessionWarning,
311
+ ) -> Tuple[set[SESSIONS], set[MKT_TIMES]]:
312
+ """
313
+ Parses a Missing Session Warning's Error Message.
314
+ :returns Tuple[set[str], set[str]]:
315
+ Set #1: The Missing Sessions
316
+ Set #2: The Missing Schedule Columns
317
+ """
318
+ splits = split(r"[{|}]", err.args[0].replace("'", ""))
319
+ return (set(splits[1].split(", ")), set(splits[3].split(", "))) # type: ignore
320
+
321
+
322
+ def parse_insufficient_schedule_warning(
323
+ err: InsufficientScheduleWarning,
324
+ ) -> Tuple[bool, pd.Timestamp, pd.Timestamp]:
325
+ """
326
+ Parses the information from an Insufficient Schedule Warning.
327
+ :returns Tuple[bool, pd.Timestamp, pd.Timestamp]:
328
+ bool: True == Range is missing from the start, False == Range missing from the end
329
+ Timestamp 1: Start of missing range
330
+ Timestamp 2: End of the missing range.
331
+ Note: The Timestamps are always ordered (t1 <= t2) and do not overlap with the original schedule.
332
+ If a supplemental schedule is generated it can be concatenated on without any overlapping indices.
333
+ data
334
+ """
335
+ matcher = finditer(r"\d{4}-\d{2}-\d{2}", err.args[0])
336
+ b = "Start-Time" in err.args[0]
337
+ t1 = pd.Timestamp(next(matcher).group())
338
+ t2 = pd.Timestamp(next(matcher).group())
339
+
340
+ if b:
341
+ t2 -= pd.Timedelta("1D")
342
+ else:
343
+ t2 += pd.Timedelta("1D")
344
+
345
+ return (b, t1, t2) if t1 <= t2 else (b, t2, t1)
346
+
347
+
348
+ # endregion
349
+
350
+
351
+ def date_range(
352
+ schedule: pd.DataFrame,
353
+ frequency: Union[str, pd.Timedelta, int, float],
354
+ closed: Union[Literal["left", "right", "both"], None] = "right",
355
+ force_close: Union[bool, None] = True,
356
+ session: Union[SESSIONS, Iterable[SESSIONS]] = {"RTH"},
357
+ merge_adjacent: bool = True,
358
+ start: Union[str, pd.Timestamp, int, float, None] = None,
359
+ end: Union[str, pd.Timestamp, int, float, None] = None,
360
+ periods: Union[int, None] = None,
361
+ ) -> pd.DatetimeIndex:
362
+ """
363
+ Interpolates a Market's Schedule at the desired frequency and returns the result as a DatetimeIndex.
364
+ This function is only valid for periods less than 1 Day, for longer periods use date_range_htf().
365
+
366
+ Note: The slowest part of this function is by far generating the necessary schedule (which in
367
+ turn is limited by pandas' date_range() function). If speed is a concern, store and update the
368
+ schedule as needed instead of generating it every time.
369
+
370
+ WARNINGS SYSTEM:
371
+ *There are multiple edge-case warnings that are thrown by this function. See the Docstrings
372
+ of each warning for more info. (DateRangeWarning, InsufficientScheduleWarning,
373
+ MissingSessionWarning, OverlappingSessionWarning, DisappearingSessionWarning)
374
+
375
+ *The thrown warnings can be ignored or escalated into catchable errors by using the
376
+ filter_date_range_warnings() function.
377
+
378
+ parse_missing_session_warning() & parse_insufficient_schedule_warning() exist to easily
379
+ process the warnings those warnings if they are escalated into errors.
380
+
381
+ PARAMETERS:
382
+
383
+ :param schedule: Schedule of a calendar which includes all the columns necessary
384
+ for the desired sessions.
385
+
386
+ :param frequency: String, Int/float (seconds) or pd.Timedelta that represents the desired
387
+ interval of the date_range. Intervals larger than 1D are not supported.
388
+
389
+ :param closed: the way the intervals are labeled
390
+ 'right': use the end of the interval
391
+ 'left': use the start of the interval
392
+ None / 'both': use the end of the interval but include the start of the first interval
393
+
394
+ :param force_close: How the last value of a trading session is handled
395
+ True: guarantee that the close of the trading session is the last value
396
+ False: guarantee that there is no value greater than the close of the trading session
397
+ None: leave the last value as it is calculated based on the closed parameter
398
+
399
+ :param session: A str representing a single session or an Iterable of the following Sessions.
400
+ RTH: The Default Option. This is [Market_open, Market_close], if the schedule includes a
401
+ break then the break is excluded from the returned datetime index.
402
+ ETH: [pre, market_open] & [market_close, post]
403
+ pre: [pre, market_open]
404
+ post: [market_close, post]
405
+ break: [break_start, break_end]
406
+ pre_break: [market_open, break_start]
407
+ post_break: [break_end, market_close]
408
+ closed: [market_close, market_open (of the next day)] If ETH market times are given then
409
+ this will be [post, pre (of the next day)] instead. The last session will end at
410
+ Midnight of the timezone the schedule is given in.
411
+ closed_masked: Same as closed, but Weekends & Holidays are ignored. Instead, the Datetime
412
+ index stops at Midnight on the trading day before the break and resumes at midnight
413
+ prior to the next trading day. **Note: This is Midnight of the Timezone the schedule is
414
+ given in, not Midnight of the exchange's tz since the exchange's tz is not known.
415
+
416
+ :param merge_adjacent: Bool representing if adjacent sessions should be merged into a single session.
417
+ For Example, NYSE w/ session={'RTH', 'ETH'}, frequency=2h, closed=left, force_close=False
418
+ merge_adjacent == True => [pre, post]
419
+ >>> ['2020-01-02 04:00:00', '2020-01-02 06:00:00',
420
+ '2020-01-02 08:00:00', '2020-01-02 10:00:00',
421
+ '2020-01-02 12:00:00', '2020-01-02 14:00:00',
422
+ '2020-01-02 16:00:00', '2020-01-02 18:00:00']
423
+ merge_adjacent == False => [pre, market_open] & [market_open, market_close] & [market_close, post]
424
+ >>> ['2020-01-02 04:00:00', '2020-01-02 06:00:00',
425
+ '2020-01-02 08:00:00', '2020-01-02 09:30:00',
426
+ '2020-01-02 11:30:00', '2020-01-02 13:30:00',
427
+ '2020-01-02 15:30:00', '2020-01-02 16:00:00',
428
+ '2020-01-02 18:00:00']
429
+ merge_adjacent=False re-aligns the timestamps to the session, but this results in
430
+ the difference between timestamps not always equaling the desired frequency.
431
+
432
+ :param start: Optional [String, Int/float (POSIX seconds) or pd.Timestamp] of the desired start time.
433
+ :If left as None then the start-time of the the Schedule is used.
434
+ :If no TZ info is given it will be interpreted in the same timezone as the first column
435
+ of the schedule
436
+ :Start can be a Day and Time, but the returned index will still be aligned to the underlying
437
+ schedule. e.g. Session = [9:30am, 12pm], frequency=7min, start=9:45am. Underlying session
438
+ = [9:30, 9:37, 9:44, 9:51, ...] => returned DatetimeIndex = [9:51, ...]
439
+
440
+ :param end: Optional [String, Int/float (POSIX seconds) or pd.Timestamp] of the desired end time.
441
+ :If left as None then the end-time of the the Schedule is used.
442
+ :If no TZ info is given it will be interpreted in the same timezone as the first column
443
+ **Note: The time given is an absolute value. i.e. end="2020-01-01" == "2020-01-01 00:00"
444
+ returning times prior to Midnight of "2019-12-31", not to the EOD of "2020-01-01"
445
+
446
+ :param periods: Optional Integer number of periods to return. If a Period count, Start time,
447
+ and End time are given the period count is ignored.
448
+ None: Period count is ignored. Returned index is all periods in [Start, End]
449
+ Int: # of periods to return. By default, this is the first N periods following the start.
450
+ If an end time is given then this is the N periods prior to the End Time (inclusive).
451
+ CAVEAT: When Force_close == False & closed == 'right'/'both' the number of periods returned
452
+ may be less than the parameter given.
453
+
454
+ :return: pd.DatetimeIndex of datetime64[ns, TZ-Aware]
455
+ """
456
+ # ---- ---- Error Check Inputs ---- ----
457
+ if closed not in ("left", "right", "both", None):
458
+ raise ValueError("closed must be 'left', 'right', 'both' or None.")
459
+ if force_close not in (True, False, None):
460
+ raise ValueError("force_close must be True, False or None.")
461
+ if merge_adjacent not in (True, False):
462
+ raise ValueError("merge_adjacent must be True or False")
463
+
464
+ # ---- ---- Standardize Frequency Param ---- ----
465
+ if isinstance(frequency, (int, float)):
466
+ frequency = int(frequency * 1_000_000_000)
467
+ try:
468
+ frequency = pd.Timedelta(frequency)
469
+ except ValueError as e:
470
+ raise ValueError(f"Market Calendar Date_range Timeframe Error: {e}") from e
471
+ if frequency <= pd.Timedelta("0s"):
472
+ raise ValueError("Market Calendar Date_Range Frequency must be Positive.")
473
+ if frequency > pd.Timedelta("1D"):
474
+ raise ValueError(
475
+ "Market Calendar Date_Range Frequency Cannot Be longer than '1D'."
476
+ )
477
+
478
+ session_list, mask = _make_session_list(
479
+ set(schedule.columns), session, merge_adjacent
480
+ )
481
+ if len(session_list) == 0:
482
+ return pd.DatetimeIndex([], dtype="datetime64[ns, UTC]")
483
+
484
+ session_times = _reconfigure_schedule(schedule, session_list, mask)
485
+ # Trim off all 0 length sessions
486
+ session_times = session_times[session_times.start.ne(session_times.end)]
487
+ _error_check_sessions(session_times, frequency, closed, force_close)
488
+
489
+ tz = schedule[session_list[0][0]].dt.tz # copy tz info from schedule
490
+ dtype = schedule[session_list[0][0]].dtype # copy dtype info from schedule
491
+ start, end, periods = _standardize_times(schedule, start, end, periods, tz)
492
+
493
+ time_series = _calc_time_series(
494
+ session_times, frequency, closed, force_close, start, end, periods
495
+ )
496
+ time_series.name = None
497
+
498
+ return pd.DatetimeIndex(time_series, tz=tz, dtype=dtype)
499
+
500
+
501
+ # region ------------------ Date Range LTF Subroutines ------------------
502
+
503
+
504
def _make_session_list(
    columns: set, sessions: Union[str, Iterable], merge_adjacent: bool
) -> Tuple[list, bool]:
    """Create a list of (Session Start, Session End) column-name tuples.

    :param columns: set of column names present in the schedule.
    :param sessions: a single session name or an Iterable of session names.
    :param merge_adjacent: when True, back-to-back sessions are fused into one.
    :return: ([(start_col, end_col), ...], whether 'closed_masked' was requested)
    :raises ValueError: on any session name not in the SESSIONS Literal.
    """
    flat_names = []  # flat [start, end, start, end, ...] column-name list
    absent_cols = set()
    absent_sessions = set()
    sessions = {sessions} if isinstance(sessions, str) else set(sessions)

    extras = sessions.difference(set(SESSIONS.__args__))  # type: ignore
    if extras:
        raise ValueError(f"Unknown Date_Range Market Session: {extras}")

    # Standardize ETH to its component 'pre' and 'post' sessions
    if "ETH" in sessions:
        sessions = sessions - {"ETH"} | {"pre", "post"}
    # 'closed_masked' behaves exactly like 'closed' for this construction step
    if "closed_masked" in sessions:
        sessions |= {"closed"}
    # Advertise the wrap-around helper columns used by the 'closed' session
    if "pre" in columns:
        columns |= {"pre_wrap"}
    if "market_open" in columns:
        columns |= {"market_open_wrap"}

    def _append(name, bounds):
        "Append the session's boundary column names, tracking anything missing."
        if name not in sessions:
            return
        if columns.issuperset(bounds):
            flat_names.extend(bounds)
        else:
            absent_sessions.add(name)
            absent_cols |= set(bounds) - columns

    # Append session_start, session_end for each desired session *in session order*
    _append("pre", ("pre", "market_open"))
    if {"break_start", "break_end"}.issubset(columns):
        # Schedules with a break sub-divide RTH into pre-break & post-break sessions
        if "RTH" in sessions:
            sessions = sessions - {"RTH"} | {"pre_break", "post_break"}
        _append("pre_break", ("market_open", "break_start"))
        _append("break", ("break_start", "break_end"))
        _append("post_break", ("break_end", "market_close"))
    else:
        _append("RTH", ("market_open", "market_close"))
    _append("post", ("market_close", "post"))

    # 'closed' may span [close, open], [close, pre], [post, open], or [post, pre]
    closed_start = "post" if "post" in columns else "market_close"
    closed_end = "pre_wrap" if "pre" in columns else "market_open_wrap"
    _append("closed", (closed_start, closed_end))

    if absent_sessions:
        warnings.warn(
            f"Requested Sessions: {absent_sessions}, but schedule is missing columns: {absent_cols}."
            "\nResulting DatetimeIndex will lack those sessions. ",
            category=MissingSessionWarning,
        )

    if merge_adjacent:
        # Where one session's end column equals the next session's start column,
        # drop every occurrence of that shared name so the two sessions fuse.
        shared = {
            flat_names[i]
            for i in range(1, len(flat_names) - 1, 2)
            if flat_names[i] == flat_names[i + 1]
        }
        # Shared names always drop in pairs => the list stays evenly sized
        flat_names = [name for name in flat_names if name not in shared]

    # Pair the flat list into [(start, end), ...]
    session_pairs = [
        (flat_names[i], flat_names[i + 1]) for i in range(0, len(flat_names), 2)
    ]

    return session_pairs, "closed_masked" in sessions
572
+
573
+
574
+ def _standardize_times(
575
+ schedule, start, end, periods, tz
576
+ ) -> Tuple[pd.Timestamp, pd.Timestamp, Union[int, None]]:
577
+ "Standardize start and end into a timestamp of the relevant timezone"
578
+ if all((start, end, periods)):
579
+ periods = None # Ignore Periods if all 3 params are given.
580
+
581
+ if start is not None:
582
+ if isinstance(start, (int, float)):
583
+ start *= 1_000_000_000
584
+ try:
585
+ start = pd.Timestamp(start)
586
+ if start.tz is None:
587
+ start = start.tz_localize(tz)
588
+ except ValueError as e:
589
+ raise ValueError(f"Invalid Time ({start = }) given to date_range()") from e
590
+
591
+ if start < schedule.index[0].tz_localize(tz):
592
+ warnings.warn(
593
+ f"Insufficient Schedule. Requested Start-Time: {start.normalize().tz_localize(None)}. "
594
+ f"Schedule starts at: {schedule.index[0].normalize().tz_localize(None)}",
595
+ category=InsufficientScheduleWarning,
596
+ )
597
+
598
+ if end is not None:
599
+ if isinstance(end, (int, float)):
600
+ end *= 1_000_000_000
601
+ try:
602
+ end = pd.Timestamp(end)
603
+ if end.tz is None and tz is not None:
604
+ end = end.tz_localize(tz)
605
+ except ValueError as e:
606
+ raise ValueError(f"Invalid Time ({end = }) given to date_range()") from e
607
+
608
+ if end > schedule.index[-1].tz_localize(tz) + pd.Timedelta("1D"):
609
+ # Checking against the day and not the specific session since so requesting a time
610
+ # after the last session's close but before the next day doesn't throw a warning.
611
+ requested_end = end.normalize().tz_localize(None) - pd.Timedelta("1D")
612
+ warnings.warn(
613
+ f"Insufficient Schedule. Requested End-Time: {requested_end}. "
614
+ f"Schedule ends at: {schedule.index[-1].normalize().tz_localize(None)}",
615
+ category=InsufficientScheduleWarning,
616
+ )
617
+
618
+ if start is not None and end is not None and start > end:
619
+ raise ValueError(
620
+ "Date_range() given a start-date that occurs after the given end-date. "
621
+ f"{start = }, {end = }"
622
+ )
623
+
624
+ return start, end, periods
625
+
626
+
627
+ def _reconfigure_schedule(schedule, session_list, mask_close) -> pd.DataFrame:
628
+ "Reconfigure a schedule into a sorted dataframe of [start, end] times for each session"
629
+
630
+ sessions = []
631
+
632
+ for start, end in session_list:
633
+ if not end.endswith("_wrap"):
634
+ # Simple Session where 'start' occurs before 'end'
635
+ sessions.append(
636
+ schedule[[start, end]]
637
+ .rename(columns={start: "start", end: "end"})
638
+ .set_index("start", drop=False)
639
+ )
640
+ continue
641
+
642
+ # 'closed' Session that wraps around midnight. Shift the 'end' col by 1 Day
643
+ end = end.rstrip("_wrap")
644
+ tmp = pd.DataFrame(
645
+ {
646
+ "start": schedule[start],
647
+ "end": schedule[end].shift(-1),
648
+ }
649
+ ).set_index("start", drop=False)
650
+
651
+ # Shift(-1) leaves last index of 'end' as 'NaT'
652
+ # Set the [-1, 'end' ('end' === 1)] cell to Midnight of the 'start' time of that row.
653
+ tmp.iloc[-1, 1] = tmp.iloc[-1, 0].normalize() + pd.Timedelta("1D") # type: ignore
654
+
655
+ if mask_close:
656
+ # Do some additional work to split 'closed' sessions that span weekends/holidays
657
+ sessions_to_split = tmp["end"] - tmp["start"] > pd.Timedelta("1D")
658
+
659
+ split_strt = tmp[sessions_to_split]["start"]
660
+ split_end = tmp[sessions_to_split]["end"]
661
+
662
+ sessions.append(
663
+ pd.DataFrame( # From start of the long close to Midnight
664
+ {
665
+ "start": split_strt,
666
+ "end": split_strt.dt.normalize() + pd.Timedelta("1D"),
667
+ }
668
+ ).set_index("start", drop=False)
669
+ )
670
+ sessions.append(
671
+ pd.DataFrame( # From Midnight to the end of the long close
672
+ {
673
+ "start": split_end.dt.normalize(),
674
+ "end": split_end,
675
+ }
676
+ ).set_index("start", drop=False)
677
+ )
678
+
679
+ # leave tmp as all the sessions that were not split
680
+ tmp = tmp[~sessions_to_split]
681
+
682
+ sessions.append(tmp)
683
+
684
+ return pd.concat(sessions).sort_index()
685
+
686
+
687
+ def _error_check_sessions(session_times, timestep, closed, force_close):
688
+ if session_times.start.gt(session_times.end).any():
689
+ raise ValueError(
690
+ "Desired Sessions from the Schedule contain rows where session start < session end, "
691
+ "please correct the schedule"
692
+ )
693
+
694
+ # Disappearing Session
695
+ if force_close is False and closed == "right":
696
+ # only check if needed
697
+ if (session_times.end - session_times.start).lt(timestep).any():
698
+ warnings.warn(
699
+ "An interval of the chosen frequency is larger than some of the trading sessions, "
700
+ "while closed='right' and force_close=False. This will make those trading sessions "
701
+ "disappear. Use a higher frequency or change the values of closed/force_close, to "
702
+ "keep this from happening.",
703
+ category=DisappearingSessionWarning,
704
+ )
705
+
706
+ # Overlapping Session
707
+ if force_close is None and closed != "left":
708
+ num_bars = _num_bars_ltf(session_times, timestep, closed)
709
+ end_times = session_times.start + num_bars * timestep
710
+
711
+ if end_times.gt(session_times.start.shift(-1)).any():
712
+ warnings.warn(
713
+ "The desired frequency results in date_range() generating overlapping sessions. "
714
+ "This can happen when the timestep is larger than a session, or when "
715
+ "merge_session = False and a session is not evenly divisible by the timestep. "
716
+ "The overlapping timestep can be deleted with force_close = True or False",
717
+ category=OverlappingSessionWarning,
718
+ )
719
+
720
+
721
+ def _num_bars_ltf(session_times, timestep, closed) -> pd.Series:
722
+ "Calculate the number of timestamps needed for each trading session."
723
+ if closed in ("both", None):
724
+ return np.ceil((session_times.end - session_times.start) / timestep) + 1
725
+ else:
726
+ return np.ceil((session_times.end - session_times.start) / timestep)
727
+
728
+
729
def _course_trim_to_period_count(num_bars, periods, reverse) -> pd.Series:
    """
    Course Trim the Session times to the desired period count.
    Large enough of a sub-routine to merit its own function call.

    :param num_bars: pd.Series of bars-per-session, indexed by session start time.
    :param periods: int number of periods requested by the caller.
    :param reverse: True when an end-date anchors the range (count backwards from the end).
    :return: Boolean pd.Series, in num_bars' original order, marking sessions to keep.
    """
    if reverse:
        # If end-date is given calculate sum in reverse order
        num_bars = num_bars[::-1]

    # Running total of bars; index order matches the (possibly reversed) sessions.
    _sum = num_bars.cumsum()

    if _sum.iloc[-1] < periods:
        # Insufficient Number of Periods. Try to estimate an ending time from the data given.
        # delta = (end_date - start_date) / (cumulative # of periods) * (periods still needed) * fudge factor
        delta = abs(
            # (end_date - start_date) / (cumulative # of periods)
            ((_sum.index[-1] - _sum.index[0]) / _sum.iloc[-1])
            * (periods - _sum.iloc[-1])  # (periods still needed)
            * 1.05  # (Fudge Factor for weekends/holidays)
        )
        # delta = math.ceil(delta) + '1W'  (rounded up to whole days, plus ~a week)
        delta = (delta // pd.Timedelta("1D") + 8) * pd.Timedelta("1D")
        # The 1.05 Factor handles when the schedule is short by a few months, the + '1W' handles
        # when the schedule is short by only a few periods. While 1 Week is absolute overkill,
        # generating the extra few days is very little extra cost compared to throwing this error
        # a second or even third time.

        if reverse:
            # num_bars was reversed, so index[-1] is the *earliest* session here.
            approx_start = _sum.index[-1] - delta
            warnings.warn(
                f"Insufficient Schedule. Requested Approx Start-Time: {approx_start}. "
                f"Schedule starts at: {_sum.index[-1].normalize().tz_localize(None)}",
                category=InsufficientScheduleWarning,
            )
        else:
            approx_end = _sum.index[-1] + delta
            warnings.warn(
                f"Insufficient Schedule. Requested Approx End-Time: {approx_end}. "
                f"Schedule ends at: {_sum.index[-1].normalize().tz_localize(None)}",
                category=InsufficientScheduleWarning,
            )

    sessions_to_keep = _sum < periods
    # Shifting Ensures the number of needed periods are generated, but no more.
    # (keeps the first session whose cumulative total crosses 'periods')
    sessions_to_keep = sessions_to_keep.shift(1, fill_value=True)

    if reverse:
        # If end-date is given calculate un-reverse the order of the series
        sessions_to_keep = sessions_to_keep[::-1]

    return sessions_to_keep
780
+
781
+
782
def _calc_time_series(
    session_times, timestep, closed, force_close, start, end, periods
) -> pd.Series:
    """Interpolate each session into a datetime series at the desired frequency.

    :param session_times: DataFrame of session 'start'/'end' times indexed by 'start'.
    :param timestep: pd.Timedelta interval between generated timestamps.
    :param closed: 'left', 'right', 'both' or None -- see date_range().
    :param force_close: True, False or None -- see date_range().
    :param start: Optional pd.Timestamp lower bound (tz matching the schedule).
    :param end: Optional pd.Timestamp upper bound (tz matching the schedule).
    :param periods: Optional int count of timestamps to return.
    :return: pd.Series of timestamps; empty Series when nothing survives trimming.
    """
    # region ---- ---- ---- Trim the Sessions ---- ---- ----
    # Compare 'start' to the session end times so that if 'start' is in the middle of a session
    # that session remains in session_times. Vice-versa for End
    if start is not None:
        session_times = session_times[session_times.end > start]
    if end is not None:
        session_times = session_times[session_times.start < end]
    if len(session_times) == 0:
        return pd.Series([])

    # Override the First Session's Start and Last Session's End times if needed
    if start is not None and start > session_times.loc[session_times.index[0], "start"]:
        # Align the start to a multiple of the timestep after the session's beginning.
        # This is to make the returned DTIndex consistent across all start/end/period settings.
        session_start = session_times.loc[session_times.index[0], "start"]
        start_aligned = session_start + (
            ceil((start - session_start) / timestep) * timestep
        )
        session_times.loc[session_times.index[0], "start"] = start_aligned
    if end is not None and end < session_times.loc[session_times.index[-1], "end"]:
        # NOTE(review): the end is aligned to a grid anchored at the *first* session's
        # start, even though it trims the *last* session -- presumably so start & end
        # share one grid; confirm this is intended when sessions are not contiguous.
        session_start = session_times.loc[session_times.index[0], "start"]
        end_aligned = session_start + (
            floor((end - session_start) / timestep) * timestep
        )
        session_times.loc[session_times.index[-1], "end"] = end_aligned

    num_bars = _num_bars_ltf(session_times, timestep, closed)

    if periods is not None:
        sessions_to_keep = _course_trim_to_period_count(
            num_bars, periods, end is not None
        )
        num_bars = num_bars[sessions_to_keep]
        session_times = session_times[sessions_to_keep]

    # endregion

    # One row per needed timestamp: each session start repeated num_bars times.
    starts = session_times.start.repeat(num_bars)  # type: ignore

    if closed == "right":
        # Right side of addition is cumulative time since session start in multiples of timestep
        time_series = starts + (starts.groupby(starts.index).cumcount() + 1) * timestep
    else:
        time_series = starts + (starts.groupby(starts.index).cumcount()) * timestep

    if force_close is not None:
        # Trim off all timestamps that stretched beyond their intended session
        time_series = time_series[time_series.le(session_times.end.repeat(num_bars))]

        if force_close:
            # Guarantee each session's close appears as the last value
            time_series = pd.concat([time_series, session_times.end])

    time_series = time_series.drop_duplicates().sort_values()  # type: ignore

    if periods is not None and len(time_series) > 0:
        # Although likely redundant, Fine Trim to desired period count.
        if end is not None:
            # Anchored at the end => keep the last 'periods' values
            s_len = len(time_series)
            time_series = time_series[max(s_len - periods, 0) : s_len]
        else:
            time_series = time_series[0:periods]

    return time_series
849
+
850
+
851
+ # endregion
852
+
853
+
854
# Valid period codes / anchor values for date_range_htf()
PeriodCode = Literal["D", "W", "M", "Q", "Y"]
Day_Anchor = Literal["SUN", "MON", "TUE", "WED", "THU", "FRI", "SAT"]
Month_Anchor = Literal[
    "JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
]

# These are needed because the pandas Period Object is not consistent w/ date_range.
# pd.date_range(s,e, freq = 'W-SUN') == [DatetimeIndex of all sundays] (as Expected)
# but, pd.Timestamp([A Sunday]).to_period('W-SUN').start_time == [The Monday Prior???]
# Rolling each anchor back by one position (SUN -> SAT, JAN -> DEC, ...) makes the
# Period grouping boundaries line back up with the date_range anchors.
days_rolled = list(Day_Anchor.__args__)
days_rolled.insert(0, days_rolled.pop())
weekly_roll_map = dict(zip(Day_Anchor.__args__, days_rolled))

months_rolled = list(Month_Anchor.__args__)
months_rolled.insert(0, months_rolled.pop())
yearly_roll_map = dict(zip(Month_Anchor.__args__, months_rolled))
870
+
871
+
872
def date_range_htf(
    cal: "CustomBusinessDay",
    frequency: Union[str, pd.Timedelta, int, float],
    start: Union[str, pd.Timestamp, int, float, None] = None,
    end: Union[str, pd.Timestamp, int, float, None] = None,
    periods: Union[int, None] = None,
    closed: Union[Literal["left", "right"], None] = "right",
    *,
    day_anchor: Day_Anchor = "SUN",
    month_anchor: Month_Anchor = "JAN",
) -> pd.DatetimeIndex:
    """
    Returns a Normalized DatetimeIndex from the start-date to End-Date for Time periods of 1D and Higher.

    Unless using a custom calendar, it is advised to call the date_range_htf() method of the desired calendar.
    This is because default_anchors may change, or a single calendar may not be sufficient to model a market.

    For example, NYSE has two calendars: The first covers pre-1952 where saturdays were trading days. The second
    covers post-1952 where saturdays are closed.

    PARAMETERS:

    :param cal: CustomBusinessDay Calendar associated with a MarketCalendar. This can be retrieved by
        calling the holidays() method of a MarketCalendar.

    :param frequency: String, Int/float (POSIX seconds) or pd.Timedelta of the desired frequency.
        :Must be Greater than '1D' and an integer multiple of the base frequency (D, W, M, Q, or Y)
        :Important Note: Ints/Floats & Timedeltas are always considered as 'Open Business Days',
            '2D' == Every Other Business Day, '3D' == Every 3rd B.Day, '7D' == Every 7th B.Day
        :Higher periods (passed as strings) align to the beginning or end of the relevant period
        :i.e. '1W' == First/[Last] Trading Day of each Week, '1Q' == First/[Last] Day of every Quarter

    :param start: String, Int/float (POSIX seconds) or pd.Timestamp of the desired start time.
        :The Time & Timezone information is ignored. Only the Normalized Day is considered.

    :param end: String, Int/float (POSIX seconds) or pd.Timestamp of the desired end time.
        :The Time & Timezone information is ignored. Only the Normalized Day is considered.

    :param periods: Optional Integer number of periods to return. If a Period count, Start time,
        and End time are given the period count is ignored.

    :param closed: Literal['left', 'right']. Method used to close each range.
        :left: First open trading day of the Session is returned (e.g. First Open Day of The Month)
        :right: Last open trading day of the Session is returned (e.g. Last Open Day of The Month)
        :Note, This has no effect when the desired frequency is a number of days.

    :param day_anchor: Day to Anchor the start of the Weekly timeframes to. Default 'SUN'.
        :To get the First/Last Days of the trading Week then the Anchor needs to be on a day the relevant
            market is closed.
        :This can be set so that a specific day each week is returned.
        :freq='1W' & day_anchor='WED' Will return Every 'WED' when the market is open, and nearest day
            to the left or right (based on 'closed') when the market is closed.
        Options: ["SUN", "MON", "TUE", "WED", "THU", "FRI", "SAT"]

    :param month_anchor: Month to Anchor the start of the year to for Quarter and yearly timeframes.
        :Default 'JAN' for Calendar Quarters/Years. Can be set to 'JUL' to return Fiscal Years
        Options: ["JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"]
    """

    start, end, periods = _error_check_htf_range(start, end, periods)
    mult, _period_code = _standardize_htf_freq(frequency)

    if _period_code == "D":
        if mult == 1:
            # When desiring a frequency of '1D' default to pd.date_range. It will give the same
            # answer but it is more performant than the method in _cal_day_range.
            return pd.date_range(start, end, periods, freq=cal)
        else:
            return _cal_day_range(cal, start, end, periods, mult)

    elif _period_code == "W":
        freq = str(mult) + "W-" + day_anchor.upper()
        # Grouping anchor is rolled back one day so Period boundaries match date_range
        grouping_period = "W-" + weekly_roll_map[day_anchor.upper()]

        return _cal_WMQY_range(cal, start, end, periods, freq, grouping_period, closed)

    elif _period_code == "M":
        # 'S' = Month-Start stamps, 'E' = Month-End stamps
        freq = str(mult) + "M" + ("S" if closed == "left" else "E")
        return _cal_WMQY_range(cal, start, end, periods, freq, "M", closed)

    else:  # Yearly & Quarterly Period
        freq = str(mult) + _period_code
        # Period-start frequencies anchor on the given month; period-end frequencies
        # anchor on the *prior* month via the roll map so boundaries line up.
        freq += (
            "S-" + month_anchor.upper()
            if closed == "left"
            else "E-" + yearly_roll_map[month_anchor.upper()]
        )
        grouping_period = _period_code + "-" + yearly_roll_map[month_anchor.upper()]

        return _cal_WMQY_range(cal, start, end, periods, freq, grouping_period, closed)
962
+
963
+
964
+ # region ---- ---- ---- Date Range HTF Subroutines ---- ---- ----
965
+
966
+
967
+ def _error_check_htf_range(
968
+ start, end, periods: Union[int, None]
969
+ ) -> Tuple[Union[pd.Timestamp, None], Union[pd.Timestamp, None], Union[int, None]]:
970
+ "Standardize and Error Check Start, End, and period params"
971
+ if periods is not None:
972
+ if not isinstance(periods, int):
973
+ raise ValueError(
974
+ f"Date_Range_HTF Must be either an int or None. Given {type(periods)}"
975
+ )
976
+ if periods < 0:
977
+ raise ValueError("Date_range_HTF Periods must be Positive.")
978
+
979
+ if isinstance(start, (int, float)):
980
+ start = int(start * 1_000_000_000)
981
+ if isinstance(end, (int, float)):
982
+ end = int(end * 1_000_000_000)
983
+
984
+ if start is not None:
985
+ start = pd.Timestamp(start).normalize().tz_localize(None)
986
+ if end is not None:
987
+ end = pd.Timestamp(end).normalize().tz_localize(None)
988
+
989
+ if all((start, end, periods)):
990
+ periods = None # Ignore Periods if passed too many params
991
+ if len([param for param in (start, end, periods) if param is not None]) < 2:
992
+ raise ValueError(
993
+ "Date_Range_HTF must be given two of the three following params: (start, end, periods)"
994
+ )
995
+
996
+ if start is not None and end is not None and end < start:
997
+ raise ValueError("Date_Range_HTF() Start-Date must be before the End-Date")
998
+
999
+ return start, end, periods
1000
+
1001
+
1002
def _standardize_htf_freq(
    frequency: Union[str, pd.Timedelta, int, float]
) -> Tuple[int, PeriodCode]:
    """Standardize the frequency into a (multiplier, period-code) pair.

    Strings ending in W/M/Q/Y return that code; everything else is parsed as a
    whole number of business days ('D').
    :raises ValueError: on empty/invalid multipliers or sub-day frequencies.
    """
    if isinstance(frequency, str):
        if not frequency:
            raise ValueError("Date_Range_HTF Frequency is an empty string.")
        if len(frequency) == 1:
            frequency = "1" + frequency  # Turn 'D' into '1D' for all period codes
        code = frequency[-1].upper()
        if code in {"W", "M", "Q", "Y"}:
            try:
                mult = int(frequency[:-1])
                if mult <= 0:
                    raise ValueError()
            except ValueError as e:
                raise ValueError(
                    "Date_Range_HTF() Week, Month, Quarter and Year frequency must "
                    "have a positive integer multiplier"
                ) from e
            return mult, code  # type: ignore

    # All remaining frequencies (int, float, strs, & Timedeltas) are parsed as business days.
    if isinstance(frequency, (int, float)):
        # Numeric input is POSIX seconds -> nanoseconds
        frequency = int(frequency * 1_000_000_000)

    frequency = pd.Timedelta(frequency)
    one_day = pd.Timedelta("1D")
    if frequency < one_day:
        raise ValueError("Date_Range_HTF() Frequency must be '1D' or Higher.")
    if frequency % one_day != pd.Timedelta(0):
        raise ValueError(
            "Date_Range_HTF() Week and Day frequency must be an integer multiple of Days"
        )

    return frequency.days, "D"
1035
+
1036
+
1037
+ def _days_per_week(weekmask: Union[Iterable, str]) -> int:
1038
+ "Used to get a more accurate estimate of the number of days per week"
1039
+ # Return any 'Array Like' Representation
1040
+ if not isinstance(weekmask, str):
1041
+ return len([day for day in weekmask if bool(day)])
1042
+
1043
+ if len(weekmask) == 0:
1044
+ raise ValueError("Weekmask cannot be blank")
1045
+
1046
+ weekmask = weekmask.upper()
1047
+ day_abbrs = {day for day in weekly_roll_map.values() if day in weekmask}
1048
+ if len(day_abbrs) != 0:
1049
+ return len(day_abbrs)
1050
+
1051
+ # Weekmask Something like '0111110'
1052
+ return len([day for day in weekmask if bool(day)])
1053
+
1054
+
1055
def _cal_day_range(
    cb_day: "CustomBusinessDay", start, end, periods, mult
) -> pd.DatetimeIndex:
    """
    Returns a Normalized DateTimeIndex of Open Business Days.
    Exactly two of the (start, end, periods) arguments must be given.

    ** Arguments should be Type/Error Checked before calling this function **

    :param cb_day: CustomBusinessDay Object from the respective calendar
    :param start: Optional Start-Date. Must be a Normalized, TZ-Naive pd.Timestamp
    :param end: Optional End-Date. Must be a Normalized, TZ-Naive pd.Timestamp
    :param periods: Optional Number of periods to return
    :param mult: Integer Multiple of business days between data-points.
        e.g: 1 == Every Business Day, 2 == Every Other B.Day, 3 == Every Third B.Day, etc.
    :returns: DateRangeIndex[datetime64[ns]]
    """

    # Ensure Start and End are open Business days in the desired range
    if start is not None:
        start = cb_day.rollforward(start)
    if end is not None:
        end = cb_day.rollback(end)

    # ---- Start-Date to End-Date ----
    if isinstance(start, pd.Timestamp) and isinstance(end, pd.Timestamp):
        # Average calendar time between returned points, as a Timedelta
        num_days = (end - start) / mult
        # Get a better estimate of the number of open days since date_range calc is slow:
        # (whole weeks * open days per week) + the remainder of the partial week
        est_open_days = (
            (num_days // 7) * _days_per_week(cb_day.weekmask)
        ) + num_days % pd.Timedelta("1W")

        # Should always produce a small overestimate since Holidays aren't accounted for.
        est_open_days = ceil(est_open_days / pd.Timedelta("1D"))
        _range = pd.RangeIndex(0, est_open_days * mult, mult)

        # start + N * cb_day advances N open business days from 'start'
        dt_index = pd.DatetimeIndex(start + _range * cb_day, dtype="datetime64[ns]")
        return dt_index[dt_index <= end]  # trim the overestimate back down to 'end'

    # ---- Periods from Start-Date ----
    elif isinstance(start, pd.Timestamp):
        _range = pd.RangeIndex(0, periods * mult, mult)
        return pd.DatetimeIndex(start + _range * cb_day, dtype="datetime64[ns]")

    # ---- Periods from End-Date ----
    else:
        # Ensure the end-date is the first valid Trading Day <= given end-date
        # NOTE(review): 'end' was already rolled back at the top of this function;
        # this second rollback looks redundant but is harmless (rollback is idempotent).
        end = cb_day.rollback(end)
        # Negative steps walk backwards from 'end'; reversed at the end for ascending order
        _range = pd.RangeIndex(0, -1 * periods * mult, -1 * mult)

        return pd.DatetimeIndex(end + _range * cb_day, dtype="datetime64[ns]")[::-1]
1106
+
1107
+
1108
+ def _cal_WMQY_range(
1109
+ cb_day: "CustomBusinessDay",
1110
+ start: Union[pd.Timestamp, None],
1111
+ end: Union[pd.Timestamp, None],
1112
+ periods: Union[int, None],
1113
+ freq: str,
1114
+ grouping_period: str,
1115
+ closed: Union[Literal["left", "right"], None] = "right",
1116
+ ):
1117
+ """
1118
+ Return A DateRangeIndex of the Weekdays that mark either the start or end of each
1119
+ buisness week based on the 'closed' parameter.
1120
+
1121
+ ** Arguments should be Type/Error Checked before calling this function **
1122
+
1123
+ :param cb_day: CustomBusinessDay Object from the respective calendar
1124
+ :param start: Optional Start-Date. Must be a Normalized, TZ-Naive pd.Timestamp
1125
+ :param end: Optional End-Date. Must be a Normalized, TZ-Naive pd.Timestamp
1126
+ :param periods: Optional Number of periods to return
1127
+ :param freq: Formatted frequency of '1W' and Higher with desired multiple, S/E Chars,
1128
+ and Anchoring code.
1129
+ :param grouping_period: Period_Code with anchor that matches the given period Code.
1130
+ i.e. 'W-[DAY]', 'M', 'Q-[MONTH]', 'Y-[MONTH]'
1131
+ :param closed: Union['left', Any].
1132
+ 'left': The normalized start-day of the relative period is returned
1133
+ Everything else: The normalized last-day of the relative period is returned
1134
+ :returns: DateRangeIndex[datetime64[ns]]
1135
+ """
1136
+
1137
+ # Need to Adjust the Start/End Dates given to pandas since Rolling forward or backward can shift
1138
+ # the calculated date range out of the desired [start, end] range adding or ignoring desired values.
1139
+
1140
+ # For Example, say we want NYSE-Month-Starts between [2020-01-02, 2020-02-02]. W/O Adjusting dates
1141
+ # we call pd.date_range('2020-01-02, '2020-02-02', 'MS') => ['2020-02-01'] Rolled to ['2020-02-03'].
1142
+ # '02-03' date is then trimmed off returning an empty Index. despite '2020-01-02' being a valid Month Start
1143
+ # By Adjusting the Dates we call pd.date_range('2020-01-01, '2020-02-02') => ['2020-01-01, '2020-02-01']
1144
+ # That's then Rolled into [2020-01-02, 2020-02-03] & Trimmed to [2020-01-02] as desired.
1145
+
1146
+ _dr_start, _dr_end = None, None
1147
+
1148
+ if closed == "left":
1149
+ roll_func = cb_day.rollforward
1150
+ if start is not None:
1151
+ normalized_start = start.to_period(grouping_period).start_time
1152
+ _dr_start = (
1153
+ normalized_start if start <= roll_func(normalized_start) else start
1154
+ )
1155
+
1156
+ if end is not None:
1157
+ if periods is not None:
1158
+ normalized_end = end.to_period(grouping_period).start_time
1159
+ _dr_end = (
1160
+ normalized_end - pd.Timedelta("1D") # Shift into preceding group
1161
+ if end < roll_func(normalized_end)
1162
+ else cb_day.rollback(end)
1163
+ )
1164
+ else:
1165
+ _dr_end = cb_day.rollback(end)
1166
+
1167
+ else:
1168
+ roll_func = cb_day.rollback
1169
+ if start is not None:
1170
+ if periods is not None:
1171
+ normalized_start = start.to_period(grouping_period).end_time.normalize()
1172
+ _dr_start = (
1173
+ normalized_start + pd.Timedelta("1D") # Shift into trailing group
1174
+ if start > roll_func(normalized_start)
1175
+ else cb_day.rollforward(start)
1176
+ )
1177
+ else:
1178
+ _dr_start = cb_day.rollforward(start)
1179
+
1180
+ if end is not None:
1181
+ normalized_end = end.to_period(grouping_period).end_time.normalize()
1182
+ _dr_end = normalized_end if end >= roll_func(normalized_end) else end
1183
+
1184
+ _range = (
1185
+ pd.date_range(_dr_start, _dr_end, periods, freq).to_series().apply(roll_func)
1186
+ )
1187
+
1188
+ # Ensure that Rolled Timestamps are in the desired range When given both Start and End
1189
+ if start is not None and end is not None:
1190
+ if len(_range) > 0 and _range.iloc[0] < start:
1191
+ # Trims off the first 'WMQY End' that might have been Rolled before start
1192
+ _range = _range[1:]
1193
+ if len(_range) > 0 and _range.iloc[-1] > end:
1194
+ # Trims off the last 'WMQY Start' the might have been Rolled after end
1195
+ _range = _range[0:-1]
1196
+
1197
+ return pd.DatetimeIndex(_range, dtype="datetime64[ns]")
1198
+
1199
+
1200
+ # endregion