pandas-market-calendars 5.0.0__py3-none-any.whl → 5.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. pandas_market_calendars/__init__.py +39 -39
  2. pandas_market_calendars/calendar_registry.py +57 -57
  3. pandas_market_calendars/calendar_utils.py +1151 -1147
  4. pandas_market_calendars/calendars/asx.py +77 -70
  5. pandas_market_calendars/calendars/bmf.py +226 -219
  6. pandas_market_calendars/calendars/bse.py +432 -425
  7. pandas_market_calendars/calendars/cboe.py +156 -149
  8. pandas_market_calendars/calendars/cme.py +412 -405
  9. pandas_market_calendars/calendars/cme_globex_agriculture.py +172 -172
  10. pandas_market_calendars/calendars/cme_globex_base.py +126 -119
  11. pandas_market_calendars/calendars/cme_globex_crypto.py +165 -158
  12. pandas_market_calendars/calendars/cme_globex_energy_and_metals.py +223 -216
  13. pandas_market_calendars/calendars/cme_globex_equities.py +130 -123
  14. pandas_market_calendars/calendars/cme_globex_fixed_income.py +136 -136
  15. pandas_market_calendars/calendars/cme_globex_fx.py +101 -101
  16. pandas_market_calendars/calendars/eurex.py +138 -131
  17. pandas_market_calendars/calendars/eurex_fixed_income.py +105 -98
  18. pandas_market_calendars/calendars/hkex.py +438 -431
  19. pandas_market_calendars/calendars/ice.py +88 -81
  20. pandas_market_calendars/calendars/iex.py +162 -155
  21. pandas_market_calendars/calendars/jpx.py +124 -117
  22. pandas_market_calendars/calendars/lse.py +125 -118
  23. pandas_market_calendars/calendars/mirror.py +144 -144
  24. pandas_market_calendars/calendars/nyse.py +1472 -1466
  25. pandas_market_calendars/calendars/ose.py +125 -118
  26. pandas_market_calendars/calendars/sifma.py +390 -356
  27. pandas_market_calendars/calendars/six.py +143 -136
  28. pandas_market_calendars/calendars/sse.py +322 -315
  29. pandas_market_calendars/calendars/tase.py +231 -224
  30. pandas_market_calendars/calendars/tsx.py +192 -185
  31. pandas_market_calendars/class_registry.py +115 -115
  32. pandas_market_calendars/holidays/cme.py +385 -385
  33. pandas_market_calendars/holidays/cme_globex.py +214 -214
  34. pandas_market_calendars/holidays/cn.py +1476 -1476
  35. pandas_market_calendars/holidays/jp.py +401 -401
  36. pandas_market_calendars/holidays/jpx_equinox.py +506 -506
  37. pandas_market_calendars/holidays/nyse.py +1536 -1536
  38. pandas_market_calendars/holidays/oz.py +63 -63
  39. pandas_market_calendars/holidays/sifma.py +350 -350
  40. pandas_market_calendars/holidays/us.py +376 -376
  41. pandas_market_calendars/market_calendar.py +1008 -1008
  42. {pandas_market_calendars-5.0.0.dist-info → pandas_market_calendars-5.1.1.dist-info}/METADATA +3 -1
  43. pandas_market_calendars-5.1.1.dist-info/RECORD +50 -0
  44. {pandas_market_calendars-5.0.0.dist-info → pandas_market_calendars-5.1.1.dist-info}/WHEEL +1 -1
  45. pandas_market_calendars-5.0.0.dist-info/RECORD +0 -50
  46. {pandas_market_calendars-5.0.0.dist-info → pandas_market_calendars-5.1.1.dist-info}/licenses/LICENSE +0 -0
  47. {pandas_market_calendars-5.0.0.dist-info → pandas_market_calendars-5.1.1.dist-info}/licenses/NOTICE +0 -0
  48. {pandas_market_calendars-5.0.0.dist-info → pandas_market_calendars-5.1.1.dist-info}/top_level.txt +0 -0
@@ -1,1147 +1,1151 @@
1
- """
2
- Utilities to use with market_calendars
3
- """
4
-
5
- import itertools
6
- from math import ceil, floor
7
- from typing import TYPE_CHECKING, Any, Dict, Iterable, Literal, Tuple, Union
8
- import warnings
9
-
10
- from re import finditer, split
11
- import numpy as np
12
- import pandas as pd
13
-
14
- if TYPE_CHECKING:
15
- from pandas.tseries.offsets import CustomBusinessDay
16
- from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday
17
-
18
# Default mapping of internal session keys -> labels returned by mark_session().
DEFAULT_LABEL_MAP = {
    "pre": "pre",
    "rth_pre_break": "rth",
    "rth": "rth",
    "break": "break",
    "rth_post_break": "rth",
    "post": "post",
    "closed": "closed",
}


def mark_session(
    schedule: pd.DataFrame,
    timestamps: pd.DatetimeIndex,
    label_map: Union[Dict[str, Any], None] = None,
    *,
    closed: Literal["left", "right"] = "right",
) -> pd.Series:
    """
    Return a Series that denotes the trading session of each timestamp in a DatetimeIndex.
    The returned Series's Index is the provided Datetime Index, the Series's values
    are the timestamps' corresponding session.

    PARAMETERS:

    :param schedule: The market schedule to check the timestamps against. This Schedule must include
        all of the trading days that are in the provided DatetimeIndex of timestamps.
        Note: The columns need to be sorted into ascending order, if not, then an error will be
        raised saying the bins must be in ascending order.

    :param timestamps: A DatetimeIndex of Timestamps to check. Must be sorted in ascending order.

    :param label_map: Optional mapping of Dict[str, Any] to change the values returned in the
        series. The keys of the given mapping should match the keys of the default dict, but the
        values can be anything. A subset of mappings may also be provided, e.g. {'closed':-1} will
        only change the label of the 'closed' session. All others will remain the default label.
        Defaults to None (use DEFAULT_LABEL_MAP unchanged).

    >>> Default Mapping == {
        "pre": "pre",
        "rth_pre_break": "rth",    # When the Schedule has a break
        "rth": "rth",              # When the Schedule doesn't have a break
        "break": "break",          # When the Schedule has a break
        "rth_post_break": "rth",   # When the Schedule has a break
        "post": "post",
        "closed": "closed",
    }

    :param closed: Which side of each interval should be closed (inclusive)
        left: == [start, end)
        right: == (start, end]

    :raises ValueError: When the schedule does not cover the full span of the timestamps.
    """
    # Fix: the original signature used a mutable default argument ({}). Use a
    # None sentinel instead; behavior is identical for callers passing {}.
    label_map = {} if label_map is None else label_map

    # ---- ---- ---- Determine which columns need to be dropped ---- ---- ----
    session_labels = ["closed"]
    columns = set(schedule.columns)
    needed_cols = set()

    def _extend_statement(session: str, parts: set):
        # Only label a session when every column bounding it is present.
        if parts.issubset(columns):
            needed_cols.update(parts)
            session_labels.append(session)

    _extend_statement("pre", {"pre", "market_open"})
    if {"break_start", "break_end"}.issubset(columns):
        _extend_statement("rth_pre_break", {"market_open", "break_start"})
        _extend_statement("break", {"break_start", "break_end"})
        _extend_statement("rth_post_break", {"break_end", "market_close"})
    else:
        _extend_statement("rth", {"market_open", "market_close"})
    _extend_statement("post", {"market_close", "post"})

    # ---- ---- ---- Error Check ---- ---- ----
    if len(extra_cols := columns - needed_cols) > 0:
        # Unrecognized columns would corrupt the bin edges below; drop and warn.
        schedule = schedule.drop(columns=[*extra_cols])
        warnings.warn(
            f"Attempting to mark trading sessions and the schedule ({columns = }) contains the "
            f"extra columns: {extra_cols}. Returned sessions may not be labeled as desired."
        )

    start = timestamps[0]
    end = timestamps[-1]
    if start < schedule.iloc[0, 0]:  # type: ignore
        raise ValueError(
            f"Insufficient Schedule. Needed Start-Time: {start.normalize().tz_localize(None)}. "
            f"Schedule starts at: {schedule.iloc[0, 0]}"
        )
    if end > schedule.iloc[-1, -1]:  # type: ignore
        raise ValueError(
            f"Insufficient Schedule. Needed End-Time: {end.normalize().tz_localize(None)}. "
            f"Schedule ends at: {schedule.iloc[-1, -1]}"
        )

    # Trim the schedule to match the timeframe covered by the given timeseries
    schedule = schedule[
        (schedule.index >= start.normalize().tz_localize(None))
        & (schedule.index <= end.normalize().tz_localize(None))
    ]

    # Merge user overrides on top of the defaults, then build one label per bin
    # (per schedule row, in session order) plus a trailing 'closed' label.
    backfilled_map = DEFAULT_LABEL_MAP | label_map
    mapped_labels = [backfilled_map[label] for label in session_labels]
    labels = pd.Series([mapped_labels]).repeat(len(schedule)).explode()
    labels = pd.concat([labels, pd.Series([backfilled_map["closed"]])])

    # Append on additional Edge-Case Bins so result doesn't include NaNs
    bins = schedule.to_numpy().flatten()
    bins = np.insert(bins, 0, bins[0].normalize())
    bins = np.append(bins, bins[-1].normalize() + pd.Timedelta("1D"))

    bins, _ind, _counts = np.unique(bins, return_index=True, return_counts=True)

    if len(bins) - 1 != len(labels):
        # np.unique dropped duplicate bin edges, drop the associated labels too.
        label_inds = (_ind + _counts - 1)[:-1]
        labels = labels.iloc[label_inds]

    return pd.Series(
        pd.cut(timestamps, bins, closed != "left", labels=labels, ordered=False),  # type: ignore
        index=timestamps,
    )
136
-
137
-
138
def merge_schedules(schedules, how="outer"):
    """
    Combine a list of schedules into a single merged schedule.

    'outer' keeps the superset of open times (open when ANY market is open),
    'inner' keeps the intersection (open only when ALL markets are open).

    CAVEATS:
    * Schedules with breaks lose their break information in the result.
    * Only "market_open" and "market_close" are considered; other market times
      are not yet supported.

    :param schedules: list of schedule DataFrames
    :param how: "outer" or "inner"
    :return: merged schedule DataFrame with market_open / market_close columns
    """
    every_column = list(itertools.chain.from_iterable(sched.columns for sched in schedules))
    if ("break_start" in every_column) or ("break_end" in every_column):
        warnings.warn("Merge schedules will drop the break_start and break_end from result.")

    merged = schedules[0]
    for other in schedules[1:]:
        merged = merged.merge(other, how=how, right_index=True, left_index=True)

        # Pick the combining functions: outer widens the session, inner narrows it.
        if how == "outer":
            pick_open, pick_close = min, max
        elif how == "inner":
            pick_open, pick_close = max, min
        else:
            raise ValueError('how argument must be "inner" or "outer"')

        merged["market_open"] = merged.apply(
            lambda row: pick_open(row.market_open_x, row.market_open_y), axis=1
        )
        merged["market_close"] = merged.apply(
            lambda row: pick_close(row.market_close_x, row.market_close_y), axis=1
        )
        merged = merged[["market_open", "market_close"]]
    return merged
169
-
170
-
171
def is_single_observance(holiday: "Holiday"):
    "Return the Holiday's date when it is observed exactly once, None otherwise."
    # A rule bounded to a single day (start == end) is a one-off observance.
    if holiday.start_date == holiday.end_date:  # type: ignore ??
        return holiday.start_date
    return None


def all_single_observance_rules(calendar: "AbstractHolidayCalendar"):
    "Return a list of timestamps when every rule is a single observance, None otherwise."
    observances = [is_single_observance(rule) for rule in calendar.rules]
    if all(observances):
        return observances
    return None
180
-
181
-
182
def convert_freq(index, frequency):
    """
    Convert a DatetimeIndex to a new, lower frequency.

    :param index: DateTimeIndex
    :param frequency: frequency string
    :return: DateTimeIndex
    """
    # asfreq() on an empty frame resamples the index without touching any data.
    empty_frame = pd.DataFrame(index=index)
    return empty_frame.asfreq(frequency).index
191
-
192
-
193
# Valid session names accepted by date_range().
SESSIONS = Literal[
    "pre",
    "post",
    "RTH",
    "pre_break",
    "post_break",
    "ETH",
    "break",
    "closed",
    "closed_masked",
]
# Valid schedule column names.
MKT_TIMES = Literal["pre", "post", "market_open", "market_close", "break_start", "break_end"]


# region ---- ---- ---- Date Range Warning Types ---- ---- ----
class DateRangeWarning(UserWarning):
    "Super Class to all Date_range Warning Types"


class OverlappingSessionWarning(DateRangeWarning):
    """
    Raised when date_range() is called with a timedelta larger than the gap between
    two sessions, causing the generated timestamps of one session to overlap the next.
    Only an issue when closed='right'/'both'/None and force_close=None.

    For example, the following warns because the 10:00 timestamp belongs to the 'pre'
    session yet lands after the start of the 9:30 'RTH' session:
    >>> date_range(NYSE, '2h', 'right', None, {'pre', 'RTH'}, merge_adjacent = False)
    >>> ['2020-01-02 06:00:00', '2020-01-02 08:00:00',
         '2020-01-02 10:00:00', '2020-01-02 11:30:00',
         '2020-01-02 13:30:00', '2020-01-02 15:30:00',
         '2020-01-02 17:30:00'],
    This is particularly convoluted when closed='both'/None:
    >>> date_range(NYSE, '2h', 'both', None, {'pre', 'RTH'}, merge_adjacent = False)
    >>> ['2020-01-02 04:00:00' (pre), '2020-01-02 06:00:00' (pre),
         '2020-01-02 08:00:00' (pre), '2020-01-02 09:30:00' (rth),
         '2020-01-02 10:00:00' (pre), '2020-01-02 11:30:00' (rth),
         '2020-01-02 13:30:00' (rth), '2020-01-02 15:30:00' (rth),
         '2020-01-02 17:30:00' (rth)],
    """


class DisappearingSessionWarning(DateRangeWarning):
    """
    Raised when date_range() is called with a timedelta larger than an entire session,
    making that session disappear from the returned DatetimeIndex.

    Only an issue when closed='right' and force_close=False.
    """


class MissingSessionWarning(DateRangeWarning):
    """
    Raised when date_range() is asked for a session but the schedule lacks the columns
    needed to build it. If ignored, the returned DatetimeIndex simply omits that session.

    e.g. 'pre' session requested while the schedule lacks 'pre' and/or 'market_open'.
    """


class InsufficientScheduleWarning(DateRangeWarning):
    """
    Raised when date_range() needs periods, or a start/end date, beyond what the given
    schedule covers.

    If the schedule is insufficient at both ends this warning is thrown twice.

    When thrown for a requested number of periods, the reported start/end date is an
    approximation, deliberately overestimating the needed range by about one week to
    limit repeated warnings on successive calls.
    """


def filter_date_range_warnings(
    action: Literal["error", "ignore", "always", "default", "once"],
    source: Union[Iterable[type[DateRangeWarning]], type[DateRangeWarning]] = DateRangeWarning,
):
    """
    Adjust the behavior of the date_range() warnings to the desired action.

    :param action: - The desired change to the warning behavior
        'error': Escalate Warnings into Errors
        'ignore': Silence Warning Messages
        'once': Only display a message of the given category once
        'default': Reset the behavior of the given warning category
        'always': Always show the Warning of a given category

    :param source: - The Category/Categories to apply the action to.
        A single warning class or an iterable of them.
        default: DateRangeWarning (all date_range warnings)
        Warning Types: MissingSessionWarning, OverlappingSessionWarning,
        DisappearingSessionWarning, InsufficientScheduleWarning
    """
    # Normalize a lone warning class into a 1-tuple, then register each filter.
    categories = source if isinstance(source, Iterable) else (source,)
    for category in categories:
        warnings.filterwarnings(action, category=category)


def parse_missing_session_warning(
    err: MissingSessionWarning,
) -> Tuple[set[SESSIONS], set[MKT_TIMES]]:
    """
    Parse a Missing Session Warning's message.
    :returns Tuple[set[str], set[str]]:
        Set #1: The Missing Sessions
        Set #2: The Missing Schedule Columns
    """
    # Message embeds two python sets; strip quotes then split on the braces.
    pieces = split(r"[{|}]", err.args[0].replace("'", ""))
    sessions = set(pieces[1].split(", "))
    columns = set(pieces[3].split(", "))
    return sessions, columns  # type: ignore


def parse_insufficient_schedule_warning(
    err: InsufficientScheduleWarning,
) -> Tuple[bool, pd.Timestamp, pd.Timestamp]:
    """
    Parse the information from an Insufficient Schedule Warning.
    :returns Tuple[bool, pd.Timestamp, pd.Timestamp]:
        bool: True == Range is missing from the start, False == Range missing from the end
        Timestamp 1: Start of missing range
        Timestamp 2: End of the missing range.
    Note: The Timestamps are always ordered (t1 <= t2) and do not overlap with the
    original schedule, so a supplemental schedule can be concatenated directly.
    """
    dates = finditer(r"\d{4}-\d{2}-\d{2}", err.args[0])
    missing_at_start = "Start-Time" in err.args[0]
    first = pd.Timestamp(next(dates).group())
    second = pd.Timestamp(next(dates).group())

    # Step the schedule-edge date one day outward so the ranges never overlap.
    if missing_at_start:
        second -= pd.Timedelta("1D")
    else:
        second += pd.Timedelta("1D")

    if first <= second:
        return missing_at_start, first, second
    return missing_at_start, second, first


# endregion
335
-
336
-
337
def date_range(
    schedule: pd.DataFrame,
    frequency: Union[str, pd.Timedelta, int, float],
    closed: Union[Literal["left", "right", "both"], None] = "right",
    force_close: Union[bool, None] = True,
    session: Union[SESSIONS, Iterable[SESSIONS]] = {"RTH"},
    merge_adjacent: bool = True,
    start: Union[str, pd.Timestamp, int, float, None] = None,
    end: Union[str, pd.Timestamp, int, float, None] = None,
    periods: Union[int, None] = None,
) -> pd.DatetimeIndex:
    """
    Interpolates a Market's Schedule at the desired frequency and returns the result as a DatetimeIndex.
    This function is only valid for periods less than 1 Day, for longer periods use date_range_htf().

    Note: The slowest part of this function is by far generating the necessary schedule (which in
    turn is limited by pandas' date_range() function). If speed is a concern, store and update the
    schedule as needed instead of generating it every time.

    WARNINGS SYSTEM:
    *There are multiple edge-case warnings that are thrown by this function. See the Docstrings
    of each warning for more info. (DateRangeWarning, InsufficientScheduleWarning,
    MissingSessionWarning, OverlappingSessionWarning, DisappearingSessionWarning)

    *The thrown warnings can be ignored or escalated into catchable errors by using the
    filter_date_range_warnings() function.

    parse_missing_session_warning() & parse_insufficient_schedule_warning() exist to easily
    process those warnings if they are escalated into errors.

    PARAMETERS:

    :param schedule: Schedule of a calendar which includes all the columns necessary
        for the desired sessions.

    :param frequency: String, Int/float (seconds) or pd.Timedelta that represents the desired
        interval of the date_range. Intervals larger than 1D are not supported.

    :param closed: the way the intervals are labeled
        'right': use the end of the interval
        'left': use the start of the interval
        None / 'both': use the end of the interval but include the start of the first interval

    :param force_close: How the last value of a trading session is handled
        True: guarantee that the close of the trading session is the last value
        False: guarantee that there is no value greater than the close of the trading session
        None: leave the last value as it is calculated based on the closed parameter

    :param session: A str representing a single session or an Iterable of the following Sessions.
        RTH: The Default Option. This is [Market_open, Market_close], if the schedule includes a
            break then the break is excluded from the returned datetime index.
        ETH: [pre, market_open] & [market_close, post]
        pre: [pre, market_open]
        post: [market_close, post]
        break: [break_start, break_end]
        pre_break: [market_open, break_start]
        post_break: [break_end, market_close]
        closed: [market_close, market_open (of the next day)] If ETH market times are given then
            this will be [post, pre (of the next day)] instead. The last session will end at
            Midnight of the timezone the schedule is given in.
        closed_masked: Same as closed, but Weekends & Holidays are ignored. Instead, the Datetime
            index stops at Midnight on the trading day before the break and resumes at midnight
            prior to the next trading day. **Note: This is Midnight of the Timezone the schedule is
            given in, not Midnight of the exchange's tz since the exchange's tz is not known.

    :param merge_adjacent: Bool representing if adjacent sessions should be merged into a single session.
        For Example, NYSE w/ session={'RTH', 'ETH'}, frequency=2h, closed=left, force_close=False
        merge_adjacent == True => [pre, post]
        >>> ['2020-01-02 04:00:00', '2020-01-02 06:00:00',
             '2020-01-02 08:00:00', '2020-01-02 10:00:00',
             '2020-01-02 12:00:00', '2020-01-02 14:00:00',
             '2020-01-02 16:00:00', '2020-01-02 18:00:00']
        merge_adjacent == False => [pre, market_open] & [market_open, market_close] & [market_close, post]
        >>> ['2020-01-02 04:00:00', '2020-01-02 06:00:00',
             '2020-01-02 08:00:00', '2020-01-02 09:30:00',
             '2020-01-02 11:30:00', '2020-01-02 13:30:00',
             '2020-01-02 15:30:00', '2020-01-02 16:00:00',
             '2020-01-02 18:00:00']
        merge_adjacent=False re-aligns the timestamps to the session, but this results in
        the difference between timestamps not always equaling the desired frequency.

    :param start: Optional [String, Int/float (POSIX seconds) or pd.Timestamp] of the desired start time.
        :If left as None then the start-time of the the Schedule is used.
        :If no TZ info is given it will be interpreted in the same timezone as the first column
            of the schedule
        :Start can be a Day and Time, but the returned index will still be aligned to the underlying
            schedule. e.g. Session = [9:30am, 12pm], frequency=7min, start=9:45am. Underlying session
            = [9:30, 9:37, 9:44, 9:51, ...] => returned DatetimeIndex = [9:51, ...]

    :param end: Optional [String, Int/float (POSIX seconds) or pd.Timestamp] of the desired end time.
        :If left as None then the end-time of the the Schedule is used.
        :If no TZ info is given it will be interpreted in the same timezone as the first column
        **Note: The time given is an absolute value. i.e. end="2020-01-01" == "2020-01-01 00:00"
            returning times prior to Midnight of "2019-12-31", not to the EOD of "2020-01-01"

    :param periods: Optional Integer number of periods to return. If a Period count, Start time,
        and End time are given the period count is ignored.
        None: Period count is ignored. Returned index is all periods in [Start, End]
        Int: # of periods to return. By default, this is the first N periods following the start.
            If an end time is given then this is the N periods prior to the End Time (inclusive).
        CAVEAT: When Force_close == False & closed == 'right'/'both' the number of periods returned
            may be less than the parameter given.

    :return: pd.DatetimeIndex of datetime64[ns, TZ-Aware]
    """
    # ---- ---- Error Check Inputs ---- ----
    if closed not in ("left", "right", "both", None):
        raise ValueError("closed must be 'left', 'right', 'both' or None.")
    if force_close not in (True, False, None):
        raise ValueError("force_close must be True, False or None.")
    if merge_adjacent not in (True, False):
        raise ValueError("merge_adjacent must be True or False")

    # ---- ---- Standardize Frequency Param ---- ----
    if isinstance(frequency, (int, float)):
        # Numeric frequencies are given in seconds; pd.Timedelta(int) expects nanoseconds.
        frequency = int(frequency * 1_000_000_000)
    try:
        frequency = pd.Timedelta(frequency)
    except ValueError as e:
        raise ValueError(f"Market Calendar Date_range Timeframe Error: {e}") from e
    if frequency <= pd.Timedelta("0s"):
        raise ValueError("Market Calendar Date_Range Frequency must be Positive.")
    if frequency > pd.Timedelta("1D"):
        raise ValueError("Market Calendar Date_Range Frequency Cannot Be longer than '1D'.")

    # Build the list of (session start col, session end col) pairs to interpolate.
    session_list, mask = _make_session_list(set(schedule.columns), session, merge_adjacent)
    if len(session_list) == 0:
        # None of the requested sessions could be built from the schedule's columns.
        return pd.DatetimeIndex([], dtype="datetime64[ns, UTC]")

    session_times = _reconfigure_schedule(schedule, session_list, mask)
    # Trim off all 0 length sessions
    session_times = session_times[session_times.start.ne(session_times.end)]
    _error_check_sessions(session_times, frequency, closed, force_close)

    tz = schedule[session_list[0][0]].dt.tz  # copy tz info from schedule
    dtype = schedule[session_list[0][0]].dtype  # copy dtype info from schedule
    start, end, periods = _standardize_times(schedule, start, end, periods, tz)

    time_series = _calc_time_series(session_times, frequency, closed, force_close, start, end, periods)
    time_series.name = None

    return pd.DatetimeIndex(time_series, tz=tz, dtype=dtype)
479
-
480
-
481
- # region ------------------ Date Range LTF Subroutines ------------------
482
-
483
-
484
def _make_session_list(columns: set, sessions: Union[str, Iterable], merge_adjacent: bool) -> Tuple[list, bool]:
    "Create a list of (Session Start, Session End) Tuples"
    session_times = []
    missing_cols = set()
    missing_sess = set()
    # Normalize to a set; a bare string is treated as a single requested session.
    sessions = {sessions} if isinstance(sessions, str) else set(sessions)

    if len(extras := sessions.difference(set(SESSIONS.__args__))) > 0:  # type: ignore
        raise ValueError(f"Unknown Date_Range Market Session: {extras}")

    if "ETH" in sessions:  # Standardize ETH to 'pre' and 'post'
        sessions = sessions - {"ETH"} | {"pre", "post"}
    if "closed_masked" in sessions:  # closed_masked == 'closed' for this step
        sessions |= {"closed"}
    if "pre" in columns:  # Add wrap-around sessions
        columns |= {"pre_wrap"}
    if "market_open" in columns:
        columns |= {"market_open_wrap"}

    def _extend_statement(session, parts):
        # Append the session's boundary columns when all are present; otherwise
        # record what was requested but unavailable so a warning can be raised below.
        if session not in sessions:
            return
        if columns.issuperset(parts):
            session_times.extend(parts)
        else:
            missing_sess.update({session})
            missing_cols.update(set(parts) - columns)

    # Append session_start, session_end for each desired session *in session order*
    _extend_statement("pre", ("pre", "market_open"))
    if {"break_start", "break_end"}.issubset(columns):
        # If the schedule has breaks then sub-divide RTH into pre & post break sessions
        if "RTH" in sessions:
            sessions = sessions - {"RTH"} | {"pre_break", "post_break"}
        _extend_statement("pre_break", ("market_open", "break_start"))
        _extend_statement("break", ("break_start", "break_end"))
        _extend_statement("post_break", ("break_end", "market_close"))
    else:
        _extend_statement("RTH", ("market_open", "market_close"))
    _extend_statement("post", ("market_close", "post"))

    # Closed can mean [close, open], [close, pre], [pre, post], or [post, open] Adjust accordingly
    s_start = "post" if "post" in columns else "market_close"
    s_end = "pre_wrap" if "pre" in columns else "market_open_wrap"
    _extend_statement("closed", (s_start, s_end))

    if len(missing_sess) > 0:
        warnings.warn(
            f"Requested Sessions: {missing_sess}, but schedule is missing columns: {missing_cols}."
            "\nResulting DatetimeIndex will lack those sessions. ",
            category=MissingSessionWarning,
        )

    if merge_adjacent:
        # Adjacent sessions share a boundary column; dropping both occurrences of
        # the shared boundary fuses the two sessions into one continuous span.
        drop_set = set()
        for i in range(1, len(session_times) - 1, 2):
            if session_times[i] == session_times[i + 1]:
                drop_set |= {session_times[i]}

        # Guaranteed to drop in pairs => no check needed before zipping
        session_times = [t for t in session_times if t not in drop_set]

    # Zip the flat list into a list of pairs
    session_pairs = list(zip(*(iter(session_times),) * 2))

    return session_pairs, "closed_masked" in sessions
550
-
551
-
552
- def _standardize_times(schedule, start, end, periods, tz) -> Tuple[pd.Timestamp, pd.Timestamp, Union[int, None]]:
553
- "Standardize start and end into a timestamp of the relevant timezone"
554
- if all((start, end, periods)):
555
- periods = None # Ignore Periods if all 3 params are given.
556
-
557
- if start is not None:
558
- if isinstance(start, (int, float)):
559
- start *= 1_000_000_000
560
- try:
561
- start = pd.Timestamp(start)
562
- if start.tz is None:
563
- start = start.tz_localize(tz)
564
- except ValueError as e:
565
- raise ValueError(f"Invalid Time ({start = }) given to date_range()") from e
566
-
567
- if start < schedule.index[0].tz_localize(tz):
568
- warnings.warn(
569
- f"Insufficient Schedule. Requested Start-Time: {start.normalize().tz_localize(None)}. "
570
- f"Schedule starts at: {schedule.index[0].normalize().tz_localize(None)}",
571
- category=InsufficientScheduleWarning,
572
- )
573
-
574
- if end is not None:
575
- if isinstance(end, (int, float)):
576
- end *= 1_000_000_000
577
- try:
578
- end = pd.Timestamp(end)
579
- if end.tz is None and tz is not None:
580
- end = end.tz_localize(tz)
581
- except ValueError as e:
582
- raise ValueError(f"Invalid Time ({end = }) given to date_range()") from e
583
-
584
- if end > schedule.index[-1].tz_localize(tz) + pd.Timedelta("1D"):
585
- # Checking against the day and not the specific session since so requesting a time
586
- # after the last session's close but before the next day doesn't throw a warning.
587
- requested_end = end.normalize().tz_localize(None) - pd.Timedelta("1D")
588
- warnings.warn(
589
- f"Insufficient Schedule. Requested End-Time: {requested_end}. "
590
- f"Schedule ends at: {schedule.index[-1].normalize().tz_localize(None)}",
591
- category=InsufficientScheduleWarning,
592
- )
593
-
594
- if start is not None and end is not None and start > end:
595
- raise ValueError(
596
- "Date_range() given a start-date that occurs after the given end-date. " f"{start = }, {end = }"
597
- )
598
-
599
- return start, end, periods
600
-
601
-
602
- def _reconfigure_schedule(schedule, session_list, mask_close) -> pd.DataFrame:
603
- "Reconfigure a schedule into a sorted dataframe of [start, end] times for each session"
604
-
605
- sessions = []
606
-
607
- for start, end in session_list:
608
- if not end.endswith("_wrap"):
609
- # Simple Session where 'start' occurs before 'end'
610
- sessions.append(
611
- schedule[[start, end]].rename(columns={start: "start", end: "end"}).set_index("start", drop=False)
612
- )
613
- continue
614
-
615
- # 'closed' Session that wraps around midnight. Shift the 'end' col by 1 Day
616
- end = end.rstrip("_wrap")
617
- tmp = pd.DataFrame(
618
- {
619
- "start": schedule[start],
620
- "end": schedule[end].shift(-1),
621
- }
622
- ).set_index("start", drop=False)
623
-
624
- # Shift(-1) leaves last index of 'end' as 'NaT'
625
- # Set the [-1, 'end' ('end' === 1)] cell to Midnight of the 'start' time of that row.
626
- tmp.iloc[-1, 1] = tmp.iloc[-1, 0].normalize() + pd.Timedelta("1D") # type: ignore
627
-
628
- if mask_close:
629
- # Do some additional work to split 'closed' sessions that span weekends/holidays
630
- sessions_to_split = tmp["end"] - tmp["start"] > pd.Timedelta("1D")
631
-
632
- split_strt = tmp[sessions_to_split]["start"]
633
- split_end = tmp[sessions_to_split]["end"]
634
-
635
- sessions.append(
636
- pd.DataFrame( # From start of the long close to Midnight
637
- {
638
- "start": split_strt,
639
- "end": split_strt.dt.normalize() + pd.Timedelta("1D"),
640
- }
641
- ).set_index("start", drop=False)
642
- )
643
- sessions.append(
644
- pd.DataFrame( # From Midnight to the end of the long close
645
- {
646
- "start": split_end.dt.normalize(),
647
- "end": split_end,
648
- }
649
- ).set_index("start", drop=False)
650
- )
651
-
652
- # leave tmp as all the sessions that were not split
653
- tmp = tmp[~sessions_to_split]
654
-
655
- sessions.append(tmp)
656
-
657
- return pd.concat(sessions).sort_index()
658
-
659
-
660
- def _error_check_sessions(session_times, timestep, closed, force_close):
661
- if session_times.start.gt(session_times.end).any():
662
- raise ValueError(
663
- "Desired Sessions from the Schedule contain rows where session start < session end, "
664
- "please correct the schedule"
665
- )
666
-
667
- # Disappearing Session
668
- if force_close is False and closed == "right":
669
- # only check if needed
670
- if (session_times.end - session_times.start).lt(timestep).any():
671
- warnings.warn(
672
- "An interval of the chosen frequency is larger than some of the trading sessions, "
673
- "while closed='right' and force_close=False. This will make those trading sessions "
674
- "disappear. Use a higher frequency or change the values of closed/force_close, to "
675
- "keep this from happening.",
676
- category=DisappearingSessionWarning,
677
- )
678
-
679
- # Overlapping Session
680
- if force_close is None and closed != "left":
681
- num_bars = _num_bars_ltf(session_times, timestep, closed)
682
- end_times = session_times.start + num_bars * timestep
683
-
684
- if end_times.gt(session_times.start.shift(-1)).any():
685
- warnings.warn(
686
- "The desired frequency results in date_range() generating overlapping sessions. "
687
- "This can happen when the timestep is larger than a session, or when "
688
- "merge_session = False and a session is not evenly divisible by the timestep. "
689
- "The overlapping timestep can be deleted with force_close = True or False",
690
- category=OverlappingSessionWarning,
691
- )
692
-
693
-
694
- def _num_bars_ltf(session_times, timestep, closed) -> pd.Series:
695
- "Calculate the number of timestamps needed for each trading session."
696
- if closed in ("both", None):
697
- return np.ceil((session_times.end - session_times.start) / timestep) + 1
698
- else:
699
- return np.ceil((session_times.end - session_times.start) / timestep)
700
-
701
-
702
- def _course_trim_to_period_count(num_bars, periods, reverse) -> pd.Series:
703
- """
704
- Course Trim the Session times to the desired period count.
705
- Large enough of a sub-routine to merit its own function call.
706
- """
707
- if reverse:
708
- # If end-date is given calculate sum in reverse order
709
- num_bars = num_bars[::-1]
710
-
711
- _sum = num_bars.cumsum()
712
-
713
- if _sum.iloc[-1] < periods:
714
- # Insufficient Number of Periods. Try to estimate an ending time from the data given.
715
- # delta = (end_date - start_date) / (cumulative # of periods) * (periods still needed) * fudge factor
716
- delta = abs(
717
- # (end_date - start_date) / (cumulative # of periods)
718
- ((_sum.index[-1] - _sum.index[0]) / _sum.iloc[-1])
719
- * (periods - _sum.iloc[-1]) # (periods still needed)
720
- * 1.05 # (Fudge Factor for weekends/holidays)
721
- )
722
- # delta = math.ceil(delta) + '1W'
723
- delta = (delta // pd.Timedelta("1D") + 8) * pd.Timedelta("1D")
724
- # The 1.05 Factor handles when the schedule is short by a few months, the + '1W' handles
725
- # when the schedule is short by only a few periods. While 1 Week is absolute overkill,
726
- # generating the extra few days is very little extra cost compared to throwing this error
727
- # a second or even third time.
728
-
729
- if reverse:
730
- approx_start = _sum.index[-1] - delta
731
- warnings.warn(
732
- f"Insufficient Schedule. Requested Approx Start-Time: {approx_start}. "
733
- f"Schedule starts at: {_sum.index[-1].normalize().tz_localize(None)}",
734
- category=InsufficientScheduleWarning,
735
- )
736
- else:
737
- approx_end = _sum.index[-1] + delta
738
- warnings.warn(
739
- f"Insufficient Schedule. Requested Approx End-Time: {approx_end}. "
740
- f"Schedule ends at: {_sum.index[-1].normalize().tz_localize(None)}",
741
- category=InsufficientScheduleWarning,
742
- )
743
-
744
- sessions_to_keep = _sum < periods
745
- # Shifting Ensures the number of needed periods are generated, but no more.
746
- sessions_to_keep = sessions_to_keep.shift(1, fill_value=True)
747
-
748
- if reverse:
749
- # If end-date is given calculate un-reverse the order of the series
750
- sessions_to_keep = sessions_to_keep[::-1]
751
-
752
- return sessions_to_keep
753
-
754
-
755
- def _calc_time_series(session_times, timestep, closed, force_close, start, end, periods) -> pd.Series:
756
- "Interpolate each session into a datetime series at the desired frequency."
757
- # region ---- ---- ---- Trim the Sessions ---- ---- ----
758
- # Compare 'start' to the session end times so that if 'start' is in the middle of a session
759
- # that session remains in session_times. Vise-vera for End
760
- if start is not None:
761
- session_times = session_times[session_times.end > start]
762
- if end is not None:
763
- session_times = session_times[session_times.start < end]
764
- if len(session_times) == 0:
765
- return pd.Series([])
766
-
767
- # Override the First Session's Start and Last Session's End times if needed
768
- if start is not None and start > session_times.loc[session_times.index[0], "start"]:
769
- # Align the start to a multiple of the timestep after the session's beginning.
770
- # This is to make the returned DTIndex consistent across all start/end/period settings.
771
- session_start = session_times.loc[session_times.index[0], "start"]
772
- start_aligned = session_start + (ceil((start - session_start) / timestep) * timestep)
773
- session_times.loc[session_times.index[0], "start"] = start_aligned
774
- if end is not None and end < session_times.loc[session_times.index[-1], "end"]:
775
- session_start = session_times.loc[session_times.index[0], "start"]
776
- end_aligned = session_start + (floor((end - session_start) / timestep) * timestep)
777
- session_times.loc[session_times.index[-1], "end"] = end_aligned
778
-
779
- num_bars = _num_bars_ltf(session_times, timestep, closed)
780
-
781
- if periods is not None:
782
- sessions_to_keep = _course_trim_to_period_count(num_bars, periods, end is not None)
783
- num_bars = num_bars[sessions_to_keep]
784
- session_times = session_times[sessions_to_keep]
785
-
786
- # endregion
787
-
788
- starts = session_times.start.repeat(num_bars) # type: ignore
789
-
790
- if closed == "right":
791
- # Right side of addition is cumulative time since session start in multiples of timestep
792
- time_series = starts + (starts.groupby(starts.index).cumcount() + 1) * timestep
793
- else:
794
- time_series = starts + (starts.groupby(starts.index).cumcount()) * timestep
795
-
796
- if force_close is not None:
797
- # Trim off all timestamps that stretched beyond their intended session
798
- time_series = time_series[time_series.le(session_times.end.repeat(num_bars))]
799
-
800
- if force_close:
801
- time_series = pd.concat([time_series, session_times.end])
802
-
803
- time_series = time_series.drop_duplicates().sort_values() # type: ignore
804
-
805
- if periods is not None and len(time_series) > 0:
806
- # Although likely redundant, Fine Trim to desired period count.
807
- if end is not None:
808
- s_len = len(time_series)
809
- time_series = time_series[max(s_len - periods, 0) : s_len]
810
- else:
811
- time_series = time_series[0:periods]
812
-
813
- return time_series
814
-
815
-
816
- # endregion
817
-
818
-
819
- PeriodCode = Literal["D", "W", "M", "Q", "Y"]
820
- Day_Anchor = Literal["SUN", "MON", "TUE", "WED", "THU", "FRI", "SAT"]
821
- Month_Anchor = Literal["JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"]
822
-
823
- # These needed because the pandas Period Object is stupid and not consistant w/ date_range.
824
- # pd.date_range(s,e, freq = 'W-SUN') == [DatetimeIndex of all sundays] (as Expected)
825
- # but, pd.Timestamp([A Sunday]).to_period('W-SUN').start_time == [The Monday Prior???]
826
- days_rolled = list(Day_Anchor.__args__)
827
- days_rolled.insert(0, days_rolled.pop())
828
- weekly_roll_map = dict(zip(Day_Anchor.__args__, days_rolled))
829
-
830
- months_rolled = list(Month_Anchor.__args__)
831
- months_rolled.insert(0, months_rolled.pop())
832
- yearly_roll_map = dict(zip(Month_Anchor.__args__, months_rolled))
833
-
834
-
835
- def date_range_htf(
836
- cal: "CustomBusinessDay",
837
- frequency: Union[str, pd.Timedelta, int, float],
838
- start: Union[str, pd.Timestamp, int, float, None] = None,
839
- end: Union[str, pd.Timestamp, int, float, None] = None,
840
- periods: Union[int, None] = None,
841
- closed: Union[Literal["left", "right"], None] = "right",
842
- *,
843
- day_anchor: Day_Anchor = "SUN",
844
- month_anchor: Month_Anchor = "JAN",
845
- ) -> pd.DatetimeIndex:
846
- """
847
- Returns a Normalized DatetimeIndex from the start-date to End-Date for Time periods of 1D and Higher.
848
-
849
- Unless using a custom calendar, it is advised to call the date_range_htf() method of the desired calendar.
850
- This is because default_anchors may change, or a single calendar may not be sufficient to model a market.
851
-
852
- For example, NYSE has two calendars: The first covers pre-1952 where saturdays were trading days. The second
853
- covers post-1952 where saturdays are closed.
854
-
855
- PARAMETERS:
856
-
857
- :param cal: CustomBuisnessDay Calendar associated with a MarketCalendar. This can be retieved by
858
- calling the holidays() method of a MarketCalendar.
859
-
860
- :param frequency: String, Int/float (POSIX seconds) or pd.Timedelta of the desired frequency.
861
- :Must be Greater than '1D' and an integer multiple of the base frequency (D, W, M, Q, or Y)
862
- :Important Note: Ints/Floats & Timedeltas are always considered as 'Open Business Days',
863
- '2D' == Every Other Buisness Day, '3D' == Every 3rd B.Day, '7D' == Every 7th B.Day
864
- :Higher periods (passed as strings) align to the beginning or end of the relevant period
865
- :i.e. '1W' == First/[Last] Trading Day of each Week, '1Q' == First/[Last] Day of every Quarter
866
-
867
- :param start: String, Int/float (POSIX seconds) or pd.Timestamp of the desired start time.
868
- :The Time & Timezone information is ignored. Only the Normalized Day is considered.
869
-
870
- :param end: String, Int/float (POSIX seconds) or pd.Timestamp of the desired start time.
871
- :The Time & Timezone information is ignored. Only the Normalized Day is considered.
872
-
873
- :param periods: Optional Integer number of periods to return. If a Period count, Start time,
874
- and End time are given the period count is ignored.
875
-
876
- :param closed: Literal['left', 'right']. Method used to close each range.
877
- :Left: First open trading day of the Session is returned (e.g. First Open Day of The Month)
878
- :right: Last open trading day of the Session is returned (e.g. Last Open Day of The Month)
879
- :Note, This has no effect when the desired frequency is a number of days.
880
-
881
- :param day_anchor: Day to Anchor the start of the Weekly timeframes to. Default 'SUN'.
882
- : To get the First/Last Days of the trading Week then the Anchor needs to be on a day the relevant
883
- market is closed.
884
- : This can be set so that a specific day each week is returned.
885
- : freq='1W' & day_anchor='WED' Will return Every 'WED' when the market is open, and nearest day
886
- to the left or right (based on 'closed') when the market is closed.
887
- Options: ["SUN", "MON", "TUE", "WED", "THU", "FRI", "SAT"]
888
-
889
- :param month_anchor: Month to Anchor the start of the year to for Quarter and yearly timeframes.
890
- : Default 'JAN' for Calendar Quarters/Years. Can be set to 'JUL' to return Fiscal Years
891
- Options: ["JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"]
892
- """
893
-
894
- start, end, periods = _error_check_htf_range(start, end, periods)
895
- mult, _period_code = _standardize_htf_freq(frequency)
896
-
897
- if _period_code == "D":
898
- if mult == 1:
899
- # When desiring a frequency of '1D' default to pd.date_range. It will give the same
900
- # answer but it is more performant than the method in _cal_day_range.
901
- return pd.date_range(start, end, periods, freq=cal)
902
- else:
903
- return _cal_day_range(cal, start, end, periods, mult)
904
-
905
- elif _period_code == "W":
906
- freq = str(mult) + "W-" + day_anchor.upper()
907
- grouping_period = "W-" + weekly_roll_map[day_anchor.upper()]
908
-
909
- return _cal_WMQY_range(cal, start, end, periods, freq, grouping_period, closed)
910
-
911
- elif _period_code == "M":
912
- freq = str(mult) + "M" + ("S" if closed == "left" else "E")
913
- return _cal_WMQY_range(cal, start, end, periods, freq, "M", closed)
914
-
915
- else: # Yearly & Quarterly Period
916
- freq = str(mult) + _period_code
917
- freq += (
918
- "S-" + month_anchor.upper()
919
- if closed == "left" # *Insert Angry Tom Meme Here*
920
- else "E-" + yearly_roll_map[month_anchor.upper()]
921
- )
922
- grouping_period = _period_code + "-" + yearly_roll_map[month_anchor.upper()]
923
-
924
- return _cal_WMQY_range(cal, start, end, periods, freq, grouping_period, closed)
925
-
926
-
927
- # region ---- ---- ---- Date Range HTF Subroutines ---- ---- ----
928
-
929
-
930
- def _error_check_htf_range(
931
- start, end, periods: Union[int, None]
932
- ) -> Tuple[Union[pd.Timestamp, None], Union[pd.Timestamp, None], Union[int, None]]:
933
- "Standardize and Error Check Start, End, and period params"
934
- if periods is not None:
935
- if not isinstance(periods, int):
936
- raise ValueError(f"Date_Range_HTF Must be either an int or None. Given {type(periods)}")
937
- if periods < 0:
938
- raise ValueError("Date_range_HTF Periods must be Positive.")
939
-
940
- if isinstance(start, (int, float)):
941
- start = int(start * 1_000_000_000)
942
- if isinstance(end, (int, float)):
943
- end = int(end * 1_000_000_000)
944
-
945
- if start is not None:
946
- start = pd.Timestamp(start).normalize().tz_localize(None)
947
- if end is not None:
948
- end = pd.Timestamp(end).normalize().tz_localize(None)
949
-
950
- if all((start, end, periods)):
951
- periods = None # Ignore Periods if passed too many params
952
- if len([param for param in (start, end, periods) if param is not None]) < 2:
953
- raise ValueError("Date_Range_HTF must be given two of the three following params: (start, end, periods)")
954
-
955
- if start is not None and end is not None and end < start:
956
- raise ValueError("Date_Range_HTF() Start-Date must be before the End-Date")
957
-
958
- return start, end, periods
959
-
960
-
961
- def _standardize_htf_freq(frequency: Union[str, pd.Timedelta, int, float]) -> Tuple[int, PeriodCode]:
962
- "Standardize the frequency multiplier and Code, throwing errors as needed."
963
- if isinstance(frequency, str):
964
- if len(frequency) == 0:
965
- raise ValueError("Date_Range_HTF Frequency is an empty string.")
966
- if len(frequency) == 1:
967
- frequency = "1" + frequency # Turn 'D' into '1D' for all period codes
968
- if frequency[-1].upper() in {"W", "M", "Q", "Y"}:
969
- try:
970
- if (mult := int(frequency[0:-1])) <= 0:
971
- raise ValueError()
972
- return mult, frequency[-1].upper() # type: ignore
973
- except ValueError as e:
974
- raise ValueError(
975
- "Date_Range_HTF() Week, Month, Quarter and Year frequency must "
976
- "have a positive integer multiplier"
977
- ) from e
978
-
979
- # All remaining frequencies (int, float, strs, & Timedeltas) are parsed as business days.
980
- if isinstance(frequency, (int, float)): # Convert To Seconds
981
- frequency = int(frequency * 1_000_000_000)
982
-
983
- frequency = pd.Timedelta(frequency)
984
- if frequency < pd.Timedelta("1D"):
985
- raise ValueError("Date_Range_HTF() Frequency must be '1D' or Higher.")
986
- if frequency % pd.Timedelta("1D") != pd.Timedelta(0):
987
- raise ValueError("Date_Range_HTF() Week and Day frequency must be an integer multiple of Days")
988
-
989
- return frequency.days, "D"
990
-
991
-
992
- def _days_per_week(weekmask: Union[Iterable, str]) -> int:
993
- "Used to get a more accurate estimate of the number of days per week"
994
- # Return any 'Array Like' Representation
995
- if not isinstance(weekmask, str):
996
- return len([day for day in weekmask if bool(day)])
997
-
998
- if len(weekmask) == 0:
999
- raise ValueError("Weekmask cannot be blank")
1000
-
1001
- weekmask = weekmask.upper()
1002
- day_abbrs = {day for day in weekly_roll_map.values() if day in weekmask}
1003
- if len(day_abbrs) != 0:
1004
- return len(day_abbrs)
1005
-
1006
- # Weekmask Something like '0111110'
1007
- return len([day for day in weekmask if bool(day)])
1008
-
1009
-
1010
- def _cal_day_range(cb_day: "CustomBusinessDay", start, end, periods, mult) -> pd.DatetimeIndex:
1011
- """
1012
- Returns a Normalized DateTimeIndex of Open Buisness Days.
1013
- Exactly two of the (start, end, periods) arguments must be given.
1014
-
1015
- ** Arguments should be Type/Error Checked before calling this function **
1016
-
1017
- :param cb_day: CustomBusinessDay Object from the respective calendar
1018
- :param start: Optional Start-Date. Must be a Normalized, TZ-Naive pd.Timestamp
1019
- :param end: Optional End-Date. Must be a Normalized, TZ-Naive pd.Timestamp
1020
- :param periods: Optional Number of periods to return
1021
- :param mult: Integer Multiple of buisness days between data-points.
1022
- e.g: 1 == Every Business Day, 2 == Every Other B.Day, 3 == Every Third B.Day, etc.
1023
- :returns: DateRangeIndex[datetime64[ns]]
1024
- """
1025
-
1026
- # Ensure Start and End are open Business days in the desired range
1027
- if start is not None:
1028
- start = cb_day.rollforward(start)
1029
- if end is not None:
1030
- end = cb_day.rollback(end)
1031
-
1032
- # ---- Start-Date to End-Date ----
1033
- if isinstance(start, pd.Timestamp) and isinstance(end, pd.Timestamp):
1034
- num_days = (end - start) / mult
1035
- # Get a better estimate of the number of open days since date_range calc is slow
1036
- est_open_days = ((num_days // 7) * _days_per_week(cb_day.weekmask)) + num_days % pd.Timedelta("1W")
1037
-
1038
- # Should always produce a small overestimate since Holidays aren't accounted for.
1039
- est_open_days = ceil(est_open_days / pd.Timedelta("1D"))
1040
- _range = pd.RangeIndex(0, est_open_days * mult, mult)
1041
-
1042
- dt_index = pd.DatetimeIndex(start + _range * cb_day, dtype="datetime64[ns]")
1043
- return dt_index[dt_index <= end]
1044
-
1045
- # ---- Periods from Start-Date ----
1046
- elif isinstance(start, pd.Timestamp):
1047
- _range = pd.RangeIndex(0, periods * mult, mult)
1048
- return pd.DatetimeIndex(start + _range * cb_day, dtype="datetime64[ns]")
1049
-
1050
- # ---- Periods from End-Date ----
1051
- else:
1052
- # Ensure the end-date is the first valid Trading Day <= given end-date
1053
- end = cb_day.rollback(end)
1054
- _range = pd.RangeIndex(0, -1 * periods * mult, -1 * mult)
1055
-
1056
- return pd.DatetimeIndex(end + _range * cb_day, dtype="datetime64[ns]")[::-1]
1057
-
1058
-
1059
- def _cal_WMQY_range(
1060
- cb_day: "CustomBusinessDay",
1061
- start: Union[pd.Timestamp, None],
1062
- end: Union[pd.Timestamp, None],
1063
- periods: Union[int, None],
1064
- freq: str,
1065
- grouping_period: str,
1066
- closed: Union[Literal["left", "right"], None] = "right",
1067
- ):
1068
- """
1069
- Return A DateRangeIndex of the Weekdays that mark either the start or end of each
1070
- buisness week based on the 'closed' parameter.
1071
-
1072
- ** Arguments should be Type/Error Checked before calling this function **
1073
-
1074
- :param cb_day: CustomBusinessDay Object from the respective calendar
1075
- :param start: Optional Start-Date. Must be a Normalized, TZ-Naive pd.Timestamp
1076
- :param end: Optional End-Date. Must be a Normalized, TZ-Naive pd.Timestamp
1077
- :param periods: Optional Number of periods to return
1078
- :param freq: Formatted frequency of '1W' and Higher with desired multiple, S/E Chars,
1079
- and Anchoring code.
1080
- :param grouping_period: Period_Code with anchor that matches the given period Code.
1081
- i.e. 'W-[DAY]', 'M', 'Q-[MONTH]', 'Y-[MONTH]'
1082
- :param closed: Union['left', Any].
1083
- 'left': The normalized start-day of the relative period is returned
1084
- Everything else: The normalized last-day of the relative period is returned
1085
- :returns: DateRangeIndex[datetime64[ns]]
1086
- """
1087
-
1088
- # Need to Adjust the Start/End Dates given to pandas since Rolling forward or backward can shift
1089
- # the calculated date range out of the desired [start, end] range adding or ignoring desired values.
1090
-
1091
- # For Example, say we want NYSE-Month-Starts between [2020-01-02, 2020-02-02]. W/O Adjusting dates
1092
- # we call pd.date_range('2020-01-02, '2020-02-02', 'MS') => ['2020-02-01'] Rolled to ['2020-02-03'].
1093
- # '02-03' date is then trimmed off returning an empty Index. despite '2020-01-02' being a valid Month Start
1094
- # By Adjusting the Dates we call pd.date_range('2020-01-01, '2020-02-02') => ['2020-01-01, '2020-02-01']
1095
- # That's then Rolled into [2020-01-02, 2020-02-03] & Trimmed to [2020-01-02] as desired.
1096
-
1097
- _dr_start, _dr_end = None, None
1098
-
1099
- if closed == "left":
1100
- roll_func = cb_day.rollforward
1101
- if start is not None:
1102
- normalized_start = start.to_period(grouping_period).start_time
1103
- _dr_start = normalized_start if start <= roll_func(normalized_start) else start
1104
-
1105
- if end is not None:
1106
- if periods is not None:
1107
- normalized_end = end.to_period(grouping_period).start_time
1108
- _dr_end = (
1109
- normalized_end - pd.Timedelta("1D") # Shift into preceding group
1110
- if end < roll_func(normalized_end)
1111
- else cb_day.rollback(end)
1112
- )
1113
- else:
1114
- _dr_end = cb_day.rollback(end)
1115
-
1116
- else:
1117
- roll_func = cb_day.rollback
1118
- if start is not None:
1119
- if periods is not None:
1120
- normalized_start = start.to_period(grouping_period).end_time.normalize()
1121
- _dr_start = (
1122
- normalized_start + pd.Timedelta("1D") # Shift into trailing group
1123
- if start > roll_func(normalized_start)
1124
- else cb_day.rollforward(start)
1125
- )
1126
- else:
1127
- _dr_start = cb_day.rollforward(start)
1128
-
1129
- if end is not None:
1130
- normalized_end = end.to_period(grouping_period).end_time.normalize()
1131
- _dr_end = normalized_end if end >= roll_func(normalized_end) else end
1132
-
1133
- _range = pd.date_range(_dr_start, _dr_end, periods, freq).to_series().apply(roll_func)
1134
-
1135
- # Ensure that Rolled Timestamps are in the desired range When given both Start and End
1136
- if start is not None and end is not None:
1137
- if len(_range) > 0 and _range.iloc[0] < start:
1138
- # Trims off the first 'WMQY End' that might have been Rolled before start
1139
- _range = _range[1:]
1140
- if len(_range) > 0 and _range.iloc[-1] > end:
1141
- # Trims off the last 'WMQY Start' the might have been Rolled after end
1142
- _range = _range[0:-1]
1143
-
1144
- return pd.DatetimeIndex(_range, dtype="datetime64[ns]")
1145
-
1146
-
1147
- # endregion
1
+ """
2
+ Utilities to use with market_calendars
3
+ """
4
+
5
+ import itertools
6
+ from math import ceil, floor
7
+ from typing import TYPE_CHECKING, Any, Dict, Iterable, Literal, Tuple, Union, Type, Set
8
+ import warnings
9
+
10
+ from re import finditer, split
11
+ import numpy as np
12
+ import pandas as pd
13
+
14
+ if TYPE_CHECKING:
15
+ from pandas.tseries.offsets import CustomBusinessDay
16
+ from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday
17
+
18
+ DEFAULT_LABEL_MAP = {
19
+ "pre": "pre",
20
+ "rth_pre_break": "rth",
21
+ "rth": "rth",
22
+ "break": "break",
23
+ "rth_post_break": "rth",
24
+ "post": "post",
25
+ "closed": "closed",
26
+ }
27
+
28
+
29
+ def mark_session(
30
+ schedule: pd.DataFrame,
31
+ timestamps: pd.DatetimeIndex,
32
+ label_map: Dict[str, Any] = {},
33
+ *,
34
+ closed: Literal["left", "right"] = "right",
35
+ ) -> pd.Series:
36
+ """
37
+ Return a Series that denotes the trading session of each timestamp in a DatetimeIndex.
38
+ The returned Series's Index is the provided Datetime Index, the Series's values
39
+ are the timestamps' corresponding session.
40
+
41
+ PARAMETERS:
42
+
43
+ :param schedule: The market schedule to check the timestamps against. This Schedule must include
44
+ all of the trading days that are in the provided DatetimeIndex of timestamps.
45
+ Note: The columns need to be sorted into ascending order, if not, then an error will be
46
+ raised saying the bins must be in ascending order.
47
+
48
+ :param timestamps: A DatetimeIndex of Timestamps to check. Must be sorted in ascending order.
49
+
50
+ :param label_map: Optional mapping of Dict[str, Any] to change the values returned in the
51
+ series. The keys of the given mapping should match the keys of the default dict, but the
52
+ values can be anything. A subset of mappings may also be provided, e.g. {'closed':-1} will
53
+ only change the label of the 'closed' session. All others will remain the default label.
54
+
55
+ >>> Default Mapping == {
56
+ "pre": "pre",
57
+ "rth_pre_break": "rth", # When the Schedule has a break
58
+ "rth": "rth", # When the Schedule doesn't have a break
59
+ "break": "break", # When the Schedule has a break
60
+ "rth_post_break": "rth", # When the Schedule has a break
61
+ "post": "post",
62
+ "closed": "closed",
63
+ }
64
+
65
+ :param closed: Which side of each interval should be closed (inclusive)
66
+ left: == [start, end)
67
+ right: == (start, end]
68
+ """
69
+ # ---- ---- ---- Determine which columns need to be dropped ---- ---- ----
70
+ session_labels = ["closed"]
71
+ columns = set(schedule.columns)
72
+ needed_cols = set()
73
+
74
+ def _extend_statement(session: str, parts: set):
75
+ if parts.issubset(columns):
76
+ needed_cols.update(parts)
77
+ session_labels.append(session)
78
+
79
+ _extend_statement("pre", {"pre", "market_open"})
80
+ if {"break_start", "break_end"}.issubset(columns):
81
+ _extend_statement("rth_pre_break", {"market_open", "break_start"})
82
+ _extend_statement("break", {"break_start", "break_end"})
83
+ _extend_statement("rth_post_break", {"break_end", "market_close"})
84
+ else:
85
+ _extend_statement("rth", {"market_open", "market_close"})
86
+ _extend_statement("post", {"market_close", "post"})
87
+
88
+ # ---- ---- ---- Error Check ---- ---- ----
89
+ if len(extra_cols := columns - needed_cols) > 0:
90
+ schedule = schedule.drop(columns=[*extra_cols])
91
+ warnings.warn(
92
+ f"Attempting to mark trading sessions and the schedule ({columns = }) contains the "
93
+ f"extra columns: {extra_cols}. Returned sessions may not be labeled as desired."
94
+ )
95
+
96
+ start = timestamps[0]
97
+ end = timestamps[-1]
98
+ if start < schedule.iloc[0, 0]: # type: ignore
99
+ raise ValueError(
100
+ f"Insufficient Schedule. Needed Start-Time: {start.normalize().tz_localize(None)}. "
101
+ f"Schedule starts at: {schedule.iloc[0, 0]}"
102
+ )
103
+ if end > schedule.iloc[-1, -1]: # type: ignore
104
+ raise ValueError(
105
+ f"Insufficient Schedule. Needed End-Time: {end.normalize().tz_localize(None)}. "
106
+ f"Schedule ends at: {schedule.iloc[-1, -1]}"
107
+ )
108
+
109
+ lte_end = schedule.index <= end.normalize().tz_localize(None)
110
+ gte_start = schedule.index >= start.normalize().tz_localize(None)
111
+
112
+ # Shift both by 1 to keep an extra row on either end if available. Needed in some edge cases.
113
+ gte_start = np.append(gte_start, True)[1:] # Shifts gte_start by one to the left.
114
+ lte_end = np.insert(lte_end, 0, True)[:-1] # Shifts lte_end by one to the right.
115
+
116
+ # Trim the schedule to match the timeframe covered by the given timeseries
117
+ schedule = schedule[gte_start & lte_end]
118
+
119
+ backfilled_map = DEFAULT_LABEL_MAP | label_map
120
+ mapped_labels = [backfilled_map[label] for label in session_labels]
121
+ labels = pd.Series([mapped_labels]).repeat(len(schedule)).explode()
122
+ labels = pd.concat([labels, pd.Series([backfilled_map["closed"]])])
123
+
124
+ # Append on additional Edge-Case Bins so result doesn't include NaNs
125
+ bins = schedule.to_numpy().flatten()
126
+ bins = np.insert(bins, 0, bins[0].normalize())
127
+ bins = np.append(bins, bins[-1].normalize() + pd.Timedelta("1D"))
128
+
129
+ bins, _ind, _counts = np.unique(bins, return_index=True, return_counts=True)
130
+
131
+ if len(bins) - 1 != len(labels):
132
+ # np.Unique Dropped some bins, need to drop the associated labels
133
+ label_inds = (_ind + _counts - 1)[:-1]
134
+ labels = labels.iloc[label_inds]
135
+
136
+ return pd.Series(
137
+ pd.cut(timestamps, bins, closed != "left", labels=labels, ordered=False), # type: ignore
138
+ index=timestamps,
139
+ )
140
+
141
+
142
+ def merge_schedules(schedules, how="outer"):
143
+ """
144
+ Given a list of schedules will return a merged schedule. The merge method (how) will either return the superset
145
+ of any datetime when any schedule is open (outer) or only the datetime where all markets are open (inner)
146
+
147
+ CAVEATS:
148
+ * This does not work for schedules with breaks, the break information will be lost.
149
+ * Only "market_open" and "market_close" are considered, other market times are not yet supported.
150
+
151
+ :param schedules: list of schedules
152
+ :param how: outer or inner
153
+ :return: schedule DataFrame
154
+ """
155
+ all_cols = [x.columns for x in schedules]
156
+ all_cols = list(itertools.chain(*all_cols))
157
+ if ("break_start" in all_cols) or ("break_end" in all_cols):
158
+ warnings.warn("Merge schedules will drop the break_start and break_end from result.")
159
+
160
+ result = schedules[0]
161
+ for schedule in schedules[1:]:
162
+ result = result.merge(schedule, how=how, right_index=True, left_index=True)
163
+ if how == "outer":
164
+ result["market_open"] = result.apply(lambda x: min(x.market_open_x, x.market_open_y), axis=1)
165
+ result["market_close"] = result.apply(lambda x: max(x.market_close_x, x.market_close_y), axis=1)
166
+ elif how == "inner":
167
+ result["market_open"] = result.apply(lambda x: max(x.market_open_x, x.market_open_y), axis=1)
168
+ result["market_close"] = result.apply(lambda x: min(x.market_close_x, x.market_close_y), axis=1)
169
+ else:
170
+ raise ValueError('how argument must be "inner" or "outer"')
171
+ result = result[["market_open", "market_close"]]
172
+ return result
173
+
174
+
175
+ def is_single_observance(holiday: "Holiday"):
176
+ "Returns the Date of the Holiday if it is only observed once, None otherwise."
177
+ return holiday.start_date if holiday.start_date == holiday.end_date else None # type: ignore ??
178
+
179
+
180
+ def all_single_observance_rules(calendar: "AbstractHolidayCalendar"):
181
+ "Returns a list of timestamps if the Calendar's Rules are all single observance holidays, None Otherwise"
182
+ observances = [is_single_observance(rule) for rule in calendar.rules]
183
+ return observances if all(observances) else None
184
+
185
+
186
+ def convert_freq(index, frequency):
187
+ """
188
+ Converts a DateTimeIndex to a new lower frequency
189
+
190
+ :param index: DateTimeIndex
191
+ :param frequency: frequency string
192
+ :return: DateTimeIndex
193
+ """
194
+ return pd.DataFrame(index=index).asfreq(frequency).index
195
+
196
+
197
# Session names accepted by date_range()'s `session` parameter.
SESSIONS = Literal[
    "pre",
    "post",
    "RTH",
    "pre_break",
    "post_break",
    "ETH",
    "break",
    "closed",
    "closed_masked",
]
# Schedule column names that date_range() knows how to read.
MKT_TIMES = Literal["pre", "post", "market_open", "market_close", "break_start", "break_end"]
209
+
210
+
211
+ # region ---- ---- ---- Date Range Warning Types ---- ---- ----
212
class DateRangeWarning(UserWarning):
    """Base class of every warning category thrown by date_range()."""
214
+
215
+
216
class OverlappingSessionWarning(DateRangeWarning):
    """
    Warning thrown when date_range is called with a timedelta that is larger than the
    gap between two sessions, leading to them overlapping.
    This is only an issue when closed='right'/'both'/None and force_close=None.

    For example, the following raises a warning because the 10:00 timestamp that is from the 'pre'
    session comes after the start of the 9:30 'RTH' session, but belongs to the 'pre' session
    >>> date_range(NYSE, '2h', 'right', None, {'pre', 'RTH'}, merge_adjacent = False)
    >>> ['2020-01-02 06:00:00', '2020-01-02 08:00:00',
         '2020-01-02 10:00:00', '2020-01-02 11:30:00',
         '2020-01-02 13:30:00', '2020-01-02 15:30:00',
         '2020-01-02 17:30:00'],
    This is particularly convoluted when closed='both'/None
    >>> date_range(NYSE, '2h', 'both', None, {'pre', 'RTH'}, merge_adjacent = False)
    >>> ['2020-01-02 04:00:00' (pre), '2020-01-02 06:00:00' (pre),
         '2020-01-02 08:00:00' (pre), '2020-01-02 09:30:00' (rth),
         '2020-01-02 10:00:00' (pre), '2020-01-02 11:30:00' (rth),
         '2020-01-02 13:30:00' (rth), '2020-01-02 15:30:00' (rth),
         '2020-01-02 17:30:00' (rth)],
    """
237
+
238
+
239
class DisappearingSessionWarning(DateRangeWarning):
    """
    Warning thrown when date_range is called with a timedelta that is larger than an entire session,
    resulting in the session disappearing from the returned DatetimeIndex.

    Only an issue when closed='right' and force_close=False.
    """
246
+
247
+
248
class MissingSessionWarning(DateRangeWarning):
    """
    Warning thrown when a date_range() call is made with a requested session,
    but the schedule lacks the necessary columns. When this warning is ignored the
    returned DatetimeIndex will simply lack the relevant sessions.

    e.g. 'pre' session requested and the schedule lacks a 'pre' and/or 'market_open' column.
    """
256
+
257
+
258
class InsufficientScheduleWarning(DateRangeWarning):
    """
    Warning thrown when a date_range() call is made with a requested number of periods,
    or start-date / end-date, that exceed what was provided in the given schedule.

    If a schedule has an insufficient start and end date then this warning is thrown twice.

    If this warning is thrown when date_range is called with a number of desired periods, then
    the desired start/end date is an approximate value. This 'approximation' is biased to
    overestimate the needed start/end time by about 1 week. This is done to limit the edge
    cases where this warning could get thrown multiple times in a row.
    """
270
+
271
+
272
def filter_date_range_warnings(
    action: Literal["error", "ignore", "always", "default", "once"],
    source: Union[Iterable[Type[DateRangeWarning]], Type[DateRangeWarning]] = DateRangeWarning,
):
    """
    Adjust the behavior of the date_range() warnings to the desired action.

    :param action: The desired change to the warning behavior:
        'error': Escalate Warnings into Errors
        'ignore': Silence Warning Messages
        'once': Only display a message of the given category once
        'default': Reset the behavior of the given warning category
        'always': Always show the Warning of a given category

    :param source: The category (or iterable of categories) to apply the action to.
        default: DateRangeWarning (all date_range warnings)
        Warning Types: MissingSessionWarning, OverlappingSessionWarning,
        DisappearingSessionWarning, InsufficientScheduleWarning
    """
    # A lone warning class is not iterable; normalize it to a 1-tuple.
    categories = source if isinstance(source, Iterable) else (source,)
    for category in categories:
        warnings.filterwarnings(action, category=category)
297
+
298
+
299
def parse_missing_session_warning(
    err: MissingSessionWarning,
) -> Tuple[Set[SESSIONS], Set[MKT_TIMES]]:
    """
    Parse a MissingSessionWarning's message.

    :returns Tuple[set[str], set[str]]:
        Set #1: The missing sessions
        Set #2: The missing schedule columns
    """
    # The message embeds two {...} sets; split on the braces/pipe then on ', '.
    cleaned = err.args[0].replace("'", "")
    pieces = split(r"[{|}]", cleaned)
    missing_sessions = set(pieces[1].split(", "))
    missing_columns = set(pieces[3].split(", "))
    return missing_sessions, missing_columns  # type: ignore
310
+
311
+
312
def parse_insufficient_schedule_warning(
    err: InsufficientScheduleWarning,
) -> Tuple[bool, pd.Timestamp, pd.Timestamp]:
    """
    Parse the information out of an InsufficientScheduleWarning.

    :returns Tuple[bool, pd.Timestamp, pd.Timestamp]:
        bool: True == range is missing from the start, False == range missing from the end
        Timestamp 1: Start of the missing range
        Timestamp 2: End of the missing range
    Note: The timestamps are always ordered (t1 <= t2) and do not overlap with the original
    schedule, so a supplemental schedule can be concatenated on without overlapping indices.
    """
    dates = finditer(r"\d{4}-\d{2}-\d{2}", err.args[0])
    missing_from_start = "Start-Time" in err.args[0]
    first = pd.Timestamp(next(dates).group())
    second = pd.Timestamp(next(dates).group())

    # Nudge the schedule-boundary date by one day so the missing range excludes it.
    if missing_from_start:
        second -= pd.Timedelta("1D")
    else:
        second += pd.Timedelta("1D")

    if first <= second:
        return missing_from_start, first, second
    return missing_from_start, second, first
336
+
337
+
338
+ # endregion
339
+
340
+
341
def date_range(
    schedule: pd.DataFrame,
    frequency: Union[str, pd.Timedelta, int, float],
    closed: Union[Literal["left", "right", "both"], None] = "right",
    force_close: Union[bool, None] = True,
    session: Union[SESSIONS, Iterable[SESSIONS]] = frozenset({"RTH"}),
    merge_adjacent: bool = True,
    start: Union[str, pd.Timestamp, int, float, None] = None,
    end: Union[str, pd.Timestamp, int, float, None] = None,
    periods: Union[int, None] = None,
) -> pd.DatetimeIndex:
    """
    Interpolates a Market's Schedule at the desired frequency and returns the result as a DatetimeIndex.
    This function is only valid for periods less than 1 Day, for longer periods use date_range_htf().

    Note: The slowest part of this function is by far generating the necessary schedule (which in
    turn is limited by pandas' date_range() function). If speed is a concern, store and update the
    schedule as needed instead of generating it every time.

    WARNINGS SYSTEM:
        *There are multiple edge-case warnings that are thrown by this function. See the Docstrings
        of each warning for more info. (DateRangeWarning, InsufficientScheduleWarning,
        MissingSessionWarning, OverlappingSessionWarning, DisappearingSessionWarning)

        *The thrown warnings can be ignored or escalated into catchable errors by using the
        filter_date_range_warnings() function.

        parse_missing_session_warning() & parse_insufficient_schedule_warning() exist to easily
        process those warnings if they are escalated into errors.

    PARAMETERS:

    :param schedule: Schedule of a calendar which includes all the columns necessary
        for the desired sessions.

    :param frequency: String, Int/float (seconds) or pd.Timedelta that represents the desired
        interval of the date_range. Intervals larger than 1D are not supported.

    :param closed: the way the intervals are labeled
        'right': use the end of the interval
        'left': use the start of the interval
        None / 'both': use the end of the interval but include the start of the first interval

    :param force_close: How the last value of a trading session is handled
        True: guarantee that the close of the trading session is the last value
        False: guarantee that there is no value greater than the close of the trading session
        None: leave the last value as it is calculated based on the closed parameter

    :param session: A str representing a single session or an Iterable of the following Sessions.
        RTH: The Default Option. This is [Market_open, Market_close], if the schedule includes a
            break then the break is excluded from the returned datetime index.
        ETH: [pre, market_open] & [market_close, post]
        pre: [pre, market_open]
        post: [market_close, post]
        break: [break_start, break_end]
        pre_break: [market_open, break_start]
        post_break: [break_end, market_close]
        closed: [market_close, market_open (of the next day)] If ETH market times are given then
            this will be [post, pre (of the next day)] instead. The last session will end at
            Midnight of the timezone the schedule is given in.
        closed_masked: Same as closed, but Weekends & Holidays are ignored. Instead, the Datetime
            index stops at Midnight on the trading day before the break and resumes at midnight
            prior to the next trading day. **Note: This is Midnight of the Timezone the schedule is
            given in, not Midnight of the exchange's tz since the exchange's tz is not known.

    :param merge_adjacent: Bool representing if adjacent sessions should be merged into a single session.
        For Example, NYSE w/ session={'RTH', 'ETH'}, frequency=2h, closed=left, force_close=False
        merge_adjacent == True => [pre, post]
        >>> ['2020-01-02 04:00:00', '2020-01-02 06:00:00',
             '2020-01-02 08:00:00', '2020-01-02 10:00:00',
             '2020-01-02 12:00:00', '2020-01-02 14:00:00',
             '2020-01-02 16:00:00', '2020-01-02 18:00:00']
        merge_adjacent == False => [pre, market_open] & [market_open, market_close] & [market_close, post]
        >>> ['2020-01-02 04:00:00', '2020-01-02 06:00:00',
             '2020-01-02 08:00:00', '2020-01-02 09:30:00',
             '2020-01-02 11:30:00', '2020-01-02 13:30:00',
             '2020-01-02 15:30:00', '2020-01-02 16:00:00',
             '2020-01-02 18:00:00']
        merge_adjacent=False re-aligns the timestamps to the session, but this results in
        the difference between timestamps not always equaling the desired frequency.

    :param start: Optional [String, Int/float (POSIX seconds) or pd.Timestamp] of the desired start time.
        :If left as None then the start-time of the Schedule is used.
        :If no TZ info is given it will be interpreted in the same timezone as the first column
            of the schedule
        :Start can be a Day and Time, but the returned index will still be aligned to the underlying
            schedule. e.g. Session = [9:30am, 12pm], frequency=7min, start=9:45am. Underlying session
            = [9:30, 9:37, 9:44, 9:51, ...] => returned DatetimeIndex = [9:51, ...]

    :param end: Optional [String, Int/float (POSIX seconds) or pd.Timestamp] of the desired end time.
        :If left as None then the end-time of the Schedule is used.
        :If no TZ info is given it will be interpreted in the same timezone as the first column
        **Note: The time given is an absolute value. i.e. end="2020-01-01" == "2020-01-01 00:00"
            returning times prior to Midnight of "2019-12-31", not to the EOD of "2020-01-01"

    :param periods: Optional Integer number of periods to return. If a Period count, Start time,
        and End time are given the period count is ignored.
        None: Period count is ignored. Returned index is all periods in [Start, End]
        Int: # of periods to return. By default, this is the first N periods following the start.
            If an end time is given then this is the N periods prior to the End Time (inclusive).
        CAVEAT: When Force_close == False & closed == 'right'/'both' the number of periods returned
            may be less than the parameter given.

    :return: pd.DatetimeIndex of datetime64[ns, TZ-Aware]
    """
    # ---- ---- Error Check Inputs ---- ----
    if closed not in ("left", "right", "both", None):
        raise ValueError("closed must be 'left', 'right', 'both' or None.")
    if force_close not in (True, False, None):
        raise ValueError("force_close must be True, False or None.")
    if merge_adjacent not in (True, False):
        raise ValueError("merge_adjacent must be True or False")

    # ---- ---- Standardize Frequency Param ---- ----
    if isinstance(frequency, (int, float)):
        # Numeric frequency is given in seconds; convert to nanoseconds for pd.Timedelta.
        frequency = int(frequency * 1_000_000_000)
    try:
        frequency = pd.Timedelta(frequency)
    except ValueError as e:
        raise ValueError(f"Market Calendar Date_range Timeframe Error: {e}") from e
    if frequency <= pd.Timedelta("0s"):
        raise ValueError("Market Calendar Date_Range Frequency must be Positive.")
    if frequency > pd.Timedelta("1D"):
        raise ValueError("Market Calendar Date_Range Frequency Cannot Be longer than '1D'.")

    session_list, mask = _make_session_list(set(schedule.columns), session, merge_adjacent)
    if not session_list:
        # No usable sessions (e.g. every requested session lacked its columns).
        return pd.DatetimeIndex([], dtype="datetime64[ns, UTC]")

    session_times = _reconfigure_schedule(schedule, session_list, mask)
    # Trim off all 0 length sessions
    session_times = session_times[session_times.start.ne(session_times.end)]
    _error_check_sessions(session_times, frequency, closed, force_close)

    tz = schedule[session_list[0][0]].dt.tz  # copy tz info from schedule
    dtype = schedule[session_list[0][0]].dtype  # copy dtype info from schedule
    start, end, periods = _standardize_times(schedule, start, end, periods, tz)

    time_series = _calc_time_series(session_times, frequency, closed, force_close, start, end, periods)
    time_series.name = None

    return pd.DatetimeIndex(time_series, tz=tz, dtype=dtype)
483
+
484
+
485
+ # region ------------------ Date Range LTF Subroutines ------------------
486
+
487
+
488
def _make_session_list(columns: set, sessions: Union[str, Iterable], merge_adjacent: bool) -> Tuple[list, bool]:
    """Create a list of (Session Start, Session End) Tuples.

    :param columns: set of column names present in the schedule.
    :param sessions: a session name or iterable of session names (see SESSIONS).
    :param merge_adjacent: when True, back-to-back sessions are fused into one.
    :return: ([(start_col, end_col), ...], mask_flag) where mask_flag is True when
        'closed_masked' was requested.
    :raises ValueError: on an unknown session name.
    :warns MissingSessionWarning: when a requested session's columns are absent.
    """
    session_times = []
    missing_cols = set()
    missing_sess = set()
    # Normalize to a set; a bare string would otherwise iterate per-character.
    sessions = {sessions} if isinstance(sessions, str) else set(sessions)

    if len(extras := sessions.difference(set(SESSIONS.__args__))) > 0:  # type: ignore
        raise ValueError(f"Unknown Date_Range Market Session: {extras}")

    if "ETH" in sessions:  # Standardize ETH to 'pre' and 'post'
        sessions = sessions - {"ETH"} | {"pre", "post"}
    if "closed_masked" in sessions:  # closed_masked == 'closed' for this step
        sessions |= {"closed"}
    if "pre" in columns:  # Add wrap-around sessions
        columns |= {"pre_wrap"}
    if "market_open" in columns:
        columns |= {"market_open_wrap"}

    def _extend_statement(session, parts):
        # Append the column pair only when the session was requested AND the
        # schedule provides every needed column; otherwise record what's missing.
        if session not in sessions:
            return
        if columns.issuperset(parts):
            session_times.extend(parts)
        else:
            missing_sess.update({session})
            missing_cols.update(set(parts) - columns)

    # Append session_start, session_end for each desired session *in session order*
    _extend_statement("pre", ("pre", "market_open"))
    if {"break_start", "break_end"}.issubset(columns):
        # If the schedule has breaks then sub-divide RTH into pre & post break sessions
        if "RTH" in sessions:
            sessions = sessions - {"RTH"} | {"pre_break", "post_break"}
        _extend_statement("pre_break", ("market_open", "break_start"))
        _extend_statement("break", ("break_start", "break_end"))
        _extend_statement("post_break", ("break_end", "market_close"))
    else:
        _extend_statement("RTH", ("market_open", "market_close"))
    _extend_statement("post", ("market_close", "post"))

    # Closed can mean [close, open], [close, pre], [pre, post], or [post, open] Adjust accordingly
    s_start = "post" if "post" in columns else "market_close"
    s_end = "pre_wrap" if "pre" in columns else "market_open_wrap"
    _extend_statement("closed", (s_start, s_end))

    if len(missing_sess) > 0:
        warnings.warn(
            f"Requested Sessions: {missing_sess}, but schedule is missing columns: {missing_cols}."
            "\nResulting DatetimeIndex will lack those sessions. ",
            category=MissingSessionWarning,
        )

    if merge_adjacent:
        # Drop interior boundaries shared by consecutive sessions (e.g. 'market_open'
        # appearing as both an end and the following start), fusing the sessions.
        drop_set = set()
        for i in range(1, len(session_times) - 1, 2):
            if session_times[i] == session_times[i + 1]:
                drop_set |= {session_times[i]}

        # Guaranteed to drop in pairs => no check needed before zipping
        session_times = [t for t in session_times if t not in drop_set]

    # Zip the flat list into a list of pairs
    session_pairs = list(zip(*(iter(session_times),) * 2))

    return session_pairs, "closed_masked" in sessions
554
+
555
+
556
def _standardize_times(schedule, start, end, periods, tz) -> Tuple[pd.Timestamp, pd.Timestamp, Union[int, None]]:
    """Standardize start and end into a timestamp of the relevant timezone.

    :param schedule: schedule DataFrame; only its (date) index is consulted here.
    :param start/end: str, numeric POSIX seconds, pd.Timestamp, or None.
    :param periods: requested period count; set to None when start, end and
        periods are all given (start/end take priority).
    :param tz: timezone of the schedule's columns (may be None for naive data).
    :warns InsufficientScheduleWarning: when start/end falls outside the schedule.
    :raises ValueError: on an unparseable time or start > end.
    """
    if all((start, end, periods)):
        periods = None  # Ignore Periods if all 3 params are given.

    if start is not None:
        if isinstance(start, (int, float)):
            # POSIX seconds -> nanoseconds for pd.Timestamp.
            start *= 1_000_000_000
        try:
            start = pd.Timestamp(start)
            # NOTE(review): this branch localizes even when tz is None (start.tz_localize(None)
            # on a naive stamp appears to be a no-op), while the 'end' branch below guards on
            # tz is not None — confirm the asymmetry is intentional.
            if start.tz is None:
                start = start.tz_localize(tz)
        except ValueError as e:
            raise ValueError(f"Invalid Time ({start = }) given to date_range()") from e

        if start < schedule.index[0].tz_localize(tz):
            warnings.warn(
                f"Insufficient Schedule. Requested Start-Time: {start.normalize().tz_localize(None)}. "
                f"Schedule starts at: {schedule.index[0].normalize().tz_localize(None)}",
                category=InsufficientScheduleWarning,
            )

    if end is not None:
        if isinstance(end, (int, float)):
            end *= 1_000_000_000
        try:
            end = pd.Timestamp(end)
            if end.tz is None and tz is not None:
                end = end.tz_localize(tz)
        except ValueError as e:
            raise ValueError(f"Invalid Time ({end = }) given to date_range()") from e

        if end > schedule.index[-1].tz_localize(tz) + pd.Timedelta("1D"):
            # Checking against the day and not the specific session since so requesting a time
            # after the last session's close but before the next day doesn't throw a warning.
            requested_end = end.normalize().tz_localize(None) - pd.Timedelta("1D")
            warnings.warn(
                f"Insufficient Schedule. Requested End-Time: {requested_end}. "
                f"Schedule ends at: {schedule.index[-1].normalize().tz_localize(None)}",
                category=InsufficientScheduleWarning,
            )

    if start is not None and end is not None and start > end:
        raise ValueError(
            "Date_range() given a start-date that occurs after the given end-date. " f"{start = }, {end = }"
        )

    return start, end, periods
604
+
605
+
606
+ def _reconfigure_schedule(schedule, session_list, mask_close) -> pd.DataFrame:
607
+ "Reconfigure a schedule into a sorted dataframe of [start, end] times for each session"
608
+
609
+ sessions = []
610
+
611
+ for start, end in session_list:
612
+ if not end.endswith("_wrap"):
613
+ # Simple Session where 'start' occurs before 'end'
614
+ sessions.append(
615
+ schedule[[start, end]].rename(columns={start: "start", end: "end"}).set_index("start", drop=False)
616
+ )
617
+ continue
618
+
619
+ # 'closed' Session that wraps around midnight. Shift the 'end' col by 1 Day
620
+ end = end.rstrip("_wrap")
621
+ tmp = pd.DataFrame(
622
+ {
623
+ "start": schedule[start],
624
+ "end": schedule[end].shift(-1),
625
+ }
626
+ ).set_index("start", drop=False)
627
+
628
+ # Shift(-1) leaves last index of 'end' as 'NaT'
629
+ # Set the [-1, 'end' ('end' === 1)] cell to Midnight of the 'start' time of that row.
630
+ tmp.iloc[-1, 1] = tmp.iloc[-1, 0].normalize() + pd.Timedelta("1D") # type: ignore
631
+
632
+ if mask_close:
633
+ # Do some additional work to split 'closed' sessions that span weekends/holidays
634
+ sessions_to_split = tmp["end"] - tmp["start"] > pd.Timedelta("1D")
635
+
636
+ split_strt = tmp[sessions_to_split]["start"]
637
+ split_end = tmp[sessions_to_split]["end"]
638
+
639
+ sessions.append(
640
+ pd.DataFrame( # From start of the long close to Midnight
641
+ {
642
+ "start": split_strt,
643
+ "end": split_strt.dt.normalize() + pd.Timedelta("1D"),
644
+ }
645
+ ).set_index("start", drop=False)
646
+ )
647
+ sessions.append(
648
+ pd.DataFrame( # From Midnight to the end of the long close
649
+ {
650
+ "start": split_end.dt.normalize(),
651
+ "end": split_end,
652
+ }
653
+ ).set_index("start", drop=False)
654
+ )
655
+
656
+ # leave tmp as all the sessions that were not split
657
+ tmp = tmp[~sessions_to_split]
658
+
659
+ sessions.append(tmp)
660
+
661
+ return pd.concat(sessions).sort_index()
662
+
663
+
664
+ def _error_check_sessions(session_times, timestep, closed, force_close):
665
+ if session_times.start.gt(session_times.end).any():
666
+ raise ValueError(
667
+ "Desired Sessions from the Schedule contain rows where session start < session end, "
668
+ "please correct the schedule"
669
+ )
670
+
671
+ # Disappearing Session
672
+ if force_close is False and closed == "right":
673
+ # only check if needed
674
+ if (session_times.end - session_times.start).lt(timestep).any():
675
+ warnings.warn(
676
+ "An interval of the chosen frequency is larger than some of the trading sessions, "
677
+ "while closed='right' and force_close=False. This will make those trading sessions "
678
+ "disappear. Use a higher frequency or change the values of closed/force_close, to "
679
+ "keep this from happening.",
680
+ category=DisappearingSessionWarning,
681
+ )
682
+
683
+ # Overlapping Session
684
+ if force_close is None and closed != "left":
685
+ num_bars = _num_bars_ltf(session_times, timestep, closed)
686
+ end_times = session_times.start + num_bars * timestep
687
+
688
+ if end_times.gt(session_times.start.shift(-1)).any():
689
+ warnings.warn(
690
+ "The desired frequency results in date_range() generating overlapping sessions. "
691
+ "This can happen when the timestep is larger than a session, or when "
692
+ "merge_session = False and a session is not evenly divisible by the timestep. "
693
+ "The overlapping timestep can be deleted with force_close = True or False",
694
+ category=OverlappingSessionWarning,
695
+ )
696
+
697
+
698
+ def _num_bars_ltf(session_times, timestep, closed) -> pd.Series:
699
+ "Calculate the number of timestamps needed for each trading session."
700
+ if closed in ("both", None):
701
+ return np.ceil((session_times.end - session_times.start) / timestep) + 1
702
+ else:
703
+ return np.ceil((session_times.end - session_times.start) / timestep)
704
+
705
+
706
+ def _course_trim_to_period_count(num_bars, periods, reverse) -> pd.Series:
707
+ """
708
+ Course Trim the Session times to the desired period count.
709
+ Large enough of a sub-routine to merit its own function call.
710
+ """
711
+ if reverse:
712
+ # If end-date is given calculate sum in reverse order
713
+ num_bars = num_bars[::-1]
714
+
715
+ _sum = num_bars.cumsum()
716
+
717
+ if _sum.iloc[-1] < periods:
718
+ # Insufficient Number of Periods. Try to estimate an ending time from the data given.
719
+ # delta = (end_date - start_date) / (cumulative # of periods) * (periods still needed) * fudge factor
720
+ delta = abs(
721
+ # (end_date - start_date) / (cumulative # of periods)
722
+ ((_sum.index[-1] - _sum.index[0]) / _sum.iloc[-1])
723
+ * (periods - _sum.iloc[-1]) # (periods still needed)
724
+ * 1.05 # (Fudge Factor for weekends/holidays)
725
+ )
726
+ # delta = math.ceil(delta) + '1W'
727
+ delta = (delta // pd.Timedelta("1D") + 8) * pd.Timedelta("1D")
728
+ # The 1.05 Factor handles when the schedule is short by a few months, the + '1W' handles
729
+ # when the schedule is short by only a few periods. While 1 Week is absolute overkill,
730
+ # generating the extra few days is very little extra cost compared to throwing this error
731
+ # a second or even third time.
732
+
733
+ if reverse:
734
+ approx_start = _sum.index[-1] - delta
735
+ warnings.warn(
736
+ f"Insufficient Schedule. Requested Approx Start-Time: {approx_start}. "
737
+ f"Schedule starts at: {_sum.index[-1].normalize().tz_localize(None)}",
738
+ category=InsufficientScheduleWarning,
739
+ )
740
+ else:
741
+ approx_end = _sum.index[-1] + delta
742
+ warnings.warn(
743
+ f"Insufficient Schedule. Requested Approx End-Time: {approx_end}. "
744
+ f"Schedule ends at: {_sum.index[-1].normalize().tz_localize(None)}",
745
+ category=InsufficientScheduleWarning,
746
+ )
747
+
748
+ sessions_to_keep = _sum < periods
749
+ # Shifting Ensures the number of needed periods are generated, but no more.
750
+ sessions_to_keep = sessions_to_keep.shift(1, fill_value=True)
751
+
752
+ if reverse:
753
+ # If end-date is given calculate un-reverse the order of the series
754
+ sessions_to_keep = sessions_to_keep[::-1]
755
+
756
+ return sessions_to_keep
757
+
758
+
759
def _calc_time_series(session_times, timestep, closed, force_close, start, end, periods) -> pd.Series:
    """Interpolate each session into a datetime series at the desired frequency.

    :param session_times: DataFrame of 'start'/'end' columns, one row per session.
    :param timestep: pd.Timedelta bar interval.
    :param closed/force_close: labeling options as documented on date_range().
    :param start/end: standardized Timestamps (or None) bounding the result.
    :param periods: optional period count used to trim the result.
    :return: sorted, de-duplicated pd.Series of timestamps.
    """
    # region ---- ---- ---- Trim the Sessions ---- ---- ----
    # Compare 'start' to the session end times so that if 'start' is in the middle of a session
    # that session remains in session_times. Vise-vera for End
    if start is not None:
        session_times = session_times[session_times.end > start]
    if end is not None:
        session_times = session_times[session_times.start < end]
    if len(session_times) == 0:
        return pd.Series([])

    # Override the First Session's Start and Last Session's End times if needed
    if start is not None and start > session_times.loc[session_times.index[0], "start"]:
        # Align the start to a multiple of the timestep after the session's beginning.
        # This is to make the returned DTIndex consistent across all start/end/period settings.
        session_start = session_times.loc[session_times.index[0], "start"]
        start_aligned = session_start + (ceil((start - session_start) / timestep) * timestep)
        session_times.loc[session_times.index[0], "start"] = start_aligned
    if end is not None and end < session_times.loc[session_times.index[-1], "end"]:
        # NOTE(review): the end is aligned relative to the FIRST session's start
        # (index[0], same anchor as above) — presumably to keep the whole index on a
        # single grid rather than re-anchoring at the last session; confirm intent.
        session_start = session_times.loc[session_times.index[0], "start"]
        end_aligned = session_start + (floor((end - session_start) / timestep) * timestep)
        session_times.loc[session_times.index[-1], "end"] = end_aligned

    num_bars = _num_bars_ltf(session_times, timestep, closed)

    if periods is not None:
        sessions_to_keep = _course_trim_to_period_count(num_bars, periods, end is not None)
        num_bars = num_bars[sessions_to_keep]
        session_times = session_times[sessions_to_keep]

    # endregion

    # One 'start' entry per bar; the grouped cumcount below turns each repeat
    # into its offset (in timesteps) from the session start.
    starts = session_times.start.repeat(num_bars)  # type: ignore

    if closed == "right":
        # Right side of addition is cumulative time since session start in multiples of timestep
        time_series = starts + (starts.groupby(starts.index).cumcount() + 1) * timestep
    else:
        time_series = starts + (starts.groupby(starts.index).cumcount()) * timestep

    if force_close is not None:
        # Trim off all timestamps that stretched beyond their intended session
        time_series = time_series[time_series.le(session_times.end.repeat(num_bars))]

        if force_close:
            time_series = pd.concat([time_series, session_times.end])

    time_series = time_series.drop_duplicates().sort_values()  # type: ignore

    if periods is not None and len(time_series) > 0:
        # Although likely redundant, Fine Trim to desired period count.
        if end is not None:
            s_len = len(time_series)
            time_series = time_series[max(s_len - periods, 0) : s_len]
        else:
            time_series = time_series[0:periods]

    return time_series
818
+
819
+
820
+ # endregion
821
+
822
+
823
PeriodCode = Literal["D", "W", "M", "Q", "Y"]
Day_Anchor = Literal["SUN", "MON", "TUE", "WED", "THU", "FRI", "SAT"]
Month_Anchor = Literal["JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"]

# Maps each anchor to the one immediately before it (a right-rotation by one).
# Needed because the pandas Period object is inconsistent with date_range:
# pd.date_range(s, e, freq='W-SUN') yields all Sundays (as expected), yet
# pd.Timestamp(<a Sunday>).to_period('W-SUN').start_time is the Monday prior.
days_rolled = list(Day_Anchor.__args__)
days_rolled = days_rolled[-1:] + days_rolled[:-1]
weekly_roll_map = dict(zip(Day_Anchor.__args__, days_rolled))

months_rolled = list(Month_Anchor.__args__)
months_rolled = months_rolled[-1:] + months_rolled[:-1]
yearly_roll_map = dict(zip(Month_Anchor.__args__, months_rolled))
837
+
838
+
839
def date_range_htf(
    cal: "CustomBusinessDay",
    frequency: Union[str, pd.Timedelta, int, float],
    start: Union[str, pd.Timestamp, int, float, None] = None,
    end: Union[str, pd.Timestamp, int, float, None] = None,
    periods: Union[int, None] = None,
    closed: Union[Literal["left", "right"], None] = "right",
    *,
    day_anchor: Day_Anchor = "SUN",
    month_anchor: Month_Anchor = "JAN",
) -> pd.DatetimeIndex:
    """
    Returns a Normalized DatetimeIndex from the start-date to End-Date for Time periods of 1D and Higher.

    Unless using a custom calendar, it is advised to call the date_range_htf() method of the desired calendar.
    This is because default anchors may change, or a single calendar may not be sufficient to model a market.

    For example, NYSE has two calendars: The first covers pre-1952 where saturdays were trading days. The second
    covers post-1952 where saturdays are closed.

    PARAMETERS:

    :param cal: CustomBusinessDay Calendar associated with a MarketCalendar. This can be retrieved by
        calling the holidays() method of a MarketCalendar.

    :param frequency: String, Int/float (POSIX seconds) or pd.Timedelta of the desired frequency.
        :Must be Greater than '1D' and an integer multiple of the base frequency (D, W, M, Q, or Y)
        :Important Note: Ints/Floats & Timedeltas are always considered as 'Open Business Days',
            '2D' == Every Other Business Day, '3D' == Every 3rd B.Day, '7D' == Every 7th B.Day
        :Higher periods (passed as strings) align to the beginning or end of the relevant period
        :i.e. '1W' == First/[Last] Trading Day of each Week, '1Q' == First/[Last] Day of every Quarter

    :param start: String, Int/float (POSIX seconds) or pd.Timestamp of the desired start time.
        :The Time & Timezone information is ignored. Only the Normalized Day is considered.

    :param end: String, Int/float (POSIX seconds) or pd.Timestamp of the desired end time.
        :The Time & Timezone information is ignored. Only the Normalized Day is considered.

    :param periods: Optional Integer number of periods to return. If a Period count, Start time,
        and End time are given the period count is ignored.

    :param closed: Literal['left', 'right']. Method used to close each range.
        :left: First open trading day of the Session is returned (e.g. First Open Day of The Month)
        :right: Last open trading day of the Session is returned (e.g. Last Open Day of The Month)
        :Note, This has no effect when the desired frequency is a number of days.

    :param day_anchor: Day to Anchor the start of the Weekly timeframes to. Default 'SUN'.
        :To get the First/Last Days of the trading Week then the Anchor needs to be on a day the relevant
            market is closed.
        :This can be set so that a specific day each week is returned.
        :freq='1W' & day_anchor='WED' Will return Every 'WED' when the market is open, and nearest day
            to the left or right (based on 'closed') when the market is closed.
        Options: ["SUN", "MON", "TUE", "WED", "THU", "FRI", "SAT"]

    :param month_anchor: Month to Anchor the start of the year to for Quarter and yearly timeframes.
        :Default 'JAN' for Calendar Quarters/Years. Can be set to 'JUL' to return Fiscal Years
        Options: ["JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"]
    """

    start, end, periods = _error_check_htf_range(start, end, periods)
    mult, _period_code = _standardize_htf_freq(frequency)

    # Dispatch on the base period code; each branch builds the pandas freq string
    # and the grouping period expected by the relevant range helper.
    if _period_code == "D":
        if mult == 1:
            # When desiring a frequency of '1D' default to pd.date_range. It will give the same
            # answer but it is more performant than the method in _cal_day_range.
            return pd.date_range(start, end, periods, freq=cal)
        else:
            return _cal_day_range(cal, start, end, periods, mult)

    elif _period_code == "W":
        freq = str(mult) + "W-" + day_anchor.upper()
        grouping_period = "W-" + weekly_roll_map[day_anchor.upper()]

        return _cal_WMQY_range(cal, start, end, periods, freq, grouping_period, closed)

    elif _period_code == "M":
        # 'S' anchors to month start (left-closed), 'E' to month end (right-closed).
        freq = str(mult) + "M" + ("S" if closed == "left" else "E")
        return _cal_WMQY_range(cal, start, end, periods, freq, "M", closed)

    else:  # Yearly & Quarterly Period
        freq = str(mult) + _period_code
        freq += (
            "S-" + month_anchor.upper()
            if closed == "left"  # *Insert Angry Tom Meme Here*
            else "E-" + yearly_roll_map[month_anchor.upper()]
        )
        grouping_period = _period_code + "-" + yearly_roll_map[month_anchor.upper()]

        return _cal_WMQY_range(cal, start, end, periods, freq, grouping_period, closed)
929
+
930
+
931
+ # region ---- ---- ---- Date Range HTF Subroutines ---- ---- ----
932
+
933
+
934
+ def _error_check_htf_range(
935
+ start, end, periods: Union[int, None]
936
+ ) -> Tuple[Union[pd.Timestamp, None], Union[pd.Timestamp, None], Union[int, None]]:
937
+ "Standardize and Error Check Start, End, and period params"
938
+ if periods is not None:
939
+ if not isinstance(periods, int):
940
+ raise ValueError(f"Date_Range_HTF Must be either an int or None. Given {type(periods)}")
941
+ if periods < 0:
942
+ raise ValueError("Date_range_HTF Periods must be Positive.")
943
+
944
+ if isinstance(start, (int, float)):
945
+ start = int(start * 1_000_000_000)
946
+ if isinstance(end, (int, float)):
947
+ end = int(end * 1_000_000_000)
948
+
949
+ if start is not None:
950
+ start = pd.Timestamp(start).normalize().tz_localize(None)
951
+ if end is not None:
952
+ end = pd.Timestamp(end).normalize().tz_localize(None)
953
+
954
+ if all((start, end, periods)):
955
+ periods = None # Ignore Periods if passed too many params
956
+ if len([param for param in (start, end, periods) if param is not None]) < 2:
957
+ raise ValueError("Date_Range_HTF must be given two of the three following params: (start, end, periods)")
958
+
959
+ if start is not None and end is not None and end < start:
960
+ raise ValueError("Date_Range_HTF() Start-Date must be before the End-Date")
961
+
962
+ return start, end, periods
963
+
964
+
965
def _standardize_htf_freq(frequency: Union[str, pd.Timedelta, int, float]) -> Tuple[int, PeriodCode]:
    "Standardize the frequency multiplier and Code, throwing errors as needed."
    if isinstance(frequency, str):
        if len(frequency) == 0:
            raise ValueError("Date_Range_HTF Frequency is an empty string.")
        if len(frequency) == 1:
            # A bare period code such as 'D' implies a multiplier of 1.
            frequency = "1" + frequency

        code = frequency[-1].upper()
        if code in {"W", "M", "Q", "Y"}:
            try:
                multiplier = int(frequency[:-1])
                if multiplier <= 0:
                    raise ValueError()
            except ValueError as e:
                raise ValueError(
                    "Date_Range_HTF() Week, Month, Quarter and Year frequency must "
                    "have a positive integer multiplier"
                ) from e
            return multiplier, code  # type: ignore

    # Everything remaining (ints, floats, day-based strings, & Timedeltas) is
    # interpreted as a count of open business days.
    if isinstance(frequency, (int, float)):  # POSIX seconds -> nanoseconds
        frequency = int(frequency * 1_000_000_000)

    delta = pd.Timedelta(frequency)
    if delta < pd.Timedelta("1D"):
        raise ValueError("Date_Range_HTF() Frequency must be '1D' or Higher.")
    if delta % pd.Timedelta("1D") != pd.Timedelta(0):
        raise ValueError("Date_Range_HTF() Week and Day frequency must be an integer multiple of Days")

    return delta.days, "D"
994
+
995
+
996
+ def _days_per_week(weekmask: Union[Iterable, str]) -> int:
997
+ "Used to get a more accurate estimate of the number of days per week"
998
+ # Return any 'Array Like' Representation
999
+ if not isinstance(weekmask, str):
1000
+ return len([day for day in weekmask if bool(day)])
1001
+
1002
+ if len(weekmask) == 0:
1003
+ raise ValueError("Weekmask cannot be blank")
1004
+
1005
+ weekmask = weekmask.upper()
1006
+ day_abbrs = {day for day in weekly_roll_map.values() if day in weekmask}
1007
+ if len(day_abbrs) != 0:
1008
+ return len(day_abbrs)
1009
+
1010
+ # Weekmask Something like '0111110'
1011
+ return len([day for day in weekmask if bool(day)])
1012
+
1013
+
1014
+ def _cal_day_range(cb_day: "CustomBusinessDay", start, end, periods, mult) -> pd.DatetimeIndex:
1015
+ """
1016
+ Returns a Normalized DateTimeIndex of Open Buisness Days.
1017
+ Exactly two of the (start, end, periods) arguments must be given.
1018
+
1019
+ ** Arguments should be Type/Error Checked before calling this function **
1020
+
1021
+ :param cb_day: CustomBusinessDay Object from the respective calendar
1022
+ :param start: Optional Start-Date. Must be a Normalized, TZ-Naive pd.Timestamp
1023
+ :param end: Optional End-Date. Must be a Normalized, TZ-Naive pd.Timestamp
1024
+ :param periods: Optional Number of periods to return
1025
+ :param mult: Integer Multiple of buisness days between data-points.
1026
+ e.g: 1 == Every Business Day, 2 == Every Other B.Day, 3 == Every Third B.Day, etc.
1027
+ :returns: DateRangeIndex[datetime64[ns]]
1028
+ """
1029
+
1030
+ # Ensure Start and End are open Business days in the desired range
1031
+ if start is not None:
1032
+ start = cb_day.rollforward(start)
1033
+ if end is not None:
1034
+ end = cb_day.rollback(end)
1035
+
1036
+ # ---- Start-Date to End-Date ----
1037
+ if isinstance(start, pd.Timestamp) and isinstance(end, pd.Timestamp):
1038
+ num_days = (end - start) / mult
1039
+ # Get a better estimate of the number of open days since date_range calc is slow
1040
+ est_open_days = ((num_days // 7) * _days_per_week(cb_day.weekmask)) + num_days % pd.Timedelta("1W")
1041
+
1042
+ # Should always produce a small overestimate since Holidays aren't accounted for.
1043
+ est_open_days = ceil(est_open_days / pd.Timedelta("1D"))
1044
+ _range = pd.RangeIndex(0, est_open_days * mult, mult)
1045
+
1046
+ dt_index = pd.DatetimeIndex(start + _range * cb_day, dtype="datetime64[ns]")
1047
+ return dt_index[dt_index <= end]
1048
+
1049
+ # ---- Periods from Start-Date ----
1050
+ elif isinstance(start, pd.Timestamp):
1051
+ _range = pd.RangeIndex(0, periods * mult, mult)
1052
+ return pd.DatetimeIndex(start + _range * cb_day, dtype="datetime64[ns]")
1053
+
1054
+ # ---- Periods from End-Date ----
1055
+ else:
1056
+ # Ensure the end-date is the first valid Trading Day <= given end-date
1057
+ end = cb_day.rollback(end)
1058
+ _range = pd.RangeIndex(0, -1 * periods * mult, -1 * mult)
1059
+
1060
+ return pd.DatetimeIndex(end + _range * cb_day, dtype="datetime64[ns]")[::-1]
1061
+
1062
+
1063
def _cal_WMQY_range(
    cb_day: "CustomBusinessDay",
    start: Union[pd.Timestamp, None],
    end: Union[pd.Timestamp, None],
    periods: Union[int, None],
    freq: str,
    grouping_period: str,
    closed: Union[Literal["left", "right"], None] = "right",
) -> pd.DatetimeIndex:
    """
    Return a DatetimeIndex of the days that mark either the start or end of each
    business Week/Month/Quarter/Year based on the 'closed' parameter.

    ** Arguments should be Type/Error Checked before calling this function **

    :param cb_day: CustomBusinessDay Object from the respective calendar
    :param start: Optional Start-Date. Must be a Normalized, TZ-Naive pd.Timestamp
    :param end: Optional End-Date. Must be a Normalized, TZ-Naive pd.Timestamp
    :param periods: Optional Number of periods to return
    :param freq: Formatted frequency of '1W' and Higher with desired multiple, S/E Chars,
        and Anchoring code.
    :param grouping_period: Period_Code with anchor that matches the given period Code.
        i.e. 'W-[DAY]', 'M', 'Q-[MONTH]', 'Y-[MONTH]'
    :param closed: Union['left', Any].
        'left': The normalized start-day of the relative period is returned
        Everything else: The normalized last-day of the relative period is returned
    :returns: DatetimeIndex[datetime64[ns]]
    """

    # Need to Adjust the Start/End Dates given to pandas since rolling forward or backward can
    # shift the calculated date range out of the desired [start, end] range, adding or dropping
    # desired values.

    # For Example, say we want NYSE-Month-Starts between [2020-01-02, 2020-02-02]. W/O adjusting
    # dates we call pd.date_range('2020-01-02', '2020-02-02', 'MS') => ['2020-02-01'] Rolled to
    # ['2020-02-03']. The '02-03' date is then trimmed off, returning an empty Index, despite
    # '2020-01-02' being a valid Month Start.
    # By adjusting the dates we call pd.date_range('2020-01-01', '2020-02-02') =>
    # ['2020-01-01', '2020-02-01'] that's then Rolled into [2020-01-02, 2020-02-03] & trimmed to
    # [2020-01-02] as desired.

    # Adjusted bounds actually handed to pd.date_range below.
    _dr_start, _dr_end = None, None

    if closed == "left":
        # 'left': each period is represented by its first open trading day.
        roll_func = cb_day.rollforward
        if start is not None:
            # Widen to the period's calendar start if 'start' still precedes the
            # period's first open trading day; otherwise keep 'start' as-is.
            normalized_start = start.to_period(grouping_period).start_time
            _dr_start = normalized_start if start <= roll_func(normalized_start) else start

        if end is not None:
            if periods is not None:
                # When counting periods backward from 'end', make sure a period whose
                # first open day lies after 'end' is not counted.
                normalized_end = end.to_period(grouping_period).start_time
                _dr_end = (
                    normalized_end - pd.Timedelta("1D")  # Shift into preceding group
                    if end < roll_func(normalized_end)
                    else cb_day.rollback(end)
                )
            else:
                _dr_end = cb_day.rollback(end)

    else:
        # 'right' (default): each period is represented by its last open trading day.
        roll_func = cb_day.rollback
        if start is not None:
            if periods is not None:
                # Mirror of the 'left'+periods case: exclude a period whose last
                # open day lies before 'start' when counting forward.
                normalized_start = start.to_period(grouping_period).end_time.normalize()
                _dr_start = (
                    normalized_start + pd.Timedelta("1D")  # Shift into trailing group
                    if start > roll_func(normalized_start)
                    else cb_day.rollforward(start)
                )
            else:
                _dr_start = cb_day.rollforward(start)

        if end is not None:
            # Widen to the period's calendar end if 'end' still follows the
            # period's last open trading day; otherwise keep 'end' as-is.
            normalized_end = end.to_period(grouping_period).end_time.normalize()
            _dr_end = normalized_end if end >= roll_func(normalized_end) else end

    # Generate the anchored calendar range, then roll every date onto an open trading day.
    _range = pd.date_range(_dr_start, _dr_end, periods, freq).to_series().apply(roll_func)

    # Ensure that Rolled Timestamps are in the desired range when given both Start and End.
    if start is not None and end is not None:
        if len(_range) > 0 and _range.iloc[0] < start:
            # Trims off the first 'WMQY End' that might have been Rolled before start
            _range = _range[1:]
        if len(_range) > 0 and _range.iloc[-1] > end:
            # Trims off the last 'WMQY Start' that might have been Rolled after end
            _range = _range[0:-1]

    return pd.DatetimeIndex(_range, dtype="datetime64[ns]")
1149
+
1150
+
1151
+ # endregion