forex_data_aggregator 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,993 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Mon Apr 25 18:07:21 2022
4
+
5
+ @author: fiora
6
+ """
7
+
8
+ from loguru import logger
9
+
10
+ from pathlib import Path
11
+ from typing import Any, Dict, List, Optional, Union
12
+ from datetime import datetime
13
+ from shutil import rmtree
14
+ from io import StringIO
15
+
16
+ from attrs import (
17
+ define,
18
+ field,
19
+ validate,
20
+ validators
21
+ )
22
+
23
+ # PANDAS
24
+ from pandas import (
25
+ DataFrame as pandas_dataframe,
26
+ read_csv as pandas_read_csv
27
+ )
28
+
29
+ # PYARROW
30
+ from pyarrow import (
31
+ Table as pyarrow_Table,
32
+ table as pyarrow_table,
33
+ compute as pc
34
+ )
35
+
36
+ # POLARS
37
+ from polars import (
38
+ col,
39
+ from_epoch,
40
+ from_dict as polars_fromdict,
41
+ DataFrame as polars_dataframe,
42
+ LazyFrame as polars_lazyframe
43
+ )
44
+
45
+ from numpy import array
46
+
47
+ # python base
48
+ from dotty_dict import dotty
49
+ from dotty_dict import Dotty
50
+
51
+ # external
52
+
53
+ # polygon-io source
54
+ import polygon
55
+ from polygon import (
56
+ RESTClient as polygonio_client,
57
+ BadResponse
58
+ )
59
+
60
+ # alpha-vantage source
61
+ from alpha_vantage.foreignexchange import ForeignExchange as av_forex_client
62
+
63
+ from .common import (
64
+ YEARS,
65
+ MONTHS,
66
+ DATE_FORMAT_SQL,
67
+ DATE_FORMAT_HISTDATA_CSV,
68
+ HISTDATA_URL_TICKDATA_TEMPLATE,
69
+ HISTDATA_BASE_DOWNLOAD_METHOD,
70
+ HISTDATA_BASE_DOWNLOAD_URL,
71
+ DEFAULT_PATHS,
72
+ DATA_TYPE,
73
+ BASE_DATA_COLUMN_NAME,
74
+ DATA_FILE_COLUMN_INDEX,
75
+ SUPPORTED_DATA_FILES,
76
+ SUPPORTED_DATA_ENGINES,
77
+ ASSET_TYPE,
78
+ TEMP_FOLDER,
79
+ TEMP_CSV_FILE,
80
+ DTYPE_DICT,
81
+ PYARROW_DTYPE_DICT,
82
+ POLARS_DTYPE_DICT,
83
+ DATA_COLUMN_NAMES,
84
+ FILENAME_TEMPLATE,
85
+ DATA_KEY,
86
+ TICK_TIMEFRAME,
87
+ FILENAME_STR,
88
+ REALTIME_DATA_PROVIDER,
89
+ ALPHA_VANTAGE_API_KEY,
90
+ CANONICAL_INDEX,
91
+ DATE_NO_HOUR_FORMAT,
92
+ POLYGON_IO_API_KEY,
93
+ validator_file_path,
94
+ validator_dir_path,
95
+ get_attrs_names,
96
+ check_time_offset_str,
97
+ check_timeframe_str,
98
+ any_date_to_datetime64,
99
+ empty_dataframe,
100
+ is_empty_dataframe,
101
+ shape_dataframe,
102
+ get_dataframe_column,
103
+ get_dataframe_row,
104
+ get_dataframe_element,
105
+ get_dotty_leafs,
106
+ astype,
107
+ read_csv,
108
+ polars_datetime,
109
+ sort_dataframe,
110
+ concat_data,
111
+ list_remove_duplicates,
112
+ get_dotty_key_field,
113
+ reframe_data,
114
+ write_csv,
115
+ write_parquet,
116
+ read_parquet,
117
+ to_pandas_dataframe,
118
+ get_pair_symbols,
119
+ to_source_symbol,
120
+ get_date_interval,
121
+ polygon_agg_to_dict,
122
+ validator_list_timeframe,
123
+ AV_LIST_URL,
124
+ PAIR_ALPHAVANTAGE_FORMAT,
125
+ PAIR_POLYGON_FORMAT
126
+ )
127
+ from ..config import (
128
+ read_config_file,
129
+ read_config_string,
130
+ read_config_folder
131
+ )
132
+
133
+ # constants
134
+ READ_RETRY_COUNT = 2
135
+ READ_PAUSE = 1
136
+ READ_CHUNKSIZE = 96
137
+
138
+ MINIMAL_RECENT_TIME_WINDOW_DAYS = 3
139
+
140
+
141
+ __all__ = ['RealtimeManager']
142
+
143
+ # Realtime data manager
144
+ # source data providers to test APIs: polygon-IO, alpha-vantage
145
+
146
+
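+ # a minimal construction sketch (illustrative only; the provider key
+ # names below are the constants imported from .common, the key values
+ # are placeholders):
+ #
+ #     rt = RealtimeManager(
+ #         providers_key={
+ #             ALPHA_VANTAGE_API_KEY: '<your-alpha-vantage-key>',
+ #             POLYGON_IO_API_KEY: '<your-polygon-io-key>'
+ #         },
+ #         engine='polars_lazy'
+ #     )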
147
+ @define(kw_only=True, slots=True)
148
+ class RealtimeManager:
149
+
150
+ # interface parameters
151
+ config: str = field(default='',
152
+ validator=validators.instance_of(str))
153
+ providers_key: dict = field(factory=dict,
154
+ validator=validators.instance_of(dict))
155
+ data_type: str = field(default='parquet',
156
+ validator=validators.in_(SUPPORTED_DATA_FILES))
157
+ engine: str = field(default='polars_lazy',
158
+ validator=validators.in_(SUPPORTED_DATA_ENGINES))
159
+
160
+ # internal parameters
161
+ _db_dict = field(factory=dotty,
162
+ validator=validators.instance_of(Dotty))
163
+ _dataframe_type = field(default=pandas_dataframe)
164
+ _data_path = field(default=Path(DEFAULT_PATHS.BASE_PATH),
165
+ validator=validator_dir_path(create_if_missing=True))
166
+ _realtimedata_path = field(
167
+ default=Path(DEFAULT_PATHS.BASE_PATH) / DEFAULT_PATHS.REALTIME_DATA_FOLDER,
168
+ validator=validator_dir_path(create_if_missing=True)
169
+ )
170
+ _temporary_data_path = field(
171
+ default=(Path(DEFAULT_PATHS.BASE_PATH) /
172
+ DEFAULT_PATHS.REALTIME_DATA_FOLDER /
173
+ TEMP_FOLDER),
174
+ validator=validator_dir_path(create_if_missing=True))
175
+
176
+ # if a valid config file or string
177
+ # is passed, the arguments it contains
178
+ # are assigned here; values passed at
179
+ # instantiation take precedence and
180
+ # override the corresponding argument
181
+ # in the next initialization step
182
+
183
+ # if an argument is set neither at
184
+ # instantiation nor in the config file,
185
+ # it falls back to its associated
186
+ # default or factory
187
+
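+ # illustrative precedence sketch (the config filename and its keys are
+ # assumptions for the example, not defined by this module):
+ #
+ #     # data_config.yaml contains e.g. "ENGINE: polars"
+ #     rt = RealtimeManager(config='data_config.yaml', engine='pandas')
+ #     # the value passed at instantiation wins: rt.engine == 'pandas';
+ #     # attributes set in neither place keep their declared defaults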
188
+ def __init__(self, **kwargs: Any) -> None:
189
+
190
+ _class_attributes_name = get_attrs_names(self, **kwargs)
191
+ _not_assigned_attrs_index_mask = [True] * len(_class_attributes_name)
192
+
193
+ if 'config' in kwargs.keys():
194
+
195
+ if kwargs['config']:
196
+
197
+ config_path = Path(kwargs['config'])
198
+
199
+ if (
200
+ config_path.exists() and
201
+ config_path.is_dir()
202
+ ):
203
+
204
+ config_filepath = read_config_folder(
205
+ config_path, file_pattern='data_config.yaml')
206
+
207
+ else:
208
+
209
+ config_filepath = Path()
210
+
211
+ config_args = {}
212
+ if config_filepath.exists() \
213
+ and \
214
+ config_filepath.is_file() \
215
+ and \
216
+ config_filepath.suffix == '.yaml':
217
+
218
+ # read parameters from config file
219
+ # and force keys to lower case
220
+ config_args = {key.lower(): val for key, val in
221
+ read_config_file(str(config_filepath)).items()}
222
+
223
+ elif isinstance(kwargs['config'], str):
224
+
225
+ # read parameters from config file
226
+ # and force keys to lower case
227
+ config_args = {key.lower(): val for key, val in
228
+ read_config_string(kwargs['config']).items()}
229
+
230
+ else:
231
+
232
+ logger.critical('invalid config type '
233
+ f'{kwargs["config"]}: '
234
+ 'required str or Path, got '
235
+ f'{type(kwargs["config"])}')
236
+ raise TypeError
237
+
238
+ # check consistency of config_args
239
+ if (
240
+ not isinstance(config_args, dict) or
241
+ not bool(config_args)
242
+ ):
243
+
244
+ logger.critical(f'config {kwargs["config"]} '
245
+ 'has no valid yaml formatted data')
246
+ raise TypeError
247
+
248
+ # set args from config file
249
+ attrs_keys_configfile = \
250
+ set(_class_attributes_name).intersection(config_args.keys())
251
+
252
+ for attr_key in attrs_keys_configfile:
253
+
254
+ self.__setattr__(attr_key,
255
+ config_args[attr_key])
256
+
257
+ _not_assigned_attrs_index_mask[
258
+ _class_attributes_name.index(attr_key)
259
+ ] = False
260
+
261
+ # set args from instantiation
262
+ # override if attr already has a value from config
263
+ attrs_keys_input = \
264
+ set(_class_attributes_name).intersection(kwargs.keys())
265
+
266
+ for attr_key in attrs_keys_input:
267
+
268
+ self.__setattr__(attr_key,
269
+ kwargs[attr_key])
270
+
271
+ _not_assigned_attrs_index_mask[
272
+ _class_attributes_name.index(attr_key)
273
+ ] = False
274
+
275
+ # attrs not present in the config file or in instance inputs
276
+ # (accessing self.attr would raise a KeyError)
277
+ # are manually assigned the default value derived
278
+ # from __attrs_attrs__
279
+
280
+ for attr_key in array(_class_attributes_name)[
281
+ _not_assigned_attrs_index_mask
282
+ ]:
283
+
284
+ try:
285
+
286
+ attr = [attr
287
+ for attr in self.__attrs_attrs__
288
+ if attr.name == attr_key][0]
289
+
290
+ except KeyError:
291
+
292
+ logger.error('KeyError: initializing object has no '
293
+ f'attribute {attr_key}')
294
+ raise
295
+
296
+ except IndexError:
297
+
298
+ logger.error('IndexError: initializing object has no '
299
+ f'attribute {attr_key}')
300
+ raise
301
+
302
+ else:
303
+
304
+ # assign default value
305
+ # try default, then factory;
306
+ # if neither is present
307
+ # assign None
308
+ if hasattr(attr, 'default'):
309
+
310
+ if hasattr(attr.default, 'factory'):
311
+
312
+ self.__setattr__(attr.name,
313
+ attr.default.factory())
314
+
315
+ else:
316
+
317
+ self.__setattr__(attr.name,
318
+ attr.default)
319
+
320
+ else:
321
+
322
+ self.__setattr__(attr.name,
323
+ None)
324
+
325
+ else:
326
+
327
+ # no config file is defined
328
+ # call generated init
329
+ self.__attrs_init__(**kwargs) # type: ignore[attr-defined]
330
+
331
+ validate(self)
332
+
333
+ self.__attrs_post_init__()
334
+
335
+ def __attrs_post_init__(self) -> None:
336
+
337
+ # set up log sink for the realtime manager
338
+ logger.add(self._data_path / 'log' / 'forexrtdata.log',
339
+ level="TRACE",
340
+ rotation="5 MB",
341
+ filter=lambda record: ('rtmanager' == record['extra'].get('target') and
342
+ bool(record["extra"].get('target'))))
343
+ # checks on data folder path
344
+ if (
345
+ not self._realtimedata_path.is_dir() or
346
+ not self._realtimedata_path.exists()
347
+ ):
348
+
349
+ self._realtimedata_path.mkdir(parents=True,
350
+ exist_ok=True)
351
+
352
+ if self.engine == 'pandas':
353
+
354
+ self._dataframe_type = pandas_dataframe
355
+
356
+ elif self.engine == 'pyarrow':
357
+
358
+ self._dataframe_type = pyarrow_table
359
+
360
+ elif self.engine == 'polars':
361
+
362
+ self._dataframe_type = polars_dataframe
363
+
364
+ elif self.engine == 'polars_lazy':
365
+
366
+ self._dataframe_type = polars_lazyframe
367
+
368
+ self._temporary_data_path = self._realtimedata_path \
369
+ / TEMP_FOLDER
370
+
371
+ self._clear_temporary_data_folder()
372
+
373
+ def _clear_temporary_data_folder(self) -> None:
374
+
375
+ # delete temporary data path
376
+ if (
377
+ self._temporary_data_path.exists() and
378
+ self._temporary_data_path.is_dir()
379
+ ):
380
+
381
+ try:
382
+
383
+ rmtree(str(self._temporary_data_path))
384
+
385
+ except Exception as e:
386
+
387
+ logger.warning('Failed to delete temporary data folder '
389
+ f'{str(self._temporary_data_path)}: {e}')
389
+
390
+ def _getClient(self, provider: str) -> Any:
391
+
392
+ if provider == REALTIME_DATA_PROVIDER.ALPHA_VANTAGE:
393
+
394
+ return av_forex_client(key=self.providers_key[ALPHA_VANTAGE_API_KEY])
395
+
396
+ elif provider == REALTIME_DATA_PROVIDER.POLYGON_IO:
397
+
398
+ return polygonio_client(api_key=self.providers_key[POLYGON_IO_API_KEY])
399
+
400
+ def tickers_list(self,
401
+ data_source,
402
+ asset_class: Optional[str] = None) -> List[str]:
403
+
404
+ # return the list of ticker symbols actively handled by the data provider
405
+
406
+ tickers_list = list()
407
+
408
+ if data_source == REALTIME_DATA_PROVIDER.ALPHA_VANTAGE:
409
+
410
+ # compose URL for tickers listing request
411
+ # decode content
412
+ with self._session as s: # type: ignore[attr-defined]
413
+ listing_downloaded = s.get(AV_LIST_URL.format(
414
+ api_key=self.providers_key[ALPHA_VANTAGE_API_KEY]))
415
+ decoded_content = listing_downloaded.content.decode('utf-8')
416
+ tickers_df = pandas_read_csv(
417
+ StringIO(decoded_content), sep=',', header=0)
418
+
419
+ if asset_class:
420
+
421
+ if asset_class == ASSET_TYPE.FOREX:
422
+
423
+ logger.error('alpha vantage listing does not include forex tickers')
424
+ raise ValueError
425
+
426
+ elif asset_class == ASSET_TYPE.ETF:
427
+
428
+ assetType_req_index = tickers_df.loc[:, 'assetType'] == 'ETF'
429
+
430
+ elif asset_class == ASSET_TYPE.STOCK:
431
+
432
+ assetType_req_index = tickers_df.loc[:, 'assetType'] == 'Stock'
433
+
434
+ tickers_list = tickers_df.loc[assetType_req_index, 'symbol'].to_list()
435
+
436
+ else:
437
+
438
+ tickers_list = tickers_df.loc[:, 'symbol'].to_list()
439
+
440
+ elif data_source == REALTIME_DATA_PROVIDER.POLYGON_IO:
441
+
442
+ if asset_class:
443
+
444
+ if asset_class == ASSET_TYPE.FOREX:
445
+
446
+ poly_asset_class = 'fx'
447
+
448
+ else:
449
+
450
+ poly_asset_class = None
451
+
452
+ # call function for forex asset_class
453
+ listing_downloaded = self._getClient(REALTIME_DATA_PROVIDER.POLYGON_IO).get_exchanges(
454
+ asset_class=poly_asset_class)
455
+
456
+ tickers_list = [item.acronym for item in listing_downloaded]
457
+
458
+ return tickers_list
459
+
460
+ def get_realtime_quote(self, ticker: str) -> Any:
461
+
462
+ with self._getClient(REALTIME_DATA_PROVIDER.POLYGON_IO) as client:
463
+
464
+ to_symbol, from_symbol = get_pair_symbols(ticker.upper())
465
+
466
+ poly_resp = client.get_last_forex_quote(from_symbol,
467
+ to_symbol)
468
+
469
+ return poly_resp
470
+
471
+ def get_daily_close(self,
472
+ ticker,
473
+ last_close=False,
474
+ recent_days_window=None,
475
+ day_start=None,
476
+ day_end=None,
477
+ ) -> Any:
478
+ """
479
+ Retrieve daily OHLC data for the specified ticker.
480
+
481
+ Fetches daily forex data from Alpha Vantage API. Supports three modes of operation:
482
+ last close only, recent N days window, or specific date range.
483
+
484
+ Args:
485
+ ticker (str): Currency pair symbol (e.g., 'EURUSD', 'GBPUSD', 'USDJPY').
486
+ Case-insensitive.
487
+ last_close (bool, optional): If True, returns only the most recent daily close.
488
+ Default is False.
489
+ recent_days_window (int, optional): Number of recent days to retrieve.
490
+ Mutually exclusive with day_start/day_end. Default is None.
491
+ day_start (str, optional): Start date for data retrieval in 'YYYY-MM-DD' format.
492
+ Used with day_end to specify exact date range. Default is None.
493
+ day_end (str, optional): End date for data retrieval in 'YYYY-MM-DD' format.
494
+ Used with day_start to specify exact date range. Default is None.
495
+
496
+ Returns:
497
+ polars.DataFrame | polars.LazyFrame: DataFrame containing daily OHLC data with columns:
498
+
499
+ - timestamp: datetime column with daily timestamps
500
+ - open: Opening price (float32)
501
+ - high: Highest price (float32)
502
+ - low: Lowest price (float32)
503
+ - close: Closing price (float32)
504
+
505
+ Returns empty DataFrame if API call fails.
506
+
507
+ Raises:
508
+ AssertionError: If day_start and day_end are not both given and recent_days_window is not an integer
509
+ BadResponse: If Alpha Vantage API request fails (handled internally)
510
+
511
+ Example::
512
+
513
+ # Get last close only
514
+ manager = RealtimeManager(config='data_config.yaml')
515
+ latest = manager.get_daily_close(ticker='EURUSD', last_close=True)
516
+
517
+ # Get last 10 days
518
+ recent = manager.get_daily_close(ticker='EURUSD', recent_days_window=10)
519
+
520
+ # Get specific date range
521
+ range_data = manager.get_daily_close(
522
+ ticker='EURUSD',
523
+ day_start='2024-01-01',
524
+ day_end='2024-01-31'
525
+ )
526
+
527
+ Note:
528
+ - Requires valid Alpha Vantage API key in configuration
529
+ - Free tier has 25 requests per day limit
530
+ - outputsize='compact' returns ~100 most recent data points
531
+ - outputsize='full' can return several years of data
532
+ - Use last_close=True for minimal data transfer
533
+
534
+ """
535
+
536
+ to_symbol, from_symbol = get_pair_symbols(ticker.upper())
537
+
538
+ try:
539
+
540
+ client = self._getClient(REALTIME_DATA_PROVIDER.ALPHA_VANTAGE)
541
+
542
+ if last_close:
543
+
544
+ res = client.get_currency_exchange_daily(
545
+ from_symbol,
546
+ to_symbol,
547
+ outputsize='compact'
548
+ )
549
+
550
+ # parse response and return
551
+ return self._parse_data_daily_alphavantage(
552
+ res,
553
+ last_close=True
554
+ )
555
+
556
+ else:
557
+
558
+ if not day_start or not day_end:
559
+ assert isinstance(recent_days_window, int), \
560
+ 'recent_days_window must be integer'
561
+
562
+ # note: "outputsize='full'" does not have a fixed earliest date,
563
+ # so 'day_start' or 'recent_days_window' cannot be honoured
564
+ # reliably when they imply an interval larger than the
565
+ # history actually returned by the
566
+ # "outputsize='full'" option
567
+ res = client.get_currency_exchange_daily(
568
+ from_symbol,
569
+ to_symbol,
570
+ outputsize='full'
571
+ )
572
+
573
+ # parse response and return
574
+ return self._parse_data_daily_alphavantage(
575
+ res,
576
+ last_close=False,
577
+ recent_days_window=recent_days_window,
578
+ day_start=day_start,
579
+ day_end=day_end)
580
+
581
+ except BadResponse as e:
582
+
583
+ logger.warning(e)
584
+ return self._dataframe_type([])
585
+
586
+ except Exception as e:
587
+
588
+ logger.warning(f'Raised Exception: {e}')
589
+ return self._dataframe_type([])
590
+
591
+ def _parse_aggs_data(self, data_provider: str, **kwargs: Any) -> Any:
592
+
593
+ if data_provider == REALTIME_DATA_PROVIDER.ALPHA_VANTAGE:
594
+
595
+ return self._parse_data_daily_alphavantage(**kwargs)
596
+
597
+ elif data_provider == REALTIME_DATA_PROVIDER.POLYGON_IO:
598
+
599
+ return self._parse_data_aggs_polygonio(**kwargs)
600
+
601
+ else:
602
+
603
+ logger.error(f'data provider {data_provider} is invalid '
604
+ '- see REALTIME_DATA_PROVIDER for supported providers')
605
+
606
+ return self._dataframe_type()
607
+
608
+ def _parse_data_daily_alphavantage(
609
+ self,
610
+ daily_data,
611
+ last_close=False,
612
+ recent_days_window=None,
613
+ day_start=None,
614
+ day_end=None
615
+ ) -> Any:
616
+
617
+ if not last_close:
618
+
619
+ if isinstance(recent_days_window, int):
620
+ # set window as DateOffset str with num and days
621
+ days_window = '{days_num}d'.format(days_num=recent_days_window)
622
+
623
+ day_start, day_end = get_date_interval(interval_end_mode='now',
624
+ interval_timespan=days_window,
625
+ normalize=True,
626
+ bdays=True)
627
+
628
+ else:
629
+
630
+ day_start = any_date_to_datetime64(day_start)
631
+ day_end = any_date_to_datetime64(day_end)
632
+
633
+ # parse alpha vantage response from daily api request
634
+ resp_data_dict = daily_data[CANONICAL_INDEX.AV_DF_DATA_INDEX]
635
+
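+ # (assumed shape of the alpha_vantage daily response, for illustration:
+ #  the element at CANONICAL_INDEX.AV_DF_DATA_INDEX is a dict keyed by
+ #  date string, e.g.
+ #  {'2024-01-15': {'1. open': '1.0951', '2. high': '1.0967',
+ #                  '3. low': '1.0933', '4. close': '1.0946'}, ...})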
636
+ # raw response data to dictionary
637
+ timestamp = list(resp_data_dict.keys())
638
+ data_values = resp_data_dict.values()
639
+ open_data = [item['1. open'] for item in data_values]
640
+ high_data = [item['2. high'] for item in data_values]
641
+ low_data = [item['3. low'] for item in data_values]
642
+ close_data = [item['4. close'] for item in data_values]
643
+
644
+ if self.engine == 'pandas':
645
+
646
+ df = pandas_dataframe(
647
+ {
648
+ BASE_DATA_COLUMN_NAME.TIMESTAMP: timestamp,
649
+ BASE_DATA_COLUMN_NAME.OPEN: open_data,
650
+ BASE_DATA_COLUMN_NAME.HIGH: high_data,
651
+ BASE_DATA_COLUMN_NAME.LOW: low_data,
652
+ BASE_DATA_COLUMN_NAME.CLOSE: close_data
653
+ }
654
+ )
655
+
656
+ # final cast to standard dtypes
657
+ df = astype(df, DTYPE_DICT.TIME_TF_DTYPE)
658
+
659
+ # sort by timestamp
660
+ df = sort_dataframe(df, BASE_DATA_COLUMN_NAME.TIMESTAMP)
661
+
662
+ # timestamp as column to include it in return data
663
+ df.reset_index(inplace=True)
664
+
665
+ if last_close:
666
+
667
+ # get most recent row --> last row after ascending sort
668
+ df = get_dataframe_row(df, shape_dataframe(df)[0] - 1)
669
+
670
+ else:
671
+
672
+ # return data based on filter output
673
+ df = df[
674
+ (df[BASE_DATA_COLUMN_NAME.TIMESTAMP] >= day_start) &
675
+ (df[BASE_DATA_COLUMN_NAME.TIMESTAMP] <= day_end)
676
+ ]
677
+
678
+ elif self.engine == 'pyarrow':
679
+
680
+ df = pyarrow_table(
681
+ {
682
+ BASE_DATA_COLUMN_NAME.TIMESTAMP: timestamp,
683
+ BASE_DATA_COLUMN_NAME.OPEN: open_data,
684
+ BASE_DATA_COLUMN_NAME.HIGH: high_data,
685
+ BASE_DATA_COLUMN_NAME.LOW: low_data,
686
+ BASE_DATA_COLUMN_NAME.CLOSE: close_data
687
+ }
688
+ )
689
+
690
+ # final cast to standard dtypes
691
+ df = astype(df, PYARROW_DTYPE_DICT.TIME_TF_DTYPE)
692
+
693
+ # sort by timestamp
694
+ df = sort_dataframe(df, BASE_DATA_COLUMN_NAME.TIMESTAMP)
695
+
696
+ if last_close:
697
+
698
+ df = get_dataframe_row(df, shape_dataframe(df)[0] - 1)
699
+
700
+ else:
701
+
702
+ mask = pc.and_(
703
+ pc.greater(df[BASE_DATA_COLUMN_NAME.TIMESTAMP],
704
+ day_start),
705
+ pc.less(df[BASE_DATA_COLUMN_NAME.TIMESTAMP],
706
+ day_end)
707
+ )
708
+
709
+ df = pyarrow_Table.from_arrays(df.filter(mask).columns,
710
+ schema=df.schema)
711
+
712
+ elif self.engine == 'polars':
713
+
714
+ df = polars_fromdict(
715
+ {
716
+ BASE_DATA_COLUMN_NAME.TIMESTAMP: timestamp,
717
+ BASE_DATA_COLUMN_NAME.OPEN: open_data,
718
+ BASE_DATA_COLUMN_NAME.HIGH: high_data,
719
+ BASE_DATA_COLUMN_NAME.LOW: low_data,
720
+ BASE_DATA_COLUMN_NAME.CLOSE: close_data
721
+ }
722
+ )
723
+
724
+ # convert timestamp column to datetime data type
725
+ df = \
726
+ df.with_columns(
727
+ col(BASE_DATA_COLUMN_NAME.TIMESTAMP).str.strptime(
728
+ polars_datetime('ms'),
729
+ format=DATE_NO_HOUR_FORMAT
730
+ )
731
+ )
732
+
733
+ # final cast to standard dtypes
734
+ df = astype(df, POLARS_DTYPE_DICT.TIME_TF_DTYPE)
735
+
736
+ # sort by timestamp
737
+ df = sort_dataframe(df, BASE_DATA_COLUMN_NAME.TIMESTAMP)
738
+
739
+ if last_close:
740
+
741
+ df = get_dataframe_row(df, shape_dataframe(df)[0] - 1)
742
+
743
+ else:
744
+
745
+ # filter on date
746
+ df = \
747
+ (
748
+ df
749
+ .filter(
750
+ col(BASE_DATA_COLUMN_NAME.TIMESTAMP).is_between(day_start,
751
+ day_end
752
+ )
753
+ ).clone()
754
+ )
755
+
756
+ elif self.engine == 'polars_lazy':
757
+
758
+ df = polars_lazyframe(
759
+ {
760
+ BASE_DATA_COLUMN_NAME.TIMESTAMP: timestamp,
761
+ BASE_DATA_COLUMN_NAME.OPEN: open_data,
762
+ BASE_DATA_COLUMN_NAME.HIGH: high_data,
763
+ BASE_DATA_COLUMN_NAME.LOW: low_data,
764
+ BASE_DATA_COLUMN_NAME.CLOSE: close_data
765
+ }
766
+ )
767
+
768
+ # convert timestamp column to datetime data type
769
+ df = \
770
+ df.with_columns(
771
+ col(BASE_DATA_COLUMN_NAME.TIMESTAMP).str.strptime(
772
+ polars_datetime('ms'),
773
+ format=DATE_NO_HOUR_FORMAT
774
+ )
775
+ )
776
+
777
+ # final cast to standard dtypes
778
+ df = astype(df, POLARS_DTYPE_DICT.TIME_TF_DTYPE)
779
+
780
+ # sort by timestamp
781
+ df = sort_dataframe(df, BASE_DATA_COLUMN_NAME.TIMESTAMP)
782
+
783
+ if last_close:
784
+
785
+ df = get_dataframe_row(df, shape_dataframe(df)[0] - 1)
786
+
787
+ else:
788
+
789
+ # filter on date
790
+ df = \
791
+ (
792
+ df
793
+ .filter(
794
+ col(BASE_DATA_COLUMN_NAME.TIMESTAMP).is_between(day_start,
795
+ day_end
796
+ )
797
+ ).clone()
798
+ )
799
+
800
+ # return parsed data
801
+ return df
802
+
803
+ def _parse_data_aggs_polygonio(
804
+ self,
805
+ data=None,
806
+ engine='polars'
807
+ ) -> Union[polars_lazyframe, polars_dataframe]:
808
+
809
+ if engine == 'pandas':
810
+
811
+ # parse data and format data as common defined
812
+ df = pandas_dataframe(data)
813
+
814
+ # keep base data columns
815
+ extra_columns = list(set(df.columns).difference(DATA_COLUMN_NAMES.TF_DATA))
816
+ df.drop(extra_columns, axis=1, inplace=True)
817
+
818
+ df.index = any_date_to_datetime64(
819
+ df[BASE_DATA_COLUMN_NAME.TIMESTAMP],
820
+ unit='ms'
821
+ )
822
+
823
+ # convert to conventional dtype
824
+ df = astype(df, DTYPE_DICT.TIME_TF_DTYPE)
825
+
826
+ elif engine == 'pyarrow':
827
+
828
+ # TODO: convert Agg items into dicts
829
+ # call Table.from_pylist and also set
830
+ # an appropriate schema
831
+
832
+ data_dict_list = [polygon_agg_to_dict(agg)
833
+ for agg in data]
834
+
835
+ df = pyarrow_Table.from_pylist(data_dict_list)
836
+
837
+ extra_columns = list(
838
+ set(df.column_names).difference(DATA_COLUMN_NAMES.TF_DATA))
839
+
840
+ df = df.drop_columns(extra_columns)
841
+
842
+ # convert to conventional dtype
843
+ df = astype(df, PYARROW_DTYPE_DICT.TIME_TF_DTYPE)
844
+
845
+ elif engine == 'polars':
846
+
847
+ df = polars_dataframe(data)
848
+
849
+ # sort by timestamp
850
+ df = sort_dataframe(df, BASE_DATA_COLUMN_NAME.TIMESTAMP)
851
+
852
+ extra_columns = list(set(df.columns).difference(DATA_COLUMN_NAMES.TF_DATA))
853
+
854
+ df = df.drop(extra_columns)
855
+
856
+ # convert timestamp column to datetime data type
857
+ df = df.with_columns(
858
+ from_epoch(BASE_DATA_COLUMN_NAME.TIMESTAMP,
859
+ time_unit='ms').alias(BASE_DATA_COLUMN_NAME.TIMESTAMP)
860
+ )
861
+
862
+ elif engine == 'polars_lazy':
863
+
864
+ if data:
865
+
866
+ df = polars_lazyframe(data)
867
+
868
+ extra_columns = list(set(
869
+ df.collect_schema().names()).difference(
870
+ DATA_COLUMN_NAMES.TF_DATA
871
+ )
872
+ )
873
+
874
+ df = df.drop(extra_columns)
875
+
876
+ # convert timestamp column to datetime data type
877
+ df = df.with_columns(
878
+ from_epoch(BASE_DATA_COLUMN_NAME.TIMESTAMP,
879
+ time_unit='ms').alias(BASE_DATA_COLUMN_NAME.TIMESTAMP)
880
+ )
881
+
882
+ # convert to conventional dtype
883
+ df = astype(df, POLARS_DTYPE_DICT.TIME_TF_DTYPE)
884
+
885
+ else:
886
+
887
+ df = empty_dataframe('polars_lazy')
888
+
889
+ # sort by timestamp
890
+ return sort_dataframe(df, BASE_DATA_COLUMN_NAME.TIMESTAMP)
891
+
892
+ def get_data(self,
893
+ ticker,
894
+ start=None,
895
+ end=None,
896
+ timeframe=None,
897
+ ) -> Union[polars_lazyframe, polars_dataframe]:
898
+ """
899
+ Retrieve real-time OHLC data for the specified ticker and timeframe.
900
+
901
+ Fetches intraday forex data from Polygon.io API for the specified date range
902
+ and timeframe. Data is automatically reframed to the requested timeframe.
903
+
904
+ Args:
905
+ ticker (str): Currency pair symbol (e.g., 'EURUSD', 'GBPUSD', 'USDJPY').
906
+ Case-insensitive.
907
+ start (str | datetime, optional): Start date for data retrieval. Accepts:
908
+ - ISO format: 'YYYY-MM-DD' or 'YYYY-MM-DD HH:MM:SS'
909
+ - datetime object
910
+ Default is None.
911
+ end (str | datetime, optional): End date for data retrieval. Same format
912
+ as start. Must be after start date. Default is None.
913
+ timeframe (str, optional): Target timeframe for aggregation. If specified,
914
+ minute data will be reframed to this timeframe (e.g., '5m', '1h', '1D').
915
+ Default is None (returns minute data).
916
+
917
+ Returns:
918
+ polars.DataFrame | polars.LazyFrame: DataFrame containing OHLC data with columns:
919
+
920
+ - timestamp: datetime column with candle timestamps
921
+ - open: Opening price (float32)
922
+ - high: Highest price (float32)
923
+ - low: Lowest price (float32)
924
+ - close: Closing price (float32)
925
+
926
+ Returns empty DataFrame if API call fails.
927
+
928
+ Raises:
929
+ BadResponse: If Polygon.io API request fails (handled internally, returns empty DataFrame)
930
+
931
+ Example::
932
+
933
+ # Get hourly data for 5 days
934
+ manager = RealtimeManager(config='data_config.yaml')
935
+ data = manager.get_data(
936
+ ticker='EURUSD',
937
+ start='2024-01-10',
938
+ end='2024-01-15',
939
+ timeframe='1h'
940
+ )
941
+ print(f"Retrieved {len(data)} hourly candles")
942
+ # Output: Retrieved 120 hourly candles
943
+
944
+ Note:
945
+ - Requires valid Polygon.io API key in configuration
946
+ - Free tier has rate limits and historical data restrictions
947
+ - Data is fetched at 1-minute resolution and aggregated to requested timeframe
948
+ - Failed requests return an empty DataFrame with a warning logged
949
+
950
+ """
951
+
952
+ start = any_date_to_datetime64(start)
953
+ end = any_date_to_datetime64(end)
954
+
955
+ # forward request only to polygon-io
956
+ # set ticker in polygon format
957
+
958
+ ticker_polygonio = to_source_symbol(
959
+ ticker.upper(),
960
+ REALTIME_DATA_PROVIDER.POLYGON_IO
961
+ )
962
+
963
+ try:
964
+
965
+ client = self._getClient(REALTIME_DATA_PROVIDER.POLYGON_IO)
966
+
967
+ poly_aggs = []
968
+
969
+ # TODO: set up try-except with BadResponse to manage provider
970
+ # subscription limitation
971
+
972
+ # using Polygon-io client
973
+ for a in client.list_aggs(ticker=ticker_polygonio,
974
+ multiplier=1,
975
+ timespan='minute',
976
+ from_=start,
977
+ to=end,
978
+ adjusted=True,
979
+ sort='asc'):
980
+
981
+ poly_aggs.append(a)
982
+
983
+ except BadResponse as e:
984
+
985
+ # to log
986
+ logger.warning(e)
987
+ return self._dataframe_type([])
988
+
989
+ data_df = self._parse_aggs_data(REALTIME_DATA_PROVIDER.POLYGON_IO,
990
+ data=poly_aggs,
991
+ engine=self.engine)
992
+
993
+ return reframe_data(data_df, timeframe)