onetick-py 1.177.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152)
  1. locator_parser/__init__.py +0 -0
  2. locator_parser/acl.py +73 -0
  3. locator_parser/actions.py +262 -0
  4. locator_parser/common.py +368 -0
  5. locator_parser/io.py +43 -0
  6. locator_parser/locator.py +150 -0
  7. onetick/__init__.py +101 -0
  8. onetick/doc_utilities/__init__.py +3 -0
  9. onetick/doc_utilities/napoleon.py +40 -0
  10. onetick/doc_utilities/ot_doctest.py +140 -0
  11. onetick/doc_utilities/snippets.py +279 -0
  12. onetick/lib/__init__.py +4 -0
  13. onetick/lib/instance.py +141 -0
  14. onetick/py/__init__.py +293 -0
  15. onetick/py/_stack_info.py +89 -0
  16. onetick/py/_version.py +2 -0
  17. onetick/py/aggregations/__init__.py +11 -0
  18. onetick/py/aggregations/_base.py +648 -0
  19. onetick/py/aggregations/_docs.py +948 -0
  20. onetick/py/aggregations/compute.py +286 -0
  21. onetick/py/aggregations/functions.py +2216 -0
  22. onetick/py/aggregations/generic.py +104 -0
  23. onetick/py/aggregations/high_low.py +80 -0
  24. onetick/py/aggregations/num_distinct.py +83 -0
  25. onetick/py/aggregations/order_book.py +501 -0
  26. onetick/py/aggregations/other.py +1014 -0
  27. onetick/py/backports.py +26 -0
  28. onetick/py/cache.py +374 -0
  29. onetick/py/callback/__init__.py +5 -0
  30. onetick/py/callback/callback.py +276 -0
  31. onetick/py/callback/callbacks.py +131 -0
  32. onetick/py/compatibility.py +798 -0
  33. onetick/py/configuration.py +771 -0
  34. onetick/py/core/__init__.py +0 -0
  35. onetick/py/core/_csv_inspector.py +93 -0
  36. onetick/py/core/_internal/__init__.py +0 -0
  37. onetick/py/core/_internal/_manually_bound_value.py +6 -0
  38. onetick/py/core/_internal/_nodes_history.py +250 -0
  39. onetick/py/core/_internal/_op_utils/__init__.py +0 -0
  40. onetick/py/core/_internal/_op_utils/every_operand.py +9 -0
  41. onetick/py/core/_internal/_op_utils/is_const.py +10 -0
  42. onetick/py/core/_internal/_per_tick_scripts/tick_list_sort_template.script +121 -0
  43. onetick/py/core/_internal/_proxy_node.py +140 -0
  44. onetick/py/core/_internal/_state_objects.py +2312 -0
  45. onetick/py/core/_internal/_state_vars.py +93 -0
  46. onetick/py/core/_source/__init__.py +0 -0
  47. onetick/py/core/_source/_symbol_param.py +95 -0
  48. onetick/py/core/_source/schema.py +97 -0
  49. onetick/py/core/_source/source_methods/__init__.py +0 -0
  50. onetick/py/core/_source/source_methods/aggregations.py +809 -0
  51. onetick/py/core/_source/source_methods/applyers.py +296 -0
  52. onetick/py/core/_source/source_methods/columns.py +141 -0
  53. onetick/py/core/_source/source_methods/data_quality.py +301 -0
  54. onetick/py/core/_source/source_methods/debugs.py +272 -0
  55. onetick/py/core/_source/source_methods/drops.py +120 -0
  56. onetick/py/core/_source/source_methods/fields.py +619 -0
  57. onetick/py/core/_source/source_methods/filters.py +1002 -0
  58. onetick/py/core/_source/source_methods/joins.py +1413 -0
  59. onetick/py/core/_source/source_methods/merges.py +605 -0
  60. onetick/py/core/_source/source_methods/misc.py +1455 -0
  61. onetick/py/core/_source/source_methods/pandases.py +155 -0
  62. onetick/py/core/_source/source_methods/renames.py +356 -0
  63. onetick/py/core/_source/source_methods/sorts.py +183 -0
  64. onetick/py/core/_source/source_methods/switches.py +142 -0
  65. onetick/py/core/_source/source_methods/symbols.py +117 -0
  66. onetick/py/core/_source/source_methods/times.py +627 -0
  67. onetick/py/core/_source/source_methods/writes.py +986 -0
  68. onetick/py/core/_source/symbol.py +205 -0
  69. onetick/py/core/_source/tmp_otq.py +222 -0
  70. onetick/py/core/column.py +209 -0
  71. onetick/py/core/column_operations/__init__.py +0 -0
  72. onetick/py/core/column_operations/_methods/__init__.py +4 -0
  73. onetick/py/core/column_operations/_methods/_internal.py +28 -0
  74. onetick/py/core/column_operations/_methods/conversions.py +216 -0
  75. onetick/py/core/column_operations/_methods/methods.py +292 -0
  76. onetick/py/core/column_operations/_methods/op_types.py +160 -0
  77. onetick/py/core/column_operations/accessors/__init__.py +0 -0
  78. onetick/py/core/column_operations/accessors/_accessor.py +28 -0
  79. onetick/py/core/column_operations/accessors/decimal_accessor.py +104 -0
  80. onetick/py/core/column_operations/accessors/dt_accessor.py +537 -0
  81. onetick/py/core/column_operations/accessors/float_accessor.py +184 -0
  82. onetick/py/core/column_operations/accessors/str_accessor.py +1367 -0
  83. onetick/py/core/column_operations/base.py +1121 -0
  84. onetick/py/core/cut_builder.py +150 -0
  85. onetick/py/core/db_constants.py +20 -0
  86. onetick/py/core/eval_query.py +245 -0
  87. onetick/py/core/lambda_object.py +441 -0
  88. onetick/py/core/multi_output_source.py +232 -0
  89. onetick/py/core/per_tick_script.py +2256 -0
  90. onetick/py/core/query_inspector.py +464 -0
  91. onetick/py/core/source.py +1744 -0
  92. onetick/py/db/__init__.py +2 -0
  93. onetick/py/db/_inspection.py +1128 -0
  94. onetick/py/db/db.py +1327 -0
  95. onetick/py/db/utils.py +64 -0
  96. onetick/py/docs/__init__.py +0 -0
  97. onetick/py/docs/docstring_parser.py +112 -0
  98. onetick/py/docs/utils.py +81 -0
  99. onetick/py/functions.py +2398 -0
  100. onetick/py/license.py +190 -0
  101. onetick/py/log.py +88 -0
  102. onetick/py/math.py +935 -0
  103. onetick/py/misc.py +470 -0
  104. onetick/py/oqd/__init__.py +22 -0
  105. onetick/py/oqd/eps.py +1195 -0
  106. onetick/py/oqd/sources.py +325 -0
  107. onetick/py/otq.py +216 -0
  108. onetick/py/pyomd_mock.py +47 -0
  109. onetick/py/run.py +916 -0
  110. onetick/py/servers.py +173 -0
  111. onetick/py/session.py +1347 -0
  112. onetick/py/sources/__init__.py +19 -0
  113. onetick/py/sources/cache.py +167 -0
  114. onetick/py/sources/common.py +128 -0
  115. onetick/py/sources/csv.py +642 -0
  116. onetick/py/sources/custom.py +85 -0
  117. onetick/py/sources/data_file.py +305 -0
  118. onetick/py/sources/data_source.py +1045 -0
  119. onetick/py/sources/empty.py +94 -0
  120. onetick/py/sources/odbc.py +337 -0
  121. onetick/py/sources/order_book.py +271 -0
  122. onetick/py/sources/parquet.py +168 -0
  123. onetick/py/sources/pit.py +191 -0
  124. onetick/py/sources/query.py +495 -0
  125. onetick/py/sources/snapshots.py +419 -0
  126. onetick/py/sources/split_query_output_by_symbol.py +198 -0
  127. onetick/py/sources/symbology_mapping.py +123 -0
  128. onetick/py/sources/symbols.py +374 -0
  129. onetick/py/sources/ticks.py +825 -0
  130. onetick/py/sql.py +70 -0
  131. onetick/py/state.py +251 -0
  132. onetick/py/types.py +2131 -0
  133. onetick/py/utils/__init__.py +70 -0
  134. onetick/py/utils/acl.py +93 -0
  135. onetick/py/utils/config.py +186 -0
  136. onetick/py/utils/default.py +49 -0
  137. onetick/py/utils/file.py +38 -0
  138. onetick/py/utils/helpers.py +76 -0
  139. onetick/py/utils/locator.py +94 -0
  140. onetick/py/utils/perf.py +498 -0
  141. onetick/py/utils/query.py +49 -0
  142. onetick/py/utils/render.py +1374 -0
  143. onetick/py/utils/script.py +244 -0
  144. onetick/py/utils/temp.py +471 -0
  145. onetick/py/utils/types.py +120 -0
  146. onetick/py/utils/tz.py +84 -0
  147. onetick_py-1.177.0.dist-info/METADATA +137 -0
  148. onetick_py-1.177.0.dist-info/RECORD +152 -0
  149. onetick_py-1.177.0.dist-info/WHEEL +5 -0
  150. onetick_py-1.177.0.dist-info/entry_points.txt +2 -0
  151. onetick_py-1.177.0.dist-info/licenses/LICENSE +21 -0
  152. onetick_py-1.177.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,1128 @@
1
+ import itertools
2
+ import warnings
3
+ from typing import Union, Iterable, Tuple, Optional, Any, Literal
4
+ from datetime import date as dt_date, datetime, timedelta
5
+
6
+ import pandas as pd
7
+ from dateutil.tz import gettz
8
+
9
+ import onetick.py as otp
10
+ from onetick.py import configuration, utils
11
+ from onetick.py import types as ott
12
+ from onetick.py.compatibility import is_native_plus_zstd_supported, is_show_db_list_show_description_supported
13
+ from onetick.py.core import db_constants
14
+ from onetick.py.otq import otq
15
+
16
+
17
+ def _datetime2date(dt: Union[dt_date, datetime]) -> dt_date:
18
+ """ Convert datetime and date explicitly into the datetime.date """
19
+ return dt_date(dt.year, dt.month, dt.day)
20
+
21
+
22
+ class DB:
23
+
24
+ """
25
+ An object representing one of the available databases returned by the :py:func:`otp.databases() <onetick.py.databases>` function.
26
+ It helps to perform initial analysis at the database level: available tick types,
27
+ dates with data, symbols, tick schema, etc.
28
+ """
29
+
30
+ def __init__(self, name, description='', context=utils.default):
31
+ self.name = name
32
+ self.description = description
33
+ if context is utils.default or context is None:
34
+ self.context = otp.config.context
35
+ else:
36
+ self.context = context
37
+ self._locator_date_ranges = None
38
+
39
+ def __eq__(self, obj):
40
+ return str(self) == str(obj)
41
+
42
+ def __lt__(self, obj):
43
+ return str(self) < str(obj)
44
+
45
+ def __str__(self):
46
+ return self.name
47
+
48
+ def access_info(self, deep_scan=False, username=None) -> Union[pd.DataFrame, dict]:
49
+ """
50
+ Get access info for this database and ``username``.
51
+
52
+ All dates are returned in GMT timezone.
53
+
54
+ Parameters
55
+ ----------
56
+ deep_scan:
57
+ If False (default) then the access fields are returned from the configuration of the database
58
+ (basically the same fields as specified in the locator) and the dictionary is returned.
59
+ If True then access fields are returned for each available remote host and time interval
60
+ and the :pandas:`pandas.DataFrame` object is returned.
61
+ username:
62
+ Can be used to specify the user for which the query will be executed.
63
+ By default the query is executed for the current user.
64
+
65
+ See also
66
+ --------
67
+ **ACCESS_INFO** OneTick event processor
68
+
69
+ Examples
70
+ --------
71
+
72
+ By default access fields from the basic configuration of the database are returned:
73
+
74
+ >>> some_db = otp.databases()['SOME_DB']
75
+ >>> some_db.access_info() # doctest: +SKIP
76
+ {'DB_NAME': 'SOME_DB',
77
+ 'READ_ACCESS': 1,
78
+ 'WRITE_ACCESS': 1,
79
+ 'MIN_AGE_SET': 0,
80
+ 'MIN_AGE_MSEC': 0,
81
+ 'MAX_AGE_SET': 0,
82
+ 'MAX_AGE_MSEC': 0,
83
+ 'MIN_START_DATE_SET': 0,
84
+ 'MIN_START_DATE_MSEC': Timestamp('1970-01-01 00:00:00'),
85
+ 'MAX_END_DATE_SET': 0,
86
+ 'MAX_END_DATE_MSEC': Timestamp('1970-01-01 00:00:00'),
87
+ 'MIN_AGE_DB_DAYS': 0,
88
+ 'MIN_AGE_DB_DAYS_SET': 0,
89
+ 'MAX_AGE_DB_DAYS': 0,
90
+ 'MAX_AGE_DB_DAYS_SET': 0,
91
+ 'CEP_ACCESS': 1,
92
+ 'DESTROY_ACCESS': 0,
93
+ 'MEMDB_ACCESS': 1}
94
+
95
+ Set parameter ``deep_scan`` to True to return access fields from each available host and time interval:
96
+
97
+ >>> some_db.access_info(deep_scan=True) # doctest: +SKIP
98
+ DB_NAME READ_ACCESS WRITE_ACCESS MIN_AGE_SET MIN_AGE_MSEC MAX_AGE_SET MAX_AGE_MSEC\
99
+ MIN_START_DATE_SET MIN_START_DATE_MSEC MAX_END_DATE_SET MAX_END_DATE_MSEC MIN_AGE_DB_DAYS\
100
+ MIN_AGE_DB_DAYS_SET MAX_AGE_DB_DAYS MAX_AGE_DB_DAYS_SET CEP_ACCESS DESTROY_ACCESS MEMDB_ACCESS\
101
+ SERVER_ADDRESS INTERVAL_START INTERVAL_END
102
+ 0 SOME_DB 1 1 0 0 0 0\
103
+ 0 1970-01-01 0 1970-01-01 0\
104
+ 0 0 0 1 0 1\
105
+ ... 2002-12-30 2100-01-01
106
+ """
107
+ # get parent name for derived databases, only parent databases will be listed by AccessInfo
108
+ name, _, _ = self.name.partition('//')
109
+ node = (
110
+ otq.AccessInfo(info_type='DATABASES', show_for_all_users=False, deep_scan=deep_scan)
111
+ >> otq.WhereClause(where=f'DB_NAME = "{name}"')
112
+ )
113
+ graph = otq.GraphQuery(node)
114
+ df = otp.run(graph,
115
+ symbols='LOCAL::',
116
+ # start and end times don't matter
117
+ start=db_constants.DEFAULT_START_DATE,
118
+ end=db_constants.DEFAULT_END_DATE,
119
+ # and timezone is GMT, because timestamp parameters in ACL are in GMT
120
+ timezone='GMT',
121
+ username=username,
122
+ context=self.context)
123
+ if not df.empty:
124
+ df = df.drop(columns='Time')
125
+ if deep_scan:
126
+ return df
127
+ return dict(df.iloc[0] if not df.empty else {})
128
+
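# A minimal usage sketch for access_info() above, assuming a configured
# onetick.py session and that a database named 'SOME_DB' exists (both are
# assumptions, mirroring the docstring example):
import onetick.py as otp

acl = otp.databases()['SOME_DB'].access_info()  # dict of locator-level access flags
if acl and not acl.get('WRITE_ACCESS'):
    print('SOME_DB is read-only for the current user')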
129
+ def show_config(self, config_type: Literal['locator_entry', 'db_time_intervals'] = 'locator_entry') -> dict:
130
+ """
131
+ Shows the specified configuration for a database.
132
+
133
+ Parameters
134
+ ----------
135
+ config_type: str
136
+ If **'locator_entry'** is specified, a string representing db's locator entry along with VDB_FLAG
137
+ (this flag equals 1 when the database is virtual and 0 otherwise) will be returned.
138
+
139
+ If **'db_time_intervals'** is specified,
140
+ then the time intervals configured in the locator file will be returned,
140
+ along with additional information such as
142
+ LOCATION, ARCHIVE_DURATION, DAY_BOUNDARY_TZ, DAY_BOUNDARY_OFFSET, ALTERNATIVE_LOCATIONS, etc.
143
+
144
+ See also
145
+ --------
146
+ **DB/SHOW_CONFIG** OneTick event processor
147
+
148
+ Examples
149
+ --------
150
+ .. testcode::
151
+ :skipif: not is_native_plus_zstd_supported()
152
+
153
+ some_db = otp.databases()['SOME_DB']
154
+ print(some_db.show_config()['LOCATOR_STRING'])
155
+
156
+ .. testoutput::
157
+ :options: +ELLIPSIS
158
+
159
+ <DB ARCHIVE_COMPRESSION_TYPE="NATIVE_PLUS_ZSTD" ID="SOME_DB" SYMBOLOGY="BZX" TICK_TIMESTAMP_TYPE="NANOS" >
160
+ <LOCATIONS >
161
+ <LOCATION ACCESS_METHOD="file" DAY_BOUNDARY_TZ="EST5EDT"
162
+ END_TIME="21000101000000" LOCATION="..." START_TIME="20021230000000" />
163
+ </LOCATIONS>
164
+ <RAW_DATA />
165
+ </DB>
166
+
167
+ >>> some_db = otp.databases()['SOME_DB']
168
+ >>> some_db.show_config(config_type='db_time_intervals') # doctest: +ELLIPSIS
169
+ {'START_DATE': 1041206400000, 'END_DATE': 4102444800000,
170
+ 'GROWABLE_ARCHIVE_FLAG': 0, 'ARCHIVE_DURATION': 0,
171
+ 'LOCATION': '...', 'DAY_BOUNDARY_TZ': 'EST5EDT', 'DAY_BOUNDARY_OFFSET': 0, 'ALTERNATIVE_LOCATIONS': ''}
172
+ """
173
+ node = otq.DbShowConfig(db_name=self.name, config_type=config_type.upper())
174
+ graph = otq.GraphQuery(node)
175
+ df = otp.run(graph,
176
+ symbols='LOCAL::',
177
+ # start and end times don't matter
178
+ start=db_constants.DEFAULT_START_DATE,
179
+ end=db_constants.DEFAULT_END_DATE,
180
+ # and timezone is GMT, because timestamp parameters in ACL are in GMT
181
+ timezone='GMT',
182
+ context=self.context)
183
+ if df.empty:
184
+ raise ValueError(f"Can't get config for database '{self.name}'")
185
+ df = df.drop(columns='Time')
186
+ return dict(df.iloc[0])
187
+
188
+ @property
189
+ def min_acl_start_date(self) -> Optional[dt_date]:
190
+ """
191
+ Minimum start date set in ACL for current user.
192
+ Returns None if not set.
193
+ """
194
+ access_info = self.access_info()
195
+ if not access_info:
196
+ return None
197
+ if access_info['MIN_START_DATE_SET'] == 0:
198
+ return None
199
+ return _datetime2date(access_info['MIN_START_DATE_MSEC'])
200
+
201
+ @property
202
+ def max_acl_end_date(self) -> Optional[dt_date]:
203
+ """
204
+ Maximum end date set in ACL for current user.
205
+ Returns None if not set.
206
+ """
207
+ access_info = self.access_info()
208
+ if not access_info:
209
+ return None
210
+ if access_info['MAX_END_DATE_SET'] == 0:
211
+ return None
212
+ return _datetime2date(access_info['MAX_END_DATE_MSEC'])
213
+
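# A small sketch combining the two ACL properties above to report the date
# window the current user may query; 'SOME_DB' is an assumed database name and
# a configured onetick.py session is assumed:
import onetick.py as otp

db = otp.databases()['SOME_DB']
print('ACL window for', db.name, '(GMT):',
      db.min_acl_start_date or 'unbounded', '..', db.max_acl_end_date or 'unbounded')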
214
+ def _fit_time_interval_in_acl(self, start, end, timezone='GMT') -> Tuple[datetime, datetime]:
215
+ """
216
+ Returns the part of time interval between ``start`` and ``end`` that fits ACL start/end time rules.
217
+ ``start`` and ``end`` objects are considered to be timezone-naive and will be localized in ``timezone``.
218
+
219
+ If it's not possible to find such interval, raises ValueError.
220
+ """
221
+ # convert to GMT, because ACL timestamps are in GMT
222
+ start = otp.dt(utils.convert_timezone(start, timezone, 'GMT'))
223
+ end = otp.dt(utils.convert_timezone(end, timezone, 'GMT'))
224
+
225
+ if self.min_acl_start_date is not None:
226
+ if end < otp.dt(self.min_acl_start_date):
227
+ # fully not intersecting intervals
228
+ raise ValueError(f'Date {start.date()} {timezone} violates ACL rules for the database {self.name}:'
229
+ f' minimum start time is {otp.dt(self.min_acl_start_date)} GMT.')
230
+ # partly intersecting intervals, choose the part not violating ACL
231
+ start = max(start, otp.dt(self.min_acl_start_date))
232
+
233
+ if self.max_acl_end_date is not None:
234
+ if start >= otp.dt(self.max_acl_end_date):
235
+ # fully not intersecting intervals
236
+ raise ValueError(f'Date {start.date()} {timezone} violates ACL rules for the database {self.name}:'
237
+ f' maximum (exclusive) end time is {otp.dt(self.max_acl_end_date)} GMT.')
238
+ # partly intersecting intervals, choose the part not violating ACL
239
+ end = min(end, otp.dt(self.max_acl_end_date))
240
+
241
+ # convert back to timezone
242
+ start = utils.convert_timezone(start, 'GMT', timezone)
243
+ end = utils.convert_timezone(end, 'GMT', timezone)
244
+ return start, end
245
+
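# A self-contained sketch (plain datetime, no OneTick session needed) of the
# clipping rule implemented by _fit_time_interval_in_acl() above: clamp a
# requested [start, end) interval to an ACL window and fail if they do not
# intersect at all.
from datetime import datetime


def clip_to_acl(start, end, acl_start=None, acl_end=None):
    if acl_start is not None:
        if end < acl_start:
            raise ValueError('interval ends before the ACL minimum start time')
        start = max(start, acl_start)
    if acl_end is not None:
        if start >= acl_end:
            raise ValueError('interval starts at or after the ACL maximum end time')
        end = min(end, acl_end)
    return start, end


# Request 2002-06-01 .. 2003-06-01 against an ACL window of 2003-01-01 .. 2004-01-01:
# the result is clipped to 2003-01-01 .. 2003-06-01.
print(clip_to_acl(datetime(2002, 6, 1), datetime(2003, 6, 1),
                  datetime(2003, 1, 1), datetime(2004, 1, 1)))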
246
+ def _fit_date_in_acl(self, date, timezone='GMT') -> Tuple[datetime, datetime]:
247
+ """
248
+ Returns the part of ``date`` time interval that fits ACL start/end time rules.
249
+ ``date`` object is considered to be timezone-naive and will be localized in ``timezone``.
250
+
251
+ If it's not possible to find such interval, raises ValueError.
252
+ """
253
+ date = _datetime2date(date)
254
+ start = otp.dt(date)
255
+ end = start + otp.Day(1)
256
+ return self._fit_time_interval_in_acl(start, end, timezone)
257
+
258
+ def _set_intervals(self):
259
+ """
260
+ Finds all date ranges from locators.
261
+ These intervals are required to find all possible dates with data.
262
+ It is only possible by querying the DB_SHOW_LOADED_TIME_RANGE
263
+ against the largest possible query date range.
264
+ """
265
+
266
+ if self._locator_date_ranges is None:
267
+ graph = otq.GraphQuery(otq.DbShowConfiguredTimeRanges(db_name=self.name).tick_type("ANY")
268
+ >> otq.Table(fields='long START_DATE, long END_DATE'))
269
+
270
+ result = otp.run(graph,
271
+ symbols=f'{self.name}::',
272
+ # start and end times don't matter for this query, use some constants
273
+ start=db_constants.DEFAULT_START_DATE,
274
+ end=db_constants.DEFAULT_END_DATE,
275
+ # GMT, because start/end timestamp in locator are in GMT
276
+ timezone='GMT',
277
+ context=self.context)
278
+
279
+ date_ranges = []
280
+
281
+ tz_gmt = gettz('GMT')
282
+ for inx in range(len(result)):
283
+ start_date = result['START_DATE'][inx]
284
+ # On Windows datetime.fromtimestamp throws an OSError for negative values
285
+ start_date = max(start_date, 0)
286
+ start = datetime.fromtimestamp(start_date / 1000, tz=tz_gmt)
287
+ start = start.replace(tzinfo=None)
288
+ try:
289
+ end = datetime.fromtimestamp(result['END_DATE'][inx] / 1000, tz=tz_gmt)
290
+ except (ValueError, OSError):
291
+ # this may happen if value exceeds 9999-12-31 23:59:59.999999
292
+ end = datetime.max
293
+ end = end.replace(tzinfo=None)
294
+
295
+ date_ranges.append((start, end))
296
+
297
+ # merge ranges if necessary to reduce number of queries
298
+ # for `dates` property then
299
+ self._locator_date_ranges = []
300
+ start, end = None, None
301
+
302
+ for t_start, t_end in date_ranges:
303
+ if start is None:
304
+ start = t_start
305
+ if end is None:
306
+ end = t_end
307
+ else:
308
+ if t_start == end:
309
+ end = t_end
310
+ else:
311
+ self._locator_date_ranges.append((start, end))
312
+ start, end = t_start, t_end
313
+
314
+ if start and end:
315
+ self._locator_date_ranges.append((start, end))
316
+
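# A self-contained sketch of the range-merging step in _set_intervals() above:
# locator ranges that touch (previous END_DATE == next START_DATE) are collapsed
# into a single range, so the `dates` property issues fewer follow-up queries.
def merge_touching(ranges):
    merged = []
    start = end = None
    for t_start, t_end in ranges:
        if start is None:
            start, end = t_start, t_end
        elif t_start == end:
            end = t_end
        else:
            merged.append((start, end))
            start, end = t_start, t_end
    if start is not None and end is not None:
        merged.append((start, end))
    return merged


# Plain integers stand in for the timestamps used by the real code:
print(merge_touching([(1, 5), (5, 9), (12, 20)]))  # [(1, 9), (12, 20)]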
317
+ def _show_loaded_time_ranges(self, start, end, only_last=False, prefer_speed_over_accuracy=False):
318
+ kwargs = {}
319
+ if prefer_speed_over_accuracy:
320
+ kwargs['prefer_speed_over_accuracy'] = prefer_speed_over_accuracy
321
+
322
+ eps = otq.DbShowLoadedTimeRanges(use_cache=True, **kwargs).tick_type('ANY')
323
+ eps = eps >> otq.WhereClause(where='NUM_LOADED_PARTITIONS > 0')
324
+ if only_last:
325
+ eps = eps >> otq.LastTick()
326
+
327
+ graph = otq.GraphQuery(eps)
328
+ result = otp.run(graph,
329
+ symbols=f'{self.name}::',
330
+ start=start,
331
+ end=end,
332
+ # GMT works properly for locators with gap
333
+ timezone='GMT',
334
+ context=self.context)
335
+
336
+ dates = []
337
+ # every record contains consequent intervals of data on disk
338
+ for inx in range(len(result)):
339
+ start = datetime.strptime(str(result['START_DATE'][inx]), '%Y%m%d')
340
+ end = datetime.strptime(str(result['END_DATE'][inx]), '%Y%m%d')
341
+ if only_last:
342
+ return [_datetime2date(end)]
343
+ while start <= end:
344
+ dates.append(_datetime2date(start))
345
+ start += timedelta(days=1)
346
+
347
+ return dates
348
+
349
+ def __split_loaded_time_ranges(self, locator_start, locator_end, only_last):
350
+ # locator date range can be very big, so splitting it in smaller parts
351
+ # (because _show_loaded_time_ranges() can be very slow for big time ranges)
352
+ # it is especially useful when we only need the last date
353
+ dates = []
354
+ start = end = locator_end
355
+ delta = 1 if only_last else 365
356
+ while locator_start < start:
357
+ start = end - timedelta(days=delta)
358
+ start = max(locator_start, start)
359
+ loaded_dates = self._show_loaded_time_ranges(start, end, only_last=only_last)
360
+ if only_last and loaded_dates:
361
+ return [loaded_dates[-1]]
362
+ dates = loaded_dates + dates
363
+ end = start
364
+ # if we are not getting data, then increasing time range to find it faster
365
+ if not loaded_dates:
366
+ delta *= 2
367
+ return dates
368
+
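# A self-contained sketch of the backwards "growing window" scan used by
# __split_loaded_time_ranges() above: walk from the end of the locator range
# towards its start and, whenever a probed chunk turns out to be empty, double
# the window size so sparse databases are covered with few probe queries.
def probe_windows(locator_start, locator_end, delta):
    windows = []
    end = locator_end
    while locator_start < end:
        start = max(locator_start, end - delta)
        windows.append((start, end))
        # The real method runs _show_loaded_time_ranges(start, end) here and
        # doubles `delta` only when that query returns no dates.
        delta *= 2
        end = start
    return windows


# Integers stand in for days; the first probe is small, later ones grow quickly:
print(probe_windows(0, 100, 1))  # [(99, 100), (97, 99), (93, 97), (85, 93), ...]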
369
+ def __get_dates(self, only_last=False, respect_acl=False, check_index_file=utils.adaptive):
370
+ """ Returns list of dates in GMT timezone with data """
371
+ self._set_intervals()
372
+
373
+ dates = []
374
+ today = dt_date.today()
375
+ today = datetime(today.year, today.month, today.day)
376
+ # searching in reversed order in case we need only_last date
377
+ for locator_start, locator_end in reversed(self._locator_date_ranges):
378
+ # future is not loaded yet
379
+ if locator_start > today:
380
+ continue
381
+ locator_end = min(locator_end, today)
382
+
383
+ if respect_acl:
384
+ try:
385
+ locator_start, locator_end = self._fit_time_interval_in_acl(locator_start, locator_end)
386
+ except ValueError:
387
+ # fully not intersecting intervals, trying next locator date range
388
+ continue
389
+
390
+ if check_index_file is utils.adaptive or check_index_file is None:
391
+ prefer_speed_over_accuracy = True
392
+ else:
393
+ prefer_speed_over_accuracy = not check_index_file
394
+ try:
395
+ loaded_dates = self._show_loaded_time_ranges(locator_start, locator_end,
396
+ only_last=only_last,
397
+ prefer_speed_over_accuracy=prefer_speed_over_accuracy)
398
+ except Exception as e:
399
+ # parameter prefer_speed_over_accuracy is not supported on all OneTick versions and servers
400
+ if check_index_file is not utils.adaptive:
401
+ raise ValueError(
402
+ "Parameter 'check_index_file' is not supported by the API or OneTick server"
403
+ ) from e
404
+ # in this case we fall back to splitting the locator range into smaller parts to increase speed
405
+ loaded_dates = self.__split_loaded_time_ranges(locator_start, locator_end, only_last)
406
+
407
+ if only_last and loaded_dates:
408
+ return loaded_dates[-1]
409
+ dates = loaded_dates + dates
410
+
411
+ if only_last and len(dates) == 0:
412
+ return None # no data on disk
413
+
414
+ return dates
415
+
416
+ def dates(self, respect_acl=False, check_index_file=utils.adaptive):
417
+ """
418
+ Returns list of dates in GMT timezone for which data is available.
419
+
420
+ Parameters
421
+ ----------
422
+ respect_acl: bool
423
+ If True, then only the dates that the current user has access to will be returned.
424
+ check_index_file: bool
425
+ If True, then the *index* file will be searched for to determine if a database is loaded for a date.
426
+ This check may be expensive in terms of time
427
+ when the file resides on NFS or on object storage such as S3.
428
+ If this parameter is set to False, then only the database directory for a date will be searched.
429
+ This will increase performance, but may also return days that are configured
430
+ but actually contain no data.
431
+ By default this option is set to False if it is supported by the API and the server,
432
+ otherwise it is set to True.
433
+
434
+ Returns
435
+ -------
436
+ list of ``datetime.date``
437
+ Returns an empty list when there is no data in the database
438
+
439
+ Examples
440
+ --------
441
+ >>> some_db = otp.databases()['SOME_DB']
442
+ >>> some_db.dates()
443
+ [datetime.date(2003, 12, 1)]
444
+ """
445
+ return self.__get_dates(respect_acl=respect_acl, check_index_file=check_index_file)
446
+
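# A short usage sketch for dates(), assuming a configured onetick.py session
# and a database named 'SOME_DB' (the same assumptions as the docstring example):
import onetick.py as otp

available = otp.databases()['SOME_DB'].dates(respect_acl=True)  # only ACL-visible dates
if available:
    print('data spans', available[0], '..', available[-1])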
447
+ def last_not_empty_date(self, last_date, days_back, timezone=None, tick_type=None):
448
+ """
449
+ Find the first day that has data,
450
+ starting from ``last_date`` and going back up to ``days_back`` days.
451
+ """
452
+ min_locator_date = self.min_locator_date()
453
+ for i in range(days_back + 1):
454
+ date = _datetime2date(last_date - timedelta(days=i))
455
+ if date < min_locator_date:
456
+ break
457
+ try:
458
+ tick_types = self.tick_types(date, timezone=timezone)
459
+ except ValueError:
460
+ # acl date violation
461
+ break
462
+ if tick_type is None and tick_types:
463
+ return date
464
+ if tick_type is not None and tick_type in tick_types:
465
+ return date
466
+ return None
467
+
468
+ @property
469
+ def last_date(self):
470
+ """
471
+ The latest date for which the database has data and to which the current user has access.
472
+
473
+ Returns
474
+ -------
475
+ ``datetime.date`` or ``None``
476
+ Returns ``None`` when there is no data in the database
477
+
478
+ Examples
479
+ --------
480
+ >>> some_db = otp.databases()['SOME_DB']
481
+ >>> some_db.last_date
482
+ datetime.date(2003, 12, 1)
483
+ """
484
+ return self.get_last_date()
485
+
486
+ def get_last_date(self, tick_type=None, timezone=None, show_warnings=True, check_index_file=utils.adaptive):
487
+ last_date = self.__get_dates(only_last=True, respect_acl=True, check_index_file=check_index_file)
488
+ if last_date is None:
489
+ return None
490
+ # It might happen that the database loading process is configured
491
+ # to work over weekends and holidays, and therefore
492
+ # there are days that are configured but have no data, tick types or schema.
493
+ # We want to find the closest non-empty day because
494
+ # we want to expose the most up-to-date schema to the end user.
495
+ # For example, this is the case for the OneTick Cloud US_COMP database.
496
+ # We only scan the 5 previous days to cover weekends + possible adjoining holidays.
497
+ # According to the official NYSE calendar there are no more than 5 closed days in a row.
498
+ date = self.last_not_empty_date(last_date, days_back=5, tick_type=tick_type, timezone=timezone)
499
+ if date is None:
500
+ if show_warnings:
501
+ warnings.warn(
502
+ "Can't find not empty day for the last 5 days, using last configured day. "
503
+ "Try to use .last_not_empty_date() function to find older not empty days."
504
+ )
505
+ return last_date
506
+ return date
507
+
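# A short sketch for the two lookups above, assuming a configured onetick.py
# session and a database named 'SOME_DB' with a 'TRD' tick type (assumptions):
from datetime import datetime

import onetick.py as otp

db = otp.databases()['SOME_DB']
print(db.get_last_date(tick_type='TRD'))  # latest non-empty day carrying TRD ticks
print(db.last_not_empty_date(datetime(2003, 12, 1), days_back=10, tick_type='TRD'))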
508
+ def tick_types(self, date=None, timezone=None) -> list[str]:
509
+ """
510
+ Returns list of tick types for the ``date``.
511
+
512
+ Parameters
513
+ ----------
514
+ date: :class:`otp.dt <onetick.py.datetime>`, :py:class:`datetime.datetime`, optional
515
+ Date for the tick types look up. ``None`` means the :attr:`last_date`
516
+ timezone: str, optional
517
+ Timezone for the look up. ``None`` means the default timezone.
518
+
519
+ Returns
520
+ -------
521
+ list
522
+ List with string values of available tick types.
523
+
524
+ Examples
525
+ --------
526
+ >>> us_comp_db = otp.databases()['US_COMP']
527
+ >>> us_comp_db.tick_types(date=otp.dt(2022, 3, 1))
528
+ ['QTE', 'TRD']
529
+ """
530
+ date = self.last_date if date is None else date
531
+ if timezone is None:
532
+ timezone = configuration.config.tz
533
+ time_params: dict[str, Any] = {}
534
+
535
+ if date is not None:
536
+ time_params['start'], time_params['end'] = self._fit_date_in_acl(date, timezone=timezone)
537
+
538
+ # PY-458: don't use cache, it can return different result in some cases
539
+ result = otp.run(otq.DbShowTickTypes(use_cache=False,
540
+ show_schema=False,
541
+ include_memdb=True),
542
+ symbols=f'{self.name}::',
543
+ **time_params,
544
+ timezone=timezone,
545
+ context=self.context)
546
+
547
+ if len(result) == 0:
548
+ return []
549
+
550
+ return result['TICK_TYPE_NAME'].tolist()
551
+
552
+ def min_locator_date(self):
553
+ self._set_intervals()
554
+ min_date = min(obj[0] for obj in self._locator_date_ranges)
555
+ return _datetime2date(min_date)
556
+
557
+ def schema(self, date=None, tick_type=None, timezone=None, check_index_file=utils.adaptive) -> dict[str, type]:
558
+ """
559
+ Gets the schema of the database.
560
+
561
+ Parameters
562
+ ----------
563
+ date: :class:`otp.dt <onetick.py.datetime>`, :py:class:`datetime.datetime`, optional
564
+ Date for the schema. ``None`` means the :attr:`last_date`
565
+ tick_type: str, optional
566
+ Specifies a tick type for the schema. ``None`` means use the only available
567
+ tick type; if there are multiple tick types, an exception is raised.
568
+ It uses the :meth:`tick_types` method.
569
+ timezone: str, optional
570
+ Allows specifying a timezone for searching tick types.
571
+ check_index_file: bool
572
+ If True, then the *index* file will be searched for to determine if a database is loaded for a date.
573
+ This check may be expensive in terms of time
574
+ when the file resides on NFS or on object storage such as S3.
575
+ If this parameter is set to False, then only the database directory for a date will be searched.
576
+ This will increase performance, but may also return days that are configured
577
+ but actually contain no data.
578
+ By default this option is set to False if it is supported by the API and the server,
579
+ otherwise it is set to True.
580
+
581
+ Returns
582
+ -------
583
+ dict
584
+ Dict where keys are field names and values are ``onetick.py`` :ref:`types <schema concept>`.
585
+ It's compatible with the :attr:`onetick.py.Source.schema` methods.
586
+
587
+ Examples
588
+ --------
589
+ >>> us_comp_db = otp.databases()['US_COMP']
590
+ >>> us_comp_db.schema(tick_type='TRD', date=otp.dt(2022, 3, 1))
591
+ {'PRICE': <class 'float'>, 'SIZE': <class 'int'>}
592
+ """
593
+ orig_date = date
594
+
595
+ if date is None:
596
+ date = self.get_last_date(tick_type=tick_type, timezone=timezone, check_index_file=check_index_file)
597
+ if timezone is None:
598
+ timezone = configuration.config.tz
599
+ if tick_type is None:
600
+ tick_types = self.tick_types(date=date, timezone=timezone)
601
+ if len(tick_types) == 0:
602
+ raise ValueError("No tick types has found and specified")
603
+ if len(tick_types) > 1:
604
+ raise ValueError("Database has multiple tick types, please specify using the `tick_type` parameter")
605
+
606
+ tick_type = tick_types[0]
607
+
608
+ if date is None:
609
+ # it might happen when a database has no data on disks
610
+ return {}
611
+
612
+ # Convert explicitly into the datetime.date, because min_date and date
613
+ # could be date or datetime types, and datetime is not comparable with datetime.date
614
+ date = _datetime2date(date)
615
+
616
+ start, end = self._fit_date_in_acl(date, timezone=timezone)
617
+
618
+ # TODO: refactor into global method, use in tick_types()
619
+ def get_schema(use_cache: bool = True):
620
+ return otp.run(otq.DbShowTickTypes(use_cache=use_cache,
621
+ show_schema=True,
622
+ include_memdb=True)
623
+ >> otq.WhereClause(where=f'TICK_TYPE_NAME="{tick_type}"'),
624
+ symbols=f'{self.name}::',
625
+ start=start,
626
+ end=end,
627
+ timezone=timezone,
628
+ context=self.context)
629
+
630
+ result = get_schema(use_cache=True)
631
+ if result.empty:
632
+ # in case cache settings in database are bad (e.g. BEXRTS-1220)
633
+ result = get_schema(use_cache=False)
634
+
635
+ fields: Iterable
636
+ if len(result):
637
+ # filter schema by date
638
+ date_to_filter = None
639
+ if orig_date:
640
+ # if date is passed as a parameter -- then use it
641
+ date_to_filter = date
642
+ else:
643
+ # otherwise use the closest date
644
+ date_to_filter = result['Time'].max()
645
+
646
+ result = result[(result['Time'] >= pd.Timestamp(date_to_filter))]
647
+
648
+ fields = zip(result['FIELD_NAME'].tolist(),
649
+ result['FIELD_TYPE_NAME'].tolist(),
650
+ result['FIELD_SIZE'].tolist())
651
+ else:
652
+ fields = []
653
+
654
+ schema = {}
655
+
656
+ for fname, ftype, fsize in fields:
657
+ dtype: type
658
+
659
+ if 'UINT32' in ftype:
660
+ dtype = otp.uint
661
+ elif 'UINT64' in ftype:
662
+ dtype = otp.ulong
663
+ elif 'INT32' in ftype:
664
+ dtype = otp.int
665
+ elif 'INT64' in ftype:
666
+ # otp.long can be used too, but we use int for backward compatibility
667
+ dtype = int
668
+ elif 'INT8' in ftype:
669
+ dtype = otp.byte
670
+ elif 'INT16' in ftype:
671
+ dtype = otp.short
672
+ elif 'INT' in ftype:
673
+ dtype = int
674
+ elif 'MSEC' in ftype:
675
+ dtype = otp.msectime
676
+ elif 'NSEC' in ftype:
677
+ dtype = otp.nsectime
678
+ elif 'DOUBLE' in ftype or 'FLOAT' in ftype:
679
+ dtype = float
680
+ elif 'DECIMAL' in ftype:
681
+ dtype = otp.decimal
682
+ elif 'VARSTRING' in ftype:
683
+ dtype = otp.varstring
684
+ elif 'STRING' in ftype:
685
+ if fsize == 64:
686
+ dtype = str
687
+ else:
688
+ dtype = otp.string[fsize]
689
+ else:
690
+ warnings.warn(
691
+ f"Unsupported field type '{ftype}' for field '{fname}'. "
692
+ "Note that this field will be ignored "
693
+ "and will not be added to the python schema, "
694
+ "but will still remain in the OneTick schema."
695
+ )
696
+ continue
697
+
698
+ schema[fname] = dtype
699
+
700
+ return schema
701
+
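# A short sketch for schema(), reusing the database and tick type from the
# docstring example above (a configured onetick.py session is assumed); the
# result is a plain dict mapping field names to onetick.py types:
import onetick.py as otp

schema = otp.databases()['US_COMP'].schema(tick_type='TRD', date=otp.dt(2022, 3, 1))
for field, dtype in schema.items():
    print(field, dtype)  # e.g. PRICE <class 'float'>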
702
+ def symbols(self, date=None, timezone=None, tick_type=None, pattern='.*') -> list[str]:
703
+ """
704
+ Finds a list of available symbols in the database
705
+
706
+ Parameters
707
+ ----------
708
+ date: :class:`otp.dt <onetick.py.datetime>`, :py:class:`datetime.datetime`, optional
709
+ Date for the symbols look up. ``None`` means the :attr:`last_date`
710
+ tick_type: str, optional
711
+ Tick type for symbols. ``None`` means union across all tick types.
712
+ timezone: str, optional
713
+ Timezone for the lookup. ``None`` means the default timezone.
714
+ pattern: str
715
+ Regular expression to select symbols.
716
+
717
+ Examples
718
+ --------
719
+ >>> us_comp_db = otp.databases()['US_COMP']
720
+ >>> us_comp_db.symbols(date=otp.dt(2022, 3, 1), tick_type='TRD', pattern='^AAP.*')
721
+ ['AAP', 'AAPL']
722
+ """
723
+ if date is None:
724
+ date = self.last_date
725
+ if timezone is None:
726
+ timezone = configuration.config.tz
727
+ if tick_type is None:
728
+ tick_type = ''
729
+
730
+ eps = otq.FindDbSymbols(pattern='%', tick_type_field=tick_type) \
731
+ >> otq.AddField(field='varstring SYMBOL', value='regex_replace(SYMBOL_NAME, ".*::", "")') \
732
+ >> otq.WhereClause(where=f'regex_match(SYMBOL, "{pattern}")') \
733
+ >> otq.Table(fields='SYMBOL')
734
+
735
+ result = otp.run(eps,
736
+ symbols=f'{self.name}::',
737
+ start=date,
738
+ end=date + timedelta(days=1),
739
+ timezone=timezone,
740
+ context=self.context)
741
+
742
+ if len(result) == 0:
743
+ return []
744
+
745
+ return result['SYMBOL'].tolist()
746
+
747
+ def show_archive_stats(
748
+ self,
749
+ start=utils.adaptive,
750
+ end=utils.adaptive,
751
+ date=None,
752
+ timezone='GMT',
753
+ ) -> pd.DataFrame:
754
+ """
755
+ This method shows various stats about the queried symbol,
756
+ as well as about the archive as a whole, for each day within the queried interval.
757
+
758
+ Accelerator databases are not supported.
759
+ Memory databases will be ignored even within their life hours.
760
+
761
+ Archive stats returned:
762
+
763
+ * COMPRESSION_TYPE - archive compression type.
764
+ In older archives the native compression flag is not stored,
765
+ so for example for gzip compression this field may say "GZIP or NATIVE_PLUS_GZIP".
766
+ The meta_data_upgrader.exe tool can be used to determine and inject that information in such cases
767
+ in order to get a more precise result in this field.
768
+ * TIME_RANGE_VALIDITY - whether lowest and highest loaded timestamps (see below) are known.
769
+ Like native compression flag, this information is missing in older archives
770
+ and can be added using meta_data_upgrader.exe tool.
771
+ * LOWEST_LOADED_DATETIME - the lowest loaded timestamp for the queried interval (across all symbols)
772
+ * HIGHEST_LOADED_DATETIME - the highest loaded timestamp for the queried interval (across all symbols)
773
+ * TOTAL_TICKS - the number of ticks for the queried interval (across all symbols).
774
+ Also missing in older archives and can be added using meta_data_upgrader.exe.
775
+ If not available, -1 will be returned.
776
+ * SYMBOL_DATA_SIZE - the size of the symbol's data in the archive, in bytes.
777
+ This information is also missing in older archives; however, unlike the other options, it cannot be added later.
778
+ In such cases -1 will be returned.
779
+ * TOTAL_SYMBOLS - the number of symbols for the queried interval
780
+ * TOTAL_SIZE - archive size in bytes for the queried interval
781
+ (including the garbage potentially accumulated during appends).
782
+
783
+ Note
784
+ ----
785
+ Fields **LOWEST_LOADED_DATETIME** and **HIGHEST_LOADED_DATETIME** are returned in GMT timezone,
786
+ so the default value of parameter ``timezone`` is GMT too.
787
+
788
+ See also
789
+ --------
790
+ **SHOW_ARCHIVE_STATS** OneTick event processor
791
+
792
+ Examples
793
+ --------
794
+
795
+ Show stats for a particular date for a database SOME_DB:
796
+
797
+ .. testcode::
798
+ :skipif: not is_native_plus_zstd_supported()
799
+
800
+ db = otp.databases()['SOME_DB']
801
+ stats = db.show_archive_stats(date=otp.dt(2003, 12, 1))
802
+ print(stats)
803
+
804
+ .. testoutput::
805
+ :options: +ELLIPSIS
806
+
807
+ Time COMPRESSION_TYPE TIME_RANGE_VALIDITY LOWEST_LOADED_DATETIME HIGHEST_LOADED_DATETIME...
808
+ 0 2003-12-01 05:00:00 NATIVE_PLUS_ZSTD VALID 2003-12-01 05:00:00 2003-12-01 05:00:00.002...
809
+ """
810
+ node = otq.ShowArchiveStats()
811
+ graph = otq.GraphQuery(node)
812
+ df = otp.run(graph,
813
+ symbols=f'{self.name}::',
814
+ start=start,
815
+ end=end,
816
+ date=date,
817
+ timezone=timezone,
818
+ context=self.context)
819
+ return df
820
+
821
+ def ref_data(
822
+ self,
823
+ ref_data_type: str,
824
+ symbol_date=None,
825
+ start=utils.adaptive,
826
+ end=utils.adaptive,
827
+ date=None,
828
+ timezone='GMT',
829
+ symbol: str = '',
830
+ ) -> pd.DataFrame:
831
+ """
832
+ Shows reference data for the specified security and reference data type.
833
+
834
+ It can be used to view corporate actions,
835
+ symbol name changes,
836
+ primary exchange info and symbology mapping for securities,
837
+ as well as the list of symbologies,
838
+ the names of custom adjustment types for corporate actions present in a reference database,
839
+ and the names of continuous contracts in the database symbology.
840
+
841
+ Parameters
842
+ ----------
843
+ ref_data_type: str
844
+ Type of reference data to be queried. Possible values are:
845
+
846
+ * corp_actions
847
+ * symbol_name_history
848
+ * primary_exchange
849
+ * symbol_calendar
850
+ * symbol_currency
851
+ * symbology_mapping
852
+ * symbology_list
853
+ * custom_adjustment_type_list
854
+ * all_calendars
855
+ * all_continuous_contract_names
856
+ symbol_date:
857
+ This parameter must be specified for some reference data types to be queried.
858
+ symbol:
859
+ Symbol name for the query (may be useful for some ``ref_data_type``).
860
+
861
+ See also
862
+ --------
863
+ **REF_DATA** OneTick event processor
864
+
865
+ Examples
866
+ --------
867
+
868
+ Show calendars for a database TRAIN_A_PRL_TRD in the given range:
869
+
870
+ >>> db = otp.databases()['TRAIN_A_PRL_TRD'] # doctest: +SKIP
871
+ >>> db.ref_data('all_calendars', # doctest: +SKIP
872
+ ... start=otp.dt(2018, 2, 1),
873
+ ... end=otp.dt(2018, 2, 9),
874
+ ... symbol_date=otp.dt(2018, 2, 1))
875
+ Time END_DATETIME CALENDAR_NAME SESSION_NAME SESSION_FLAGS DAY_PATTERN START_HHMMSS\
876
+ END_HHMMSS TIMEZONE PRIORITY DESCRIPTION
877
+ 0 2018-02-01 00:00:00 2018-02-06 23:59:59 FRED Regular R 0.0.12345 93000\
878
+ 160000 EST5EDT 0
879
+ 1 2018-02-06 23:59:59 2018-02-07 23:59:59 FRED Holiday H 0.0.12345 93000\
880
+ 160000 EST5EDT 1
881
+ 2 2018-02-07 23:59:59 2050-12-31 23:59:59 FRED Regular F 0.0.12345 93000\
882
+ 160000 EST5EDT 0
883
+
884
+ Set symbol name with ``symbol`` parameter:
885
+
886
+ >>> db = otp.databases()['US_COMP_SAMPLE'] # doctest: +SKIP
887
+ >>> db.ref_data(ref_data_type='corp_actions', # doctest: +SKIP
888
+ ... start=otp.dt(2025, 1, 2),
889
+ ... end=otp.dt(2025, 7, 2),
890
+ ... symbol_date=otp.dt(2025, 7, 1),
891
+ ... symbol='WMT',
892
+ ... timezone='America/New_York')
893
+ Time MULTIPLICATIVE_ADJUSTMENT ADDITIVE_ADJUSTMENT ADJUSTMENT_TYPE
894
+ 0 2025-03-21 1.000000 0.235 CASH_DIVIDEND
895
+ 1 2025-03-21 0.997261 0.000 MULTI_ADJ_CASH
896
+ 2 2025-05-09 1.000000 0.235 CASH_DIVIDEND
897
+ 3 2025-05-09 0.997588 0.000 MULTI_ADJ_CASH
898
+ """
899
+ ref_data_type = ref_data_type.upper()
900
+ node = otq.RefData(ref_data_type=ref_data_type)
901
+ graph = otq.GraphQuery(node)
902
+ df = otp.run(graph,
903
+ symbols=f'{self.name}::{symbol}',
904
+ symbol_date=symbol_date,
905
+ start=start,
906
+ end=end,
907
+ date=date,
908
+ timezone=timezone,
909
+ context=self.context)
910
+ return df
911
+
912
+
913
+ def databases(
914
+ context=utils.default, derived: bool = False, readable_only: bool = True,
915
+ fetch_description: Optional[bool] = None,
916
+ as_table: bool = False,
917
+ ) -> Union[dict[str, DB], pd.DataFrame]:
918
+ """
919
+ Gets all available databases in the ``context``.
920
+
921
+ Parameters
922
+ ----------
923
+ context: str, optional
924
+ Context to run the query.
925
+ If not set then default :py:attr:`context<onetick.py.configuration.Config.context>` is used.
926
+ See :ref:`guide about switching contexts <switching contexts>` for examples.
927
+ derived: bool, dict
928
+ If False (default) then derived databases are not returned.
929
+ Otherwise derived database names are added to the result after the non-derived databases.
930
+ If set to a dict, then its items are used as parameters to :py:func:`~onetick.py.derived_databases`.
931
+ If set to True then default parameters for :py:func:`~onetick.py.derived_databases` are used.
932
+ readable_only: bool
933
+ If set to True (default), then return only the databases with read-access for the current user.
934
+ Otherwise return all databases visible from the current process.
935
+ fetch_description: bool
936
+ If set to True, retrieves descriptions for databases and puts them into ``description`` property of
937
+ :py:class:`~onetick.py.DB` objects in a returned dict.
938
+ as_table: bool
939
+ If False (default), this function returns a dictionary of database names and database objects.
940
+ If True, returns a :pandas:`pandas.DataFrame` table where each row contains the info for each database.
941
+
942
+ See also
943
+ --------
944
+ | **SHOW_DB_LIST** OneTick event processor
945
+ | **ACCESS_INFO** OneTick event processor
946
+ | :py:func:`derived_databases`
947
+
948
+ Returns
949
+ -------
950
+ Dict where keys are database names and values are :class:`DB <onetick.py.db._inspection.DB>` objects
951
+ or :pandas:`pandas.DataFrame` object depending on ``as_table`` parameter.
952
+
953
+ Examples
954
+ --------
955
+
956
+ Get the dictionary of database names and objects:
957
+
958
+ >>> otp.databases() # doctest: +SKIP
959
+ {'ABU_DHABI': <onetick.py.db._inspection.DB at 0x7f9413a5e8e0>,
960
+ 'ABU_DHABI_BARS': <onetick.py.db._inspection.DB at 0x7f9413a5ef40>,
961
+ 'ABU_DHABI_DAILY': <onetick.py.db._inspection.DB at 0x7f9413a5eac0>,
962
+ 'ALPHA': <onetick.py.db._inspection.DB at 0x7f9413a5e940>,
963
+ 'ALPHA_X': <onetick.py.db._inspection.DB at 0x7f9413a5e490>,
964
+ ...
965
+ }
966
+
967
+ Get a table with database info:
968
+
969
+ >>> otp.databases(as_table=True) # doctest: +SKIP
970
+ Time DB_NAME READ_ACCESS WRITE_ACCESS ...
971
+ 0 2003-01-01 ABU_DHABI 1 0 ...
972
+ 1 2003-01-01 ABU_DHABI_BARS 1 1 ...
973
+ 2 2003-01-01 ABU_DHABI_DAILY 1 1 ...
974
+ 3 2003-01-01 ALPHA 1 1 ...
975
+ 4 2003-01-01 ALPHA_X 1 1 ...
976
+ ... ... ... ... ... ...
977
+ """
978
+ show_db_list_kwargs = {}
979
+ if fetch_description is not None and is_show_db_list_show_description_supported() and (
980
+ 'show_description' in otq.ShowDbList.Parameters.list_parameters()
981
+ ):
982
+ show_db_list_kwargs['show_description'] = fetch_description
983
+
984
+ node = otq.AccessInfo(info_type='DATABASES', show_for_all_users=False, deep_scan=True).tick_type('ANY')
985
+ # for some reason ACCESS_INFO sometimes returns several ticks
986
+ # for the same database with different SERVER_ADDRESS values
987
+ # so we get only the first tick
988
+ node = (
989
+ node >> otq.NumTicks(is_running_aggr=True, group_by='DB_NAME',
990
+ all_fields_for_sliding=False, output_field_name='NUM_TICKS')
991
+ >> otq.WhereClause(where='NUM_TICKS = 1')
992
+ >> otq.Passthrough('NUM_TICKS', drop_fields=True)
993
+ )
994
+ if readable_only:
995
+ node = node >> otq.WhereClause(where='READ_ACCESS = 1')
996
+
997
+ left = node.set_node_name('LEFT')
998
+ right = otq.ShowDbList(**show_db_list_kwargs).tick_type('ANY').set_node_name('RIGHT')
999
+ join = otq.Join(
1000
+ left_source='LEFT', join_type='INNER', join_criteria='LEFT.DB_NAME = RIGHT.DATABASE_NAME',
1001
+ add_source_prefix=False,
1002
+ )
1003
+ left >> join << right # pylint: disable=pointless-statement
1004
+ node = join >> otq.Passthrough('LEFT.TIMESTAMP,RIGHT.TIMESTAMP,DATABASE_NAME', drop_fields=True)
1005
+
1006
+ # times bigger than datetime.max are not representable in python
1007
+ max_dt = ott.value2str(datetime.max)
1008
+ node = node >> otq.UpdateFields(set=f'INTERVAL_START={max_dt}', where=f'INTERVAL_START > {max_dt}')
1009
+ node = node >> otq.UpdateFields(set=f'INTERVAL_END={max_dt}', where=f'INTERVAL_END > {max_dt}')
1010
+
1011
+ dbs = otp.run(node,
1012
+ symbols='LOCAL::',
1013
+ # start and end times don't matter for this query, use some constants
1014
+ start=db_constants.DEFAULT_START_DATE,
1015
+ end=db_constants.DEFAULT_END_DATE,
1016
+ context=context)
1017
+
1018
+ if as_table:
1019
+ return dbs
1020
+
1021
+ # WebAPI returns empty DataFrame (no columns) if there are no databases
1022
+ if len(dbs) == 0:
1023
+ return {}
1024
+
1025
+ db_list = list(dbs['DB_NAME'])
1026
+ db_description_list = dbs['DESCRIPTION'] if 'DESCRIPTION' in dbs else itertools.repeat('')
1027
+ merged_db_list = list(zip(db_list, db_description_list))
1028
+
1029
+ db_dict = {
1030
+ db_name: DB(db_name, description=db_description, context=context)
1031
+ for db_name, db_description in merged_db_list
1032
+ }
1033
+
1034
+ if derived:
1035
+ kwargs: dict = derived if isinstance(derived, dict) else {}
1036
+ kwargs.setdefault('context', context)
1037
+ db_dict.update(
1038
+ derived_databases(**kwargs)
1039
+ )
1040
+ return db_dict
1041
+
1042
+
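# A short sketch for databases(), assuming a configured onetick.py session; the
# derived and fetch_description arguments follow the parameter descriptions above:
import onetick.py as otp

dbs = otp.databases(derived=True, fetch_description=True)
for name, db in sorted(dbs.items()):
    print(name, '-', db.description or '<no description>')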
1043
+ def derived_databases(
1044
+ context=utils.default,
1045
+ start=None, end=None,
1046
+ selection_criteria='all',
1047
+ db=None,
1048
+ db_discovery_scope='query_host_only',
1049
+ as_table: bool = False,
1050
+ ) -> Union[dict[str, DB], pd.DataFrame]:
1051
+ """
1052
+ Gets available derived databases.
1053
+
1054
+ Parameters
1055
+ ----------
1056
+ context: str, optional
1057
+ Context to run the query.
1058
+ If not set then default :py:attr:`context<onetick.py.configuration.Config.context>` is used.
1059
+ See :ref:`guide about switching contexts <switching contexts>` for examples.
1060
+ start: :py:class:`otp.datetime <onetick.py.datetime>`, optional
1061
+ If both ``start`` and ``end`` are set, then only databases in this range are listed.
1062
+ Otherwise databases from all configured time ranges are listed.
1063
+
1064
+ If ``db`` is set, then
1065
+ :py:attr:`otp.config.default_start_time <onetick.py.configuration.Config.default_start_time>`
1066
+ is used by default.
1067
+ end: :py:class:`otp.datetime <onetick.py.datetime>`, optional
1068
+ If both ``start`` and ``end`` are set, then only databases in this range are listed.
1069
+ Otherwise databases from all configured time ranges are listed.
1070
+
1071
+ If ``db`` is set, then
1072
+ :py:attr:`otp.config.default_end_time <onetick.py.configuration.Config.default_end_time>` is used by default.
1073
+ selection_criteria: str
1074
+ Possible values: *all*, *derived_from_current_db*, *direct_children_of_current_db*.
1075
+ db: str, optional
1076
+ Specifies database name if ``selection_criteria`` is set to
1077
+ *derived_from_current_db* or *direct_children_of_current_db*.
1078
+ Must be set in this case, otherwise does nothing.
1079
+ db_discovery_scope: str
1080
+ When *query_host_and_all_reachable_hosts* is specified,
1081
+ an attempt will be performed to get derived databases from all reachable hosts.
1082
+ When *query_host_only* is specified,
1083
+ only derived databases from the host on which the query is performed will be returned.
1084
+ as_table: bool
1085
+ If False (default), this function returns a dictionary of database names and database objects.
1086
+ If True, returns a :pandas:`pandas.DataFrame` table where each row contains the info for each database.
1087
+
1088
+ See also
1089
+ --------
1090
+ **SHOW_DERIVED_DB_LIST** OneTick event processor
1091
+
1092
+ Returns
1093
+ -------
1094
+ Dict where keys are database names and values are :class:`DB <onetick.py.db._inspection.DB>` objects
1095
+ or :pandas:`pandas.DataFrame` object depending on ``as_table`` parameter.
1096
+ """
1097
+ if start and end:
1098
+ time_range = otq.ShowDerivedDbList.TimeRange.QUERY_TIME_INTERVAL
1099
+ else:
1100
+ if db is None:
1101
+ # start and end times don't matter in this case, use some constants
1102
+ start = db_constants.DEFAULT_START_DATE
1103
+ end = db_constants.DEFAULT_END_DATE
1104
+ else:
1105
+ start = otp.config.default_start_time
1106
+ end = otp.config.default_end_time
1107
+ time_range = otq.ShowDerivedDbList.TimeRange.CONFIGURED_TIME_INTERVAL
1108
+
1109
+ selection_criteria = getattr(otq.ShowDerivedDbList.SelectionCriteria, selection_criteria.upper())
1110
+ db_discovery_scope = getattr(otq.ShowDerivedDbList.DbDiscoveryScope, db_discovery_scope.upper())
1111
+
1112
+ if selection_criteria != otq.ShowDerivedDbList.SelectionCriteria.ALL and not db:
1113
+ raise ValueError(f"Parameter 'db' must be set when parameter 'selection_criteria' is {selection_criteria}")
1114
+
1115
+ ep = otq.ShowDerivedDbList(
1116
+ time_range=time_range,
1117
+ selection_criteria=selection_criteria,
1118
+ db_discovery_scope=db_discovery_scope,
1119
+ )
1120
+ ep = ep.tick_type('ANY')
1121
+ db = db or 'LOCAL'
1122
+ dbs = otp.run(ep, symbols=f'{db}::', start=start, end=end, context=context)
1123
+ if as_table:
1124
+ return dbs
1125
+ if len(dbs) == 0:
1126
+ return {}
1127
+ db_list = list(dbs['DERIVED_DB_NAME'])
1128
+ return {db_name: DB(db_name, context=context) for db_name in db_list}
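# A short sketch for derived_databases(), assuming a configured onetick.py
# session and an existing parent database named 'SOME_DB' (an assumption):
import onetick.py as otp

children = otp.derived_databases(
    selection_criteria='direct_children_of_current_db',
    db='SOME_DB',
)
print(sorted(children))  # names of derived databases directly under SOME_DB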