onetick-py 1.162.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152) hide show
  1. locator_parser/__init__.py +0 -0
  2. locator_parser/acl.py +73 -0
  3. locator_parser/actions.py +266 -0
  4. locator_parser/common.py +365 -0
  5. locator_parser/io.py +41 -0
  6. locator_parser/locator.py +150 -0
  7. onetick/__init__.py +101 -0
  8. onetick/doc_utilities/__init__.py +3 -0
  9. onetick/doc_utilities/napoleon.py +40 -0
  10. onetick/doc_utilities/ot_doctest.py +140 -0
  11. onetick/doc_utilities/snippets.py +280 -0
  12. onetick/lib/__init__.py +4 -0
  13. onetick/lib/instance.py +138 -0
  14. onetick/py/__init__.py +290 -0
  15. onetick/py/_stack_info.py +89 -0
  16. onetick/py/_version.py +2 -0
  17. onetick/py/aggregations/__init__.py +11 -0
  18. onetick/py/aggregations/_base.py +645 -0
  19. onetick/py/aggregations/_docs.py +912 -0
  20. onetick/py/aggregations/compute.py +286 -0
  21. onetick/py/aggregations/functions.py +2216 -0
  22. onetick/py/aggregations/generic.py +104 -0
  23. onetick/py/aggregations/high_low.py +80 -0
  24. onetick/py/aggregations/num_distinct.py +83 -0
  25. onetick/py/aggregations/order_book.py +427 -0
  26. onetick/py/aggregations/other.py +1014 -0
  27. onetick/py/backports.py +26 -0
  28. onetick/py/cache.py +373 -0
  29. onetick/py/callback/__init__.py +5 -0
  30. onetick/py/callback/callback.py +275 -0
  31. onetick/py/callback/callbacks.py +131 -0
  32. onetick/py/compatibility.py +752 -0
  33. onetick/py/configuration.py +736 -0
  34. onetick/py/core/__init__.py +0 -0
  35. onetick/py/core/_csv_inspector.py +93 -0
  36. onetick/py/core/_internal/__init__.py +0 -0
  37. onetick/py/core/_internal/_manually_bound_value.py +6 -0
  38. onetick/py/core/_internal/_nodes_history.py +250 -0
  39. onetick/py/core/_internal/_op_utils/__init__.py +0 -0
  40. onetick/py/core/_internal/_op_utils/every_operand.py +9 -0
  41. onetick/py/core/_internal/_op_utils/is_const.py +10 -0
  42. onetick/py/core/_internal/_per_tick_scripts/tick_list_sort_template.script +121 -0
  43. onetick/py/core/_internal/_proxy_node.py +140 -0
  44. onetick/py/core/_internal/_state_objects.py +2307 -0
  45. onetick/py/core/_internal/_state_vars.py +87 -0
  46. onetick/py/core/_source/__init__.py +0 -0
  47. onetick/py/core/_source/_symbol_param.py +95 -0
  48. onetick/py/core/_source/schema.py +97 -0
  49. onetick/py/core/_source/source_methods/__init__.py +0 -0
  50. onetick/py/core/_source/source_methods/aggregations.py +810 -0
  51. onetick/py/core/_source/source_methods/applyers.py +296 -0
  52. onetick/py/core/_source/source_methods/columns.py +141 -0
  53. onetick/py/core/_source/source_methods/data_quality.py +301 -0
  54. onetick/py/core/_source/source_methods/debugs.py +270 -0
  55. onetick/py/core/_source/source_methods/drops.py +120 -0
  56. onetick/py/core/_source/source_methods/fields.py +619 -0
  57. onetick/py/core/_source/source_methods/filters.py +1001 -0
  58. onetick/py/core/_source/source_methods/joins.py +1393 -0
  59. onetick/py/core/_source/source_methods/merges.py +566 -0
  60. onetick/py/core/_source/source_methods/misc.py +1325 -0
  61. onetick/py/core/_source/source_methods/pandases.py +155 -0
  62. onetick/py/core/_source/source_methods/renames.py +356 -0
  63. onetick/py/core/_source/source_methods/sorts.py +183 -0
  64. onetick/py/core/_source/source_methods/switches.py +142 -0
  65. onetick/py/core/_source/source_methods/symbols.py +117 -0
  66. onetick/py/core/_source/source_methods/times.py +627 -0
  67. onetick/py/core/_source/source_methods/writes.py +702 -0
  68. onetick/py/core/_source/symbol.py +202 -0
  69. onetick/py/core/_source/tmp_otq.py +222 -0
  70. onetick/py/core/column.py +209 -0
  71. onetick/py/core/column_operations/__init__.py +0 -0
  72. onetick/py/core/column_operations/_methods/__init__.py +4 -0
  73. onetick/py/core/column_operations/_methods/_internal.py +28 -0
  74. onetick/py/core/column_operations/_methods/conversions.py +215 -0
  75. onetick/py/core/column_operations/_methods/methods.py +294 -0
  76. onetick/py/core/column_operations/_methods/op_types.py +150 -0
  77. onetick/py/core/column_operations/accessors/__init__.py +0 -0
  78. onetick/py/core/column_operations/accessors/_accessor.py +30 -0
  79. onetick/py/core/column_operations/accessors/decimal_accessor.py +92 -0
  80. onetick/py/core/column_operations/accessors/dt_accessor.py +464 -0
  81. onetick/py/core/column_operations/accessors/float_accessor.py +160 -0
  82. onetick/py/core/column_operations/accessors/str_accessor.py +1374 -0
  83. onetick/py/core/column_operations/base.py +1061 -0
  84. onetick/py/core/cut_builder.py +149 -0
  85. onetick/py/core/db_constants.py +20 -0
  86. onetick/py/core/eval_query.py +244 -0
  87. onetick/py/core/lambda_object.py +442 -0
  88. onetick/py/core/multi_output_source.py +193 -0
  89. onetick/py/core/per_tick_script.py +2253 -0
  90. onetick/py/core/query_inspector.py +465 -0
  91. onetick/py/core/source.py +1663 -0
  92. onetick/py/db/__init__.py +2 -0
  93. onetick/py/db/_inspection.py +1042 -0
  94. onetick/py/db/db.py +1423 -0
  95. onetick/py/db/utils.py +64 -0
  96. onetick/py/docs/__init__.py +0 -0
  97. onetick/py/docs/docstring_parser.py +112 -0
  98. onetick/py/docs/utils.py +81 -0
  99. onetick/py/functions.py +2354 -0
  100. onetick/py/license.py +188 -0
  101. onetick/py/log.py +88 -0
  102. onetick/py/math.py +947 -0
  103. onetick/py/misc.py +437 -0
  104. onetick/py/oqd/__init__.py +22 -0
  105. onetick/py/oqd/eps.py +1195 -0
  106. onetick/py/oqd/sources.py +325 -0
  107. onetick/py/otq.py +211 -0
  108. onetick/py/pyomd_mock.py +47 -0
  109. onetick/py/run.py +841 -0
  110. onetick/py/servers.py +173 -0
  111. onetick/py/session.py +1342 -0
  112. onetick/py/sources/__init__.py +19 -0
  113. onetick/py/sources/cache.py +167 -0
  114. onetick/py/sources/common.py +126 -0
  115. onetick/py/sources/csv.py +642 -0
  116. onetick/py/sources/custom.py +85 -0
  117. onetick/py/sources/data_file.py +305 -0
  118. onetick/py/sources/data_source.py +1049 -0
  119. onetick/py/sources/empty.py +94 -0
  120. onetick/py/sources/odbc.py +337 -0
  121. onetick/py/sources/order_book.py +238 -0
  122. onetick/py/sources/parquet.py +168 -0
  123. onetick/py/sources/pit.py +191 -0
  124. onetick/py/sources/query.py +495 -0
  125. onetick/py/sources/snapshots.py +419 -0
  126. onetick/py/sources/split_query_output_by_symbol.py +198 -0
  127. onetick/py/sources/symbology_mapping.py +123 -0
  128. onetick/py/sources/symbols.py +357 -0
  129. onetick/py/sources/ticks.py +825 -0
  130. onetick/py/sql.py +70 -0
  131. onetick/py/state.py +256 -0
  132. onetick/py/types.py +2056 -0
  133. onetick/py/utils/__init__.py +70 -0
  134. onetick/py/utils/acl.py +93 -0
  135. onetick/py/utils/config.py +186 -0
  136. onetick/py/utils/default.py +49 -0
  137. onetick/py/utils/file.py +38 -0
  138. onetick/py/utils/helpers.py +76 -0
  139. onetick/py/utils/locator.py +94 -0
  140. onetick/py/utils/perf.py +499 -0
  141. onetick/py/utils/query.py +49 -0
  142. onetick/py/utils/render.py +1139 -0
  143. onetick/py/utils/script.py +244 -0
  144. onetick/py/utils/temp.py +471 -0
  145. onetick/py/utils/types.py +118 -0
  146. onetick/py/utils/tz.py +82 -0
  147. onetick_py-1.162.2.dist-info/METADATA +148 -0
  148. onetick_py-1.162.2.dist-info/RECORD +152 -0
  149. onetick_py-1.162.2.dist-info/WHEEL +5 -0
  150. onetick_py-1.162.2.dist-info/entry_points.txt +2 -0
  151. onetick_py-1.162.2.dist-info/licenses/LICENSE +21 -0
  152. onetick_py-1.162.2.dist-info/top_level.txt +2 -0
onetick/py/run.py ADDED
@@ -0,0 +1,841 @@
1
+
2
+ import inspect
3
+ import datetime
4
+ import warnings
5
+ from typing import Union, List, Optional, Dict, Any, Callable, Type
6
+ from collections import defaultdict
7
+
8
+ import numpy as np
9
+ import pandas as pd
10
+ from onetick.py.otq import otq, pyomd, otli
11
+
12
+ from onetick import py as otp
13
+ from onetick.py import utils, configuration
14
+ from onetick.py.core.column_operations.base import _Operation
15
+ from onetick.py.types import datetime2timeval, datetime2expr
16
+ from onetick.py.core.source import _is_dict_required
17
+ from onetick.py.compatibility import (
18
+ has_max_expected_ticks_per_symbol,
19
+ has_password_param,
20
+ has_query_encoding_parameter,
21
+ _add_version_info_to_exception,
22
+ )
23
+ from onetick.py._stack_info import _add_stack_info_to_exception
24
+ from onetick.py.callback import LogCallback, ManualDataframeCallback
25
+
26
+
27
+ def run(query: Union[Callable, Dict, otp.Source, otp.MultiOutputSource, # NOSONAR
28
+ otp.query, str, otq.EpBase, otq.GraphQuery,
29
+ otq.ChainQuery, otq.Chainlet, otq.SqlQuery, otp.SqlQuery],
30
+ *,
31
+ symbols: Union[List[Union[str, otq.Symbol]], otp.Source, str, None] = None,
32
+ start: Union[datetime.datetime, otp.datetime, pyomd.timeval_t, None] = utils.adaptive, # type: ignore
33
+ end: Union[datetime.datetime, otp.datetime, pyomd.timeval_t, None] = utils.adaptive, # type: ignore
34
+ date: Union[datetime.date, otp.date, None] = None,
35
+ start_time_expression: Optional[str] = None,
36
+ end_time_expression: Optional[str] = None,
37
+ timezone=utils.default, # type: ignore
38
+ context=utils.default, # type: ignore
39
+ username: Optional[str] = None,
40
+ alternative_username: Optional[str] = None,
41
+ password: Optional[str] = None,
42
+ batch_size: Union[int, Type[utils.default], None] = utils.default,
43
+ running: Optional[bool] = False,
44
+ query_properties: Optional[pyomd.QueryProperties] = None, # type: ignore
45
+ concurrency: Union[int, Type[utils.default], None] = utils.default,
46
+ apply_times_daily: Optional[int] = None,
47
+ symbol_date: Union[datetime.datetime, int, str, None] = None,
48
+ query_params: Optional[Dict[str, Any]] = None,
49
+ time_as_nsec: bool = True,
50
+ treat_byte_arrays_as_strings: bool = True,
51
+ output_matrix_per_field: bool = False,
52
+ output_structure: Optional[str] = None,
53
+ return_utc_times: Optional[bool] = None,
54
+ connection=None,
55
+ callback=None,
56
+ svg_path=None,
57
+ use_connection_pool: bool = False,
58
+ node_name: Union[str, List[str], None] = None,
59
+ require_dict: bool = False,
60
+ max_expected_ticks_per_symbol: Optional[int] = None,
61
+ log_symbol: Union[bool, Type[utils.default]] = utils.default,
62
+ encoding: Optional[str] = None,
63
+ manual_dataframe_callback: bool = False):
64
+ """
65
+ Executes a query and returns its result.
66
+
67
+ Parameters
68
+ ----------
69
+ query: :py:class:`onetick.py.Source`, otq.Ep, otq.GraphQuery, otq.ChainQuery, str, otq.Chainlet,\
70
+ Callable, otq.SqlQuery, :py:class:`onetick.py.SqlQuery`
71
+ The query to execute: a source, the path to a query on disk, or an onetick.query graph or event processor.
72
+ For running OTQ files, it represents the path (including filename) to the OTQ file to run a single query within
73
+ the file. If more than one query is present, then the query to be run must be specified
74
+ (that is, ``'path_to_file/otq_file.otq::query_to_run'``).
75
+
76
+ ``query`` can also be a function that has a symbol object as the first parameter.
77
+ This object can be used to get symbol name and symbol parameters.
78
+ Function must return a :py:class:`Source <onetick.py.Source>`.
79
+ symbols: str, list of str, list of otq.Symbol, :py:class:`onetick.py.Source`, pd.DataFrame, optional
80
+ Symbol(s) to run the query for passed as a string, a list of strings, a pd.DataFrame with the ``SYMBOL_NAME``
81
+ column, or as a "symbols" query which results include the ``SYMBOL_NAME`` column. The start/end times for the
82
+ symbols query will be taken from the params below.
83
+ See :ref:`symbols <static/concepts/symbols:Symbols: bound and unbound>` for more details.
84
+ start: :py:class:`datetime.datetime`, :py:class:`otp.datetime <onetick.py.datetime>`,\
85
+ :py:class:`pyomd.timeval_t`, optional
86
+ The start time of the query. Can be timezone-naive or timezone-aware. See also ``timezone`` argument.
87
+ onetick.py uses :py:attr:`default_start_time<onetick.py.configuration.Config.default_start_time>`
88
+ as default value, if you don't want to specify start time, e.g. to use saved time of the query,
89
+ then you should specify None value.
90
+ end: :py:class:`datetime.datetime`, :py:class:`otp.datetime <onetick.py.datetime>`,\
91
+ :py:class:`pyomd.timeval_t`, optional
92
+ The end time of the query (note that it's non-inclusive).
93
+ Can be timezone-naive or timezone-aware. See also ``timezone`` argument.
94
+ onetick.py uses :py:attr:`default_end_time<onetick.py.configuration.Config.default_end_time>`
95
+ as default value, if you don't want to specify end time, e.g. to use saved time of the query,
96
+ then you should specify None value.
97
+ date: :py:class:`datetime.date`, :py:class:`otp.date <onetick.py.date>`, optional
98
+ The date to run the query for. Can be set instead of ``start`` and ``end`` parameters.
99
+ If set then the interval to run the query will be from 0:00 to 24:00 of the specified date.
100
+ start_time_expression: str, :py:class:`~onetick.py.Operation`, optional
101
+ Start time onetick expression of the query. If specified, it will take precedence over ``start``.
102
+ Supported only if query is Source, Graph or Event Processor.
103
+ Not supported for WebAPI mode.
104
+ end_time_expression: str, :py:class:`~onetick.py.Operation`, optional
105
+ End time onetick expression of the query. If specified, it will take precedence over ``end``.
106
+ Supported only if query is Source, Graph or Event Processor.
107
+ Not supported for WebAPI mode.
108
+ timezone: str, optional
109
+ The timezone of output timestamps.
110
+ Also, when start and/or end arguments are timezone-naive, it will define their timezone.
111
+ If parameter is omitted timestamps of ticks will be formatted
112
+ with the default :py:attr:`tz<onetick.py.configuration.Config.tz>`.
113
+ context: str, optional
114
+ Allows specification of different contexts from OneTick configuration to connect to.
115
+ If not set then default :py:attr:`context<onetick.py.configuration.Config.context>` is used.
116
+ See :ref:`guide about switching contexts <switching contexts>` for examples.
117
+ username
118
+ The username to make the connection.
119
+ By default the user which executed the process is used.
120
+ alternative_username: str
121
+ The username used for authentication.
122
+ Needs to be set only when the tick server is configured to use password-based authentication.
123
+ By default, :py:attr:`default_auth_username<onetick.py.configuration.Config.default_auth_username>` is used.
124
+ Not supported for WebAPI mode.
125
+ password: str, optional
126
+ The password used for authentication.
127
+ Needs to be set only when the tick server is configured to use password-based authentication.
128
+ Note: not supported and ignored on older OneTick versions.
129
+ By default, :py:attr:`default_password<onetick.py.configuration.Config.default_password>` is used.
130
+ batch_size: int
131
+ number of symbols to run in one batch.
132
+ By default, the value from
133
+ :py:attr:`default_batch_size<onetick.py.configuration.Config.default_batch_size>` is used.
134
+ Not supported for WebAPI mode.
135
+ running: bool, optional
136
+ Indicates whether a query is CEP or not. Default is `False`.
137
+ query_properties: :py:class:`pyomd.QueryProperties` or dict, optional
138
+ Query properties, such as ONE_TO_MANY_POLICY, ALLOW_GRAPH_REUSE, etc
139
+ concurrency: int, optional
140
+ The maximum number of CPU cores to use to process the query.
141
+ By default, the value from
142
+ :py:attr:`default_concurrency<onetick.py.configuration.Config.default_concurrency>` is used.
143
+ apply_times_daily: bool
144
+ Runs the query for every day in the ``start``-``end`` time range,
145
+ using the time components of ``start`` and ``end`` datetimes.
146
+
147
+ Note that those daily intervals are executed separately, so you don't have access
148
+ to the data from previous or next days (see example in the next section).
149
+ symbol_date:
150
+ The symbol date used to look up symbology mapping information in the reference database,
151
+ expressed as datetime object or integer of YYYYMMDD format
152
+ query_params: dict
153
+ Parameters of the query.
154
+ time_as_nsec: bool
155
+ Outputs timestamps up to nanoseconds granularity
156
+ (defaults to True; when set to False, timestamps are output in microseconds granularity)
157
+ treat_byte_arrays_as_strings: bool
158
+ Outputs byte arrays as strings (defaults to True)
159
+ Not supported for WebAPI mode.
160
+ output_matrix_per_field: bool
161
+ Changes output format to list of matrices per field.
162
+ Not supported for WebAPI mode.
163
+ output_structure: otp.Source.OutputStructure, optional
164
+
165
+ Structure (type) of the result. Supported values are:
166
+ - `df` (default) - the result is returned as :pandas:`pandas.DataFrame` object
167
+ or dictionary of symbol names and :pandas:`pandas.DataFrame` objects
168
+ in case of using multiple symbols or first stage query.
169
+ - `map` - the result is returned as SymbolNumpyResultMap.
170
+ - `list` - the result is returned as list.
171
+ - `polars` - the result is returned as
172
+ `polars.DataFrame <https://docs.pola.rs/api/python/stable/reference/dataframe/index.html>`_ object
173
+ or dictionary of symbol names and dataframe objects
174
+ (**Only supported in WebAPI mode**).
175
+ return_utc_times: bool
176
+ If True, return times in the UTC timezone; otherwise return them in the local timezone.
177
+ Not supported for WebAPI mode.
178
+ connection: :py:class:`pyomd.Connection`
179
+ The connection to be used for discovering nested .otq files
180
+ Not supported for WebAPI mode.
181
+ callback: :py:class:`onetick.py.CallbackBase`
182
+ Class with callback methods.
183
+ If set, the output of the query should be controlled with callbacks
184
+ and this function returns nothing.
185
+ svg_path: str, optional
186
+ Not supported for WebAPI mode.
187
+ use_connection_pool: bool
188
+ Default is False. If set to True, the connection pool is used.
189
+ Not supported for WebAPI mode.
190
+ node_name: str, List[str], optional
191
+ Name of the output node to select result from. If query graph has several output nodes, you can specify the name
192
+ of the node to choose result from. If node_name was specified, query should be presented by path on the disk
193
+ and output_structure should be `df`
194
+ require_dict: bool
195
+ If set to True, result will be forced to be a dictionary even if it's returned for a single symbol
196
+ max_expected_ticks_per_symbol: int
197
+ Expected maximum number of ticks per symbol (used for performance optimizations).
198
+ By default,
199
+ :py:attr:`max_expected_ticks_per_symbol<onetick.py.configuration.Config.max_expected_ticks_per_symbol>`
200
+ is used.
201
+ Not supported for WebAPI mode.
202
+ log_symbol: bool
203
+ Log currently executed symbol.
204
+ Note that this only works with unbound symbols.
205
+ Also in this case :py:func:`otp.run<onetick.py.run>` is executed in ``callback`` mode
206
+ and no value is returned from the function, so it should be used only for debugging purposes.
207
+ This logging will not work if some other value is specified in parameter ``callback``.
208
+ By default, :py:attr:`otp.config.log_symbol<onetick.py.configuration.Config.log_symbol>` is used.
209
+ encoding: str, optional
210
+ The encoding of string fields.
211
+ manual_dataframe_callback: bool
212
+ Create dataframe manually with ``callback`` mode.
213
+ Only works if ``output_structure='df'`` is specified and parameter ``callback`` is not.
214
+ May improve performance in some cases.
215
+
216
+ Returns
217
+ -------
218
+ result, list, dict, :pandas:`pandas.DataFrame`, None
219
+ result of the query
220
+
221
+ Examples
222
+ --------
223
+
224
+ Running :py:class:`onetick.py.Source` and setting start and end times:
225
+
226
+ >>> data = otp.Tick(A=1)
227
+ >>> otp.run(data, start=otp.dt(2003, 12, 2), end=otp.dt(2003, 12, 4))
228
+ Time A
229
+ 0 2003-12-02 1
230
+
231
+ Setting query interval with ``date`` parameter:
232
+
233
+ >>> data = otp.Tick(A=1)
234
+ >>> data['START'] = data['_START_TIME']
235
+ >>> data['END'] = data['_END_TIME']
236
+ >>> otp.run(data, date=otp.dt(2003, 12, 1))
237
+ Time A START END
238
+ 0 2003-12-01 1 2003-12-01 2003-12-02
239
+
240
+ Running otq.Ep and passing query parameters:
241
+
242
+ >>> ep = otq.TickGenerator(bucket_interval=0, fields='long A = $X').tick_type('TT')
243
+ >>> otp.run(ep, symbols='LOCAL::', query_params={'X': 1})
244
+ Time A
245
+ 0 2003-12-04 1
246
+
247
+ Running in callback mode:
248
+
249
+ >>> class Callback(otp.CallbackBase):
250
+ ... def __init__(self):
251
+ ... self.result = None
252
+ ... def process_tick(self, tick, time):
253
+ ... self.result = tick
254
+ >>> data = otp.Tick(A=1)
255
+ >>> callback = Callback()
256
+ >>> otp.run(data, callback=callback)
257
+ >>> callback.result
258
+ {'A': 1}
259
+
260
+ Running with ``apply_times_daily``.
261
+ Note that daily intervals are processed separately so, for example,
262
+ we can't access column **COUNT** from previous day.
263
+
264
+ >>> trd = otp.DataSource('US_COMP', symbols='AAPL', tick_type='TRD') # doctest: +SKIP
265
+ >>> trd = trd.agg({'COUNT': otp.agg.count()},
266
+ ... bucket_interval=12 * 3600, bucket_time='start') # doctest: +SKIP
267
+ >>> trd['PREV_COUNT'] = trd['COUNT'][-1] # doctest: +SKIP
268
+ >>> otp.run(trd, apply_times_daily=True,
269
+ ... start=otp.dt(2023, 4, 3), end=otp.dt(2023, 4, 5), timezone='EST5EDT') # doctest: +SKIP
270
+ Time COUNT PREV_COUNT
271
+ 0 2023-04-03 00:00:00 328447 0
272
+ 1 2023-04-03 12:00:00 240244 328447
273
+ 2 2023-04-04 00:00:00 263293 0
274
+ 3 2023-04-04 12:00:00 193018 263293
275
+
276
+ Using a function as a ``query``, accessing symbol name and parameters:
277
+
278
+ >>> def query(symbol):
279
+ ... t = otp.Tick(X='x')
280
+ ... t['SYMBOL_NAME'] = symbol.name
281
+ ... t['SYMBOL_PARAM'] = symbol.PARAM
282
+ ... return t
283
+ >>> symbols = otp.Ticks({'SYMBOL_NAME': ['A', 'B'], 'PARAM': [1, 2]})
284
+ >>> result = otp.run(query, symbols=symbols)
285
+ >>> result['A']
286
+ Time X SYMBOL_NAME SYMBOL_PARAM
287
+ 0 2003-12-01 x A 1
288
+ >>> result['B']
289
+ Time X SYMBOL_NAME SYMBOL_PARAM
290
+ 0 2003-12-01 x B 2
291
+
292
+ Debugging unbound symbols with ``log_symbol`` parameter:
293
+
294
+ >>> data = otp.Tick(X=1)
295
+ >>> symbols = otp.Ticks({'SYMBOL_NAME': ['A', 'B'], 'PARAM': [1, 2]})
296
+ >>> otp.run(query, symbols=symbols, log_symbol=True) # doctest: +ELLIPSIS
297
+ Running query <onetick.py.sources.ticks.Tick object at ...>
298
+ Processing symbol A
299
+ Processing symbol B
300
+
301
+ By default, some non-standard characters in data strings could be processed incorrectly:
302
+
303
+ >>> data = ['AA測試AA']
304
+ >>> source = otp.Ticks({'A': data})
305
+ >>> otp.run(source)
306
+ Time A
307
+ 0 2003-12-01 AA測試AA
308
+
309
+ To fix this you can pass `encoding` parameter to `otp.run`:
310
+
311
+ .. testcode::
312
+ :skipif: not has_query_encoding_parameter()
313
+
314
+ data = ['AA測試AA']
315
+ source = otp.Ticks({'A': data})
316
+ df = otp.run(source, encoding="utf-8")
317
+ print(df)
318
+
319
+ .. testoutput::
320
+
321
+ Time A
322
+ 0 2003-12-01 AA測試AA
323
+
324
+ Note that query ``start`` time is inclusive, but query ``end`` time is not,
325
+ meaning that ticks with timestamps equal to the query end time will not be included:
326
+
327
+ >>> data = otp.Tick(A=1, bucket_interval=24*60*60)
328
+ >>> data['A'] = data['TIMESTAMP'].dt.day_of_month()
329
+ >>> otp.run(data, start=otp.dt(2003, 12, 1), end=otp.dt(2003, 12, 4))
330
+ Time A
331
+ 0 2003-12-01 1
332
+ 1 2003-12-02 2
333
+ 2 2003-12-03 3
334
+ >>> otp.run(data, start=otp.dt(2003, 12, 1), end=otp.dt(2003, 12, 2))
335
+ Time A
336
+ 0 2003-12-01 1
337
+
338
+ If you want to include such ticks, you can add one nanosecond to the query end time:
339
+
340
+ >>> otp.run(data, start=otp.dt(2003, 12, 1), end=otp.dt(2003, 12, 2) + otp.Nano(1))
341
+ Time A
342
+ 0 2003-12-01 1
343
+ 1 2003-12-02 2
344
+ """
345
+ _ = otli.OneTickLib()
346
+
347
+ query_schema = None
348
+ if isinstance(query, otp.Source):
349
+ query_schema = query.schema
350
+
351
+ if timezone is utils.default:
352
+ timezone = configuration.config.tz
353
+ if context is utils.default or context is None:
354
+ context = configuration.config.context
355
+ if concurrency is utils.default:
356
+ concurrency = configuration.default_query_concurrency()
357
+
358
+ if batch_size is utils.default:
359
+ batch_size = configuration.config.default_batch_size
360
+ if query_properties is None:
361
+ query_properties = pyomd.QueryProperties()
362
+
363
+ if isinstance(query_properties, dict):
364
+ qp_dict = query_properties
365
+ query_properties = utils.query_properties_from_dict(qp_dict)
366
+ else:
367
+ qp_dict = utils.query_properties_to_dict(query_properties)
368
+
369
+ if 'USE_FT' not in qp_dict:
370
+ query_properties.set_property_value('USE_FT', otp.config.default_fault_tolerance) # type: ignore[union-attr]
371
+
372
+ if 'IGNORE_TICKS_IN_UNENTITLED_TIME_RANGE' not in qp_dict:
373
+ query_properties.set_property_value('IGNORE_TICKS_IN_UNENTITLED_TIME_RANGE', # type: ignore[union-attr]
374
+ str(otp.config.ignore_ticks_in_unentitled_time_range).upper())
375
+
376
+ if date is not None:
377
+ for v in (start, end, start_time_expression, end_time_expression):
378
+ if v is not None and v is not utils.adaptive:
379
+ raise ValueError("Can't use 'date' parameter when other time interval parameters are specified")
380
+ start = otp.date(date)
381
+ end = start + otp.Day(1)
382
+
383
+ has_source_start, has_source_end = False, False
384
+ if isinstance(query, otp.Source):
385
+ has_source_start, has_source_end = query.has_start_end_time()
386
+
387
+ if (start is None or start is utils.adaptive) and otp.config.get('default_start_time') is None and \
388
+ not has_source_start:
389
+ warnings.warn('Start time is None and default start time is not set, '
390
+ 'onetick.query will use 19700101 as start time, '
391
+ 'which can cause unexpected results. '
392
+ 'Please set start time explicitly.')
393
+ if (end is None or end is utils.adaptive) and otp.config.get('default_end_time') is None and \
394
+ not has_source_end:
395
+ warnings.warn('End time is None and default end time is not set, '
396
+ 'onetick.query will use 19700101 as end time, '
397
+ 'which can cause unexpected results. '
398
+ 'Please set end time explicitly.')
399
+
400
+ if isinstance(start, _Operation) and start_time_expression is None:
401
+ start_time_expression = str(start)
402
+ if isinstance(end, _Operation) and end_time_expression is None:
403
+ end_time_expression = str(end)
404
+
405
+ if isinstance(start_time_expression, _Operation):
406
+ start_time_expression = str(start_time_expression)
407
+ if isinstance(end_time_expression, _Operation):
408
+ end_time_expression = str(end_time_expression)
409
+
410
+ # PY-1321: CEP-query seems to be using start and end values for some reason, so setting them to None
411
+ if start_time_expression is not None:
412
+ start = None
413
+ if end_time_expression is not None:
414
+ end = None
415
+
416
+ if inspect.ismethod(query) or inspect.isfunction(query):
417
+ t_s = None
418
+ if isinstance(symbols, otp.Source):
419
+ t_s = symbols
420
+ if isinstance(symbols, otp.query):
421
+ t_s = otp.Query(symbols)
422
+ if isinstance(symbols, str):
423
+ t_s = otp.Tick(SYMBOL_NAME=symbols)
424
+ if isinstance(symbols, list):
425
+ t_s = otp.Ticks(SYMBOL_NAME=symbols)
426
+
427
+ if isinstance(t_s, otp.Source):
428
+ query = query(t_s.to_symbol_param()) # type: ignore
429
+
430
+ query, query_params = _preprocess_otp_query(query, query_params)
431
+ # If query is an otp.Source object, then it can deal with otp.datetime and pd.Timestamp types
432
+
433
+ if log_symbol is utils.default:
434
+ log_symbol = otp.config.log_symbol
435
+ if callback is None and log_symbol:
436
+ callback = LogCallback(query)
437
+
438
+ if manual_dataframe_callback:
439
+ if output_structure and output_structure != 'df':
440
+ raise ValueError("Parameter 'output_structure' must be set to 'df'"
441
+ " if parameter 'manual_dataframe_callback' is set")
442
+ if log_symbol:
443
+ raise ValueError("Parameters 'manual_dataframe_callback' and 'log_symbol' can't be set together")
444
+ if callback is not None:
445
+ raise ValueError("Parameters 'manual_dataframe_callback' and 'callback' can't be set together")
446
+ callback = ManualDataframeCallback(timezone)
447
+
448
+ output_mode = otq.QueryOutputMode.numpy
449
+ if callback is not None:
450
+ output_mode = otq.QueryOutputMode.callback
451
+ if output_structure == 'polars':
452
+ if not otq.webapi:
453
+ raise ValueError("Parameter output_structure='polars' is only supported in WebAPI mode.")
454
+ try:
455
+ import polars as _ # type: ignore
456
+ except ImportError:
457
+ raise ValueError("Parameter output_structure='polars' is specified, but module polars can't be imported. "
458
+ "Use 'pip install onetick-py[polars]' command to install onetick-py with polars support.")
459
+ try:
460
+ output_mode = otq.QueryOutputMode.polars
461
+ except AttributeError:
462
+ raise ValueError("Parameter output_structure='polars' is specified, but it's not supported "
463
+ "by installed onetick.query_webapi library.")
464
+
465
+ output_structure, output_structure_for_otq = _process_output_structure(output_structure)
466
+ if symbol_date:
467
+ # otq.run supports only strings and datetime.date
468
+ symbol_date = utils.symbol_date_to_str(symbol_date)
469
+
470
+ require_dict = require_dict or _is_dict_required(symbols)
471
+
472
+ # converting symbols properly
473
+ if isinstance(symbols, otp.Source):
474
+ # check if SYMBOL_NAME is in schema, or if schema contains only one field
475
+ if ('SYMBOL_NAME' not in symbols.columns(skip_meta_fields=True).keys()) and \
476
+ len(symbols.columns(skip_meta_fields=True)) != 1:
477
+ warnings.warn('Using as a symbol list a source without "SYMBOL_NAME" field '
478
+ 'and with more than one field! This won\'t work unless the schema is incomplete')
479
+
480
+ symbols = otp.Source._convert_symbol_to_string(
481
+ symbol=symbols,
482
+ tmp_otq=query._tmp_otq if isinstance(query, otp.Source) else None,
483
+ start=start,
484
+ end=end,
485
+ timezone=timezone
486
+ )
487
+ if isinstance(symbols, str):
488
+ symbols = [symbols]
489
+ if isinstance(symbols, pd.DataFrame):
490
+ symbols = utils.get_symbol_list_from_df(symbols)
491
+
492
+ if isinstance(query, dict):
493
+ # we assume it's a dictionary of sources for the MultiOutputSource object
494
+ query = otp.MultiOutputSource(query)
495
+
496
+ params_saved_to_otq = {}
497
+ if isinstance(query, otp.Source) or isinstance(query, otp.MultiOutputSource):
498
+ start = None if start is utils.adaptive else start
499
+ end = None if end is utils.adaptive else end
500
+ params_saved_to_otq = dict(
501
+ symbols=symbols,
502
+ start=start,
503
+ end=end,
504
+ start_time_expression=start_time_expression,
505
+ end_time_expression=end_time_expression,
506
+ )
507
+ param_upd = query._prepare_for_execution(symbols=symbols, start=start, end=end,
508
+ timezone=timezone,
509
+ start_time_expression=start_time_expression,
510
+ end_time_expression=end_time_expression,
511
+ require_dict=require_dict,
512
+ running_query_flag=running,
513
+ node_name=node_name, has_output=None)
514
+ query, require_dict, node_name = param_upd
515
+ # symbols and start/end times should be already stored in the query and should not be passed again
516
+ symbols = None
517
+ start = None
518
+ end = None
519
+ start_time_expression = None
520
+ end_time_expression = None
521
+ time_as_nsec = True
522
+
523
+ elif isinstance(query, (otq.graph_components.EpBase, otq.Chainlet)):
524
+ query = otq.GraphQuery(query)
525
+
526
+ if isinstance(query, otq.SqlQuery):
527
+ # This has no impact on query result, just placeholder values
528
+ start = end = None
529
+
530
+ if start is utils.adaptive:
531
+ start = configuration.config.default_start_time
532
+
533
+ if end is utils.adaptive:
534
+ end = configuration.config.default_end_time
535
+
536
+ if not otq.webapi:
537
+ # converting to expressions, because in datetime objects nanoseconds are not supported on some OneTick versions
538
+ if start is not None and not start_time_expression:
539
+ start_time_expression = datetime2expr(start)
540
+ if end is not None and not end_time_expression:
541
+ end_time_expression = datetime2expr(end)
542
+
543
+ # start and end parameters could be set to None,
544
+ # because we use start and end time expressions,
545
+ # but because of the bug it sometimes doesn't work
546
+ # https://onemarketdata.atlassian.net/browse/BDS-454
547
+ start, end = _get_start_end(start, end, timezone)
548
+
549
+ # authentication
550
+ alternative_username = alternative_username or otp.config.default_auth_username
551
+ password = password or otp.config.default_password
552
+ kwargs = {}
553
+ if password is not None and has_password_param(throw_warning=True):
554
+ kwargs['password'] = password
555
+
556
+ max_expected_ticks_per_symbol = max_expected_ticks_per_symbol or otp.config.max_expected_ticks_per_symbol
557
+ if has_max_expected_ticks_per_symbol(throw_warning=True):
558
+ kwargs['max_expected_ticks_per_symbol'] = max_expected_ticks_per_symbol
559
+
560
+ if encoding is not None and has_query_encoding_parameter(throw_warning=True):
561
+ kwargs['encoding'] = encoding
562
+
563
+ run_params = dict(
564
+ query=query,
565
+ symbols=symbols, start=start, end=end, context=context, username=username,
566
+ timezone=timezone,
567
+ start_time_expression=start_time_expression,
568
+ end_time_expression=end_time_expression,
569
+ alternative_username=alternative_username, batch_size=batch_size,
570
+ running_query_flag=running, query_properties=query_properties,
571
+ max_concurrency=concurrency, apply_times_daily=apply_times_daily, symbol_date=symbol_date,
572
+ query_params=query_params, time_as_nsec=time_as_nsec,
573
+ treat_byte_arrays_as_strings=treat_byte_arrays_as_strings,
574
+ output_mode=output_mode,
575
+ output_matrix_per_field=output_matrix_per_field, output_structure=output_structure_for_otq,
576
+ return_utc_times=return_utc_times, connection=connection,
577
+ callback=callback, svg_path=svg_path, use_connection_pool=use_connection_pool, **kwargs
578
+ )
579
+
580
+ # some parameters were saved in .otq file, we need to debug them too
581
+ debug_params = dict(run_params, **params_saved_to_otq) if params_saved_to_otq else run_params
582
+ otp.get_logger(__name__).info(otp.utils.json_dumps(debug_params))
583
+
584
+ try:
585
+ result = otq.run(**run_params)
586
+ except Exception as e:
587
+ e = _add_stack_info_to_exception(e)
588
+ e = _add_version_info_to_exception(e)
589
+ raise e # noqa: W0707
590
+
591
+ if output_mode == otq.QueryOutputMode.callback:
592
+ if manual_dataframe_callback:
593
+ result = callback.result
594
+ return result
595
+
596
+ # node_names should be either a list of node names or None
597
+ node_names: Optional[List[str]]
598
+ if isinstance(node_name, str):
599
+ node_names = [node_name]
600
+ else:
601
+ node_names = node_name
602
+
603
+ if query_schema:
604
+ # check if we have empty result for any symbol to add schema to empty dataframes
605
+ _process_empty_results(result, query_schema, output_structure)
606
+
607
+ return _format_call_output(result, output_structure=output_structure,
608
+ require_dict=require_dict, node_names=node_names)
609
+
610
+
611
def _filter_returned_map_by_node(result, _node_names):
    """
    Pass-through placeholder for node filtering of map-structured results.

    ``result`` is expected in the form ``{symbol: {node_name: data}}``.
    The ``_node_names`` argument is accepted but currently ignored:
    the input object is handed back untouched.
    """
    # TODO: implement filtering by node_name in a way
    #       that no information from SymbolNumpyResultMap object is lost
    return result
619
+
620
+
621
def _filter_returned_list_by_node(result, node_names):
    """
    Keep only the entries of a list-structured result that belong to the requested nodes.

    ``result`` is an iterable of tuples ``(symbol, data_1, data_2, node_name, ...)``.
    When ``node_names`` is empty or None the input is returned as is.
    Otherwise a new list of 4-tuples ``(symbol, data_1, data_2, node_name)`` is built
    (any trailing tuple elements are dropped).

    Raises
    ------
    ValueError
        If at least one entry carries data but none of the entries
        matches the requested node names.
    """
    if not node_names:
        return result

    selected = []
    has_data = False
    for symbol, data_1, data_2, node, *_ in result:
        # remember whether anything at all came back with ticks
        has_data |= len(data_1) > 0
        if node in node_names:
            selected.append((symbol, data_1, data_2, node))

    # an entry is appended exactly when a node matched, so an empty list means "no node found"
    if has_data and not selected:
        # TODO: Do we even want to raise it?
        raise ValueError(f'No passed node name(s) were found in the results. Passed node names were: {node_names}')
    return selected
644
+
645
+
646
def _form_dict_from_list(data_list, output_structure):
    """
    Group a list-structured result by symbol (and by node name where needed).

    ``data_list`` has the format ``[(symbol, data_1, data_2, node_name, ...), ...]``.
    The returned dictionary is keyed by symbol; each value is one of:

    * ``DataFrame(data_1)`` -- one result for the symbol;
    * ``[DataFrame(data_1), ...]`` -- several results for the symbol from a single node;
    * ``{node_name: DataFrame(data_1)}`` -- single results from several nodes;
    * ``{node_name: [DataFrame(data_1), ...]}`` -- several results from several nodes.

    With ``output_structure == 'df'`` the frames are pandas DataFrames,
    otherwise polars DataFrames are produced.
    """

    def to_frame(data):
        if output_structure == 'df':
            # data is iterated as (column name, column values) pairs
            columns = {}
            for col_name, col_value in data:
                columns[col_name] = col_value
            return pd.DataFrame(columns)

        import polars
        # polars only works in webapi mode,
        # where onetick.query_webapi already hands back polars.DataFrame objects;
        # anything else is treated as "no data" and becomes an empty polars.DataFrame
        return data if isinstance(data, polars.DataFrame) else polars.DataFrame()

    def collapse_by_node(pairs):
        """
        pairs is a list of (node, dataframe)
        """
        grouped = {}
        for node, frame in pairs:
            grouped.setdefault(node, []).append(frame)

        # a node with exactly one frame is represented by the frame itself
        collapsed = {
            node: frames[0] if len(frames) == 1 else frames
            for node, frames in grouped.items()
        }
        if len(collapsed) == 1:
            # a single node does not need the node-name level at all
            return next(iter(collapsed.values()))
        return collapsed

    per_symbol = {}
    for symbol, data, _, node, *_ in data_list:
        per_symbol.setdefault(symbol, []).append((node, to_frame(data)))

    return {symbol: collapse_by_node(pairs) for symbol, pairs in per_symbol.items()}
695
+
696
+
697
def _format_call_output(result, output_structure, node_names, require_dict):
    """Shape the raw output of otq.run() according to the requested output structure.

    Parameters
    ----------
    result:
        Raw data as returned by otq.run().
    output_structure: ['df', 'polars', 'list', 'map']
        If 'list' or 'map': the raw data is returned, passed through the
        corresponding node-name filter.

        If 'df' or 'polars': the raw data (which is expected to be a list)
        is filtered by node name and converted to a dictionary of dataframes
        keyed by symbol. If that dictionary holds exactly one symbol and
        ``require_dict`` is false, the single value is returned instead of the dictionary.
        Each value is a dataframe, a list of dataframes, or a dictionary keyed by node name,
        depending on how many results and nodes there are for the symbol.
    node_names: list of str, None
        If not empty, only output produced by these nodes is selected.
    require_dict: bool
        If True, the 'df'/'polars' output is always a dictionary, even for a single symbol.
        Has no effect for other values of output_structure.

    Returns
    ----------
    Formatted output: dataframe, dictionary or list

    """
    if output_structure == 'map':
        return _filter_returned_map_by_node(result, node_names)
    if output_structure == 'list':
        return _filter_returned_list_by_node(result, node_names)

    assert output_structure in ('df', 'polars'), (f'Output structure should be one of: "df", "map", "list", "polars" '
                                                  f'instead "{output_structure}" was passed')

    # dataframe-like output structures imply that raw results came as a list
    frames_by_symbol = _form_dict_from_list(
        _filter_returned_list_by_node(result, node_names),
        output_structure,
    )

    if len(frames_by_symbol) != 1 or require_dict:
        return frames_by_symbol

    # exactly one symbol and no dictionary requested: unwrap the only value
    return next(iter(frames_by_symbol.values()))
742
+
743
+
744
def _process_empty_results(result, query_schema, output_structure):
    """
    Substitute empty per-symbol results with empty data that still carries the query schema.

    The placeholder has one zero-length numpy array per field of ``query_schema``
    plus a 'Time' field of type otp.nsectime. ``result`` is modified in place;
    nothing is returned.
    """
    is_result_map = type(result) is otq.SymbolNumpyResultMap

    empty_columns = []
    for field, dtype in {**query_schema, 'Time': otp.nsectime}.items():
        empty_columns.append((field, np.array([], dtype=otp.types.type2np(dtype))))

    # the placeholder format follows the format of the real data
    if output_structure == 'polars':
        import polars
        placeholder = polars.DataFrame(dict(empty_columns))
    elif is_result_map:
        placeholder = dict(empty_columns)
    else:
        placeholder = empty_columns

    if is_result_map:
        for per_node in result.get_dict().values():
            for node_name, symbol_result in per_node.items():
                if len(symbol_result[0]) == 0:
                    per_node[node_name] = (placeholder, symbol_result[1])
        return

    for position, entry in enumerate(result):
        if len(entry[1]) == 0:
            # only the data element (index 1) is swapped, everything else is kept
            result[position] = (entry[0], placeholder, entry[2], entry[3], *entry[4:])
772
+
773
+
774
def _preprocess_otp_query(query, query_params):
    """
    Unwrap otp.query objects into a query path and query parameters.

    An ``otp.query._outputs`` object is first replaced by its 'OUT' item.
    For an ``otp.query`` object the returned query is its ``path``, and its ``params``
    (when not empty) become the returned parameters.
    Any other ``query`` is returned unchanged together with ``query_params``.

    Raises
    ------
    ValueError
        If parameters are set both on the otp.query object and in ``query_params``.
    """
    if isinstance(query, otp.query._outputs):
        query = query['OUT']

    if not isinstance(query, otp.query):
        return query, query_params

    own_params = query.params
    if own_params:
        if query_params:
            raise ValueError("please specify parameters in query or in otp.run only")
        query_params = own_params

    return query.path, query_params
786
+
787
+
788
def _get_start_end(start, end, timezone):
    """
    Translate start/end times accepted by onetick-py
    into objects accepted by onetick-query.

    ``utils.adaptive`` is replaced by the configured default start/end time,
    plain ``datetime.date`` objects become ``datetime.datetime`` at midnight,
    and pandas.Timestamp / otp.datetime values are converted depending on
    whether the webapi flavour of onetick-query is in use.
    Values of any other type are returned unchanged.
    """
    def to_otq_time(value):
        if not isinstance(value, (pd.Timestamp, otp.datetime)):
            return value

        if otq.webapi:
            # pandas.Timestamp is passed as is, otp.datetime is unwrapped to its timestamp
            return value if isinstance(value, pd.Timestamp) else value.ts

        # there is a bug in older onetick versions using wrong timezone,
        # in this case the conversion is done with GMT
        tz = timezone if otp.compatibility.is_correct_timezone_used_in_otq_run() else 'GMT'
        return datetime2timeval(value, tz)

    def date_to_midnight(value):
        # the exact type is checked on purpose: `isinstance(obj, datetime.date)`
        # would also be True for datetime.datetime objects
        if type(value) is datetime.date:
            return datetime.datetime(value.year, value.month, value.day)
        return value

    start = configuration.config.default_start_time if start is utils.adaptive else start
    end = configuration.config.default_end_time if end is utils.adaptive else end

    start = date_to_midnight(start)
    end = date_to_midnight(end)

    return to_otq_time(start), to_otq_time(end)
826
+
827
+
828
def _process_output_structure(output_structure):
    """
    Validate ``output_structure`` and find the matching onetick-query output structure.

    An empty value is treated as "df". Returns a tuple
    ``(output_structure, output_structure_for_otq)``.

    Raises
    ------
    ValueError
        If ``output_structure`` is not one of: df, list, map, polars.
    """
    # otq doesn't support df (and polars), a result list is requested from it instead
    otq_structures = (
        ("df", "symbol_result_list"),
        ("list", "symbol_result_list"),
        ("map", "symbol_result_map"),
        ("polars", "symbol_result_list"),
    )

    if not output_structure:
        output_structure = "df"

    # compared with == one by one, so unhashable values still end up in ValueError
    for known_structure, structure_for_otq in otq_structures:
        if output_structure == known_structure:
            return known_structure, structure_for_otq

    raise ValueError("output_structure support only the following values: df, list, map and polars")