onetick-py 1.162.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- locator_parser/__init__.py +0 -0
- locator_parser/acl.py +73 -0
- locator_parser/actions.py +266 -0
- locator_parser/common.py +365 -0
- locator_parser/io.py +41 -0
- locator_parser/locator.py +150 -0
- onetick/__init__.py +101 -0
- onetick/doc_utilities/__init__.py +3 -0
- onetick/doc_utilities/napoleon.py +40 -0
- onetick/doc_utilities/ot_doctest.py +140 -0
- onetick/doc_utilities/snippets.py +280 -0
- onetick/lib/__init__.py +4 -0
- onetick/lib/instance.py +138 -0
- onetick/py/__init__.py +290 -0
- onetick/py/_stack_info.py +89 -0
- onetick/py/_version.py +2 -0
- onetick/py/aggregations/__init__.py +11 -0
- onetick/py/aggregations/_base.py +645 -0
- onetick/py/aggregations/_docs.py +912 -0
- onetick/py/aggregations/compute.py +286 -0
- onetick/py/aggregations/functions.py +2216 -0
- onetick/py/aggregations/generic.py +104 -0
- onetick/py/aggregations/high_low.py +80 -0
- onetick/py/aggregations/num_distinct.py +83 -0
- onetick/py/aggregations/order_book.py +427 -0
- onetick/py/aggregations/other.py +1014 -0
- onetick/py/backports.py +26 -0
- onetick/py/cache.py +373 -0
- onetick/py/callback/__init__.py +5 -0
- onetick/py/callback/callback.py +275 -0
- onetick/py/callback/callbacks.py +131 -0
- onetick/py/compatibility.py +752 -0
- onetick/py/configuration.py +736 -0
- onetick/py/core/__init__.py +0 -0
- onetick/py/core/_csv_inspector.py +93 -0
- onetick/py/core/_internal/__init__.py +0 -0
- onetick/py/core/_internal/_manually_bound_value.py +6 -0
- onetick/py/core/_internal/_nodes_history.py +250 -0
- onetick/py/core/_internal/_op_utils/__init__.py +0 -0
- onetick/py/core/_internal/_op_utils/every_operand.py +9 -0
- onetick/py/core/_internal/_op_utils/is_const.py +10 -0
- onetick/py/core/_internal/_per_tick_scripts/tick_list_sort_template.script +121 -0
- onetick/py/core/_internal/_proxy_node.py +140 -0
- onetick/py/core/_internal/_state_objects.py +2307 -0
- onetick/py/core/_internal/_state_vars.py +87 -0
- onetick/py/core/_source/__init__.py +0 -0
- onetick/py/core/_source/_symbol_param.py +95 -0
- onetick/py/core/_source/schema.py +97 -0
- onetick/py/core/_source/source_methods/__init__.py +0 -0
- onetick/py/core/_source/source_methods/aggregations.py +810 -0
- onetick/py/core/_source/source_methods/applyers.py +296 -0
- onetick/py/core/_source/source_methods/columns.py +141 -0
- onetick/py/core/_source/source_methods/data_quality.py +301 -0
- onetick/py/core/_source/source_methods/debugs.py +270 -0
- onetick/py/core/_source/source_methods/drops.py +120 -0
- onetick/py/core/_source/source_methods/fields.py +619 -0
- onetick/py/core/_source/source_methods/filters.py +1001 -0
- onetick/py/core/_source/source_methods/joins.py +1393 -0
- onetick/py/core/_source/source_methods/merges.py +566 -0
- onetick/py/core/_source/source_methods/misc.py +1325 -0
- onetick/py/core/_source/source_methods/pandases.py +155 -0
- onetick/py/core/_source/source_methods/renames.py +356 -0
- onetick/py/core/_source/source_methods/sorts.py +183 -0
- onetick/py/core/_source/source_methods/switches.py +142 -0
- onetick/py/core/_source/source_methods/symbols.py +117 -0
- onetick/py/core/_source/source_methods/times.py +627 -0
- onetick/py/core/_source/source_methods/writes.py +702 -0
- onetick/py/core/_source/symbol.py +202 -0
- onetick/py/core/_source/tmp_otq.py +222 -0
- onetick/py/core/column.py +209 -0
- onetick/py/core/column_operations/__init__.py +0 -0
- onetick/py/core/column_operations/_methods/__init__.py +4 -0
- onetick/py/core/column_operations/_methods/_internal.py +28 -0
- onetick/py/core/column_operations/_methods/conversions.py +215 -0
- onetick/py/core/column_operations/_methods/methods.py +294 -0
- onetick/py/core/column_operations/_methods/op_types.py +150 -0
- onetick/py/core/column_operations/accessors/__init__.py +0 -0
- onetick/py/core/column_operations/accessors/_accessor.py +30 -0
- onetick/py/core/column_operations/accessors/decimal_accessor.py +92 -0
- onetick/py/core/column_operations/accessors/dt_accessor.py +464 -0
- onetick/py/core/column_operations/accessors/float_accessor.py +160 -0
- onetick/py/core/column_operations/accessors/str_accessor.py +1374 -0
- onetick/py/core/column_operations/base.py +1061 -0
- onetick/py/core/cut_builder.py +149 -0
- onetick/py/core/db_constants.py +20 -0
- onetick/py/core/eval_query.py +244 -0
- onetick/py/core/lambda_object.py +442 -0
- onetick/py/core/multi_output_source.py +193 -0
- onetick/py/core/per_tick_script.py +2253 -0
- onetick/py/core/query_inspector.py +465 -0
- onetick/py/core/source.py +1663 -0
- onetick/py/db/__init__.py +2 -0
- onetick/py/db/_inspection.py +1042 -0
- onetick/py/db/db.py +1423 -0
- onetick/py/db/utils.py +64 -0
- onetick/py/docs/__init__.py +0 -0
- onetick/py/docs/docstring_parser.py +112 -0
- onetick/py/docs/utils.py +81 -0
- onetick/py/functions.py +2354 -0
- onetick/py/license.py +188 -0
- onetick/py/log.py +88 -0
- onetick/py/math.py +947 -0
- onetick/py/misc.py +437 -0
- onetick/py/oqd/__init__.py +22 -0
- onetick/py/oqd/eps.py +1195 -0
- onetick/py/oqd/sources.py +325 -0
- onetick/py/otq.py +211 -0
- onetick/py/pyomd_mock.py +47 -0
- onetick/py/run.py +841 -0
- onetick/py/servers.py +173 -0
- onetick/py/session.py +1342 -0
- onetick/py/sources/__init__.py +19 -0
- onetick/py/sources/cache.py +167 -0
- onetick/py/sources/common.py +126 -0
- onetick/py/sources/csv.py +642 -0
- onetick/py/sources/custom.py +85 -0
- onetick/py/sources/data_file.py +305 -0
- onetick/py/sources/data_source.py +1049 -0
- onetick/py/sources/empty.py +94 -0
- onetick/py/sources/odbc.py +337 -0
- onetick/py/sources/order_book.py +238 -0
- onetick/py/sources/parquet.py +168 -0
- onetick/py/sources/pit.py +191 -0
- onetick/py/sources/query.py +495 -0
- onetick/py/sources/snapshots.py +419 -0
- onetick/py/sources/split_query_output_by_symbol.py +198 -0
- onetick/py/sources/symbology_mapping.py +123 -0
- onetick/py/sources/symbols.py +357 -0
- onetick/py/sources/ticks.py +825 -0
- onetick/py/sql.py +70 -0
- onetick/py/state.py +256 -0
- onetick/py/types.py +2056 -0
- onetick/py/utils/__init__.py +70 -0
- onetick/py/utils/acl.py +93 -0
- onetick/py/utils/config.py +186 -0
- onetick/py/utils/default.py +49 -0
- onetick/py/utils/file.py +38 -0
- onetick/py/utils/helpers.py +76 -0
- onetick/py/utils/locator.py +94 -0
- onetick/py/utils/perf.py +499 -0
- onetick/py/utils/query.py +49 -0
- onetick/py/utils/render.py +1139 -0
- onetick/py/utils/script.py +244 -0
- onetick/py/utils/temp.py +471 -0
- onetick/py/utils/types.py +118 -0
- onetick/py/utils/tz.py +82 -0
- onetick_py-1.162.2.dist-info/METADATA +148 -0
- onetick_py-1.162.2.dist-info/RECORD +152 -0
- onetick_py-1.162.2.dist-info/WHEEL +5 -0
- onetick_py-1.162.2.dist-info/entry_points.txt +2 -0
- onetick_py-1.162.2.dist-info/licenses/LICENSE +21 -0
- onetick_py-1.162.2.dist-info/top_level.txt +2 -0
onetick/py/run.py
ADDED
|
@@ -0,0 +1,841 @@
|
|
|
1
|
+
|
|
2
|
+
import inspect
|
|
3
|
+
import datetime
|
|
4
|
+
import warnings
|
|
5
|
+
from typing import Union, List, Optional, Dict, Any, Callable, Type
|
|
6
|
+
from collections import defaultdict
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
import pandas as pd
|
|
10
|
+
from onetick.py.otq import otq, pyomd, otli
|
|
11
|
+
|
|
12
|
+
from onetick import py as otp
|
|
13
|
+
from onetick.py import utils, configuration
|
|
14
|
+
from onetick.py.core.column_operations.base import _Operation
|
|
15
|
+
from onetick.py.types import datetime2timeval, datetime2expr
|
|
16
|
+
from onetick.py.core.source import _is_dict_required
|
|
17
|
+
from onetick.py.compatibility import (
|
|
18
|
+
has_max_expected_ticks_per_symbol,
|
|
19
|
+
has_password_param,
|
|
20
|
+
has_query_encoding_parameter,
|
|
21
|
+
_add_version_info_to_exception,
|
|
22
|
+
)
|
|
23
|
+
from onetick.py._stack_info import _add_stack_info_to_exception
|
|
24
|
+
from onetick.py.callback import LogCallback, ManualDataframeCallback
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def run(query: Union[Callable, Dict, otp.Source, otp.MultiOutputSource, # NOSONAR
|
|
28
|
+
otp.query, str, otq.EpBase, otq.GraphQuery,
|
|
29
|
+
otq.ChainQuery, otq.Chainlet, otq.SqlQuery, otp.SqlQuery],
|
|
30
|
+
*,
|
|
31
|
+
symbols: Union[List[Union[str, otq.Symbol]], otp.Source, str, None] = None,
|
|
32
|
+
start: Union[datetime.datetime, otp.datetime, pyomd.timeval_t, None] = utils.adaptive, # type: ignore
|
|
33
|
+
end: Union[datetime.datetime, otp.datetime, pyomd.timeval_t, None] = utils.adaptive, # type: ignore
|
|
34
|
+
date: Union[datetime.date, otp.date, None] = None,
|
|
35
|
+
start_time_expression: Optional[str] = None,
|
|
36
|
+
end_time_expression: Optional[str] = None,
|
|
37
|
+
timezone=utils.default, # type: ignore
|
|
38
|
+
context=utils.default, # type: ignore
|
|
39
|
+
username: Optional[str] = None,
|
|
40
|
+
alternative_username: Optional[str] = None,
|
|
41
|
+
password: Optional[str] = None,
|
|
42
|
+
batch_size: Union[int, Type[utils.default], None] = utils.default,
|
|
43
|
+
running: Optional[bool] = False,
|
|
44
|
+
query_properties: Optional[pyomd.QueryProperties] = None, # type: ignore
|
|
45
|
+
concurrency: Union[int, Type[utils.default], None] = utils.default,
|
|
46
|
+
apply_times_daily: Optional[int] = None,
|
|
47
|
+
symbol_date: Union[datetime.datetime, int, str, None] = None,
|
|
48
|
+
query_params: Optional[Dict[str, Any]] = None,
|
|
49
|
+
time_as_nsec: bool = True,
|
|
50
|
+
treat_byte_arrays_as_strings: bool = True,
|
|
51
|
+
output_matrix_per_field: bool = False,
|
|
52
|
+
output_structure: Optional[str] = None,
|
|
53
|
+
return_utc_times: Optional[bool] = None,
|
|
54
|
+
connection=None,
|
|
55
|
+
callback=None,
|
|
56
|
+
svg_path=None,
|
|
57
|
+
use_connection_pool: bool = False,
|
|
58
|
+
node_name: Union[str, List[str], None] = None,
|
|
59
|
+
require_dict: bool = False,
|
|
60
|
+
max_expected_ticks_per_symbol: Optional[int] = None,
|
|
61
|
+
log_symbol: Union[bool, Type[utils.default]] = utils.default,
|
|
62
|
+
encoding: Optional[str] = None,
|
|
63
|
+
manual_dataframe_callback: bool = False):
|
|
64
|
+
"""
|
|
65
|
+
Executes a query and returns its result.
|
|
66
|
+
|
|
67
|
+
Parameters
|
|
68
|
+
----------
|
|
69
|
+
query: :py:class:`onetick.py.Source`, otq.Ep, otq.GraphQuery, otq.ChainQuery, str, otq.Chainlet,\
|
|
70
|
+
Callable, otq.SqlQuery, :py:class:`onetick.py.SqlQuery`
|
|
71
|
+
Query to execute can be source, path of the query on a disk or onetick.query graph or event processor.
|
|
72
|
+
For running OTQ files, it represents the path (including filename) to the OTQ file to run a single query within
|
|
73
|
+
the file. If more than one query is present, then the query to be run must be specified
|
|
74
|
+
(that is, ``'path_to_file/otq_file.otq::query_to_run'``).
|
|
75
|
+
|
|
76
|
+
``query`` can also be a function that has a symbol object as the first parameter.
|
|
77
|
+
This object can be used to get symbol name and symbol parameters.
|
|
78
|
+
Function must return a :py:class:`Source <onetick.py.Source>`.
|
|
79
|
+
symbols: str, list of str, list of otq.Symbol, :py:class:`onetick.py.Source`, pd.DataFrame, optional
|
|
80
|
+
Symbol(s) to run the query for passed as a string, a list of strings, a pd.DataFrame with the ``SYMBOL_NAME``
|
|
81
|
+
column, or as a "symbols" query which results include the ``SYMBOL_NAME`` column. The start/end times for the
|
|
82
|
+
symbols query will be taken from the params below.
|
|
83
|
+
See :ref:`symbols <static/concepts/symbols:Symbols: bound and unbound>` for more details.
|
|
84
|
+
start: :py:class:`datetime.datetime`, :py:class:`otp.datetime <onetick.py.datetime>`,\
|
|
85
|
+
:py:class:`pyomd.timeval_t`, optional
|
|
86
|
+
The start time of the query. Can be timezone-naive or timezone-aware. See also ``timezone`` argument.
|
|
87
|
+
onetick.py uses :py:attr:`default_start_time<onetick.py.configuration.Config.default_start_time>`
|
|
88
|
+
as default value, if you don't want to specify start time, e.g. to use saved time of the query,
|
|
89
|
+
then you should specify None value.
|
|
90
|
+
end: :py:class:`datetime.datetime`, :py:class:`otp.datetime <onetick.py.datetime>`,\
|
|
91
|
+
:py:class:`pyomd.timeval_t`, optional
|
|
92
|
+
The end time of the query (note that it's non-inclusive).
|
|
93
|
+
Can be timezone-naive or timezone-aware. See also ``timezone`` argument.
|
|
94
|
+
onetick.py uses :py:attr:`default_end_time<onetick.py.configuration.Config.default_end_time>`
|
|
95
|
+
as default value, if you don't want to specify end time, e.g. to use saved time of the query,
|
|
96
|
+
then you should specify None value.
|
|
97
|
+
date: :py:class:`datetime.date`, :py:class:`otp.date <onetick.py.date>`, optional
|
|
98
|
+
The date to run the query for. Can be set instead of ``start`` and ``end`` parameters.
|
|
99
|
+
If set then the interval to run the query will be from 0:00 to 24:00 of the specified date.
|
|
100
|
+
start_time_expression: str, :py:class:`~onetick.py.Operation`, optional
|
|
101
|
+
Start time onetick expression of the query. If specified, it will take precedence over ``start``.
|
|
102
|
+
Supported only if query is Source, Graph or Event Processor.
|
|
103
|
+
Not supported for WebAPI mode.
|
|
104
|
+
end_time_expression: str, :py:class:`~onetick.py.Operation`, optional
|
|
105
|
+
End time onetick expression of the query. If specified, it will take precedence over ``end``.
|
|
106
|
+
Supported only if query is Source, Graph or Event Processor.
|
|
107
|
+
Not supported for WebAPI mode.
|
|
108
|
+
timezone: str, optional
|
|
109
|
+
The timezone of output timestamps.
|
|
110
|
+
Also, when start and/or end arguments are timezone-naive, it will define their timezone.
|
|
111
|
+
If parameter is omitted timestamps of ticks will be formatted
|
|
112
|
+
with the default :py:attr:`tz<onetick.py.configuration.Config.tz>`.
|
|
113
|
+
context: str, optional
|
|
114
|
+
Allows specification of different contexts from OneTick configuration to connect to.
|
|
115
|
+
If not set then default :py:attr:`context<onetick.py.configuration.Config.context>` is used.
|
|
116
|
+
See :ref:`guide about switching contexts <switching contexts>` for examples.
|
|
117
|
+
username
|
|
118
|
+
The username to make the connection.
|
|
119
|
+
By default the user which executed the process is used.
|
|
120
|
+
alternative_username: str
|
|
121
|
+
The username used for authentication.
|
|
122
|
+
Needs to be set only when the tick server is configured to use password-based authentication.
|
|
123
|
+
By default, :py:attr:`default_auth_username<onetick.py.configuration.Config.default_auth_username>` is used.
|
|
124
|
+
Not supported for WebAPI mode.
|
|
125
|
+
password: str, optional
|
|
126
|
+
The password used for authentication.
|
|
127
|
+
Needs to be set only when the tick server is configured to use password-based authentication.
|
|
128
|
+
Note: not supported and ignored on older OneTick versions.
|
|
129
|
+
By default, :py:attr:`default_password<onetick.py.configuration.Config.default_password>` is used.
|
|
130
|
+
batch_size: int
|
|
131
|
+
number of symbols to run in one batch.
|
|
132
|
+
By default, the value from
|
|
133
|
+
:py:attr:`default_batch_size<onetick.py.configuration.Config.default_batch_size>` is used.
|
|
134
|
+
Not supported for WebAPI mode.
|
|
135
|
+
running: bool, optional
|
|
136
|
+
Indicates whether a query is CEP or not. Default is `False`.
|
|
137
|
+
query_properties: :py:class:`pyomd.QueryProperties` or dict, optional
|
|
138
|
+
Query properties, such as ONE_TO_MANY_POLICY, ALLOW_GRAPH_REUSE, etc
|
|
139
|
+
concurrency: int, optional
|
|
140
|
+
The maximum number of CPU cores to use to process the query.
|
|
141
|
+
By default, the value from
|
|
142
|
+
:py:attr:`default_concurrency<onetick.py.configuration.Config.default_concurrency>` is used.
|
|
143
|
+
apply_times_daily: bool
|
|
144
|
+
Runs the query for every day in the ``start``-``end`` time range,
|
|
145
|
+
using the time components of ``start`` and ``end`` datetimes.
|
|
146
|
+
|
|
147
|
+
Note that those daily intervals are executed separately, so you don't have access
|
|
148
|
+
to the data from previous or next days (see example in the next section).
|
|
149
|
+
symbol_date:
|
|
150
|
+
The symbol date used to look up symbology mapping information in the reference database,
|
|
151
|
+
expressed as datetime object or integer of YYYYMMDD format
|
|
152
|
+
query_params: dict
|
|
153
|
+
Parameters of the query.
|
|
154
|
+
time_as_nsec: bool
|
|
155
|
+
Outputs timestamps up to nanoseconds granularity
|
|
156
|
+
(defaults to True: by default timestamps are output with nanosecond granularity)
|
|
157
|
+
treat_byte_arrays_as_strings: bool
|
|
158
|
+
Outputs byte arrays as strings (defaults to True)
|
|
159
|
+
Not supported for WebAPI mode.
|
|
160
|
+
output_matrix_per_field: bool
|
|
161
|
+
Changes output format to list of matrices per field.
|
|
162
|
+
Not supported for WebAPI mode.
|
|
163
|
+
output_structure: otp.Source.OutputStructure, optional
|
|
164
|
+
|
|
165
|
+
Structure (type) of the result. Supported values are:
|
|
166
|
+
- `df` (default) - the result is returned as :pandas:`pandas.DataFrame` object
|
|
167
|
+
or dictionary of symbol names and :pandas:`pandas.DataFrame` objects
|
|
168
|
+
in case of using multiple symbols or first stage query.
|
|
169
|
+
- `map` - the result is returned as SymbolNumpyResultMap.
|
|
170
|
+
- `list` - the result is returned as list.
|
|
171
|
+
- `polars` - the result is returned as
|
|
172
|
+
`polars.DataFrame <https://docs.pola.rs/api/python/stable/reference/dataframe/index.html>`_ object
|
|
173
|
+
or dictionary of symbol names and dataframe objects
|
|
174
|
+
(**Only supported in WebAPI mode**).
|
|
175
|
+
return_utc_times: bool
|
|
176
|
+
If True, return times in the UTC timezone; otherwise return them in the local timezone.
|
|
177
|
+
Not supported for WebAPI mode.
|
|
178
|
+
connection: :py:class:`pyomd.Connection`
|
|
179
|
+
The connection to be used for discovering nested .otq files
|
|
180
|
+
Not supported for WebAPI mode.
|
|
181
|
+
callback: :py:class:`onetick.py.CallbackBase`
|
|
182
|
+
Class with callback methods.
|
|
183
|
+
If set, the output of the query should be controlled with callbacks
|
|
184
|
+
and this function returns nothing.
|
|
185
|
+
svg_path: str, optional
|
|
186
|
+
Not supported for WebAPI mode.
|
|
187
|
+
use_connection_pool: bool
|
|
188
|
+
Default is False. If set to True, the connection pool is used.
|
|
189
|
+
Not supported for WebAPI mode.
|
|
190
|
+
node_name: str, List[str], optional
|
|
191
|
+
Name of the output node to select result from. If query graph has several output nodes, you can specify the name
|
|
192
|
+
of the node to choose result from. If node_name was specified, query should be presented by path on the disk
|
|
193
|
+
and output_structure should be `df`
|
|
194
|
+
require_dict: bool
|
|
195
|
+
If set to True, result will be forced to be a dictionary even if it's returned for a single symbol
|
|
196
|
+
max_expected_ticks_per_symbol: int
|
|
197
|
+
Expected maximum number of ticks per symbol (used for performance optimizations).
|
|
198
|
+
By default,
|
|
199
|
+
:py:attr:`max_expected_ticks_per_symbol<onetick.py.configuration.Config.max_expected_ticks_per_symbol>`
|
|
200
|
+
is used.
|
|
201
|
+
Not supported for WebAPI mode.
|
|
202
|
+
log_symbol: bool
|
|
203
|
+
Log currently executed symbol.
|
|
204
|
+
Note that this only works with unbound symbols.
|
|
205
|
+
Also in this case :py:func:`otp.run<onetick.py.run>` is executed in ``callback`` mode
|
|
206
|
+
and no value is returned from the function, so it should be used only for debugging purposes.
|
|
207
|
+
This logging will not work if some other value is specified in parameter ``callback``.
|
|
208
|
+
By default, :py:attr:`otp.config.log_symbol<onetick.py.configuration.Config.log_symbol>` is used.
|
|
209
|
+
encoding: str, optional
|
|
210
|
+
The encoding of string fields.
|
|
211
|
+
manual_dataframe_callback: bool
|
|
212
|
+
Create dataframe manually with ``callback`` mode.
|
|
213
|
+
Only works if ``output_structure='df'`` is specified and parameter ``callback`` is not.
|
|
214
|
+
May improve performance in some cases.
|
|
215
|
+
|
|
216
|
+
Returns
|
|
217
|
+
-------
|
|
218
|
+
result, list, dict, :pandas:`pandas.DataFrame`, None
|
|
219
|
+
result of the query
|
|
220
|
+
|
|
221
|
+
Examples
|
|
222
|
+
--------
|
|
223
|
+
|
|
224
|
+
Running :py:class:`onetick.py.Source` and setting start and end times:
|
|
225
|
+
|
|
226
|
+
>>> data = otp.Tick(A=1)
|
|
227
|
+
>>> otp.run(data, start=otp.dt(2003, 12, 2), end=otp.dt(2003, 12, 4))
|
|
228
|
+
Time A
|
|
229
|
+
0 2003-12-02 1
|
|
230
|
+
|
|
231
|
+
Setting query interval with ``date`` parameter:
|
|
232
|
+
|
|
233
|
+
>>> data = otp.Tick(A=1)
|
|
234
|
+
>>> data['START'] = data['_START_TIME']
|
|
235
|
+
>>> data['END'] = data['_END_TIME']
|
|
236
|
+
>>> otp.run(data, date=otp.dt(2003, 12, 1))
|
|
237
|
+
Time A START END
|
|
238
|
+
0 2003-12-01 1 2003-12-01 2003-12-02
|
|
239
|
+
|
|
240
|
+
Running otq.Ep and passing query parameters:
|
|
241
|
+
|
|
242
|
+
>>> ep = otq.TickGenerator(bucket_interval=0, fields='long A = $X').tick_type('TT')
|
|
243
|
+
>>> otp.run(ep, symbols='LOCAL::', query_params={'X': 1})
|
|
244
|
+
Time A
|
|
245
|
+
0 2003-12-04 1
|
|
246
|
+
|
|
247
|
+
Running in callback mode:
|
|
248
|
+
|
|
249
|
+
>>> class Callback(otp.CallbackBase):
|
|
250
|
+
... def __init__(self):
|
|
251
|
+
... self.result = None
|
|
252
|
+
... def process_tick(self, tick, time):
|
|
253
|
+
... self.result = tick
|
|
254
|
+
>>> data = otp.Tick(A=1)
|
|
255
|
+
>>> callback = Callback()
|
|
256
|
+
>>> otp.run(data, callback=callback)
|
|
257
|
+
>>> callback.result
|
|
258
|
+
{'A': 1}
|
|
259
|
+
|
|
260
|
+
Running with ``apply_times_daily``.
|
|
261
|
+
Note that daily intervals are processed separately so, for example,
|
|
262
|
+
we can't access column **COUNT** from previous day.
|
|
263
|
+
|
|
264
|
+
>>> trd = otp.DataSource('US_COMP', symbols='AAPL', tick_type='TRD') # doctest: +SKIP
|
|
265
|
+
>>> trd = trd.agg({'COUNT': otp.agg.count()},
|
|
266
|
+
... bucket_interval=12 * 3600, bucket_time='start') # doctest: +SKIP
|
|
267
|
+
>>> trd['PREV_COUNT'] = trd['COUNT'][-1] # doctest: +SKIP
|
|
268
|
+
>>> otp.run(trd, apply_times_daily=True,
|
|
269
|
+
... start=otp.dt(2023, 4, 3), end=otp.dt(2023, 4, 5), timezone='EST5EDT') # doctest: +SKIP
|
|
270
|
+
Time COUNT PREV_COUNT
|
|
271
|
+
0 2023-04-03 00:00:00 328447 0
|
|
272
|
+
1 2023-04-03 12:00:00 240244 328447
|
|
273
|
+
2 2023-04-04 00:00:00 263293 0
|
|
274
|
+
3 2023-04-04 12:00:00 193018 263293
|
|
275
|
+
|
|
276
|
+
Using a function as a ``query``, accessing symbol name and parameters:
|
|
277
|
+
|
|
278
|
+
>>> def query(symbol):
|
|
279
|
+
... t = otp.Tick(X='x')
|
|
280
|
+
... t['SYMBOL_NAME'] = symbol.name
|
|
281
|
+
... t['SYMBOL_PARAM'] = symbol.PARAM
|
|
282
|
+
... return t
|
|
283
|
+
>>> symbols = otp.Ticks({'SYMBOL_NAME': ['A', 'B'], 'PARAM': [1, 2]})
|
|
284
|
+
>>> result = otp.run(query, symbols=symbols)
|
|
285
|
+
>>> result['A']
|
|
286
|
+
Time X SYMBOL_NAME SYMBOL_PARAM
|
|
287
|
+
0 2003-12-01 x A 1
|
|
288
|
+
>>> result['B']
|
|
289
|
+
Time X SYMBOL_NAME SYMBOL_PARAM
|
|
290
|
+
0 2003-12-01 x B 2
|
|
291
|
+
|
|
292
|
+
Debugging unbound symbols with ``log_symbol`` parameter:
|
|
293
|
+
|
|
294
|
+
>>> data = otp.Tick(X=1)
|
|
295
|
+
>>> symbols = otp.Ticks({'SYMBOL_NAME': ['A', 'B'], 'PARAM': [1, 2]})
|
|
296
|
+
>>> otp.run(query, symbols=symbols, log_symbol=True) # doctest: +ELLIPSIS
|
|
297
|
+
Running query <onetick.py.sources.ticks.Tick object at ...>
|
|
298
|
+
Processing symbol A
|
|
299
|
+
Processing symbol B
|
|
300
|
+
|
|
301
|
+
By default, some non-standard characters in data strings could be processed incorrectly:
|
|
302
|
+
|
|
303
|
+
>>> data = ['AA測試AA']
|
|
304
|
+
>>> source = otp.Ticks({'A': data})
|
|
305
|
+
>>> otp.run(source)
|
|
306
|
+
Time A
|
|
307
|
+
0 2003-12-01 AA測試AA
|
|
308
|
+
|
|
309
|
+
To fix this you can pass `encoding` parameter to `otp.run`:
|
|
310
|
+
|
|
311
|
+
.. testcode::
|
|
312
|
+
:skipif: not has_query_encoding_parameter()
|
|
313
|
+
|
|
314
|
+
data = ['AA測試AA']
|
|
315
|
+
source = otp.Ticks({'A': data})
|
|
316
|
+
df = otp.run(source, encoding="utf-8")
|
|
317
|
+
print(df)
|
|
318
|
+
|
|
319
|
+
.. testoutput::
|
|
320
|
+
|
|
321
|
+
Time A
|
|
322
|
+
0 2003-12-01 AA測試AA
|
|
323
|
+
|
|
324
|
+
Note that query ``start`` time is inclusive, but query ``end`` time is not,
|
|
325
|
+
meaning that ticks with timestamps equal to the query end time will not be included:
|
|
326
|
+
|
|
327
|
+
>>> data = otp.Tick(A=1, bucket_interval=24*60*60)
|
|
328
|
+
>>> data['A'] = data['TIMESTAMP'].dt.day_of_month()
|
|
329
|
+
>>> otp.run(data, start=otp.dt(2003, 12, 1), end=otp.dt(2003, 12, 4))
|
|
330
|
+
Time A
|
|
331
|
+
0 2003-12-01 1
|
|
332
|
+
1 2003-12-02 2
|
|
333
|
+
2 2003-12-03 3
|
|
334
|
+
>>> otp.run(data, start=otp.dt(2003, 12, 1), end=otp.dt(2003, 12, 2))
|
|
335
|
+
Time A
|
|
336
|
+
0 2003-12-01 1
|
|
337
|
+
|
|
338
|
+
If you want to include such ticks, you can add one nanosecond to the query end time:
|
|
339
|
+
|
|
340
|
+
>>> otp.run(data, start=otp.dt(2003, 12, 1), end=otp.dt(2003, 12, 2) + otp.Nano(1))
|
|
341
|
+
Time A
|
|
342
|
+
0 2003-12-01 1
|
|
343
|
+
1 2003-12-02 2
|
|
344
|
+
"""
|
|
345
|
+
_ = otli.OneTickLib()
|
|
346
|
+
|
|
347
|
+
query_schema = None
|
|
348
|
+
if isinstance(query, otp.Source):
|
|
349
|
+
query_schema = query.schema
|
|
350
|
+
|
|
351
|
+
if timezone is utils.default:
|
|
352
|
+
timezone = configuration.config.tz
|
|
353
|
+
if context is utils.default or context is None:
|
|
354
|
+
context = configuration.config.context
|
|
355
|
+
if concurrency is utils.default:
|
|
356
|
+
concurrency = configuration.default_query_concurrency()
|
|
357
|
+
|
|
358
|
+
if batch_size is utils.default:
|
|
359
|
+
batch_size = configuration.config.default_batch_size
|
|
360
|
+
if query_properties is None:
|
|
361
|
+
query_properties = pyomd.QueryProperties()
|
|
362
|
+
|
|
363
|
+
if isinstance(query_properties, dict):
|
|
364
|
+
qp_dict = query_properties
|
|
365
|
+
query_properties = utils.query_properties_from_dict(qp_dict)
|
|
366
|
+
else:
|
|
367
|
+
qp_dict = utils.query_properties_to_dict(query_properties)
|
|
368
|
+
|
|
369
|
+
if 'USE_FT' not in qp_dict:
|
|
370
|
+
query_properties.set_property_value('USE_FT', otp.config.default_fault_tolerance) # type: ignore[union-attr]
|
|
371
|
+
|
|
372
|
+
if 'IGNORE_TICKS_IN_UNENTITLED_TIME_RANGE' not in qp_dict:
|
|
373
|
+
query_properties.set_property_value('IGNORE_TICKS_IN_UNENTITLED_TIME_RANGE', # type: ignore[union-attr]
|
|
374
|
+
str(otp.config.ignore_ticks_in_unentitled_time_range).upper())
|
|
375
|
+
|
|
376
|
+
if date is not None:
|
|
377
|
+
for v in (start, end, start_time_expression, end_time_expression):
|
|
378
|
+
if v is not None and v is not utils.adaptive:
|
|
379
|
+
raise ValueError("Can't use 'date' parameter when other time interval parameters are specified")
|
|
380
|
+
start = otp.date(date)
|
|
381
|
+
end = start + otp.Day(1)
|
|
382
|
+
|
|
383
|
+
has_source_start, has_source_end = False, False
|
|
384
|
+
if isinstance(query, otp.Source):
|
|
385
|
+
has_source_start, has_source_end = query.has_start_end_time()
|
|
386
|
+
|
|
387
|
+
if (start is None or start is utils.adaptive) and otp.config.get('default_start_time') is None and \
|
|
388
|
+
not has_source_start:
|
|
389
|
+
warnings.warn('Start time is None and default start time is not set, '
|
|
390
|
+
'onetick.query will use 19700101 as start time, '
|
|
391
|
+
'which can cause unexpected results. '
|
|
392
|
+
'Please set start time explicitly.')
|
|
393
|
+
if (end is None or end is utils.adaptive) and otp.config.get('default_end_time') is None and \
|
|
394
|
+
not has_source_end:
|
|
395
|
+
warnings.warn('End time is None and default end time is not set, '
|
|
396
|
+
'onetick.query will use 19700101 as end time, '
|
|
397
|
+
'which can cause unexpected results. '
|
|
398
|
+
'Please set end time explicitly.')
|
|
399
|
+
|
|
400
|
+
if isinstance(start, _Operation) and start_time_expression is None:
|
|
401
|
+
start_time_expression = str(start)
|
|
402
|
+
if isinstance(end, _Operation) and end_time_expression is None:
|
|
403
|
+
end_time_expression = str(end)
|
|
404
|
+
|
|
405
|
+
if isinstance(start_time_expression, _Operation):
|
|
406
|
+
start_time_expression = str(start_time_expression)
|
|
407
|
+
if isinstance(end_time_expression, _Operation):
|
|
408
|
+
end_time_expression = str(end_time_expression)
|
|
409
|
+
|
|
410
|
+
# PY-1321: CEP-query seems to be using start and end values for some reason, so setting them to None
|
|
411
|
+
if start_time_expression is not None:
|
|
412
|
+
start = None
|
|
413
|
+
if end_time_expression is not None:
|
|
414
|
+
end = None
|
|
415
|
+
|
|
416
|
+
if inspect.ismethod(query) or inspect.isfunction(query):
|
|
417
|
+
t_s = None
|
|
418
|
+
if isinstance(symbols, otp.Source):
|
|
419
|
+
t_s = symbols
|
|
420
|
+
if isinstance(symbols, otp.query):
|
|
421
|
+
t_s = otp.Query(symbols)
|
|
422
|
+
if isinstance(symbols, str):
|
|
423
|
+
t_s = otp.Tick(SYMBOL_NAME=symbols)
|
|
424
|
+
if isinstance(symbols, list):
|
|
425
|
+
t_s = otp.Ticks(SYMBOL_NAME=symbols)
|
|
426
|
+
|
|
427
|
+
if isinstance(t_s, otp.Source):
|
|
428
|
+
query = query(t_s.to_symbol_param()) # type: ignore
|
|
429
|
+
|
|
430
|
+
query, query_params = _preprocess_otp_query(query, query_params)
|
|
431
|
+
# If query is an otp.Source object, then it can deal with otp.datetime and pd.Timestamp types
|
|
432
|
+
|
|
433
|
+
if log_symbol is utils.default:
|
|
434
|
+
log_symbol = otp.config.log_symbol
|
|
435
|
+
if callback is None and log_symbol:
|
|
436
|
+
callback = LogCallback(query)
|
|
437
|
+
|
|
438
|
+
if manual_dataframe_callback:
|
|
439
|
+
if output_structure and output_structure != 'df':
|
|
440
|
+
raise ValueError("Parameter 'output_structure' must be set to 'df'"
|
|
441
|
+
" if parameter 'manual_dataframe_callback' is set")
|
|
442
|
+
if log_symbol:
|
|
443
|
+
raise ValueError("Parameters 'manual_dataframe_callback' and 'log_symbol' can't be set together")
|
|
444
|
+
if callback is not None:
|
|
445
|
+
raise ValueError("Parameters 'manual_dataframe_callback' and 'callback' can't be set together")
|
|
446
|
+
callback = ManualDataframeCallback(timezone)
|
|
447
|
+
|
|
448
|
+
output_mode = otq.QueryOutputMode.numpy
|
|
449
|
+
if callback is not None:
|
|
450
|
+
output_mode = otq.QueryOutputMode.callback
|
|
451
|
+
if output_structure == 'polars':
|
|
452
|
+
if not otq.webapi:
|
|
453
|
+
raise ValueError("Parameter output_structure='polars' is only supported in WebAPI mode.")
|
|
454
|
+
try:
|
|
455
|
+
import polars as _ # type: ignore
|
|
456
|
+
except ImportError:
|
|
457
|
+
raise ValueError("Parameter output_structure='polars' is specified, but module polars can't be imported. "
|
|
458
|
+
"Use 'pip install onetick-py[polars]' command to install onetick-py with polars support.")
|
|
459
|
+
try:
|
|
460
|
+
output_mode = otq.QueryOutputMode.polars
|
|
461
|
+
except AttributeError:
|
|
462
|
+
raise ValueError("Parameter output_structure='polars' is specified, but it's not supported "
|
|
463
|
+
"by installed onetick.query_webapi library.")
|
|
464
|
+
|
|
465
|
+
output_structure, output_structure_for_otq = _process_output_structure(output_structure)
|
|
466
|
+
if symbol_date:
|
|
467
|
+
# otq.run supports only strings and datetime.date
|
|
468
|
+
symbol_date = utils.symbol_date_to_str(symbol_date)
|
|
469
|
+
|
|
470
|
+
require_dict = require_dict or _is_dict_required(symbols)
|
|
471
|
+
|
|
472
|
+
# converting symbols properly
|
|
473
|
+
if isinstance(symbols, otp.Source):
|
|
474
|
+
# check if SYMBOL_NAME is in schema, or if schema contains only one field
|
|
475
|
+
if ('SYMBOL_NAME' not in symbols.columns(skip_meta_fields=True).keys()) and \
|
|
476
|
+
len(symbols.columns(skip_meta_fields=True)) != 1:
|
|
477
|
+
warnings.warn('Using as a symbol list a source without "SYMBOL_NAME" field '
|
|
478
|
+
'and with more than one field! This won\'t work unless the schema is incomplete')
|
|
479
|
+
|
|
480
|
+
symbols = otp.Source._convert_symbol_to_string(
|
|
481
|
+
symbol=symbols,
|
|
482
|
+
tmp_otq=query._tmp_otq if isinstance(query, otp.Source) else None,
|
|
483
|
+
start=start,
|
|
484
|
+
end=end,
|
|
485
|
+
timezone=timezone
|
|
486
|
+
)
|
|
487
|
+
if isinstance(symbols, str):
|
|
488
|
+
symbols = [symbols]
|
|
489
|
+
if isinstance(symbols, pd.DataFrame):
|
|
490
|
+
symbols = utils.get_symbol_list_from_df(symbols)
|
|
491
|
+
|
|
492
|
+
if isinstance(query, dict):
|
|
493
|
+
# we assume it's a dictionary of sources for the MultiOutputSource object
|
|
494
|
+
query = otp.MultiOutputSource(query)
|
|
495
|
+
|
|
496
|
+
params_saved_to_otq = {}
|
|
497
|
+
if isinstance(query, otp.Source) or isinstance(query, otp.MultiOutputSource):
|
|
498
|
+
start = None if start is utils.adaptive else start
|
|
499
|
+
end = None if end is utils.adaptive else end
|
|
500
|
+
params_saved_to_otq = dict(
|
|
501
|
+
symbols=symbols,
|
|
502
|
+
start=start,
|
|
503
|
+
end=end,
|
|
504
|
+
start_time_expression=start_time_expression,
|
|
505
|
+
end_time_expression=end_time_expression,
|
|
506
|
+
)
|
|
507
|
+
param_upd = query._prepare_for_execution(symbols=symbols, start=start, end=end,
|
|
508
|
+
timezone=timezone,
|
|
509
|
+
start_time_expression=start_time_expression,
|
|
510
|
+
end_time_expression=end_time_expression,
|
|
511
|
+
require_dict=require_dict,
|
|
512
|
+
running_query_flag=running,
|
|
513
|
+
node_name=node_name, has_output=None)
|
|
514
|
+
query, require_dict, node_name = param_upd
|
|
515
|
+
# symbols and start/end times should be already stored in the query and should not be passed again
|
|
516
|
+
symbols = None
|
|
517
|
+
start = None
|
|
518
|
+
end = None
|
|
519
|
+
start_time_expression = None
|
|
520
|
+
end_time_expression = None
|
|
521
|
+
time_as_nsec = True
|
|
522
|
+
|
|
523
|
+
elif isinstance(query, (otq.graph_components.EpBase, otq.Chainlet)):
|
|
524
|
+
query = otq.GraphQuery(query)
|
|
525
|
+
|
|
526
|
+
if isinstance(query, otq.SqlQuery):
|
|
527
|
+
# This has no impact on query result, just placeholder values
|
|
528
|
+
start = end = None
|
|
529
|
+
|
|
530
|
+
if start is utils.adaptive:
|
|
531
|
+
start = configuration.config.default_start_time
|
|
532
|
+
|
|
533
|
+
if end is utils.adaptive:
|
|
534
|
+
end = configuration.config.default_end_time
|
|
535
|
+
|
|
536
|
+
if not otq.webapi:
|
|
537
|
+
# converting to expressions, because in datetime objects nanoseconds are not supported on some OneTick versions
|
|
538
|
+
if start is not None and not start_time_expression:
|
|
539
|
+
start_time_expression = datetime2expr(start)
|
|
540
|
+
if end is not None and not end_time_expression:
|
|
541
|
+
end_time_expression = datetime2expr(end)
|
|
542
|
+
|
|
543
|
+
# start and end parameters could be set to None,
|
|
544
|
+
# because we use start and end time expressions,
|
|
545
|
+
# but because of the bug it sometimes doesn't work
|
|
546
|
+
# https://onemarketdata.atlassian.net/browse/BDS-454
|
|
547
|
+
start, end = _get_start_end(start, end, timezone)
|
|
548
|
+
|
|
549
|
+
# authentication
|
|
550
|
+
alternative_username = alternative_username or otp.config.default_auth_username
|
|
551
|
+
password = password or otp.config.default_password
|
|
552
|
+
kwargs = {}
|
|
553
|
+
if password is not None and has_password_param(throw_warning=True):
|
|
554
|
+
kwargs['password'] = password
|
|
555
|
+
|
|
556
|
+
max_expected_ticks_per_symbol = max_expected_ticks_per_symbol or otp.config.max_expected_ticks_per_symbol
|
|
557
|
+
if has_max_expected_ticks_per_symbol(throw_warning=True):
|
|
558
|
+
kwargs['max_expected_ticks_per_symbol'] = max_expected_ticks_per_symbol
|
|
559
|
+
|
|
560
|
+
if encoding is not None and has_query_encoding_parameter(throw_warning=True):
|
|
561
|
+
kwargs['encoding'] = encoding
|
|
562
|
+
|
|
563
|
+
run_params = dict(
|
|
564
|
+
query=query,
|
|
565
|
+
symbols=symbols, start=start, end=end, context=context, username=username,
|
|
566
|
+
timezone=timezone,
|
|
567
|
+
start_time_expression=start_time_expression,
|
|
568
|
+
end_time_expression=end_time_expression,
|
|
569
|
+
alternative_username=alternative_username, batch_size=batch_size,
|
|
570
|
+
running_query_flag=running, query_properties=query_properties,
|
|
571
|
+
max_concurrency=concurrency, apply_times_daily=apply_times_daily, symbol_date=symbol_date,
|
|
572
|
+
query_params=query_params, time_as_nsec=time_as_nsec,
|
|
573
|
+
treat_byte_arrays_as_strings=treat_byte_arrays_as_strings,
|
|
574
|
+
output_mode=output_mode,
|
|
575
|
+
output_matrix_per_field=output_matrix_per_field, output_structure=output_structure_for_otq,
|
|
576
|
+
return_utc_times=return_utc_times, connection=connection,
|
|
577
|
+
callback=callback, svg_path=svg_path, use_connection_pool=use_connection_pool, **kwargs
|
|
578
|
+
)
|
|
579
|
+
|
|
580
|
+
# some parameters were saved in .otq file, we need to debug them too
|
|
581
|
+
debug_params = dict(run_params, **params_saved_to_otq) if params_saved_to_otq else run_params
|
|
582
|
+
otp.get_logger(__name__).info(otp.utils.json_dumps(debug_params))
|
|
583
|
+
|
|
584
|
+
try:
|
|
585
|
+
result = otq.run(**run_params)
|
|
586
|
+
except Exception as e:
|
|
587
|
+
e = _add_stack_info_to_exception(e)
|
|
588
|
+
e = _add_version_info_to_exception(e)
|
|
589
|
+
raise e # noqa: W0707
|
|
590
|
+
|
|
591
|
+
if output_mode == otq.QueryOutputMode.callback:
|
|
592
|
+
if manual_dataframe_callback:
|
|
593
|
+
result = callback.result
|
|
594
|
+
return result
|
|
595
|
+
|
|
596
|
+
# node_names should be either a list of node names or None
|
|
597
|
+
node_names: Optional[List[str]]
|
|
598
|
+
if isinstance(node_name, str):
|
|
599
|
+
node_names = [node_name]
|
|
600
|
+
else:
|
|
601
|
+
node_names = node_name
|
|
602
|
+
|
|
603
|
+
if query_schema:
|
|
604
|
+
# check if we have empty result for any symbol to add schema to empty dataframes
|
|
605
|
+
_process_empty_results(result, query_schema, output_structure)
|
|
606
|
+
|
|
607
|
+
return _format_call_output(result, output_structure=output_structure,
|
|
608
|
+
require_dict=require_dict, node_names=node_names)
|
|
609
|
+
|
|
610
|
+
|
|
611
|
+
def _filter_returned_map_by_node(result, _node_names):
    """
    Placeholder for node-name filtering of map-structured results.

    ``result`` is expected to look like ``{symbol: {node_name: data}}``.
    Filtering is not implemented yet, so ``_node_names`` is ignored and
    ``result`` is handed back untouched (the very same object).
    """
    # TODO: implement filtering by node_name in a way
    #       that no information from SymbolNumpyResultMap object is lost
    return result
|
|
619
|
+
|
|
620
|
+
|
|
621
|
+
def _filter_returned_list_by_node(result, node_names):
    """
    Keep only the entries of a list-structured result that belong to ``node_names``.

    ``result`` is an iterable of tuples ``(symbol, data_1, data_2, node_name, ...)``.

    * If ``node_names`` is empty or None, ``result`` is returned as is.
    * Otherwise a new list of 4-tuples ``(symbol, data_1, data_2, node_name)``
      is returned; any trailing tuple elements are dropped.

    Raises
    ------
    ValueError
        If at least one entry carries data (``len(data_1) > 0``)
        but no entry matches any of the requested node names.
    """
    if not node_names:
        return result

    has_data = False
    matched = []
    for symbol, data_1, data_2, node, *_ in result:
        # every entry is inspected, even the ones that are filtered out
        has_data |= len(data_1) > 0
        if node in node_names:
            matched.append((symbol, data_1, data_2, node))

    # a non-empty `matched` list means that at least one node name was found
    if has_data and not matched:
        # TODO: Do we even want to raise it?
        raise ValueError(f'No passed node name(s) were found in the results. Passed node names were: {node_names}')
    return matched
|
|
644
|
+
|
|
645
|
+
|
|
646
|
+
def _form_dict_from_list(data_list, output_structure):
    """
    Group a list-structured result by symbol and turn the data into dataframes.

    ``data_list`` has the format ``[(symbol, data_1, data_2, node_name, ...), ...]``.
    The value stored for each symbol in the returned dictionary depends on
    how many results and node names that symbol has:

    * ``{symbol: DataFrame(data_1)}``: one result for the symbol;
    * ``{symbol: [DataFrame(data_1)]}``: several results, all for a single node name;
    * ``{symbol: {node_name: DataFrame(data_1)}}``: single results for several node names;
    * ``{symbol: {node_name: [DataFrame(data_1)]}}``: several results for several node names.

    With ``output_structure == 'df'`` the dataframes are ``pandas.DataFrame`` objects,
    for any other value they are ``polars.DataFrame`` objects.
    """

    def to_frame(raw):
        if output_structure != 'df':
            import polars
            # polars only works in webapi mode,
            # and it's already returned as polars.DataFrame by onetick.query_webapi
            if isinstance(raw, polars.DataFrame):
                return raw
            # but if there is no data, then we want to return empty polars.DataFrame
            return polars.DataFrame()
        return pd.DataFrame({column: values for column, values in raw})

    def collapse(node_frames):
        """
        node_frames is a list of (node, dataframe) pairs
        """
        grouped = defaultdict(list)
        for node_name, frame in node_frames:
            grouped[node_name].append(frame)
        # a node with exactly one dataframe is represented by the dataframe itself
        collapsed = {
            node_name: frames[0] if len(frames) == 1 else frames
            for node_name, frames in grouped.items()
        }
        # a single node name doesn't need the extra dictionary level
        if len(collapsed) == 1:
            return next(iter(collapsed.values()))
        return collapsed

    per_symbol = defaultdict(list)
    for symbol, data, _, node, *_ in data_list:
        per_symbol[symbol].append((node, to_frame(data)))

    return {symbol: collapse(pairs) for symbol, pairs in per_symbol.items()}
|
|
695
|
+
|
|
696
|
+
|
|
697
|
+
def _format_call_output(result, output_structure, node_names, require_dict):
    """Shape the raw output of otq.run() according to the requested structure.

    Parameters
    ----------
    result:
        Raw result returned by otq.run().
    output_structure: ['df', 'polars', 'list', 'map']
        If 'list' or 'map': the data is returned as it came from otq.run(),
        passed through the corresponding node name filter (see ``node_names``).

        If 'df' or 'polars': the raw list is filtered by node name
        and converted into dataframes grouped by symbol.
        The dictionary with symbols as keys is returned if it has
        more or less than one symbol, or if ``require_dict`` is True.
        Otherwise the value of the only symbol is returned.
    node_names: list of str, None
        If not empty, only the output of the nodes from this list is selected.
    require_dict: bool
        If True, forces the 'df' / 'polars' output to always be a dictionary,
        even if only one symbol is returned.
        Has no effect for other values of ``output_structure``.

    Returns
    -------
    Formatted output: dataframe, dictionary or list
    """
    if output_structure == 'list':
        return _filter_returned_list_by_node(result, node_names)
    if output_structure == 'map':
        return _filter_returned_map_by_node(result, node_names)

    assert output_structure in ('df', 'polars'), (f'Output structure should be one of: "df", "map", "list", "polars" '
                                                  f'instead "{output_structure}" was passed')

    # dataframe output structures imply that raw results came as a list
    frames_by_symbol = _form_dict_from_list(
        _filter_returned_list_by_node(result, node_names),
        output_structure,
    )

    if len(frames_by_symbol) != 1 or require_dict:
        return frames_by_symbol
    # single symbol and no dictionary requested: unwrap the only value
    return next(iter(frames_by_symbol.values()))
|
|
742
|
+
|
|
743
|
+
|
|
744
|
+
def _process_empty_results(result, query_schema, output_structure):
    """
    Replace empty per-symbol results with empty data that carries the query schema.

    The placeholder has one empty numpy array per field of ``query_schema``
    plus a ``Time`` field of type ``otp.nsectime``.
    ``result`` is modified in place, nothing is returned.
    """
    fields = {**query_schema, 'Time': otp.nsectime}
    empty_columns = [
        (name, np.array([], dtype=otp.types.type2np(field_type)))
        for name, field_type in fields.items()
    ]
    is_result_map = type(result) is otq.SymbolNumpyResultMap

    # the placeholder container depends on the kind of result we are patching
    if output_structure == 'polars':
        import polars
        placeholder = polars.DataFrame(dict(empty_columns))
    elif is_result_map:
        placeholder = dict(empty_columns)
    else:
        placeholder = empty_columns

    if is_result_map:
        for per_node in result.get_dict().values():
            for node_name, node_output in per_node.items():
                if len(node_output[0]) == 0:
                    per_node[node_name] = (placeholder, node_output[1])
        return

    for position, entry in enumerate(result):
        if len(entry[1]) == 0:
            # only the data element is replaced, all other elements are kept
            result[position] = (entry[0], placeholder, entry[2], entry[3], *entry[4:])
|
|
772
|
+
|
|
773
|
+
|
|
774
|
+
def _preprocess_otp_query(query, query_params):
    """
    Unwrap ``otp.query`` objects into a query path and query parameters.

    An ``otp.query._outputs`` object is first replaced with its ``'OUT'`` item.
    For an ``otp.query`` object the returned query is its ``path``, and its own
    ``params`` (if any) become the returned query parameters.
    Any other kind of query is returned unchanged together with ``query_params``.

    Raises
    ------
    ValueError
        If parameters are set both in the ``otp.query`` object and in ``query_params``.
    """
    if isinstance(query, otp.query._outputs):
        query = query['OUT']

    if not isinstance(query, otp.query):
        return query, query_params

    if query.params:
        if query_params:
            raise ValueError("please specify parameters in query or in otp.run only")
        query_params = query.params
    return query.path, query_params
|
|
786
|
+
|
|
787
|
+
|
|
788
|
+
def _get_start_end(start, end, timezone):
    """
    Translate start/end values accepted by onetick-py
    into values accepted by onetick-query.

    * ``utils.adaptive`` is replaced with the configured default start/end time;
    * plain ``datetime.date`` objects are turned into ``datetime.datetime`` at midnight;
    * ``pandas.Timestamp`` and ``otp.datetime`` objects are converted depending on
      whether WebAPI mode is used.

    Returns the converted ``(start, end)`` pair.
    """
    def _midnight(value):
        # `isinstance(obj, datetime.date)` is not correct because
        # isinstance(<datetime.datetime object>, datetime.date) = True
        if type(value) is datetime.date:
            return datetime.datetime(value.year, value.month, value.day)
        return value

    def _to_otq_time(value):
        # only pandas.Timestamp and otp.datetime need a conversion
        if not isinstance(value, (pd.Timestamp, otp.datetime)):
            return value
        if otq.webapi:
            # onetick-query_webapi supports pandas.Timestamp and strings in %Y%m%s%H%M%S.%J format
            if isinstance(value, pd.Timestamp):
                return value
            if isinstance(value, otp.datetime):
                return value.ts
            return value
        if otp.compatibility.is_correct_timezone_used_in_otq_run():
            return datetime2timeval(value, timezone)
        # there is a bug in older onetick versions using wrong timezone
        return datetime2timeval(value, 'GMT')

    if start is utils.adaptive:
        start = configuration.config.default_start_time
    if end is utils.adaptive:
        end = configuration.config.default_end_time

    start, end = _midnight(start), _midnight(end)

    return _to_otq_time(start), _to_otq_time(end)
|
|
826
|
+
|
|
827
|
+
|
|
828
|
+
def _process_output_structure(output_structure):
    """
    Validate ``output_structure`` and find the matching onetick-query output structure.

    Returns a pair ``(output_structure, output_structure_for_otq)``:

    * empty value or "df" -> ("df", "symbol_result_list"), as otq doesn't support df;
    * "list" -> ("list", "symbol_result_list");
    * "map" -> ("map", "symbol_result_map");
    * "polars" -> ("polars", "symbol_result_list").

    Raises
    ------
    ValueError
        If ``output_structure`` is none of the values above.
    """
    if not output_structure or output_structure == "df":  # otq doesn't support df
        return "df", "symbol_result_list"
    if output_structure == "list":
        return output_structure, "symbol_result_list"
    if output_structure == "map":
        return output_structure, "symbol_result_map"
    if output_structure == "polars":
        return "polars", "symbol_result_list"
    raise ValueError("output_structure support only the following values: df, list, map and polars")
|