onetick-py 1.177.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- locator_parser/__init__.py +0 -0
- locator_parser/acl.py +73 -0
- locator_parser/actions.py +262 -0
- locator_parser/common.py +368 -0
- locator_parser/io.py +43 -0
- locator_parser/locator.py +150 -0
- onetick/__init__.py +101 -0
- onetick/doc_utilities/__init__.py +3 -0
- onetick/doc_utilities/napoleon.py +40 -0
- onetick/doc_utilities/ot_doctest.py +140 -0
- onetick/doc_utilities/snippets.py +279 -0
- onetick/lib/__init__.py +4 -0
- onetick/lib/instance.py +141 -0
- onetick/py/__init__.py +293 -0
- onetick/py/_stack_info.py +89 -0
- onetick/py/_version.py +2 -0
- onetick/py/aggregations/__init__.py +11 -0
- onetick/py/aggregations/_base.py +648 -0
- onetick/py/aggregations/_docs.py +948 -0
- onetick/py/aggregations/compute.py +286 -0
- onetick/py/aggregations/functions.py +2216 -0
- onetick/py/aggregations/generic.py +104 -0
- onetick/py/aggregations/high_low.py +80 -0
- onetick/py/aggregations/num_distinct.py +83 -0
- onetick/py/aggregations/order_book.py +501 -0
- onetick/py/aggregations/other.py +1014 -0
- onetick/py/backports.py +26 -0
- onetick/py/cache.py +374 -0
- onetick/py/callback/__init__.py +5 -0
- onetick/py/callback/callback.py +276 -0
- onetick/py/callback/callbacks.py +131 -0
- onetick/py/compatibility.py +798 -0
- onetick/py/configuration.py +771 -0
- onetick/py/core/__init__.py +0 -0
- onetick/py/core/_csv_inspector.py +93 -0
- onetick/py/core/_internal/__init__.py +0 -0
- onetick/py/core/_internal/_manually_bound_value.py +6 -0
- onetick/py/core/_internal/_nodes_history.py +250 -0
- onetick/py/core/_internal/_op_utils/__init__.py +0 -0
- onetick/py/core/_internal/_op_utils/every_operand.py +9 -0
- onetick/py/core/_internal/_op_utils/is_const.py +10 -0
- onetick/py/core/_internal/_per_tick_scripts/tick_list_sort_template.script +121 -0
- onetick/py/core/_internal/_proxy_node.py +140 -0
- onetick/py/core/_internal/_state_objects.py +2312 -0
- onetick/py/core/_internal/_state_vars.py +93 -0
- onetick/py/core/_source/__init__.py +0 -0
- onetick/py/core/_source/_symbol_param.py +95 -0
- onetick/py/core/_source/schema.py +97 -0
- onetick/py/core/_source/source_methods/__init__.py +0 -0
- onetick/py/core/_source/source_methods/aggregations.py +809 -0
- onetick/py/core/_source/source_methods/applyers.py +296 -0
- onetick/py/core/_source/source_methods/columns.py +141 -0
- onetick/py/core/_source/source_methods/data_quality.py +301 -0
- onetick/py/core/_source/source_methods/debugs.py +272 -0
- onetick/py/core/_source/source_methods/drops.py +120 -0
- onetick/py/core/_source/source_methods/fields.py +619 -0
- onetick/py/core/_source/source_methods/filters.py +1002 -0
- onetick/py/core/_source/source_methods/joins.py +1413 -0
- onetick/py/core/_source/source_methods/merges.py +605 -0
- onetick/py/core/_source/source_methods/misc.py +1455 -0
- onetick/py/core/_source/source_methods/pandases.py +155 -0
- onetick/py/core/_source/source_methods/renames.py +356 -0
- onetick/py/core/_source/source_methods/sorts.py +183 -0
- onetick/py/core/_source/source_methods/switches.py +142 -0
- onetick/py/core/_source/source_methods/symbols.py +117 -0
- onetick/py/core/_source/source_methods/times.py +627 -0
- onetick/py/core/_source/source_methods/writes.py +986 -0
- onetick/py/core/_source/symbol.py +205 -0
- onetick/py/core/_source/tmp_otq.py +222 -0
- onetick/py/core/column.py +209 -0
- onetick/py/core/column_operations/__init__.py +0 -0
- onetick/py/core/column_operations/_methods/__init__.py +4 -0
- onetick/py/core/column_operations/_methods/_internal.py +28 -0
- onetick/py/core/column_operations/_methods/conversions.py +216 -0
- onetick/py/core/column_operations/_methods/methods.py +292 -0
- onetick/py/core/column_operations/_methods/op_types.py +160 -0
- onetick/py/core/column_operations/accessors/__init__.py +0 -0
- onetick/py/core/column_operations/accessors/_accessor.py +28 -0
- onetick/py/core/column_operations/accessors/decimal_accessor.py +104 -0
- onetick/py/core/column_operations/accessors/dt_accessor.py +537 -0
- onetick/py/core/column_operations/accessors/float_accessor.py +184 -0
- onetick/py/core/column_operations/accessors/str_accessor.py +1367 -0
- onetick/py/core/column_operations/base.py +1121 -0
- onetick/py/core/cut_builder.py +150 -0
- onetick/py/core/db_constants.py +20 -0
- onetick/py/core/eval_query.py +245 -0
- onetick/py/core/lambda_object.py +441 -0
- onetick/py/core/multi_output_source.py +232 -0
- onetick/py/core/per_tick_script.py +2256 -0
- onetick/py/core/query_inspector.py +464 -0
- onetick/py/core/source.py +1744 -0
- onetick/py/db/__init__.py +2 -0
- onetick/py/db/_inspection.py +1128 -0
- onetick/py/db/db.py +1327 -0
- onetick/py/db/utils.py +64 -0
- onetick/py/docs/__init__.py +0 -0
- onetick/py/docs/docstring_parser.py +112 -0
- onetick/py/docs/utils.py +81 -0
- onetick/py/functions.py +2398 -0
- onetick/py/license.py +190 -0
- onetick/py/log.py +88 -0
- onetick/py/math.py +935 -0
- onetick/py/misc.py +470 -0
- onetick/py/oqd/__init__.py +22 -0
- onetick/py/oqd/eps.py +1195 -0
- onetick/py/oqd/sources.py +325 -0
- onetick/py/otq.py +216 -0
- onetick/py/pyomd_mock.py +47 -0
- onetick/py/run.py +916 -0
- onetick/py/servers.py +173 -0
- onetick/py/session.py +1347 -0
- onetick/py/sources/__init__.py +19 -0
- onetick/py/sources/cache.py +167 -0
- onetick/py/sources/common.py +128 -0
- onetick/py/sources/csv.py +642 -0
- onetick/py/sources/custom.py +85 -0
- onetick/py/sources/data_file.py +305 -0
- onetick/py/sources/data_source.py +1045 -0
- onetick/py/sources/empty.py +94 -0
- onetick/py/sources/odbc.py +337 -0
- onetick/py/sources/order_book.py +271 -0
- onetick/py/sources/parquet.py +168 -0
- onetick/py/sources/pit.py +191 -0
- onetick/py/sources/query.py +495 -0
- onetick/py/sources/snapshots.py +419 -0
- onetick/py/sources/split_query_output_by_symbol.py +198 -0
- onetick/py/sources/symbology_mapping.py +123 -0
- onetick/py/sources/symbols.py +374 -0
- onetick/py/sources/ticks.py +825 -0
- onetick/py/sql.py +70 -0
- onetick/py/state.py +251 -0
- onetick/py/types.py +2131 -0
- onetick/py/utils/__init__.py +70 -0
- onetick/py/utils/acl.py +93 -0
- onetick/py/utils/config.py +186 -0
- onetick/py/utils/default.py +49 -0
- onetick/py/utils/file.py +38 -0
- onetick/py/utils/helpers.py +76 -0
- onetick/py/utils/locator.py +94 -0
- onetick/py/utils/perf.py +498 -0
- onetick/py/utils/query.py +49 -0
- onetick/py/utils/render.py +1374 -0
- onetick/py/utils/script.py +244 -0
- onetick/py/utils/temp.py +471 -0
- onetick/py/utils/types.py +120 -0
- onetick/py/utils/tz.py +84 -0
- onetick_py-1.177.0.dist-info/METADATA +137 -0
- onetick_py-1.177.0.dist-info/RECORD +152 -0
- onetick_py-1.177.0.dist-info/WHEEL +5 -0
- onetick_py-1.177.0.dist-info/entry_points.txt +2 -0
- onetick_py-1.177.0.dist-info/licenses/LICENSE +21 -0
- onetick_py-1.177.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,1045 @@
|
|
|
1
|
+
import datetime as dt
|
|
2
|
+
import inspect
|
|
3
|
+
import warnings
|
|
4
|
+
|
|
5
|
+
from typing import Dict, Iterable, Optional
|
|
6
|
+
|
|
7
|
+
import onetick.py as otp
|
|
8
|
+
from onetick.py.otq import otq
|
|
9
|
+
|
|
10
|
+
from onetick.py.db import _inspection
|
|
11
|
+
from onetick.py.core._source._symbol_param import _SymbolParamColumn
|
|
12
|
+
from onetick.py.core._source.tmp_otq import TmpOtq
|
|
13
|
+
from onetick.py.core.eval_query import _QueryEvalWrapper
|
|
14
|
+
from onetick.py.core.source import Source
|
|
15
|
+
from onetick.py.core.column_operations.base import Raw, OnetickParameter
|
|
16
|
+
|
|
17
|
+
from .. import types as ott
|
|
18
|
+
from .. import utils
|
|
19
|
+
from ..core.column_operations.base import _Operation
|
|
20
|
+
from ..db.db import DB
|
|
21
|
+
from ..compatibility import is_supported_where_clause_for_back_ticks
|
|
22
|
+
|
|
23
|
+
from onetick.py.docs.utils import docstring, param_doc
|
|
24
|
+
|
|
25
|
+
from .common import convert_tick_type_to_str, get_start_end_by_date
|
|
26
|
+
from .symbols import Symbols
|
|
27
|
+
from .ticks import Ticks
|
|
28
|
+
from .query import query
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# ---------------------------------------------------------------------------
# Reusable parameter-documentation objects.
#
# Each ``param_doc`` below describes one parameter of ``DataSource.__init__``;
# they are collected into DATA_SOURCE_DOC_PARAMS (bottom of this section) and
# injected into the constructor's docstring via the ``@docstring`` decorator.
# ``desc`` strings are reStructuredText fragments rendered into the docs;
# ``str_annotation``/``str_default`` override the rendered type/default text
# when the real Python object would not render nicely.
# ---------------------------------------------------------------------------
_db_doc = param_doc(
    name='db',
    desc="""
    Name(s) of the database or the database object(s).
    """,
    str_annotation='str, list of str, :class:`otp.DB <onetick.py.DB>`',
    default=None,
    str_default='None',
)
_symbol_doc = param_doc(
    name='symbol',
    desc="""
    Symbol(s) from which data should be taken.
    """,
    str_annotation='str, list of str, :class:`Source`, :class:`query`, :py:func:`eval query <onetick.py.eval>`',
    default=utils.adaptive,
    str_default=' :py:class:`onetick.py.adaptive`',
)
_symbols_doc = param_doc(
    name='symbols',
    desc="""
    Symbol(s) from which data should be taken.
    Alias for ``symbol`` parameter. Will take precedence over it.
    """,
    str_annotation=('str, list of str, :class:`Source`, :class:`query`, :py:func:`eval query <onetick.py.eval>`, '
                    ':py:class:`onetick.query.GraphQuery`.'),
    default=None,
)
_tick_type_doc = param_doc(
    name='tick_type',
    desc="""
    Tick type of the data.
    If not specified, all ticks from `db` will be taken.
    If ticks can't be found or there are many databases specified in `db` then default is "TRD".
    """,
    str_annotation='str, list of str',
    default=utils.adaptive,
    str_default=' :py:class:`onetick.py.adaptive`',
)
_start_doc = param_doc(
    name='start',
    desc="""
    Start of the interval from which the data should be taken.
    Default is :py:class:`onetick.py.adaptive`, making the final query deduce the time
    limits from the rest of the graph.
    """,
    str_annotation=(
        ':py:class:`datetime.datetime`, :py:class:`otp.datetime <onetick.py.datetime>`,'
        ' :py:class:`onetick.py.adaptive`'
    ),
    default=utils.adaptive,
    str_default=' :py:class:`onetick.py.adaptive`',
)
_end_doc = param_doc(
    name='end',
    desc="""
    End of the interval from which the data should be taken.
    Default is :py:class:`onetick.py.adaptive`, making the final query deduce the time
    limits from the rest of the graph.
    """,
    str_annotation=(
        ':py:class:`datetime.datetime`, :py:class:`otp.datetime <onetick.py.datetime>`,'
        ' :py:class:`onetick.py.adaptive`'
    ),
    default=utils.adaptive,
    str_default=' :py:class:`onetick.py.adaptive`',
)
_date_doc = param_doc(
    name='date',
    desc="""
    Allows to specify a whole day instead of passing explicitly ``start`` and ``end`` parameters.
    If it is set along with the ``start`` and ``end`` parameters then last two are ignored.
    """,
    str_annotation=":class:`datetime.datetime`, :class:`otp.datetime <onetick.py.datetime>`",
    default=None,
)
_schema_policy_doc = param_doc(
    name='schema_policy',
    desc="""
    Schema deduction policy:

    - 'tolerant' (default)
        The resulting schema is a combination of ``schema`` and database schema.
        If the database schema can be deduced,
        it's checked to be type-compatible with a ``schema``,
        and ValueError is raised if checks are failed.
        Also, with this policy database is scanned 5 days back to find the schema.
        It is useful when database is misconfigured or in case of holidays.

    - 'tolerant_strict'
        The resulting schema will be ``schema`` if it's not empty.
        Otherwise, database schema is used.
        If the database schema can be deduced,
        it's checked if it lacks fields from the ``schema``
        and it's checked to be type-compatible with a ``schema``
        and ValueError is raised if checks are failed.
        Also, with this policy database is scanned 5 days back to find the schema.
        It is useful when database is misconfigured or in case of holidays.

    - 'fail'
        The same as 'tolerant', but if the database schema can't be deduced, raises an Exception.

    - 'fail_strict'
        The same as 'tolerant_strict', but if the database schema can't be deduced, raises an Exception.

    - 'manual'
        The resulting schema is a combination of ``schema`` and database schema.
        Compatibility with database schema will not be checked.

    - 'manual_strict'
        The resulting schema will be exactly ``schema``.
        Compatibility with database schema will not be checked.
        If some fields specified in ``schema`` do not exist in the database,
        their values will be set to some default value for a type
        (0 for integers, NaNs for floats, empty string for strings, epoch for datetimes).

    Default value is :py:class:`onetick.py.adaptive` (if deprecated parameter ``guess_schema`` is not set).
    If ``guess_schema`` is set to True then value is 'fail', if False then 'manual'.
    If ``schema_policy`` is set to ``None`` then default value is 'tolerant'.

    Default value can be changed with
    :py:attr:`otp.config.default_schema_policy<onetick.py.configuration.Config.default_schema_policy>`
    configuration parameter.

    If you set schema manually, while creating DataSource instance, and don't set ``schema_policy``,
    it will be automatically set to ``manual``.
    """,
    str_annotation="'tolerant', 'tolerant_strict', 'fail', 'fail_strict', 'manual', 'manual_strict'",
    default=utils.adaptive,
    str_default=' :py:class:`onetick.py.adaptive`',
)
_guess_schema_doc = param_doc(
    name='guess_schema',
    desc="""
    .. deprecated:: 1.3.16

    Use ``schema_policy`` parameter instead.

    If ``guess_schema`` is set to True then ``schema_policy`` value is 'fail', if False then 'manual'.
    """,
    annotation=bool,
    default=None,
)
_identify_input_ts_doc = param_doc(
    name='identify_input_ts',
    desc="""
    If set to False, the fields SYMBOL_NAME and TICK_TYPE are not appended to the output ticks.
    """,
    annotation=bool,
    default=False,
)
_back_to_first_tick_doc = param_doc(
    name='back_to_first_tick',
    desc="""
    Determines how far back to go looking for the latest tick before ``start`` time.
    If one is found, it is inserted into the output time series with the timestamp set to ``start`` time.
    Note: it will be rounded to int, so otp.Millis(999) will be 0 seconds.
    """,
    str_annotation=('int, :ref:`offset <datetime_offsets>`, '
                    ':class:`otp.expr <onetick.py.expr>`, '
                    ':py:class:`~onetick.py.Operation`'),
    default=0,
)
_keep_first_tick_timestamp_doc = param_doc(
    name='keep_first_tick_timestamp',
    desc="""
    If set, new field with this name will be added to source.
    This field contains original timestamp of the tick that was taken from before the start time of the query.
    For all other ticks value in this field will be equal to the value of Time field.
    This parameter is ignored if ``back_to_first_tick`` is not set.
    """,
    annotation=str,
    default=None,
)
_presort_doc = param_doc(
    name='presort',
    desc="""
    Add the presort EP in case of bound symbols.
    Applicable only when ``symbols`` is not None.
    By default, it is set to True if ``symbols`` are set
    and to False otherwise.
    """,
    annotation=bool,
    default=utils.adaptive,
    str_default=' :py:class:`onetick.py.adaptive`',
)
_concurrency_doc = param_doc(
    name='concurrency',
    desc="""
    Specifies the number of CPU cores to utilize for the ``presort``.
    By default, the value is inherited from the value of the query where this PRESORT is used.

    For the main query it may be specified in the ``concurrency`` parameter of :meth:`run` method
    (which by default is set to
    :py:attr:`otp.config.default_concurrency<onetick.py.configuration.Config.default_concurrency>`).

    For the auxiliary queries (like first-stage queries) empty value means OneTick's default of 1.
    If :py:attr:`otp.config.presort_force_default_concurrency<onetick.py.configuration.Config.presort_force_default_concurrency>`
    is set then default concurrency value will be set in all PRESORT EPs in all queries.
    """,  # noqa: E501
    annotation=int,
    default=utils.default,
    str_default=' :py:class:`onetick.py.utils.default`',
)
_batch_size_doc = param_doc(
    name='batch_size',
    desc="""
    Specifies the query batch size for the ``presort``.
    By default, the value from
    :py:attr:`otp.config.default_batch_size<onetick.py.configuration.Config.default_batch_size>` is used.
    """,
    annotation=int,
    default=None,
)
_schema_doc = param_doc(
    name='schema',
    desc="""
    Dict of <column name> -> <column type> pairs that the source is expected to have.
    If the type is irrelevant, provide None as the type in question.
    """,
    annotation=Optional[Dict[str, type]],
    default=None,
)
_desired_schema_doc = param_doc(
    name='kwargs',
    desc="""
    Deprecated. Use ``schema`` instead.
    List of <column name> -> <column type> pairs that the source is expected to have.
    If the type is irrelevant, provide None as the type in question.
    """,
    str_annotation='type[str]',
    kind=inspect.Parameter.VAR_KEYWORD,
)

_max_back_ticks_to_prepend_doc = param_doc(
    name='max_back_ticks_to_prepend',
    desc="""
    When the ``back_to_first_tick`` interval is specified, this parameter determines the maximum number
    of the most recent ticks before start_time that will be prepended to the output time series.
    Their timestamp will be changed to start_time.
    """,
    annotation=int,
    default=1,
)

_where_clause_for_back_ticks_doc = param_doc(
    name='where_clause_for_back_ticks',
    desc="""
    A logical expression that is computed only for the ticks encountered when a query goes back from the start time,
    in search of the ticks to prepend. If it returns false, a tick is ignored.
    """,
    annotation=Raw,
    default=None,
)
_symbol_date_doc = param_doc(
    name='symbol_date',
    desc="""
    Symbol date or integer in the YYYYMMDD format.
    Can only be specified if parameters ``symbols`` is set.
    """,
    str_annotation=':py:class:`otp.datetime <onetick.py.datetime>` or :py:class:`datetime.datetime` or int',
    default=None,
)

# Full ordered list of parameter docs spliced into DataSource.__init__'s
# docstring by the @docstring decorator (see the constructor below).
DATA_SOURCE_DOC_PARAMS = [
    _db_doc, _symbol_doc, _tick_type_doc,
    _start_doc, _end_doc, _date_doc,
    _schema_policy_doc, _guess_schema_doc,
    _identify_input_ts_doc,
    _back_to_first_tick_doc, _keep_first_tick_timestamp_doc,
    _max_back_ticks_to_prepend_doc,
    _where_clause_for_back_ticks_doc,
    _symbols_doc,
    _presort_doc, _batch_size_doc, _concurrency_doc,
    _schema_doc,
    _symbol_date_doc,
    _desired_schema_doc,
]
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
class DataSource(Source):
    """Source providing data from a given database (see ``__init__`` for details)."""

    # Valid values for the ``schema_policy`` constructor parameter.
    # See the ``_schema_policy_doc`` description above for the semantics of each.
    POLICY_MANUAL = "manual"
    POLICY_MANUAL_STRICT = "manual_strict"
    POLICY_TOLERANT = "tolerant"
    POLICY_TOLERANT_STRICT = "tolerant_strict"
    POLICY_FAIL = "fail"
    POLICY_FAIL_STRICT = "fail_strict"

    # Used to validate the user-supplied ``schema_policy`` value.
    _VALID_POLICIES = frozenset([POLICY_MANUAL, POLICY_MANUAL_STRICT,
                                 POLICY_TOLERANT, POLICY_TOLERANT_STRICT,
                                 POLICY_FAIL, POLICY_FAIL_STRICT])
    # Extra per-instance attributes on top of the base Source properties.
    # NOTE(review): the exact semantics of _p_db/_p_strict/_p_schema are set in
    # __init__ (not fully visible here) — _p_strict gates strict schema checks.
    _PROPERTIES = Source._PROPERTIES + ["_p_db", "_p_strict", "_p_schema", "_schema", "logger"]
|
|
324
|
+
|
|
325
|
+
def __get_schema(self, db, start, schema_policy):
|
|
326
|
+
schema = {}
|
|
327
|
+
|
|
328
|
+
if start is utils.adaptive:
|
|
329
|
+
start = None # means that use the last date with data
|
|
330
|
+
|
|
331
|
+
if isinstance(db, list):
|
|
332
|
+
# This case of a merge, since we need to get combined schema across different tick types and dbs
|
|
333
|
+
for t_db in db:
|
|
334
|
+
if t_db.startswith('expr('):
|
|
335
|
+
continue
|
|
336
|
+
|
|
337
|
+
_db = t_db.split(':')[0]
|
|
338
|
+
_tt = t_db.split(':')[-1]
|
|
339
|
+
|
|
340
|
+
# tick type as parameter
|
|
341
|
+
if _tt.startswith('$'):
|
|
342
|
+
_tt = None
|
|
343
|
+
|
|
344
|
+
db_obj = _inspection.DB(_db)
|
|
345
|
+
if schema_policy == self.POLICY_TOLERANT and start:
|
|
346
|
+
# repeating the same logic as in db_obj.last_date
|
|
347
|
+
start = db_obj.last_not_empty_date(start, days_back=5, tick_type=_tt)
|
|
348
|
+
|
|
349
|
+
db_schema = {}
|
|
350
|
+
try:
|
|
351
|
+
db_schema = db_obj.schema(date=start, tick_type=_tt)
|
|
352
|
+
except Exception as e:
|
|
353
|
+
if _tt is not None:
|
|
354
|
+
warnings.warn(f"Couldn't get schema from the database {db_obj}:\n{e}.\n\n"
|
|
355
|
+
"Set parameter schema_policy='manual' to set the schema manually.")
|
|
356
|
+
|
|
357
|
+
schema.update(db_schema)
|
|
358
|
+
|
|
359
|
+
if db is None or isinstance(db, _SymbolParamColumn):
|
|
360
|
+
# In this case we can't get schema, because db is calculated dynamically.
|
|
361
|
+
# Set to empty to indicate that in this case we expect the manually set schema.
|
|
362
|
+
schema = {}
|
|
363
|
+
return schema
|
|
364
|
+
|
|
365
|
+
def __prepare_schema(self, db, start, schema_policy, guess_schema, schema):
|
|
366
|
+
if guess_schema is not None:
|
|
367
|
+
warnings.warn(
|
|
368
|
+
"guess_schema flag is deprecated; use schema_policy argument instead",
|
|
369
|
+
FutureWarning,
|
|
370
|
+
)
|
|
371
|
+
if schema_policy is not None:
|
|
372
|
+
raise ValueError("guess_schema and schema_policy cannot be set at the same time")
|
|
373
|
+
if guess_schema:
|
|
374
|
+
schema_policy = self.POLICY_FAIL
|
|
375
|
+
else:
|
|
376
|
+
schema_policy = self.POLICY_MANUAL
|
|
377
|
+
|
|
378
|
+
if schema_policy is None:
|
|
379
|
+
schema_policy = self.POLICY_TOLERANT
|
|
380
|
+
if schema_policy not in self._VALID_POLICIES:
|
|
381
|
+
raise ValueError(f"Invalid schema_policy; allowed values are: {self._VALID_POLICIES}")
|
|
382
|
+
|
|
383
|
+
actual_schema = {}
|
|
384
|
+
if schema_policy not in (self.POLICY_MANUAL, self.POLICY_MANUAL_STRICT):
|
|
385
|
+
actual_schema = self.__get_schema(db, start, schema_policy)
|
|
386
|
+
dbs = ', '.join(db if isinstance(db, list) else [])
|
|
387
|
+
|
|
388
|
+
if len(actual_schema) == 0:
|
|
389
|
+
if schema_policy in (self.POLICY_FAIL, self.POLICY_FAIL_STRICT):
|
|
390
|
+
raise ValueError(f'No ticks found in database(-s) {dbs}')
|
|
391
|
+
# lets try to use at least something
|
|
392
|
+
return schema.copy()
|
|
393
|
+
|
|
394
|
+
for k, v in schema.items():
|
|
395
|
+
field_type = actual_schema.get(k, None)
|
|
396
|
+
incompatible_types = False
|
|
397
|
+
if field_type is None:
|
|
398
|
+
if self._p_strict or schema_policy in (self.POLICY_TOLERANT, self.POLICY_FAIL):
|
|
399
|
+
raise ValueError(f"Database(-s) {dbs} schema has no {k} field")
|
|
400
|
+
elif issubclass(field_type, str) and issubclass(v, str):
|
|
401
|
+
field_length = ott.string.DEFAULT_LENGTH
|
|
402
|
+
if issubclass(field_type, ott.string):
|
|
403
|
+
field_length = field_type.length
|
|
404
|
+
v_length = ott.string.DEFAULT_LENGTH
|
|
405
|
+
if issubclass(v, ott.string):
|
|
406
|
+
v_length = v.length
|
|
407
|
+
if issubclass(field_type, ott.varstring):
|
|
408
|
+
if not issubclass(v, ott.varstring):
|
|
409
|
+
incompatible_types = True
|
|
410
|
+
elif not issubclass(v, ott.varstring) and v_length < field_length:
|
|
411
|
+
incompatible_types = True
|
|
412
|
+
elif not issubclass(field_type, v):
|
|
413
|
+
incompatible_types = True
|
|
414
|
+
if incompatible_types:
|
|
415
|
+
error_message = f"Database(-s) {dbs} schema field {k} has type {field_type}, but {v} was requested"
|
|
416
|
+
if field_type in (str, ott.string) or v in (str, ott.string):
|
|
417
|
+
error_message = f"{error_message}. Notice, that `str` and `otp.string` lengths are 64"
|
|
418
|
+
raise ValueError(error_message)
|
|
419
|
+
if not self._p_strict:
|
|
420
|
+
schema.update(actual_schema)
|
|
421
|
+
|
|
422
|
+
table_schema = schema.copy()
|
|
423
|
+
if not self._p_strict:
|
|
424
|
+
# in this case we will table only fields specified by user
|
|
425
|
+
table_schema = {
|
|
426
|
+
k: v for k, v in table_schema.items() if k not in actual_schema
|
|
427
|
+
}
|
|
428
|
+
return table_schema
|
|
429
|
+
|
|
430
|
+
    def __prepare_db_tick_type(self, db, tick_type, start, end):
        """
        Normalize the ``db`` and ``tick_type`` constructor parameters.

        Combines the two parameters into a canonical form: either a list of
        ``'<DB>::<TICK_TYPE>'`` strings with ``tick_type`` set to None, or a
        dynamic db (symbol parameter / None) with an explicit ``tick_type``.

        Parameters
        ----------
        db: str, list, DB, _inspection.DB, _SymbolParamColumn or None
            Database specification(s), optionally with embedded tick types.
        tick_type: str, iterable of str, None or ``utils.adaptive``
            Tick type(s); ``utils.adaptive`` enables auto-deduction.
        start, end: date-like or ``utils.adaptive``
            Interval used when scanning the database for available tick types.

        Returns
        -------
        tuple
            ``(db, tick_type)`` in normalized form.

        Raises
        ------
        ValueError
            When the tick type is specified both ways, missing where required,
            or cannot be deduced unambiguously from the database.
        """
        if isinstance(db, list):
            # If everything is correct then this branch should leave
            # the `db` var as a list of databases with tick types and the `tick_type` var is None.
            # Valid cases:
            # - Fully defined case. The `db` parameter has a list of databases where
            #   every database has a tick type, when the `tick_type`
            #   parameter has default value or None (for backward compatibility)
            # - Partially defined case. The `db` parameter has a list of databases but
            #   not every database has a tick type, and meantime the `tick_type`
            #   is passed to not None value. In that case databases without tick type
            #   are extended with a tick type from the `tick_type` parameter
            # - No defined case. The `db` parameter has a list of databases and
            #   every database there has no tick type, and the `tick_type` is
            #   set to not None value. In that case every database is extended with
            #   the tick type from the `tick_type`.

            def db_converter(_db):
                # Normalize DB objects to their string names.
                if isinstance(_db, DB):
                    return _db.name
                else:
                    return _db

            db = [db_converter(_db) for _db in db]
            # True when every entry already carries a non-empty tick type
            # ('DB::TT' with a non-empty part after '::').
            res = all(('::' in _db and _db[-1] != ':' for _db in db))
            if res:
                if tick_type is utils.adaptive or tick_type is None:
                    tick_type = None  # tick types is specified for all databases
                else:
                    raise ValueError('The `tick_type` is set as a parameter '
                                     'and also as a part of the `db` parameter'
                                     'for every database')
            else:
                dbs_without_tt = [_db.split(':')[0] for _db in db
                                  if '::' not in _db or _db[-1] == ':']

                if tick_type is utils.adaptive:
                    tick_type = 'TRD'  # default one for backward compatibility and testing usecase
                if tick_type is None:
                    raise ValueError('The tick type is not set for databases: ' +
                                     ', '.join(dbs_without_tt))
                else:
                    # extend databases with missing tick types from the `tick_type` parameter
                    dbs_with_tt = [_db for _db in db
                                   if '::' in _db and _db[-1] != ':']

                    db = dbs_with_tt + [_db + '::' + tick_type for _db in dbs_without_tt]
                    tick_type = None

        if isinstance(db, (DB, _inspection.DB)):
            db = db.name  # ... and we go to the next branch

        if isinstance(db, str):
            # The resulting `db` var contains a list with string value, that has the `db`
            # concatenated with the `tick_type`.
            if '::' in db:
                if tick_type is utils.adaptive or tick_type is None:
                    # split the embedded tick type out of 'DB::TT'
                    tick_type = db.split(':')[-1]
                    db = db.split('::')[0]
                else:
                    raise ValueError('The `tick_type` is set as a parameter '
                                     'and also as a part of the `db` parameter')
            else:
                if tick_type is utils.adaptive or tick_type is None:
                    db_obj = _inspection.DB(db)

                    # try to find at least one common tick type
                    # through all days
                    tick_types = None

                    if start is utils.adaptive:
                        start = end = db_obj.get_last_date(show_warnings=False)

                    if start and end:  # could be None if there is no data
                        t_start = start
                        while t_start <= end:
                            t_tts = set(db_obj.tick_types(t_start))

                            t_start += dt.timedelta(days=1)

                            if len(t_tts) == 0:
                                # no data on this day -- it does not constrain
                                # the intersection
                                continue

                            if tick_types is None:
                                tick_types = t_tts
                            else:
                                # keep only tick types present on every day
                                tick_types &= t_tts

                            if len(tick_types) == 0:
                                raise ValueError(f'It seems that there is no common '
                                                 f'tick types for dates from {start} '
                                                 f'to {end}. Please specify a tick '
                                                 'type')

                    if tick_types is None:
                        if tick_type is utils.adaptive:
                            tick_types = ['TRD']  # the default one
                        else:
                            raise ValueError(f'Could not find any data in from {start} '
                                             f' to {end}. Could you check that tick type, '
                                             ' database and date range are correct.')

                    if len(tick_types) != 1:
                        raise ValueError('The tick type is not specified, found '
                                         'multiple tick types in the database : ' +
                                         ', '.join(tick_types))

                    tick_type = tick_types.pop()

            # Recombine db and tick type(s) into the canonical list form.
            if not isinstance(tick_type, str) and isinstance(tick_type, Iterable):
                if isinstance(tick_type, _SymbolParamColumn):
                    # tick type is only known at run time -- emit an expr()
                    db = [f"expr('{db}::' + {str(tick_type)})"]
                else:
                    db = [f'{db}::{tt}' for tt in tick_type]
            else:
                db = [db + '::' + tick_type]
            tick_type = None

        if isinstance(db, _SymbolParamColumn):
            # Do nothing, because we don't know whether db will come with the tick type or not.
            # The only one thing that definitely we know that tick_type can not be utils.adaptive
            if tick_type is utils.adaptive:
                # TODO: need to test this case
                raise ValueError('The `db` is set to the symbol param, in that case '
                                 'the `tick_type` should be set explicitly to some value '
                                 'or to None')

        if db is None:
            # This case means that database comes with the symbol name, then tick type should be defined
            if tick_type is utils.adaptive or tick_type is None:
                raise ValueError('The `db` is not specified that means database is '
                                 'expected to be defined with the symbol name. '
                                 'In that case the `tick_type` should be defined.')
            if not isinstance(tick_type, str) and isinstance(tick_type, Iterable):
                # multiple tick types are expressed as 'TT1+TT2' in OneTick
                tick_type = '+'.join(tick_type)

        return db, tick_type
|
|
567
|
+
|
|
568
|
+
@docstring(parameters=DATA_SOURCE_DOC_PARAMS, add_self=True)
|
|
569
|
+
def __init__(
|
|
570
|
+
self,
|
|
571
|
+
db=None,
|
|
572
|
+
symbol=utils.adaptive,
|
|
573
|
+
tick_type=utils.adaptive,
|
|
574
|
+
start=utils.adaptive,
|
|
575
|
+
end=utils.adaptive,
|
|
576
|
+
date=None,
|
|
577
|
+
schema=None,
|
|
578
|
+
schema_policy=utils.adaptive,
|
|
579
|
+
guess_schema=None,
|
|
580
|
+
identify_input_ts=False,
|
|
581
|
+
back_to_first_tick=0,
|
|
582
|
+
keep_first_tick_timestamp=None,
|
|
583
|
+
max_back_ticks_to_prepend=1,
|
|
584
|
+
where_clause_for_back_ticks=None,
|
|
585
|
+
symbols=None,
|
|
586
|
+
presort=utils.adaptive,
|
|
587
|
+
batch_size=None,
|
|
588
|
+
concurrency=utils.default,
|
|
589
|
+
symbol_date=None,
|
|
590
|
+
**kwargs,
|
|
591
|
+
):
|
|
592
|
+
"""
|
|
593
|
+
Construct a source providing data from a given ``db``.
|
|
594
|
+
|
|
595
|
+
.. warning::
|
|
596
|
+
|
|
597
|
+
Default value of the parameter ``schema_policy`` enables automatic deduction
|
|
598
|
+
of the data schema, but it is highly not recommended for production code.
|
|
599
|
+
For details see :ref:`static/concepts/schema:Schema deduction mechanism`.
|
|
600
|
+
|
|
601
|
+
Note
|
|
602
|
+
----
|
|
603
|
+
If interval that was set for :py:class:`~onetick.py.DataSource` via ``start``/``end`` or ``date`` parameters
|
|
604
|
+
does not match intervals in other :py:class:`~onetick.py.Source` objects used in query,
|
|
605
|
+
or does not match the whole query interval, then :py:meth:`~otp.Source.modify_query_times` will be applied
|
|
606
|
+
to this ``DataSource`` with specified interval as start and end times parameters.
|
|
607
|
+
|
|
608
|
+
If ``symbols`` parameter is omitted, you need to specify unbound symbols for the query in ``symbols``
|
|
609
|
+
parameter of :py:func:`onetick.py.run` function.
|
|
610
|
+
|
|
611
|
+
If ``symbols`` parameter is set, :meth:`otp.merge <onetick.py.merge>` is used to merge all passed bound symbols.
|
|
612
|
+
In this case you don't need to specify unbound symbols in :py:func:`onetick.py.run` call.
|
|
613
|
+
|
|
614
|
+
It's not allowed to specify bound and unbound symbols at the same time.
|
|
615
|
+
|
|
616
|
+
See also
|
|
617
|
+
--------
|
|
618
|
+
:ref:`static/concepts/start_end:Query start / end flow`
|
|
619
|
+
:ref:`static/concepts/symbols:Symbols: bound and unbound`
|
|
620
|
+
|
|
621
|
+
Examples
|
|
622
|
+
--------
|
|
623
|
+
|
|
624
|
+
Query a single symbol from a database:
|
|
625
|
+
|
|
626
|
+
>>> data = otp.DataSource(db='SOME_DB', tick_type='TT', symbols='S1')
|
|
627
|
+
>>> otp.run(data)
|
|
628
|
+
Time X
|
|
629
|
+
0 2003-12-01 00:00:00.000 1
|
|
630
|
+
1 2003-12-01 00:00:00.001 2
|
|
631
|
+
2 2003-12-01 00:00:00.002 3
|
|
632
|
+
|
|
633
|
+
Parameter ``symbols`` can be a list.
|
|
634
|
+
In this case specified symbols will be merged into a single data flow:
|
|
635
|
+
|
|
636
|
+
>>> # OTdirective: snippet-name:fetch data.simple;
|
|
637
|
+
>>> data = otp.DataSource(db='SOME_DB', tick_type='TT', symbols=['S1', 'S2'])
|
|
638
|
+
>>> otp.run(data)
|
|
639
|
+
Time X
|
|
640
|
+
0 2003-12-01 00:00:00.000 1
|
|
641
|
+
1 2003-12-01 00:00:00.000 -3
|
|
642
|
+
2 2003-12-01 00:00:00.001 2
|
|
643
|
+
3 2003-12-01 00:00:00.001 -2
|
|
644
|
+
4 2003-12-01 00:00:00.002 3
|
|
645
|
+
5 2003-12-01 00:00:00.002 -1
|
|
646
|
+
|
|
647
|
+
Parameter ``identify_input_ts`` can be used to automatically add field with symbol name for each tick:
|
|
648
|
+
|
|
649
|
+
>>> data = otp.DataSource(db='SOME_DB', tick_type='TT', symbols=['S1', 'S2'], identify_input_ts=True)
|
|
650
|
+
>>> otp.run(data)
|
|
651
|
+
Time SYMBOL_NAME TICK_TYPE X
|
|
652
|
+
0 2003-12-01 00:00:00.000 S1 TT 1
|
|
653
|
+
1 2003-12-01 00:00:00.000 S2 TT -3
|
|
654
|
+
2 2003-12-01 00:00:00.001 S1 TT 2
|
|
655
|
+
3 2003-12-01 00:00:00.001 S2 TT -2
|
|
656
|
+
4 2003-12-01 00:00:00.002 S1 TT 3
|
|
657
|
+
5 2003-12-01 00:00:00.002 S2 TT -1
|
|
658
|
+
|
|
659
|
+
Source also can be passed as symbols, in such case magic named column SYMBOL_NAME will be transform to symbol
|
|
660
|
+
and all other columns will be symbol parameters
|
|
661
|
+
|
|
662
|
+
>>> # OTdirective: snippet-name:fetch data.symbols as a source;
|
|
663
|
+
>>> symbols = otp.Ticks(SYMBOL_NAME=['S1', 'S2'])
|
|
664
|
+
>>> data = otp.DataSource(db='SOME_DB', symbols=symbols, tick_type='TT')
|
|
665
|
+
>>> otp.run(data)
|
|
666
|
+
Time X
|
|
667
|
+
0 2003-12-01 00:00:00.000 1
|
|
668
|
+
1 2003-12-01 00:00:00.000 -3
|
|
669
|
+
2 2003-12-01 00:00:00.001 2
|
|
670
|
+
3 2003-12-01 00:00:00.001 -2
|
|
671
|
+
4 2003-12-01 00:00:00.002 3
|
|
672
|
+
5 2003-12-01 00:00:00.002 -1
|
|
673
|
+
|
|
674
|
+
Default schema policy is **tolerant** (unless you specified ``schema`` parameter and
|
|
675
|
+
left ``schema_policy`` with default value, when it will be set to **manual**).
|
|
676
|
+
|
|
677
|
+
>>> data = otp.DataSource(
|
|
678
|
+
... db='US_COMP', tick_type='TRD', symbols='AAPL', date=otp.dt(2022, 3, 1),
|
|
679
|
+
... )
|
|
680
|
+
>>> data.schema
|
|
681
|
+
{'PRICE': <class 'float'>, 'SIZE': <class 'int'>}
|
|
682
|
+
|
|
683
|
+
>>> data = otp.DataSource(
|
|
684
|
+
... db='US_COMP', tick_type='TRD', symbols='AAPL', schema={'PRICE': int},
|
|
685
|
+
... schema_policy='tolerant', date=otp.dt(2022, 3, 1),
|
|
686
|
+
... )
|
|
687
|
+
Traceback (most recent call last):
|
|
688
|
+
...
|
|
689
|
+
ValueError: Database(-s) US_COMP::TRD schema field PRICE has type <class 'float'>,
|
|
690
|
+
but <class 'int'> was requested
|
|
691
|
+
|
|
692
|
+
Schema policy **manual** uses exactly ``schema``:
|
|
693
|
+
|
|
694
|
+
>>> data = otp.DataSource(db='US_COMP', tick_type='TRD', symbols='AAPL', schema={'PRICE': float},
|
|
695
|
+
... date=otp.dt(2022, 3, 1), schema_policy='manual')
|
|
696
|
+
>>> data.schema
|
|
697
|
+
{'PRICE': <class 'float'>}
|
|
698
|
+
|
|
699
|
+
Schema policy **fail** raises an exception if the schema cannot be deduced:
|
|
700
|
+
|
|
701
|
+
>>> data = otp.DataSource(db='US_COMP', tick_type='TRD', symbols='AAPL', date=otp.dt(2021, 3, 1),
|
|
702
|
+
... schema_policy='fail')
|
|
703
|
+
Traceback (most recent call last):
|
|
704
|
+
...
|
|
705
|
+
ValueError: No ticks found in database(-s) US_COMP::TRD
|
|
706
|
+
|
|
707
|
+
``back_to_first_tick`` sets how far back to go looking for the latest tick before ``start`` time:
|
|
708
|
+
|
|
709
|
+
>>> data = otp.DataSource(db='US_COMP', tick_type='TRD', symbols='AAPL', date=otp.dt(2022, 3, 2),
|
|
710
|
+
... back_to_first_tick=otp.Day(1))
|
|
711
|
+
>>> otp.run(data)
|
|
712
|
+
Time PRICE SIZE
|
|
713
|
+
0 2022-03-02 00:00:00.000 1.4 50
|
|
714
|
+
1 2022-03-02 00:00:00.000 1.0 100
|
|
715
|
+
2 2022-03-02 00:00:00.001 1.1 101
|
|
716
|
+
3 2022-03-02 00:00:00.002 1.2 102
|
|
717
|
+
|
|
718
|
+
``keep_first_tick_timestamp`` allows to show the original timestamp of the tick that was taken from before
|
|
719
|
+
the start time of the query:
|
|
720
|
+
|
|
721
|
+
>>> data = otp.DataSource(db='US_COMP', tick_type='TRD', symbols='AAPL', date=otp.dt(2022, 3, 2),
|
|
722
|
+
... back_to_first_tick=otp.Day(1), keep_first_tick_timestamp='ORIGIN_TIMESTAMP')
|
|
723
|
+
>>> otp.run(data)
|
|
724
|
+
Time ORIGIN_TIMESTAMP PRICE SIZE
|
|
725
|
+
0 2022-03-02 00:00:00.000 2022-03-01 00:00:00.002 1.4 50
|
|
726
|
+
1 2022-03-02 00:00:00.000 2022-03-02 00:00:00.000 1.0 100
|
|
727
|
+
2 2022-03-02 00:00:00.001 2022-03-02 00:00:00.001 1.1 101
|
|
728
|
+
3 2022-03-02 00:00:00.002 2022-03-02 00:00:00.002 1.2 102
|
|
729
|
+
|
|
730
|
+
``max_back_ticks_to_prepend`` is used with ``back_to_first_tick``
|
|
731
|
+
if more than 1 ticks before start time should be retrieved:
|
|
732
|
+
|
|
733
|
+
>>> data = otp.DataSource(db='US_COMP', tick_type='TRD', symbols='AAPL', date=otp.dt(2022, 3, 2),
|
|
734
|
+
... max_back_ticks_to_prepend=2, back_to_first_tick=otp.Day(1),
|
|
735
|
+
... keep_first_tick_timestamp='ORIGIN_TIMESTAMP')
|
|
736
|
+
>>> otp.run(data)
|
|
737
|
+
Time ORIGIN_TIMESTAMP PRICE SIZE
|
|
738
|
+
0 2022-03-02 00:00:00.000 2022-03-01 00:00:00.001 1.4 10
|
|
739
|
+
1 2022-03-02 00:00:00.000 2022-03-01 00:00:00.002 1.4 50
|
|
740
|
+
2 2022-03-02 00:00:00.000 2022-03-02 00:00:00.000 1.0 100
|
|
741
|
+
3 2022-03-02 00:00:00.001 2022-03-02 00:00:00.001 1.1 101
|
|
742
|
+
4 2022-03-02 00:00:00.002 2022-03-02 00:00:00.002 1.2 102
|
|
743
|
+
|
|
744
|
+
``where_clause_for_back_ticks`` is used to filter out ticks before the start time:
|
|
745
|
+
|
|
746
|
+
.. testcode::
|
|
747
|
+
:skipif: not is_supported_where_clause_for_back_ticks()
|
|
748
|
+
|
|
749
|
+
data = otp.DataSource(db='US_COMP', tick_type='TRD', symbols='AAPL', date=otp.dt(2022, 3, 2),
|
|
750
|
+
where_clause_for_back_ticks=otp.raw('SIZE>=50', dtype=bool),
|
|
751
|
+
back_to_first_tick=otp.Day(1), max_back_ticks_to_prepend=2,
|
|
752
|
+
keep_first_tick_timestamp='ORIGIN_TIMESTAMP')
|
|
753
|
+
df = otp.run(data)
|
|
754
|
+
print(df)
|
|
755
|
+
|
|
756
|
+
.. testoutput::
|
|
757
|
+
|
|
758
|
+
Time ORIGIN_TIMESTAMP PRICE SIZE
|
|
759
|
+
0 2022-03-02 00:00:00.000 2022-03-01 00:00:00.000 1.3 100
|
|
760
|
+
1 2022-03-02 00:00:00.000 2022-03-01 00:00:00.002 1.4 50
|
|
761
|
+
2 2022-03-02 00:00:00.000 2022-03-02 00:00:00.000 1.0 100
|
|
762
|
+
3 2022-03-02 00:00:00.001 2022-03-02 00:00:00.001 1.1 101
|
|
763
|
+
4 2022-03-02 00:00:00.002 2022-03-02 00:00:00.002 1.2 102
|
|
764
|
+
"""
|
|
765
|
+
|
|
766
|
+
self.logger = otp.get_logger(__name__, self.__class__.__name__)
|
|
767
|
+
|
|
768
|
+
if self._try_default_constructor(schema=schema, **kwargs):
|
|
769
|
+
return
|
|
770
|
+
|
|
771
|
+
schema = self._select_schema(schema, kwargs)
|
|
772
|
+
|
|
773
|
+
if schema and (not schema_policy or schema_policy is utils.adaptive):
|
|
774
|
+
schema_policy = self.POLICY_MANUAL
|
|
775
|
+
|
|
776
|
+
if schema_policy is utils.adaptive:
|
|
777
|
+
schema_policy = otp.config.default_schema_policy
|
|
778
|
+
|
|
779
|
+
# for cases when we want to explicitly convert into string,
|
|
780
|
+
# it might be symbol param or join_with_query parameter
|
|
781
|
+
if isinstance(tick_type, OnetickParameter):
|
|
782
|
+
tick_type = tick_type.parameter_expression
|
|
783
|
+
|
|
784
|
+
if date:
|
|
785
|
+
# TODO: write a warning in that case
|
|
786
|
+
start, end = get_start_end_by_date(date)
|
|
787
|
+
|
|
788
|
+
db, tick_type = self.__prepare_db_tick_type(db,
|
|
789
|
+
tick_type,
|
|
790
|
+
start,
|
|
791
|
+
end)
|
|
792
|
+
|
|
793
|
+
self._p_db = db
|
|
794
|
+
|
|
795
|
+
if not schema and schema_policy == self.POLICY_MANUAL_STRICT:
|
|
796
|
+
raise ValueError(
|
|
797
|
+
f"'{self.POLICY_MANUAL_STRICT}' schema policy was specified, but no schema has been provided"
|
|
798
|
+
)
|
|
799
|
+
|
|
800
|
+
self._p_strict = schema_policy in (self.POLICY_FAIL_STRICT,
|
|
801
|
+
self.POLICY_TOLERANT_STRICT,
|
|
802
|
+
self.POLICY_MANUAL_STRICT)
|
|
803
|
+
|
|
804
|
+
# this is deprecated, but user may have set some complex types or values in schema,
|
|
805
|
+
# let's infer basic onetick-py types from them
|
|
806
|
+
for k, v in schema.items():
|
|
807
|
+
schema[k] = ott.get_source_base_type(v)
|
|
808
|
+
|
|
809
|
+
self._p_schema = self.__prepare_schema(db, # tick type is embedded into the db
|
|
810
|
+
start,
|
|
811
|
+
schema_policy,
|
|
812
|
+
guess_schema,
|
|
813
|
+
schema)
|
|
814
|
+
|
|
815
|
+
if symbols is not None:
|
|
816
|
+
if symbol is utils.adaptive or symbol is None:
|
|
817
|
+
symbol = symbols
|
|
818
|
+
else:
|
|
819
|
+
# TODO: test it
|
|
820
|
+
raise ValueError('You have set the `symbol` and `symbols` parameters'
|
|
821
|
+
'together, it is not allowed. Please, clarify parameters')
|
|
822
|
+
|
|
823
|
+
if symbol_date is not None:
|
|
824
|
+
if symbol is utils.adaptive or symbol is None:
|
|
825
|
+
raise ValueError("Parameter 'symbol_date' can only be specified together with parameter 'symbols'")
|
|
826
|
+
if isinstance(symbol, (str, list)):
|
|
827
|
+
# this is a hack
|
|
828
|
+
# onetick.query doesn't have an interface to set symbol_date for the EP node
|
|
829
|
+
# so instead of setting symbols for the EP node,
|
|
830
|
+
# we will turn symbol list into the first stage query, and symbol_date will be set for this query
|
|
831
|
+
if isinstance(symbol, str):
|
|
832
|
+
symbol = [symbol]
|
|
833
|
+
symbol = Ticks(SYMBOL_NAME=symbol)
|
|
834
|
+
|
|
835
|
+
if isinstance(symbol, Symbols) and symbol._p_db is None:
|
|
836
|
+
symbol = Symbols.duplicate(symbol, db=db)
|
|
837
|
+
|
|
838
|
+
if identify_input_ts:
|
|
839
|
+
if "SYMBOL_NAME" in schema:
|
|
840
|
+
# TODO: think about how user could workaround it
|
|
841
|
+
raise ValueError("Parameter 'identify_input_ts' is set,"
|
|
842
|
+
" but field 'SYMBOL_NAME' is already in the schema")
|
|
843
|
+
schema["SYMBOL_NAME"] = str
|
|
844
|
+
self._p_schema["SYMBOL_NAME"] = str
|
|
845
|
+
if "TICK_TYPE" in schema:
|
|
846
|
+
raise ValueError("Parameter 'identify_input_ts' is set,"
|
|
847
|
+
" but field 'TICK_TYPE' is already in the schema")
|
|
848
|
+
schema["TICK_TYPE"] = str
|
|
849
|
+
self._p_schema["TICK_TYPE"] = str
|
|
850
|
+
|
|
851
|
+
# unobvious way to convert otp.Minute/Hour/... to number of seconds
|
|
852
|
+
if type(back_to_first_tick).__name__ == '_DatePartCls':
|
|
853
|
+
back_to_first_tick = int((ott.dt(0) + back_to_first_tick).timestamp())
|
|
854
|
+
|
|
855
|
+
if isinstance(back_to_first_tick, _Operation):
|
|
856
|
+
back_to_first_tick = otp.expr(back_to_first_tick)
|
|
857
|
+
|
|
858
|
+
if back_to_first_tick != 0 and keep_first_tick_timestamp:
|
|
859
|
+
schema[keep_first_tick_timestamp] = ott.nsectime
|
|
860
|
+
self._p_schema[keep_first_tick_timestamp] = ott.nsectime
|
|
861
|
+
|
|
862
|
+
if max_back_ticks_to_prepend < 1:
|
|
863
|
+
raise ValueError(f'`max_back_ticks_to_prepend` must be at least 1 '
|
|
864
|
+
f'but {max_back_ticks_to_prepend} was passed')
|
|
865
|
+
|
|
866
|
+
if where_clause_for_back_ticks is not None:
|
|
867
|
+
# TODO: add otp.param here
|
|
868
|
+
if not isinstance(where_clause_for_back_ticks, Raw):
|
|
869
|
+
raise ValueError(f'Currently only otp.raw is supported for `where_clause_for_back_ticks` '
|
|
870
|
+
f'but {type(where_clause_for_back_ticks)} was passed')
|
|
871
|
+
if where_clause_for_back_ticks.dtype is not bool:
|
|
872
|
+
raise ValueError(f'Only bool dtype for otp.raw in `where_clause_for_back_ticks` is supported '
|
|
873
|
+
f'but {where_clause_for_back_ticks.dtype} was passed')
|
|
874
|
+
where_clause_for_back_ticks = str(where_clause_for_back_ticks)
|
|
875
|
+
|
|
876
|
+
self._schema = schema
|
|
877
|
+
|
|
878
|
+
if isinstance(symbol, _QueryEvalWrapper):
|
|
879
|
+
symbol_str = repr(symbol)
|
|
880
|
+
else:
|
|
881
|
+
symbol_str = symbol
|
|
882
|
+
self.logger.info(
|
|
883
|
+
otp.utils.json_dumps(dict(db=db, symbol=symbol_str, tick_type=tick_type, start=start, end=end))
|
|
884
|
+
)
|
|
885
|
+
|
|
886
|
+
if (
|
|
887
|
+
isinstance(symbol, (Source, query, _QueryEvalWrapper, otq.GraphQuery))
|
|
888
|
+
or hasattr(symbol, "__iter__")
|
|
889
|
+
and not isinstance(symbol, (dict, str, OnetickParameter, _SymbolParamColumn))
|
|
890
|
+
):
|
|
891
|
+
super().__init__(
|
|
892
|
+
_start=start,
|
|
893
|
+
_end=end,
|
|
894
|
+
_base_ep_func=lambda: self._base_ep_for_cross_symbol(
|
|
895
|
+
db, symbol, tick_type,
|
|
896
|
+
identify_input_ts=identify_input_ts,
|
|
897
|
+
back_to_first_tick=back_to_first_tick,
|
|
898
|
+
keep_first_tick_timestamp=keep_first_tick_timestamp,
|
|
899
|
+
presort=presort, batch_size=batch_size, concurrency=concurrency,
|
|
900
|
+
max_back_ticks_to_prepend=max_back_ticks_to_prepend,
|
|
901
|
+
where_clause_for_back_ticks=where_clause_for_back_ticks,
|
|
902
|
+
symbol_date=symbol_date,
|
|
903
|
+
),
|
|
904
|
+
schema=schema,
|
|
905
|
+
)
|
|
906
|
+
else:
|
|
907
|
+
super().__init__(
|
|
908
|
+
_symbols=symbol,
|
|
909
|
+
_start=start,
|
|
910
|
+
_end=end,
|
|
911
|
+
_base_ep_func=lambda: self.base_ep(
|
|
912
|
+
db,
|
|
913
|
+
tick_type,
|
|
914
|
+
identify_input_ts=identify_input_ts,
|
|
915
|
+
back_to_first_tick=back_to_first_tick,
|
|
916
|
+
keep_first_tick_timestamp=keep_first_tick_timestamp,
|
|
917
|
+
max_back_ticks_to_prepend=max_back_ticks_to_prepend,
|
|
918
|
+
where_clause_for_back_ticks=where_clause_for_back_ticks,
|
|
919
|
+
),
|
|
920
|
+
schema=schema,
|
|
921
|
+
)
|
|
922
|
+
|
|
923
|
+
@property
|
|
924
|
+
def db(self):
|
|
925
|
+
return self._p_db
|
|
926
|
+
|
|
927
|
+
def _create_source(self, passthrough_ep, back_to_first_tick=0, keep_first_tick_timestamp=None):
|
|
928
|
+
"""Create graph that save original timestamp of first tick if needed"""
|
|
929
|
+
if back_to_first_tick != 0 and keep_first_tick_timestamp:
|
|
930
|
+
src = Source(otq.Passthrough(), schema=self._schema)
|
|
931
|
+
src.sink(otq.AddField(field=keep_first_tick_timestamp, value='TIMESTAMP'))
|
|
932
|
+
src.sink(passthrough_ep)
|
|
933
|
+
return src
|
|
934
|
+
return Source(passthrough_ep, schema=self._schema)
|
|
935
|
+
|
|
936
|
+
def _table_schema(self, src):
|
|
937
|
+
return src.table(**self._p_schema, strict=self._p_strict)
|
|
938
|
+
|
|
939
|
+
    def base_ep(
        self,
        db,
        tick_type,
        identify_input_ts,
        back_to_first_tick=0,
        keep_first_tick_timestamp=None,
        max_back_ticks_to_prepend=1,
        where_clause_for_back_ticks=None,
    ):
        """Build the base EP graph for the unbound-symbol (non cross-symbol) path.

        Returns a Source whose graph is a PASSTHROUGH (optionally followed by MERGE),
        with the deduced schema pinned via a TABLE ep.
        """
        # str_db embeds tick type(s) into the db string, e.g. 'DB::TT' or 'DB1::TT+DB2::TT'.
        str_db = convert_tick_type_to_str(tick_type, db)
        params = dict(
            go_back_to_first_tick=back_to_first_tick,
            max_back_ticks_to_prepend=max_back_ticks_to_prepend,
        )

        if where_clause_for_back_ticks is not None:
            params['where_clause_for_back_ticks'] = where_clause_for_back_ticks

        if isinstance(db, (list, _SymbolParamColumn)):
            src = self._create_source(otq.Passthrough(**params),
                                      back_to_first_tick=back_to_first_tick,
                                      keep_first_tick_timestamp=keep_first_tick_timestamp)

            if identify_input_ts or '+' in str_db or str_db.startswith('expr('):
                # PY-941: use MERGE only if we need to identify input or there are many databases,
                # otherwise use PASSTHROUGH, it seems to work faster in some cases
                src.sink(otq.Merge(identify_input_ts=identify_input_ts))
        else:
            if identify_input_ts:
                # NOTE(review): PASSTHROUGH with drop_fields=True on these names — presumably
                # this keeps/propagates SYMBOL_NAME/TICK_TYPE for the single-db case; confirm
                # against the PASSTHROUGH EP documentation.
                params["fields"] = "SYMBOL_NAME,TICK_TYPE"
                params["drop_fields"] = True

            src = self._create_source(otq.Passthrough(**params),
                                      back_to_first_tick=back_to_first_tick,
                                      keep_first_tick_timestamp=keep_first_tick_timestamp)
        # Bind the db::tick_type string to the graph (applies to both branches).
        src.tick_type(str_db)

        src = self._table_schema(src)
        return src
|
|
979
|
+
|
|
980
|
+
def _cross_symbol_convert(self, symbol, symbol_date=None):
|
|
981
|
+
tmp_otq = TmpOtq()
|
|
982
|
+
|
|
983
|
+
if isinstance(symbol, _QueryEvalWrapper):
|
|
984
|
+
symbol = symbol.to_eval_string(tmp_otq=tmp_otq, symbol_date=symbol_date)
|
|
985
|
+
elif isinstance(symbol, query):
|
|
986
|
+
if symbol_date is not None:
|
|
987
|
+
raise ValueError("Parameter 'symbol_date' is not supported if symbols are set with otp.query object")
|
|
988
|
+
symbol = symbol.to_eval_string()
|
|
989
|
+
elif isinstance(symbol, (Source, otq.GraphQuery)):
|
|
990
|
+
symbol = Source._convert_symbol_to_string(symbol, tmp_otq, symbol_date=symbol_date)
|
|
991
|
+
|
|
992
|
+
return symbol, tmp_otq
|
|
993
|
+
|
|
994
|
+
    def _base_ep_for_cross_symbol(
        self, db, symbol, tick_type, identify_input_ts,
        back_to_first_tick=0, keep_first_tick_timestamp=None,
        presort=utils.adaptive, batch_size=None, concurrency=utils.default,
        max_back_ticks_to_prepend=1,
        where_clause_for_back_ticks=None,
        symbol_date=None,
    ):
        """Build the EP graph for bound symbols: PASSTHROUGH -> [PRESORT] -> MERGE.

        *symbol* may be a Source/query/eval wrapper; it is converted to its string
        form first, and any temporary queries it produces are merged into this
        source's _tmp_otq so they are written out with the final query.
        """
        symbol, tmp_otq = self._cross_symbol_convert(symbol, symbol_date)

        self.logger.info(f'symbol={symbol}')

        tick_type = convert_tick_type_to_str(tick_type, db)

        kwargs = dict(
            go_back_to_first_tick=back_to_first_tick,
            max_back_ticks_to_prepend=max_back_ticks_to_prepend,
        )

        if where_clause_for_back_ticks is not None:
            kwargs['where_clause_for_back_ticks'] = where_clause_for_back_ticks

        src = self._create_source(otq.Passthrough(**kwargs),
                                  back_to_first_tick=back_to_first_tick,
                                  keep_first_tick_timestamp=keep_first_tick_timestamp)
        # PRESORT is on by default for the cross-symbol merge path.
        if presort is utils.adaptive:
            presort = True
        if presort:
            if batch_size is None:
                batch_size = otp.config.default_batch_size
            if concurrency is utils.default:
                concurrency = otp.configuration.default_presort_concurrency()
            if concurrency is None:
                # None means inherit concurrency from the query where this EP is used
                # otq.Presort does not support None
                concurrency = ''
            # With PRESORT the bound symbols/tick type are attached to the PRESORT node,
            # and a plain MERGE follows.
            src.sink(
                otq.Presort(batch_size=batch_size, max_concurrency=concurrency).symbols(symbol).tick_type(tick_type)
            )
            src.sink(otq.Merge(identify_input_ts=identify_input_ts))
        else:
            # Without PRESORT, MERGE itself carries the symbols and tick type.
            src.sink(
                otq.Merge(identify_input_ts=identify_input_ts).symbols(symbol).tick_type(tick_type)
            )

        # Carry over temporary queries produced while converting the symbol object.
        src._tmp_otq.merge(tmp_otq)

        src = self._table_schema(src)
        return src
|
|
1043
|
+
|
|
1044
|
+
|
|
1045
|
+
Custom = DataSource  # for backward compatibility, previously we had only Custom
|