onetick_py-1.177.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- locator_parser/__init__.py +0 -0
- locator_parser/acl.py +73 -0
- locator_parser/actions.py +262 -0
- locator_parser/common.py +368 -0
- locator_parser/io.py +43 -0
- locator_parser/locator.py +150 -0
- onetick/__init__.py +101 -0
- onetick/doc_utilities/__init__.py +3 -0
- onetick/doc_utilities/napoleon.py +40 -0
- onetick/doc_utilities/ot_doctest.py +140 -0
- onetick/doc_utilities/snippets.py +279 -0
- onetick/lib/__init__.py +4 -0
- onetick/lib/instance.py +141 -0
- onetick/py/__init__.py +293 -0
- onetick/py/_stack_info.py +89 -0
- onetick/py/_version.py +2 -0
- onetick/py/aggregations/__init__.py +11 -0
- onetick/py/aggregations/_base.py +648 -0
- onetick/py/aggregations/_docs.py +948 -0
- onetick/py/aggregations/compute.py +286 -0
- onetick/py/aggregations/functions.py +2216 -0
- onetick/py/aggregations/generic.py +104 -0
- onetick/py/aggregations/high_low.py +80 -0
- onetick/py/aggregations/num_distinct.py +83 -0
- onetick/py/aggregations/order_book.py +501 -0
- onetick/py/aggregations/other.py +1014 -0
- onetick/py/backports.py +26 -0
- onetick/py/cache.py +374 -0
- onetick/py/callback/__init__.py +5 -0
- onetick/py/callback/callback.py +276 -0
- onetick/py/callback/callbacks.py +131 -0
- onetick/py/compatibility.py +798 -0
- onetick/py/configuration.py +771 -0
- onetick/py/core/__init__.py +0 -0
- onetick/py/core/_csv_inspector.py +93 -0
- onetick/py/core/_internal/__init__.py +0 -0
- onetick/py/core/_internal/_manually_bound_value.py +6 -0
- onetick/py/core/_internal/_nodes_history.py +250 -0
- onetick/py/core/_internal/_op_utils/__init__.py +0 -0
- onetick/py/core/_internal/_op_utils/every_operand.py +9 -0
- onetick/py/core/_internal/_op_utils/is_const.py +10 -0
- onetick/py/core/_internal/_per_tick_scripts/tick_list_sort_template.script +121 -0
- onetick/py/core/_internal/_proxy_node.py +140 -0
- onetick/py/core/_internal/_state_objects.py +2312 -0
- onetick/py/core/_internal/_state_vars.py +93 -0
- onetick/py/core/_source/__init__.py +0 -0
- onetick/py/core/_source/_symbol_param.py +95 -0
- onetick/py/core/_source/schema.py +97 -0
- onetick/py/core/_source/source_methods/__init__.py +0 -0
- onetick/py/core/_source/source_methods/aggregations.py +809 -0
- onetick/py/core/_source/source_methods/applyers.py +296 -0
- onetick/py/core/_source/source_methods/columns.py +141 -0
- onetick/py/core/_source/source_methods/data_quality.py +301 -0
- onetick/py/core/_source/source_methods/debugs.py +272 -0
- onetick/py/core/_source/source_methods/drops.py +120 -0
- onetick/py/core/_source/source_methods/fields.py +619 -0
- onetick/py/core/_source/source_methods/filters.py +1002 -0
- onetick/py/core/_source/source_methods/joins.py +1413 -0
- onetick/py/core/_source/source_methods/merges.py +605 -0
- onetick/py/core/_source/source_methods/misc.py +1455 -0
- onetick/py/core/_source/source_methods/pandases.py +155 -0
- onetick/py/core/_source/source_methods/renames.py +356 -0
- onetick/py/core/_source/source_methods/sorts.py +183 -0
- onetick/py/core/_source/source_methods/switches.py +142 -0
- onetick/py/core/_source/source_methods/symbols.py +117 -0
- onetick/py/core/_source/source_methods/times.py +627 -0
- onetick/py/core/_source/source_methods/writes.py +986 -0
- onetick/py/core/_source/symbol.py +205 -0
- onetick/py/core/_source/tmp_otq.py +222 -0
- onetick/py/core/column.py +209 -0
- onetick/py/core/column_operations/__init__.py +0 -0
- onetick/py/core/column_operations/_methods/__init__.py +4 -0
- onetick/py/core/column_operations/_methods/_internal.py +28 -0
- onetick/py/core/column_operations/_methods/conversions.py +216 -0
- onetick/py/core/column_operations/_methods/methods.py +292 -0
- onetick/py/core/column_operations/_methods/op_types.py +160 -0
- onetick/py/core/column_operations/accessors/__init__.py +0 -0
- onetick/py/core/column_operations/accessors/_accessor.py +28 -0
- onetick/py/core/column_operations/accessors/decimal_accessor.py +104 -0
- onetick/py/core/column_operations/accessors/dt_accessor.py +537 -0
- onetick/py/core/column_operations/accessors/float_accessor.py +184 -0
- onetick/py/core/column_operations/accessors/str_accessor.py +1367 -0
- onetick/py/core/column_operations/base.py +1121 -0
- onetick/py/core/cut_builder.py +150 -0
- onetick/py/core/db_constants.py +20 -0
- onetick/py/core/eval_query.py +245 -0
- onetick/py/core/lambda_object.py +441 -0
- onetick/py/core/multi_output_source.py +232 -0
- onetick/py/core/per_tick_script.py +2256 -0
- onetick/py/core/query_inspector.py +464 -0
- onetick/py/core/source.py +1744 -0
- onetick/py/db/__init__.py +2 -0
- onetick/py/db/_inspection.py +1128 -0
- onetick/py/db/db.py +1327 -0
- onetick/py/db/utils.py +64 -0
- onetick/py/docs/__init__.py +0 -0
- onetick/py/docs/docstring_parser.py +112 -0
- onetick/py/docs/utils.py +81 -0
- onetick/py/functions.py +2398 -0
- onetick/py/license.py +190 -0
- onetick/py/log.py +88 -0
- onetick/py/math.py +935 -0
- onetick/py/misc.py +470 -0
- onetick/py/oqd/__init__.py +22 -0
- onetick/py/oqd/eps.py +1195 -0
- onetick/py/oqd/sources.py +325 -0
- onetick/py/otq.py +216 -0
- onetick/py/pyomd_mock.py +47 -0
- onetick/py/run.py +916 -0
- onetick/py/servers.py +173 -0
- onetick/py/session.py +1347 -0
- onetick/py/sources/__init__.py +19 -0
- onetick/py/sources/cache.py +167 -0
- onetick/py/sources/common.py +128 -0
- onetick/py/sources/csv.py +642 -0
- onetick/py/sources/custom.py +85 -0
- onetick/py/sources/data_file.py +305 -0
- onetick/py/sources/data_source.py +1045 -0
- onetick/py/sources/empty.py +94 -0
- onetick/py/sources/odbc.py +337 -0
- onetick/py/sources/order_book.py +271 -0
- onetick/py/sources/parquet.py +168 -0
- onetick/py/sources/pit.py +191 -0
- onetick/py/sources/query.py +495 -0
- onetick/py/sources/snapshots.py +419 -0
- onetick/py/sources/split_query_output_by_symbol.py +198 -0
- onetick/py/sources/symbology_mapping.py +123 -0
- onetick/py/sources/symbols.py +374 -0
- onetick/py/sources/ticks.py +825 -0
- onetick/py/sql.py +70 -0
- onetick/py/state.py +251 -0
- onetick/py/types.py +2131 -0
- onetick/py/utils/__init__.py +70 -0
- onetick/py/utils/acl.py +93 -0
- onetick/py/utils/config.py +186 -0
- onetick/py/utils/default.py +49 -0
- onetick/py/utils/file.py +38 -0
- onetick/py/utils/helpers.py +76 -0
- onetick/py/utils/locator.py +94 -0
- onetick/py/utils/perf.py +498 -0
- onetick/py/utils/query.py +49 -0
- onetick/py/utils/render.py +1374 -0
- onetick/py/utils/script.py +244 -0
- onetick/py/utils/temp.py +471 -0
- onetick/py/utils/types.py +120 -0
- onetick/py/utils/tz.py +84 -0
- onetick_py-1.177.0.dist-info/METADATA +137 -0
- onetick_py-1.177.0.dist-info/RECORD +152 -0
- onetick_py-1.177.0.dist-info/WHEEL +5 -0
- onetick_py-1.177.0.dist-info/entry_points.txt +2 -0
- onetick_py-1.177.0.dist-info/licenses/LICENSE +21 -0
- onetick_py-1.177.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,605 @@
import warnings
from typing import TYPE_CHECKING, List, Optional, Union

from onetick import py as otp
from onetick.py.otq import otq
from onetick.py.functions import __copy_sources_on_merge_or_join
from onetick.py.aggregations._docs import (
    _boundary_tick_bucket_doc,
    _bucket_end_condition_doc,
    _bucket_interval_doc,
    _bucket_time_doc,
    _bucket_units_doc,
    _end_condition_per_group_doc,
    _group_by_doc,
    _groups_to_display_doc,
)
from onetick.py.docs.utils import docstring, param_doc
from onetick.py.aggregations._base import _Aggregation
from onetick.py.compatibility import is_diff_show_all_ticks_supported


if TYPE_CHECKING:
    from onetick.py.core.source import Source


def __add__(self: 'Source', other: 'Source') -> 'Source':
    return otp.merge([self, other])


def append(self: 'Source', other) -> 'Source':
    """
    Merge data source with `other`

    Parameters
    ----------
    other: List, Source
        data source to merge

    Returns
    -------
    Source
    """
    if isinstance(other, list):
        return otp.merge(other + [self])
    else:
        return otp.merge([self, other])


def diff(self: 'Source', other: 'Source',
         fields: Optional[Union[str, List[str]]] = None,
         ignore: bool = False,
         output_ignored_fields: Optional[bool] = None,
         show_only_fields_that_differ: Optional[bool] = None,
         show_matching_ticks: Optional[bool] = None,
         show_all_ticks: bool = False,
         non_decreasing_value_fields: Optional[Union[str, List[str]]] = None,
         threshold: Optional[int] = None,
         left_prefix: str = 'L',
         right_prefix: str = 'R',
         drop_index: bool = True) -> 'Source':
    """
    Compare two time-series.

    A tick from the first time series is considered to match a tick from the second time series
    if both ticks have identical ``non_decreasing_value_fields`` values
    and matching values of all ``fields`` that are present in both ticks and are not listed as the fields to be ignored.

    A field is considered to match another field
    if both fields have identical names, comparable types, and identical values.

    Field names in an output tick are represented as <source_name>.<field_name>.

    Note
    ----
    The first source is considered to be the base for starting the matching process.
    If a match is found, all ticks in the second source that occur before the first matched tick
    are considered different from those in the first source.
    The subsequent searches for the matches in the second source will begin after this latest matched tick.
    The ticks from the second source that are between the found matches
    are considered different from the ticks in the first source.

    Parameters
    ----------
    fields:
        List of fields to be used or ignored while comparing input time series.
        By default, if this value is not set, all fields are used in comparison.
    ignore:
        If True, fields specified in the ``fields`` parameter are ignored while comparing the input time series.
        Otherwise, by default, only the fields that are specified in the ``fields`` parameter are used in comparison.
        If the ``fields`` parameter is empty, the value of this parameter is ignored.
    output_ignored_fields:
        If False, the fields which are not used in comparison are excluded from the output.
        Otherwise, by default, all fields are propagated.
    show_only_fields_that_differ:
        If True (default), the method outputs only the tick fields the values of which were different.
        But if ``output_ignored_fields`` is set to True, then ignored fields are still propagated.
    show_matching_ticks:
        If True, the output of this method consists of matched ticks from both input time series
        instead of unmatched ticks.
        The output tick timestamp is equal to the earliest timestamp of its corresponding input ticks.
        Default value is False.
    show_all_ticks: bool
        If specified, the output of this EP consists of both matched and unmatched ticks from both input time series.
        ``MATCH_STATUS`` field will be added to the output tick with the possible values of:

        * ``0`` - different ticks
        * ``1`` - matching ticks
        * ``2`` - tick from one source only

        Default: ``False``
    non_decreasing_value_fields:
        List of *non-decreasing* value fields to be used for matching.
        If value of this parameter is **TIMESTAMP** (default), it compares two time series based on tick timestamp.
        If other field is specified, a field named <source_name>.<TIMESTAMP>
        will be added to the tick whose value equals the tick's primary timestamp.
    threshold:
        Specifies the number of first diff ticks to propagate.
        By default all such ticks are propagated.
    left_prefix:
        The prefix used in the output for fields from the left source.
    right_prefix:
        The prefix used in the output for fields from the right source.
    drop_index: bool
        If False, the output tick will also carry the position(s) of input tick(s)
        in input time series in field <source_name>.INDEX for each source.
        The lowest position number is 1.
        If True then <source_name>.INDEX fields will not be included in the output.

    Returns
    -------
    :class:`Source`

    See Also
    --------
    **DIFF** OneTick event processor

    Examples
    --------

    Print all ticks that have any unmatched fields:

    >>> t = otp.Ticks(A=[1, 2], B=[0, 0])
    >>> q = otp.Ticks(A=[1, 3], B=[0, 0])
    >>> data = t.diff(q)
    >>> otp.run(data)
                         Time  L.A  R.A
    0 2003-12-01 00:00:00.001    2    3

    Also show fields that were not different:

    >>> t = otp.Ticks(A=[1, 2], B=[0, 0])
    >>> q = otp.Ticks(A=[1, 3], B=[0, 0])
    >>> data = t.diff(q, show_only_fields_that_differ=False)
    >>> otp.run(data)
                         Time  L.A  L.B  R.A  R.B
    0 2003-12-01 00:00:00.001    2    0    3    0

    Change prefixes for output fields:

    >>> t = otp.Ticks(A=[1, 2], B=[0, 0])
    >>> q = otp.Ticks(A=[1, 3], B=[0, 0])
    >>> data = t.diff(q, left_prefix='LEFT', right_prefix='RIGHT')
    >>> otp.run(data)
                         Time  LEFT.A  RIGHT.A
    0 2003-12-01 00:00:00.001       2        3

    If there are several matching ticks then only the first will be matched:

    .. testcode::
       :skipif: not otp.compatibility.is_diff_show_matching_ticks_supported()

       t = otp.Ticks(A=[1, 1, 1, 1, 1], B=[1, 2, 3, 4, 5], offset=[0, 0, 1000, 2000, 2000])
       q = otp.Ticks(A=[1, 1, 1, 1, 1], B=[3, 4, 5, 6, 7], offset=[0, 1000, 1000, 2000, 2000])
       data = t.diff(q, fields=['A'], show_matching_ticks=True, output_ignored_fields=True)
       print(otp.run(data))

    .. testoutput::

                        Time  L.A  L.B  R.A  R.B
       0 2003-12-01 00:00:00    1    1    1    3
       1 2003-12-01 00:00:01    1    3    1    4
       2 2003-12-01 00:00:02    1    4    1    6
       3 2003-12-01 00:00:02    1    5    1    7

    Showing diff for every tick with ``show_all_ticks`` parameter:

    .. testcode::
       :skipif: not otp.compatibility.is_diff_show_all_ticks_supported()

       t = otp.Ticks(A=[1, 2, 3], B=[0, 0, 1])
       q = otp.Ticks(A=[1, 3], B=[0, 0])
       data = t.diff(q, show_all_ticks=True)
       print(otp.run(data))

    .. testoutput::

                            Time  MATCH_STATUS  L.A  R.A  L.B
       0 2003-12-01 00:00:00.000             1    0    0    0
       1 2003-12-01 00:00:00.001             0    2    3    0
       2 2003-12-01 00:00:00.002             2    3    0    1
    """

    if not fields:
        fields = []
    if isinstance(fields, str):
        fields = [fields]
    fields = list(map(str, fields))
    for field in fields:
        if field not in self.schema or field not in other.schema:
            raise ValueError(f"Field {field} is not in schema")

    if threshold is None:
        threshold = ''  # type: ignore
    elif threshold < 0:
        raise ValueError("Parameter 'threshold' must be non-negative")

    ep_params = dict(
        fields=','.join(map(str, fields)),
        ignore=ignore,
        threshold=threshold,
    )

    if non_decreasing_value_fields is not None:
        if not non_decreasing_value_fields:
            raise ValueError("Parameter 'non_decreasing_value_fields' can't be empty")
        if isinstance(non_decreasing_value_fields, str):
            non_decreasing_value_fields = [non_decreasing_value_fields]
        non_decreasing_value_fields = list(map(str, non_decreasing_value_fields))
        for field in non_decreasing_value_fields:
            if field not in self.schema or field not in other.schema:
                raise ValueError(f"Field {field} is not in schema")
        if otp.compatibility.is_diff_non_decreasing_value_fields_supported():
            ep_params['non_decreasing_value_fields'] = ','.join(map(str, non_decreasing_value_fields))
        else:
            warnings.warn("Parameter 'non_decreasing_value_fields' is not supported on this version of OneTick")

    if show_only_fields_that_differ and output_ignored_fields:
        raise ValueError(
            "Parameters 'output_ignored_fields' and 'show_only_fields_that_differ' can't be set at the same time"
        )

    if show_all_ticks:
        if not is_diff_show_all_ticks_supported():
            raise RuntimeError('`show_all_ticks` parameter not supported on current OneTick version')

        ep_params['show_all_ticks'] = show_all_ticks

    if show_only_fields_that_differ is None and output_ignored_fields is None:
        if ignore:
            ep_params['output_ignored_fields'] = True
        else:
            ep_params['output_ignored_fields'] = False
        ep_params['show_only_fields_that_differ'] = not ep_params['output_ignored_fields']
    elif show_only_fields_that_differ is None:
        ep_params['show_only_fields_that_differ'] = not output_ignored_fields
    elif output_ignored_fields is None:
        ep_params['output_ignored_fields'] = False
        ep_params['show_only_fields_that_differ'] = show_only_fields_that_differ

    if show_matching_ticks is not None:
        if otp.compatibility.is_diff_show_matching_ticks_supported():
            ep_params['show_matching_ticks'] = show_matching_ticks
        else:
            warnings.warn("Parameter 'show_matching_ticks' is not supported on this version of OneTick")

    if ep_params.get('show_matching_ticks') and show_only_fields_that_differ is None:
        ep_params['show_only_fields_that_differ'] = False

    schema = {
        f'{left_prefix}.INDEX': int,
        f'{right_prefix}.INDEX': int,
    }
    for src_prefix, src_schema in [(left_prefix, self.schema), (right_prefix, other.schema)]:
        for field, dtype in src_schema.items():
            if ignore and field in fields and not output_ignored_fields:
                continue
            schema[f'{src_prefix}.{field}'] = dtype

    if show_all_ticks:
        schema['MATCH_STATUS'] = int

    result = otp.Source(
        node=otq.Diff(**ep_params),
        schema=schema,
    )
    __copy_sources_on_merge_or_join(result, (self, other),
                                    names=(left_prefix, right_prefix))
    if drop_index:
        result = result.drop([f'{left_prefix}.INDEX', f'{right_prefix}.INDEX'])

    return result


def lee_and_ready(self: 'Source', qte: 'Source',
                  quote_delay: float = 0.0,
                  show_quote_fields: bool = False) -> 'Source':
    """
    Adds a numeric attribute to each tick in the stream of trade ticks,
    the value of which classifies the trade as a buy, a sell, or undefined.

    This is an implementation of the Lee and Ready algorithm:
    Match up a trade with the most recent good quote that is at least X seconds older than the trade —

    * if the trade's price is closer to the ask price, label trade a buy (1);
    * else, if it is closer to the bid price, label it a sell (-1);
    * else, if trade's price is at the mid-quote, then if it is higher than the last trade's price,
      classify it as a buy (1);
    * else, if it is less, classify it as a sell (-1);
    * else, if it is the same, classify it the same way as the previous trade was classified.
    * If all of these fail, classify the trade as unknown (0).

    This method expects two sources as its input: source of trades (``self``) and source of quotes (``qte``).
    While ticks propagated by trades source should have the PRICE,SIZE fields,
    ticks propagated by ``qte`` source should have the ASK_PRICE,ASK_SIZE,BID_PRICE,BID_SIZE fields.

    Output of this method is a time series of trades ticks
    with the Lee and Ready indicator field (**BuySellFlag**) added.

    Parameters
    ----------
    qte:
        The source of quotes.
    quote_delay:
        The minimum number of seconds that needs to elapse between the trade and the quote
        before the quote can be considered for a join with the trade.

        The value is a float number.
        Only the first three digits of the fraction are currently used,
        thus the highest supported granularity of quote delay is milliseconds.
        Sub-millisecond parts of the trade's and the quote's timestamps are ignored when computing delay between them.
    show_quote_fields:
        If set to True, the quote fields that classified trade will also be shown for each trade.
        Note that if there were no quotes before trade, then quote fields will be set to 0.

    Returns
    -------
    :class:`Source`

    See Also
    --------
    **LEE_AND_READY** OneTick event processor

    Examples
    --------

    Add field **BuySellFlag** to the ``trd`` source:

    >>> import os
    >>> trd = otp.CSV(os.path.join(csv_path, 'trd.csv'))
    >>> qte = otp.CSV(os.path.join(csv_path, 'qte.csv'))
    >>> data = trd.lee_and_ready(qte)
    >>> otp.run(data).head(5)
                            Time   PRICE  SIZE  BuySellFlag
    0 2003-12-01 09:00:00.086545  178.26   246         -1.0
    1 2003-12-01 09:00:00.245208  178.26     1          1.0
    2 2003-12-01 09:00:00.245503  178.26     1          1.0
    3 2003-12-01 09:00:00.387100  178.21     9          1.0
    4 2003-12-01 09:00:00.387105  178.21    12          1.0

    Fields from ``qte`` can be added with ``show_quote_fields`` parameter:

    >>> data = trd.lee_and_ready(qte, show_quote_fields=True)
    >>> data = data.drop(['ASK_SIZE', 'BID_SIZE'])
    >>> otp.run(data).head(5)
                            Time   PRICE  SIZE  BuySellFlag              QTE_TIMESTAMP  ASK_PRICE  BID_PRICE
    0 2003-12-01 09:00:00.086545  178.26   246         -1.0 2003-12-01 09:00:00.028307     178.80     177.92
    1 2003-12-01 09:00:00.245208  178.26     1          1.0 2003-12-01 09:00:00.244626     178.57     177.75
    2 2003-12-01 09:00:00.245503  178.26     1          1.0 2003-12-01 09:00:00.244626     178.57     177.75
    3 2003-12-01 09:00:00.387100  178.21     9          1.0 2003-12-01 09:00:00.387096     178.57     177.75
    4 2003-12-01 09:00:00.387105  178.21    12          1.0 2003-12-01 09:00:00.387096     178.57     177.75

    Set ``quote_delay`` parameter to 300 milliseconds:

    >>> data = trd.lee_and_ready(qte, show_quote_fields=True, quote_delay=0.3)
    >>> data = data.drop(['ASK_SIZE', 'BID_SIZE'])
    >>> otp.run(data).head(5)
                            Time   PRICE  SIZE  BuySellFlag              QTE_TIMESTAMP  ASK_PRICE  BID_PRICE
    0 2003-12-01 09:00:00.086545  178.26   246          0.0 1969-12-31 19:00:00.000000        0.0       0.00
    1 2003-12-01 09:00:00.245208  178.26     1          0.0 1969-12-31 19:00:00.000000        0.0       0.00
    2 2003-12-01 09:00:00.245503  178.26     1          0.0 1969-12-31 19:00:00.000000        0.0       0.00
    3 2003-12-01 09:00:00.387100  178.21     9         -1.0 2003-12-01 09:00:00.087540      180.0     177.62
    4 2003-12-01 09:00:00.387105  178.21    12         -1.0 2003-12-01 09:00:00.087540      180.0     177.62
    """

    schema = self.schema.copy()
    if show_quote_fields:
        schema.update(**qte.schema, **{'QTE_TIMESTAMP': otp.nsectime})
    schema.update(**{'BuySellFlag': float})

    result = otp.Source(
        node=otq.LeeAndReady(
            quote_delay=quote_delay,
            show_quote_fields=show_quote_fields
        ),
        schema=schema,
    )
    __copy_sources_on_merge_or_join(result, (self, qte), names=('TRD', 'QTE'))

    return result


_smallest_time_granularity_msec_name_doc = param_doc(
    name='smallest_time_granularity_msec',
    annotation=int,
    desc="""
    This method works by first sampling the source tick series with a constant rate.
    This is the sampling interval (1 / rate).
    As a consequence, any computed delay will be divisible by this value.
    It is important to carefully choose this parameter, as this method has a computational cost of O(N * log(N))
    per bucket, where N = (*duration_of_bucket_in_msec* + ``max_ts_delay_msec``) / ``smallest_time_granularity_msec``.
    Default: 1.
    """,
    default=1,
)


_max_ts_delay_msec_doc = param_doc(
    name='max_ts_delay_msec',
    annotation=int,
    desc="""
    The known upper bound on the delay's magnitude.
    The computed delay will never be greater than this value.
    Default: 1000.
    """,
    default=1000,
)


@docstring(
    parameters=[
        _smallest_time_granularity_msec_name_doc,
        _max_ts_delay_msec_doc,
        _bucket_interval_doc,
        _bucket_time_doc,
        _bucket_units_doc,
        _bucket_end_condition_doc,
        _end_condition_per_group_doc,
        _boundary_tick_bucket_doc,
        _group_by_doc,
        _groups_to_display_doc,
    ],
    add_self=True,
)
def estimate_ts_delay(self: 'Source', other: 'Source',
                      input_field1_name: str, input_field2_name: str,
                      **kwargs) -> 'Source':
    """
    Given two time series of ticks, computes how much delay the second series has in relation to the first.
    A negative delay should be interpreted as the first series being delayed instead.

    The two series do not necessarily have to be identical with respect to the delay,
    nor do they have to represent the same quantity (i.e. be of the same magnitude).
    The only requirement to get meaningful results is that the two series be (linearly) correlated.

    Output ticks always have 2 fields:

    * *DELAY_MS*, which is the computed delay in milliseconds, and
    * *CORRELATION*, which is the Zero-Normalized Cross-Correlation of the two time series
      after that delay is applied.

    Parameters
    ----------
    other: Source
        The other source.
    input_field1_name: str
        The name of the compared field from the first source.
    input_field2_name: str
        The name of the compared field from the second source.

    Returns
    -------
    :class:`Source`

    See Also
    --------
    **ESTIMATE_TS_DELAY** OneTick event processor

    Examples
    --------

    Calculating delay between the same sources will result in DELAY_MSEC equal to 0.0 and CORRELATION equal to 1.0:
    (Note that correlation method may return NaN values for smaller buckets):

    .. testcode::
       :skipif: not otp.compatibility.is_supported_estimate_ts_delay()

       import os
       trd = otp.CSV(os.path.join(csv_path, 'trd.csv'))
       other = trd.deepcopy()
       data = trd.estimate_ts_delay(other, 'PRICE', 'PRICE', bucket_interval=10, bucket_time='start')
       df = otp.run(data, start=otp.dt(2003, 12, 1, 9), end=otp.dt(2003, 12, 1, 10))
       print(df)

    .. testoutput::

                          Time  DELAY_MSEC  CORRELATION
       0   2003-12-01 09:00:00         0.0          1.0
       1   2003-12-01 09:00:10         0.0          1.0
       2   2003-12-01 09:00:20         0.0          1.0
       3   2003-12-01 09:00:30         0.0          1.0
       4   2003-12-01 09:00:40         0.0          1.0
       ..                  ...         ...          ...
       355 2003-12-01 09:59:10         0.0          1.0
       356 2003-12-01 09:59:20         0.0          1.0
       357 2003-12-01 09:59:30         NaN          NaN
       358 2003-12-01 09:59:40         0.0          1.0
       359 2003-12-01 09:59:50         0.0          1.0

       [360 rows x 3 columns]

    Try changing timestamps of other time-series to see how delay values are changed:

    .. testcode::
       :skipif: not otp.compatibility.is_supported_estimate_ts_delay()

       import os
       trd = otp.CSV(os.path.join(csv_path, 'trd.csv'))
       other = trd.deepcopy()
       other['TIMESTAMP'] += otp.Milli(5)
       data = trd.estimate_ts_delay(other, 'PRICE', 'PRICE', bucket_interval=10, bucket_time='start')
       df = otp.run(data, start=otp.dt(2003, 12, 1, 9), end=otp.dt(2003, 12, 1, 10))
       print(df)

    .. testoutput::

                          Time  DELAY_MSEC  CORRELATION
       0   2003-12-01 09:00:00        -5.0          1.0
       1   2003-12-01 09:00:10        -5.0          1.0
       2   2003-12-01 09:00:20        -5.0          1.0
       3   2003-12-01 09:00:30        -5.0          1.0
       4   2003-12-01 09:00:40        -5.0          1.0
       ..                  ...         ...          ...
       355 2003-12-01 09:59:10        -5.0          1.0
       356 2003-12-01 09:59:20        -5.0          1.0
       357 2003-12-01 09:59:30         NaN          NaN
       358 2003-12-01 09:59:40        -5.0          1.0
       359 2003-12-01 09:59:50        -5.0          1.0

       [360 rows x 3 columns]

    Try filtering out some ticks from other time-series to see how delay and correlation values are changed:

    .. testcode::
       :skipif: not otp.compatibility.is_supported_estimate_ts_delay()

       import os
       trd = otp.CSV(os.path.join(csv_path, 'trd.csv'))
       other = trd.deepcopy()
       other = other[::2]
       data = trd.estimate_ts_delay(other, 'PRICE', 'PRICE', bucket_interval=10, bucket_time='start')
       df = otp.run(data, start=otp.dt(2003, 12, 1, 9), end=otp.dt(2003, 12, 1, 10))
       print(df)

    .. testoutput::

                          Time  DELAY_MSEC  CORRELATION
       0   2003-12-01 09:00:00         0.0     1.000000
       1   2003-12-01 09:00:10         0.0     1.000000
       2   2003-12-01 09:00:20         0.0     1.000000
       3   2003-12-01 09:00:30         0.0     0.999115
       4   2003-12-01 09:00:40     -1000.0     0.706111
       ..                  ...         ...          ...
       355 2003-12-01 09:59:10         0.0     0.983786
       356 2003-12-01 09:59:20         0.0     1.000000
       357 2003-12-01 09:59:30         NaN          NaN
       358 2003-12-01 09:59:40      -306.0     0.680049
       359 2003-12-01 09:59:50         0.0     0.752731

       [360 rows x 3 columns]
    """
    if not otp.compatibility.is_supported_estimate_ts_delay():
        raise RuntimeError('estimate_ts_delay() is not supported on this OneTick version')

    if input_field1_name not in self.schema:
        raise ValueError(f"Field '{input_field1_name}' is not in the schema of the first source.")

    if input_field2_name not in other.schema:
        raise ValueError(f"Field '{input_field2_name}' is not in the schema of the second source.")

    schema = {'DELAY_MSEC': float, 'CORRELATION': float}

    smallest_time_granularity_msec = kwargs.pop('smallest_time_granularity_msec', 1)
    max_ts_delay_msec = kwargs.pop('max_ts_delay_msec', 1000)

    # we only use this class for validation of common parameters
    class EstimateTsDelay(_Aggregation):
        EP = otq.EstimateTsDelay
        NAME = 'ESTIMATE_TS_DELAY'
        FIELDS_TO_SKIP = ['column_name', 'running', 'all_fields', 'output_field_name']

    common_ep_params = {k.lower(): v for k, v in EstimateTsDelay('TIMESTAMP', **kwargs).ep_params.items()}

    result = otp.Source(
        node=otq.EstimateTsDelay(
            input_field1_name=input_field1_name,
            input_field2_name=input_field2_name,
            smallest_time_granularity_msec=smallest_time_granularity_msec,
            max_ts_delay_msec=max_ts_delay_msec,
            **common_ep_params,
        ),
        schema=schema,
    )
    __copy_sources_on_merge_or_join(result, (self, other))
    return result