onetick-py 1.177.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- locator_parser/__init__.py +0 -0
- locator_parser/acl.py +73 -0
- locator_parser/actions.py +262 -0
- locator_parser/common.py +368 -0
- locator_parser/io.py +43 -0
- locator_parser/locator.py +150 -0
- onetick/__init__.py +101 -0
- onetick/doc_utilities/__init__.py +3 -0
- onetick/doc_utilities/napoleon.py +40 -0
- onetick/doc_utilities/ot_doctest.py +140 -0
- onetick/doc_utilities/snippets.py +279 -0
- onetick/lib/__init__.py +4 -0
- onetick/lib/instance.py +141 -0
- onetick/py/__init__.py +293 -0
- onetick/py/_stack_info.py +89 -0
- onetick/py/_version.py +2 -0
- onetick/py/aggregations/__init__.py +11 -0
- onetick/py/aggregations/_base.py +648 -0
- onetick/py/aggregations/_docs.py +948 -0
- onetick/py/aggregations/compute.py +286 -0
- onetick/py/aggregations/functions.py +2216 -0
- onetick/py/aggregations/generic.py +104 -0
- onetick/py/aggregations/high_low.py +80 -0
- onetick/py/aggregations/num_distinct.py +83 -0
- onetick/py/aggregations/order_book.py +501 -0
- onetick/py/aggregations/other.py +1014 -0
- onetick/py/backports.py +26 -0
- onetick/py/cache.py +374 -0
- onetick/py/callback/__init__.py +5 -0
- onetick/py/callback/callback.py +276 -0
- onetick/py/callback/callbacks.py +131 -0
- onetick/py/compatibility.py +798 -0
- onetick/py/configuration.py +771 -0
- onetick/py/core/__init__.py +0 -0
- onetick/py/core/_csv_inspector.py +93 -0
- onetick/py/core/_internal/__init__.py +0 -0
- onetick/py/core/_internal/_manually_bound_value.py +6 -0
- onetick/py/core/_internal/_nodes_history.py +250 -0
- onetick/py/core/_internal/_op_utils/__init__.py +0 -0
- onetick/py/core/_internal/_op_utils/every_operand.py +9 -0
- onetick/py/core/_internal/_op_utils/is_const.py +10 -0
- onetick/py/core/_internal/_per_tick_scripts/tick_list_sort_template.script +121 -0
- onetick/py/core/_internal/_proxy_node.py +140 -0
- onetick/py/core/_internal/_state_objects.py +2312 -0
- onetick/py/core/_internal/_state_vars.py +93 -0
- onetick/py/core/_source/__init__.py +0 -0
- onetick/py/core/_source/_symbol_param.py +95 -0
- onetick/py/core/_source/schema.py +97 -0
- onetick/py/core/_source/source_methods/__init__.py +0 -0
- onetick/py/core/_source/source_methods/aggregations.py +809 -0
- onetick/py/core/_source/source_methods/applyers.py +296 -0
- onetick/py/core/_source/source_methods/columns.py +141 -0
- onetick/py/core/_source/source_methods/data_quality.py +301 -0
- onetick/py/core/_source/source_methods/debugs.py +272 -0
- onetick/py/core/_source/source_methods/drops.py +120 -0
- onetick/py/core/_source/source_methods/fields.py +619 -0
- onetick/py/core/_source/source_methods/filters.py +1002 -0
- onetick/py/core/_source/source_methods/joins.py +1413 -0
- onetick/py/core/_source/source_methods/merges.py +605 -0
- onetick/py/core/_source/source_methods/misc.py +1455 -0
- onetick/py/core/_source/source_methods/pandases.py +155 -0
- onetick/py/core/_source/source_methods/renames.py +356 -0
- onetick/py/core/_source/source_methods/sorts.py +183 -0
- onetick/py/core/_source/source_methods/switches.py +142 -0
- onetick/py/core/_source/source_methods/symbols.py +117 -0
- onetick/py/core/_source/source_methods/times.py +627 -0
- onetick/py/core/_source/source_methods/writes.py +986 -0
- onetick/py/core/_source/symbol.py +205 -0
- onetick/py/core/_source/tmp_otq.py +222 -0
- onetick/py/core/column.py +209 -0
- onetick/py/core/column_operations/__init__.py +0 -0
- onetick/py/core/column_operations/_methods/__init__.py +4 -0
- onetick/py/core/column_operations/_methods/_internal.py +28 -0
- onetick/py/core/column_operations/_methods/conversions.py +216 -0
- onetick/py/core/column_operations/_methods/methods.py +292 -0
- onetick/py/core/column_operations/_methods/op_types.py +160 -0
- onetick/py/core/column_operations/accessors/__init__.py +0 -0
- onetick/py/core/column_operations/accessors/_accessor.py +28 -0
- onetick/py/core/column_operations/accessors/decimal_accessor.py +104 -0
- onetick/py/core/column_operations/accessors/dt_accessor.py +537 -0
- onetick/py/core/column_operations/accessors/float_accessor.py +184 -0
- onetick/py/core/column_operations/accessors/str_accessor.py +1367 -0
- onetick/py/core/column_operations/base.py +1121 -0
- onetick/py/core/cut_builder.py +150 -0
- onetick/py/core/db_constants.py +20 -0
- onetick/py/core/eval_query.py +245 -0
- onetick/py/core/lambda_object.py +441 -0
- onetick/py/core/multi_output_source.py +232 -0
- onetick/py/core/per_tick_script.py +2256 -0
- onetick/py/core/query_inspector.py +464 -0
- onetick/py/core/source.py +1744 -0
- onetick/py/db/__init__.py +2 -0
- onetick/py/db/_inspection.py +1128 -0
- onetick/py/db/db.py +1327 -0
- onetick/py/db/utils.py +64 -0
- onetick/py/docs/__init__.py +0 -0
- onetick/py/docs/docstring_parser.py +112 -0
- onetick/py/docs/utils.py +81 -0
- onetick/py/functions.py +2398 -0
- onetick/py/license.py +190 -0
- onetick/py/log.py +88 -0
- onetick/py/math.py +935 -0
- onetick/py/misc.py +470 -0
- onetick/py/oqd/__init__.py +22 -0
- onetick/py/oqd/eps.py +1195 -0
- onetick/py/oqd/sources.py +325 -0
- onetick/py/otq.py +216 -0
- onetick/py/pyomd_mock.py +47 -0
- onetick/py/run.py +916 -0
- onetick/py/servers.py +173 -0
- onetick/py/session.py +1347 -0
- onetick/py/sources/__init__.py +19 -0
- onetick/py/sources/cache.py +167 -0
- onetick/py/sources/common.py +128 -0
- onetick/py/sources/csv.py +642 -0
- onetick/py/sources/custom.py +85 -0
- onetick/py/sources/data_file.py +305 -0
- onetick/py/sources/data_source.py +1045 -0
- onetick/py/sources/empty.py +94 -0
- onetick/py/sources/odbc.py +337 -0
- onetick/py/sources/order_book.py +271 -0
- onetick/py/sources/parquet.py +168 -0
- onetick/py/sources/pit.py +191 -0
- onetick/py/sources/query.py +495 -0
- onetick/py/sources/snapshots.py +419 -0
- onetick/py/sources/split_query_output_by_symbol.py +198 -0
- onetick/py/sources/symbology_mapping.py +123 -0
- onetick/py/sources/symbols.py +374 -0
- onetick/py/sources/ticks.py +825 -0
- onetick/py/sql.py +70 -0
- onetick/py/state.py +251 -0
- onetick/py/types.py +2131 -0
- onetick/py/utils/__init__.py +70 -0
- onetick/py/utils/acl.py +93 -0
- onetick/py/utils/config.py +186 -0
- onetick/py/utils/default.py +49 -0
- onetick/py/utils/file.py +38 -0
- onetick/py/utils/helpers.py +76 -0
- onetick/py/utils/locator.py +94 -0
- onetick/py/utils/perf.py +498 -0
- onetick/py/utils/query.py +49 -0
- onetick/py/utils/render.py +1374 -0
- onetick/py/utils/script.py +244 -0
- onetick/py/utils/temp.py +471 -0
- onetick/py/utils/types.py +120 -0
- onetick/py/utils/tz.py +84 -0
- onetick_py-1.177.0.dist-info/METADATA +137 -0
- onetick_py-1.177.0.dist-info/RECORD +152 -0
- onetick_py-1.177.0.dist-info/WHEEL +5 -0
- onetick_py-1.177.0.dist-info/entry_points.txt +2 -0
- onetick_py-1.177.0.dist-info/licenses/LICENSE +21 -0
- onetick_py-1.177.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,1002 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import warnings
|
|
3
|
+
from contextlib import suppress
|
|
4
|
+
from datetime import time
|
|
5
|
+
from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Union
|
|
6
|
+
from onetick.py.backports import Literal
|
|
7
|
+
|
|
8
|
+
from onetick import py as otp
|
|
9
|
+
from onetick.py import types as ott
|
|
10
|
+
from onetick.py import utils
|
|
11
|
+
from onetick.py.core.column import _Column
|
|
12
|
+
from onetick.py.core.column_operations.base import _Operation
|
|
13
|
+
from onetick.py.core.eval_query import _QueryEvalWrapper
|
|
14
|
+
from onetick.py.otq import otq
|
|
15
|
+
|
|
16
|
+
from .misc import inplace_operation
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from onetick.py.core.source import Source
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def if_else(self: 'Source', condition: _Operation, if_expr, else_expr) -> 'otp.Column':
    """
    Shortcut for :meth:`~onetick.py.Source.apply` with lambda if-else expression.

    The method wraps ``if_expr if condition else else_expr`` into a lambda,
    passes it to :meth:`~onetick.py.Source.apply` and returns its result.

    Parameters
    ----------
    condition: :class:`Operation`
        - condition for matching ticks

    if_expr: :class:`Operation`, value
        - value or `Operation` to set if `condition` is true

    else_expr: :class:`Operation`, value
        - value or `Operation` to set if `condition` is false

    Returns
    -------
    Column

    Examples
    --------
    Basic example of apply if-else to a tick flow:

    >>> data = otp.Ticks(X=[1, 2, 3])
    >>> data['Y'] = data.if_else(data['X'] > 2, 1, 0)
    >>> otp.run(data)
                         Time  X  Y
    0 2003-12-01 00:00:00.000  1  0
    1 2003-12-01 00:00:00.001  2  0
    2 2003-12-01 00:00:00.002  3  1

    You can also set column value via :class:`Operation`:

    >>> data = otp.Ticks(X=[1, 2, 3])
    >>> data['Y'] = data.if_else(data['X'] > 2, data['X'] * 2, 0)
    >>> otp.run(data)
                         Time  X  Y
    0 2003-12-01 00:00:00.000  1  0
    1 2003-12-01 00:00:00.001  2  0
    2 2003-12-01 00:00:00.002  3  6

    See Also
    --------
    :py:meth:`onetick.py.Source.apply`
    """
    # NOTE(review): the lambda is handed to apply() as-is; presumably apply() relies on
    # this exact conditional-expression form, so keep it unchanged -- confirm before refactoring.
    return self.apply(lambda tick: if_expr if condition else else_expr)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def where_clause(
    self: 'Source', condition, discard_on_match: bool = False, stop_on_first_mismatch: bool = False
) -> Tuple['Source', 'Source']:
    """
    Split the source into two branches according to ``condition``.

    The first returned branch carries the ticks that satisfy the condition,
    the second one carries the ticks that do not.

    The original source object is left untouched.

    Parameters
    ----------
    condition: :class:`Operation`, :func:`eval`
        Expression used to filter ticks, or an object evaluating another query.
        In the latter case that query must produce exactly one tick with exactly one field.
    discard_on_match: bool
        Inverts the ``condition``.

        The first branch then gets the ticks that do not meet the condition,
        and the second branch gets the ticks that meet it.
    stop_on_first_mismatch: bool
        If set, the first branch stops propagating ticks
        as soon as a tick fails the ``condition``.

        The other branch then contains every tick starting with the first mismatch,
        including ticks that do meet the condition.

    Raises
    ------
    TypeError
        If ``condition`` is neither an :class:`Operation` nor an :func:`eval` object.

    See Also
    --------
    | :meth:`Source.where`
    | :meth:`Source.__getitem__`
    | **WHERE_CLAUSE** OneTick event processor

    Examples
    --------

    Filtering based on expression:

    >>> data = otp.Ticks(X=[1, 2, 3, 4])
    >>> odd, even = data.where_clause(data['X'] % 2 == 1)
    >>> otp.run(odd)
                         Time  X
    0 2003-12-01 00:00:00.000  1
    1 2003-12-01 00:00:00.002  3
    >>> otp.run(even)
                         Time  X
    0 2003-12-01 00:00:00.001  2
    1 2003-12-01 00:00:00.003  4

    Filtering based on the result of another query:

    >>> another_query = otp.Tick(WHERE='mod(X, 2) = 1')
    >>> data = otp.Ticks(X=[1, 2, 3, 4])
    >>> data, _ = data.where_clause(otp.eval(another_query))
    >>> otp.run(data)
                         Time  X
    0 2003-12-01 00:00:00.000  1
    1 2003-12-01 00:00:00.002  3

    Using ``discard_on_match`` parameter to invert the condition:

    >>> data = otp.Ticks(X=[1, 2, 3, 4])
    >>> even, odd = data.where_clause(data['X'] % 2 == 1, discard_on_match=True)
    >>> otp.run(even)
                         Time  X
    0 2003-12-01 00:00:00.001  2
    1 2003-12-01 00:00:00.003  4
    >>> otp.run(odd)
                         Time  X
    0 2003-12-01 00:00:00.000  1
    1 2003-12-01 00:00:00.002  3

    Using ``stop_on_first_mismatch`` parameter to not propagate ticks after first mismatch:

    >>> data = otp.Ticks(X=[1, 2, 3, 4])
    >>> data, other = data.where_clause(data['X'] % 2 == 1, stop_on_first_mismatch=True)
    >>> otp.run(data)
            Time  X
    0 2003-12-01  1

    But other branch will contain all ticks after the mismatch, even if they don't meet the condition:

    >>> otp.run(other)
                         Time  X
    0 2003-12-01 00:00:00.001  2
    1 2003-12-01 00:00:00.002  3
    2 2003-12-01 00:00:00.003  4
    """
    if isinstance(condition, _Operation):
        condition = condition._make_python_way_bool_expression()
    elif not isinstance(condition, _QueryEvalWrapper):
        raise TypeError(f"Unsupported type of value for 'condition' parameter: {type(condition)}")

    if isinstance(condition, _QueryEvalWrapper):
        condition = condition.to_eval_string(self._tmp_otq)

    where_ep = otq.WhereClause(
        where=str(condition),
        discard_on_match=discard_on_match,
        stop_on_first_mismatch=stop_on_first_mismatch,
    )
    branching = self.copy(ep=where_ep)

    # both outputs are copies of the same WHERE_CLAUSE node, each bound to its own output pin
    matched = branching.copy()
    matched.node().out_pin("IF")

    unmatched = branching.copy()
    unmatched.node().out_pin("ELSE")
    # TODO: add ability to remove then this ep, because it is required only for right output
    unmatched.sink(otq.Passthrough())

    return matched, unmatched
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def where(self: 'Source', condition, discard_on_match: bool = False, stop_on_first_mismatch: bool = False) -> 'Source':
    """
    Keep only the ticks that meet the ``condition``.

    A new object is returned, the original source object is not modified.
    This is the first branch produced by :meth:`Source.where_clause`
    called with the same arguments.

    Parameters
    ----------
    condition: :class:`Operation`, :func:`eval`
        Expression used to filter ticks, or an object evaluating another query.
        In the latter case that query must produce exactly one tick with exactly one field.

    discard_on_match: bool
        Inverts the ``condition``.

        Ticks that don't meet the condition will be returned.

    stop_on_first_mismatch: bool
        If set, no ticks will be propagated starting with the first tick that does not meet the ``condition``.

    See Also
    --------
    | :meth:`Source.where_clause`
    | :meth:`Source.__getitem__`
    | **WHERE_CLAUSE** OneTick event processor

    Examples
    --------

    Filtering based on expression:

    >>> data = otp.Ticks(X=[1, 2, 3, 4])
    >>> data = data.where(data['X'] % 2 == 1)
    >>> otp.run(data)
                         Time  X
    0 2003-12-01 00:00:00.000  1
    1 2003-12-01 00:00:00.002  3

    Filtering based on the result of another query:

    >>> another_query = otp.Tick(WHERE='mod(X, 2) = 1')
    >>> data = otp.Ticks(X=[1, 2, 3, 4])
    >>> data = data.where(otp.eval(another_query))
    >>> otp.run(data)
                         Time  X
    0 2003-12-01 00:00:00.000  1
    1 2003-12-01 00:00:00.002  3

    Using ``discard_on_match`` parameter to invert the condition:

    >>> data = otp.Ticks(X=[1, 2, 3, 4])
    >>> data = data.where(data['X'] % 2 == 1, discard_on_match=True)
    >>> otp.run(data)
                         Time  X
    0 2003-12-01 00:00:00.001  2
    1 2003-12-01 00:00:00.003  4

    Using ``stop_on_first_mismatch`` parameter to not propagate ticks after first mismatch:

    >>> data = otp.Ticks(X=[1, 2, 3, 4])
    >>> data = data.where(data['X'] % 2 == 1, stop_on_first_mismatch=True)
    >>> otp.run(data)
            Time  X
    0 2003-12-01  1
    """
    branches = self.where_clause(
        condition,
        discard_on_match=discard_on_match,
        stop_on_first_mismatch=stop_on_first_mismatch,
    )
    # only the branch with matching ticks is of interest here
    return branches[0]
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def _get_integer_slice(self: 'Source', item: slice) -> Optional['Source']:
    """
    Treat otp.Source object as a sequence of ticks
    and apply common python integer slicing logic to it.

    Parameters
    ----------
    item: slice
        Slice object whose ``start``, ``stop`` and ``step`` are either ``None`` or integers.

    Returns
    -------
    :class:`Source` or ``None``
        New source with the selected ticks, or ``None`` if any of the slice bounds
        is set to a non-integer value (i.e. this is not an integer slice).

    Raises
    ------
    ValueError
        If ``step`` is zero or negative, if ``stop`` is zero,
        if both ``start`` and ``stop`` are positive and ``start >= stop``,
        or if ``start`` is negative while ``stop`` is positive.
    """
    start, stop, step = item.start, item.stop, item.step
    for v in (start, stop, step):
        if v is not None and not isinstance(v, int):
            # not an integer slice, let the caller handle it
            return None

    # let's filter out cases that we don't want to support
    if step is not None and step <= 0:
        raise ValueError("step value can't be negative or zero")
    if stop is not None and stop == 0:
        raise ValueError("stop value can't be zero")
    # pylint: disable=chained-comparison
    if start and stop and start > 0 and stop > 0 and start >= stop:
        raise ValueError("stop value can't be less than start")
    if start and start < 0 and stop and stop > 0:
        raise ValueError("start value can't be negative when stop value is positive")

    def add_counter(src, force=False):
        # adds running tick counter field __NUM__ (1-based);
        # with force=True an existing counter is dropped and recalculated
        if '__NUM__' not in src.schema or force:
            if '__NUM__' in src.schema:
                src = src.drop('__NUM__')
            src = src.agg({'__NUM__': otp.agg.count()}, running=True, all_fields=True)
        return src

    result = self.copy()
    if start:
        if start > 0:
            # skip first `start` ticks
            result = add_counter(result)
            result, _ = result[result['__NUM__'] > start]
        elif start < 0:
            # keep only last `-start` ticks
            result = result.last(-start)
    if stop:
        if stop > 0:
            result = add_counter(result)
            result, _ = result[result['__NUM__'] <= stop]
        elif stop < 0:
            # mark last `-stop` ticks with __FLAG__ = 1, join them back by counter
            # and keep only the ticks that were not marked
            result = add_counter(result)
            last_ticks = result.last(-stop)
            last_ticks['__FLAG__'] = 1
            last_ticks = last_ticks[['__FLAG__', '__NUM__']]
            result = otp.join(
                result, last_ticks, on=result['__NUM__'] == last_ticks['__NUM__'], how='left_outer', rprefix='RIGHT'
            )
            result, _ = result[result['__FLAG__'] == 0]
            result = result.drop(['__FLAG__', 'RIGHT___NUM__'])
    if step:
        if step > 0:  # NOSONAR
            # resetting counter
            result = add_counter(result, force=True)
            result, _ = result[(result['__NUM__'] - 1) % step == 0]
    # the counter is a temporary helper field, it must not leak to the result
    if '__NUM__' in result.schema:
        result = result.drop('__NUM__')
    return result
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def __getitem__(self: 'Source', item):
    """
    Allows to express multiple things:

    - access a field by name

    - filter ticks by condition

    - select subset of fields

    - set order of fields

    Parameters
    ----------
    item: str, :class:`Operation`, :func:`eval`, list of str

        - ``str`` is to access column by name or columns specified by regex.

        - ``Operation`` to express filter condition.

        - ``otp.eval`` to express filter condition based on external query

        - ``List[str]`` select subset of specified columns or columns specified in regexes.

        - ``slice[List[str]::]`` set order of columns

        - ``slice[Tuple[str, Type]::]`` type defaulting

        - ``slice[:]`` alias to :meth:`Source.copy()`

        - ``slice[int:int:int]`` select ticks the same way as elements in python lists

    Returns
    -------
    Column, Source or tuple of Sources
        - Column if column name was specified.

        - Two sources if filtering expression or eval was provided: the first one is for ticks that pass condition
          and the second one that do not.

    Raises
    ------
    AttributeError
        If a columns slice has ``step`` or ``stop`` set, if a list in a slice mixes types,
        or if a requested column does not exist.
    KeyError
        If ``item`` is a name that is not an attribute of the source.
    ValueError
        If a list contains objects that are neither strings nor columns.

    Examples
    --------

    Access to the `X` column: add `Y` based on `X`

    >>> data = otp.Ticks(X=[1, 2, 3])
    >>> data['Y'] = data['X'] * 2
    >>> otp.run(data)
                         Time  X  Y
    0 2003-12-01 00:00:00.000  1  2
    1 2003-12-01 00:00:00.001  2  4
    2 2003-12-01 00:00:00.002  3  6

    Filtering based on expression:

    >>> data = otp.Ticks(X=[1, 2, 3])
    >>> data_more, data_less = data[(data['X'] > 2)]
    >>> otp.run(data_more)
                         Time  X
    0 2003-12-01 00:00:00.002  3
    >>> otp.run(data_less)
                         Time  X
    0 2003-12-01 00:00:00.000  1
    1 2003-12-01 00:00:00.001  2

    Filtering based on the result of another query. Another query should
    have only one tick as a result with only one field (whatever it names).

    >>> exp_to_select = otp.Ticks(WHERE=['X > 2'])
    >>> data = otp.Ticks(X=[1, 2, 3], Y=['a', 'b', 'c'], Z=[.4, .3, .1])
    >>> data, _ = data[otp.eval(exp_to_select)]
    >>> otp.run(data)
                         Time  X  Y    Z
    0 2003-12-01 00:00:00.002  3  c  0.1

    Select subset of specified columns:

    >>> data = otp.Ticks(X=[1, 2, 3], Y=['a', 'b', 'c'], Z=[.4, .3, .1])
    >>> data = data[['X', 'Z']]
    >>> otp.run(data)
                         Time  X    Z
    0 2003-12-01 00:00:00.000  1  0.4
    1 2003-12-01 00:00:00.001  2  0.3
    2 2003-12-01 00:00:00.002  3  0.1

    Slice with list will keep all columns, but change order:

    >>> data=otp.Tick(Y=1, X=2, Z=3)
    >>> otp.run(data)
            Time  Y  X  Z
    0 2003-12-01  1  2  3
    >>> data = data[['X', 'Y']:]
    >>> otp.run(data)
            Time  X  Y  Z
    0 2003-12-01  2  1  3

    Slice can be used as short-cut for :meth:`Source.copy`:

    >>> data[:] # doctest: +ELLIPSIS
    <onetick.py.sources.ticks.Tick object at ...>

    Slices can use integers.
    In this case ticks are selected the same way as elements in python lists.

    >>> data = otp.Ticks({'A': [1, 2, 3, 4, 5]})

    Select first 3 ticks:

    >>> otp.run(data[:3])
                         Time  A
    0 2003-12-01 00:00:00.000  1
    1 2003-12-01 00:00:00.001  2
    2 2003-12-01 00:00:00.002  3

    Skip first 3 ticks:

    >>> otp.run(data[3:])
                         Time  A
    0 2003-12-01 00:00:00.003  4
    1 2003-12-01 00:00:00.004  5

    Select last 3 ticks:

    >>> otp.run(data[-3:])
                         Time  A
    0 2003-12-01 00:00:00.002  3
    1 2003-12-01 00:00:00.003  4
    2 2003-12-01 00:00:00.004  5

    Skip last 3 ticks:

    >>> otp.run(data[:-3])
                         Time  A
    0 2003-12-01 00:00:00.000  1
    1 2003-12-01 00:00:00.001  2

    Skip first and last tick:

    >>> otp.run(data[1:-1])
                         Time  A
    0 2003-12-01 00:00:00.001  2
    1 2003-12-01 00:00:00.002  3
    2 2003-12-01 00:00:00.003  4

    Select every second tick:

    >>> otp.run(data[::2])
                         Time  A
    0 2003-12-01 00:00:00.000  1
    1 2003-12-01 00:00:00.002  3
    2 2003-12-01 00:00:00.004  5

    Select every second tick, not including first and last tick:

    >>> otp.run(data[1:-1:2])
                         Time  A
    0 2003-12-01 00:00:00.001  2
    1 2003-12-01 00:00:00.003  4

    Regular expressions can be used to select fields too:

    >>> data = otp.Tick(A=1, AA=2, AB=3, B=4, BB=5, BA=6)
    >>> otp.run(data['A.*'])
            Time  A  AA  AB  BA
    0 2003-12-01  1   2   3   6

    Note that by default pattern is matched in any position of the string.
    Use characters ^ and $ to specify start and end of the string:

    >>> otp.run(data['^A'])
            Time  A  AA  AB
    0 2003-12-01  1   2   3

    Several regular expressions can be specified too:

    >>> otp.run(data[['^A+$', '^B+$']])
            Time  A  AA  B  BB
    0 2003-12-01  1   2  4   5

    See Also
    --------
    | :meth:`Source.table`: another and more generic way to select subset of specified columns
    | **PASSTHROUGH** OneTick event processor
    | **WHERE_CLAUSE** OneTick event processor

    """

    strict = True

    # filter conditions are handled by where_clause(),
    # it raises TypeError for every item that is not a condition
    with suppress(TypeError):
        return self.where_clause(item)

    if isinstance(item, slice):

        # _get_integer_slice() returns None when this is not an integer slice,
        # so check for None explicitly instead of relying on truthiness of the source object
        result = self._get_integer_slice(item)
        if result is not None:
            return result

        if item.step:
            raise AttributeError("Source columns slice with step set makes no sense")
        if item.start and item.stop:
            raise AttributeError("Source columns slice with both start and stop set is not available now")
        if not item.start and item.stop:
            raise AttributeError("Source columns slice with only stop set is not implemented yet")
        if item.start is None and item.stop is None:
            return self.copy()

        # columns slice: listed columns go first, the rest of the columns are kept
        item = item.start
        strict = False

        if isinstance(item, tuple):
            item = dict([item])

        elif isinstance(item, list):
            if not item:
                return self.copy()
            item_type = list({type(x) for x in item})

            if len(item_type) > 1:
                raise AttributeError(f"Different types {item_type} in slice list is not supported")
            if item_type[0] == tuple:
                item = dict(item)

    if isinstance(item, (list, str)):
        # check if item has regex characters
        item_list = [item] if isinstance(item, str) else item
        try:
            items_to_passthrough, use_regex = self._columns_names_regex(item_list)
        except TypeError:
            use_regex = False
        if use_regex:
            src = self.copy()
            src.sink(otq.Passthrough(fields=','.join(items_to_passthrough), use_regex=True))
            return src

    if isinstance(item, list):
        # ---------
        # TABLE
        # ---------
        items = []

        for it in item:
            if isinstance(it, _Column):
                items.append(it.name)
            elif isinstance(it, str):
                items.append(it)
            else:
                raise ValueError(f"It is not supported to filter '{it}' object of '{type(it)}' type")

        # validation
        for column_name in items:
            if column_name not in self.schema:
                existing_columns = ", ".join(self.schema.keys())
                raise AttributeError(
                    f"There is no '{column_name}' column. There are existing columns: {existing_columns}"
                )

        columns = {
            column_name: self.schema[column_name] for column_name in items if not self._check_key_is_meta(column_name)
        }

        return self.table(strict=strict, **columns)

    if isinstance(item, dict):
        return self.table(strict=strict, **item)

    # way to set type
    if isinstance(item, tuple):
        name, dtype = item
        warnings.warn(
            'Using tuple with name and type in otp.Source.__getitem__() is not supported anymore,'
            ' change your code to use otp.Source.schema object instead.',
            FutureWarning,
        )
        return self._set_field_by_tuple(name, dtype)

    # plain access to the column by its name
    name = item
    if name not in self.__dict__:
        raise KeyError(
            f'Column name {name} is not in the schema. Please, check that this column '
            'is in the schema or add it using the .schema property'
        )
    if not isinstance(self.__dict__[name], _Column):
        raise AttributeError(f"There is no '{name}' column")
    return self.__dict__[name]
|
|
593
|
+
|
|
594
|
+
|
|
595
|
+
@inplace_operation
def dropna(
    self: 'Source', how: Literal["any", "all"] = "any", subset: Optional[List[Any]] = None, inplace=False
) -> Optional['Source']:
    """
    Drops ticks that contain NaN values according to the policy in the ``how`` parameter

    Only columns of float type are checked, columns of other types are ignored.
    If there are no float columns to check, then no ticks are dropped.

    Parameters
    ----------
    how: "any" or "all"

        ``any`` - filters out ticks if at least one field has NaN value

        ``all`` - filters out ticks if all fields have NaN values.
    subset: list of str
        list of columns to check for NaN values. If ``None`` then all columns are checked.
        Every column in the list must exist in the source and have float type.
    inplace: bool
        the flag controls whether operation should be applied inplace.

    Returns
    -------
    :class:`Source` or ``None``

    Raises
    ------
    ValueError
        If ``how`` is neither "any" nor "all",
        or if a column from ``subset`` is not in the source or is not of float type.

    Examples
    --------

    Drop ticks where **at least one** field has ``nan`` value.

    >>> data = otp.Ticks([[     'X',     'Y'],
    ...                   [     0.0,     1.0],
    ...                   [ otp.nan,     2.0],
    ...                   [     4.0, otp.nan],
    ...                   [ otp.nan, otp.nan],
    ...                   [     6.0,     7.0]])
    >>> data = data.dropna()
    >>> otp.run(data)[['X', 'Y']]
         X    Y
    0  0.0  1.0
    1  6.0  7.0

    Drop ticks where **all** fields have ``nan`` values.

    >>> data = otp.Ticks([[     'X',     'Y'],
    ...                   [     0.0,     1.0],
    ...                   [ otp.nan,     2.0],
    ...                   [     4.0, otp.nan],
    ...                   [ otp.nan, otp.nan],
    ...                   [     6.0,     7.0]])
    >>> data = data.dropna(how='all')
    >>> otp.run(data)[['X', 'Y']]
         X    Y
    0  0.0  1.0
    1  NaN  2.0
    2  4.0  NaN
    3  6.0  7.0

    Drop ticks where **all** fields in **subset** of columns have ``nan`` values.

    >>> data = otp.Ticks([[     'X',     'Y',     'Z'],
    ...                   [     0.0,     1.0, otp.nan],
    ...                   [ otp.nan,     2.0, otp.nan],
    ...                   [     4.0, otp.nan, otp.nan],
    ...                   [ otp.nan, otp.nan, otp.nan],
    ...                   [     6.0,     7.0, otp.nan]])
    >>> data = data.dropna(how='all', subset=['X', 'Y'])
    >>> otp.run(data)[['X', 'Y', 'Z']]
         X    Y    Z
    0  0.0  1.0  NaN
    1  NaN  2.0  NaN
    2  4.0  NaN  NaN
    3  6.0  7.0  NaN

    """
    if how not in ["any", "all"]:
        raise ValueError(f"It is expected to see 'any' or 'all' values for 'how' parameter, but got '{how}'")

    columns = self.columns(skip_meta_fields=True)
    if subset is not None:
        for column_name in subset:
            if column_name not in columns:
                raise ValueError(f"There is no '{column_name}' column in the source")
            if columns[column_name] is not float:
                raise ValueError(f"Column '{column_name}' is not float type")

    # The resulting condition describes the ticks to KEEP:
    # "any" keeps ticks where every checked field is not NaN (checks are combined with AND),
    # "all" keeps ticks where at least one checked field is not NaN (checks are combined with OR).
    condition = None
    for column_name, dtype in columns.items():
        if subset is not None and column_name not in subset:
            continue
        if dtype is not float:
            continue
        if condition is None:
            condition = self[column_name] != ott.nan
        elif how == "any":
            condition &= self[column_name] != ott.nan
        else:
            condition |= self[column_name] != ott.nan

    if condition is None:
        # there are no float columns to check, so there is nothing to drop;
        # do not add a filter with the meaningless 'None' expression
        return self

    self.sink(otq.WhereClause(where=str(condition)))
    return self
|
|
694
|
+
|
|
695
|
+
|
|
696
|
+
@inplace_operation
def time_filter(
    self: 'Source',
    discard_on_match: bool = False,
    start_time: Union[str, int, time] = 0,
    end_time: Union[str, int, time] = 0,
    day_patterns: Union[str, List[str]] = "",
    timezone=utils.default,  # type: ignore
    end_time_tick_matches: bool = False,
    inplace=False,
) -> Optional['Source']:
    """
    Filters ticks by time.

    Parameters
    ----------
    discard_on_match : bool, optional
        If ``True``, then ticks that match the filter will be discarded.
        Otherwise, only ticks that match the filter will be passed.
    start_time : str or int or :py:class:`datetime.time`, optional
        Start time of the filter, string must be in the format ``HHMMSSmmm``.
        A :py:class:`datetime.time` object is converted to a string in that format.
        Default value is 0.
    end_time : str or int or :py:class:`datetime.time`, optional
        End time of the filter, string must be in the format ``HHMMSSmmm``.
        A :py:class:`datetime.time` object is converted to a string in that format.
        To filter ticks for an entire day, this parameter should be set to 240000000.
        Default value is 0.
    day_patterns : list or str
        Pattern or list of patterns that determines days for which the ticks can be propagated.
        A tick can be propagated if its date matches one or more of the patterns.
        Three supported pattern formats are:

        1. ``month.week.weekdays``, 0 month means any month, 0 week means any week,
           6 week means the last week of the month for a given weekday(s),
           weekdays are digits for each day, 0 being Sunday.

        2. ``month/day``, 0 month means any month.

        3. ``year/month/day``, 0 year means any year, 0 month means any month.

    timezone : str, optional
        Timezone of the filter.
        Default value is ``configuration.config.tz``
        or timezone set in the parameter of :py:func:`onetick.py.run`.
    end_time_tick_matches : bool, optional
        If ``True``, then the end time is inclusive.
        Otherwise, the end time is exclusive.
    inplace : bool, optional
        The flag controls whether operation should be applied inplace or not.
        If ``inplace=True``, then it returns nothing. Otherwise method returns a new modified
        object. Default value is ``False``.

    Returns
    -------
    :class:`Source` or ``None``
        Returns ``None`` if ``inplace=True``.

    Raises
    ------
    ValueError
        If any of the ``day_patterns`` does not match one of the supported formats.

    See also
    --------
    **TIME_FILTER** OneTick event processor

    Examples
    --------
    >>> data = otp.DataSource(db='US_COMP', tick_type='TRD', symbols='AAPL')
    >>> data = data.time_filter(start_time='000000001', end_time='000000003')
    >>> otp.run(data, start=otp.dt(2022, 3, 1), end=otp.dt(2022, 3, 2))
                         Time  PRICE  SIZE
    0 2022-03-01 00:00:00.001    1.4    10
    1 2022-03-01 00:00:00.002    1.4    50

    """
    if timezone is utils.default:
        # doesn't work without expr for some reason
        timezone = 'expr(_TIMEZONE)'

    if day_patterns:
        if isinstance(day_patterns, str):
            day_patterns = [day_patterns]
        for day_pattern in day_patterns:
            # Both separators of the ``month.week.weekdays`` form are literal dots;
            # an unescaped ``.`` would accept any character in that position.
            # NOTE(review): the weekdays group accepts only 1-2 digits, while the docstring
            # describes "digits for each day" -- confirm whether longer lists must be allowed.
            if not re.match(r"(^\d\d?\.[0-6]\.\d\d?$)|(^\d\d?\/\d\d?$)|(^\d{1,4}\/\d\d?\/\d\d?$)", day_pattern):
                raise ValueError(f"Invalid day pattern: {day_pattern}")

    # ``%f`` yields microseconds (6 digits); dropping the last 3 leaves milliseconds (HHMMSSmmm).
    if isinstance(start_time, time):
        start_time = start_time.strftime('%H%M%S%f')[:-3]

    if isinstance(end_time, time):
        end_time = end_time.strftime('%H%M%S%f')[:-3]

    # The event processor receives all patterns as a single comma-separated string.
    day_patterns = ",".join(day_patterns)
    self.sink(
        otq.TimeFilter(
            discard_on_match=discard_on_match,
            start_time=start_time,
            end_time=end_time,
            timezone=timezone,
            day_patterns=day_patterns,
            end_time_tick_matches=end_time_tick_matches,
        )
    )
    return self
|
|
795
|
+
|
|
796
|
+
|
|
797
|
+
@inplace_operation
def skip_bad_tick(
    self: 'Source',
    field: Union[str, _Column],
    discard_on_match: bool = False,
    jump_threshold: float = 2.0,
    num_neighbor_ticks: int = 5,
    use_absolute_values: bool = False,
    inplace=False,
) -> Optional['Source']:
    """
    Filters ticks by comparing the value of ``field`` with the values of the same field
    in the surrounding ticks: a tick is discarded when its value differs from them
    more times than a given threshold.
    Uses SKIP_BAD_TICK EP.

    Parameters
    ----------
    field: str, :py:class:`~onetick.py.Column`
        Name of the field (must be present in the input tick descriptor).
        A column object may be passed instead of its name.
    discard_on_match: bool
        When set to ``True`` only ticks that did not match the filter are propagated,
        otherwise ticks that satisfy the filter condition are propagated.
    jump_threshold: float
        A threshold to determine if a tick is "good" or "bad."

        Good ticks are the ticks whose ``field`` value differs less than ``jump_threshold`` times
        from the ``field``'s value of less than or half of the surrounding ``num_neighbor_ticks`` ticks.
    num_neighbor_ticks: int
        The number of ticks before this tick and after this tick to compare a tick against.
    use_absolute_values: bool
        When set to ``True``, use absolute values of numbers when checking whether they are within the jump threshold.
    inplace: bool
        The flag controls whether operation should be applied inplace or not.
        If ``inplace=True``, then it returns nothing.
        Otherwise, method returns a new modified object.

    See also
    --------
    **SKIP_BAD_TICK** OneTick event processor

    Returns
    -------
    :class:`Source` or ``None``

    Raises
    ------
    ValueError
        If ``field`` is not present in the schema of the source.

    Examples
    --------
    Keep ticks whose price did not jump by more than 20% relative to the surrounding ticks:

    >>> data = otp.Ticks(X=[10, 11, 15, 11, 9, 10])
    >>> data = data.skip_bad_tick(field="X", jump_threshold=1.2, num_neighbor_ticks=1)
    >>> otp.run(data)
                         Time   X
    0 2003-12-01 00:00:00.000  10
    1 2003-12-01 00:00:00.001  11
    2 2003-12-01 00:00:00.003  11
    3 2003-12-01 00:00:00.005  10

    Same example, but with passing column as ``field`` parameter:

    >>> data = otp.Ticks(X=[10, 11, 15, 11, 9, 10])
    >>> data = data.skip_bad_tick(field=data["X"], jump_threshold=1.2, num_neighbor_ticks=1)
    >>> otp.run(data)
                         Time   X
    0 2003-12-01 00:00:00.000  10
    1 2003-12-01 00:00:00.001  11
    2 2003-12-01 00:00:00.003  11
    3 2003-12-01 00:00:00.005  10

    If you want to keep only "bad ticks", which don't match the filter,
    set ``discard_on_match`` parameter to ``True``:

    >>> data = otp.Ticks(X=[10, 11, 15, 11, 9, 10])
    >>> data = data.skip_bad_tick(field=data["X"], jump_threshold=1.2, num_neighbor_ticks=1, discard_on_match=True)
    >>> otp.run(data)
                         Time   X
    0 2003-12-01 00:00:00.002  15
    1 2003-12-01 00:00:00.004   9

    In case, if you need to compare values on an absolute basis, set ``use_absolute_values`` parameter to ``True``:

    >>> data = otp.Ticks(X=[10, -11, -15, 11, 9, 10])
    >>> data = data.skip_bad_tick(field=data["X"], jump_threshold=1.2, num_neighbor_ticks=1, use_absolute_values=True)
    >>> otp.run(data)
                         Time   X
    0 2003-12-01 00:00:00.000  10
    1 2003-12-01 00:00:00.001 -11
    2 2003-12-01 00:00:00.003  11
    3 2003-12-01 00:00:00.005  10
    """
    # Accept either a column object or a plain field name.
    field_name = field.name if isinstance(field, _Column) else field

    if field_name not in self.schema:
        raise ValueError(f'Field {field_name} not in the schema.')

    bad_tick_ep = otq.SkipBadTick(
        discard_on_match=discard_on_match,
        jump_threshold=jump_threshold,
        field=field_name,
        num_neighbor_ticks=num_neighbor_ticks,
        use_absolute_values=use_absolute_values,
    )
    self.sink(bad_tick_ep)

    return self
|
|
901
|
+
|
|
902
|
+
|
|
903
|
+
@inplace_operation
def character_present(
    self: 'Source',
    field: Union[str, _Column],
    characters: Union[str, List[str]],
    characters_field: Union[str, _Column] = "",
    discard_on_match: bool = False,
    inplace: bool = False,
) -> Optional['Source']:
    """
    Propagates ticks based on whether the value of the field specified by `field` contains a character
    in the set of characters specified by `characters`.
    Uses **CHARACTER_PRESENT** EP.

    Parameters
    ----------
    field: str, :py:class:`~onetick.py.Column`
        Name of the field (must be present in the input tick descriptor).
        The field must have a string type.
    characters: str, List[str]
        A set of characters that are searched for in the value of the `field`.
        If set as string, works as list of characters.
        A list is joined into a single string.
    characters_field: str, :py:class:`~onetick.py.Column`
        If specified, will take a current value of that field and append it to `characters`, if any.
        The field must be present in the schema and have a string type.
    discard_on_match: bool
        When set to ``True`` only ticks that did not match the filter are propagated,
        otherwise ticks that satisfy the filter condition are propagated.
    inplace: bool
        The flag controls whether operation should be applied inplace or not.
        If ``inplace=True``, then it returns nothing.
        Otherwise, method returns a new modified object.

    See also
    --------
    **CHARACTER_PRESENT** OneTick event processor

    Returns
    -------
    :class:`Source` or ``None``

    Raises
    ------
    ValueError
        If `field` or a specified `characters_field` is not present in the schema.
    TypeError
        If `field` or a specified `characters_field` is not of a string type.

    Examples
    --------

    Select ticks that have the N or T in EXCHANGE field

    >>> data = otp.DataSource('TEST_DATABASE', tick_type='TRD', symbols='A')  # doctest: +SKIP
    >>> data = data[['PRICE', 'SIZE', 'EXCHANGE']]  # doctest: +SKIP
    >>> data = data.character_present(field=data['EXCHANGE'], characters='NT')  # doctest: +SKIP
    >>> otp.run(data)  # doctest: +SKIP
                         Time  PRICE   SIZE EXCHANGE
    0 2003-12-01 00:00:00.000  28.44  55100        N
    1 2003-12-01 00:00:00.001  28.44    100        T
    2 2003-12-01 00:00:00.002  28.44    200        T
    3 2003-12-01 00:00:00.003  28.45    100        T
    4 2003-12-01 00:00:00.004  28.44    500        T

    Select ticks that have the N or T in EXCHANGE field and character set in OLD_EXCHANGE field

    >>> data = otp.DataSource('TEST_DATABASE', tick_type='TRD', symbols='A')  # doctest: +SKIP
    >>> data = data.character_present(  # doctest: +SKIP
    ...     field=data['EXCHANGE'], characters='NT', characters_field=data['OLD_EXCHANGE'],
    ... )
    >>> data = data[['PRICE', 'SIZE', 'EXCHANGE']]  # doctest: +SKIP
    >>> otp.run(data)  # doctest: +SKIP
                         Time  PRICE   SIZE EXCHANGE
    0 2003-12-01 00:00:00.000  28.44  55100        N
    1 2003-12-01 00:00:00.001  28.44    100        B
    2 2003-12-01 00:00:00.002  28.44    200        B
    3 2003-12-01 00:00:00.003  28.45    100        T
    4 2003-12-01 00:00:00.004  28.44    200        T
    """
    if isinstance(field, _Column):
        field = field.name

    if isinstance(characters_field, _Column):
        characters_field = characters_field.name

    if isinstance(characters, list):
        characters = ''.join(characters)

    # ``field`` is mandatory, so it is always validated (an empty name fails the schema check);
    # ``characters_field`` is optional and is validated only when it is set.
    fields_to_check = [('field', field)]
    if characters_field:
        fields_to_check.append(('characters_field', characters_field))

    for name, value in fields_to_check:
        if value not in self.schema:
            raise ValueError(f'Field {value}, passed as parameter `{name}`, not in the schema.')

        dtype = self.schema[value]
        if not (dtype == str or issubclass(dtype, otp.string)):
            raise TypeError(
                f'Field {value}, passed as parameter `{name}`, has incompatible type: {dtype}, '
                'expected: str',
            )

    self.sink(otq.CharacterPresent(
        field=field,
        characters=characters,
        characters_field=characters_field,
        discard_on_match=discard_on_match,
    ))

    return self
|