onetick_py-1.177.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- locator_parser/__init__.py +0 -0
- locator_parser/acl.py +73 -0
- locator_parser/actions.py +262 -0
- locator_parser/common.py +368 -0
- locator_parser/io.py +43 -0
- locator_parser/locator.py +150 -0
- onetick/__init__.py +101 -0
- onetick/doc_utilities/__init__.py +3 -0
- onetick/doc_utilities/napoleon.py +40 -0
- onetick/doc_utilities/ot_doctest.py +140 -0
- onetick/doc_utilities/snippets.py +279 -0
- onetick/lib/__init__.py +4 -0
- onetick/lib/instance.py +141 -0
- onetick/py/__init__.py +293 -0
- onetick/py/_stack_info.py +89 -0
- onetick/py/_version.py +2 -0
- onetick/py/aggregations/__init__.py +11 -0
- onetick/py/aggregations/_base.py +648 -0
- onetick/py/aggregations/_docs.py +948 -0
- onetick/py/aggregations/compute.py +286 -0
- onetick/py/aggregations/functions.py +2216 -0
- onetick/py/aggregations/generic.py +104 -0
- onetick/py/aggregations/high_low.py +80 -0
- onetick/py/aggregations/num_distinct.py +83 -0
- onetick/py/aggregations/order_book.py +501 -0
- onetick/py/aggregations/other.py +1014 -0
- onetick/py/backports.py +26 -0
- onetick/py/cache.py +374 -0
- onetick/py/callback/__init__.py +5 -0
- onetick/py/callback/callback.py +276 -0
- onetick/py/callback/callbacks.py +131 -0
- onetick/py/compatibility.py +798 -0
- onetick/py/configuration.py +771 -0
- onetick/py/core/__init__.py +0 -0
- onetick/py/core/_csv_inspector.py +93 -0
- onetick/py/core/_internal/__init__.py +0 -0
- onetick/py/core/_internal/_manually_bound_value.py +6 -0
- onetick/py/core/_internal/_nodes_history.py +250 -0
- onetick/py/core/_internal/_op_utils/__init__.py +0 -0
- onetick/py/core/_internal/_op_utils/every_operand.py +9 -0
- onetick/py/core/_internal/_op_utils/is_const.py +10 -0
- onetick/py/core/_internal/_per_tick_scripts/tick_list_sort_template.script +121 -0
- onetick/py/core/_internal/_proxy_node.py +140 -0
- onetick/py/core/_internal/_state_objects.py +2312 -0
- onetick/py/core/_internal/_state_vars.py +93 -0
- onetick/py/core/_source/__init__.py +0 -0
- onetick/py/core/_source/_symbol_param.py +95 -0
- onetick/py/core/_source/schema.py +97 -0
- onetick/py/core/_source/source_methods/__init__.py +0 -0
- onetick/py/core/_source/source_methods/aggregations.py +809 -0
- onetick/py/core/_source/source_methods/applyers.py +296 -0
- onetick/py/core/_source/source_methods/columns.py +141 -0
- onetick/py/core/_source/source_methods/data_quality.py +301 -0
- onetick/py/core/_source/source_methods/debugs.py +272 -0
- onetick/py/core/_source/source_methods/drops.py +120 -0
- onetick/py/core/_source/source_methods/fields.py +619 -0
- onetick/py/core/_source/source_methods/filters.py +1002 -0
- onetick/py/core/_source/source_methods/joins.py +1413 -0
- onetick/py/core/_source/source_methods/merges.py +605 -0
- onetick/py/core/_source/source_methods/misc.py +1455 -0
- onetick/py/core/_source/source_methods/pandases.py +155 -0
- onetick/py/core/_source/source_methods/renames.py +356 -0
- onetick/py/core/_source/source_methods/sorts.py +183 -0
- onetick/py/core/_source/source_methods/switches.py +142 -0
- onetick/py/core/_source/source_methods/symbols.py +117 -0
- onetick/py/core/_source/source_methods/times.py +627 -0
- onetick/py/core/_source/source_methods/writes.py +986 -0
- onetick/py/core/_source/symbol.py +205 -0
- onetick/py/core/_source/tmp_otq.py +222 -0
- onetick/py/core/column.py +209 -0
- onetick/py/core/column_operations/__init__.py +0 -0
- onetick/py/core/column_operations/_methods/__init__.py +4 -0
- onetick/py/core/column_operations/_methods/_internal.py +28 -0
- onetick/py/core/column_operations/_methods/conversions.py +216 -0
- onetick/py/core/column_operations/_methods/methods.py +292 -0
- onetick/py/core/column_operations/_methods/op_types.py +160 -0
- onetick/py/core/column_operations/accessors/__init__.py +0 -0
- onetick/py/core/column_operations/accessors/_accessor.py +28 -0
- onetick/py/core/column_operations/accessors/decimal_accessor.py +104 -0
- onetick/py/core/column_operations/accessors/dt_accessor.py +537 -0
- onetick/py/core/column_operations/accessors/float_accessor.py +184 -0
- onetick/py/core/column_operations/accessors/str_accessor.py +1367 -0
- onetick/py/core/column_operations/base.py +1121 -0
- onetick/py/core/cut_builder.py +150 -0
- onetick/py/core/db_constants.py +20 -0
- onetick/py/core/eval_query.py +245 -0
- onetick/py/core/lambda_object.py +441 -0
- onetick/py/core/multi_output_source.py +232 -0
- onetick/py/core/per_tick_script.py +2256 -0
- onetick/py/core/query_inspector.py +464 -0
- onetick/py/core/source.py +1744 -0
- onetick/py/db/__init__.py +2 -0
- onetick/py/db/_inspection.py +1128 -0
- onetick/py/db/db.py +1327 -0
- onetick/py/db/utils.py +64 -0
- onetick/py/docs/__init__.py +0 -0
- onetick/py/docs/docstring_parser.py +112 -0
- onetick/py/docs/utils.py +81 -0
- onetick/py/functions.py +2398 -0
- onetick/py/license.py +190 -0
- onetick/py/log.py +88 -0
- onetick/py/math.py +935 -0
- onetick/py/misc.py +470 -0
- onetick/py/oqd/__init__.py +22 -0
- onetick/py/oqd/eps.py +1195 -0
- onetick/py/oqd/sources.py +325 -0
- onetick/py/otq.py +216 -0
- onetick/py/pyomd_mock.py +47 -0
- onetick/py/run.py +916 -0
- onetick/py/servers.py +173 -0
- onetick/py/session.py +1347 -0
- onetick/py/sources/__init__.py +19 -0
- onetick/py/sources/cache.py +167 -0
- onetick/py/sources/common.py +128 -0
- onetick/py/sources/csv.py +642 -0
- onetick/py/sources/custom.py +85 -0
- onetick/py/sources/data_file.py +305 -0
- onetick/py/sources/data_source.py +1045 -0
- onetick/py/sources/empty.py +94 -0
- onetick/py/sources/odbc.py +337 -0
- onetick/py/sources/order_book.py +271 -0
- onetick/py/sources/parquet.py +168 -0
- onetick/py/sources/pit.py +191 -0
- onetick/py/sources/query.py +495 -0
- onetick/py/sources/snapshots.py +419 -0
- onetick/py/sources/split_query_output_by_symbol.py +198 -0
- onetick/py/sources/symbology_mapping.py +123 -0
- onetick/py/sources/symbols.py +374 -0
- onetick/py/sources/ticks.py +825 -0
- onetick/py/sql.py +70 -0
- onetick/py/state.py +251 -0
- onetick/py/types.py +2131 -0
- onetick/py/utils/__init__.py +70 -0
- onetick/py/utils/acl.py +93 -0
- onetick/py/utils/config.py +186 -0
- onetick/py/utils/default.py +49 -0
- onetick/py/utils/file.py +38 -0
- onetick/py/utils/helpers.py +76 -0
- onetick/py/utils/locator.py +94 -0
- onetick/py/utils/perf.py +498 -0
- onetick/py/utils/query.py +49 -0
- onetick/py/utils/render.py +1374 -0
- onetick/py/utils/script.py +244 -0
- onetick/py/utils/temp.py +471 -0
- onetick/py/utils/types.py +120 -0
- onetick/py/utils/tz.py +84 -0
- onetick_py-1.177.0.dist-info/METADATA +137 -0
- onetick_py-1.177.0.dist-info/RECORD +152 -0
- onetick_py-1.177.0.dist-info/WHEEL +5 -0
- onetick_py-1.177.0.dist-info/entry_points.txt +2 -0
- onetick_py-1.177.0.dist-info/licenses/LICENSE +21 -0
- onetick_py-1.177.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,809 @@
import warnings
from typing import TYPE_CHECKING, Tuple, Union

from onetick import py as otp
from onetick.py import aggregations
from onetick.py.aggregations._docs import (
    _all_fields_with_policy_doc,
    _boundary_tick_bucket_doc,
    _bucket_end_condition_doc,
    _bucket_interval_doc,
    _bucket_time_doc,
    _bucket_units_doc,
    _end_condition_per_group_doc,
    _group_by_doc,
    _groups_to_display_doc,
    _running_doc,
    copy_method,
)
from onetick.py.docs.utils import docstring, param_doc
from onetick.py.otq import otq

from .misc import inplace_operation

if TYPE_CHECKING:
    from onetick.py.core.source import Source


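# `param_doc` bundles a parameter's name, annotation and description into a reusable
# doc object, and the `docstring` decorator below appears to stitch such shared
# parameter docs into each decorated method's docstring (with `add_self=True`
# accounting for the bound `self` argument).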
_agg_doc = param_doc(
    name='aggs',
    annotation=dict,
    str_annotation='dict of aggregations',
    desc="""
    aggregation dict:

    * key - output column name for regular aggregations, prefix for column names for tick and multi-column aggregations;
    * value - aggregation
    """,
)


@docstring(
    parameters=[
        _agg_doc,
        _running_doc,
        _all_fields_with_policy_doc,
        _bucket_interval_doc,
        _bucket_time_doc,
        _bucket_units_doc,
        _bucket_end_condition_doc,
        _end_condition_per_group_doc,
        _boundary_tick_bucket_doc,
        _group_by_doc,
        _groups_to_display_doc,
    ],
    add_self=True,
)
def agg(self: 'Source', aggs, *args, **kwargs) -> 'Source':
    """
    Applies a composition of :ref:`otp.agg <aggregations_funcs>` aggregations.

    See Also
    --------
    | :ref:`Aggregations <aggregations_funcs>`
    | **COMPUTE** OneTick event processor

    Returns
    -------
    :py:class:`~onetick.py.Source`

    Examples
    --------

    By default the whole data is aggregated:

    >>> data = otp.Ticks(X=[1, 2, 3, 4])
    >>> data = data.agg({'X_SUM': otp.agg.sum('X')})
    >>> otp.run(data)
            Time  X_SUM
    0 2003-12-04     10

    Multiple aggregations can be applied at the same time:

    >>> data = otp.Ticks(X=[1, 2, 3, 4])
    >>> data = data.agg({'X_SUM': otp.agg.sum('X'),
    ...                  'X_MEAN': otp.agg.average('X')})
    >>> otp.run(data)
            Time  X_SUM  X_MEAN
    0 2003-12-04     10     2.5

    Aggregation can be used in running mode:

    >>> data = otp.Ticks(X=[1, 2, 3, 4])
    >>> data = data.agg({'CUM_SUM': otp.agg.sum('X')}, running=True)
    >>> otp.run(data)
                         Time  CUM_SUM
    0 2003-12-01 00:00:00.000        1
    1 2003-12-01 00:00:00.001        3
    2 2003-12-01 00:00:00.002        6
    3 2003-12-01 00:00:00.003       10

    Aggregation can be split in buckets:

    >>> data = otp.Ticks(X=[1, 2, 3, 4])
    >>> data = data.agg({'X_SUM': otp.agg.sum('X')}, bucket_interval=2, bucket_units='ticks')
    >>> otp.run(data)
                         Time  X_SUM
    0 2003-12-01 00:00:00.001      3
    1 2003-12-01 00:00:00.003      7

    Running aggregation can be used with buckets too. In this case (all_fields=False and running=True) output ticks
    are created when a tick enters or leaves the sliding window (that's why in this example there are 8 output
    ticks for 4 input ticks):

    >>> data = otp.Ticks(X=[1, 2, 3, 4], offset=[0, 1000, 1500, 3600])
    >>> data = data.agg(dict(X_MEAN=otp.agg.average("X"),
    ...                      X_STD=otp.agg.stddev("X")),
    ...                 running=True, bucket_interval=2)
    >>> otp.run(data)
                         Time  X_MEAN     X_STD
    0 2003-12-01 00:00:00.000     1.0  0.000000
    1 2003-12-01 00:00:01.000     1.5  0.500000
    2 2003-12-01 00:00:01.500     2.0  0.816497
    3 2003-12-01 00:00:02.000     2.5  0.500000
    4 2003-12-01 00:00:03.000     3.0  0.000000
    5 2003-12-01 00:00:03.500     NaN       NaN
    6 2003-12-01 00:00:03.600     4.0  0.000000
    7 2003-12-01 00:00:05.600     NaN       NaN

    By default, if you run an aggregation with buckets and group_by, a bucket is formed first, and only after that
    grouping and aggregation are performed within it:

    >>> ticks = otp.Ticks(
    ...     {
    ...         'QTY': [10, 2, 30, 4, 50],
    ...         'TRADER': ['A', 'B', 'A', 'B', 'A']
    ...     }
    ... )
    >>>
    >>> ticks = ticks.agg(
    ...     {'SUM_QTY': otp.agg.sum('QTY')}, group_by='TRADER',
    ...     bucket_interval=3, bucket_units='ticks',
    ...     running=True, all_fields=True,
    ... )
    >>>
    >>> otp.run(ticks)
                         Time TRADER  QTY  SUM_QTY
    0 2003-12-01 00:00:00.000      A   10       10
    1 2003-12-01 00:00:00.001      B    2        2
    2 2003-12-01 00:00:00.002      A   30       40
    3 2003-12-01 00:00:00.003      B    4        6
    4 2003-12-01 00:00:00.004      A   50       80

    In the row with index 4 the sum of the trades for trader "A" turned out to be 80 instead of 90:
    a bucket of 3 ticks was taken first, and within it only the 2 ticks of the group with trader "A"
    had their volumes added up.
    To prevent this behaviour and group ticks first, set the ``end_condition_per_group`` parameter to True:

    >>> ticks = otp.Ticks(
    ...     {
    ...         'QTY': [10, 2, 30, 4, 50],
    ...         'TRADER': ['A', 'B', 'A', 'B', 'A']
    ...     }
    ... )
    >>>
    >>> ticks = ticks.agg(
    ...     {'SUM_QTY': otp.agg.sum('QTY')}, group_by='TRADER',
    ...     bucket_interval=3, bucket_units='ticks',
    ...     running=True, all_fields=True,
    ...     end_condition_per_group=True,
    ... )
    >>>
    >>> otp.run(ticks)
                         Time TRADER  QTY  SUM_QTY
    0 2003-12-01 00:00:00.000      A   10       10
    1 2003-12-01 00:00:00.001      B    2        2
    2 2003-12-01 00:00:00.002      A   30       40
    3 2003-12-01 00:00:00.003      B    4        6
    4 2003-12-01 00:00:00.004      A   50       90

    Tick aggregations and aggregations that return more than one output column can also be used.
    The dict key set for an aggregation in the ``aggs`` parameter is used as a prefix
    for each output column of that aggregation.

    >>> data = otp.Ticks(X=[1, 2, 3, 4], Y=[10, 20, 30, 40])
    >>> data = data.agg({'X_SUM': otp.agg.sum('X'), 'X_FIRST': otp.agg.first_tick()})
    >>> otp.run(data)
            Time  X_FIRST.X  X_FIRST.Y  X_SUM
    0 2003-12-04          1         10     10

    These aggregations can be split in buckets too:

    >>> data = otp.Ticks(X=[1, 2, 3, 4], Y=[10, 20, 30, 40])
    >>> data = data.agg(
    ...     {'X_SUM': otp.agg.sum('X'), 'X_FIRST': otp.agg.first_tick()},
    ...     bucket_interval=2, bucket_units='ticks',
    ... )
    >>> otp.run(data)
                         Time  X_FIRST.X  X_FIRST.Y  X_SUM
    0 2003-12-01 00:00:00.001          1         10      3
    1 2003-12-01 00:00:00.003          3         30      7

    If all_fields=True, an output tick is generated only for arrival events, but all attributes from the input tick
    causing an arrival event are copied over to the output tick and the aggregation is added as another attribute:

    >>> data = otp.Ticks(X=[1, 2, 3, 4], offset=[0, 1000, 1500, 3600])
    >>> data = data.agg(dict(X_MEAN=otp.agg.average("X"),
    ...                      X_STD=otp.agg.stddev("X")),
    ...                 all_fields=True, running=True)
    >>> otp.run(data)
                         Time  X  X_MEAN     X_STD
    0 2003-12-01 00:00:00.000  1     1.0  0.000000
    1 2003-12-01 00:00:01.000  2     1.5  0.500000
    2 2003-12-01 00:00:01.500  3     2.0  0.816497
    3 2003-12-01 00:00:03.600  4     2.5  1.118034

    The ``all_fields`` parameter can be used when all original fields are needed in the output:

    >>> ticks = otp.Ticks(X=[3, 4, 1, 2])
    >>> data = ticks.agg(dict(X_MEAN=otp.agg.average("X"),
    ...                       X_STD=otp.agg.stddev("X")),
    ...                  all_fields=True)
    >>> otp.run(data)
            Time  X  X_MEAN     X_STD
    0 2003-12-04  3     2.5  1.118034

    There are different policies for the ``all_fields`` parameter:

    >>> data = ticks.agg(dict(X_MEAN=otp.agg.average("X"),
    ...                       X_STD=otp.agg.stddev("X")),
    ...                  all_fields="last")
    >>> otp.run(data)
            Time  X  X_MEAN     X_STD
    0 2003-12-04  2     2.5  1.118034

    For the low/high policies the tick that supplies the original fields is selected this way:

    >>> data = ticks.agg(dict(X_MEAN=otp.agg.average("X"),
    ...                       X_STD=otp.agg.stddev("X")),
    ...                  all_fields=otp.agg.low_tick(data["X"]))
    >>> otp.run(data)
            Time  X  X_MEAN     X_STD
    0 2003-12-04  1     2.5  1.118034

    Example of using 'flexible' buckets, where every bucket consists of consecutive upticks:

    >>> trades = otp.Ticks(PRICE=[194.65, 194.65, 194.65, 194.75, 194.75, 194.51, 194.70, 194.71, 194.75, 194.71])
    >>> trades = trades.agg({'COUNT': otp.agg.count(),
    ...                      'FIRST_TIME': otp.agg.first('Time'),
    ...                      'LAST_TIME': otp.agg.last('Time')},
    ...                     bucket_units='flexible',
    ...                     bucket_end_condition=trades['PRICE'] < trades['PRICE'][-1])
    >>> otp.run(trades)
                         Time  COUNT              FIRST_TIME               LAST_TIME
    0 2003-12-01 00:00:00.005      5 2003-12-01 00:00:00.000 2003-12-01 00:00:00.004
    1 2003-12-01 00:00:00.009      4 2003-12-01 00:00:00.005 2003-12-01 00:00:00.008
    2 2003-12-04 00:00:00.000      1 2003-12-01 00:00:00.009 2003-12-01 00:00:00.009
    """

    aggs = aggs.copy()
    result = self.copy()

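    # `aggregations.compute` corresponds to the COMPUTE OneTick event processor
    # referenced in the docstring above: it carries the shared running/bucketing
    # settings, and each named aggregation from `aggs` is registered on it below.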
    what_to_aggregate = aggregations.compute(
        *args,
        **kwargs,
    )

    for name, ag in aggs.items():
        what_to_aggregate.add(name, ag)

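    # `apply` attaches the composed aggregation to the source; `_add_table` is an
    # internal helper that, presumably, pins the resulting schema onto the graph.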
    result = what_to_aggregate.apply(result)
    result._add_table()

    return result


# Aggregation method stubs.
# We need these functions to store and collect documentation.
# The copy_method decorator will:
# - set the docstring (comparing the donor function's docstring with the method's docstring)
# - apply the donor function's signature plus `self`
# - for mimic=True, apply the agg function as is
@copy_method(aggregations.functions.high_tick)
def high(self, *args, **kwargs):
    """
    Examples
    --------
    >>> data = otp.Ticks(X=[1, 2, 3, 4], offset=[0, 1000, 1500, 3000])
    >>> data = data.high(['X'], 2)  # OTdirective: snippet-name: Aggregations.high tick;
    >>> otp.run(data)
                         Time  X
    0 2003-12-01 00:00:01.500  3
    1 2003-12-01 00:00:03.000  4
    """
    pass


@copy_method(aggregations.functions.low_tick)
def low(self, *args, **kwargs):
    """
    Examples
    --------
    >>> data = otp.Ticks(X=[1, 2, 3, 4], offset=[0, 1000, 1500, 3000])
    >>> data = data.low(['X'], 2)  # OTdirective: snippet-name: Aggregations.low tick;
    >>> otp.run(data)
                    Time  X
    0 2003-12-01 00:00:00  1
    1 2003-12-01 00:00:01  2
    """
    pass


@copy_method(aggregations.functions.first_tick)
def first(self, *args, **kwargs):
    """
    Examples
    --------

    Get first tick:

    >>> data = otp.Ticks(X=[1, 2, 3, 4])
    >>> data = data.first()  # OTdirective: snippet-name: Aggregations.first;
    >>> otp.run(data)
            Time  X
    0 2003-12-01  1

    Get first tick each day:

    >>> data = otp.Ticks(X=[1, 2, 3, 4], offset=[otp.Day(0), otp.Day(0), otp.Day(2), otp.Day(2)])
    >>> data = data.first(bucket_interval=1, bucket_units='days', bucket_time='start')
    >>> otp.run(data)
            Time  X
    0 2003-12-01  1
    1 2003-12-03  3

    Get first tick each day and set tick value for empty buckets:

    >>> data = otp.Ticks(X=[1, 2, 3, 4], offset=[otp.Day(0), otp.Day(0), otp.Day(2), otp.Day(2)])
    >>> data = data.first(bucket_interval=1, bucket_units='days', bucket_time='start', default_tick={'X': -1})
    >>> otp.run(data)
            Time  X
    0 2003-12-01  1
    1 2003-12-02 -1
    2 2003-12-03  3
    """
    pass


@copy_method(aggregations.functions.last_tick)
def last(self, *args, **kwargs):
    """
    Examples
    --------

    Get last tick:

    >>> data = otp.Ticks(X=[1, 2, 3, 4])
    >>> data = data.last()  # OTdirective: snippet-name: Aggregations.last;
    >>> otp.run(data)
                         Time  X
    0 2003-12-01 00:00:00.003  4

    Get last tick each day:

    >>> data = otp.Ticks(X=[1, 2, 3, 4], offset=[otp.Day(0), otp.Day(0), otp.Day(2), otp.Day(2)])
    >>> data = data.last(bucket_interval=1, bucket_units='days', bucket_time='start')
    >>> otp.run(data)
            Time  X
    0 2003-12-01  2
    1 2003-12-03  4

    Get last tick each day and set tick value for empty buckets:

    >>> data = otp.Ticks(X=[1, 2, 3, 4], offset=[otp.Day(0), otp.Day(0), otp.Day(2), otp.Day(2)])
    >>> data = data.last(bucket_interval=1, bucket_units='days', bucket_time='start', default_tick={'X': -1})
    >>> otp.run(data)
            Time  X
    0 2003-12-01  2
    1 2003-12-02 -1
    2 2003-12-03  4
    """
    pass


@copy_method(aggregations.functions.distinct, mimic=False)
def distinct(self: 'Source', *args, **kwargs):
    """
    Examples
    --------
    >>> data = otp.Ticks(dict(x=[1, 3, 1, 5, 3]))
    >>> data = data.distinct('x')  # OTdirective: snippet-name: Aggregations.distinct;
    >>> otp.run(data)
            Time  x
    0 2003-12-04  1
    1 2003-12-04  3
    2 2003-12-04  5
    """
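    # Accept the legacy parameter name 'bucket_interval_units' as an alias
    # for 'bucket_units' (kept for backward compatibility).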
    if 'bucket_interval_units' in kwargs:
        kwargs['bucket_units'] = kwargs.pop('bucket_interval_units')
    aggr = aggregations.functions.distinct(*args, **kwargs)
    return aggr.apply(self)


# mimic=False for backward compatibility
@copy_method(aggregations.functions.high_time, mimic=False, drop_examples=True)
def high_time(self: 'Source', *args, **kwargs):
    """
    Returns the timestamp of the tick with the highest value of the input field.

    .. deprecated:: 1.14.5

        Use :py:func:`.high_time` instead

    See Also
    --------
    :py:func:`.high_time`

    """
    warnings.warn(
        f"{self.__class__.__name__}.high_time is deprecated. Use otp.agg.high_time instead",
        FutureWarning,
        stacklevel=2,
    )
    aggr = aggregations.functions.high_time(*args, **kwargs)
    return aggr.apply(self, 'VALUE')


# mimic=False for backward compatibility
@copy_method(aggregations.functions.low_time, mimic=False, drop_examples=True)
def low_time(self: 'Source', *args, **kwargs):
    """
    Returns the timestamp of the tick with the lowest value of the input field.

    .. deprecated:: 1.14.5

        Use :py:func:`.low_time` instead

    See Also
    --------
    :py:func:`.low_time`

    """
    warnings.warn(
        f"{self.__class__.__name__}.low_time is deprecated. Use otp.agg.low_time instead",
        FutureWarning,
        stacklevel=2,
    )
    aggr = aggregations.functions.low_time(*args, **kwargs)
    return aggr.apply(self, 'VALUE')


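# Order book aggregations: stubs whose implementations are copied from the
# corresponding `aggregations.functions.ob_*` donors by the `copy_method` decorator.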
@copy_method(aggregations.functions.ob_snapshot)
def ob_snapshot(self, *args, **kwargs):
    """
    Examples
    --------
    >>> data = otp.DataSource(db='SOME_DB', tick_type='PRL', symbols='AA')  # doctest: +SKIP
    >>> data = data.ob_snapshot(max_levels=1)  # doctest: +SKIP
    >>> otp.run(data)  # doctest: +SKIP
            Time  PRICE             UPDATE_TIME  SIZE  LEVEL  BUY_SELL_FLAG
    0 2003-12-04    2.0 2003-12-01 00:00:00.003     6      1              1
    1 2003-12-04    5.0 2003-12-01 00:00:00.004     7      1              0
    """
    pass


@copy_method(aggregations.functions.ob_snapshot_wide)
def ob_snapshot_wide(self, *args, **kwargs):
    """
    Examples
    --------
    >>> data = otp.DataSource(db='SOME_DB', tick_type='PRL', symbols='AA')  # doctest: +SKIP
    >>> data = data.ob_snapshot_wide(max_levels=1)  # doctest: +SKIP
    >>> otp.run(data)  # doctest: +SKIP
            Time  BID_PRICE         BID_UPDATE_TIME  BID_SIZE  ASK_PRICE         ASK_UPDATE_TIME  ASK_SIZE  LEVEL
    0 2003-12-03        5.0 2003-12-01 00:00:00.004         7        2.0 2003-12-01 00:00:00.003         6      1
    """
    pass


@copy_method(aggregations.functions.ob_snapshot_flat)
def ob_snapshot_flat(self, *args, **kwargs):
    """
    Examples
    --------
    >>> data = otp.DataSource(db='SOME_DB', tick_type='PRL', symbols='AA')  # doctest: +SKIP
    >>> data = data.ob_snapshot_flat(max_levels=1)  # doctest: +SKIP
    >>> otp.run(data)  # doctest: +SKIP
            Time  BID_PRICE1        BID_UPDATE_TIME1  BID_SIZE1  ASK_PRICE1        ASK_UPDATE_TIME1  ASK_SIZE1
    0 2003-12-03         5.0 2003-12-01 00:00:00.004          7         2.0 2003-12-01 00:00:00.003          6
    """
    pass


@copy_method(aggregations.functions.ob_summary)
def ob_summary(self, *args, **kwargs):
    """
    Examples
    --------
    >>> data = otp.DataSource(db='SOME_DB', tick_type='PRL', symbols='AA')  # doctest: +SKIP
    >>> data = data.ob_summary(max_levels=1)  # doctest: +SKIP
    >>> otp.run(data)  # doctest: +SKIP
            Time  BID_PRICE  BID_SIZE  BID_VWAP  BEST_BID_PRICE  WORST_BID_SIZE  NUM_BID_LEVELS  ASK_SIZE\
      ASK_VWAP  BEST_ASK_PRICE  WORST_ASK_PRICE  NUM_ASK_LEVELS
    0 2003-12-04        NaN         7       5.0             5.0             NaN               1         6\
           2.0             2.0              2.0               1
    """
    pass


@copy_method(aggregations.functions.ob_size)
def ob_size(self, *args, **kwargs):
    """
    Examples
    --------
    >>> data = otp.DataSource(db='SOME_DB', tick_type='PRL', symbols='AA')  # doctest: +SKIP
    >>> data = data.ob_size(max_levels=10)  # doctest: +SKIP
    >>> otp.run(data)  # doctest: +SKIP
            Time  ASK_VALUE  BID_VALUE
    0 2003-12-01      84800      64500
    """
    pass


@copy_method(aggregations.functions.ob_vwap)
def ob_vwap(self, *args, **kwargs):
    """
    Examples
    --------
    >>> data = otp.DataSource(db='SOME_DB', tick_type='PRL', symbols='AA')  # doctest: +SKIP
    >>> data = data.ob_vwap(max_levels=10)  # doctest: +SKIP
    >>> otp.run(data)  # doctest: +SKIP
            Time  ASK_VALUE  BID_VALUE
    0 2003-12-01     23.313   23.20848
    """
    pass


@copy_method(aggregations.functions.ob_num_levels)
def ob_num_levels(self, *args, **kwargs):
    """
    Examples
    --------
    >>> data = otp.DataSource(db='SOME_DB', tick_type='PRL', symbols='AA')  # doctest: +SKIP
    >>> data = data.ob_num_levels()  # doctest: +SKIP
    >>> otp.run(data)  # doctest: +SKIP
            Time  ASK_VALUE  BID_VALUE
    0 2003-12-01        248         67
    """
    pass


@copy_method(aggregations.functions.ranking)
def ranking(self: 'Source', *args, **kwargs):
    # method implementation is copied by the decorator
    pass


@copy_method(aggregations.functions.percentile)
def percentile(self: 'Source', *args, **kwargs):
    # method implementation is copied by the decorator
    pass


@copy_method(aggregations.functions.find_value_for_percentile)
def find_value_for_percentile(self: 'Source', *args, **kwargs):
    # method implementation is copied by the decorator
    pass


@copy_method(aggregations.functions.exp_w_average)
def exp_w_average(self: 'Source', *args, **kwargs):
    # method implementation is copied by the decorator
    pass


@copy_method(aggregations.functions.exp_tw_average)
def exp_tw_average(self: 'Source', *args, **kwargs):
    # method implementation is copied by the decorator
    pass


@copy_method(aggregations.functions.standardized_moment)
def standardized_moment(self: 'Source', *args, **kwargs):
    # method implementation is copied by the decorator
    pass


@copy_method(aggregations.functions.portfolio_price)
def portfolio_price(self: 'Source', *args, **kwargs):
    # method implementation is copied by the decorator
    pass


@copy_method(aggregations.functions.multi_portfolio_price)
def multi_portfolio_price(self: 'Source', *args, **kwargs):
    # method implementation is copied by the decorator
    pass


@copy_method(aggregations.functions.return_ep)
def return_ep(self: 'Source', *args, **kwargs):
    # method implementation is copied by the decorator
    pass


@copy_method(aggregations.functions.implied_vol)
def implied_vol(self: 'Source', *args, **kwargs):
    # method implementation is copied by the decorator
    pass


@copy_method(aggregations.functions.linear_regression)
def linear_regression(self: 'Source', *args, **kwargs):
    # method implementation is copied by the decorator
    pass


@inplace_operation
def process_by_group(
    self: 'Source', process_source_func, group_by=None, source_name=None, num_threads=None, inplace=False
) -> Union['Source', Tuple['Source', ...], None]:
    """
    Groups data by ``group_by``, runs ``process_source_func`` for each group, and merges the outputs of all groups.
    Note that ``process_source_func`` is converted to a OneTick object and passed to the query,
    which means the python callable is called only once.

    Parameters
    ----------
    process_source_func: callable
        ``process_source_func`` should take a :class:`Source`, apply the necessary logic and return it,
        or return a tuple of :class:`Source` objects; in that case all of them should have a common root,
        which is the input :class:`Source`.
        The number of sources returned by this method is the same as the number of sources
        returned by ``process_source_func``.
    group_by: list
        A list of field names to group input ticks by.

        If group_by is None then no group_by fields are defined
        and the logic of ``process_source_func`` is applied to all input ticks
        at once.
    source_name: str
        A name for the source that represents all of the group_by sources. Can be passed here or as a name
        of the inner sources; if passed both ways, the names should be consistent.
    num_threads: int
        If specified and not zero, turns on asynchronous processing mode
        and specifies the number of threads to be used for processing input ticks.
        If this parameter is not specified or zero, input ticks are processed synchronously.
    inplace: bool
        If True, nothing is returned and the changes are applied to the current query;
        otherwise the changed query is returned.
        An error is raised if ``inplace`` is set to True
        and multiple sources are returned by ``process_source_func``.

    Returns
    -------
    :class:`Source`, Tuple[:class:`Source`] or None:

    See also
    --------
    **GROUP_BY** OneTick event processor

    Examples
    --------

    >>> # OTdirective: snippet-name: Arrange.group.single output;
    >>> d = otp.Ticks(X=[1, 1, 2, 2],
    ...               Y=[1, 2, 3, 4])
    >>>
    >>> def func(source):
    ...     return source.first()
    >>>
    >>> res = d.process_by_group(func, group_by=['X'])
    >>> otp.run(res)[["X", "Y"]]
       X  Y
    0  1  1
    1  2  3

    Set asynchronous processing:

    >>> res = d.process_by_group(func, group_by=['X'], num_threads=2)
    >>> otp.run(res)[['X', 'Y']]
       X  Y
    0  1  1
    1  2  3

    Return multiple outputs, each with unique grouping logic:

    >>> d = otp.Ticks(X=[1, 1, 2, 2],
    ...               Y=[1, 2, 1, 3])
    >>>
    >>> def func(source):
    ...     source['Z'] = source['X']
    ...     source2 = source.copy()
    ...     source = source.first()
    ...     source2 = source2.last()
    ...     return source, source2
    >>> # OTdirective: snippet-name: Arrange.group.multiple output;
    >>> res1, res2 = d.process_by_group(func, group_by=['Y'])
    >>> df1 = otp.run(res1)
    >>> df2 = otp.run(res2)
    >>> df1[['X', 'Y', 'Z']]
       X  Y  Z
    0  1  1  1
    1  1  2  1
    2  2  3  2
    >>> df2[['X', 'Y', 'Z']]  # OTdirective: skip-snippet:;
       X  Y  Z
    0  1  2  1
    1  2  1  2
    2  2  3  2
    """

    if group_by is None:
        group_by = []

    if inplace:
        main_source = self
    else:
        main_source = self.copy()

    input_schema = main_source.columns(skip_meta_fields=True)
    for field in group_by:
        if field not in input_schema:
            raise ValueError(f"Group by field name {field} not present in input source schema")

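    # Build a stub root source that carries only the input schema; `process_source_func`
    # is applied to this stub once to record the per-group processing graph
    # (which is why the python callable runs only once, as noted in the docstring).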
    process_source_root = otp.DataSource(tick_type="ANY", schema_policy="manual", schema=input_schema)
    if source_name:
        process_source_root.set_name(source_name)
    process_sources = process_source_func(process_source_root)

    if isinstance(process_sources, otp.Source):
        # returned one source
        process_sources = [process_sources]
    elif len(process_sources) == 1:
        # returned one source as an iterable
        pass
    else:
        # returned multiple sources
        if inplace:
            raise ValueError("Cannot use inplace=True with multi-source processing function!")

    for num_source, process_source in enumerate(process_sources):
        output_schema = process_source.columns(skip_meta_fields=True)

        if process_source.get_name():
            if not process_source_root.get_name():
                process_source_root.set_name(process_source.get_name())
            if process_source_root.get_name() != process_source.get_name():
                warnings.warn(
                    "Different strings passed as names for the root source used in "
                    f"process_by_group: '{process_source.get_name()}' "
                    f"and '{process_source_root.get_name()}'"
                )

        # removing key fields from output schema since they will be
        # added by the GROUP_BY EP
        process_source.drop([field for field in group_by if field in output_schema], inplace=True)
        process_source.sink(otq.Passthrough().node_name(f"OUT_{num_source}"))
        process_source_root.node().add_rules(process_source.node().copy_rules())
        main_source._merge_tmp_otq(process_source)

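    # The recorded per-group graph is stored as a nested query in the main source's
    # temporary .otq storage; the GROUP_BY EP below references it via 'THIS::<query_name>'.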
    query_name = process_source_root._store_in_tmp_otq(
        main_source._tmp_otq, operation_suffix="group_by", add_passthrough=False,
        # set default symbol even if it's not set by the user; the symbol's value doesn't matter in this case
        symbols=otp.config.get('default_symbol', 'ANY')
    )
    process_path = f'THIS::{query_name}'
    num_outputs = len(process_sources)

    # we shouldn't set named outputs if the GROUP_BY EP has only one output, due to OneTick behaviour
    if num_outputs == 1:
        outputs = ""
    else:
        outputs = ",".join(f"OUT_{i}" for i in range(num_outputs))

    kwargs = {}
    if num_threads is not None:
        if num_threads < 0:
            raise ValueError("Parameter 'num_threads' can't be negative.")
        kwargs['num_threads'] = num_threads

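    # Sink the GROUP_BY EP: it partitions input ticks by the key fields and runs the
    # nested per-group query, exposing one output pin per source returned by the callback.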
    main_source.sink(otq.GroupBy(key_fields=",".join(group_by), query_name=process_path, outputs=outputs, **kwargs))

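    # For every declared output, bind a copy of the graph to the matching "OUT_<n>"
    # pin and rebuild its schema (group_by key fields are restored from the input schema).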
    output_sources = []
    for num_output in range(num_outputs):
        if num_outputs == 1 and inplace:
            output_source = main_source
        else:
            output_source = main_source.copy()

        if num_outputs > 1:
            output_source.node().out_pin(f"OUT_{num_output}")

        # setting schema after processing
        output_schema = process_sources[num_output].columns(skip_meta_fields=True)
        for field in group_by:
            output_schema[field] = input_schema[field]
        for field, field_type in output_schema.items():
            output_source.schema[field] = field_type
        output_source = output_source[list(output_schema)]
        output_source._merge_tmp_otq(main_source)
        output_sources.append(output_source)

    if num_outputs == 1:
        return output_sources[0]
    else:
        return tuple(output_sources)