onetick-py 1.177.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- locator_parser/__init__.py +0 -0
- locator_parser/acl.py +73 -0
- locator_parser/actions.py +262 -0
- locator_parser/common.py +368 -0
- locator_parser/io.py +43 -0
- locator_parser/locator.py +150 -0
- onetick/__init__.py +101 -0
- onetick/doc_utilities/__init__.py +3 -0
- onetick/doc_utilities/napoleon.py +40 -0
- onetick/doc_utilities/ot_doctest.py +140 -0
- onetick/doc_utilities/snippets.py +279 -0
- onetick/lib/__init__.py +4 -0
- onetick/lib/instance.py +141 -0
- onetick/py/__init__.py +293 -0
- onetick/py/_stack_info.py +89 -0
- onetick/py/_version.py +2 -0
- onetick/py/aggregations/__init__.py +11 -0
- onetick/py/aggregations/_base.py +648 -0
- onetick/py/aggregations/_docs.py +948 -0
- onetick/py/aggregations/compute.py +286 -0
- onetick/py/aggregations/functions.py +2216 -0
- onetick/py/aggregations/generic.py +104 -0
- onetick/py/aggregations/high_low.py +80 -0
- onetick/py/aggregations/num_distinct.py +83 -0
- onetick/py/aggregations/order_book.py +501 -0
- onetick/py/aggregations/other.py +1014 -0
- onetick/py/backports.py +26 -0
- onetick/py/cache.py +374 -0
- onetick/py/callback/__init__.py +5 -0
- onetick/py/callback/callback.py +276 -0
- onetick/py/callback/callbacks.py +131 -0
- onetick/py/compatibility.py +798 -0
- onetick/py/configuration.py +771 -0
- onetick/py/core/__init__.py +0 -0
- onetick/py/core/_csv_inspector.py +93 -0
- onetick/py/core/_internal/__init__.py +0 -0
- onetick/py/core/_internal/_manually_bound_value.py +6 -0
- onetick/py/core/_internal/_nodes_history.py +250 -0
- onetick/py/core/_internal/_op_utils/__init__.py +0 -0
- onetick/py/core/_internal/_op_utils/every_operand.py +9 -0
- onetick/py/core/_internal/_op_utils/is_const.py +10 -0
- onetick/py/core/_internal/_per_tick_scripts/tick_list_sort_template.script +121 -0
- onetick/py/core/_internal/_proxy_node.py +140 -0
- onetick/py/core/_internal/_state_objects.py +2312 -0
- onetick/py/core/_internal/_state_vars.py +93 -0
- onetick/py/core/_source/__init__.py +0 -0
- onetick/py/core/_source/_symbol_param.py +95 -0
- onetick/py/core/_source/schema.py +97 -0
- onetick/py/core/_source/source_methods/__init__.py +0 -0
- onetick/py/core/_source/source_methods/aggregations.py +809 -0
- onetick/py/core/_source/source_methods/applyers.py +296 -0
- onetick/py/core/_source/source_methods/columns.py +141 -0
- onetick/py/core/_source/source_methods/data_quality.py +301 -0
- onetick/py/core/_source/source_methods/debugs.py +272 -0
- onetick/py/core/_source/source_methods/drops.py +120 -0
- onetick/py/core/_source/source_methods/fields.py +619 -0
- onetick/py/core/_source/source_methods/filters.py +1002 -0
- onetick/py/core/_source/source_methods/joins.py +1413 -0
- onetick/py/core/_source/source_methods/merges.py +605 -0
- onetick/py/core/_source/source_methods/misc.py +1455 -0
- onetick/py/core/_source/source_methods/pandases.py +155 -0
- onetick/py/core/_source/source_methods/renames.py +356 -0
- onetick/py/core/_source/source_methods/sorts.py +183 -0
- onetick/py/core/_source/source_methods/switches.py +142 -0
- onetick/py/core/_source/source_methods/symbols.py +117 -0
- onetick/py/core/_source/source_methods/times.py +627 -0
- onetick/py/core/_source/source_methods/writes.py +986 -0
- onetick/py/core/_source/symbol.py +205 -0
- onetick/py/core/_source/tmp_otq.py +222 -0
- onetick/py/core/column.py +209 -0
- onetick/py/core/column_operations/__init__.py +0 -0
- onetick/py/core/column_operations/_methods/__init__.py +4 -0
- onetick/py/core/column_operations/_methods/_internal.py +28 -0
- onetick/py/core/column_operations/_methods/conversions.py +216 -0
- onetick/py/core/column_operations/_methods/methods.py +292 -0
- onetick/py/core/column_operations/_methods/op_types.py +160 -0
- onetick/py/core/column_operations/accessors/__init__.py +0 -0
- onetick/py/core/column_operations/accessors/_accessor.py +28 -0
- onetick/py/core/column_operations/accessors/decimal_accessor.py +104 -0
- onetick/py/core/column_operations/accessors/dt_accessor.py +537 -0
- onetick/py/core/column_operations/accessors/float_accessor.py +184 -0
- onetick/py/core/column_operations/accessors/str_accessor.py +1367 -0
- onetick/py/core/column_operations/base.py +1121 -0
- onetick/py/core/cut_builder.py +150 -0
- onetick/py/core/db_constants.py +20 -0
- onetick/py/core/eval_query.py +245 -0
- onetick/py/core/lambda_object.py +441 -0
- onetick/py/core/multi_output_source.py +232 -0
- onetick/py/core/per_tick_script.py +2256 -0
- onetick/py/core/query_inspector.py +464 -0
- onetick/py/core/source.py +1744 -0
- onetick/py/db/__init__.py +2 -0
- onetick/py/db/_inspection.py +1128 -0
- onetick/py/db/db.py +1327 -0
- onetick/py/db/utils.py +64 -0
- onetick/py/docs/__init__.py +0 -0
- onetick/py/docs/docstring_parser.py +112 -0
- onetick/py/docs/utils.py +81 -0
- onetick/py/functions.py +2398 -0
- onetick/py/license.py +190 -0
- onetick/py/log.py +88 -0
- onetick/py/math.py +935 -0
- onetick/py/misc.py +470 -0
- onetick/py/oqd/__init__.py +22 -0
- onetick/py/oqd/eps.py +1195 -0
- onetick/py/oqd/sources.py +325 -0
- onetick/py/otq.py +216 -0
- onetick/py/pyomd_mock.py +47 -0
- onetick/py/run.py +916 -0
- onetick/py/servers.py +173 -0
- onetick/py/session.py +1347 -0
- onetick/py/sources/__init__.py +19 -0
- onetick/py/sources/cache.py +167 -0
- onetick/py/sources/common.py +128 -0
- onetick/py/sources/csv.py +642 -0
- onetick/py/sources/custom.py +85 -0
- onetick/py/sources/data_file.py +305 -0
- onetick/py/sources/data_source.py +1045 -0
- onetick/py/sources/empty.py +94 -0
- onetick/py/sources/odbc.py +337 -0
- onetick/py/sources/order_book.py +271 -0
- onetick/py/sources/parquet.py +168 -0
- onetick/py/sources/pit.py +191 -0
- onetick/py/sources/query.py +495 -0
- onetick/py/sources/snapshots.py +419 -0
- onetick/py/sources/split_query_output_by_symbol.py +198 -0
- onetick/py/sources/symbology_mapping.py +123 -0
- onetick/py/sources/symbols.py +374 -0
- onetick/py/sources/ticks.py +825 -0
- onetick/py/sql.py +70 -0
- onetick/py/state.py +251 -0
- onetick/py/types.py +2131 -0
- onetick/py/utils/__init__.py +70 -0
- onetick/py/utils/acl.py +93 -0
- onetick/py/utils/config.py +186 -0
- onetick/py/utils/default.py +49 -0
- onetick/py/utils/file.py +38 -0
- onetick/py/utils/helpers.py +76 -0
- onetick/py/utils/locator.py +94 -0
- onetick/py/utils/perf.py +498 -0
- onetick/py/utils/query.py +49 -0
- onetick/py/utils/render.py +1374 -0
- onetick/py/utils/script.py +244 -0
- onetick/py/utils/temp.py +471 -0
- onetick/py/utils/types.py +120 -0
- onetick/py/utils/tz.py +84 -0
- onetick_py-1.177.0.dist-info/METADATA +137 -0
- onetick_py-1.177.0.dist-info/RECORD +152 -0
- onetick_py-1.177.0.dist-info/WHEEL +5 -0
- onetick_py-1.177.0.dist-info/entry_points.txt +2 -0
- onetick_py-1.177.0.dist-info/licenses/LICENSE +21 -0
- onetick_py-1.177.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,986 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
import datetime
|
|
3
|
+
from typing import TYPE_CHECKING, Optional, Set, Type, Union
|
|
4
|
+
from onetick.py.backports import Literal
|
|
5
|
+
|
|
6
|
+
from onetick import py as otp
|
|
7
|
+
from onetick.py import configuration
|
|
8
|
+
from onetick.py.core.column import _Column, field_name_contains_lowercase
|
|
9
|
+
from onetick.py.otq import otq
|
|
10
|
+
from onetick.py.utils import adaptive
|
|
11
|
+
from onetick.py.compatibility import is_save_snapshot_database_parameter_supported
|
|
12
|
+
|
|
13
|
+
from .misc import inplace_operation
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from onetick.py.core.source import Source
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@inplace_operation
def write(
    self,
    db: Union[str, 'otp.DB'],
    symbol: Union[str, 'otp.Column', None] = None,
    tick_type: Union[str, 'otp.Column', None] = None,
    date: Union[datetime.date, Type[adaptive], None] = adaptive,
    start_date: Optional[datetime.date] = None,
    end_date: Optional[datetime.date] = None,
    append: bool = False,
    keep_symbol_and_tick_type: Union[bool, Type[adaptive]] = adaptive,
    propagate: bool = True,
    out_of_range_tick_action: Literal['exception', 'ignore', 'load'] = 'exception',
    timestamp: Optional['otp.Column'] = None,
    keep_timestamp: bool = True,
    correction_type: Optional['otp.Column'] = None,
    replace_existing_time_series: bool = False,
    allow_concurrent_write: bool = False,
    context: Union[str, Type[adaptive]] = adaptive,
    use_context_of_query: bool = False,
    inplace: bool = False,
    **kwargs,
) -> Optional['Source']:
    """
    Saves data result to OneTick database.

    Note
    ----
    This method does not save anything by itself. It adds an instruction to the
    query to save. Data will be saved when the query is executed.

    Using ``start_date``+``end_date`` parameters instead of a single ``date`` has some limitations:

    * ``inplace`` is not supported
    * if ``DAY_BOUNDARY_TZ`` and ``DAY_BOUNDARY_OFFSET`` are specified against
      individual locations of the database, then the day boundary could be calculated incorrectly.
    * ``out_of_range_tick_action`` can only be ``exception`` or ``ignore``

    Parameters
    ----------
    db: str or :py:class:`otp.DB <onetick.py.DB>`
        database name or object.
    symbol: str or Column
        resulting symbol name string or column to get symbol name from.
        If this parameter is not set, then ticks' _SYMBOL_NAME pseudo-field is used.
        If it is empty, an attempt is made to retrieve
        the symbol name from the field named SYMBOL_NAME.
    tick_type: str or Column
        resulting tick type string or column to get tick type from.
        If this parameter is not set, the _TICK_TYPE pseudo-field is used.
        If it is empty, an attempt is made to retrieve
        the tick type from the field named TICK_TYPE.
    date: :py:class:`otp.datetime <onetick.py.datetime>` or None
        date where to save data.
        Should be set to `None` if writing to accelerator or memory database.
        By default, it is set to `otp.config.default_date`.
    start_date: :py:class:`otp.datetime <onetick.py.datetime>` or None
        Start date for data to save. It is inclusive.
        Cannot be used with ``date`` parameter.
        Also cannot be used with ``inplace`` set to ``True``.
        Should be set to `None` if writing to accelerator or memory database.
        By default, None.
    end_date: :py:class:`otp.datetime <onetick.py.datetime>` or None
        End date for data to save. It is inclusive.
        Cannot be used with ``date`` parameter.
        Also cannot be used with ``inplace`` set to ``True``.
        Should be set to `None` if writing to accelerator or memory database.
        By default, None.
    append: bool
        If False - data will be rewritten for this ``date``
        or range of dates (from ``start_date`` to ``end_date``),
        otherwise data will be appended: new symbols are added,
        existing symbols can be modified (append new ticks, modify existing ticks).
        This option is not valid for accelerator databases.
    keep_symbol_and_tick_type: bool
        keep fields containing symbol name and tick type when writing ticks
        to the database or propagating them.
        By default, this parameter is adaptive.
        If ``symbol`` or ``tick_type`` are column objects, then it's set to True.
        Otherwise, it's set to False.
    propagate: bool
        Propagate ticks after that event processor or not.
    out_of_range_tick_action: str
        Action to be executed if tick's timestamp's date is not ``date`` or between ``start_date`` or ``end_date``:

        * `exception`: runtime exception will be raised
        * `ignore`: tick will not be written to the database
        * `load`: writes tick to the database anyway.
          Can be used only with ``date``, not with ``start_date``+``end_date``.

        Default: `exception`
    timestamp: Column
        Field that contains the timestamp with which the ticks will be written to the database.
        By default, the TIMESTAMP pseudo-column is used.
    keep_timestamp: bool
        If ``timestamp`` parameter is set and this parameter is set to False,
        then the timestamp column is removed after writing.
        By default, True (the column is kept).
    correction_type: Column
        The name of the column that contains the correction type.
        This column will be removed.
        If this parameter is not set, no corrections will be submitted.
    replace_existing_time_series: bool
        If ``append`` is set to True, setting this option to True instructs the loader
        to replace existing time series, instead of appending to them.
        Other time series will remain unchanged.
    allow_concurrent_write: bool
        Allows different queries running on the same server to load concurrently into the same database.
    context: str
        The server context used to look up the database.
        By default, `otp.config.context` is used if ``use_context_of_query`` is not set.
    use_context_of_query: bool
        If this parameter is set to True and the ``context`` parameter is not set,
        the context of the query is used instead of the default value of the ``context`` parameter.
    inplace: bool
        A flag controls whether operation should be applied inplace.
        If ``inplace=True``, then it returns nothing.
        Otherwise, method returns a new modified object.
        Cannot be ``True`` if ``start_date`` and ``end_date`` are set.
    kwargs:
        .. deprecated:: 1.21.0

           Use named parameters instead.

    Returns
    -------
    :class:`Source` or None

    See also
    --------
    **WRITE_TO_ONETICK_DB** OneTick event processor

    Examples
    --------
    >>> data = otp.Ticks(X=[1, 2, 3])
    >>> data = data.write('SOME_DB', symbol='S_WRITE', tick_type='T_WRITE')
    >>> otp.run(data)
                         Time  X
    0 2003-12-01 00:00:00.000  1
    1 2003-12-01 00:00:00.001  2
    2 2003-12-01 00:00:00.002  3
    >>> data = otp.DataSource('SOME_DB', symbol='S_WRITE', tick_type='T_WRITE')
    >>> otp.run(data)
                         Time  X
    0 2003-12-01 00:00:00.000  1
    1 2003-12-01 00:00:00.001  2
    2 2003-12-01 00:00:00.002  3
    """
    # Backward-compatibility shims: translate deprecated keyword names
    # (pre-1.21.0 API) to the current parameters, warning the caller.
    if 'append_mode' in kwargs:
        warnings.warn("Parameter 'append_mode' is deprecated, use 'append'", FutureWarning)
        append = kwargs.pop('append_mode')

    if 'timestamp_field' in kwargs:
        warnings.warn("Parameter 'timestamp_field' is deprecated, use 'timestamp'", FutureWarning)
        timestamp = kwargs.pop('timestamp_field')

    if 'keep_timestamp_field' in kwargs:
        warnings.warn("Parameter 'keep_timestamp_field' is deprecated, use 'keep_timestamp'", FutureWarning)
        keep_timestamp = kwargs.pop('keep_timestamp_field')

    if 'start' in kwargs:
        warnings.warn("Parameter 'start' is deprecated, use 'start_date'", FutureWarning)
        start_date = kwargs.pop('start')

    if 'end' in kwargs:
        warnings.warn("Parameter 'end' is deprecated, use 'end_date'", FutureWarning)
        # Parameter 'end' was exclusive. Parameter 'end_date' is inclusive.
        end_date = kwargs.pop('end') - otp.Day(1)

    # Anything still left in kwargs is neither a deprecated alias nor a known
    # parameter -- reject it explicitly.
    if kwargs:
        raise TypeError(f'write() got unexpected arguments: {list(kwargs)}')

    # From here on, kwargs is reused to accumulate the parameters that will be
    # passed through to the WRITE_TO_ONETICK_DB event processor.
    kwargs = {}

    # validate field names
    for field_name in self.schema:
        if field_name_contains_lowercase(field_name):
            if otp.config.allow_lowercase_in_saved_fields:
                warnings.warn(
                    f'Field "{field_name}" contains lowercase characters and is being saved'
                    ' to a Onetick database. This field will be converted to uppercase upon saving.'
                )
            else:
                raise ValueError(
                    f'Field "{field_name}" contains lowercase characters and cannot be saved to a Onetick database'
                )

    if date is not adaptive and (start_date or end_date):
        raise ValueError('date cannot be used with start_date+end_date')

    if date is adaptive and (start_date and end_date) and inplace:
        # join_with_query and merge are used for multiple dates, so inplace is not supported
        raise ValueError(
            'cannot run on multiple dates if inplace is True,'
            ' use one value for date instead of start_date+end_date'
        )

    if (start_date and not end_date) or (not start_date and end_date):
        raise ValueError('start_date and end_date should be both specified or both None')

    if date is adaptive:
        date = configuration.config.default_date

    # Symbol name: a column argument maps directly to the EP's
    # symbol_name_field; a plain string is materialized as a temporary
    # '_SYMBOL_NAME_FIELD_' column (dropped again after the sink below).
    if symbol is not None:
        if isinstance(symbol, _Column):
            kwargs['symbol_name_field'] = str(symbol)
            if keep_symbol_and_tick_type is adaptive:
                keep_symbol_and_tick_type = True
        else:
            kwargs.setdefault('symbol_name_field', '_SYMBOL_NAME_FIELD_')
            self[kwargs['symbol_name_field']] = symbol

    # Tick type: same column-vs-string treatment as for symbol above.
    if tick_type is not None:
        if isinstance(tick_type, _Column):
            kwargs['tick_type_field'] = str(tick_type)
            if keep_symbol_and_tick_type is adaptive:
                keep_symbol_and_tick_type = True
        else:
            kwargs.setdefault('tick_type_field', '_TICK_TYPE_FIELD_')
            self[kwargs['tick_type_field']] = tick_type

    # adaptive resolves to False when neither symbol nor tick_type was a column
    if keep_symbol_and_tick_type is adaptive:
        keep_symbol_and_tick_type = False

    if timestamp is not None:
        kwargs['timestamp_field'] = str(timestamp)

    if correction_type is not None:
        kwargs['correction_type_field'] = str(correction_type)

    if context is not adaptive:
        kwargs['context'] = context
    elif not use_context_of_query:
        if otp.config.context is not None:
            kwargs['context'] = otp.config.context

    if out_of_range_tick_action.upper() == 'IGNORE':
        # let's ignore
        pass
    elif out_of_range_tick_action.upper() == 'LOAD':
        if start_date and end_date:
            raise ValueError('LOAD out_of_range_tick_action cannot be used with start_date+end_date, use date instead')
    elif out_of_range_tick_action.upper() == 'EXCEPTION':
        if start_date and end_date:
            # WRITE_TO_ONETICK_DB use DAY_BOUNDARY_TZ and DAY_BOUNDARY_OFFSET
            # to check tick timestamp is out of range or not
            # so we mimic it here with THROW event processor
            src = otp.Source(otq.DbShowConfig(str(db), 'DB_TIME_INTERVALS'))
            src.table(inplace=True, DAY_BOUNDARY_TZ=str, DAY_BOUNDARY_OFFSET=int)
            # DAY_BOUNDARY_OFFSET offset are in seconds
            src['DAY_BOUNDARY_OFFSET'] = src['DAY_BOUNDARY_OFFSET'] * 1000
            src.rename(
                {'DAY_BOUNDARY_TZ': '__DAY_BOUNDARY_TZ', 'DAY_BOUNDARY_OFFSET': '__DAY_BOUNDARY_OFFSET'}, inplace=True
            )
            self = self.join_with_query(src, symbol=f"{str(db)}::DUMMY", caching='per_symbol')
            timezone = self['__DAY_BOUNDARY_TZ']
            offset = self['__DAY_BOUNDARY_OFFSET']
            convert_timestamp = self['TIMESTAMP'].dt.strftime('%Y%m%d%H%M%S.%J', timezone=timezone)

            # Raise at runtime for ticks earlier than the (offset-adjusted) start boundary.
            start_formatted = start_date.strftime('%Y-%m-%d')
            start_op = otp.dt(start_date).to_operation(timezone=timezone) + offset
            self.throw(
                where=(self['TIMESTAMP'] < start_op),
                message=(
                    'Timestamp '
                    + convert_timestamp
                    + ' of a tick, visible or hidden, '
                    + f'earlier than {start_formatted} in timezone '
                    + timezone
                ),
                inplace=True,
            )

            # Raise at runtime for ticks at or after the (offset-adjusted) end boundary.
            end = end_date + otp.Day(1)  # end_date is inclusive
            end_formatted = end.strftime('%Y-%m-%d')
            end_op = otp.dt(end).to_operation(timezone=timezone) + offset
            self.throw(
                where=(self['TIMESTAMP'] >= end_op),
                message=(
                    'Timestamp '
                    + convert_timestamp
                    + ' of a tick, visible or hidden, '
                    + f'later than {end_formatted} in timezone '
                    + timezone
                ),
                inplace=True,
            )
            self.drop(['__DAY_BOUNDARY_TZ', '__DAY_BOUNDARY_OFFSET'], inplace=True)
    else:
        raise ValueError(
            f'Unknown out_of_range_tick_action: {out_of_range_tick_action}.'
            ' Possible values are: "ignore", "exception"'
        )

    # Parameters common to every WRITE_TO_ONETICK_DB EP instance created below.
    kwargs = dict(
        **kwargs,
        database=str(db),
        append_mode=append,
        keep_symbol_name_and_tick_type=keep_symbol_and_tick_type,
        keep_timestamp_field=keep_timestamp,
        replace_existing_time_series=replace_existing_time_series,
        allow_concurrent_write=allow_concurrent_write,
        use_context_of_query=use_context_of_query,
    )

    if start_date and end_date:
        # Date range: fan out into one branch (and one EP) per day, each with
        # out_of_range_tick_action='IGNORE' so every tick lands only in its own
        # day's branch; out-of-range exceptions were handled with THROW above.
        days = (end_date - start_date).days
        if days < 0:
            raise ValueError("Parameter 'start_date' must be less than or equal to parameter 'end_date'")
        branches = []
        for i in range(days + 1):
            branch = self.copy()
            branch.sink(
                otq.WriteToOnetickDb(
                    date=(start_date + otp.Day(i)).strftime('%Y%m%d'),
                    propagate_ticks=propagate,
                    out_of_range_tick_action='IGNORE',
                    **kwargs,
                )
            )
            branches.append(branch)
        self = otp.merge(branches)
    else:
        # Single date (or no date for accelerator/memory databases).
        self.sink(
            otq.WriteToOnetickDb(
                date=date.strftime('%Y%m%d') if date else '',  # type: ignore[union-attr]
                propagate_ticks=propagate,
                out_of_range_tick_action=out_of_range_tick_action.upper(),
                **kwargs,
            )
        )

    # Remove the temporary columns created above for string symbol/tick_type.
    for col in ('_SYMBOL_NAME_FIELD_', '_TICK_TYPE_FIELD_'):
        if col in self.schema:
            self.drop(col, inplace=True)

    # Reflect in the python-side schema the fields the EP consumes/removes.
    to_drop: Set[str] = set()
    if not keep_symbol_and_tick_type:
        if 'symbol_name_field' in kwargs:
            to_drop.add(kwargs['symbol_name_field'])
        if 'tick_type_field' in kwargs:
            to_drop.add(kwargs['tick_type_field'])
    if not keep_timestamp and timestamp is not None and str(timestamp) not in {'Time', 'TIMESTAMP'}:
        to_drop.add(str(timestamp))
    if correction_type is not None:
        to_drop.add(str(correction_type))
    self.schema.set(**{k: v for k, v in self.schema.items() if k not in to_drop})
    return self
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
@inplace_operation
def write_parquet(
    self,
    output_path,
    compression_type="snappy",
    num_tick_per_row_group=1000,
    partitioning_keys="",
    propagate_input_ticks=False,
    inplace=False,
):
    """
    Writes the input tick series to parquet data file.

    Input must not have field 'time' as that field will also be added by the EP in the resulting file(s)

    Parameters
    ----------
    output_path: str
        Path for saving ticks to Parquet file.
        Partitioned: Path to the root directory of the parquet files.
        Non-partitioned: Path to the parquet file.
    compression_type: str
        Compression type for parquet files.
        Should be one of these: `gzip`, `lz4`, `none`, `snappy` (default), `zstd`.
    num_tick_per_row_group: int
        Number of rows per row group.
    partitioning_keys: list, str
        List of fields (`list` or comma-separated string) to be used as keys for partitioning.

        Setting this parameter will switch this EP to partitioned mode.

        In non-partitioned mode, if the path points to a file that already exists, it will be overridden.
        When partitioning is active:

        * The target directory must be empty
        * Key fields and their string values will be automatically URL-encoded to avoid conflicts with
          filesystem naming rules.

        Pseudo-fields '_SYMBOL_NAME' and '_TICK_TYPE' may be used as `partitioning_keys` and
        will be added to the schema automatically.
    propagate_input_ticks: bool
        Switches propagation of the ticks. If set to `True`, ticks will be propagated.
    inplace: bool
        A flag controls whether operation should be applied inplace.
        If ``inplace=True``, then it returns nothing. Otherwise method
        returns a new modified object.

    See also
    --------
    | **WRITE_TO_PARQUET** OneTick event processor
    | :py:class:`onetick.py.ReadParquet`

    Examples
    --------
    Simple usage:

    >>> data = otp.Ticks(A=[1, 2, 3])
    >>> data = data.write_parquet("/path/to/parquet/file")  # doctest: +SKIP
    >>> otp.run(data)  # doctest: +SKIP
    """
    # The EP only exists in sufficiently recent OneTick builds.
    if not hasattr(otq, "WriteToParquet"):
        raise RuntimeError("Current version of OneTick don't support WRITE_TO_PARQUET EP")

    # A list of key fields is passed to the EP as one comma-separated string.
    keys = partitioning_keys
    if isinstance(keys, list):
        keys = ",".join(keys)

    # Different OneTick builds spell this EP parameter differently
    # (num_tick_per_row_group vs num_ticks_per_row_group); pick whichever
    # the installed version declares.
    declared = otq.WriteToParquet.Parameters.list_parameters()
    row_group_param = (
        'num_tick_per_row_group'
        if 'num_tick_per_row_group' in declared
        else 'num_ticks_per_row_group'
    )

    self.sink(
        otq.WriteToParquet(
            output_path=output_path,
            compression_type=compression_type.upper(),
            partitioning_keys=keys,
            propagate_input_ticks=propagate_input_ticks,
            **{row_group_param: num_tick_per_row_group},
        )
    )
    return self
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
@inplace_operation
def save_snapshot(
    self: 'Source',
    snapshot_name='VALUE',
    snapshot_storage='memory',
    default_db='CEP_SNAPSHOT',
    database='',
    symbol_name_field=None,
    expected_symbols_per_time_series=1000,
    num_ticks=1,
    reread_prevention_level=1,
    group_by=None,
    expected_groups_per_symbol=10,
    keep_snapshot_after_query=False,
    allow_concurrent_writers=False,
    remove_snapshot_upon_start=None,
    inplace=False,
):
    """
    Saves last (at most) `n` ticks of each group of ticks from the input time series in global storage or
    in a memory mapped file under a specified snapshot name.
    Tick descriptor should be the same for all ticks saved into the snapshot.
    These ticks can then be read via :py:class:`ReadSnapshot <onetick.py.ReadSnapshot>` by using the name
    of the snapshot and the same symbol name (``<db_name>::<symbol>``) that were used by this method.

    The event processor cannot be used by default. To enable it, access control should be configured,
    so user could have rights to use **SAVE_SNAPSHOT** EP.

    Parameters
    ----------
    snapshot_name: str
        The name of the snapshot, can be any string which doesn't contain slashes or backslashes.
        Two snapshots can have the same name if they are stored in memory mapped files for different databases.
        Also, they can have the same names if they are stored in the memories of different processes
        (different tick_servers). In all other cases the names should be unique.

        Default: `VALUE`
    snapshot_storage: str
        This parameter specifies the place of storage of the snapshot. Possible options are:

        * `memory` - the snapshot is stored in the dynamic (heap) memory of the process
          that ran (or is still running) the :py:meth:`onetick.py.Source.save_snapshot` for the snapshot.
        * `memory_mapped_file` - the snapshot is stored in a memory mapped file.
          For each symbol to get the location of the snapshot in the file system, ``save_snapshot`` looks at
          the **SAVE_SNAPSHOT_DIR** parameter value in the locator section for the database of the symbol.
          In a specified directory it creates a new directory with the name of the snapshot and keeps
          the memory mapped file and some other helper files there.

        Default: `memory`
    default_db: str
        The ticks with empty symbol names or symbol names with no database name as a prefix are saved as
        if they have symbol names equal to **DEFAULT_DB::SYMBOL_NAME** (where **SYMBOL_NAME** can be empty).
        These kinds of ticks, for example, can appear after merging time series. To save/read these ticks
        to/from storage a dummy database with the specified default name should be configured in the locator.

        Default: `CEP_SNAPSHOT`
    database: str, optional
        Specifies the output database for saving the snapshot.
    symbol_name_field: str, :py:class:`~onetick.py.Column`, optional
        If this parameter is specified, then each input time series is assumed to be a union of several
        time series and the value of the specified attribute of each tick determines to which time series
        the tick actually belongs.
        These values should be pure symbol names (for instance if the tick belongs to the time series
        **DEMO_L1::A**, then the value of the corresponding attribute should be **A**) and the database name
        will be taken from symbol of the merged time series.
    expected_symbols_per_time_series: int
        This parameter makes sense only when ``symbol_name_field`` is specified.
        It is the number of real symbols that are expected to occur per input time series.
        Bigger numbers may result in larger memory utilization by the query but will make the query faster.

        Default: `1000`
    num_ticks: int
        The number of ticks to be stored for each group per each symbol.

        Default: `1`
    reread_prevention_level: int
        For better performance we do not use synchronization mechanisms between the snapshot writer[s]
        and reader[s]. That is why when the writer submits ticks for some symbol very quickly the reader
        may fail to read those ticks, and it will keep trying to reread them until it succeeds.
        The ``reread_prevention_level`` parameter addresses this problem.
        The higher the reread prevention level the higher the chance for the reader to read ticks successfully.
        But high prevention level also means high memory utilization, that is why it is recommended to keep
        the value of this parameter unchanged until you get an error about inability of the reader to read
        the snapshot due to fast writer.

        Default: `1`
    group_by: list of str, :py:class:`~onetick.py.Column`, optional
        When specified, the EP will keep the last **n** ticks of each group for each symbol;
        otherwise it will just keep the last **n** ticks of the input time series.
        The group is a list of input ticks with the same values in the specified fields.
    expected_groups_per_symbol: int
        The number of expected groups of ticks for each time series.
        The specified value is used only when ``group_by`` fields are specified,
        otherwise it is ignored, and we assume that the number of expected groups is 1.
        The number hints the EP to allocate memory for such number of tick groups each time
        a new group of ticks is going to be created and no free memory is left.

        Default: `10`
    keep_snapshot_after_query: bool
        If the snapshot is saved in process memory and this parameter is set, the saved snapshot continues
        to live after the query ends. If this parameter is not set, the snapshot is removed as soon as the
        query finishes and its name is released for saving new snapshots with the same name.
        This parameter is ignored if the snapshot is saved in the memory mapped file.

        Default: `False`
    allow_concurrent_writers: bool
        If this parameter is ``True`` multiple saver queries can write to the same snapshot contemporaneously.
        But different writers should write to different time series.
        Also, saver queries should run inside the same process (i.e., different tick servers or loaders with
        otq transformers cannot write to the same ``memory_mapped_file`` snapshot concurrently).

        Default: `False`
    remove_snapshot_upon_start: bool, optional
        If this parameter is ``True`` the snapshot will be removed at the beginning of the query the next time
        ``save_snapshot`` is called for the same snapshot. If the parameter is ``False`` the snapshot
        with the specified name will be appended to upon the next run of ``save_snapshot``.

        If you'll leave this parameter as ``None``, it will be equal to setting this parameter to ``NOT_SET``
        in EP. ``NOT_SET`` option operates in the same way as ``True`` for ``memory`` snapshots or ``False``
        for ``memory_mapped_file`` snapshots.

        Default: None (``NOT_SET``)
    inplace: bool
        A flag controls whether operation should be applied inplace.
        If ``inplace=True``, then it returns nothing. Otherwise method
        returns a new modified object.

    See also
    --------
    | **SAVE_SNAPSHOT** OneTick event processor
    | :py:class:`onetick.py.ReadSnapshot`
    | :py:class:`onetick.py.ShowSnapshotList`
    | :py:class:`onetick.py.FindSnapshotSymbols`
    | :py:meth:`onetick.py.Source.join_with_snapshot`

    Examples
    --------
    Save ticks to a snapshot in a memory:

    >>> src = otp.Ticks(X=[1, 2, 3, 4, 5])
    >>> src = src.save_snapshot(snapshot_name='some_snapshot')  # doctest: +SKIP
    >>> otp.run(src)  # doctest: +SKIP

    If you want to use snapshot, stored in memory, after query, use parameter ``keep_snapshot_after_query``:

    >>> src = src.save_snapshot(snapshot_name='some_snapshot', keep_snapshot_after_query=True)  # doctest: +SKIP

    Snapshot will be associated with default database. You can set database via ``database`` parameter:

    >>> src = src.save_snapshot(
    ...     snapshot_name='some_snapshot', database='SOME_DATABASE', keep_snapshot_after_query=True
    ... )  # doctest: +SKIP
    >>> otp.run(src)  # doctest: +SKIP
    >>>
    >>> src = otp.ShowSnapshotList()  # doctest: +SKIP
    >>> otp.run(src)  # doctest: +SKIP
            Time   SNAPSHOT_NAME  STORAGE_TYPE        DB_NAME
    0 2003-12-01   some_snapshot        MEMORY  SOME_DATABASE

    By default, only one last tick per group, if it set, or from all ticks per symbol is saved.
    You can change this number by setting ``num_ticks`` parameter:

    >>> src = src.save_snapshot(snapshot_name='some_snapshot', num_ticks=100)  # doctest: +SKIP

    Setting symbol name for every tick in snapshot from source field:

    >>> src = otp.Ticks(X=[1, 2, 3], SYMBOL_FIELD=['A', 'B', 'C'])
    >>> src = src.save_snapshot(
    ...     snapshot_name='some_snapshot', symbol_name_field='SYMBOL_FIELD', keep_snapshot_after_query=True,
    ... )  # doctest: +SKIP
    >>> otp.run(src)  # doctest: +SKIP
    >>>
    >>> src = otp.FindSnapshotSymbols(snapshot_name='some_snapshot')  # doctest: +SKIP
    >>> otp.run(src)  # doctest: +SKIP
            Time  SYMBOL_NAME
    0 2003-12-01   DEMO_L1::A
    1 2003-12-01   DEMO_L1::B
    2 2003-12-01   DEMO_L1::C

    Group ticks by column ``X`` and keep last 2 ticks from each group:

    >>> src = otp.Ticks(X=[0, 0, 0, 1, 1, 1], Y=[1, 2, 3, 4, 5, 6])
    >>> src = src.save_snapshot(
    ...     snapshot_name='some_snapshot', group_by=[src['X']], num_ticks=2, keep_snapshot_after_query=True,
    ... )  # doctest: +SKIP
    >>> otp.run(src)  # doctest: +SKIP
    >>>
    >>> src = otp.ReadSnapshot(snapshot_name='some_snapshot')  # doctest: +SKIP
    >>> otp.run(src)  # doctest: +SKIP
            Time  X  Y               TICK_TIME
    0 2003-12-01  0  2 2003-12-01 00:00:00.001
    1 2003-12-01  0  3 2003-12-01 00:00:00.002
    2 2003-12-01  1  5 2003-12-01 00:00:00.004
    3 2003-12-01  1  6 2003-12-01 00:00:00.005

    """
    # SAVE_SNAPSHOT may be missing in older OneTick builds; fail fast with a clear message.
    if not hasattr(otq, "SaveSnapshot"):
        raise RuntimeError("Current version of OneTick doesn't support SAVE_SNAPSHOT EP")

    if snapshot_storage not in ('memory', 'memory_mapped_file'):
        raise ValueError('`snapshot_storage` must be one of "memory", "memory_mapped_file"')

    # Accept a Column object as well as a plain field name.
    if isinstance(symbol_name_field, _Column):
        symbol_name_field = str(symbol_name_field)
    if symbol_name_field and symbol_name_field not in self.schema:
        raise ValueError(f'Field "{symbol_name_field}" passed as `symbol_name_field` parameter is not in schema.')

    # Probe EP capabilities unconditionally (kept before the `database` branch on purpose).
    database_param_supported = is_save_snapshot_database_parameter_supported()

    # Parameters that are only passed to the EP when explicitly requested.
    optional_ep_params = {}
    if database:
        if not database_param_supported:
            raise RuntimeError("Current version of OneTick doesn't support `database` parameter on SAVE_SNAPSHOT EP")

        optional_ep_params['database'] = database

    # The EP expects an empty string, not None, when no symbol name field is used.
    if symbol_name_field is None:
        symbol_name_field = ''

    if group_by is None:
        group_by = []

    if not isinstance(group_by, list):
        raise ValueError('`group_by` must be a list')

    # Normalize `group_by` entries to plain field names and validate them against the schema.
    group_by_names = []
    for field in group_by:
        name = str(field) if isinstance(field, _Column) else field

        if name not in self.schema:
            raise ValueError(f'Field "{name}" passed as `group_by` parameter is not in schema.')

        group_by_names.append(name)

    # None maps to the EP's NOT_SET sentinel (see the docstring for its semantics).
    if remove_snapshot_upon_start is None:
        remove_snapshot_upon_start = 'NOT_SET'

    # clear schema
    self.schema.set()

    self.sink(
        otq.SaveSnapshot(
            snapshot_name=snapshot_name,
            snapshot_storage=snapshot_storage.upper(),
            default_db=default_db,
            symbol_name_field=symbol_name_field,
            expected_symbols_per_time_series=expected_symbols_per_time_series,
            num_ticks=num_ticks,
            reread_prevention_level=reread_prevention_level,
            group_by=','.join(group_by_names),
            expected_groups_per_symbol=expected_groups_per_symbol,
            keep_snapshot_after_query=keep_snapshot_after_query,
            allow_concurrent_writers=allow_concurrent_writers,
            remove_snapshot_upon_start=remove_snapshot_upon_start,
            **optional_ep_params,
        )
    )

    return self
|
|
719
|
+
|
|
720
|
+
|
|
721
|
+
@inplace_operation
def write_text(
    self: 'Source',
    *,
    propagate_ticks=True,
    output_headers=True,
    output_types_in_headers=False,
    order=None,
    prepend_symbol_name=True,
    prepended_symbol_name_size=0,
    prepend_timestamp=True,
    separator=',',
    formats_of_fields=None,
    double_format='%f',
    output_dir=None,
    output_file=None,
    error_file=None,
    warning_file=None,
    data_quality_file=None,
    treat_input_as_binary=False,
    flush=True,
    append=False,
    allow_concurrent_write=False,
    inplace=False,
):
    r"""
    Writes the input tick series to a text file or standard output.

    Parameters
    ----------
    propagate_ticks: bool
        If True (default) then ticks will be propagated after this method, otherwise this method won't return ticks.
    output_headers: bool
        Switches the output of the headers.
        If True (default), a tick descriptor line appears in the output before the very first tick for that query.
        If the structure of the output tick changes, another tick descriptor line appears before the first
        changed tick. The header line starts with **#**.
        The field names are ordered as mandated by the ``order`` parameter or,
        if it is empty, in the order of appearance in the tick descriptor.
        Fields that are not specified in the ``order`` parameter
        will appear after specified ones in the order of their appearance in the tick descriptor.
    output_types_in_headers: bool
        Switches the output of field types in the header lines.
        ``output_types_in_headers`` can be set only when ``output_headers`` is set too.
    order: list
        The field appearance order in the output.
        If all or some fields are not specified,
        those fields will be written in the order of their appearance in the tick descriptor.

        Field **SYMBOL_NAME** may be specified if parameter ``prepend_symbol_name`` is set.

        Field **TIMESTAMP** may be specified if parameter ``prepend_timestamp`` is set.
    prepend_symbol_name: bool
        If True (default), prepends symbol name before other fields as a new field named **SYMBOL_NAME**
        in the header (if ``output_headers`` is set).
    prepended_symbol_name_size: int
        When ``prepend_symbol_name`` is set, symbol will be adjusted to this size.
        If set to 0 (default), no adjustment will be done.
    prepend_timestamp: bool
        If set (default), tick timestamps, formatted as *YYYYMMDDhhmmss.qqqqqq* in the GMT time zone,
        will be prepended to the output lines.
        Header lines, if present, will have **TIMESTAMP** as the first field name.
        The default output format for tick timestamps can be specified in the ``formats_of_fields`` parameter.
    separator: str
        The delimiter string. This doesn't have to be a single character.
        Escape sequences are allowed for **\\t** (tab), **\\\\** (\\ character) and **\\xHH** (hex codes).
        By default "," (comma) will be used.
    formats_of_fields: dict
        The dictionary of field names and their formatting specifications.
        The formatting specification is the same as in the standard C
        `printf <https://pubs.opengroup.org/onlinepubs/009695399/functions/printf.html>`_ function.

        For float and decimal fields **%f** and **%.[<precision>]f** formats are only supported,
        first one being the default and outputting 6 decimal digits.

        Also if the field format starts with **%|**,
        it means that this is a timestamp field and should be in the format **%|tz|time_format_spec**,
        where the *tz* is the time zone name (if not specified GMT will be used),
        and *time_format_spec* is a custom time format specification,
        which is the same as the one used by the
        `strftime <https://pubs.opengroup.org/onlinepubs/009695399/functions/strftime.html>`_ function.

        In addition, you can also use **%q** , **%Q** , **%k** and **%J** placeholders,
        which will be replaced by 3 and 2 sign milliseconds, 6 sign microseconds and 9 sign nanoseconds,
        respectively.

        **%#**, **%-**, **%U**, **%N** placeholders will be replaced by Unix timestamp, Unix timestamp in
        milliseconds, microseconds and nanoseconds, respectively.

        **%+** and **%~** placeholders will be replaced by milliseconds and nanoseconds passed since midnight.
    double_format: str
        This format will be used for fields that are holding double values
        if they are not specified in ``formats_of_fields``.
    output_dir: str
        If specified, all output (output, warning, error, and data quality) files will be redirected to it.
        If this directory does not exist, it will get created.
        By default, the current directory is used.
    output_file: str
        The output file name for generated text data.
        If not set, the standard output will be used.
        It is also possible to add symbol name, database name, tick type,
        date of tick and query start time to the file name.
        For this special placeholders should be used, which will be replaced with the appropriate values:

        * **%SYMBOL%** - will be replaced with symbol name,
        * **%DBNAME%** - with database name,
        * **%TICKTYPE%** - with tick type,
        * **%DATE%** - with date of tick,
        * **%STARTTIME%** - with start time of the query.

        .. note::
            In case of using placeholders the output of the data may be split into different files.
            For example when querying several days of data and using **%DATE%** placeholder,
            the file will be created for every day of the interval.

        This format is also available for ``error_file``, ``warning_file`` and ``data_quality_file``
        input parameters.
    error_file: str
        The file name where all error messages are directed.
        If not set the standard error will be used.
    warning_file: str
        The file name where all warning messages are directed.
        If not set the standard error will be used.
    data_quality_file: str
        The file name where all data quality messages are directed.
        If not set the standard error will be used.
    treat_input_as_binary: bool
        Opens output file in binary mode to not modify content of ticks when printing them to the file.
        Also in this mode method prints no new line to the file after every tick write.
    flush: bool
        If True (default) then the output will be flushed to disk after every tick.

        .. note::
            Notice that while this setting makes results of the query recorded into a file without delay,
            making them immediately available to applications that read this file,
            it may slow down the query significantly.

    append: bool
        If set to True, will try to append data to files (output, error, warning, data_quality),
        instead of overwriting.
    allow_concurrent_write: bool
        Allows different queries running on the same server to write concurrently to the same files
        (output, error, warning, data_quality).
    inplace: bool
        A flag controls whether operation should be applied inplace.
        If ``inplace=True``, then it returns nothing. Otherwise method
        returns a new modified object.

    See also
    --------
    | **WRITE_TEXT** OneTick event processor
    | :py:meth:`onetick.py.Source.dump`

    Examples
    --------

    By default the text is written to the standard output:

    >>> data = otp.Ticks(A=[1, 2, 3])
    >>> write = data.write_text()
    >>> _ = otp.run(write)  # doctest: +SKIP
    #SYMBOL_NAME,TIMESTAMP,A
    AAPL,20031201050000.000000,1
    AAPL,20031201050000.001000,2
    AAPL,20031201050000.002000,3

    Output file can also be specified:

    >>> write = data.write_text(output_file='result.csv')
    >>> _ = otp.run(write)  # doctest: +SKIP
    >>> with open('result.csv') as f:  # doctest: +SKIP
    ...     print(f.read())  # doctest: +SKIP
    #SYMBOL_NAME,TIMESTAMP,A
    AAPL,20031201050000.000000,1
    AAPL,20031201050000.001000,2
    AAPL,20031201050000.002000,3

    Symbol name and timestamp of the tick can be removed from the output:

    >>> write = data.write_text(prepend_timestamp=False,
    ...                         prepend_symbol_name=False)
    >>> _ = otp.run(write)  # doctest: +SKIP
    #A
    1
    2
    3

    The header can also be removed from the output:

    >>> write = data.write_text(output_headers=False)
    >>> _ = otp.run(write)  # doctest: +SKIP
    AAPL,20031201050000.000000,1
    AAPL,20031201050000.001000,2
    AAPL,20031201050000.002000,3

    The order of fields and separator character can be specified:

    >>> write = data.write_text(order=['A', 'TIMESTAMP'],
    ...                         separator='\t',
    ...                         prepend_symbol_name=False)
    >>> _ = otp.run(write)  # doctest: +SKIP
    #A	TIMESTAMP
    1	20031201050000.000000
    2	20031201050000.001000
    3	20031201050000.002000

    The formatting can be specified for each field:

    >>> write = data.write_text(formats_of_fields={
    ...     'TIMESTAMP': '%|GMT|%Y-%m-%d %H:%M:%S.%q',
    ...     'A': '%3d'
    ... })
    >>> _ = otp.run(write)  # doctest: +SKIP
    #SYMBOL_NAME,TIMESTAMP,A
    AAPL,2003-12-01 05:00:00.000,  1
    AAPL,2003-12-01 05:00:00.001,  2
    AAPL,2003-12-01 05:00:00.002,  3
    """
    if output_types_in_headers and not output_headers:
        raise ValueError("Parameter 'output_types_in_headers' can only be set together with 'output_headers'")

    order = order or []
    formats_of_fields = formats_of_fields or {}
    # Validate every field mentioned in `order` or `formats_of_fields` against the schema,
    # skipping the pseudo-fields that the EP itself prepends.
    for field in list(order) + list(formats_of_fields):
        if prepend_symbol_name and field == 'SYMBOL_NAME':
            # SYMBOL_NAME is a pseudo-field added by the EP, not expected in the schema.
            continue
        if prepend_timestamp and field == 'TIMESTAMP':
            # BUG FIX: TIMESTAMP is likewise a pseudo-field when `prepend_timestamp` is set
            # (see the docstring: "Field TIMESTAMP may be specified if parameter
            # prepend_timestamp is set"). Previously it fell through to the schema check
            # below and raised "Field 'TIMESTAMP' is not in schema" for the documented
            # usages order=['A', 'TIMESTAMP'] and formats_of_fields={'TIMESTAMP': ...}.
            continue
        if not prepend_symbol_name and field == 'SYMBOL_NAME' and field not in self.schema:
            raise ValueError(
                "Field 'SYMBOL_NAME' can't be specified in 'order' parameter if 'prepend_symbol_name' is not set"
            )
        if not prepend_timestamp and field == 'TIMESTAMP':
            raise ValueError(
                "Field 'TIMESTAMP' can't be specified in 'order' parameter if 'prepend_timestamp' is not set"
            )
        if field not in self.schema:
            raise ValueError(f"Field '{field}' is not in schema")

    kwargs = dict(
        propagate_ticks=propagate_ticks,
        output_headers=output_headers,
        output_types_in_headers=output_types_in_headers,
        order='|'.join(order),
        prepend_symbol_name=prepend_symbol_name,
        prepended_symbol_name_size=prepended_symbol_name_size,
        prepend_timestamp=prepend_timestamp,
        # OneTick uses \ as an escape character,
        # so replacing a single \ character with two \\ characters to escape it in OneTick
        separator=separator.replace('\\', r'\\'),
        formats_of_fields='\n'.join(f'{k}={v}' for k, v in formats_of_fields.items()),
        double_format=double_format,
        output_dir=output_dir,
        output_file=output_file,
        error_file=error_file,
        warning_file=warning_file,
        data_quality_file=data_quality_file,
        treat_input_as_binary=treat_input_as_binary,
        flush=flush,
        append=append,
        allow_concurrent_write=allow_concurrent_write,
    )
    for k, v in kwargs.items():
        if v is None:
            # None values may not be supported by onetick.query
            kwargs[k] = ''

    self.sink(otq.WriteText(**kwargs))
    if not propagate_ticks:
        # no ticks come out of this node, so the output schema is empty
        # (consistency cleanup: `self.schema.set(**{})` -> `self.schema.set()`)
        self.schema.set()
    return self
|