onetick-py 1.177.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- locator_parser/__init__.py +0 -0
- locator_parser/acl.py +73 -0
- locator_parser/actions.py +262 -0
- locator_parser/common.py +368 -0
- locator_parser/io.py +43 -0
- locator_parser/locator.py +150 -0
- onetick/__init__.py +101 -0
- onetick/doc_utilities/__init__.py +3 -0
- onetick/doc_utilities/napoleon.py +40 -0
- onetick/doc_utilities/ot_doctest.py +140 -0
- onetick/doc_utilities/snippets.py +279 -0
- onetick/lib/__init__.py +4 -0
- onetick/lib/instance.py +141 -0
- onetick/py/__init__.py +293 -0
- onetick/py/_stack_info.py +89 -0
- onetick/py/_version.py +2 -0
- onetick/py/aggregations/__init__.py +11 -0
- onetick/py/aggregations/_base.py +648 -0
- onetick/py/aggregations/_docs.py +948 -0
- onetick/py/aggregations/compute.py +286 -0
- onetick/py/aggregations/functions.py +2216 -0
- onetick/py/aggregations/generic.py +104 -0
- onetick/py/aggregations/high_low.py +80 -0
- onetick/py/aggregations/num_distinct.py +83 -0
- onetick/py/aggregations/order_book.py +501 -0
- onetick/py/aggregations/other.py +1014 -0
- onetick/py/backports.py +26 -0
- onetick/py/cache.py +374 -0
- onetick/py/callback/__init__.py +5 -0
- onetick/py/callback/callback.py +276 -0
- onetick/py/callback/callbacks.py +131 -0
- onetick/py/compatibility.py +798 -0
- onetick/py/configuration.py +771 -0
- onetick/py/core/__init__.py +0 -0
- onetick/py/core/_csv_inspector.py +93 -0
- onetick/py/core/_internal/__init__.py +0 -0
- onetick/py/core/_internal/_manually_bound_value.py +6 -0
- onetick/py/core/_internal/_nodes_history.py +250 -0
- onetick/py/core/_internal/_op_utils/__init__.py +0 -0
- onetick/py/core/_internal/_op_utils/every_operand.py +9 -0
- onetick/py/core/_internal/_op_utils/is_const.py +10 -0
- onetick/py/core/_internal/_per_tick_scripts/tick_list_sort_template.script +121 -0
- onetick/py/core/_internal/_proxy_node.py +140 -0
- onetick/py/core/_internal/_state_objects.py +2312 -0
- onetick/py/core/_internal/_state_vars.py +93 -0
- onetick/py/core/_source/__init__.py +0 -0
- onetick/py/core/_source/_symbol_param.py +95 -0
- onetick/py/core/_source/schema.py +97 -0
- onetick/py/core/_source/source_methods/__init__.py +0 -0
- onetick/py/core/_source/source_methods/aggregations.py +809 -0
- onetick/py/core/_source/source_methods/applyers.py +296 -0
- onetick/py/core/_source/source_methods/columns.py +141 -0
- onetick/py/core/_source/source_methods/data_quality.py +301 -0
- onetick/py/core/_source/source_methods/debugs.py +272 -0
- onetick/py/core/_source/source_methods/drops.py +120 -0
- onetick/py/core/_source/source_methods/fields.py +619 -0
- onetick/py/core/_source/source_methods/filters.py +1002 -0
- onetick/py/core/_source/source_methods/joins.py +1413 -0
- onetick/py/core/_source/source_methods/merges.py +605 -0
- onetick/py/core/_source/source_methods/misc.py +1455 -0
- onetick/py/core/_source/source_methods/pandases.py +155 -0
- onetick/py/core/_source/source_methods/renames.py +356 -0
- onetick/py/core/_source/source_methods/sorts.py +183 -0
- onetick/py/core/_source/source_methods/switches.py +142 -0
- onetick/py/core/_source/source_methods/symbols.py +117 -0
- onetick/py/core/_source/source_methods/times.py +627 -0
- onetick/py/core/_source/source_methods/writes.py +986 -0
- onetick/py/core/_source/symbol.py +205 -0
- onetick/py/core/_source/tmp_otq.py +222 -0
- onetick/py/core/column.py +209 -0
- onetick/py/core/column_operations/__init__.py +0 -0
- onetick/py/core/column_operations/_methods/__init__.py +4 -0
- onetick/py/core/column_operations/_methods/_internal.py +28 -0
- onetick/py/core/column_operations/_methods/conversions.py +216 -0
- onetick/py/core/column_operations/_methods/methods.py +292 -0
- onetick/py/core/column_operations/_methods/op_types.py +160 -0
- onetick/py/core/column_operations/accessors/__init__.py +0 -0
- onetick/py/core/column_operations/accessors/_accessor.py +28 -0
- onetick/py/core/column_operations/accessors/decimal_accessor.py +104 -0
- onetick/py/core/column_operations/accessors/dt_accessor.py +537 -0
- onetick/py/core/column_operations/accessors/float_accessor.py +184 -0
- onetick/py/core/column_operations/accessors/str_accessor.py +1367 -0
- onetick/py/core/column_operations/base.py +1121 -0
- onetick/py/core/cut_builder.py +150 -0
- onetick/py/core/db_constants.py +20 -0
- onetick/py/core/eval_query.py +245 -0
- onetick/py/core/lambda_object.py +441 -0
- onetick/py/core/multi_output_source.py +232 -0
- onetick/py/core/per_tick_script.py +2256 -0
- onetick/py/core/query_inspector.py +464 -0
- onetick/py/core/source.py +1744 -0
- onetick/py/db/__init__.py +2 -0
- onetick/py/db/_inspection.py +1128 -0
- onetick/py/db/db.py +1327 -0
- onetick/py/db/utils.py +64 -0
- onetick/py/docs/__init__.py +0 -0
- onetick/py/docs/docstring_parser.py +112 -0
- onetick/py/docs/utils.py +81 -0
- onetick/py/functions.py +2398 -0
- onetick/py/license.py +190 -0
- onetick/py/log.py +88 -0
- onetick/py/math.py +935 -0
- onetick/py/misc.py +470 -0
- onetick/py/oqd/__init__.py +22 -0
- onetick/py/oqd/eps.py +1195 -0
- onetick/py/oqd/sources.py +325 -0
- onetick/py/otq.py +216 -0
- onetick/py/pyomd_mock.py +47 -0
- onetick/py/run.py +916 -0
- onetick/py/servers.py +173 -0
- onetick/py/session.py +1347 -0
- onetick/py/sources/__init__.py +19 -0
- onetick/py/sources/cache.py +167 -0
- onetick/py/sources/common.py +128 -0
- onetick/py/sources/csv.py +642 -0
- onetick/py/sources/custom.py +85 -0
- onetick/py/sources/data_file.py +305 -0
- onetick/py/sources/data_source.py +1045 -0
- onetick/py/sources/empty.py +94 -0
- onetick/py/sources/odbc.py +337 -0
- onetick/py/sources/order_book.py +271 -0
- onetick/py/sources/parquet.py +168 -0
- onetick/py/sources/pit.py +191 -0
- onetick/py/sources/query.py +495 -0
- onetick/py/sources/snapshots.py +419 -0
- onetick/py/sources/split_query_output_by_symbol.py +198 -0
- onetick/py/sources/symbology_mapping.py +123 -0
- onetick/py/sources/symbols.py +374 -0
- onetick/py/sources/ticks.py +825 -0
- onetick/py/sql.py +70 -0
- onetick/py/state.py +251 -0
- onetick/py/types.py +2131 -0
- onetick/py/utils/__init__.py +70 -0
- onetick/py/utils/acl.py +93 -0
- onetick/py/utils/config.py +186 -0
- onetick/py/utils/default.py +49 -0
- onetick/py/utils/file.py +38 -0
- onetick/py/utils/helpers.py +76 -0
- onetick/py/utils/locator.py +94 -0
- onetick/py/utils/perf.py +498 -0
- onetick/py/utils/query.py +49 -0
- onetick/py/utils/render.py +1374 -0
- onetick/py/utils/script.py +244 -0
- onetick/py/utils/temp.py +471 -0
- onetick/py/utils/types.py +120 -0
- onetick/py/utils/tz.py +84 -0
- onetick_py-1.177.0.dist-info/METADATA +137 -0
- onetick_py-1.177.0.dist-info/RECORD +152 -0
- onetick_py-1.177.0.dist-info/WHEEL +5 -0
- onetick_py-1.177.0.dist-info/entry_points.txt +2 -0
- onetick_py-1.177.0.dist-info/licenses/LICENSE +21 -0
- onetick_py-1.177.0.dist-info/top_level.txt +2 -0
onetick/py/sources/csv.py
@@ -0,0 +1,642 @@
import datetime as dt
import os
import io
import string

from functools import partial
from typing import Optional, Union, Dict

import onetick.py as otp
from onetick.py.otq import otq
import pandas as pd

from onetick.py.core._source._symbol_param import _SymbolParamSource
from onetick.py.core.source import Source

from .. import types as ott
from .. import utils, configuration
from ..core import _csv_inspector

from .common import default_date_converter, to_timestamp_nanos, update_node_tick_type
from .ticks import Ticks


def CSV(  # NOSONAR
    filepath_or_buffer=None,
    timestamp_name: Optional[str] = "Time",
    first_line_is_title: bool = True,
    names: Optional[list] = None,
    dtype: Optional[dict] = None,
    converters: Optional[dict] = None,
    order_ticks=False,
    drop_index=True,
    change_date_to=None,
    auto_increase_timestamps=True,
    db='LOCAL',
    field_delimiter=',',
    handle_escaped_chars=False,
    quote_char='"',
    timestamp_format: Optional[Union[str, Dict[str, str]]] = None,
    file_contents: Optional[str] = None,
    **kwargs,
):
    """
    Construct a source based on a CSV file.

    There are several steps determining column types:

    1. Initially, all columns are treated as ``str``.
    2. If a column name in the CSV title has the format ``type COLUMNNAME``,
       its type is changed from ``str`` to the specified type.
    3. All column types are determined automatically from their data.
    4. You can override the determined types explicitly via the ``dtype`` argument.
    5. The ``converters`` argument is applied after ``dtype`` and can also change a column's type.

    NOTE: Double quotes are not supported in CSV files for escaping quotes in strings;
    you should use the escape character ``\\`` before the quote instead,
    for example: ``"I'm a string with \\"quotes\\" inside"``. And then set ``handle_escaped_chars=True``.

    Parameters
    ----------
    filepath_or_buffer: str, os.PathLike, FileBuffer, optional
        Path to the CSV file or a :class:`file buffer <FileBuffer>`. If None, the value is taken from the symbol.
        When taken from the symbol, the symbol must have the ``LOCAL::`` prefix.
        In that case you should set the columns, otherwise the schema will be empty.
    timestamp_name: str, default "Time"
        Name of the TIMESTAMP column used for ticks. Used only if it exists in the CSV columns, otherwise ignored.
        Output data will be sorted by this column.
    first_line_is_title: bool
        Use the first line of the CSV file as a source for column names and types.
        If the CSV file starts with the ``#`` symbol, this parameter **must** be ``True``.

        - If ``True``, column names are inferred from the first line of the file;
          it is not allowed to have an empty name for any column.

        - If ``False``, the first line is processed as data, and column names will be
          COLUMN_0, COLUMN_1, and so on. You can specify column names via the ``names`` argument.

    names: list, optional
        List of column names to use, or None.
        Length must be equal to the number of columns in the file.
        Duplicates in this list are not allowed.
    dtype: dict, optional
        Data types for columns, as a dict of pairs {column_name: type}.
        Converts a column's type from ``str`` to the specified type, before applying converters.
    converters: dict, optional
        Dict of functions for converting values in certain columns. Keys are column names.
        Each function must be a valid callable with ``onetick.py`` syntax, for example::

            converters={
                "time_number": lambda c: c.apply(otp.nsectime),
                "stock": lambda c: c.str.lower(),
            }

        Converters are applied *after* the ``dtype`` conversion.
    order_ticks: bool, optional
        If ``True`` and the ``timestamp_name`` column is used, the source will order ticks by time.
        Note that if ``False`` and the ticks are not ordered in sequence, OneTick will raise an exception at runtime.
    drop_index: bool, optional
        If ``True`` and an 'Index' column is present in the csv file, this column will be removed.
    change_date_to: datetime, date, optional
        Change the date of the timestamp column to a specific date.
        Default is None, meaning the timestamp column is not changed.
    auto_increase_timestamps: bool, optional
        Only used if the provided CSV file does not have a TIMESTAMP column. If ``True``, timestamps of loaded ticks
        start at ``start_time`` and increase by 1 millisecond on each subsequent tick.
        If ``False``, timestamps of all loaded ticks will be equal to ``start_time``.
    db: str, optional
        Name of a database defining the destination where the csv file will be transported for processing.
        ``LOCAL`` is the default value, meaning OneTick will process it on the site where the query runs.
    field_delimiter: str, optional
        A character used to tokenize each line of the CSV file.
        For a tab character, ``\\t`` (backslash followed by ``t``) should be specified.
    handle_escaped_chars: bool, optional
        If set, the backslash char ``\\`` gets a special meaning, and everywhere in the input text
        the combinations ``\\'``, ``\\"`` and ``\\\\`` are replaced correspondingly with ``'``, ``"`` and ``\\``,
        which are then processed as regular chars.
        Besides, combinations like ``\\x??``, where ?-s are hexadecimal digits (0-9, a-f or A-F),
        are replaced with the chars having the specified ASCII codes.
        For example, ``\\x0A`` will be replaced by a newline character, ``\\x09`` will be replaced by a tab, and so on.
        Default: False
    quote_char: str
        Character used to denote the start and end of a quoted item. Quoted items can include the delimiter,
        and it will be ignored. The same character cannot be marked both as the quote character and as the
        field delimiter. Besides, whitespace characters cannot be used as the quote.
        Default: " (double quotes)
    timestamp_format: str or dict
        Expected format for ``timestamp_name`` and all other datetime columns.
        If a dictionary is passed, a different format can be specified for each column.
        This format is expected when converting strings from the csv file to ``dtype``.
        The default format is ``%Y/%m/%d %H:%M:%S.%J`` for :py:class:`~onetick.py.nsectime` columns and
        ``%Y/%m/%d %H:%M:%S.%q`` for :py:class:`~onetick.py.msectime` columns.
    file_contents: str
        Specify the contents of the csv file as a string.
        Can be used instead of the ``filepath_or_buffer`` parameter.

    See also
    --------
    **CSV_FILE_LISTING** OneTick event processor

    Examples
    --------
    Simple CSV file reading

    >>> data = otp.CSV(os.path.join(csv_path, "data.csv"))
    >>> otp.run(data)
                         Time          time_number      px side
    0 2003-12-01 00:00:00.000  1656690986953602371   30.89  Buy
    1 2003-12-01 00:00:00.001  1656667706281508365  682.88  Buy

    Read a CSV file and get tick timestamps from a specific field.
    You need to specify a query start/end interval that includes all ticks.

    >>> data = otp.CSV(os.path.join(csv_path, "data.csv"),
    ...                timestamp_name="time_number",
    ...                converters={"time_number": lambda c: c.apply(otp.nsectime)},
    ...                start=otp.dt(2010, 8, 1),
    ...                end=otp.dt(2022, 9, 2))
    >>> otp.run(data)
                               Time      px side
    0 2022-07-01 05:28:26.281508365  682.88  Buy
    1 2022-07-01 11:56:26.953602371   30.89  Buy

    The path to the csv can be passed via the symbol with the ``LOCAL::`` prefix:

    >>> data = otp.CSV()
    >>> otp.run(data, symbols=f"LOCAL::{os.path.join(csv_path, 'data.csv')}")
                         Time          time_number      px side
    0 2003-12-01 00:00:00.000  1656690986953602371   30.89  Buy
    1 2003-12-01 00:00:00.001  1656667706281508365  682.88  Buy

    The field delimiter can be set via the ``field_delimiter`` parameter:

    >>> data = otp.CSV(os.path.join(csv_path, 'data_diff_delimiters.csv'),
    ...                field_delimiter=' ',
    ...                first_line_is_title=False)
    >>> otp.run(data)
                         Time COLUMN_0 COLUMN_1
    0 2003-12-01 00:00:00.000      1,2        3
    1 2003-12-01 00:00:00.001        4      5,6

    The quote char can be set via the ``quote_char`` parameter:

    >>> data = otp.CSV(os.path.join(csv_path, 'data_diff_quote_chars.csv'),
    ...                quote_char="'",
    ...                first_line_is_title=False)
    >>> otp.run(data)
                         Time COLUMN_0 COLUMN_1
    0 2003-12-01 00:00:00.000     1,"2       3"
    1 2003-12-01 00:00:00.001       "1     2",3

    Use the ``file_contents`` parameter to read the data from a string:

    >>> data = otp.CSV(file_contents=os.linesep.join([
    ...     'A,B,C',
    ...     '1,f,3.3',
    ...     '2,g,4.4',
    ... ]))
    >>> otp.run(data)
                         Time  A  B    C
    0 2003-12-01 00:00:00.000  1  f  3.3
    1 2003-12-01 00:00:00.001  2  g  4.4
    """
    csv_source = _CSV(
        filepath_or_buffer=filepath_or_buffer,
        timestamp_name=timestamp_name,
        first_line_is_title=first_line_is_title,
        names=names,
        dtype=dtype,
        converters=converters,
        order_ticks=order_ticks,
        drop_index=drop_index,
        change_date_to=change_date_to,
        auto_increase_timestamps=auto_increase_timestamps,
        db=db,
        field_delimiter=field_delimiter,
        handle_escaped_chars=handle_escaped_chars,
        quote_char=quote_char,
        timestamp_format=timestamp_format,
        file_contents=file_contents,
        **kwargs,
    )
    csv_source = csv_source.sort(csv_source['Time'])
    return otp.merge([csv_source, otp.Empty(db=db)])


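A usage sketch tying together the type-resolution steps from the docstring above (the file name ``trades.csv`` and its columns are illustrative assumptions, not part of the package)::

    import onetick.py as otp

    # all columns start as str; `dtype` overrides the inferred type (step 4),
    # and `converters` run after `dtype` and may change the type again (step 5)
    data = otp.CSV(
        'trades.csv',
        dtype={'px': float},
        converters={'stock': lambda c: c.str.lower()},
    )
    df = otp.run(data)
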
class _CSV(Source):
    _PROPERTIES = Source._PROPERTIES + [
        "_dtype",
        "_names",
        "_columns",
        "_columns_with_bool_replaced",
        "_forced_title",
        "_default_types",
        "_has_time",
        "_to_drop",
        "_start",
        "_end",
        "_ep_fields",
        "_symbols",
        "_field_delimiter",
        "_converters",
        "_order_ticks",
        "_auto_increase_timestamps",
        "_db",
        "_drop_index",
        "_change_date_to",
        "_timestamp_name",
        "_filepath_or_buffer",
        "_first_line_is_title",
        "_handle_escaped_chars",
        "_quote_char",
        "_timestamp_format",
        "_file_contents",
    ]

    def __init__(self,
                 filepath_or_buffer=None,
                 timestamp_name: Optional[str] = "Time",
                 first_line_is_title: bool = True,
                 names: Optional[list] = None,
                 dtype: Optional[dict] = None,
                 converters: Optional[dict] = None,
                 order_ticks=False,
                 drop_index=True,
                 change_date_to=None,
                 auto_increase_timestamps=True,
                 db='LOCAL',
                 field_delimiter=',',
                 handle_escaped_chars=False,
                 quote_char='"',
                 timestamp_format: Optional[Union[str, Dict[str, str]]] = None,
                 file_contents: Optional[str] = None,
                 **kwargs):

        self._dtype = dtype or {}
        self._names = names
        self._converters = converters or {}
        if (len(field_delimiter) != 1 and field_delimiter != '\t') or field_delimiter == '"' or field_delimiter == "'":
            raise ValueError(f'`field_delimiter` must be a single character (except quotes) '
                             f'or "\t", but "{field_delimiter}" was passed')
        self._field_delimiter = field_delimiter
        if len(quote_char) > 1:
            raise ValueError(f'`quote_char` must be a single character, but `{quote_char}` was passed')
        if self._field_delimiter == quote_char:
            raise ValueError(f'`{self._field_delimiter}` is both field_delimiter and quote_char')
        if quote_char in string.whitespace:
            raise ValueError('Whitespace cannot be a quote_char')
        self._quote_char = quote_char
        self._order_ticks = order_ticks
        self._auto_increase_timestamps = auto_increase_timestamps
        self._db = db
        self._drop_index = drop_index
        self._change_date_to = change_date_to
        self._timestamp_name = timestamp_name
        self._filepath_or_buffer = filepath_or_buffer
        self._first_line_is_title = first_line_is_title
        self._handle_escaped_chars = handle_escaped_chars
        self._timestamp_format = timestamp_format
        self._file_contents = file_contents

        if self._try_default_constructor(**kwargs):
            return

        if self._filepath_or_buffer is not None and self._file_contents is not None:
            raise ValueError("Parameters 'filepath_or_buffer' and 'file_contents' can't be set at the same time.")

        need_to_parse_file = (
            self._file_contents is not None or
            self._filepath_or_buffer is not None and not isinstance(self._filepath_or_buffer, _SymbolParamSource)
        )
        if need_to_parse_file:
            self._columns, self._default_types, self._forced_title, self._symbols = self._parse_file()
        else:
            self._filepath_or_buffer = None
            names = self._names or []
            self._columns = {name: str for name in names}
            self._default_types = {}
            # we don't know whether it is actually forced, but otherwise we would ignore
            # the first non-commented-out line
            self._forced_title = self._first_line_is_title
            self._symbols = None

        self._check_time_column()

        for t in self._dtype:
            if t not in self._columns:
                raise ValueError(f"dtype '{t}' not found in columns list")
            self._columns[t] = self._dtype[t]

        self._ep_fields = ",".join(
            f'{ott.type2str(dtype)} {column}' if issubclass(dtype, otp.string) else column
            for column, dtype in self._columns.items()
        )

        self._to_drop = self._get_to_drop()
        self._has_time, self._start, self._end = self._get_start_end(**kwargs)

        self._columns_with_bool_replaced = dict((n, c if c != bool else float) for n, c in self._columns.items())

        super().__init__(
            _symbols=self._symbols,
            _start=self._start,
            _end=self._end,
            _base_ep_func=self.base_ep,
            schema=self._columns_with_bool_replaced,
        )

        # fake-run converters to set the proper schema
        if self._converters:
            for column, converter in self._converters.items():
                self.schema[column] = converter(self[column]).dtype

        if self._has_time and self._timestamp_name in self.schema:
            if self.schema[self._timestamp_name] not in [ott.nsectime, ott.msectime]:
                raise ValueError(f"CSV converter for {self._timestamp_name} is converting to "
                                 f"{self.schema[self._timestamp_name]} type, but the expected resulting type is "
                                 f"ott.msectime or ott.nsectime")

        # remove the timestamp_name column if we use it as the TIMESTAMP source
        if self._has_time and self._timestamp_name != "Time":
            del self[self._timestamp_name]

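The validation at the top of ``__init__`` rejects malformed delimiter/quote combinations before any file access; an illustrative sketch (``data.csv`` is a placeholder path)::

    import onetick.py as otp

    otp.CSV('data.csv', field_delimiter='\t')                 # OK: tab is explicitly allowed
    otp.CSV('data.csv', field_delimiter=';;')                 # ValueError: not a single character
    otp.CSV('data.csv', field_delimiter=',', quote_char=',')  # ValueError: same char in both roles
    otp.CSV('data.csv', quote_char=' ')                       # ValueError: whitespace quote_char
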
    def _check_time_column(self):
        if "TIMESTAMP" in self._columns:
            raise ValueError(
                "It is not allowed to have a 'TIMESTAMP' column, because it is a reserved name in OneTick"
            )

        if "Time" in self._columns and self._timestamp_name != "Time":
            raise ValueError(
                "It is not allowed to have a 'Time' column that is not used as the timestamp field."
            )

    def _get_to_drop(self):
        to_drop = []
        if "TICK_STATUS" in self._columns:
            del self._columns["TICK_STATUS"]
            to_drop.append("TICK_STATUS")

        if "Index" in self._columns and self._drop_index:
            del self._columns["Index"]
            to_drop.append("Index")
        return to_drop

    def _get_start_end(self, **kwargs):
        start = kwargs.get("start", utils.adaptive)
        end = kwargs.get("end", utils.adaptive)

        has_time = False
        if self._timestamp_name in self._columns:
            has_time = True

            # remove to resolve an exception in Source.__init__
            if self._timestamp_name == "Time":
                del self._columns["Time"]

        # redefine start/end time for change_date_to
        if self._change_date_to:
            start = dt.datetime(self._change_date_to.year, self._change_date_to.month, self._change_date_to.day)
            end = ott.next_day(start)
        return has_time, start, end

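As ``_get_start_end`` shows, passing ``change_date_to`` pins the query window to that date: ``start`` becomes midnight of the given day and ``end`` the next day. A minimal sketch (the path is illustrative)::

    import onetick.py as otp

    # query window becomes [2003-12-01 00:00:00, 2003-12-02 00:00:00)
    data = otp.CSV('data.csv', change_date_to=otp.dt(2003, 12, 1))
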
    def _parse_file(self):
        """
        This function finds the file, gets column names and default types,
        and checks whether the first line is a title, via pandas.
        It also sets the correct value for symbols.
        """
        obj_to_inspect = self._filepath_or_buffer
        if isinstance(obj_to_inspect, utils.FileBuffer):
            obj_to_inspect = io.StringIO(obj_to_inspect.get())
        if self._file_contents is not None:
            obj_to_inspect = io.StringIO(self._file_contents)

        if isinstance(obj_to_inspect, str) and not os.path.exists(obj_to_inspect):
            # if not found, the CSV file is probably located in OneTick CSV_FILE_PATH;
            # check it for inspect_by_pandas()
            csv_paths = otp.utils.get_config_param(os.environ["ONE_TICK_CONFIG"], "CSV_FILE_PATH", default="")
            if csv_paths:
                for csv_path in csv_paths.split(","):
                    csv_path = os.path.join(csv_path, obj_to_inspect)
                    if os.path.exists(csv_path):
                        obj_to_inspect = csv_path
                        break

        columns, default_types, forced_title = _csv_inspector.inspect_by_pandas(
            obj_to_inspect,
            self._first_line_is_title,
            self._names,
            self._field_delimiter,
            self._quote_char,
        )
        if isinstance(self._filepath_or_buffer, utils.FileBuffer) or self._file_contents is not None:
            symbols = 'DUMMY'
        else:
            # str, because an os.PathLike object might be passed
            symbols = str(obj_to_inspect)
        return columns, default_types, forced_title, symbols

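Note the fallback above: when the given path does not exist locally, ``_parse_file`` also tries each directory listed in the ``CSV_FILE_PATH`` parameter of the OneTick config pointed to by ``ONE_TICK_CONFIG``, so a bare file name can resolve against a configured CSV directory (the values below are illustrative assumptions)::

    # in the OneTick config file referenced by ONE_TICK_CONFIG:
    #   CSV_FILE_PATH=/srv/onetick/csv

    import onetick.py as otp

    data = otp.CSV('data.csv')  # inspected as /srv/onetick/csv/data.csv if not found locally
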
    def _get_timestamp_format(self, column_name, dtype):
        if dtype not in (ott.nsectime, ott.msectime):
            raise ValueError(f"Wrong value for parameter 'dtype': {dtype}")
        if self._timestamp_format is None:
            # by default we parse timestamp_name into the TIMESTAMP field
            # using the typical/default Time format of a OneTick dump
            if dtype is ott.nsectime:
                return '%Y/%m/%d %H:%M:%S.%J'
            else:
                return '%Y/%m/%d %H:%M:%S.%q'
        if isinstance(self._timestamp_format, dict):
            return self._timestamp_format[column_name]
        return self._timestamp_format

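Since ``_get_timestamp_format`` accepts a dict keyed by column name, different datetime columns can use different formats; a sketch (the file, columns, and formats are assumptions)::

    import onetick.py as otp

    data = otp.CSV(
        'events.csv',
        dtype={'EXPIRY': otp.nsectime},
        timestamp_format={
            'Time': '%Y/%m/%d %H:%M:%S.%J',  # the default nsectime format
            'EXPIRY': '%d-%m-%Y %H:%M:%S',   # a custom format for one column
        },
    )
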
    def base_ep(self):
        # initialize Source and set schema to columns.
        file_contents = ''
        columns_to_drop = self._to_drop.copy()

        if isinstance(self._filepath_or_buffer, utils.FileBuffer):
            file_contents = self._filepath_or_buffer.get()
        if self._file_contents is not None:
            file_contents = self._file_contents

        csv = Source(
            otq.CsvFileListing(
                field_delimiters=f"'{self._field_delimiter}'",
                time_assignment="_START_TIME",
                # we use the EP's first_line_is_title only when the file path is passed through the symbol;
                # otherwise we don't use it, because the EP raises an error on an empty column name,
                # and we explicitly define names for such columns in the FIELDS arg.
                # but if the first line starts with # (forced_title=True), then this param is ignored :(
                first_line_is_title=(self._filepath_or_buffer is None and
                                     self._file_contents is None and
                                     self._first_line_is_title),
                fields=self._ep_fields,
                file_contents=file_contents,
                handle_escaped_chars=self._handle_escaped_chars,
                quote_chars=f"'{self._quote_char}'",
            ),
            schema=self._columns_with_bool_replaced,
        )

        if self._first_line_is_title and not self._forced_title:
            # remove the first line, which holds the column titles.
            csv.sink(otq.DeclareStateVariables(variables="long __TICK_INDEX=0"))
            csv.sink(otq.PerTickScript("STATE::__TICK_INDEX = STATE::__TICK_INDEX + 1;"))
            csv.sink(otq.WhereClause(discard_on_match=False, where="STATE::__TICK_INDEX > 1"))

        # set tick type to ANY
        update_node_tick_type(csv, "ANY", self._db)

        # check whether we need to update types, because if a column type is not specified in the header,
        # then by default the column has string type in OneTick
        update_columns = {}
        for name, dtype in self._columns.items():
            if not issubclass(dtype, str) and name not in self._default_types:
                update_columns[name] = dtype

        for name, dtype in update_columns.items():
            if dtype is int:
                # BE-142 - workaround for converting string to int:
                # OneTick first converts string to float, and then to int, which leads to losing precision
                csv.sink(otq.AddField(field=f"_TMP_{name}", value="atol(" + name + ")"))
                csv.sink(otq.Passthrough(fields=name, drop_fields=True))
                csv.sink(otq.AddField(field=f"{name}", value=f"_TMP_{name}"))
                csv.sink(otq.Passthrough(fields=f"_TMP_{name}", drop_fields=True))
            elif dtype is float:
                csv.sink(otq.UpdateField(field=name, value="atof(" + name + ")"))
            elif dtype is ott.msectime:
                timestamp_format = self._get_timestamp_format(name, dtype)
                csv.sink(otq.UpdateField(field=name,
                                         value=f'time_format("{timestamp_format}",0,_TIMEZONE)',
                                         where=name + '=""'))
                csv.sink(otq.UpdateField(field=name, value=f'parse_time("{timestamp_format}",{name},_TIMEZONE)'))
            elif dtype is ott.nsectime:
                timestamp_format = self._get_timestamp_format(name, dtype)
                csv.sink(otq.UpdateField(field=name,
                                         value=f'time_format("{timestamp_format}",0,_TIMEZONE)',
                                         where=name + '=""'))
                # TODO: this is the logic from otp.Source._update_field,
                # we should use _Source methods here or refactor
                csv.sink(otq.AddField(field=f"_TMP_{name}",
                                      value=f'parse_nsectime("{timestamp_format}",{name},_TIMEZONE)'))
                csv.sink(otq.Passthrough(fields=name, drop_fields=True))
                csv.sink(otq.AddField(field=name, value=f"_TMP_{name}"))
                csv.sink(otq.Passthrough(fields=f"_TMP_{name}", drop_fields=True))
            elif dtype is bool:
                csv.sink(otq.UpdateField(field=name, value="CASE(" + name + ", 'true', 1.0, 0.0)"))
            else:
                raise TypeError(f"Unsupported type '{dtype}'")

        # run converters
        if self._converters:
            for column, converter in self._converters.items():
                if csv[column].dtype is not otp.nsectime and converter(csv[column]).dtype is otp.nsectime:
                    # workaround to resolve a bug with column type changing:
                    # https://onemarketdata.atlassian.net/browse/PY-416
                    csv[f'_T_{column}'] = converter(csv[column])
                    del csv[column]
                    csv[column] = csv[f'_T_{column}']
                    del csv[f'_T_{column}']
                else:
                    csv[column] = converter(csv[column])

        if self._has_time:
            # if the timestamp_name column is defined in the csv, then apply tick time adjustment

            if self._timestamp_name in self._converters:
                # we assume that if the timestamp_name field is in converters,
                # then it is already converted to otp.dt
                csv.sink(
                    otq.UpdateField(
                        field="TIMESTAMP",
                        value=self._timestamp_name,
                        allow_unordered_output_times=True,
                    )
                )
            else:
                if self._change_date_to:
                    self._change_date_to = self._change_date_to.strftime("%Y/%m/%d")
                    csv.sink(otq.UpdateField(field="Time",
                                             value=f'"{self._change_date_to}" + substr({self._timestamp_name}, 10)'))

                timestamp_format = self._get_timestamp_format(self._timestamp_name, otp.nsectime)
                csv.sink(
                    otq.UpdateField(
                        field="TIMESTAMP",
                        value=f'parse_nsectime("{timestamp_format}", {self._timestamp_name}, _TIMEZONE)',
                        allow_unordered_output_times=True,
                    )
                )

            # drop the source timestamp_name field in favor of the new TIMESTAMP field
            columns_to_drop.append(self._timestamp_name)
        elif self._auto_increase_timestamps:
            # by default, tick times increase from the start time, 1 millisecond per tick
            csv.sink(otq.DeclareStateVariables(variables="long __TIMESTAMP_INC__ = 0"))
            csv.sink(otq.UpdateField(
                field="TIMESTAMP",
                value='DATEADD("millisecond",STATE::__TIMESTAMP_INC__,TIMESTAMP,_TIMEZONE)'))
            csv.sink(otq.UpdateField(field="STATE::__TIMESTAMP_INC__", value="STATE::__TIMESTAMP_INC__ + 1"))

        if self._order_ticks:
            csv.sort('TIMESTAMP', inplace=True)

        if columns_to_drop:
            csv.sink(otq.Passthrough(fields=",".join(columns_to_drop), drop_fields="True"))

        return csv


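The BE-142 workaround in ``base_ep`` exists because converting a string to int through float loses precision for large values; plain Python shows the effect that the ``atol``-based path avoids::

    >>> int(float('1656690986953602371'))  # via float: precision lost
    1656690986953602304
    >>> int('1656690986953602371')         # direct string-to-long parse, as atol() does
    1656690986953602371
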
def LocalCSVTicks(path,  # NOSONAR
                  start=utils.adaptive,
                  end=utils.adaptive,
                  date_converter=default_date_converter,
                  additional_date_columns=None,
                  converters=None,
                  tz=None,
                  ):
    """
    Loads ticks from a csv file and creates an otp.Ticks object from them.

    Parameters
    ----------
    path: str
        Absolute path to the csv file
    start: datetime object
        Start of the query interval
    end: datetime object
        End of the query interval
    date_converter:
        A converter from string to datetime format; by default applied only to the TIMESTAMP column
    additional_date_columns:
        Other columns to convert to datetime format
    converters:
        Non-default converters for columns, applied to strings
    tz:
        timezone

    Returns
    -------
    otp.Ticks
    """
    if tz is None:
        tz = configuration.config.tz

    c = {'TIMESTAMP': partial(to_timestamp_nanos, date_converter=date_converter, tz=tz)}
    if converters is not None:
        c.update(converters)
    if additional_date_columns is not None:
        c.update({column: partial(to_timestamp_nanos,
                                  date_converter=date_converter,
                                  tz=tz,
                                  ) for column in additional_date_columns})
    df = pd.read_csv(path, converters=c)
    df['TS_'] = df['TIMESTAMP']
    df['SYMBOL_NAME'] = df['#SYMBOL_NAME']
    d = df.to_dict(orient='list')
    del d['TIMESTAMP']
    del d['#SYMBOL_NAME']

    ticks = Ticks(d, start=start, end=end)
    ticks['TIMESTAMP'] = ticks['TS_']
    ticks = ticks.drop('TS_')

    return ticks
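
A usage sketch for ``LocalCSVTicks`` (the file name, columns, values, and import path are illustrative assumptions; note that the file must contain ``TIMESTAMP`` and ``#SYMBOL_NAME`` columns, and the timestamp strings must be parseable by the supplied ``date_converter``)::

    import onetick.py as otp
    from onetick.py.sources.csv import LocalCSVTicks  # assumed import path for this module

    # hypothetical /tmp/trades.csv:
    #   TIMESTAMP,#SYMBOL_NAME,PRICE
    #   2022/07/01 05:28:26.281508365,AAPL,682.88
    ticks = LocalCSVTicks('/tmp/trades.csv',
                          start=otp.dt(2022, 7, 1),
                          end=otp.dt(2022, 7, 2))
    df = otp.run(ticks)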