onetick-py 1.177.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152) hide show
  1. locator_parser/__init__.py +0 -0
  2. locator_parser/acl.py +73 -0
  3. locator_parser/actions.py +262 -0
  4. locator_parser/common.py +368 -0
  5. locator_parser/io.py +43 -0
  6. locator_parser/locator.py +150 -0
  7. onetick/__init__.py +101 -0
  8. onetick/doc_utilities/__init__.py +3 -0
  9. onetick/doc_utilities/napoleon.py +40 -0
  10. onetick/doc_utilities/ot_doctest.py +140 -0
  11. onetick/doc_utilities/snippets.py +279 -0
  12. onetick/lib/__init__.py +4 -0
  13. onetick/lib/instance.py +141 -0
  14. onetick/py/__init__.py +293 -0
  15. onetick/py/_stack_info.py +89 -0
  16. onetick/py/_version.py +2 -0
  17. onetick/py/aggregations/__init__.py +11 -0
  18. onetick/py/aggregations/_base.py +648 -0
  19. onetick/py/aggregations/_docs.py +948 -0
  20. onetick/py/aggregations/compute.py +286 -0
  21. onetick/py/aggregations/functions.py +2216 -0
  22. onetick/py/aggregations/generic.py +104 -0
  23. onetick/py/aggregations/high_low.py +80 -0
  24. onetick/py/aggregations/num_distinct.py +83 -0
  25. onetick/py/aggregations/order_book.py +501 -0
  26. onetick/py/aggregations/other.py +1014 -0
  27. onetick/py/backports.py +26 -0
  28. onetick/py/cache.py +374 -0
  29. onetick/py/callback/__init__.py +5 -0
  30. onetick/py/callback/callback.py +276 -0
  31. onetick/py/callback/callbacks.py +131 -0
  32. onetick/py/compatibility.py +798 -0
  33. onetick/py/configuration.py +771 -0
  34. onetick/py/core/__init__.py +0 -0
  35. onetick/py/core/_csv_inspector.py +93 -0
  36. onetick/py/core/_internal/__init__.py +0 -0
  37. onetick/py/core/_internal/_manually_bound_value.py +6 -0
  38. onetick/py/core/_internal/_nodes_history.py +250 -0
  39. onetick/py/core/_internal/_op_utils/__init__.py +0 -0
  40. onetick/py/core/_internal/_op_utils/every_operand.py +9 -0
  41. onetick/py/core/_internal/_op_utils/is_const.py +10 -0
  42. onetick/py/core/_internal/_per_tick_scripts/tick_list_sort_template.script +121 -0
  43. onetick/py/core/_internal/_proxy_node.py +140 -0
  44. onetick/py/core/_internal/_state_objects.py +2312 -0
  45. onetick/py/core/_internal/_state_vars.py +93 -0
  46. onetick/py/core/_source/__init__.py +0 -0
  47. onetick/py/core/_source/_symbol_param.py +95 -0
  48. onetick/py/core/_source/schema.py +97 -0
  49. onetick/py/core/_source/source_methods/__init__.py +0 -0
  50. onetick/py/core/_source/source_methods/aggregations.py +809 -0
  51. onetick/py/core/_source/source_methods/applyers.py +296 -0
  52. onetick/py/core/_source/source_methods/columns.py +141 -0
  53. onetick/py/core/_source/source_methods/data_quality.py +301 -0
  54. onetick/py/core/_source/source_methods/debugs.py +272 -0
  55. onetick/py/core/_source/source_methods/drops.py +120 -0
  56. onetick/py/core/_source/source_methods/fields.py +619 -0
  57. onetick/py/core/_source/source_methods/filters.py +1002 -0
  58. onetick/py/core/_source/source_methods/joins.py +1413 -0
  59. onetick/py/core/_source/source_methods/merges.py +605 -0
  60. onetick/py/core/_source/source_methods/misc.py +1455 -0
  61. onetick/py/core/_source/source_methods/pandases.py +155 -0
  62. onetick/py/core/_source/source_methods/renames.py +356 -0
  63. onetick/py/core/_source/source_methods/sorts.py +183 -0
  64. onetick/py/core/_source/source_methods/switches.py +142 -0
  65. onetick/py/core/_source/source_methods/symbols.py +117 -0
  66. onetick/py/core/_source/source_methods/times.py +627 -0
  67. onetick/py/core/_source/source_methods/writes.py +986 -0
  68. onetick/py/core/_source/symbol.py +205 -0
  69. onetick/py/core/_source/tmp_otq.py +222 -0
  70. onetick/py/core/column.py +209 -0
  71. onetick/py/core/column_operations/__init__.py +0 -0
  72. onetick/py/core/column_operations/_methods/__init__.py +4 -0
  73. onetick/py/core/column_operations/_methods/_internal.py +28 -0
  74. onetick/py/core/column_operations/_methods/conversions.py +216 -0
  75. onetick/py/core/column_operations/_methods/methods.py +292 -0
  76. onetick/py/core/column_operations/_methods/op_types.py +160 -0
  77. onetick/py/core/column_operations/accessors/__init__.py +0 -0
  78. onetick/py/core/column_operations/accessors/_accessor.py +28 -0
  79. onetick/py/core/column_operations/accessors/decimal_accessor.py +104 -0
  80. onetick/py/core/column_operations/accessors/dt_accessor.py +537 -0
  81. onetick/py/core/column_operations/accessors/float_accessor.py +184 -0
  82. onetick/py/core/column_operations/accessors/str_accessor.py +1367 -0
  83. onetick/py/core/column_operations/base.py +1121 -0
  84. onetick/py/core/cut_builder.py +150 -0
  85. onetick/py/core/db_constants.py +20 -0
  86. onetick/py/core/eval_query.py +245 -0
  87. onetick/py/core/lambda_object.py +441 -0
  88. onetick/py/core/multi_output_source.py +232 -0
  89. onetick/py/core/per_tick_script.py +2256 -0
  90. onetick/py/core/query_inspector.py +464 -0
  91. onetick/py/core/source.py +1744 -0
  92. onetick/py/db/__init__.py +2 -0
  93. onetick/py/db/_inspection.py +1128 -0
  94. onetick/py/db/db.py +1327 -0
  95. onetick/py/db/utils.py +64 -0
  96. onetick/py/docs/__init__.py +0 -0
  97. onetick/py/docs/docstring_parser.py +112 -0
  98. onetick/py/docs/utils.py +81 -0
  99. onetick/py/functions.py +2398 -0
  100. onetick/py/license.py +190 -0
  101. onetick/py/log.py +88 -0
  102. onetick/py/math.py +935 -0
  103. onetick/py/misc.py +470 -0
  104. onetick/py/oqd/__init__.py +22 -0
  105. onetick/py/oqd/eps.py +1195 -0
  106. onetick/py/oqd/sources.py +325 -0
  107. onetick/py/otq.py +216 -0
  108. onetick/py/pyomd_mock.py +47 -0
  109. onetick/py/run.py +916 -0
  110. onetick/py/servers.py +173 -0
  111. onetick/py/session.py +1347 -0
  112. onetick/py/sources/__init__.py +19 -0
  113. onetick/py/sources/cache.py +167 -0
  114. onetick/py/sources/common.py +128 -0
  115. onetick/py/sources/csv.py +642 -0
  116. onetick/py/sources/custom.py +85 -0
  117. onetick/py/sources/data_file.py +305 -0
  118. onetick/py/sources/data_source.py +1045 -0
  119. onetick/py/sources/empty.py +94 -0
  120. onetick/py/sources/odbc.py +337 -0
  121. onetick/py/sources/order_book.py +271 -0
  122. onetick/py/sources/parquet.py +168 -0
  123. onetick/py/sources/pit.py +191 -0
  124. onetick/py/sources/query.py +495 -0
  125. onetick/py/sources/snapshots.py +419 -0
  126. onetick/py/sources/split_query_output_by_symbol.py +198 -0
  127. onetick/py/sources/symbology_mapping.py +123 -0
  128. onetick/py/sources/symbols.py +374 -0
  129. onetick/py/sources/ticks.py +825 -0
  130. onetick/py/sql.py +70 -0
  131. onetick/py/state.py +251 -0
  132. onetick/py/types.py +2131 -0
  133. onetick/py/utils/__init__.py +70 -0
  134. onetick/py/utils/acl.py +93 -0
  135. onetick/py/utils/config.py +186 -0
  136. onetick/py/utils/default.py +49 -0
  137. onetick/py/utils/file.py +38 -0
  138. onetick/py/utils/helpers.py +76 -0
  139. onetick/py/utils/locator.py +94 -0
  140. onetick/py/utils/perf.py +498 -0
  141. onetick/py/utils/query.py +49 -0
  142. onetick/py/utils/render.py +1374 -0
  143. onetick/py/utils/script.py +244 -0
  144. onetick/py/utils/temp.py +471 -0
  145. onetick/py/utils/types.py +120 -0
  146. onetick/py/utils/tz.py +84 -0
  147. onetick_py-1.177.0.dist-info/METADATA +137 -0
  148. onetick_py-1.177.0.dist-info/RECORD +152 -0
  149. onetick_py-1.177.0.dist-info/WHEEL +5 -0
  150. onetick_py-1.177.0.dist-info/entry_points.txt +2 -0
  151. onetick_py-1.177.0.dist-info/licenses/LICENSE +21 -0
  152. onetick_py-1.177.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,642 @@
1
+ import datetime as dt
2
+ import os
3
+ import io
4
+ import string
5
+
6
+ from functools import partial
7
+ from typing import Optional, Union, Dict
8
+
9
+ import onetick.py as otp
10
+ from onetick.py.otq import otq
11
+ import pandas as pd
12
+
13
+ from onetick.py.core._source._symbol_param import _SymbolParamSource
14
+ from onetick.py.core.source import Source
15
+
16
+ from .. import types as ott
17
+ from .. import utils, configuration
18
+ from ..core import _csv_inspector
19
+
20
+ from .common import default_date_converter, to_timestamp_nanos, update_node_tick_type
21
+ from .ticks import Ticks
22
+
23
+
24
def CSV(  # NOSONAR
    filepath_or_buffer=None,
    timestamp_name: Optional[str] = "Time",
    first_line_is_title: bool = True,
    names: Optional[list] = None,
    dtype: Optional[dict] = None,
    converters: Optional[dict] = None,
    order_ticks=False,
    drop_index=True,
    change_date_to=None,
    auto_increase_timestamps=True,
    db='LOCAL',
    field_delimiter=',',
    handle_escaped_chars=False,
    quote_char='"',
    timestamp_format: Optional[Union[str, Dict[str, str]]] = None,
    file_contents: Optional[str] = None,
    **kwargs,
):
    """
    Construct source based on CSV file.

    There are several steps determining column types.

    1. Initially, all columns are treated as ``str``.
    2. If a column name in the CSV title has format ``type COLUMNNAME``,
       it will change type from ``str`` to the specified type.
    3. All column types are determined automatically from their data.
    4. You could override determined types in ``dtype`` argument explicitly.
    5. ``converters`` argument is applied after ``dtype`` and could also change column type.

    NOTE: Double quotes are not supported in CSV files for escaping quotes in strings,
    you should use escape character ``\\`` before the quote instead,
    for example: ``"I'm a string with a \\"quotes\\" inside"``. And then set `handle_escaped_chars=True`.

    Parameters
    ----------
    filepath_or_buffer: str, os.PathLike, FileBuffer, optional
        Path to CSV file or :class:`file buffer <FileBuffer>`. If None value is taken through symbol.
        When taken from symbol, symbol must have ``LOCAL::`` prefix.
        In that case you should set the columns otherwise schema will be empty.
    timestamp_name: str, default "Time"
        Name of TIMESTAMP column used for ticks. Used only if it exists in CSV columns, otherwise ignored.
        Output data will be sorted by this column.
    first_line_is_title: bool
        Use first line of CSV file as a source for column names and types.
        If CSV file is started with # symbol, this parameter **must** be ``True``.

        - If ``True``, column names are inferred from the first line of the file,
          it is not allowed to have empty name for any column.

        - If ``False``, first line is processed as data, column names will be COLUMN_0, ..., COLUMN_N-1
          (as shown in the examples below).
          You could specify column names in ``names`` argument.

    names: list, optional
        List of column names to use, or None.
        Length must be equal to columns number in file.
        Duplicates in this list are not allowed.
    dtype: dict, optional
        Data type for columns, as dict of pairs {column_name: type}.
        Will convert column type from ``str`` to specified type, before applying converters.
    converters: dict, optional
        Dict of functions for converting values in certain columns. Keys are column names.
        Function must be valid callable with ``onetick.py`` syntax, example::

            converters={
                "time_number": lambda c: c.apply(otp.nsectime),
                "stock": lambda c: c.str.lower(),
            }

        Converters applied *after* ``dtype`` conversion.
    order_ticks: bool, optional
        If ``True`` and ``timestamp_name`` column is used, then source will order ticks by time.
        Note, that if ``False`` and ticks are not ordered in sequence, then OneTick will raise Exception in runtime.
    drop_index: bool, optional
        if ``True`` and 'Index' column is in the csv file then this column will be removed.
    change_date_to: datetime, date, optional
        change date from a timestamp column to a specific date. Default is None, means not changing timestamp column.
    auto_increase_timestamps: bool, optional
        Only used if provided CSV file does not have a TIMESTAMP column. If ``True``, timestamps of loaded ticks
        would start at ``start_time`` and on each next tick, would increase by 1 millisecond.
        If ``False``, timestamps of all loaded ticks would be equal to ``start_time``
    db: str, optional
        Name of a database to define a destination where the csv file will be transported for processing.
        ``LOCAL`` is default value that means OneTick will process it on the site where a query runs.
    field_delimiter: str, optional
        A character that is used to tokenize each line of the CSV file.
        For a tab character \t (back-slash followed by t) should be specified.
    handle_escaped_chars: bool, optional
        If set, the backslash char ``\\`` gets a special meaning and everywhere in the input text
        the combinations ``\\'``, ``\\"`` and ``\\\\`` are changed correspondingly by ``'``, ``"`` and ``\\``,
        which are processed then as regular chars.
        Besides, combinations like ``\\x??``, where ?-s are hexadecimal digits (0-9, a-f or A-F),
        are changed by the chars with the specified ASCII code.
        For example, ``\\x0A`` will be replaced by a newline character, ``\\x09`` will be replaced by tab, and so on.
        Default: False
    quote_char: str
        Character used to denote the start and end of a quoted item. Quoted items can include the delimiter,
        and it will be ignored. The same character cannot be marked both as the quote character and as the
        field delimiter. Besides, space characters cannot be used as quote.
        Default: " (double quotes)
    timestamp_format: str or dict
        Expected format for ``timestamp_name`` and all other datetime columns.
        If dictionary is passed, then different format can be specified for each column.
        This format is expected when converting strings from csv file to ``dtype``.
        Default format is ``%Y/%m/%d %H:%M:%S.%J`` for :py:class:`~onetick.py.nsectime` columns and
        ``%Y/%m/%d %H:%M:%S.%q`` for :py:class:`~onetick.py.msectime` columns.
    file_contents: str
        Specify the contents of the csv file as string.
        Can be used instead of ``filepath_or_buffer`` parameter.
    **kwargs:
        Additional parameters forwarded to the underlying source, e.g. the ``start`` and ``end``
        query interval bounds used in the examples below.

    See also
    --------
    **CSV_FILE_LISTING** OneTick event processor

    Examples
    --------
    Simple CSV file reading

    >>> data = otp.CSV(os.path.join(csv_path, "data.csv"))
    >>> otp.run(data)
                         Time          time_number      px side
    0 2003-12-01 00:00:00.000  1656690986953602371   30.89  Buy
    1 2003-12-01 00:00:00.001  1656667706281508365  682.88  Buy

    Read CSV file and get timestamp for ticks from specific field.
    You need to specify query start/end interval including all ticks.

    >>> data = otp.CSV(os.path.join(csv_path, "data.csv"),
    ...                timestamp_name="time_number",
    ...                converters={"time_number": lambda c: c.apply(otp.nsectime)},
    ...                start=otp.dt(2010, 8, 1),
    ...                end=otp.dt(2022, 9, 2))
    >>> otp.run(data)
                               Time      px side
    0 2022-07-01 05:28:26.281508365  682.88  Buy
    1 2022-07-01 11:56:26.953602371   30.89  Buy

    Path to csv can be passed via symbol with `LOCAL::` prefix:

    >>> data = otp.CSV()
    >>> otp.run(data, symbols=f"LOCAL::{os.path.join(csv_path, 'data.csv')}")
                         Time          time_number      px side
    0 2003-12-01 00:00:00.000  1656690986953602371   30.89  Buy
    1 2003-12-01 00:00:00.001  1656667706281508365  682.88  Buy

    Field delimiters can be set via ``field_delimiters`` parameter:

    >>> data = otp.CSV(os.path.join(csv_path, 'data_diff_delimiters.csv'),
    ...                field_delimiter=' ',
    ...                first_line_is_title=False)
    >>> otp.run(data)
                         Time COLUMN_0 COLUMN_1
    0 2003-12-01 00:00:00.000      1,2        3
    1 2003-12-01 00:00:00.001        4      5,6

    Quote char can be set via ``quote_char`` parameter:

    >>> data = otp.CSV(os.path.join(csv_path, 'data_diff_quote_chars.csv'),
    ...                quote_char="'",
    ...                first_line_is_title=False)
    >>> otp.run(data)
                         Time COLUMN_0 COLUMN_1
    0 2003-12-01 00:00:00.000        1    "2 3"
    1 2003-12-01 00:00:00.001    "1 2"        3

    Use parameter ``file_contents`` to read the data from string:

    >>> data = otp.CSV(file_contents=os.linesep.join([
    ...     'A,B,C',
    ...     '1,f,3.3',
    ...     '2,g,4.4',
    ... ]))
    >>> otp.run(data)
                         Time  A  B    C
    0 2003-12-01 00:00:00.000  1  f  3.3
    1 2003-12-01 00:00:00.001  2  g  4.4
    """
    # All real work is delegated to the internal _CSV source class below.
    csv_source = _CSV(
        filepath_or_buffer=filepath_or_buffer,
        timestamp_name=timestamp_name,
        first_line_is_title=first_line_is_title,
        names=names,
        dtype=dtype,
        converters=converters,
        order_ticks=order_ticks,
        drop_index=drop_index,
        change_date_to=change_date_to,
        auto_increase_timestamps=auto_increase_timestamps,
        db=db,
        field_delimiter=field_delimiter,
        handle_escaped_chars=handle_escaped_chars,
        quote_char=quote_char,
        timestamp_format=timestamp_format,
        file_contents=file_contents,
        **kwargs,
    )
    # Sort by the final tick time ('Time'): _CSV maps ``timestamp_name`` onto
    # the TIMESTAMP pseudo-field, so this orders the output as documented.
    csv_source = csv_source.sort(csv_source['Time'])
    # NOTE(review): merging with otp.Empty(db=db) appears to be done to bind
    # the query to database ``db`` even when the file yields no ticks — confirm.
    return otp.merge([csv_source, otp.Empty(db=db)])
223
+
224
+
225
class _CSV(Source):
    """
    Internal implementation of :func:`CSV`.

    Builds a OneTick graph around the CSV_FILE_LISTING event processor:
    inspects the file (via pandas) to determine column names/types, applies
    ``dtype`` conversions and user ``converters``, and maps ``timestamp_name``
    onto the TIMESTAMP pseudo-field.  See :func:`CSV` for parameter semantics.
    """

    _PROPERTIES = Source._PROPERTIES + [
        "_dtype",
        "_names",
        "_columns",
        "_columns_with_bool_replaced",
        "_forced_title",
        "_default_types",
        "_has_time",
        "_to_drop",
        "_start",
        "_end",
        "_ep_fields",
        "_symbols",
        "_field_delimiter",
        "_converters",
        "_order_ticks",
        "_auto_increase_timestamps",
        "_db",
        "_drop_index",
        "_change_date_to",
        "_timestamp_name",
        "_filepath_or_buffer",
        "_first_line_is_title",
        "_handle_escaped_chars",
        "_quote_char",
        "_timestamp_format",
        "_file_contents",
    ]

    def __init__(self,
                 filepath_or_buffer=None,
                 timestamp_name: Optional[str] = "Time",
                 first_line_is_title: bool = True,
                 names: Optional[list] = None,
                 dtype: Optional[dict] = None,
                 converters: Optional[dict] = None,
                 order_ticks=False,
                 drop_index=True,
                 change_date_to=None,
                 auto_increase_timestamps=True,
                 db='LOCAL',
                 field_delimiter=',',
                 handle_escaped_chars=False,
                 quote_char='"',
                 timestamp_format: Optional[Union[str, Dict[str, str]]] = None,
                 file_contents: Optional[str] = None,
                 **kwargs):

        self._dtype = dtype or {}
        self._names = names
        self._converters = converters or {}
        # The delimiter must be a single char (tab allowed), and must not be a quote:
        # quotes are reserved for quoting fields.
        if (len(field_delimiter) != 1 and field_delimiter != '\t') or field_delimiter == '"' or field_delimiter == "'":
            raise ValueError(f'`field_delimiter` can be single character (except quotes) '
                             f'or "\t" but "{field_delimiter}" was passed')
        self._field_delimiter = field_delimiter
        if len(quote_char) > 1:
            raise ValueError(f'quote_char should be single char but `{quote_char}` was passed')
        if self._field_delimiter == quote_char:
            raise ValueError(f'`{self._field_delimiter}` is both field_delimiter and quote_char')
        if quote_char in string.whitespace:
            raise ValueError('Whitespace can not be a quote_char')
        self._quote_char = quote_char
        self._order_ticks = order_ticks
        self._auto_increase_timestamps = auto_increase_timestamps
        self._db = db
        self._drop_index = drop_index
        self._change_date_to = change_date_to
        self._timestamp_name = timestamp_name
        self._filepath_or_buffer = filepath_or_buffer
        self._first_line_is_title = first_line_is_title
        self._handle_escaped_chars = handle_escaped_chars
        self._timestamp_format = timestamp_format
        self._file_contents = file_contents

        if self._try_default_constructor(**kwargs):
            return

        if self._filepath_or_buffer is not None and self._file_contents is not None:
            raise ValueError("Parameters 'filepath_or_buffer' and 'file_contents' can't be set at the same time.")

        # The file can be inspected up-front unless the path comes through a
        # symbol parameter (only known at query run time).
        need_to_parse_file = (
            self._file_contents is not None or
            self._filepath_or_buffer is not None and not isinstance(self._filepath_or_buffer, _SymbolParamSource)
        )
        if need_to_parse_file:
            self._columns, self._default_types, self._forced_title, self._symbols = self._parse_file()
        else:
            self._filepath_or_buffer = None
            names = self._names or []
            # Without the file we can only trust user-supplied names; everything is str.
            self._columns = {name: str for name in names}
            self._default_types = {}
            # we don't know it is actually forced, but otherwise we would ignore the first not commented-out line
            self._forced_title = self._first_line_is_title
            self._symbols = None

        self._check_time_column()

        # Apply explicit dtype overrides on top of the inspected schema.
        for t in self._dtype:
            if t not in self._columns:
                raise ValueError(f"dtype '{t}' not found in columns list")
            self._columns[t] = self._dtype[t]

        # FIELDS arg of CSV_FILE_LISTING: plain name, or "type name" for string types.
        self._ep_fields = ",".join(
            f'{ott.type2str(dtype)} {column}' if issubclass(dtype, otp.string) else column
            for column, dtype in self._columns.items()
        )

        self._to_drop = self._get_to_drop()
        self._has_time, self._start, self._end = self._get_start_end(**kwargs)

        # bool columns are represented as float (0.0/1.0) in the OneTick schema.
        self._columns_with_bool_replaced = dict((n, c if c != bool else float) for n, c in self._columns.items())

        super().__init__(
            _symbols=self._symbols,
            _start=self._start,
            _end=self._end,
            _base_ep_func=self.base_ep,
            schema=self._columns_with_bool_replaced,
        )

        # fake run converters to set proper schema
        if self._converters:
            for column, converter in self._converters.items():
                self.schema[column] = converter(self[column]).dtype

        if self._has_time and self._timestamp_name in self.schema:
            if self.schema[self._timestamp_name] not in [ott.nsectime, ott.msectime]:
                # BUGFIX: consistently use self._timestamp_name in the message
                # (the value equals the parameter, but the attribute is the canonical one).
                raise ValueError(f"CSV converter for {self._timestamp_name} is converting to "
                                 f"{self.schema[self._timestamp_name]} type, but expected resulted type is "
                                 f"ott.msectime or ott.nsectime")

        # remove timestamp_name column, if we use it as TIMESTAMP source
        if self._has_time and self._timestamp_name != "Time":
            del self[self._timestamp_name]

    def _check_time_column(self):
        """Reject column names that clash with OneTick's reserved time fields."""
        if "TIMESTAMP" in self._columns:
            raise ValueError(
                "It is not allowed to have 'TIMESTAMP' columns, because it is reserved name in OneTick"
            )

        if "Time" in self._columns and self._timestamp_name != "Time":
            raise ValueError(
                "It is not allowed to have 'Time' column not used as timestamp field."
            )

    def _get_to_drop(self):
        """Remove service columns (TICK_STATUS, optionally Index) from the schema
        and return their names so ``base_ep`` can drop them from the output."""
        to_drop = []
        if "TICK_STATUS" in self._columns:
            del self._columns["TICK_STATUS"]
            to_drop.append("TICK_STATUS")

        if "Index" in self._columns and self._drop_index:
            del self._columns["Index"]
            to_drop.append("Index")
        return to_drop

    def _get_start_end(self, **kwargs):
        """
        Determine whether the CSV provides tick times and the query interval.

        Returns a tuple ``(has_time, start, end)``; ``change_date_to``
        overrides the interval with that single day.
        """
        start = kwargs.get("start", utils.adaptive)
        end = kwargs.get("end", utils.adaptive)

        has_time = False
        if self._timestamp_name in self._columns:
            has_time = True

            # remove to resolve exception in Source.__init__
            if self._timestamp_name == "Time":
                del self._columns["Time"]

        # redefine start/end time for change_date_to
        if self._change_date_to:
            start = dt.datetime(self._change_date_to.year, self._change_date_to.month, self._change_date_to.day)
            end = ott.next_day(start)
        return has_time, start, end

    def _parse_file(self):
        """
        This function finds the file and gets column names, default types and checks
        if the first line is a title via pandas.
        It also sets the correct value for symbols.
        """
        obj_to_inspect = self._filepath_or_buffer
        if isinstance(obj_to_inspect, utils.FileBuffer):
            obj_to_inspect = io.StringIO(obj_to_inspect.get())
        if self._file_contents is not None:
            obj_to_inspect = io.StringIO(self._file_contents)

        if isinstance(obj_to_inspect, str) and not os.path.exists(obj_to_inspect):
            # if not found, probably, CSV file is located in OneTick CSV_FILE_PATH, check it for inspect_by_pandas()
            csv_paths = otp.utils.get_config_param(os.environ["ONE_TICK_CONFIG"], "CSV_FILE_PATH", default="")
            if csv_paths:
                for csv_path in csv_paths.split(","):
                    csv_path = os.path.join(csv_path, obj_to_inspect)
                    if os.path.exists(csv_path):
                        obj_to_inspect = csv_path
                        break

        columns, default_types, forced_title = _csv_inspector.inspect_by_pandas(
            obj_to_inspect,
            self._first_line_is_title,
            self._names,
            self._field_delimiter,
            self._quote_char,
        )
        if isinstance(self._filepath_or_buffer, utils.FileBuffer) or self._file_contents is not None:
            # content is passed inline to the EP, the symbol is a placeholder
            symbols = 'DUMMY'
        else:
            # str, because there might passed an os.PathLike object
            symbols = str(obj_to_inspect)
        return columns, default_types, forced_title, symbols

    def _get_timestamp_format(self, column_name, dtype):
        """Return the time-parsing format for ``column_name`` given its ``dtype``
        (ott.nsectime or ott.msectime), honouring the ``timestamp_format`` parameter."""
        if dtype not in (ott.nsectime, ott.msectime):
            raise ValueError(f"Wrong value for parameter 'dtype': {dtype}")
        if self._timestamp_format is None:
            # by default we parse timestamp_name into TIMESTAMP field
            # from typical/default Time format from OneTick dump
            if dtype is ott.nsectime:
                return '%Y/%m/%d %H:%M:%S.%J'
            else:
                return '%Y/%m/%d %H:%M:%S.%q'
        if isinstance(self._timestamp_format, dict):
            return self._timestamp_format[column_name]
        return self._timestamp_format

    def base_ep(self):
        """Build the OneTick event-processor graph for this CSV source."""
        # initialize Source and set schema to columns.
        file_contents = ''
        columns_to_drop = self._to_drop.copy()

        if isinstance(self._filepath_or_buffer, utils.FileBuffer):
            file_contents = self._filepath_or_buffer.get()
        if self._file_contents is not None:
            file_contents = self._file_contents

        csv = Source(
            otq.CsvFileListing(
                field_delimiters=f"'{self._field_delimiter}'",
                time_assignment="_START_TIME",
                # we use EP's first_line_is_title only when file path is passed through symbol
                # otherwise we don't use EP's first_line_is_title, because EP raise error on empty column name,
                # and we explicitly define name for such columns in FIELDS arg.
                # but if first line started with # (forced_title=True), then this param ignored :(
                first_line_is_title=(self._filepath_or_buffer is None and
                                     self._file_contents is None and
                                     self._first_line_is_title),
                fields=self._ep_fields,
                file_contents=file_contents,
                handle_escaped_chars=self._handle_escaped_chars,
                quote_chars=f"'{self._quote_char}'",
            ),
            schema=self._columns_with_bool_replaced,
        )

        if self._first_line_is_title and not self._forced_title:
            # remove first line with titles for columns.
            csv.sink(otq.DeclareStateVariables(variables="long __TICK_INDEX=0"))
            csv.sink(otq.PerTickScript("STATE::__TICK_INDEX = STATE::__TICK_INDEX + 1;"))
            csv.sink(otq.WhereClause(discard_on_match=False, where="STATE::__TICK_INDEX > 1"))

        # set tick type to ANY
        update_node_tick_type(csv, "ANY", self._db)

        # check whether need to update types, because if column type is not specified in header
        # then by default column has string type in OneTick
        update_columns = {}
        for name, dtype in self._columns.items():
            if not issubclass(dtype, str) and name not in self._default_types:
                update_columns[name] = dtype

        for name, dtype in update_columns.items():
            if dtype is int:
                # BE-142 - workaround for converting string to int
                # OneTick first convert string to float, and then to int, which leeds to losing precision
                csv.sink(otq.AddField(field=f"_TMP_{name}", value="atol(" + name + ")"))
                csv.sink(otq.Passthrough(fields=name, drop_fields=True))
                csv.sink(otq.AddField(field=name, value=f"_TMP_{name}"))
                csv.sink(otq.Passthrough(fields=f"_TMP_{name}", drop_fields=True))
            elif dtype is float:
                csv.sink(otq.UpdateField(field=name, value="atof(" + name + ")"))
            elif dtype is ott.msectime:
                timestamp_format = self._get_timestamp_format(name, dtype)
                # empty strings are replaced with epoch-zero formatted time before parsing
                csv.sink(otq.UpdateField(field=name,
                                         value=f'time_format("{timestamp_format}",0,_TIMEZONE)',
                                         where=name + '=""'))
                csv.sink(otq.UpdateField(field=name, value=f'parse_time("{timestamp_format}",{name},_TIMEZONE)'))
            elif dtype is ott.nsectime:
                timestamp_format = self._get_timestamp_format(name, dtype)
                csv.sink(otq.UpdateField(field=name,
                                         value=f'time_format("{timestamp_format}",0,_TIMEZONE)',
                                         where=name + '=""'))
                # TODO: this is the logic from otp.Source._update_field,
                # we should use _Source methods here or refactor
                csv.sink(otq.AddField(field=f"_TMP_{name}",
                                      value=f'parse_nsectime("{timestamp_format}",{name},_TIMEZONE)'))
                csv.sink(otq.Passthrough(fields=name, drop_fields=True))
                csv.sink(otq.AddField(field=name, value=f"_TMP_{name}"))
                csv.sink(otq.Passthrough(fields=f"_TMP_{name}", drop_fields=True))
            elif dtype is bool:
                csv.sink(otq.UpdateField(field=name, value="CASE(" + name + ", 'true', 1.0, 0.0)"))
            else:
                raise TypeError(f"Unsupported type '{dtype}'")

        # run converters
        if self._converters:
            for column, converter in self._converters.items():
                if csv[column].dtype is not otp.nsectime and converter(csv[column]).dtype is otp.nsectime:
                    # workaround for resolve bug on column type changing:
                    # https://onemarketdata.atlassian.net/browse/PY-416
                    # BUGFIX: the temp column must be named after `column`; previously this
                    # used the stale `name` variable from the loop above, which is wrong for
                    # multiple converters and a NameError when update_columns is empty.
                    csv[f'_T_{column}'] = converter(csv[column])
                    del csv[column]
                    csv[column] = csv[f'_T_{column}']
                    del csv[f'_T_{column}']
                else:
                    csv[column] = converter(csv[column])

        if self._has_time:
            # if timestamp_name column is defined in the csv, then apply tick time adjustment

            if self._timestamp_name in self._converters:
                # we assume that if timestamp_name field in converters,
                # then it is already converted to otp.dt
                csv.sink(
                    otq.UpdateField(
                        field="TIMESTAMP",
                        value=self._timestamp_name,
                        allow_unordered_output_times=True,
                    )
                )
            else:
                if self._change_date_to:
                    # NOTE: _change_date_to is re-assigned here from date to "%Y/%m/%d" string.
                    self._change_date_to = self._change_date_to.strftime("%Y/%m/%d")
                    csv.sink(otq.UpdateField(field="Time",
                                             value=f'"{self._change_date_to}" + substr({self._timestamp_name}, 10)'))

                timestamp_format = self._get_timestamp_format(self._timestamp_name, otp.nsectime)
                csv.sink(
                    otq.UpdateField(
                        field="TIMESTAMP",
                        value=f'parse_nsectime("{timestamp_format}", {self._timestamp_name}, _TIMEZONE)',
                        allow_unordered_output_times=True,
                    )
                )

            # drop source timestamp_name field in favor of new TIMESTAMP field
            columns_to_drop.append(self._timestamp_name)
        elif self._auto_increase_timestamps:
            # default time for ticks are increasing from 0
            csv.sink(otq.DeclareStateVariables(variables="long __TIMESTAMP_INC__ = 0"))
            csv.sink(otq.UpdateField(
                field="TIMESTAMP",
                value='DATEADD("millisecond",STATE::__TIMESTAMP_INC__,TIMESTAMP,_TIMEZONE)'))
            csv.sink(otq.UpdateField(field="STATE::__TIMESTAMP_INC__", value="STATE::__TIMESTAMP_INC__ + 1"))

        if self._order_ticks:
            csv.sort('TIMESTAMP', inplace=True)

        if columns_to_drop:
            csv.sink(otq.Passthrough(fields=",".join(columns_to_drop), drop_fields="True"))

        return csv
586
+
587
+
588
def LocalCSVTicks(path,  # NOSONAR
                  start=utils.adaptive,
                  end=utils.adaptive,
                  date_converter=default_date_converter,
                  additional_date_columns=None,
                  converters=None,
                  tz=None,
                  ):
    """
    Load ticks from a csv file and build an :class:`otp.Ticks` object from them.

    Parameters
    ----------
    path: str
        Absolute path to csv file
    start: datetime object
        Start of the query interval
    end: datetime object
        End of the query interval
    date_converter:
        A converter from string to datetime format, by default used only to TIMESTAMP column
    additional_date_columns:
        Other columns to convert to datetime format
    converters:
        Non default converters to columns from strings
    tz:
        timezone

    Returns
    -------
    otp.Ticks
    """
    timezone = configuration.config.tz if tz is None else tz

    # Converter applied to TIMESTAMP and to every column in additional_date_columns.
    to_ts = partial(to_timestamp_nanos, date_converter=date_converter, tz=timezone)

    # Build the per-column converter mapping for pandas: TIMESTAMP first,
    # then user-supplied converters, then the extra date columns (which
    # therefore take precedence, as before).
    column_converters = {'TIMESTAMP': to_ts}
    if converters is not None:
        column_converters.update(converters)
    if additional_date_columns is not None:
        for column in additional_date_columns:
            column_converters[column] = to_ts

    frame = pd.read_csv(path, converters=column_converters)

    # 'TIMESTAMP' is reserved by Ticks and '#SYMBOL_NAME' is not a valid field
    # name, so stash both under temporary/plain names before conversion.
    frame['TS_'] = frame['TIMESTAMP']
    frame['SYMBOL_NAME'] = frame['#SYMBOL_NAME']
    data = frame.to_dict(orient='list')
    data.pop('TIMESTAMP')
    data.pop('#SYMBOL_NAME')

    ticks = Ticks(data, start=start, end=end)
    # Restore the parsed timestamps into the TIMESTAMP field and drop the stash.
    ticks['TIMESTAMP'] = ticks['TS_']
    return ticks.drop('TS_')