onetick-py 1.162.2 (py3-none-any.whl)

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152)
  1. locator_parser/__init__.py +0 -0
  2. locator_parser/acl.py +73 -0
  3. locator_parser/actions.py +266 -0
  4. locator_parser/common.py +365 -0
  5. locator_parser/io.py +41 -0
  6. locator_parser/locator.py +150 -0
  7. onetick/__init__.py +101 -0
  8. onetick/doc_utilities/__init__.py +3 -0
  9. onetick/doc_utilities/napoleon.py +40 -0
  10. onetick/doc_utilities/ot_doctest.py +140 -0
  11. onetick/doc_utilities/snippets.py +280 -0
  12. onetick/lib/__init__.py +4 -0
  13. onetick/lib/instance.py +138 -0
  14. onetick/py/__init__.py +290 -0
  15. onetick/py/_stack_info.py +89 -0
  16. onetick/py/_version.py +2 -0
  17. onetick/py/aggregations/__init__.py +11 -0
  18. onetick/py/aggregations/_base.py +645 -0
  19. onetick/py/aggregations/_docs.py +912 -0
  20. onetick/py/aggregations/compute.py +286 -0
  21. onetick/py/aggregations/functions.py +2216 -0
  22. onetick/py/aggregations/generic.py +104 -0
  23. onetick/py/aggregations/high_low.py +80 -0
  24. onetick/py/aggregations/num_distinct.py +83 -0
  25. onetick/py/aggregations/order_book.py +427 -0
  26. onetick/py/aggregations/other.py +1014 -0
  27. onetick/py/backports.py +26 -0
  28. onetick/py/cache.py +373 -0
  29. onetick/py/callback/__init__.py +5 -0
  30. onetick/py/callback/callback.py +275 -0
  31. onetick/py/callback/callbacks.py +131 -0
  32. onetick/py/compatibility.py +752 -0
  33. onetick/py/configuration.py +736 -0
  34. onetick/py/core/__init__.py +0 -0
  35. onetick/py/core/_csv_inspector.py +93 -0
  36. onetick/py/core/_internal/__init__.py +0 -0
  37. onetick/py/core/_internal/_manually_bound_value.py +6 -0
  38. onetick/py/core/_internal/_nodes_history.py +250 -0
  39. onetick/py/core/_internal/_op_utils/__init__.py +0 -0
  40. onetick/py/core/_internal/_op_utils/every_operand.py +9 -0
  41. onetick/py/core/_internal/_op_utils/is_const.py +10 -0
  42. onetick/py/core/_internal/_per_tick_scripts/tick_list_sort_template.script +121 -0
  43. onetick/py/core/_internal/_proxy_node.py +140 -0
  44. onetick/py/core/_internal/_state_objects.py +2307 -0
  45. onetick/py/core/_internal/_state_vars.py +87 -0
  46. onetick/py/core/_source/__init__.py +0 -0
  47. onetick/py/core/_source/_symbol_param.py +95 -0
  48. onetick/py/core/_source/schema.py +97 -0
  49. onetick/py/core/_source/source_methods/__init__.py +0 -0
  50. onetick/py/core/_source/source_methods/aggregations.py +810 -0
  51. onetick/py/core/_source/source_methods/applyers.py +296 -0
  52. onetick/py/core/_source/source_methods/columns.py +141 -0
  53. onetick/py/core/_source/source_methods/data_quality.py +301 -0
  54. onetick/py/core/_source/source_methods/debugs.py +270 -0
  55. onetick/py/core/_source/source_methods/drops.py +120 -0
  56. onetick/py/core/_source/source_methods/fields.py +619 -0
  57. onetick/py/core/_source/source_methods/filters.py +1001 -0
  58. onetick/py/core/_source/source_methods/joins.py +1393 -0
  59. onetick/py/core/_source/source_methods/merges.py +566 -0
  60. onetick/py/core/_source/source_methods/misc.py +1325 -0
  61. onetick/py/core/_source/source_methods/pandases.py +155 -0
  62. onetick/py/core/_source/source_methods/renames.py +356 -0
  63. onetick/py/core/_source/source_methods/sorts.py +183 -0
  64. onetick/py/core/_source/source_methods/switches.py +142 -0
  65. onetick/py/core/_source/source_methods/symbols.py +117 -0
  66. onetick/py/core/_source/source_methods/times.py +627 -0
  67. onetick/py/core/_source/source_methods/writes.py +702 -0
  68. onetick/py/core/_source/symbol.py +202 -0
  69. onetick/py/core/_source/tmp_otq.py +222 -0
  70. onetick/py/core/column.py +209 -0
  71. onetick/py/core/column_operations/__init__.py +0 -0
  72. onetick/py/core/column_operations/_methods/__init__.py +4 -0
  73. onetick/py/core/column_operations/_methods/_internal.py +28 -0
  74. onetick/py/core/column_operations/_methods/conversions.py +215 -0
  75. onetick/py/core/column_operations/_methods/methods.py +294 -0
  76. onetick/py/core/column_operations/_methods/op_types.py +150 -0
  77. onetick/py/core/column_operations/accessors/__init__.py +0 -0
  78. onetick/py/core/column_operations/accessors/_accessor.py +30 -0
  79. onetick/py/core/column_operations/accessors/decimal_accessor.py +92 -0
  80. onetick/py/core/column_operations/accessors/dt_accessor.py +464 -0
  81. onetick/py/core/column_operations/accessors/float_accessor.py +160 -0
  82. onetick/py/core/column_operations/accessors/str_accessor.py +1374 -0
  83. onetick/py/core/column_operations/base.py +1061 -0
  84. onetick/py/core/cut_builder.py +149 -0
  85. onetick/py/core/db_constants.py +20 -0
  86. onetick/py/core/eval_query.py +244 -0
  87. onetick/py/core/lambda_object.py +442 -0
  88. onetick/py/core/multi_output_source.py +193 -0
  89. onetick/py/core/per_tick_script.py +2253 -0
  90. onetick/py/core/query_inspector.py +465 -0
  91. onetick/py/core/source.py +1663 -0
  92. onetick/py/db/__init__.py +2 -0
  93. onetick/py/db/_inspection.py +1042 -0
  94. onetick/py/db/db.py +1423 -0
  95. onetick/py/db/utils.py +64 -0
  96. onetick/py/docs/__init__.py +0 -0
  97. onetick/py/docs/docstring_parser.py +112 -0
  98. onetick/py/docs/utils.py +81 -0
  99. onetick/py/functions.py +2354 -0
  100. onetick/py/license.py +188 -0
  101. onetick/py/log.py +88 -0
  102. onetick/py/math.py +947 -0
  103. onetick/py/misc.py +437 -0
  104. onetick/py/oqd/__init__.py +22 -0
  105. onetick/py/oqd/eps.py +1195 -0
  106. onetick/py/oqd/sources.py +325 -0
  107. onetick/py/otq.py +211 -0
  108. onetick/py/pyomd_mock.py +47 -0
  109. onetick/py/run.py +841 -0
  110. onetick/py/servers.py +173 -0
  111. onetick/py/session.py +1342 -0
  112. onetick/py/sources/__init__.py +19 -0
  113. onetick/py/sources/cache.py +167 -0
  114. onetick/py/sources/common.py +126 -0
  115. onetick/py/sources/csv.py +642 -0
  116. onetick/py/sources/custom.py +85 -0
  117. onetick/py/sources/data_file.py +305 -0
  118. onetick/py/sources/data_source.py +1049 -0
  119. onetick/py/sources/empty.py +94 -0
  120. onetick/py/sources/odbc.py +337 -0
  121. onetick/py/sources/order_book.py +238 -0
  122. onetick/py/sources/parquet.py +168 -0
  123. onetick/py/sources/pit.py +191 -0
  124. onetick/py/sources/query.py +495 -0
  125. onetick/py/sources/snapshots.py +419 -0
  126. onetick/py/sources/split_query_output_by_symbol.py +198 -0
  127. onetick/py/sources/symbology_mapping.py +123 -0
  128. onetick/py/sources/symbols.py +357 -0
  129. onetick/py/sources/ticks.py +825 -0
  130. onetick/py/sql.py +70 -0
  131. onetick/py/state.py +256 -0
  132. onetick/py/types.py +2056 -0
  133. onetick/py/utils/__init__.py +70 -0
  134. onetick/py/utils/acl.py +93 -0
  135. onetick/py/utils/config.py +186 -0
  136. onetick/py/utils/default.py +49 -0
  137. onetick/py/utils/file.py +38 -0
  138. onetick/py/utils/helpers.py +76 -0
  139. onetick/py/utils/locator.py +94 -0
  140. onetick/py/utils/perf.py +499 -0
  141. onetick/py/utils/query.py +49 -0
  142. onetick/py/utils/render.py +1139 -0
  143. onetick/py/utils/script.py +244 -0
  144. onetick/py/utils/temp.py +471 -0
  145. onetick/py/utils/types.py +118 -0
  146. onetick/py/utils/tz.py +82 -0
  147. onetick_py-1.162.2.dist-info/METADATA +148 -0
  148. onetick_py-1.162.2.dist-info/RECORD +152 -0
  149. onetick_py-1.162.2.dist-info/WHEEL +5 -0
  150. onetick_py-1.162.2.dist-info/entry_points.txt +2 -0
  151. onetick_py-1.162.2.dist-info/licenses/LICENSE +21 -0
  152. onetick_py-1.162.2.dist-info/top_level.txt +2 -0
onetick/py/sources/data_source.py
@@ -0,0 +1,1049 @@
1
+ import datetime as dt
2
+ import inspect
3
+ import warnings
4
+
5
+ from typing import Dict, Iterable, Optional
6
+
7
+ import onetick.py as otp
8
+ from onetick.py.otq import otq
9
+
10
+ from onetick.py.db import _inspection
11
+ from onetick.py.core._source._symbol_param import _SymbolParamColumn
12
+ from onetick.py.core._source.tmp_otq import TmpOtq
13
+ from onetick.py.core.eval_query import _QueryEvalWrapper
14
+ from onetick.py.core.source import Source
15
+ from onetick.py.core.column_operations.base import Raw, OnetickParameter
16
+
17
+ from .. import types as ott
18
+ from .. import utils
19
+ from ..core.column_operations.base import _Operation
20
+ from ..db.db import DB
21
+ from ..compatibility import is_supported_where_clause_for_back_ticks
22
+
23
+ from onetick.py.docs.utils import docstring, param_doc
24
+
25
+ from .common import convert_tick_type_to_str, get_start_end_by_date
26
+ from .symbols import Symbols
27
+ from .ticks import Ticks
28
+ from .query import query
29
+
30
+
31
+ _db_doc = param_doc(
32
+ name='db',
33
+ desc="""
34
+ Name(s) of the database or the database object(s).
35
+ """,
36
+ str_annotation='str, list of str, :class:`otp.DB <onetick.py.DB>`',
37
+ default=None,
38
+ str_default='None',
39
+ )
40
+ _symbol_doc = param_doc(
41
+ name='symbol',
42
+ desc="""
43
+ Symbol(s) from which data should be taken.
44
+ """,
45
+ str_annotation='str, list of str, :class:`Source`, :class:`query`, :py:func:`eval query <onetick.py.eval>`',
46
+ default=utils.adaptive,
47
+ str_default=' :py:class:`onetick.py.adaptive`',
48
+ )
49
+ _symbols_doc = param_doc(
50
+ name='symbols',
51
+ desc="""
52
+ Symbol(s) from which data should be taken.
53
+ Alias for ``symbol`` parameter. Will take precedence over it.
54
+ """,
55
+ str_annotation=('str, list of str, :class:`Source`, :class:`query`, :py:func:`eval query <onetick.py.eval>`, '
56
+ ':py:class:`onetick.query.GraphQuery`.'),
57
+ default=None,
58
+ )
59
+ _tick_type_doc = param_doc(
60
+ name='tick_type',
61
+ desc="""
62
+ Tick type of the data.
63
+ If not specified, all ticks from `db` will be taken.
64
+ If ticks can't be found, or if there are multiple databases specified in `db`, then the default is "TRD".
65
+ """,
66
+ str_annotation='str, list of str',
67
+ default=utils.adaptive,
68
+ str_default=' :py:class:`onetick.py.adaptive`',
69
+ )
70
+ _start_doc = param_doc(
71
+ name='start',
72
+ desc="""
73
+ Start of the interval from which the data should be taken.
74
+ Default is :py:class:`onetick.py.adaptive`, making the final query deduce the time
75
+ limits from the rest of the graph.
76
+ """,
77
+ str_annotation=(
78
+ ':py:class:`datetime.datetime`, :py:class:`otp.datetime <onetick.py.datetime>`,'
79
+ ' :py:class:`onetick.py.adaptive`'
80
+ ),
81
+ default=utils.adaptive,
82
+ str_default=' :py:class:`onetick.py.adaptive`',
83
+ )
84
+ _end_doc = param_doc(
85
+ name='end',
86
+ desc="""
87
+ End of the interval from which the data should be taken.
88
+ Default is :py:class:`onetick.py.adaptive`, making the final query deduce the time
89
+ limits from the rest of the graph.
90
+ """,
91
+ str_annotation=(
92
+ ':py:class:`datetime.datetime`, :py:class:`otp.datetime <onetick.py.datetime>`,'
93
+ ' :py:class:`onetick.py.adaptive`'
94
+ ),
95
+ default=utils.adaptive,
96
+ str_default=' :py:class:`onetick.py.adaptive`',
97
+ )
98
+ _date_doc = param_doc(
99
+ name='date',
100
+ desc="""
101
+ Allows specifying a whole day instead of passing the ``start`` and ``end`` parameters explicitly.
102
+ If it is set along with the ``start`` and ``end`` parameters, the latter two are ignored.
103
+ """,
104
+ str_annotation=":class:`datetime.datetime`, :class:`otp.datetime <onetick.py.datetime>`",
105
+ default=None,
106
+ )
107
+ _schema_policy_doc = param_doc(
108
+ name='schema_policy',
109
+ desc="""
110
+ Schema deduction policy:
111
+
112
+ - 'tolerant' (default)
113
+ The resulting schema is a combination of ``schema`` and database schema.
114
+ If the database schema can be deduced,
115
+ it is checked to be type-compatible with ``schema``,
116
+ and ValueError is raised if the checks fail.
117
+ Also, with this policy the database is scanned up to 5 days back to find the schema.
118
+ It is useful when the database is misconfigured or in case of holidays.
119
+
120
+ - 'tolerant_strict'
121
+ The resulting schema will be ``schema`` if it's not empty.
122
+ Otherwise, database schema is used.
123
+ If the database schema can be deduced,
124
+ it is checked whether it lacks fields from the ``schema``
125
+ and whether it is type-compatible with ``schema``,
126
+ and ValueError is raised if the checks fail.
127
+ Also, with this policy the database is scanned up to 5 days back to find the schema.
128
+ It is useful when the database is misconfigured or in case of holidays.
129
+
130
+ - 'fail'
131
+ The same as 'tolerant', but if the database schema can't be deduced, raises an Exception.
132
+
133
+ - 'fail_strict'
134
+ The same as 'tolerant_strict', but if the database schema can't be deduced, raises an Exception.
135
+
136
+ - 'manual'
137
+ The resulting schema is a combination of ``schema`` and database schema.
138
+ Compatibility with database schema will not be checked.
139
+
140
+ - 'manual_strict'
141
+ The resulting schema will be exactly ``schema``.
142
+ Compatibility with database schema will not be checked.
143
+ If some fields specified in ``schema`` do not exist in the database,
144
+ their values will be set to some default value for a type
145
+ (0 for integers, NaNs for floats, empty string for strings, epoch for datetimes).
146
+
147
+ Default value is :py:class:`onetick.py.adaptive` (if deprecated parameter ``guess_schema`` is not set).
148
+ If ``guess_schema`` is set to True then value is 'fail', if False then 'manual'.
149
+ If ``schema_policy`` is set to ``None`` then default value is 'tolerant'.
150
+
151
+ Default value can be changed with
152
+ :py:attr:`otp.config.default_schema_policy<onetick.py.configuration.Config.default_schema_policy>`
153
+ configuration parameter.
154
+
155
+ If you set the schema manually while creating a DataSource instance and don't set ``schema_policy``,
156
+ it will automatically be set to ``manual``.
157
+ """,
158
+ str_annotation="'tolerant', 'tolerant_strict', 'fail', 'fail_strict', 'manual', 'manual_strict'",
159
+ default=utils.adaptive,
160
+ str_default=' :py:class:`onetick.py.adaptive`',
161
+ )
162
+ _guess_schema_doc = param_doc(
163
+ name='guess_schema',
164
+ desc="""
165
+ .. deprecated:: 1.3.16
166
+
167
+ Use ``schema_policy`` parameter instead.
168
+
169
+ If ``guess_schema`` is set to True then ``schema_policy`` value is 'fail', if False then 'manual'.
170
+ """,
171
+ annotation=bool,
172
+ default=None,
173
+ )
174
+ _identify_input_ts_doc = param_doc(
175
+ name='identify_input_ts',
176
+ desc="""
177
+ If set to False, the fields SYMBOL_NAME and TICK_TYPE are not appended to the output ticks.
178
+ """,
179
+ annotation=bool,
180
+ default=False,
181
+ )
182
+ _back_to_first_tick_doc = param_doc(
183
+ name='back_to_first_tick',
184
+ desc="""
185
+ Determines how far back to go looking for the latest tick before ``start`` time.
186
+ If one is found, it is inserted into the output time series with the timestamp set to ``start`` time.
187
+ Note: the value is truncated to a whole number of seconds, so otp.Millis(999) becomes 0 seconds.
188
+ """,
189
+ str_annotation=('int, :ref:`offset <datetime_offsets>`, '
190
+ ':class:`otp.expr <onetick.py.expr>`, '
191
+ ':py:class:`~onetick.py.Operation`'),
192
+ default=0,
193
+ )
194
+ _keep_first_tick_timestamp_doc = param_doc(
195
+ name='keep_first_tick_timestamp',
196
+ desc="""
197
+ If set, a new field with this name will be added to the source.
198
+ This field contains the original timestamp of the tick that was taken from before the start time of the query.
199
+ For all other ticks the value of this field will be equal to the value of the Time field.
200
+ This parameter is ignored if ``back_to_first_tick`` is not set.
201
+ """,
202
+ annotation=str,
203
+ default=None,
204
+ )
205
+ _presort_doc = param_doc(
206
+ name='presort',
207
+ desc="""
208
+ Add the presort EP in case of bound symbols.
209
+ Applicable only when ``symbols`` is not None.
210
+ By default, it is set to True if ``symbols`` are set
211
+ and to False otherwise.
212
+ """,
213
+ annotation=bool,
214
+ default=utils.adaptive,
215
+ str_default=' :py:class:`onetick.py.adaptive`',
216
+ )
217
+ _concurrency_doc = param_doc(
218
+ name='concurrency',
219
+ desc="""
220
+ Specifies the number of CPU cores to utilize for the ``presort``.
221
+ By default, the value is inherited from the value of the query where this PRESORT is used.
222
+
223
+ For the main query it may be specified in the ``concurrency`` parameter of :meth:`run` method
224
+ (which by default is set to
225
+ :py:attr:`otp.config.default_concurrency<onetick.py.configuration.Config.default_concurrency>`).
226
+
227
+ For the auxiliary queries (like first-stage queries) empty value means OneTick's default of 1.
228
+ If :py:attr:`otp.config.presort_force_default_concurrency<onetick.py.configuration.Config.presort_force_default_concurrency>`
229
+ is set then default concurrency value will be set in all PRESORT EPs in all queries.
230
+ """, # noqa: E501
231
+ annotation=int,
232
+ default=utils.default,
233
+ str_default=' :py:class:`onetick.py.utils.default`',
234
+ )
235
+ _batch_size_doc = param_doc(
236
+ name='batch_size',
237
+ desc="""
238
+ Specifies the query batch size for the ``presort``.
239
+ By default, the value from
240
+ :py:attr:`otp.config.default_batch_size<onetick.py.configuration.Config.default_batch_size>` is used.
241
+ """,
242
+ annotation=int,
243
+ default=None,
244
+ )
245
+ _schema_doc = param_doc(
246
+ name='schema',
247
+ desc="""
248
+ Dict of <column name> -> <column type> pairs that the source is expected to have.
249
+ If the type is irrelevant, provide None as the type in question.
250
+ """,
251
+ annotation=Optional[Dict[str, type]],
252
+ default=None,
253
+ )
254
+ _desired_schema_doc = param_doc(
255
+ name='kwargs',
256
+ desc="""
257
+ Deprecated. Use ``schema`` instead.
258
+ List of <column name> -> <column type> pairs that the source is expected to have.
259
+ If the type is irrelevant, provide None as the type in question.
260
+ """,
261
+ str_annotation='type[str]',
262
+ kind=inspect.Parameter.VAR_KEYWORD,
263
+ )
264
+
265
+ _max_back_ticks_to_prepend_doc = param_doc(
266
+ name='max_back_ticks_to_prepend',
267
+ desc="""
268
+ When the ``back_to_first_tick`` interval is specified, this parameter determines the maximum number
269
+ of the most recent ticks before start_time that will be prepended to the output time series.
270
+ Their timestamp will be changed to start_time.
271
+ """,
272
+ annotation=int,
273
+ default=1,
274
+ )
275
+
276
+ _where_clause_for_back_ticks_doc = param_doc(
277
+ name='where_clause_for_back_ticks',
278
+ desc="""
279
+ A logical expression that is computed only for the ticks encountered when a query goes back from the start time,
280
+ in search of the ticks to prepend. If it evaluates to false, the tick is ignored.
281
+ """,
282
+ annotation=Raw,
283
+ default=None,
284
+ )
285
+ _symbol_date_doc = param_doc(
286
+ name='symbol_date',
287
+ desc="""
288
+ Symbol date or integer in the YYYYMMDD format.
289
+ Can only be specified if the ``symbols`` parameter is set.
290
+ """,
291
+ str_annotation=':py:class:`otp.datetime <onetick.py.datetime>` or :py:class:`datetime.datetime` or int',
292
+ default=None,
293
+ )
294
+
295
+ DATA_SOURCE_DOC_PARAMS = [
296
+ _db_doc, _symbol_doc, _tick_type_doc,
297
+ _start_doc, _end_doc, _date_doc,
298
+ _schema_policy_doc, _guess_schema_doc,
299
+ _identify_input_ts_doc,
300
+ _back_to_first_tick_doc, _keep_first_tick_timestamp_doc,
301
+ _max_back_ticks_to_prepend_doc,
302
+ _where_clause_for_back_ticks_doc,
303
+ _symbols_doc,
304
+ _presort_doc, _batch_size_doc, _concurrency_doc,
305
+ _schema_doc,
306
+ _symbol_date_doc,
307
+ _desired_schema_doc,
308
+ ]
309
+
310
+
311
+ class DataSource(Source):
312
+
313
+ POLICY_MANUAL = "manual"
314
+ POLICY_MANUAL_STRICT = "manual_strict"
315
+ POLICY_TOLERANT = "tolerant"
316
+ POLICY_TOLERANT_STRICT = "tolerant_strict"
317
+ POLICY_FAIL = "fail"
318
+ POLICY_FAIL_STRICT = "fail_strict"
319
+
320
+ _VALID_POLICIES = frozenset([POLICY_MANUAL, POLICY_MANUAL_STRICT,
321
+ POLICY_TOLERANT, POLICY_TOLERANT_STRICT,
322
+ POLICY_FAIL, POLICY_FAIL_STRICT])
323
+ _PROPERTIES = Source._PROPERTIES + ["_p_db", "_p_strict", "_p_schema", "_schema", "logger"]
324
+
325
+ def __get_schema(self, db, start, schema_policy):
326
+ schema = {}
327
+
328
+ if start is utils.adaptive:
329
+ start = None # means use the last date with data
330
+
331
+ if isinstance(db, list):
332
+ ''' This is the merge case, since we need to get a combined schema
333
+ across different tick types and dbs '''
334
+ for t_db in db:
335
+ if t_db.startswith('expr('):
336
+ continue
337
+
338
+ _db = t_db.split(':')[0]
339
+ _tt = t_db.split(':')[-1]
340
+
341
+ # tick type as parameter
342
+ if _tt.startswith('$'):
343
+ _tt = None
344
+
345
+ db_obj = _inspection.DB(_db)
346
+ if schema_policy == self.POLICY_TOLERANT and start:
347
+ # repeating the same logic as in db_obj.last_date
348
+ start = db_obj.last_not_empty_date(start, days_back=5, tick_type=_tt)
349
+
350
+ db_schema = {}
351
+ try:
352
+ db_schema = db_obj.schema(date=start, tick_type=_tt)
353
+ except Exception as e:
354
+ if _tt is not None:
355
+ warnings.warn(f"Couldn't get schema from the database {db_obj}:\n{e}.\n\n"
356
+ "Set parameter schema_policy='manual' to set the schema manually.")
357
+
358
+ schema.update(db_schema)
359
+
360
+ if db is None or isinstance(db, _SymbolParamColumn):
361
+ ''' In this case we can't get schema, because db is calculated dynamically.
362
+ Set it to empty to indicate that in this case we expect a manually set schema. '''
363
+ schema = {}
364
+ return schema
365
+
366
+ def __prepare_schema(self, db, start, schema_policy, guess_schema, schema):
367
+ if guess_schema is not None:
368
+ warnings.warn(
369
+ "guess_schema flag is deprecated; use schema_policy argument instead",
370
+ FutureWarning,
371
+ )
372
+ if schema_policy is not None:
373
+ raise ValueError("guess_schema and schema_policy cannot be set at the same time")
374
+ if guess_schema:
375
+ schema_policy = self.POLICY_FAIL
376
+ else:
377
+ schema_policy = self.POLICY_MANUAL
378
+
379
+ if schema_policy is None:
380
+ schema_policy = self.POLICY_TOLERANT
381
+ if schema_policy not in self._VALID_POLICIES:
382
+ raise ValueError(f"Invalid schema_policy; allowed values are: {self._VALID_POLICIES}")
383
+
384
+ actual_schema = {}
385
+ if schema_policy not in (self.POLICY_MANUAL, self.POLICY_MANUAL_STRICT):
386
+ actual_schema = self.__get_schema(db, start, schema_policy)
387
+ dbs = ', '.join(db if isinstance(db, list) else [])
388
+
389
+ if len(actual_schema) == 0:
390
+ if schema_policy in (self.POLICY_FAIL, self.POLICY_FAIL_STRICT):
391
+ raise ValueError(f'No ticks found in database(-s) {dbs}')
392
+ # let's try to use at least something
393
+ return schema.copy()
394
+
395
+ for k, v in schema.items():
396
+ field_type = actual_schema.get(k, None)
397
+ incompatible_types = False
398
+ if field_type is None:
399
+ if self._p_strict or schema_policy in (self.POLICY_TOLERANT, self.POLICY_FAIL):
400
+ raise ValueError(f"Database(-s) {dbs} schema has no {k} field")
401
+ elif issubclass(field_type, str) and issubclass(v, str):
402
+ field_length = ott.string.DEFAULT_LENGTH
403
+ if issubclass(field_type, ott.string):
404
+ field_length = field_type.length
405
+ v_length = ott.string.DEFAULT_LENGTH
406
+ if issubclass(v, ott.string):
407
+ v_length = v.length
408
+ if issubclass(field_type, ott.varstring):
409
+ if not issubclass(v, ott.varstring):
410
+ incompatible_types = True
411
+ elif not issubclass(v, ott.varstring) and v_length < field_length:
412
+ incompatible_types = True
413
+ elif not issubclass(field_type, v):
414
+ incompatible_types = True
415
+ if incompatible_types:
416
+ error_message = f"Database(-s) {dbs} schema field {k} has type {field_type}, but {v} was requested"
417
+ if field_type in (str, ott.string) or v in (str, ott.string):
418
+ error_message = f"{error_message}. Notice that `str` and `otp.string` lengths are 64"
419
+ raise ValueError(error_message)
420
+ if not self._p_strict:
421
+ schema.update(actual_schema)
422
+
423
+ table_schema = schema.copy()
424
+ if not self._p_strict:
425
+ # in this case we will table only the user-specified fields that are not already in the database schema
426
+ table_schema = {
427
+ k: v for k, v in table_schema.items() if k not in actual_schema
428
+ }
429
+ return table_schema
430
+
431
+ def __prepare_db_tick_type(self, db, tick_type, start, end):
432
+ if isinstance(db, list):
433
+ ''' If everything is correct then this branch should leave
434
+ the `db` var as a list of databases with tick types and the
435
+ `tick_type` var is None.
436
+ Valid cases:
437
+ - Fully defined case. The `db` parameter has a list of databases where
438
+ every database has a tick type, while the `tick_type`
439
+ parameter has the default value or None (for backward compatibility)
440
+ - Partially defined case. The `db` parameter has a list of databases but
441
+ not every database has a tick type, and meanwhile the `tick_type`
442
+ is set to a not-None value. In that case databases without a tick type
443
+ are extended with the tick type from the `tick_type` parameter
444
+ - Undefined case. The `db` parameter has a list of databases and
445
+ none of the databases has a tick type, and the `tick_type` is
446
+ set to a not-None value. In that case every database is extended with
447
+ the tick type from the `tick_type`.
448
+ '''
449
+
450
+ def db_converter(_db):
451
+ if isinstance(_db, DB):
452
+ return _db.name
453
+ else:
454
+ return _db
455
+
456
+ db = [db_converter(_db) for _db in db]
457
+ res = all(('::' in _db and _db[-1] != ':' for _db in db))
458
+ if res:
459
+ if tick_type is utils.adaptive or tick_type is None:
460
+ tick_type = None # tick type is specified for all databases
461
+ else:
462
+ raise ValueError('The `tick_type` is set as a parameter '
463
+ 'and also as a part of the `db` parameter '
464
+ 'for every database')
465
+ else:
466
+ dbs_without_tt = [_db.split(':')[0] for _db in db
467
+ if '::' not in _db or _db[-1] == ':']
468
+
469
+ if tick_type is utils.adaptive:
470
+ tick_type = 'TRD' # default one for backward compatibility and the testing use case
471
+ if tick_type is None:
472
+ raise ValueError('The tick type is not set for databases: ' +
473
+ ', '.join(dbs_without_tt))
474
+ else:
475
+ # extend databases with missing tick types from the tick_type parameter
476
+ dbs_with_tt = [_db for _db in db
477
+ if '::' in _db and _db[-1] != ':']
478
+
479
+ db = dbs_with_tt + [_db + '::' + tick_type for _db in dbs_without_tt]
480
+ tick_type = None
481
+
482
+ if isinstance(db, (DB, _inspection.DB)):
483
+ db = db.name # ... and we go to the next branch
484
+
485
+ if isinstance(db, str):
486
+ ''' The resulting `db` var contains a list with string value, that has the `db`
487
+ concatenated with the `tick_type`. '''
488
+ if '::' in db:
489
+ if tick_type is utils.adaptive or tick_type is None:
490
+ tick_type = db.split(':')[-1]
491
+ db = db.split('::')[0]
492
+ else:
493
+ raise ValueError('The `tick_type` is set as a parameter '
494
+ 'and also as a part of the `db` parameter')
495
+ else:
496
+ if tick_type is utils.adaptive or tick_type is None:
497
+ db_obj = _inspection.DB(db)
498
+
499
+ # try to find at least one common tick type
500
+ # through all days
501
+ tick_types = None
502
+
503
+ if start is utils.adaptive:
504
+ start = end = db_obj.get_last_date(show_warnings=False)
505
+
506
+ if start and end: # could be None if there is no data
507
+ t_start = start
508
+ while t_start <= end:
509
+ t_tts = set(db_obj.tick_types(t_start))
510
+
511
+ t_start += dt.timedelta(days=1)
512
+
513
+ if len(t_tts) == 0:
514
+ continue
515
+
516
+ if tick_types is None:
517
+ tick_types = t_tts
518
+ else:
519
+ tick_types &= t_tts
520
+
521
+ if len(tick_types) == 0:
522
+ raise ValueError(f'It seems that there is no common '
523
+ f'tick type for dates from {start} '
524
+ f'to {end}. Please specify a tick '
525
+ 'type')
526
+
527
+ if tick_types is None:
528
+ if tick_type is utils.adaptive:
529
+ tick_types = ['TRD'] # the default one
530
+ else:
531
+ raise ValueError(f'Could not find any data from {start} '
532
+ f'to {end}. Please check that the tick type, '
533
+ 'database and date range are correct.')
534
+
535
+ if len(tick_types) != 1:
536
+ raise ValueError('The tick type is not specified, found '
537
+ 'multiple tick types in the database: ' +
538
+ ', '.join(tick_types))
539
+
540
+ tick_type = tick_types.pop()
541
+
542
+ if not isinstance(tick_type, str) and isinstance(tick_type, Iterable):
543
+ if isinstance(tick_type, _SymbolParamColumn):
544
+ db = [f"expr('{db}::' + {str(tick_type)})"]
545
+ else:
546
+ db = [f'{db}::{tt}' for tt in tick_type]
547
+ else:
548
+ db = [db + '::' + tick_type]
549
+ tick_type = None
550
+
551
+ if isinstance(db, _SymbolParamColumn):
552
+ ''' Do nothing, because we don't know whether db will come with the tick
553
+ type or not. The only thing we definitely know is that tick_type
554
+ cannot be utils.adaptive '''
555
+ if tick_type is utils.adaptive:
556
+ # TODO: need to test this case
557
+ raise ValueError('The `db` is set to the symbol param, in that case '
558
+ 'the `tick_type` should be set explicitly to some value '
559
+ 'or to None')
560
+
561
+ if db is None:
562
+ ''' This case means that the database comes with the symbol name, so the tick type
563
+ should be defined '''
564
+ if tick_type is utils.adaptive or tick_type is None:
565
+ raise ValueError('The `db` is not specified, which means the database is '
566
+ 'expected to be defined with the symbol name. '
567
+ 'In that case the `tick_type` should be defined.')
568
+ if not isinstance(tick_type, str) and isinstance(tick_type, Iterable):
569
+ tick_type = '+'.join(tick_type)
570
+
571
+ return db, tick_type
572
+
573
+ @docstring(parameters=DATA_SOURCE_DOC_PARAMS, add_self=True)
574
+ def __init__(
575
+ self,
576
+ db=None,
577
+ symbol=utils.adaptive,
578
+ tick_type=utils.adaptive,
579
+ start=utils.adaptive,
580
+ end=utils.adaptive,
581
+ date=None,
582
+ schema=None,
583
+ schema_policy=utils.adaptive,
584
+ guess_schema=None,
585
+ identify_input_ts=False,
586
+ back_to_first_tick=0,
587
+ keep_first_tick_timestamp=None,
588
+ max_back_ticks_to_prepend=1,
589
+ where_clause_for_back_ticks=None,
590
+ symbols=None,
591
+ presort=utils.adaptive,
592
+ batch_size=None,
593
+ concurrency=utils.default,
594
+ symbol_date=None,
595
+ **kwargs,
596
+ ):
597
+ """
598
+ Construct a source providing data from a given ``db``.
599
+
600
+ .. warning::
601
+
602
+ The default value of the ``schema_policy`` parameter enables automatic deduction
602
+ of the data schema, which is strongly discouraged for production code.
604
+ For details see :ref:`static/concepts/schema:Schema deduction mechanism`.
605
+
606
+ Note
607
+ ----
608
+ If the interval that was set for :py:class:`~onetick.py.DataSource` via the ``start``/``end`` or ``date`` parameters
609
+ does not match the intervals in other :py:class:`~onetick.py.Source` objects used in the query,
610
+ or does not match the whole query interval, then :py:meth:`~otp.Source.modify_query_times` will be applied
611
+ to this ``DataSource`` with the specified interval as the start and end time parameters.
612
+
613
+ If the ``symbols`` parameter is omitted, you need to specify unbound symbols for the query in the ``symbols``
614
+ parameter of the :py:func:`onetick.py.run` function.
615
+
616
+ If ``symbols`` parameter is set, :meth:`otp.merge <onetick.py.merge>` is used to merge all passed bound symbols.
617
+ In this case you don't need to specify unbound symbols in :py:func:`onetick.py.run` call.
618
+
619
+ It's not allowed to specify bound and unbound symbols at the same time.
620
+
621
+ See also
622
+ --------
623
+ :ref:`static/concepts/start_end:Query start / end flow`
624
+ :ref:`static/concepts/symbols:Symbols: bound and unbound`
625
+
626
+ Examples
627
+ --------
628
+
629
+ Query a single symbol from a database:
630
+
631
+ >>> data = otp.DataSource(db='SOME_DB', tick_type='TT', symbols='S1')
632
+ >>> otp.run(data)
633
+ Time X
634
+ 0 2003-12-01 00:00:00.000 1
635
+ 1 2003-12-01 00:00:00.001 2
636
+ 2 2003-12-01 00:00:00.002 3
637
+
638
+ Parameter ``symbols`` can be a list.
639
+ In this case specified symbols will be merged into a single data flow:
640
+
641
+ >>> # OTdirective: snippet-name:fetch data.simple;
642
+ >>> data = otp.DataSource(db='SOME_DB', tick_type='TT', symbols=['S1', 'S2'])
643
+ >>> otp.run(data)
644
+ Time X
645
+ 0 2003-12-01 00:00:00.000 1
646
+ 1 2003-12-01 00:00:00.000 -3
647
+ 2 2003-12-01 00:00:00.001 2
648
+ 3 2003-12-01 00:00:00.001 -2
649
+ 4 2003-12-01 00:00:00.002 3
650
+ 5 2003-12-01 00:00:00.002 -1
651
+
652
+ Parameter ``identify_input_ts`` can be used to automatically add fields with the symbol name and tick type to each tick:
653
+
654
+ >>> data = otp.DataSource(db='SOME_DB', tick_type='TT', symbols=['S1', 'S2'], identify_input_ts=True)
655
+ >>> otp.run(data)
656
+ Time SYMBOL_NAME TICK_TYPE X
657
+ 0 2003-12-01 00:00:00.000 S1 TT 1
658
+ 1 2003-12-01 00:00:00.000 S2 TT -3
659
+ 2 2003-12-01 00:00:00.001 S1 TT 2
660
+ 3 2003-12-01 00:00:00.001 S2 TT -2
661
+ 4 2003-12-01 00:00:00.002 S1 TT 3
662
+ 5 2003-12-01 00:00:00.002 S2 TT -1
663
+
664
+ A Source can also be passed as symbols; in this case the specially named column SYMBOL_NAME will be transformed
665
+ into the symbol and all other columns will become symbol parameters:
666
+
667
+ >>> # OTdirective: snippet-name:fetch data.symbols as a source;
668
+ >>> symbols = otp.Ticks(SYMBOL_NAME=['S1', 'S2'])
669
+ >>> data = otp.DataSource(db='SOME_DB', symbols=symbols, tick_type='TT')
670
+ >>> otp.run(data)
671
+ Time X
672
+ 0 2003-12-01 00:00:00.000 1
673
+ 1 2003-12-01 00:00:00.000 -3
674
+ 2 2003-12-01 00:00:00.001 2
675
+ 3 2003-12-01 00:00:00.001 -2
676
+ 4 2003-12-01 00:00:00.002 3
677
+ 5 2003-12-01 00:00:00.002 -1
678
+
679
+ The default schema policy is **tolerant** (unless you specified the ``schema`` parameter and
680
+ left ``schema_policy`` at its default value, in which case it will be set to **manual**).
681
+
682
+ >>> data = otp.DataSource(
683
+ ... db='US_COMP', tick_type='TRD', symbols='AAPL', date=otp.dt(2022, 3, 1),
684
+ ... )
685
+ >>> data.schema
686
+ {'PRICE': <class 'float'>, 'SIZE': <class 'int'>}
687
+
688
+ >>> data = otp.DataSource(
689
+ ... db='US_COMP', tick_type='TRD', symbols='AAPL', schema={'PRICE': int},
690
+ ... schema_policy='tolerant', date=otp.dt(2022, 3, 1),
691
+ ... )
692
+ Traceback (most recent call last):
693
+ ...
694
+ ValueError: Database(-s) US_COMP::TRD schema field PRICE has type <class 'float'>,
695
+ but <class 'int'> was requested
696
+
697
+ Schema policy **manual** uses exactly ``schema``:
698
+
699
+ >>> data = otp.DataSource(db='US_COMP', tick_type='TRD', symbols='AAPL', schema={'PRICE': float},
700
+ ... date=otp.dt(2022, 3, 1), schema_policy='manual')
701
+ >>> data.schema
702
+ {'PRICE': <class 'float'>}
703
+
704
+ Schema policy **fail** raises an exception if the schema cannot be deduced:
705
+
706
+ >>> data = otp.DataSource(db='US_COMP', tick_type='TRD', symbols='AAPL', date=otp.dt(2021, 3, 1),
707
+ ... schema_policy='fail')
708
+ Traceback (most recent call last):
709
+ ...
710
+ ValueError: No ticks found in database(-s) US_COMP::TRD
711
+
712
+ ``back_to_first_tick`` sets how far back to go looking for the latest tick before ``start`` time:
713
+
714
+ >>> data = otp.DataSource(db='US_COMP', tick_type='TRD', symbols='AAPL', date=otp.dt(2022, 3, 2),
715
+ ... back_to_first_tick=otp.Day(1))
716
+ >>> otp.run(data)
717
+ Time PRICE SIZE
718
+ 0 2022-03-02 00:00:00.000 1.4 50
719
+ 1 2022-03-02 00:00:00.000 1.0 100
720
+ 2 2022-03-02 00:00:00.001 1.1 101
721
+ 3 2022-03-02 00:00:00.002 1.2 102
722
+
723
+ ``keep_first_tick_timestamp`` allows showing the original timestamp of the tick that was taken from before
724
+ the start time of the query:
725
+
726
+ >>> data = otp.DataSource(db='US_COMP', tick_type='TRD', symbols='AAPL', date=otp.dt(2022, 3, 2),
727
+ ... back_to_first_tick=otp.Day(1), keep_first_tick_timestamp='ORIGIN_TIMESTAMP')
728
+ >>> otp.run(data)
729
+ Time ORIGIN_TIMESTAMP PRICE SIZE
730
+ 0 2022-03-02 00:00:00.000 2022-03-01 00:00:00.002 1.4 50
731
+ 1 2022-03-02 00:00:00.000 2022-03-02 00:00:00.000 1.0 100
732
+ 2 2022-03-02 00:00:00.001 2022-03-02 00:00:00.001 1.1 101
733
+ 3 2022-03-02 00:00:00.002 2022-03-02 00:00:00.002 1.2 102
734
+
735
+ ``max_back_ticks_to_prepend`` is used with ``back_to_first_tick``
736
+ if more than one tick before the start time should be retrieved:
737
+
738
+ >>> data = otp.DataSource(db='US_COMP', tick_type='TRD', symbols='AAPL', date=otp.dt(2022, 3, 2),
739
+ ... max_back_ticks_to_prepend=2, back_to_first_tick=otp.Day(1),
740
+ ... keep_first_tick_timestamp='ORIGIN_TIMESTAMP')
741
+ >>> otp.run(data)
742
+ Time ORIGIN_TIMESTAMP PRICE SIZE
743
+ 0 2022-03-02 00:00:00.000 2022-03-01 00:00:00.001 1.4 10
744
+ 1 2022-03-02 00:00:00.000 2022-03-01 00:00:00.002 1.4 50
745
+ 2 2022-03-02 00:00:00.000 2022-03-02 00:00:00.000 1.0 100
746
+ 3 2022-03-02 00:00:00.001 2022-03-02 00:00:00.001 1.1 101
747
+ 4 2022-03-02 00:00:00.002 2022-03-02 00:00:00.002 1.2 102
748
+
749
+ ``where_clause_for_back_ticks`` is used to filter out ticks before the start time:
750
+
751
+ .. testcode::
752
+ :skipif: not is_supported_where_clause_for_back_ticks()
753
+
754
+ data = otp.DataSource(db='US_COMP', tick_type='TRD', symbols='AAPL', date=otp.dt(2022, 3, 2),
755
+ where_clause_for_back_ticks=otp.raw('SIZE>=50', dtype=bool),
756
+ back_to_first_tick=otp.Day(1), max_back_ticks_to_prepend=2,
757
+ keep_first_tick_timestamp='ORIGIN_TIMESTAMP')
758
+ df = otp.run(data)
759
+ print(df)
760
+
761
+ .. testoutput::
762
+
763
+ Time ORIGIN_TIMESTAMP PRICE SIZE
764
+ 0 2022-03-02 00:00:00.000 2022-03-01 00:00:00.000 1.3 100
765
+ 1 2022-03-02 00:00:00.000 2022-03-01 00:00:00.002 1.4 50
766
+ 2 2022-03-02 00:00:00.000 2022-03-02 00:00:00.000 1.0 100
767
+ 3 2022-03-02 00:00:00.001 2022-03-02 00:00:00.001 1.1 101
768
+ 4 2022-03-02 00:00:00.002 2022-03-02 00:00:00.002 1.2 102
769
+ """
770
+
771
+ self.logger = otp.get_logger(__name__, self.__class__.__name__)
772
+
773
+ if self._try_default_constructor(schema=schema, **kwargs):
774
+ return
775
+
776
+ schema = self._select_schema(schema, kwargs)
777
+
778
+ if schema and (not schema_policy or schema_policy is utils.adaptive):
779
+ schema_policy = self.POLICY_MANUAL
780
+
781
+ if schema_policy is utils.adaptive:
782
+ schema_policy = otp.config.default_schema_policy
783
+
784
+ # for cases when we want to explicitly convert into string,
785
+ # it might be symbol param or join_with_query parameter
786
+ if isinstance(tick_type, OnetickParameter):
787
+ tick_type = tick_type.parameter_expression
788
+
789
+ if date:
790
+ # TODO: write a warning in that case
791
+ start, end = get_start_end_by_date(date)
792
+
793
+ db, tick_type = self.__prepare_db_tick_type(db,
794
+ tick_type,
795
+ start,
796
+ end)
797
+
798
+ self._p_db = db
799
+
800
+ if not schema and schema_policy == self.POLICY_MANUAL_STRICT:
801
+ raise ValueError(
802
+ f"'{self.POLICY_MANUAL_STRICT}' schema policy was specified, but no schema has been provided"
803
+ )
804
+
805
+ self._p_strict = schema_policy in (self.POLICY_FAIL_STRICT,
806
+ self.POLICY_TOLERANT_STRICT,
807
+ self.POLICY_MANUAL_STRICT)
808
+
809
+ # this is deprecated, but user may have set some complex types or values in schema,
810
+ # let's infer basic onetick-py types from them
811
+ for k, v in schema.items():
812
+ schema[k] = ott.get_source_base_type(v)
813
+
814
+ self._p_schema = self.__prepare_schema(db, # tick type is embedded into the db
815
+ start,
816
+ schema_policy,
817
+ guess_schema,
818
+ schema)
819
+
820
+ if symbols is not None:
821
+ if symbol is utils.adaptive or symbol is None:
822
+ symbol = symbols
823
+ else:
824
+ # TODO: test it
825
+ raise ValueError('You have set the `symbol` and `symbols` parameters '
826
+ 'together, which is not allowed. Please clarify the parameters')
827
+
828
+ if symbol_date is not None:
829
+ if symbol is utils.adaptive or symbol is None:
830
+ raise ValueError("Parameter 'symbol_date' can only be specified together with parameter 'symbols'")
831
+ if isinstance(symbol, (str, list)):
832
+ # this is a hack
833
+ # onetick.query doesn't have an interface to set symbol_date for the EP node
834
+ # so instead of setting symbols for the EP node,
835
+ # we will turn symbol list into the first stage query, and symbol_date will be set for this query
836
+ if isinstance(symbol, str):
837
+ symbol = [symbol]
838
+ symbol = Ticks(SYMBOL_NAME=symbol)
839
+
840
+ if isinstance(symbol, Symbols) and symbol._p_db is None:
841
+ symbol = Symbols.duplicate(symbol, db=db)
842
+
843
+ if identify_input_ts:
844
+ if "SYMBOL_NAME" in schema:
845
+ # TODO: think about how user could workaround it
846
+ raise ValueError("Parameter 'identify_input_ts' is set,"
847
+ " but field 'SYMBOL_NAME' is already in the schema")
848
+ schema["SYMBOL_NAME"] = str
849
+ self._p_schema["SYMBOL_NAME"] = str
850
+ if "TICK_TYPE" in schema:
851
+ raise ValueError("Parameter 'identify_input_ts' is set,"
852
+ " but field 'TICK_TYPE' is already in the schema")
853
+ schema["TICK_TYPE"] = str
854
+ self._p_schema["TICK_TYPE"] = str
855
+
856
+ # unobvious way to convert otp.Minute/Hour/... to number of seconds
857
+ if type(back_to_first_tick).__name__ == '_DatePartCls':
858
+ back_to_first_tick = int((ott.dt(0) + back_to_first_tick).timestamp())
859
+
860
+ if isinstance(back_to_first_tick, _Operation):
861
+ back_to_first_tick = otp.expr(back_to_first_tick)
862
+
863
+ if back_to_first_tick != 0 and keep_first_tick_timestamp:
864
+ schema[keep_first_tick_timestamp] = ott.nsectime
865
+ self._p_schema[keep_first_tick_timestamp] = ott.nsectime
866
+
867
+ if max_back_ticks_to_prepend < 1:
868
+ raise ValueError(f'`max_back_ticks_to_prepend` must be at least 1 '
869
+ f'but {max_back_ticks_to_prepend} was passed')
870
+
871
+ if where_clause_for_back_ticks is not None:
872
+ # TODO: add otp.param here
873
+ if not isinstance(where_clause_for_back_ticks, Raw):
874
+ raise ValueError(f'Currently only otp.raw is supported for `where_clause_for_back_ticks` '
875
+ f'but {type(where_clause_for_back_ticks)} was passed')
876
+ if where_clause_for_back_ticks.dtype is not bool:
877
+ raise ValueError(f'Only bool dtype for otp.raw in `where_clause_for_back_ticks` is supported '
878
+ f'but {where_clause_for_back_ticks.dtype} was passed')
879
+ where_clause_for_back_ticks = str(where_clause_for_back_ticks)
880
+
881
+ self._schema = schema
882
+
883
+ if isinstance(symbol, _QueryEvalWrapper):
884
+ symbol_str = repr(symbol)
885
+ else:
886
+ symbol_str = symbol
887
+ self.logger.info(
888
+ otp.utils.json_dumps(dict(db=db, symbol=symbol_str, tick_type=tick_type, start=start, end=end))
889
+ )
890
+
891
+ if (
892
+ isinstance(symbol, Source)
893
+ or hasattr(symbol, "__iter__")
894
+ and not isinstance(symbol, dict)
895
+ and not isinstance(symbol, str)
896
+ and not isinstance(symbol, (OnetickParameter, _SymbolParamColumn))
897
+ or isinstance(symbol, query)
898
+ or isinstance(symbol, _QueryEvalWrapper)
899
+ or isinstance(symbol, otq.GraphQuery)
900
+ ):
901
+ super().__init__(
902
+ _start=start,
903
+ _end=end,
904
+ _base_ep_func=lambda: self._base_ep_for_cross_symbol(
905
+ db, symbol, tick_type,
906
+ identify_input_ts=identify_input_ts,
907
+ back_to_first_tick=back_to_first_tick,
908
+ keep_first_tick_timestamp=keep_first_tick_timestamp,
909
+ presort=presort, batch_size=batch_size, concurrency=concurrency,
910
+ max_back_ticks_to_prepend=max_back_ticks_to_prepend,
911
+ where_clause_for_back_ticks=where_clause_for_back_ticks,
912
+ symbol_date=symbol_date,
913
+ ),
914
+ schema=schema,
915
+ )
916
+ else:
917
+ super().__init__(
918
+ _symbols=symbol,
919
+ _start=start,
920
+ _end=end,
921
+ _base_ep_func=lambda: self.base_ep(
922
+ db,
923
+ tick_type,
924
+ identify_input_ts=identify_input_ts,
925
+ back_to_first_tick=back_to_first_tick,
926
+ keep_first_tick_timestamp=keep_first_tick_timestamp,
927
+ max_back_ticks_to_prepend=max_back_ticks_to_prepend,
928
+ where_clause_for_back_ticks=where_clause_for_back_ticks,
929
+ ),
930
+ schema=schema,
931
+ )
932
+
933
+ @property
934
+ def db(self):
935
+ return self._p_db
936
+
937
+ def _create_source(self, passthrough_ep, back_to_first_tick=0, keep_first_tick_timestamp=None):
938
+ """Create graph that save original timestamp of first tick if needed"""
939
+ if back_to_first_tick != 0 and keep_first_tick_timestamp:
940
+ src = Source(otq.Passthrough(), schema=self._schema)
941
+ src.sink(otq.AddField(field=keep_first_tick_timestamp, value='TIMESTAMP'))
942
+ src.sink(passthrough_ep)
943
+ return src
944
+ return Source(passthrough_ep, schema=self._schema)
945
+
946
+ def _table_schema(self, src):
947
+ return src.table(**self._p_schema, strict=self._p_strict)
948
+
949
+ def base_ep(
950
+ self,
951
+ db,
952
+ tick_type,
953
+ identify_input_ts,
954
+ back_to_first_tick=0,
955
+ keep_first_tick_timestamp=None,
956
+ max_back_ticks_to_prepend=1,
957
+ where_clause_for_back_ticks=None,
958
+ ):
959
+ str_db = convert_tick_type_to_str(tick_type, db)
960
+ params = dict(
961
+ go_back_to_first_tick=back_to_first_tick,
962
+ max_back_ticks_to_prepend=max_back_ticks_to_prepend,
963
+ )
964
+
965
+ if where_clause_for_back_ticks is not None:
966
+ params['where_clause_for_back_ticks'] = where_clause_for_back_ticks
967
+
968
+ if isinstance(db, list) or isinstance(db, _SymbolParamColumn):
969
+ src = self._create_source(otq.Passthrough(**params),
970
+ back_to_first_tick=back_to_first_tick,
971
+ keep_first_tick_timestamp=keep_first_tick_timestamp)
972
+
973
+ if identify_input_ts or '+' in str_db or str_db.startswith('expr('):
974
+ # PY-941: use MERGE only if we need to identify input or there are many databases,
975
+ # otherwise use PASSTHROUGH, it seems to work faster in some cases
976
+ src.sink(otq.Merge(identify_input_ts=identify_input_ts))
977
+ else:
978
+ if identify_input_ts:
979
+ params["fields"] = "SYMBOL_NAME,TICK_TYPE"
980
+ params["drop_fields"] = True
981
+
982
+ src = self._create_source(otq.Passthrough(**params),
983
+ back_to_first_tick=back_to_first_tick,
984
+ keep_first_tick_timestamp=keep_first_tick_timestamp)
985
+ src.tick_type(str_db)
986
+
987
+ src = self._table_schema(src)
988
+ return src
989
+
990
+ def _base_ep_for_cross_symbol(
991
+ self, db, symbol, tick_type, identify_input_ts,
992
+ back_to_first_tick=0, keep_first_tick_timestamp=None,
993
+ presort=utils.adaptive, batch_size=None, concurrency=utils.default,
994
+ max_back_ticks_to_prepend=1,
995
+ where_clause_for_back_ticks=None,
996
+ symbol_date=None,
997
+ ):
998
+ tmp_otq = TmpOtq()
999
+
1000
+ if isinstance(symbol, _QueryEvalWrapper):
1001
+ symbol = symbol.to_eval_string(tmp_otq=tmp_otq, symbol_date=symbol_date)
1002
+ elif isinstance(symbol, query):
1003
+ if symbol_date is not None:
1004
+ raise ValueError("Parameter 'symbol_date' is not supported if symbols are set with otp.query object")
1005
+ symbol = symbol.to_eval_string()
1006
+ elif isinstance(symbol, (Source, otq.GraphQuery)):
1007
+ symbol = Source._convert_symbol_to_string(symbol, tmp_otq, symbol_date=symbol_date)
1008
+
1009
+ self.logger.info(f'symbol={symbol}')
1010
+
1011
+ tick_type = convert_tick_type_to_str(tick_type, db)
1012
+
1013
+ kwargs = dict(
1014
+ go_back_to_first_tick=back_to_first_tick,
1015
+ max_back_ticks_to_prepend=max_back_ticks_to_prepend,
1016
+ )
1017
+
1018
+ if where_clause_for_back_ticks is not None:
1019
+ kwargs['where_clause_for_back_ticks'] = where_clause_for_back_ticks
1020
+
1021
+ src = self._create_source(otq.Passthrough(**kwargs),
1022
+ back_to_first_tick=back_to_first_tick,
1023
+ keep_first_tick_timestamp=keep_first_tick_timestamp)
1024
+ if presort is utils.adaptive:
1025
+ presort = True
1026
+ if presort:
1027
+ if batch_size is None:
1028
+ batch_size = otp.config.default_batch_size
1029
+ if concurrency is utils.default:
1030
+ concurrency = otp.configuration.default_presort_concurrency()
1031
+ if concurrency is None:
1032
+ # None means inherit concurrency from the query where this EP is used
1033
+ # otq.Presort does not support None
1034
+ concurrency = ''
1035
+ src.sink(
1036
+ otq.Presort(batch_size=batch_size, max_concurrency=concurrency).symbols(symbol).tick_type(tick_type)
1037
+ )
1038
+ src.sink(otq.Merge(identify_input_ts=identify_input_ts))
1039
+ else:
1040
+ src.sink(
1041
+ otq.Merge(identify_input_ts=identify_input_ts).symbols(symbol).tick_type(tick_type)
1042
+ )
1043
+ src._tmp_otq.merge(tmp_otq)
1044
+
1045
+ src = self._table_schema(src)
1046
+ return src
1047
+
1048
+
1049
+ Custom = DataSource # for backward compatibility, previously we had only Custom
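
For orientation, below is a minimal usage sketch of the DataSource class shipped in this file, modeled on its own docstring examples. It assumes an already configured onetick-py session, and the database, tick type, symbol and dates ('US_COMP', 'TRD', 'AAPL', 2022-03-01/02) are placeholders that must exist in your environment; this is an illustration, not part of the packaged file.

    import onetick.py as otp

    # Pin the expected schema instead of relying on automatic deduction,
    # as the class docstring recommends for production code.
    data = otp.DataSource(
        db='US_COMP',
        tick_type='TRD',
        symbols='AAPL',
        date=otp.dt(2022, 3, 1),
        schema={'PRICE': float, 'SIZE': int},
        schema_policy='manual',
    )

    # Also pull the latest tick from up to one day before the query start,
    # keeping its original timestamp in a separate field.
    data = otp.DataSource(
        db='US_COMP',
        tick_type='TRD',
        symbols='AAPL',
        date=otp.dt(2022, 3, 2),
        back_to_first_tick=otp.Day(1),
        keep_first_tick_timestamp='ORIGIN_TIMESTAMP',
    )

    df = otp.run(data)
    print(df)

Both calls mirror examples from the class docstring above; the parameters and helpers used (otp.dt, otp.Day, otp.run) are the ones documented there.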