onetick-py 1.177.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152)
  1. locator_parser/__init__.py +0 -0
  2. locator_parser/acl.py +73 -0
  3. locator_parser/actions.py +262 -0
  4. locator_parser/common.py +368 -0
  5. locator_parser/io.py +43 -0
  6. locator_parser/locator.py +150 -0
  7. onetick/__init__.py +101 -0
  8. onetick/doc_utilities/__init__.py +3 -0
  9. onetick/doc_utilities/napoleon.py +40 -0
  10. onetick/doc_utilities/ot_doctest.py +140 -0
  11. onetick/doc_utilities/snippets.py +279 -0
  12. onetick/lib/__init__.py +4 -0
  13. onetick/lib/instance.py +141 -0
  14. onetick/py/__init__.py +293 -0
  15. onetick/py/_stack_info.py +89 -0
  16. onetick/py/_version.py +2 -0
  17. onetick/py/aggregations/__init__.py +11 -0
  18. onetick/py/aggregations/_base.py +648 -0
  19. onetick/py/aggregations/_docs.py +948 -0
  20. onetick/py/aggregations/compute.py +286 -0
  21. onetick/py/aggregations/functions.py +2216 -0
  22. onetick/py/aggregations/generic.py +104 -0
  23. onetick/py/aggregations/high_low.py +80 -0
  24. onetick/py/aggregations/num_distinct.py +83 -0
  25. onetick/py/aggregations/order_book.py +501 -0
  26. onetick/py/aggregations/other.py +1014 -0
  27. onetick/py/backports.py +26 -0
  28. onetick/py/cache.py +374 -0
  29. onetick/py/callback/__init__.py +5 -0
  30. onetick/py/callback/callback.py +276 -0
  31. onetick/py/callback/callbacks.py +131 -0
  32. onetick/py/compatibility.py +798 -0
  33. onetick/py/configuration.py +771 -0
  34. onetick/py/core/__init__.py +0 -0
  35. onetick/py/core/_csv_inspector.py +93 -0
  36. onetick/py/core/_internal/__init__.py +0 -0
  37. onetick/py/core/_internal/_manually_bound_value.py +6 -0
  38. onetick/py/core/_internal/_nodes_history.py +250 -0
  39. onetick/py/core/_internal/_op_utils/__init__.py +0 -0
  40. onetick/py/core/_internal/_op_utils/every_operand.py +9 -0
  41. onetick/py/core/_internal/_op_utils/is_const.py +10 -0
  42. onetick/py/core/_internal/_per_tick_scripts/tick_list_sort_template.script +121 -0
  43. onetick/py/core/_internal/_proxy_node.py +140 -0
  44. onetick/py/core/_internal/_state_objects.py +2312 -0
  45. onetick/py/core/_internal/_state_vars.py +93 -0
  46. onetick/py/core/_source/__init__.py +0 -0
  47. onetick/py/core/_source/_symbol_param.py +95 -0
  48. onetick/py/core/_source/schema.py +97 -0
  49. onetick/py/core/_source/source_methods/__init__.py +0 -0
  50. onetick/py/core/_source/source_methods/aggregations.py +809 -0
  51. onetick/py/core/_source/source_methods/applyers.py +296 -0
  52. onetick/py/core/_source/source_methods/columns.py +141 -0
  53. onetick/py/core/_source/source_methods/data_quality.py +301 -0
  54. onetick/py/core/_source/source_methods/debugs.py +272 -0
  55. onetick/py/core/_source/source_methods/drops.py +120 -0
  56. onetick/py/core/_source/source_methods/fields.py +619 -0
  57. onetick/py/core/_source/source_methods/filters.py +1002 -0
  58. onetick/py/core/_source/source_methods/joins.py +1413 -0
  59. onetick/py/core/_source/source_methods/merges.py +605 -0
  60. onetick/py/core/_source/source_methods/misc.py +1455 -0
  61. onetick/py/core/_source/source_methods/pandases.py +155 -0
  62. onetick/py/core/_source/source_methods/renames.py +356 -0
  63. onetick/py/core/_source/source_methods/sorts.py +183 -0
  64. onetick/py/core/_source/source_methods/switches.py +142 -0
  65. onetick/py/core/_source/source_methods/symbols.py +117 -0
  66. onetick/py/core/_source/source_methods/times.py +627 -0
  67. onetick/py/core/_source/source_methods/writes.py +986 -0
  68. onetick/py/core/_source/symbol.py +205 -0
  69. onetick/py/core/_source/tmp_otq.py +222 -0
  70. onetick/py/core/column.py +209 -0
  71. onetick/py/core/column_operations/__init__.py +0 -0
  72. onetick/py/core/column_operations/_methods/__init__.py +4 -0
  73. onetick/py/core/column_operations/_methods/_internal.py +28 -0
  74. onetick/py/core/column_operations/_methods/conversions.py +216 -0
  75. onetick/py/core/column_operations/_methods/methods.py +292 -0
  76. onetick/py/core/column_operations/_methods/op_types.py +160 -0
  77. onetick/py/core/column_operations/accessors/__init__.py +0 -0
  78. onetick/py/core/column_operations/accessors/_accessor.py +28 -0
  79. onetick/py/core/column_operations/accessors/decimal_accessor.py +104 -0
  80. onetick/py/core/column_operations/accessors/dt_accessor.py +537 -0
  81. onetick/py/core/column_operations/accessors/float_accessor.py +184 -0
  82. onetick/py/core/column_operations/accessors/str_accessor.py +1367 -0
  83. onetick/py/core/column_operations/base.py +1121 -0
  84. onetick/py/core/cut_builder.py +150 -0
  85. onetick/py/core/db_constants.py +20 -0
  86. onetick/py/core/eval_query.py +245 -0
  87. onetick/py/core/lambda_object.py +441 -0
  88. onetick/py/core/multi_output_source.py +232 -0
  89. onetick/py/core/per_tick_script.py +2256 -0
  90. onetick/py/core/query_inspector.py +464 -0
  91. onetick/py/core/source.py +1744 -0
  92. onetick/py/db/__init__.py +2 -0
  93. onetick/py/db/_inspection.py +1128 -0
  94. onetick/py/db/db.py +1327 -0
  95. onetick/py/db/utils.py +64 -0
  96. onetick/py/docs/__init__.py +0 -0
  97. onetick/py/docs/docstring_parser.py +112 -0
  98. onetick/py/docs/utils.py +81 -0
  99. onetick/py/functions.py +2398 -0
  100. onetick/py/license.py +190 -0
  101. onetick/py/log.py +88 -0
  102. onetick/py/math.py +935 -0
  103. onetick/py/misc.py +470 -0
  104. onetick/py/oqd/__init__.py +22 -0
  105. onetick/py/oqd/eps.py +1195 -0
  106. onetick/py/oqd/sources.py +325 -0
  107. onetick/py/otq.py +216 -0
  108. onetick/py/pyomd_mock.py +47 -0
  109. onetick/py/run.py +916 -0
  110. onetick/py/servers.py +173 -0
  111. onetick/py/session.py +1347 -0
  112. onetick/py/sources/__init__.py +19 -0
  113. onetick/py/sources/cache.py +167 -0
  114. onetick/py/sources/common.py +128 -0
  115. onetick/py/sources/csv.py +642 -0
  116. onetick/py/sources/custom.py +85 -0
  117. onetick/py/sources/data_file.py +305 -0
  118. onetick/py/sources/data_source.py +1045 -0
  119. onetick/py/sources/empty.py +94 -0
  120. onetick/py/sources/odbc.py +337 -0
  121. onetick/py/sources/order_book.py +271 -0
  122. onetick/py/sources/parquet.py +168 -0
  123. onetick/py/sources/pit.py +191 -0
  124. onetick/py/sources/query.py +495 -0
  125. onetick/py/sources/snapshots.py +419 -0
  126. onetick/py/sources/split_query_output_by_symbol.py +198 -0
  127. onetick/py/sources/symbology_mapping.py +123 -0
  128. onetick/py/sources/symbols.py +374 -0
  129. onetick/py/sources/ticks.py +825 -0
  130. onetick/py/sql.py +70 -0
  131. onetick/py/state.py +251 -0
  132. onetick/py/types.py +2131 -0
  133. onetick/py/utils/__init__.py +70 -0
  134. onetick/py/utils/acl.py +93 -0
  135. onetick/py/utils/config.py +186 -0
  136. onetick/py/utils/default.py +49 -0
  137. onetick/py/utils/file.py +38 -0
  138. onetick/py/utils/helpers.py +76 -0
  139. onetick/py/utils/locator.py +94 -0
  140. onetick/py/utils/perf.py +498 -0
  141. onetick/py/utils/query.py +49 -0
  142. onetick/py/utils/render.py +1374 -0
  143. onetick/py/utils/script.py +244 -0
  144. onetick/py/utils/temp.py +471 -0
  145. onetick/py/utils/types.py +120 -0
  146. onetick/py/utils/tz.py +84 -0
  147. onetick_py-1.177.0.dist-info/METADATA +137 -0
  148. onetick_py-1.177.0.dist-info/RECORD +152 -0
  149. onetick_py-1.177.0.dist-info/WHEEL +5 -0
  150. onetick_py-1.177.0.dist-info/entry_points.txt +2 -0
  151. onetick_py-1.177.0.dist-info/licenses/LICENSE +21 -0
  152. onetick_py-1.177.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,1045 @@
1
+ import datetime as dt
2
+ import inspect
3
+ import warnings
4
+
5
+ from typing import Dict, Iterable, Optional
6
+
7
+ import onetick.py as otp
8
+ from onetick.py.otq import otq
9
+
10
+ from onetick.py.db import _inspection
11
+ from onetick.py.core._source._symbol_param import _SymbolParamColumn
12
+ from onetick.py.core._source.tmp_otq import TmpOtq
13
+ from onetick.py.core.eval_query import _QueryEvalWrapper
14
+ from onetick.py.core.source import Source
15
+ from onetick.py.core.column_operations.base import Raw, OnetickParameter
16
+
17
+ from .. import types as ott
18
+ from .. import utils
19
+ from ..core.column_operations.base import _Operation
20
+ from ..db.db import DB
21
+ from ..compatibility import is_supported_where_clause_for_back_ticks
22
+
23
+ from onetick.py.docs.utils import docstring, param_doc
24
+
25
+ from .common import convert_tick_type_to_str, get_start_end_by_date
26
+ from .symbols import Symbols
27
+ from .ticks import Ticks
28
+ from .query import query
29
+
30
+
31
+ _db_doc = param_doc(
32
+ name='db',
33
+ desc="""
34
+ Name(s) of the database or the database object(s).
35
+ """,
36
+ str_annotation='str, list of str, :class:`otp.DB <onetick.py.DB>`',
37
+ default=None,
38
+ str_default='None',
39
+ )
40
+ _symbol_doc = param_doc(
41
+ name='symbol',
42
+ desc="""
43
+ Symbol(s) from which data should be taken.
44
+ """,
45
+ str_annotation='str, list of str, :class:`Source`, :class:`query`, :py:func:`eval query <onetick.py.eval>`',
46
+ default=utils.adaptive,
47
+ str_default=' :py:class:`onetick.py.adaptive`',
48
+ )
49
+ _symbols_doc = param_doc(
50
+ name='symbols',
51
+ desc="""
52
+ Symbol(s) from which data should be taken.
53
+ Alias for ``symbol`` parameter. Will take precedence over it.
54
+ """,
55
+ str_annotation=('str, list of str, :class:`Source`, :class:`query`, :py:func:`eval query <onetick.py.eval>`, '
56
+ ':py:class:`onetick.query.GraphQuery`.'),
57
+ default=None,
58
+ )
59
+ _tick_type_doc = param_doc(
60
+ name='tick_type',
61
+ desc="""
62
+ Tick type of the data.
63
+ If not specified, all ticks from `db` will be taken.
64
+ If ticks can't be found, or if multiple databases are specified in `db`, the default is "TRD".
65
+ """,
66
+ str_annotation='str, list of str',
67
+ default=utils.adaptive,
68
+ str_default=' :py:class:`onetick.py.adaptive`',
69
+ )
70
+ _start_doc = param_doc(
71
+ name='start',
72
+ desc="""
73
+ Start of the interval from which the data should be taken.
74
+ Default is :py:class:`onetick.py.adaptive`, making the final query deduce the time
75
+ limits from the rest of the graph.
76
+ """,
77
+ str_annotation=(
78
+ ':py:class:`datetime.datetime`, :py:class:`otp.datetime <onetick.py.datetime>`,'
79
+ ' :py:class:`onetick.py.adaptive`'
80
+ ),
81
+ default=utils.adaptive,
82
+ str_default=' :py:class:`onetick.py.adaptive`',
83
+ )
84
+ _end_doc = param_doc(
85
+ name='end',
86
+ desc="""
87
+ End of the interval from which the data should be taken.
88
+ Default is :py:class:`onetick.py.adaptive`, making the final query deduce the time
89
+ limits from the rest of the graph.
90
+ """,
91
+ str_annotation=(
92
+ ':py:class:`datetime.datetime`, :py:class:`otp.datetime <onetick.py.datetime>`,'
93
+ ' :py:class:`onetick.py.adaptive`'
94
+ ),
95
+ default=utils.adaptive,
96
+ str_default=' :py:class:`onetick.py.adaptive`',
97
+ )
98
+ _date_doc = param_doc(
99
+ name='date',
100
+ desc="""
101
+ Allows specifying a whole day instead of passing the ``start`` and ``end`` parameters explicitly.
102
+ If it is set along with the ``start`` and ``end`` parameters, the latter two are ignored.
103
+ """,
104
+ str_annotation=":class:`datetime.datetime`, :class:`otp.datetime <onetick.py.datetime>`",
105
+ default=None,
106
+ )
107
+ _schema_policy_doc = param_doc(
108
+ name='schema_policy',
109
+ desc="""
110
+ Schema deduction policy:
111
+
112
+ - 'tolerant' (default)
113
+ The resulting schema is a combination of ``schema`` and database schema.
114
+ If the database schema can be deduced,
115
+ it is checked to be type-compatible with the ``schema``,
116
+ and ValueError is raised if the check fails.
117
+ Also, with this policy the database is scanned up to 5 days back to find the schema.
118
+ This is useful when the database is misconfigured or in case of holidays.
119
+
120
+ - 'tolerant_strict'
121
+ The resulting schema will be ``schema`` if it's not empty.
122
+ Otherwise, database schema is used.
123
+ If the database schema can be deduced,
124
+ it is checked for fields missing from the ``schema``
125
+ and for type compatibility with the ``schema``,
126
+ and ValueError is raised if any check fails.
127
+ Also, with this policy the database is scanned up to 5 days back to find the schema.
128
+ This is useful when the database is misconfigured or in case of holidays.
129
+
130
+ - 'fail'
131
+ The same as 'tolerant', but if the database schema can't be deduced, raises an Exception.
132
+
133
+ - 'fail_strict'
134
+ The same as 'tolerant_strict', but if the database schema can't be deduced, raises an Exception.
135
+
136
+ - 'manual'
137
+ The resulting schema is a combination of ``schema`` and database schema.
138
+ Compatibility with database schema will not be checked.
139
+
140
+ - 'manual_strict'
141
+ The resulting schema will be exactly ``schema``.
142
+ Compatibility with database schema will not be checked.
143
+ If some fields specified in ``schema`` do not exist in the database,
144
+ their values will be set to the default value for the type
145
+ (0 for integers, NaNs for floats, empty string for strings, epoch for datetimes).
146
+
147
+ Default value is :py:class:`onetick.py.adaptive` (if deprecated parameter ``guess_schema`` is not set).
148
+ If ``guess_schema`` is set to True then value is 'fail', if False then 'manual'.
149
+ If ``schema_policy`` is set to ``None`` then default value is 'tolerant'.
150
+
151
+ Default value can be changed with
152
+ :py:attr:`otp.config.default_schema_policy<onetick.py.configuration.Config.default_schema_policy>`
153
+ configuration parameter.
154
+
155
+ If you set schema manually, while creating DataSource instance, and don't set ``schema_policy``,
156
+ it will be automatically set to ``manual``.
157
+ """,
158
+ str_annotation="'tolerant', 'tolerant_strict', 'fail', 'fail_strict', 'manual', 'manual_strict'",
159
+ default=utils.adaptive,
160
+ str_default=' :py:class:`onetick.py.adaptive`',
161
+ )
162
+ _guess_schema_doc = param_doc(
163
+ name='guess_schema',
164
+ desc="""
165
+ .. deprecated:: 1.3.16
166
+
167
+ Use ``schema_policy`` parameter instead.
168
+
169
+ If ``guess_schema`` is set to True then ``schema_policy`` value is 'fail', if False then 'manual'.
170
+ """,
171
+ annotation=bool,
172
+ default=None,
173
+ )
174
+ _identify_input_ts_doc = param_doc(
175
+ name='identify_input_ts',
176
+ desc="""
177
+ If set to False, the fields SYMBOL_NAME and TICK_TYPE are not appended to the output ticks.
178
+ """,
179
+ annotation=bool,
180
+ default=False,
181
+ )
182
+ _back_to_first_tick_doc = param_doc(
183
+ name='back_to_first_tick',
184
+ desc="""
185
+ Determines how far back to go looking for the latest tick before ``start`` time.
186
+ If one is found, it is inserted into the output time series with the timestamp set to ``start`` time.
187
+ Note: it will be rounded to int, so otp.Millis(999) will be 0 seconds.
188
+ """,
189
+ str_annotation=('int, :ref:`offset <datetime_offsets>`, '
190
+ ':class:`otp.expr <onetick.py.expr>`, '
191
+ ':py:class:`~onetick.py.Operation`'),
192
+ default=0,
193
+ )
194
+ _keep_first_tick_timestamp_doc = param_doc(
195
+ name='keep_first_tick_timestamp',
196
+ desc="""
197
+ If set, a new field with this name will be added to the source.
198
+ This field contains the original timestamp of the tick that was taken from before the start time of the query.
199
+ For all other ticks the value of this field will be equal to the value of the Time field.
200
+ This parameter is ignored if ``back_to_first_tick`` is not set.
201
+ """,
202
+ annotation=str,
203
+ default=None,
204
+ )
205
+ _presort_doc = param_doc(
206
+ name='presort',
207
+ desc="""
208
+ Add the presort EP in case of bound symbols.
209
+ Applicable only when ``symbols`` is not None.
210
+ By default, it is set to True if ``symbols`` are set
211
+ and to False otherwise.
212
+ """,
213
+ annotation=bool,
214
+ default=utils.adaptive,
215
+ str_default=' :py:class:`onetick.py.adaptive`',
216
+ )
217
+ _concurrency_doc = param_doc(
218
+ name='concurrency',
219
+ desc="""
220
+ Specifies the number of CPU cores to utilize for the ``presort``.
221
+ By default, the value is inherited from the value of the query where this PRESORT is used.
222
+
223
+ For the main query it may be specified in the ``concurrency`` parameter of :meth:`run` method
224
+ (which by default is set to
225
+ :py:attr:`otp.config.default_concurrency<onetick.py.configuration.Config.default_concurrency>`).
226
+
227
+ For the auxiliary queries (like first-stage queries) empty value means OneTick's default of 1.
228
+ If :py:attr:`otp.config.presort_force_default_concurrency<onetick.py.configuration.Config.presort_force_default_concurrency>`
229
+ is set then default concurrency value will be set in all PRESORT EPs in all queries.
230
+ """, # noqa: E501
231
+ annotation=int,
232
+ default=utils.default,
233
+ str_default=' :py:class:`onetick.py.utils.default`',
234
+ )
235
+ _batch_size_doc = param_doc(
236
+ name='batch_size',
237
+ desc="""
238
+ Specifies the query batch size for the ``presort``.
239
+ By default, the value from
240
+ :py:attr:`otp.config.default_batch_size<onetick.py.configuration.Config.default_batch_size>` is used.
241
+ """,
242
+ annotation=int,
243
+ default=None,
244
+ )
245
+ _schema_doc = param_doc(
246
+ name='schema',
247
+ desc="""
248
+ Dict of <column name> -> <column type> pairs that the source is expected to have.
249
+ If the type is irrelevant, provide None as the type in question.
250
+ """,
251
+ annotation=Optional[Dict[str, type]],
252
+ default=None,
253
+ )
254
+ _desired_schema_doc = param_doc(
255
+ name='kwargs',
256
+ desc="""
257
+ Deprecated. Use ``schema`` instead.
258
+ List of <column name> -> <column type> pairs that the source is expected to have.
259
+ If the type is irrelevant, provide None as the type in question.
260
+ """,
261
+ str_annotation='type[str]',
262
+ kind=inspect.Parameter.VAR_KEYWORD,
263
+ )
264
+
265
+ _max_back_ticks_to_prepend_doc = param_doc(
266
+ name='max_back_ticks_to_prepend',
267
+ desc="""
268
+ When the ``back_to_first_tick`` interval is specified, this parameter determines the maximum number
269
+ of the most recent ticks before start_time that will be prepended to the output time series.
270
+ Their timestamp will be changed to start_time.
271
+ """,
272
+ annotation=int,
273
+ default=1,
274
+ )
275
+
276
+ _where_clause_for_back_ticks_doc = param_doc(
277
+ name='where_clause_for_back_ticks',
278
+ desc="""
279
+ A logical expression that is computed only for the ticks encountered when a query goes back from the start time,
280
+ in search of the ticks to prepend. If it returns false, a tick is ignored.
281
+ """,
282
+ annotation=Raw,
283
+ default=None,
284
+ )
285
+ _symbol_date_doc = param_doc(
286
+ name='symbol_date',
287
+ desc="""
288
+ Symbol date or integer in the YYYYMMDD format.
289
+ Can only be specified if the ``symbols`` parameter is set.
290
+ """,
291
+ str_annotation=':py:class:`otp.datetime <onetick.py.datetime>` or :py:class:`datetime.datetime` or int',
292
+ default=None,
293
+ )
294
+
295
+ DATA_SOURCE_DOC_PARAMS = [
296
+ _db_doc, _symbol_doc, _tick_type_doc,
297
+ _start_doc, _end_doc, _date_doc,
298
+ _schema_policy_doc, _guess_schema_doc,
299
+ _identify_input_ts_doc,
300
+ _back_to_first_tick_doc, _keep_first_tick_timestamp_doc,
301
+ _max_back_ticks_to_prepend_doc,
302
+ _where_clause_for_back_ticks_doc,
303
+ _symbols_doc,
304
+ _presort_doc, _batch_size_doc, _concurrency_doc,
305
+ _schema_doc,
306
+ _symbol_date_doc,
307
+ _desired_schema_doc,
308
+ ]
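Editor's note: the `param_doc` objects above are collected into `DATA_SOURCE_DOC_PARAMS` and injected into `DataSource.__init__` by the `@docstring` decorator further below. As a rough, hypothetical sketch of that mechanism only (the real helpers in `onetick.py.docs.utils` are more elaborate and their API may differ):

    class _ParamDocSketch:
        """Hypothetical stand-in for param_doc: a name, a description and a type-annotation string."""
        def __init__(self, name, desc, str_annotation, **_ignored):
            self.name, self.desc, self.str_annotation = name, desc.strip(), str_annotation

        def render(self):
            # one numpydoc-style "Parameters" entry
            return f"{self.name}: {self.str_annotation}\n    {self.desc}"

    def docstring_sketch(parameters):
        """Hypothetical stand-in for the @docstring decorator: appends a Parameters section."""
        def decorator(func):
            body = "\n".join(p.render() for p in parameters)
            func.__doc__ = (func.__doc__ or "") + "\n\nParameters\n----------\n" + body
            return func
        return decorator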
309
+
310
+
311
+ class DataSource(Source):
312
+
313
+ POLICY_MANUAL = "manual"
314
+ POLICY_MANUAL_STRICT = "manual_strict"
315
+ POLICY_TOLERANT = "tolerant"
316
+ POLICY_TOLERANT_STRICT = "tolerant_strict"
317
+ POLICY_FAIL = "fail"
318
+ POLICY_FAIL_STRICT = "fail_strict"
319
+
320
+ _VALID_POLICIES = frozenset([POLICY_MANUAL, POLICY_MANUAL_STRICT,
321
+ POLICY_TOLERANT, POLICY_TOLERANT_STRICT,
322
+ POLICY_FAIL, POLICY_FAIL_STRICT])
323
+ _PROPERTIES = Source._PROPERTIES + ["_p_db", "_p_strict", "_p_schema", "_schema", "logger"]
324
+
325
+ def __get_schema(self, db, start, schema_policy):
326
+ schema = {}
327
+
328
+ if start is utils.adaptive:
329
+ start = None # None means: use the last date with data
330
+
331
+ if isinstance(db, list):
332
+ # This is the merge case, since we need to get a combined schema across different tick types and dbs
333
+ for t_db in db:
334
+ if t_db.startswith('expr('):
335
+ continue
336
+
337
+ _db = t_db.split(':')[0]
338
+ _tt = t_db.split(':')[-1]
339
+
340
+ # tick type as parameter
341
+ if _tt.startswith('$'):
342
+ _tt = None
343
+
344
+ db_obj = _inspection.DB(_db)
345
+ if schema_policy == self.POLICY_TOLERANT and start:
346
+ # repeating the same logic as in db_obj.last_date
347
+ start = db_obj.last_not_empty_date(start, days_back=5, tick_type=_tt)
348
+
349
+ db_schema = {}
350
+ try:
351
+ db_schema = db_obj.schema(date=start, tick_type=_tt)
352
+ except Exception as e:
353
+ if _tt is not None:
354
+ warnings.warn(f"Couldn't get schema from the database {db_obj}:\n{e}.\n\n"
355
+ "Set parameter schema_policy='manual' to set the schema manually.")
356
+
357
+ schema.update(db_schema)
358
+
359
+ if db is None or isinstance(db, _SymbolParamColumn):
360
+ # In this case we can't get schema, because db is calculated dynamically.
361
+ # Set to empty to indicate that in this case we expect the manually set schema.
362
+ schema = {}
363
+ return schema
364
+
365
+ def __prepare_schema(self, db, start, schema_policy, guess_schema, schema):
366
+ if guess_schema is not None:
367
+ warnings.warn(
368
+ "guess_schema flag is deprecated; use schema_policy argument instead",
369
+ FutureWarning,
370
+ )
371
+ if schema_policy is not None:
372
+ raise ValueError("guess_schema and schema_policy cannot be set at the same time")
373
+ if guess_schema:
374
+ schema_policy = self.POLICY_FAIL
375
+ else:
376
+ schema_policy = self.POLICY_MANUAL
377
+
378
+ if schema_policy is None:
379
+ schema_policy = self.POLICY_TOLERANT
380
+ if schema_policy not in self._VALID_POLICIES:
381
+ raise ValueError(f"Invalid schema_policy; allowed values are: {self._VALID_POLICIES}")
382
+
383
+ actual_schema = {}
384
+ if schema_policy not in (self.POLICY_MANUAL, self.POLICY_MANUAL_STRICT):
385
+ actual_schema = self.__get_schema(db, start, schema_policy)
386
+ dbs = ', '.join(db if isinstance(db, list) else [])
387
+
388
+ if len(actual_schema) == 0:
389
+ if schema_policy in (self.POLICY_FAIL, self.POLICY_FAIL_STRICT):
390
+ raise ValueError(f'No ticks found in database(-s) {dbs}')
391
+ # let's try to use at least something
392
+ return schema.copy()
393
+
394
+ for k, v in schema.items():
395
+ field_type = actual_schema.get(k, None)
396
+ incompatible_types = False
397
+ if field_type is None:
398
+ if self._p_strict or schema_policy in (self.POLICY_TOLERANT, self.POLICY_FAIL):
399
+ raise ValueError(f"Database(-s) {dbs} schema has no {k} field")
400
+ elif issubclass(field_type, str) and issubclass(v, str):
401
+ field_length = ott.string.DEFAULT_LENGTH
402
+ if issubclass(field_type, ott.string):
403
+ field_length = field_type.length
404
+ v_length = ott.string.DEFAULT_LENGTH
405
+ if issubclass(v, ott.string):
406
+ v_length = v.length
407
+ if issubclass(field_type, ott.varstring):
408
+ if not issubclass(v, ott.varstring):
409
+ incompatible_types = True
410
+ elif not issubclass(v, ott.varstring) and v_length < field_length:
411
+ incompatible_types = True
412
+ elif not issubclass(field_type, v):
413
+ incompatible_types = True
414
+ if incompatible_types:
415
+ error_message = f"Database(-s) {dbs} schema field {k} has type {field_type}, but {v} was requested"
416
+ if field_type in (str, ott.string) or v in (str, ott.string):
417
+ error_message = f"{error_message}. Notice, that `str` and `otp.string` lengths are 64"
418
+ raise ValueError(error_message)
419
+ if not self._p_strict:
420
+ schema.update(actual_schema)
421
+
422
+ table_schema = schema.copy()
423
+ if not self._p_strict:
424
+ # in this case, table only the user-specified fields that are not already present in the database schema
425
+ table_schema = {
426
+ k: v for k, v in table_schema.items() if k not in actual_schema
427
+ }
428
+ return table_schema
429
+
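To illustrate the string-compatibility rule applied in `__prepare_schema` above, here is a standalone sketch (simplified; it only models fixed-length strings versus varstrings and is not part of the package):

    def strings_compatible(db_is_varstring, db_length, requested_is_varstring, requested_length):
        # a varstring field in the database can only be requested as a varstring
        if db_is_varstring:
            return requested_is_varstring
        # a fixed-length field is compatible with a varstring request,
        # or with a fixed-length request that is at least as long
        return requested_is_varstring or requested_length >= db_length

    assert strings_compatible(False, 64, False, 128)       # longer fixed string: OK
    assert not strings_compatible(False, 128, False, 64)   # shorter fixed string: rejected
    assert not strings_compatible(True, 64, False, 256)    # varstring field needs a varstring request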
430
+ def __prepare_db_tick_type(self, db, tick_type, start, end):
431
+ if isinstance(db, list):
432
+ # If everything is correct then this branch should leave
433
+ # the `db` var as a list of databases with tick types and the `tick_type` var as None.
434
+ # Valid cases:
435
+ # - Fully defined case. The `db` parameter has a list of databases where
436
+ # every database has a tick type, while the `tick_type`
437
+ # parameter has default value or None (for backward compatibility)
438
+ # - Partially defined case. The `db` parameter has a list of databases but
439
+ # not every database has a tick type, and meanwhile the `tick_type`
440
+ # is set to a non-None value. In that case databases without a tick type
441
+ # are extended with the tick type from the `tick_type` parameter
442
+ # - Undefined case. The `db` parameter has a list of databases and
443
+ # none of the databases has a tick type, and the `tick_type` is
444
+ # set to a non-None value. In that case every database is extended with
445
+ # the tick type from the `tick_type`.
446
+
447
+ def db_converter(_db):
448
+ if isinstance(_db, DB):
449
+ return _db.name
450
+ else:
451
+ return _db
452
+
453
+ db = [db_converter(_db) for _db in db]
454
+ res = all(('::' in _db and _db[-1] != ':' for _db in db))
455
+ if res:
456
+ if tick_type is utils.adaptive or tick_type is None:
457
+ tick_type = None # tick type is specified for all databases
458
+ else:
459
+ raise ValueError('The `tick_type` is set as a parameter '
460
+ 'and also as a part of the `db` parameter '
461
+ 'for every database')
462
+ else:
463
+ dbs_without_tt = [_db.split(':')[0] for _db in db
464
+ if '::' not in _db or _db[-1] == ':']
465
+
466
+ if tick_type is utils.adaptive:
467
+ tick_type = 'TRD' # the default, for backward compatibility and the testing use case
468
+ if tick_type is None:
469
+ raise ValueError('The tick type is not set for databases: ' +
470
+ ', '.join(dbs_without_tt))
471
+ else:
472
+ # extend databases with missing tick types using the `tick_type` parameter
473
+ dbs_with_tt = [_db for _db in db
474
+ if '::' in _db and _db[-1] != ':']
475
+
476
+ db = dbs_with_tt + [_db + '::' + tick_type for _db in dbs_without_tt]
477
+ tick_type = None
478
+
479
+ if isinstance(db, (DB, _inspection.DB)):
480
+ db = db.name # ... and we go to the next branch
481
+
482
+ if isinstance(db, str):
483
+ # The resulting `db` var contains a list with a single string value that has the `db`
484
+ # concatenated with the `tick_type`.
485
+ if '::' in db:
486
+ if tick_type is utils.adaptive or tick_type is None:
487
+ tick_type = db.split(':')[-1]
488
+ db = db.split('::')[0]
489
+ else:
490
+ raise ValueError('The `tick_type` is set as a parameter '
491
+ 'and also as a part of the `db` parameter')
492
+ else:
493
+ if tick_type is utils.adaptive or tick_type is None:
494
+ db_obj = _inspection.DB(db)
495
+
496
+ # try to find at least one common tick type
497
+ # through all days
498
+ tick_types = None
499
+
500
+ if start is utils.adaptive:
501
+ start = end = db_obj.get_last_date(show_warnings=False)
502
+
503
+ if start and end: # could be None if there is no data
504
+ t_start = start
505
+ while t_start <= end:
506
+ t_tts = set(db_obj.tick_types(t_start))
507
+
508
+ t_start += dt.timedelta(days=1)
509
+
510
+ if len(t_tts) == 0:
511
+ continue
512
+
513
+ if tick_types is None:
514
+ tick_types = t_tts
515
+ else:
516
+ tick_types &= t_tts
517
+
518
+ if len(tick_types) == 0:
519
+ raise ValueError(f'It seems that there is no common '
520
+ f'tick type for dates from {start} '
521
+ f'to {end}. Please specify a tick '
522
+ 'type')
523
+
524
+ if tick_types is None:
525
+ if tick_type is utils.adaptive:
526
+ tick_types = ['TRD'] # the default one
527
+ else:
528
+ raise ValueError(f'Could not find any data from {start} '
529
+ f'to {end}. Please check that the tick type, '
530
+ 'database and date range are correct.')
531
+
532
+ if len(tick_types) != 1:
533
+ raise ValueError('The tick type is not specified, found '
534
+ 'multiple tick types in the database: ' +
535
+ ', '.join(tick_types))
536
+
537
+ tick_type = tick_types.pop()
538
+
539
+ if not isinstance(tick_type, str) and isinstance(tick_type, Iterable):
540
+ if isinstance(tick_type, _SymbolParamColumn):
541
+ db = [f"expr('{db}::' + {str(tick_type)})"]
542
+ else:
543
+ db = [f'{db}::{tt}' for tt in tick_type]
544
+ else:
545
+ db = [db + '::' + tick_type]
546
+ tick_type = None
547
+
548
+ if isinstance(db, _SymbolParamColumn):
549
+ # Do nothing, because we don't know whether db will come with the tick type or not.
550
+ # The only thing we know for sure is that tick_type cannot be utils.adaptive
551
+ if tick_type is utils.adaptive:
552
+ # TODO: need to test this case
553
+ raise ValueError('The `db` is set to the symbol param, in that case '
554
+ 'the `tick_type` should be set explicitly to some value '
555
+ 'or to None')
556
+
557
+ if db is None:
558
+ # This case means that database comes with the symbol name, then tick type should be defined
559
+ if tick_type is utils.adaptive or tick_type is None:
560
+ raise ValueError('The `db` is not specified, which means the database is '
561
+ 'expected to be defined with the symbol name. '
562
+ 'In that case the `tick_type` should be defined.')
563
+ if not isinstance(tick_type, str) and isinstance(tick_type, Iterable):
564
+ tick_type = '+'.join(tick_type)
565
+
566
+ return db, tick_type
567
+
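To make the db/tick-type normalization above easier to follow, here is a standalone sketch of the "partially defined" list case only (not the actual implementation, which also handles plain strings, symbol parameters and `None`):

    def normalize_db_list(dbs, tick_type):
        # databases that already carry a '::TICK_TYPE' suffix are kept as-is
        with_tt = [d for d in dbs if '::' in d and not d.endswith(':')]
        without_tt = [d.split(':')[0] for d in dbs if '::' not in d or d.endswith(':')]
        if without_tt and tick_type is None:
            raise ValueError('The tick type is not set for databases: ' + ', '.join(without_tt))
        # the rest are extended with the tick type passed as a parameter
        return with_tt + [f'{d}::{tick_type}' for d in without_tt]

    assert normalize_db_list(['DB_A::TRD', 'DB_B'], 'QTE') == ['DB_A::TRD', 'DB_B::QTE']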
568
+ @docstring(parameters=DATA_SOURCE_DOC_PARAMS, add_self=True)
569
+ def __init__(
570
+ self,
571
+ db=None,
572
+ symbol=utils.adaptive,
573
+ tick_type=utils.adaptive,
574
+ start=utils.adaptive,
575
+ end=utils.adaptive,
576
+ date=None,
577
+ schema=None,
578
+ schema_policy=utils.adaptive,
579
+ guess_schema=None,
580
+ identify_input_ts=False,
581
+ back_to_first_tick=0,
582
+ keep_first_tick_timestamp=None,
583
+ max_back_ticks_to_prepend=1,
584
+ where_clause_for_back_ticks=None,
585
+ symbols=None,
586
+ presort=utils.adaptive,
587
+ batch_size=None,
588
+ concurrency=utils.default,
589
+ symbol_date=None,
590
+ **kwargs,
591
+ ):
592
+ """
593
+ Construct a source providing data from a given ``db``.
594
+
595
+ .. warning::
596
+
597
+ Default value of the parameter ``schema_policy`` enables automatic deduction
598
+ of the data schema, but it is strongly discouraged in production code.
599
+ For details see :ref:`static/concepts/schema:Schema deduction mechanism`.
600
+
601
+ Note
602
+ ----
603
+ If the interval that was set for :py:class:`~onetick.py.DataSource` via the ``start``/``end`` or ``date`` parameters
604
+ does not match the intervals of other :py:class:`~onetick.py.Source` objects used in the query,
605
+ or does not match the whole query interval, then :py:meth:`~otp.Source.modify_query_times` will be applied
606
+ to this ``DataSource`` with the specified interval as its start and end time parameters.
607
+
608
+ If the ``symbols`` parameter is omitted, you need to specify unbound symbols for the query in the ``symbols``
609
+ parameter of the :py:func:`onetick.py.run` function.
610
+
611
+ If ``symbols`` parameter is set, :meth:`otp.merge <onetick.py.merge>` is used to merge all passed bound symbols.
612
+ In this case you don't need to specify unbound symbols in :py:func:`onetick.py.run` call.
613
+
614
+ It's not allowed to specify bound and unbound symbols at the same time.
615
+
616
+ See also
617
+ --------
618
+ :ref:`static/concepts/start_end:Query start / end flow`
619
+ :ref:`static/concepts/symbols:Symbols: bound and unbound`
620
+
621
+ Examples
622
+ --------
623
+
624
+ Query a single symbol from a database:
625
+
626
+ >>> data = otp.DataSource(db='SOME_DB', tick_type='TT', symbols='S1')
627
+ >>> otp.run(data)
628
+ Time X
629
+ 0 2003-12-01 00:00:00.000 1
630
+ 1 2003-12-01 00:00:00.001 2
631
+ 2 2003-12-01 00:00:00.002 3
632
+
633
+ Parameter ``symbols`` can be a list.
634
+ In this case specified symbols will be merged into a single data flow:
635
+
636
+ >>> # OTdirective: snippet-name:fetch data.simple;
637
+ >>> data = otp.DataSource(db='SOME_DB', tick_type='TT', symbols=['S1', 'S2'])
638
+ >>> otp.run(data)
639
+ Time X
640
+ 0 2003-12-01 00:00:00.000 1
641
+ 1 2003-12-01 00:00:00.000 -3
642
+ 2 2003-12-01 00:00:00.001 2
643
+ 3 2003-12-01 00:00:00.001 -2
644
+ 4 2003-12-01 00:00:00.002 3
645
+ 5 2003-12-01 00:00:00.002 -1
646
+
647
+ Parameter ``identify_input_ts`` can be used to automatically add field with symbol name for each tick:
648
+
649
+ >>> data = otp.DataSource(db='SOME_DB', tick_type='TT', symbols=['S1', 'S2'], identify_input_ts=True)
650
+ >>> otp.run(data)
651
+ Time SYMBOL_NAME TICK_TYPE X
652
+ 0 2003-12-01 00:00:00.000 S1 TT 1
653
+ 1 2003-12-01 00:00:00.000 S2 TT -3
654
+ 2 2003-12-01 00:00:00.001 S1 TT 2
655
+ 3 2003-12-01 00:00:00.001 S2 TT -2
656
+ 4 2003-12-01 00:00:00.002 S1 TT 3
657
+ 5 2003-12-01 00:00:00.002 S2 TT -1
658
+
659
+ A Source can also be passed as symbols; in that case the specially named column SYMBOL_NAME will be transformed into the symbol
660
+ and all other columns will become symbol parameters:
661
+
662
+ >>> # OTdirective: snippet-name:fetch data.symbols as a source;
663
+ >>> symbols = otp.Ticks(SYMBOL_NAME=['S1', 'S2'])
664
+ >>> data = otp.DataSource(db='SOME_DB', symbols=symbols, tick_type='TT')
665
+ >>> otp.run(data)
666
+ Time X
667
+ 0 2003-12-01 00:00:00.000 1
668
+ 1 2003-12-01 00:00:00.000 -3
669
+ 2 2003-12-01 00:00:00.001 2
670
+ 3 2003-12-01 00:00:00.001 -2
671
+ 4 2003-12-01 00:00:00.002 3
672
+ 5 2003-12-01 00:00:00.002 -1
673
+
674
+ The default schema policy is **tolerant** (unless you specified the ``schema`` parameter and
675
+ left ``schema_policy`` at its default value, in which case it will be set to **manual**).
676
+
677
+ >>> data = otp.DataSource(
678
+ ... db='US_COMP', tick_type='TRD', symbols='AAPL', date=otp.dt(2022, 3, 1),
679
+ ... )
680
+ >>> data.schema
681
+ {'PRICE': <class 'float'>, 'SIZE': <class 'int'>}
682
+
683
+ >>> data = otp.DataSource(
684
+ ... db='US_COMP', tick_type='TRD', symbols='AAPL', schema={'PRICE': int},
685
+ ... schema_policy='tolerant', date=otp.dt(2022, 3, 1),
686
+ ... )
687
+ Traceback (most recent call last):
688
+ ...
689
+ ValueError: Database(-s) US_COMP::TRD schema field PRICE has type <class 'float'>,
690
+ but <class 'int'> was requested
691
+
692
+ Schema policy **manual** uses exactly ``schema``:
693
+
694
+ >>> data = otp.DataSource(db='US_COMP', tick_type='TRD', symbols='AAPL', schema={'PRICE': float},
695
+ ... date=otp.dt(2022, 3, 1), schema_policy='manual')
696
+ >>> data.schema
697
+ {'PRICE': <class 'float'>}
698
+
699
+ Schema policy **fail** raises an exception if the schema cannot be deduced:
700
+
701
+ >>> data = otp.DataSource(db='US_COMP', tick_type='TRD', symbols='AAPL', date=otp.dt(2021, 3, 1),
702
+ ... schema_policy='fail')
703
+ Traceback (most recent call last):
704
+ ...
705
+ ValueError: No ticks found in database(-s) US_COMP::TRD
706
+
707
+ ``back_to_first_tick`` sets how far back to go looking for the latest tick before ``start`` time:
708
+
709
+ >>> data = otp.DataSource(db='US_COMP', tick_type='TRD', symbols='AAPL', date=otp.dt(2022, 3, 2),
710
+ ... back_to_first_tick=otp.Day(1))
711
+ >>> otp.run(data)
712
+ Time PRICE SIZE
713
+ 0 2022-03-02 00:00:00.000 1.4 50
714
+ 1 2022-03-02 00:00:00.000 1.0 100
715
+ 2 2022-03-02 00:00:00.001 1.1 101
716
+ 3 2022-03-02 00:00:00.002 1.2 102
717
+
718
+ ``keep_first_tick_timestamp`` allows showing the original timestamp of the tick that was taken from before
719
+ the start time of the query:
720
+
721
+ >>> data = otp.DataSource(db='US_COMP', tick_type='TRD', symbols='AAPL', date=otp.dt(2022, 3, 2),
722
+ ... back_to_first_tick=otp.Day(1), keep_first_tick_timestamp='ORIGIN_TIMESTAMP')
723
+ >>> otp.run(data)
724
+ Time ORIGIN_TIMESTAMP PRICE SIZE
725
+ 0 2022-03-02 00:00:00.000 2022-03-01 00:00:00.002 1.4 50
726
+ 1 2022-03-02 00:00:00.000 2022-03-02 00:00:00.000 1.0 100
727
+ 2 2022-03-02 00:00:00.001 2022-03-02 00:00:00.001 1.1 101
728
+ 3 2022-03-02 00:00:00.002 2022-03-02 00:00:00.002 1.2 102
729
+
730
+ ``max_back_ticks_to_prepend`` is used with ``back_to_first_tick``
731
+ if more than one tick before the start time should be retrieved:
732
+
733
+ >>> data = otp.DataSource(db='US_COMP', tick_type='TRD', symbols='AAPL', date=otp.dt(2022, 3, 2),
734
+ ... max_back_ticks_to_prepend=2, back_to_first_tick=otp.Day(1),
735
+ ... keep_first_tick_timestamp='ORIGIN_TIMESTAMP')
736
+ >>> otp.run(data)
737
+ Time ORIGIN_TIMESTAMP PRICE SIZE
738
+ 0 2022-03-02 00:00:00.000 2022-03-01 00:00:00.001 1.4 10
739
+ 1 2022-03-02 00:00:00.000 2022-03-01 00:00:00.002 1.4 50
740
+ 2 2022-03-02 00:00:00.000 2022-03-02 00:00:00.000 1.0 100
741
+ 3 2022-03-02 00:00:00.001 2022-03-02 00:00:00.001 1.1 101
742
+ 4 2022-03-02 00:00:00.002 2022-03-02 00:00:00.002 1.2 102
743
+
744
+ ``where_clause_for_back_ticks`` is used to filter out ticks before the start time:
745
+
746
+ .. testcode::
747
+ :skipif: not is_supported_where_clause_for_back_ticks()
748
+
749
+ data = otp.DataSource(db='US_COMP', tick_type='TRD', symbols='AAPL', date=otp.dt(2022, 3, 2),
750
+ where_clause_for_back_ticks=otp.raw('SIZE>=50', dtype=bool),
751
+ back_to_first_tick=otp.Day(1), max_back_ticks_to_prepend=2,
752
+ keep_first_tick_timestamp='ORIGIN_TIMESTAMP')
753
+ df = otp.run(data)
754
+ print(df)
755
+
756
+ .. testoutput::
757
+
758
+ Time ORIGIN_TIMESTAMP PRICE SIZE
759
+ 0 2022-03-02 00:00:00.000 2022-03-01 00:00:00.000 1.3 100
760
+ 1 2022-03-02 00:00:00.000 2022-03-01 00:00:00.002 1.4 50
761
+ 2 2022-03-02 00:00:00.000 2022-03-02 00:00:00.000 1.0 100
762
+ 3 2022-03-02 00:00:00.001 2022-03-02 00:00:00.001 1.1 101
763
+ 4 2022-03-02 00:00:00.002 2022-03-02 00:00:00.002 1.2 102
764
+ """
765
+
766
+ self.logger = otp.get_logger(__name__, self.__class__.__name__)
767
+
768
+ if self._try_default_constructor(schema=schema, **kwargs):
769
+ return
770
+
771
+ schema = self._select_schema(schema, kwargs)
772
+
773
+ if schema and (not schema_policy or schema_policy is utils.adaptive):
774
+ schema_policy = self.POLICY_MANUAL
775
+
776
+ if schema_policy is utils.adaptive:
777
+ schema_policy = otp.config.default_schema_policy
778
+
779
+ # for cases when we want to explicitly convert into string,
780
+ # it might be symbol param or join_with_query parameter
781
+ if isinstance(tick_type, OnetickParameter):
782
+ tick_type = tick_type.parameter_expression
783
+
784
+ if date:
785
+ # TODO: write a warning in that case
786
+ start, end = get_start_end_by_date(date)
787
+
788
+ db, tick_type = self.__prepare_db_tick_type(db,
789
+ tick_type,
790
+ start,
791
+ end)
792
+
793
+ self._p_db = db
794
+
795
+ if not schema and schema_policy == self.POLICY_MANUAL_STRICT:
796
+ raise ValueError(
797
+ f"'{self.POLICY_MANUAL_STRICT}' schema policy was specified, but no schema has been provided"
798
+ )
799
+
800
+ self._p_strict = schema_policy in (self.POLICY_FAIL_STRICT,
801
+ self.POLICY_TOLERANT_STRICT,
802
+ self.POLICY_MANUAL_STRICT)
803
+
804
+ # this is deprecated, but user may have set some complex types or values in schema,
805
+ # let's infer basic onetick-py types from them
806
+ for k, v in schema.items():
807
+ schema[k] = ott.get_source_base_type(v)
808
+
809
+ self._p_schema = self.__prepare_schema(db, # tick type is embedded into the db
810
+ start,
811
+ schema_policy,
812
+ guess_schema,
813
+ schema)
814
+
815
+ if symbols is not None:
816
+ if symbol is utils.adaptive or symbol is None:
817
+ symbol = symbols
818
+ else:
819
+ # TODO: test it
820
+ raise ValueError('You have set the `symbol` and `symbols` parameters '
821
+ 'together, which is not allowed. Please clarify the parameters')
822
+
823
+ if symbol_date is not None:
824
+ if symbol is utils.adaptive or symbol is None:
825
+ raise ValueError("Parameter 'symbol_date' can only be specified together with parameter 'symbols'")
826
+ if isinstance(symbol, (str, list)):
827
+ # this is a hack
828
+ # onetick.query doesn't have an interface to set symbol_date for the EP node
829
+ # so instead of setting symbols for the EP node,
830
+ # we will turn symbol list into the first stage query, and symbol_date will be set for this query
831
+ if isinstance(symbol, str):
832
+ symbol = [symbol]
833
+ symbol = Ticks(SYMBOL_NAME=symbol)
834
+
835
+ if isinstance(symbol, Symbols) and symbol._p_db is None:
836
+ symbol = Symbols.duplicate(symbol, db=db)
837
+
838
+ if identify_input_ts:
839
+ if "SYMBOL_NAME" in schema:
840
+ # TODO: think about how the user could work around it
841
+ raise ValueError("Parameter 'identify_input_ts' is set,"
842
+ " but field 'SYMBOL_NAME' is already in the schema")
843
+ schema["SYMBOL_NAME"] = str
844
+ self._p_schema["SYMBOL_NAME"] = str
845
+ if "TICK_TYPE" in schema:
846
+ raise ValueError("Parameter 'identify_input_ts' is set,"
847
+ " but field 'TICK_TYPE' is already in the schema")
848
+ schema["TICK_TYPE"] = str
849
+ self._p_schema["TICK_TYPE"] = str
850
+
851
+ # non-obvious way to convert otp.Minute/Hour/... to a number of seconds
852
+ if type(back_to_first_tick).__name__ == '_DatePartCls':
853
+ back_to_first_tick = int((ott.dt(0) + back_to_first_tick).timestamp())
854
+
855
+ if isinstance(back_to_first_tick, _Operation):
856
+ back_to_first_tick = otp.expr(back_to_first_tick)
857
+
858
+ if back_to_first_tick != 0 and keep_first_tick_timestamp:
859
+ schema[keep_first_tick_timestamp] = ott.nsectime
860
+ self._p_schema[keep_first_tick_timestamp] = ott.nsectime
861
+
862
+ if max_back_ticks_to_prepend < 1:
863
+ raise ValueError(f'`max_back_ticks_to_prepend` must be at least 1 '
864
+ f'but {max_back_ticks_to_prepend} was passed')
865
+
866
+ if where_clause_for_back_ticks is not None:
867
+ # TODO: add otp.param here
868
+ if not isinstance(where_clause_for_back_ticks, Raw):
869
+ raise ValueError(f'Currently only otp.raw is supported for `where_clause_for_back_ticks` '
870
+ f'but {type(where_clause_for_back_ticks)} was passed')
871
+ if where_clause_for_back_ticks.dtype is not bool:
872
+ raise ValueError(f'Only bool dtype for otp.raw in `where_clause_for_back_ticks` is supported '
873
+ f'but {where_clause_for_back_ticks.dtype} was passed')
874
+ where_clause_for_back_ticks = str(where_clause_for_back_ticks)
875
+
876
+ self._schema = schema
877
+
878
+ if isinstance(symbol, _QueryEvalWrapper):
879
+ symbol_str = repr(symbol)
880
+ else:
881
+ symbol_str = symbol
882
+ self.logger.info(
883
+ otp.utils.json_dumps(dict(db=db, symbol=symbol_str, tick_type=tick_type, start=start, end=end))
884
+ )
885
+
886
+ if (
887
+ isinstance(symbol, (Source, query, _QueryEvalWrapper, otq.GraphQuery))
888
+ or hasattr(symbol, "__iter__")
889
+ and not isinstance(symbol, (dict, str, OnetickParameter, _SymbolParamColumn))
890
+ ):
891
+ super().__init__(
892
+ _start=start,
893
+ _end=end,
894
+ _base_ep_func=lambda: self._base_ep_for_cross_symbol(
895
+ db, symbol, tick_type,
896
+ identify_input_ts=identify_input_ts,
897
+ back_to_first_tick=back_to_first_tick,
898
+ keep_first_tick_timestamp=keep_first_tick_timestamp,
899
+ presort=presort, batch_size=batch_size, concurrency=concurrency,
900
+ max_back_ticks_to_prepend=max_back_ticks_to_prepend,
901
+ where_clause_for_back_ticks=where_clause_for_back_ticks,
902
+ symbol_date=symbol_date,
903
+ ),
904
+ schema=schema,
905
+ )
906
+ else:
907
+ super().__init__(
908
+ _symbols=symbol,
909
+ _start=start,
910
+ _end=end,
911
+ _base_ep_func=lambda: self.base_ep(
912
+ db,
913
+ tick_type,
914
+ identify_input_ts=identify_input_ts,
915
+ back_to_first_tick=back_to_first_tick,
916
+ keep_first_tick_timestamp=keep_first_tick_timestamp,
917
+ max_back_ticks_to_prepend=max_back_ticks_to_prepend,
918
+ where_clause_for_back_ticks=where_clause_for_back_ticks,
919
+ ),
920
+ schema=schema,
921
+ )
922
+
923
+ @property
924
+ def db(self):
925
+ return self._p_db
926
+
927
+ def _create_source(self, passthrough_ep, back_to_first_tick=0, keep_first_tick_timestamp=None):
928
+ """Create graph that save original timestamp of first tick if needed"""
929
+ if back_to_first_tick != 0 and keep_first_tick_timestamp:
930
+ src = Source(otq.Passthrough(), schema=self._schema)
931
+ src.sink(otq.AddField(field=keep_first_tick_timestamp, value='TIMESTAMP'))
932
+ src.sink(passthrough_ep)
933
+ return src
934
+ return Source(passthrough_ep, schema=self._schema)
935
+
936
+ def _table_schema(self, src):
937
+ return src.table(**self._p_schema, strict=self._p_strict)
938
+
939
+ def base_ep(
940
+ self,
941
+ db,
942
+ tick_type,
943
+ identify_input_ts,
944
+ back_to_first_tick=0,
945
+ keep_first_tick_timestamp=None,
946
+ max_back_ticks_to_prepend=1,
947
+ where_clause_for_back_ticks=None,
948
+ ):
949
+ str_db = convert_tick_type_to_str(tick_type, db)
950
+ params = dict(
951
+ go_back_to_first_tick=back_to_first_tick,
952
+ max_back_ticks_to_prepend=max_back_ticks_to_prepend,
953
+ )
954
+
955
+ if where_clause_for_back_ticks is not None:
956
+ params['where_clause_for_back_ticks'] = where_clause_for_back_ticks
957
+
958
+ if isinstance(db, (list, _SymbolParamColumn)):
959
+ src = self._create_source(otq.Passthrough(**params),
960
+ back_to_first_tick=back_to_first_tick,
961
+ keep_first_tick_timestamp=keep_first_tick_timestamp)
962
+
963
+ if identify_input_ts or '+' in str_db or str_db.startswith('expr('):
964
+ # PY-941: use MERGE only if we need to identify input or there are many databases,
965
+ # otherwise use PASSTHROUGH, which seems to work faster in some cases
966
+ src.sink(otq.Merge(identify_input_ts=identify_input_ts))
967
+ else:
968
+ if identify_input_ts:
969
+ params["fields"] = "SYMBOL_NAME,TICK_TYPE"
970
+ params["drop_fields"] = True
971
+
972
+ src = self._create_source(otq.Passthrough(**params),
973
+ back_to_first_tick=back_to_first_tick,
974
+ keep_first_tick_timestamp=keep_first_tick_timestamp)
975
+ src.tick_type(str_db)
976
+
977
+ src = self._table_schema(src)
978
+ return src
979
+
980
+ def _cross_symbol_convert(self, symbol, symbol_date=None):
981
+ tmp_otq = TmpOtq()
982
+
983
+ if isinstance(symbol, _QueryEvalWrapper):
984
+ symbol = symbol.to_eval_string(tmp_otq=tmp_otq, symbol_date=symbol_date)
985
+ elif isinstance(symbol, query):
986
+ if symbol_date is not None:
987
+ raise ValueError("Parameter 'symbol_date' is not supported if symbols are set with otp.query object")
988
+ symbol = symbol.to_eval_string()
989
+ elif isinstance(symbol, (Source, otq.GraphQuery)):
990
+ symbol = Source._convert_symbol_to_string(symbol, tmp_otq, symbol_date=symbol_date)
991
+
992
+ return symbol, tmp_otq
993
+
994
+ def _base_ep_for_cross_symbol(
995
+ self, db, symbol, tick_type, identify_input_ts,
996
+ back_to_first_tick=0, keep_first_tick_timestamp=None,
997
+ presort=utils.adaptive, batch_size=None, concurrency=utils.default,
998
+ max_back_ticks_to_prepend=1,
999
+ where_clause_for_back_ticks=None,
1000
+ symbol_date=None,
1001
+ ):
1002
+ symbol, tmp_otq = self._cross_symbol_convert(symbol, symbol_date)
1003
+
1004
+ self.logger.info(f'symbol={symbol}')
1005
+
1006
+ tick_type = convert_tick_type_to_str(tick_type, db)
1007
+
1008
+ kwargs = dict(
1009
+ go_back_to_first_tick=back_to_first_tick,
1010
+ max_back_ticks_to_prepend=max_back_ticks_to_prepend,
1011
+ )
1012
+
1013
+ if where_clause_for_back_ticks is not None:
1014
+ kwargs['where_clause_for_back_ticks'] = where_clause_for_back_ticks
1015
+
1016
+ src = self._create_source(otq.Passthrough(**kwargs),
1017
+ back_to_first_tick=back_to_first_tick,
1018
+ keep_first_tick_timestamp=keep_first_tick_timestamp)
1019
+ if presort is utils.adaptive:
1020
+ presort = True
1021
+ if presort:
1022
+ if batch_size is None:
1023
+ batch_size = otp.config.default_batch_size
1024
+ if concurrency is utils.default:
1025
+ concurrency = otp.configuration.default_presort_concurrency()
1026
+ if concurrency is None:
1027
+ # None means inherit concurrency from the query where this EP is used
1028
+ # otq.Presort does not support None
1029
+ concurrency = ''
1030
+ src.sink(
1031
+ otq.Presort(batch_size=batch_size, max_concurrency=concurrency).symbols(symbol).tick_type(tick_type)
1032
+ )
1033
+ src.sink(otq.Merge(identify_input_ts=identify_input_ts))
1034
+ else:
1035
+ src.sink(
1036
+ otq.Merge(identify_input_ts=identify_input_ts).symbols(symbol).tick_type(tick_type)
1037
+ )
1038
+
1039
+ src._tmp_otq.merge(tmp_otq)
1040
+
1041
+ src = self._table_schema(src)
1042
+ return src
1043
+
1044
+
1045
+ Custom = DataSource # for backward compatibility; previously we had only Custom
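
For reference, a minimal usage sketch based on the docstring examples above (it assumes a 'US_COMP' database with a 'TRD' tick type containing PRICE and SIZE fields and an 'AAPL' symbol for the requested date):

    import onetick.py as otp

    data = otp.DataSource(
        db='US_COMP', tick_type='TRD', symbols='AAPL',
        date=otp.dt(2022, 3, 1),
        # set the schema explicitly and skip deduction, as recommended for production code
        schema={'PRICE': float, 'SIZE': int}, schema_policy='manual',
    )
    df = otp.run(data)
    print(df.head())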