onetick-py 1.162.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152) hide show
  1. locator_parser/__init__.py +0 -0
  2. locator_parser/acl.py +73 -0
  3. locator_parser/actions.py +266 -0
  4. locator_parser/common.py +365 -0
  5. locator_parser/io.py +41 -0
  6. locator_parser/locator.py +150 -0
  7. onetick/__init__.py +101 -0
  8. onetick/doc_utilities/__init__.py +3 -0
  9. onetick/doc_utilities/napoleon.py +40 -0
  10. onetick/doc_utilities/ot_doctest.py +140 -0
  11. onetick/doc_utilities/snippets.py +280 -0
  12. onetick/lib/__init__.py +4 -0
  13. onetick/lib/instance.py +138 -0
  14. onetick/py/__init__.py +290 -0
  15. onetick/py/_stack_info.py +89 -0
  16. onetick/py/_version.py +2 -0
  17. onetick/py/aggregations/__init__.py +11 -0
  18. onetick/py/aggregations/_base.py +645 -0
  19. onetick/py/aggregations/_docs.py +912 -0
  20. onetick/py/aggregations/compute.py +286 -0
  21. onetick/py/aggregations/functions.py +2216 -0
  22. onetick/py/aggregations/generic.py +104 -0
  23. onetick/py/aggregations/high_low.py +80 -0
  24. onetick/py/aggregations/num_distinct.py +83 -0
  25. onetick/py/aggregations/order_book.py +427 -0
  26. onetick/py/aggregations/other.py +1014 -0
  27. onetick/py/backports.py +26 -0
  28. onetick/py/cache.py +373 -0
  29. onetick/py/callback/__init__.py +5 -0
  30. onetick/py/callback/callback.py +275 -0
  31. onetick/py/callback/callbacks.py +131 -0
  32. onetick/py/compatibility.py +752 -0
  33. onetick/py/configuration.py +736 -0
  34. onetick/py/core/__init__.py +0 -0
  35. onetick/py/core/_csv_inspector.py +93 -0
  36. onetick/py/core/_internal/__init__.py +0 -0
  37. onetick/py/core/_internal/_manually_bound_value.py +6 -0
  38. onetick/py/core/_internal/_nodes_history.py +250 -0
  39. onetick/py/core/_internal/_op_utils/__init__.py +0 -0
  40. onetick/py/core/_internal/_op_utils/every_operand.py +9 -0
  41. onetick/py/core/_internal/_op_utils/is_const.py +10 -0
  42. onetick/py/core/_internal/_per_tick_scripts/tick_list_sort_template.script +121 -0
  43. onetick/py/core/_internal/_proxy_node.py +140 -0
  44. onetick/py/core/_internal/_state_objects.py +2307 -0
  45. onetick/py/core/_internal/_state_vars.py +87 -0
  46. onetick/py/core/_source/__init__.py +0 -0
  47. onetick/py/core/_source/_symbol_param.py +95 -0
  48. onetick/py/core/_source/schema.py +97 -0
  49. onetick/py/core/_source/source_methods/__init__.py +0 -0
  50. onetick/py/core/_source/source_methods/aggregations.py +810 -0
  51. onetick/py/core/_source/source_methods/applyers.py +296 -0
  52. onetick/py/core/_source/source_methods/columns.py +141 -0
  53. onetick/py/core/_source/source_methods/data_quality.py +301 -0
  54. onetick/py/core/_source/source_methods/debugs.py +270 -0
  55. onetick/py/core/_source/source_methods/drops.py +120 -0
  56. onetick/py/core/_source/source_methods/fields.py +619 -0
  57. onetick/py/core/_source/source_methods/filters.py +1001 -0
  58. onetick/py/core/_source/source_methods/joins.py +1393 -0
  59. onetick/py/core/_source/source_methods/merges.py +566 -0
  60. onetick/py/core/_source/source_methods/misc.py +1325 -0
  61. onetick/py/core/_source/source_methods/pandases.py +155 -0
  62. onetick/py/core/_source/source_methods/renames.py +356 -0
  63. onetick/py/core/_source/source_methods/sorts.py +183 -0
  64. onetick/py/core/_source/source_methods/switches.py +142 -0
  65. onetick/py/core/_source/source_methods/symbols.py +117 -0
  66. onetick/py/core/_source/source_methods/times.py +627 -0
  67. onetick/py/core/_source/source_methods/writes.py +702 -0
  68. onetick/py/core/_source/symbol.py +202 -0
  69. onetick/py/core/_source/tmp_otq.py +222 -0
  70. onetick/py/core/column.py +209 -0
  71. onetick/py/core/column_operations/__init__.py +0 -0
  72. onetick/py/core/column_operations/_methods/__init__.py +4 -0
  73. onetick/py/core/column_operations/_methods/_internal.py +28 -0
  74. onetick/py/core/column_operations/_methods/conversions.py +215 -0
  75. onetick/py/core/column_operations/_methods/methods.py +294 -0
  76. onetick/py/core/column_operations/_methods/op_types.py +150 -0
  77. onetick/py/core/column_operations/accessors/__init__.py +0 -0
  78. onetick/py/core/column_operations/accessors/_accessor.py +30 -0
  79. onetick/py/core/column_operations/accessors/decimal_accessor.py +92 -0
  80. onetick/py/core/column_operations/accessors/dt_accessor.py +464 -0
  81. onetick/py/core/column_operations/accessors/float_accessor.py +160 -0
  82. onetick/py/core/column_operations/accessors/str_accessor.py +1374 -0
  83. onetick/py/core/column_operations/base.py +1061 -0
  84. onetick/py/core/cut_builder.py +149 -0
  85. onetick/py/core/db_constants.py +20 -0
  86. onetick/py/core/eval_query.py +244 -0
  87. onetick/py/core/lambda_object.py +442 -0
  88. onetick/py/core/multi_output_source.py +193 -0
  89. onetick/py/core/per_tick_script.py +2253 -0
  90. onetick/py/core/query_inspector.py +465 -0
  91. onetick/py/core/source.py +1663 -0
  92. onetick/py/db/__init__.py +2 -0
  93. onetick/py/db/_inspection.py +1042 -0
  94. onetick/py/db/db.py +1423 -0
  95. onetick/py/db/utils.py +64 -0
  96. onetick/py/docs/__init__.py +0 -0
  97. onetick/py/docs/docstring_parser.py +112 -0
  98. onetick/py/docs/utils.py +81 -0
  99. onetick/py/functions.py +2354 -0
  100. onetick/py/license.py +188 -0
  101. onetick/py/log.py +88 -0
  102. onetick/py/math.py +947 -0
  103. onetick/py/misc.py +437 -0
  104. onetick/py/oqd/__init__.py +22 -0
  105. onetick/py/oqd/eps.py +1195 -0
  106. onetick/py/oqd/sources.py +325 -0
  107. onetick/py/otq.py +211 -0
  108. onetick/py/pyomd_mock.py +47 -0
  109. onetick/py/run.py +841 -0
  110. onetick/py/servers.py +173 -0
  111. onetick/py/session.py +1342 -0
  112. onetick/py/sources/__init__.py +19 -0
  113. onetick/py/sources/cache.py +167 -0
  114. onetick/py/sources/common.py +126 -0
  115. onetick/py/sources/csv.py +642 -0
  116. onetick/py/sources/custom.py +85 -0
  117. onetick/py/sources/data_file.py +305 -0
  118. onetick/py/sources/data_source.py +1049 -0
  119. onetick/py/sources/empty.py +94 -0
  120. onetick/py/sources/odbc.py +337 -0
  121. onetick/py/sources/order_book.py +238 -0
  122. onetick/py/sources/parquet.py +168 -0
  123. onetick/py/sources/pit.py +191 -0
  124. onetick/py/sources/query.py +495 -0
  125. onetick/py/sources/snapshots.py +419 -0
  126. onetick/py/sources/split_query_output_by_symbol.py +198 -0
  127. onetick/py/sources/symbology_mapping.py +123 -0
  128. onetick/py/sources/symbols.py +357 -0
  129. onetick/py/sources/ticks.py +825 -0
  130. onetick/py/sql.py +70 -0
  131. onetick/py/state.py +256 -0
  132. onetick/py/types.py +2056 -0
  133. onetick/py/utils/__init__.py +70 -0
  134. onetick/py/utils/acl.py +93 -0
  135. onetick/py/utils/config.py +186 -0
  136. onetick/py/utils/default.py +49 -0
  137. onetick/py/utils/file.py +38 -0
  138. onetick/py/utils/helpers.py +76 -0
  139. onetick/py/utils/locator.py +94 -0
  140. onetick/py/utils/perf.py +499 -0
  141. onetick/py/utils/query.py +49 -0
  142. onetick/py/utils/render.py +1139 -0
  143. onetick/py/utils/script.py +244 -0
  144. onetick/py/utils/temp.py +471 -0
  145. onetick/py/utils/types.py +118 -0
  146. onetick/py/utils/tz.py +82 -0
  147. onetick_py-1.162.2.dist-info/METADATA +148 -0
  148. onetick_py-1.162.2.dist-info/RECORD +152 -0
  149. onetick_py-1.162.2.dist-info/WHEEL +5 -0
  150. onetick_py-1.162.2.dist-info/entry_points.txt +2 -0
  151. onetick_py-1.162.2.dist-info/licenses/LICENSE +21 -0
  152. onetick_py-1.162.2.dist-info/top_level.txt +2 -0
@@ -0,0 +1,1393 @@
1
+ import inspect
2
+ import warnings
3
+ from datetime import datetime
4
+ from typing import TYPE_CHECKING, List, Optional, Union
5
+ from onetick.py.backports import Literal
6
+
7
+ from onetick import py as otp
8
+ from onetick.py import types as ott
9
+ from onetick.py.core._internal._state_objects import _TickSequence
10
+ from onetick.py.core._source._symbol_param import _SymbolParamSource
11
+ from onetick.py.core.column_operations._methods.op_types import are_strings
12
+ from onetick.py.core.column_operations.base import _Operation
13
+ from onetick.py.core.eval_query import prepare_params
14
+ from onetick.py.otq import otq
15
+ from onetick.py.compatibility import (
16
+ is_supported_point_in_time,
17
+ is_join_with_snapshot_snapshot_fields_parameter_supported,
18
+ )
19
+
20
+ if TYPE_CHECKING:
21
+ from onetick.py.core.source import Source
22
+
23
+
24
+ def _process_keep_time_param(self: 'Source', keep_time, sub_source):
25
+ if keep_time == "TIMESTAMP":
26
+ raise ValueError("TIMESTAMP is reserved OneTick name, please, specify another one.")
27
+ if keep_time in self.columns():
28
+ raise ValueError(f"{keep_time} column is already presented.")
29
+ sub_source = sub_source.copy()
30
+ if keep_time:
31
+ sub_source[keep_time] = sub_source["Time"]
32
+ return sub_source
33
+
34
+
35
+ def _process_start_or_end_of_jwq(join_params, time, param_name):
36
+ if time is not None:
37
+ if isinstance(time, (datetime, otp.dt)):
38
+ join_params[f"{param_name}"] = ott.datetime2expr(time)
39
+ elif isinstance(time, _Operation):
40
+ join_params[f"{param_name}"] = str(time)
41
+ else:
42
+ raise ValueError(f"{param_name} should be datetime.datetime instance or OneTick expression")
43
+
44
+
45
+ def _columns_to_params_for_joins(columns, query_params=False):
46
+ """
47
+ Converts a dictionary of columns into a parameters string.
48
+ This is mainly used for join_with_query and join_with_collection.
49
+
50
+ query_params control whether resulting string should be considered query params or symbol params
51
+ (as it impacts some of the conversion rules)
52
+ """
53
+ params_list = []
54
+
55
+ def get_msecs_expression(value):
56
+ return f"tostring(GET_MSECS({str(value)}))"
57
+
58
+ for key, value in columns.items():
59
+ dtype = ott.get_object_type(value)
60
+ convert_rule = "'" + key + "=' + "
61
+
62
+ if key == '_PARAM_SYMBOL_TIME' and not query_params:
63
+ # this symbol parameter has to be formatted differently because Onetick treats this parameter
64
+ # in a special way
65
+ if dtype is otp.nsectime:
66
+ convert_rule += f'NSECTIME_FORMAT("%Y%m%d%H%M%S.%J",{ott.value2str(value)},_TIMEZONE)'
67
+ elif dtype is str:
68
+ if are_strings(getattr(value, "dtype", None)):
69
+ convert_rule += str(value)
70
+ else:
71
+ convert_rule += '"' + value + '"'
72
+ else:
73
+ raise ValueError('Parameter symbol_time has to be a datetime value!')
74
+
75
+ elif dtype is str:
76
+ if are_strings(getattr(value, "dtype", None)):
77
+ convert_rule += str(value)
78
+ else:
79
+ convert_rule += '"' + value + '"'
80
+ elif dtype is otp.msectime:
81
+ convert_rule += get_msecs_expression(value)
82
+ elif dtype is otp.nsectime:
83
+ if key == '_SYMBOL_TIME' and query_params:
84
+ # hack to support passing _SYMBOL_TIME to called query as a parameter
85
+ warnings.warn(
86
+ 'Query parameter _SYMBOL_TIME passed to join_with_query! '
87
+ 'This is deprecated. Please use a dedicated `symbol_time` parameter of the '
88
+ 'join_with_query function',
89
+ FutureWarning,
90
+ stacklevel=3,
91
+ )
92
+ convert_rule += get_msecs_expression(value)
93
+ elif query_params:
94
+ # this can be used for query params but cannot be used for symbol params
95
+ # overall it's better
96
+ convert_rule += "'NSECTIME('+tostring(NSECTIME_TO_LONG(" + str(value) + "))+')'"
97
+ else:
98
+ # this matches the common way onetick converts nanoseconds to symbol parameters
99
+ convert_rule += (
100
+ get_msecs_expression(value) + "+'.'+SUBSTR(NSECTIME_FORMAT('%J'," + str(value) + ",_TIMEZONE),3,6)"
101
+ )
102
+ else:
103
+ convert_rule += "tostring(" + str(value) + ")"
104
+ params_list.append(convert_rule)
105
+ return "+','+".join(params_list)
106
+
107
+
108
def _check_and_convert_symbol(symbol):
    """
    Split the ``symbol`` function parameter into a symbol name expression
    (OneTick string representation) and symbol parameters.

    ``symbol`` may hold a symbol name (string, number, operation),
    symbol parameters (any other non-None object), or a 2-tuple with both.

    Returns
    -------
    tuple
        ``(symbol_name, symbol_param)``; ``symbol_name`` is ``None`` when no
        name was passed, ``symbol_param`` is an empty dict when no parameters
        were passed.
    """
    symbol_param = {}

    # a 2-tuple carries the name and the parameters together
    if isinstance(symbol, tuple) and len(symbol) == 2:
        symbol, symbol_param = symbol

    symbol_name = None
    if isinstance(symbol, _Operation):  # TODO: PY-35
        symbol_name = f"tostring({str(symbol)})"
    elif isinstance(symbol, str):
        symbol_name = f"'{symbol}'"
    elif type(symbol) in {int, float}:  # constant
        symbol_name = f"tostring({symbol})"
    elif symbol is not None and not symbol_param:
        # None is a valid "no symbol name" value; everything else that is not
        # a name is treated as symbol parameters unless they were already
        # taken from the tuple
        symbol_param = symbol

    return symbol_name, symbol_param
138
+
139
+
140
def _convert_symbol_param_and_columns(symbol_param):
    """
    Turn a symbol param (a dict, a Source or a _SymbolParamSource) into two objects:

    1. Dictionary of columns used to generate the list of symbol parameters for the JOIN_WITH_QUERY EP
    2. _SymbolParamSource object to pass to the source function if necessary

    Fields ``_PARAM_START_TIME_NANOS`` and ``_PARAM_END_TIME_NANOS`` are removed
    from both results; a FutureWarning is issued for each of them found among the columns.

    Returns ``(None, None)`` when ``symbol_param`` is of any other type.
    """
    if isinstance(symbol_param, dict):
        columns = symbol_param
        param_source = _SymbolParamSource(
            **{name: ott.get_object_type(column) for name, column in symbol_param.items()}
        )
    elif isinstance(symbol_param, otp.Source):
        columns = {
            name: symbol_param[name] for name in symbol_param.columns(skip_meta_fields=True).keys()
        }
        param_source = symbol_param.to_symbol_param()
    elif isinstance(symbol_param, _SymbolParamSource):
        columns = {name: symbol_param[name] for name in symbol_param.schema.keys()}
        param_source = symbol_param
    else:
        return None, None

    # we want to pass all the fields to the joined query as symbol parameters,
    # except for some special fields that would override explicitly set parameters
    special_fields = ('_PARAM_START_TIME_NANOS', '_PARAM_END_TIME_NANOS')

    kept_columns = {}
    for field_name, field_value in columns.items():
        if field_name not in special_fields:
            kept_columns[field_name] = field_value
            continue
        warnings.warn(
            f'Special symbol parameter "{field_name}" was passed to the joined query! '
            'This parameter would be ignored. Please, use parameters of the `join_with_query` '
            'function itself to set it.',
            FutureWarning,
            stacklevel=2,
        )

    kept_schema = {
        field_name: field_type
        for field_name, field_type in param_source.schema.items()
        if field_name not in special_fields
    }
    return kept_columns, _SymbolParamSource(**kept_schema)
192
+
193
+
194
def _fill_time_param_for_jwq(join_params, start_time, end_time, timezone):
    """
    Put the time-related parameters into ``join_params``.

    ``start_timestamp`` and ``end_timestamp`` are set through
    ``_process_start_or_end_of_jwq``; ``timezone`` is always set, either to the
    quoted ``timezone`` value or to the ``_TIMEZONE`` expression when
    ``timezone`` is falsy.
    """
    bounds = (
        (start_time, "start_timestamp"),
        (end_time, "end_timestamp"),
    )
    for bound_value, bound_name in bounds:
        _process_start_or_end_of_jwq(join_params, bound_value, bound_name)

    # falling back to _TIMEZONE may break something, need to test
    join_params["timezone"] = f"'{timezone}'" if timezone else "_TIMEZONE"
201
+
202
+
203
+ def _fill_aux_params_for_joins(
204
+ join_params, caching, end_time, prefix, start_time, symbol_name, timezone, join_with_collection=False
205
+ ):
206
+ if symbol_name and not join_with_collection:
207
+ join_params["symbol_name"] = symbol_name
208
+ if prefix is not None:
209
+ join_params["prefix_for_output_ticks"] = str(prefix)
210
+ if caching:
211
+ if join_with_collection:
212
+ supported = ("per_symbol",)
213
+ else:
214
+ supported = ("cross_symbol", "per_symbol")
215
+ if caching in supported:
216
+ join_params["caching_scope"] = caching
217
+ else:
218
+ raise ValueError(f"Unknown value for caching param, please use None or any of {supported}.")
219
+ _fill_time_param_for_jwq(join_params, start_time, end_time, timezone)
220
+ if join_with_collection:
221
+ del join_params['timezone']
222
+
223
+
224
+ def _get_default_fields_for_outer_join_str(default_fields_for_outer_join, how, sub_source_schema):
225
+ """
226
+ Default fields for outer join definition.
227
+ Used by join_with_query() and join_with_collection()
228
+ """
229
+ default_fields_for_outer_join_str = ''
230
+ if default_fields_for_outer_join:
231
+ if how != 'outer':
232
+ raise ValueError('The `default_fields_for_outer_join` parameter can be used only for outer join')
233
+ for field, expr in default_fields_for_outer_join.items():
234
+ if field not in sub_source_schema.keys():
235
+ raise KeyError(
236
+ f'Field {field} is specified in `default_fields_for_outer_join` parameter, '
237
+ 'but is not present in the joined source schema!'
238
+ )
239
+ if default_fields_for_outer_join_str != '':
240
+ default_fields_for_outer_join_str += ','
241
+ default_fields_for_outer_join_str += f'{field}={ott.value2str(expr)}'
242
+ return default_fields_for_outer_join_str
243
+
244
+
245
+ def _get_columns_with_prefix(self: 'Source', sub_source, prefix) -> dict:
246
+ sub_source_columns = sub_source.schema
247
+ if prefix is None:
248
+ prefix = ""
249
+ if not isinstance(prefix, str):
250
+ raise ValueError("Only string constants are supported for now.")
251
+ new_columns = {prefix + name: dtype for name, dtype in sub_source_columns.items()}
252
+ same_names = set(new_columns) & set(self.schema)
253
+ if same_names:
254
+ raise ValueError(f"After applying prefix some columns aren't unique: {', '.join(same_names)}.")
255
+ return new_columns
256
+
257
+
258
def join_with_collection(
    self: 'Source',
    collection_name,
    query_func=None,
    how="outer",
    params=None,
    start=None,
    end=None,
    prefix=None,
    caching=None,
    keep_time=None,
    default_fields_for_outer_join=None,
) -> 'Source':
    """
    For each tick uses ``query_func`` to join ticks from ``collection_name`` tick collection
    (tick set, unordered tick set, tick list, or tick deque).

    Parameters
    ----------
    collection_name: str
        Name of the collection state variable from which to join ticks. Collections are the following types:
        :py:class:`TickSet <onetick.py.core._internal._state_objects.TickSet>`,
        :py:class:`TickSetUnordered <onetick.py.core._internal._state_objects.TickSetUnordered>`,
        :py:class:`TickList <onetick.py.core._internal._state_objects.TickList>` and
        :py:class:`TickDeque <onetick.py.core._internal._state_objects.TickDeque>`.

    query_func: callable
        Callable ``query_func`` should return :class:`Source`. If passed, this query will be used on ticks
        from collection before joining them.
        In this case, ``query_func`` object will be evaluated by OneTick (not python)
        for every input tick. Note that python code will be executed only once,
        so all python's conditional expressions will be evaluated only once too.

        Callable should have ``source`` parameter. When callable is called, this parameter
        will have value of a :class:`Source` object representing ticks loaded directly from the collection.
        Any operation applied to this source will be applied to ticks from the collection
        before joining them.

        Also, callable should have the parameters with names
        from ``params`` if they are specified in this method.

        If ``query_func`` is not passed, then all ticks from the collection will be joined.
    how: 'inner', 'outer'
        Type of join. If **inner**, then output tick is propagated
        only if some ticks from the collection were joined to the input tick.
    params: dict
        Mapping of the parameters' names and their values for the ``query_func``.
        :py:class:`Columns <onetick.py.Column>` can be used as a value.
        The passed dictionary is not modified by this method.
    start: :py:class:`otp.datetime <onetick.py.datetime>`, :py:class:`otp.Operation <onetick.py.Operation>`
        Start time to select ticks from collection.
        If specified, only ticks in collection that have higher or equal timestamp will be processed.
        If not passed, then there will be no lower time bound for the collection ticks.
        This means that even ticks with TIMESTAMP lower than _START_TIME of the main query will be joined.
    end: :py:class:`otp.datetime <onetick.py.datetime>`, :py:class:`otp.Operation <onetick.py.Operation>`
        End time to select ticks from collection.
        If specified, only ticks in collection that have lower timestamp will be processed.
        If not passed, then there will be no upper time bound for the collection ticks.
        This means that even ticks with TIMESTAMP higher than _END_TIME of the main query will be joined.
    prefix : str
        Prefix for the names of joined tick fields.
    caching : str
        If `None` caching is disabled. You can specify caching by using values:

        * 'per_symbol': cache is different for each symbol.
    keep_time : str
        Name for the joined timestamp column. `None` means no timestamp column will be joined.
    default_fields_for_outer_join : dict
        When you use outer join, all output ticks will have fields from the schema of the joined source.
        If nothing was joined to a particular output tick, these fields will have default values for their type.
        This parameter allows to override the values that would be added to ticks for which nothing was joined.
        Dictionary keys should be field names, and dictionary values should be constants
        or :class:`Operation` expressions

    Returns
    -------
    :class:`Source`
        Source with joined ticks from ``collection_name``

    See also
    --------
    **JOIN_WITH_COLLECTION_SUMMARY** OneTick event processor

    Examples
    --------
    >>> # OTdirective: snippet-name: Special functions.join with collection.without query;
    >>> src = otp.Tick(A=1)
    >>> src.state_vars['TICK_SET'] = otp.state.tick_set('LATEST_TICK', 'B', otp.eval(otp.Tick(B=1, C='STR')))
    >>> src = src.join_with_collection('TICK_SET')
    >>> otp.run(src)[["A", "B", "C"]]
       A  B    C
    0  1  1  STR

    >>> # OTdirective: snippet-name: Special functions.join with collection.with query and params;
    >>> src = otp.Ticks(A=[1, 2, 3, 4, 5],
    ...                 B=[2, 2, 3, 3, 3])
    >>> src.state_vars['TICK_LIST'] = otp.state.tick_list()
    >>> def fun(tick): tick.state_vars['TICK_LIST'].push_back(tick)
    >>> src = src.script(fun)
    >>>
    >>> def join_fun(source, param_b):
    ...     source = source.agg(dict(VALUE=otp.agg.sum(source['A'])))
    ...     source['VALUE'] = source['VALUE'] + param_b
    ...     return source
    >>>
    >>> src = src.join_with_collection('TICK_LIST', join_fun, params=dict(param_b=src['B']))
    >>> otp.run(src)[["A", "B", "VALUE"]]
       A  B  VALUE
    0  1  2      3
    1  2  2      5
    2  3  3      9
    3  4  3     13
    4  5  3     18

    Join last standing quote from each exchange to trades:

    >>> # OTdirective: snippet-name: Special functions.join with collection.standing quotes per exchange;
    >>> trd = otp.Ticks(offset=[1000, 2000, 3000, 4000, 5000],
    ...                 PRICE=[10.1, 10.2, 10.15, 10.23, 10.4],
    ...                 SIZE=[100, 50, 100, 60, 200])
    >>>
    >>> qte = otp.Ticks(offset=[500, 600, 1200, 2500, 3500, 3600, 4800],
    ...                 EXCHANGE=['N', 'C', 'Q', 'Q', 'C', 'N', 'C'],
    ...                 ASK_PRICE=[10.2, 10.18, 10.18, 10.15, 10.31, 10.32, 10.44],
    ...                 BID_PRICE=[10.1, 10.17, 10.17, 10.1, 10.23, 10.31, 10.4])
    >>>
    >>> trd['TICK_TYPE'] = 'TRD'
    >>> qte['TICK_TYPE'] = 'QTE'
    >>>
    >>> trd_qte = trd + qte
    >>> trd_qte.state_vars['LAST_QUOTE_PER_EXCHANGE'] = otp.state.tick_set(
    ...     'LATEST', 'EXCHANGE',
    ...     schema=['EXCHANGE', 'ASK_PRICE', 'BID_PRICE'])
    >>>
    >>> trd_qte = trd_qte.state_vars['LAST_QUOTE_PER_EXCHANGE'].update(where=trd_qte['TICK_TYPE'] == 'QTE',
    ...                                                                value_fields=['ASK_PRICE', 'BID_PRICE'])
    >>> trd, _ = trd_qte[trd_qte['TICK_TYPE'] == 'TRD']
    >>> trd.drop(['ASK_PRICE', 'BID_PRICE', 'EXCHANGE'], inplace=True)
    >>> trd = trd.join_with_collection('LAST_QUOTE_PER_EXCHANGE')
    >>> otp.run(trd)[['PRICE', 'SIZE', 'EXCHANGE', 'ASK_PRICE', 'BID_PRICE']]
        PRICE  SIZE EXCHANGE  ASK_PRICE  BID_PRICE
    0   10.10   100        N      10.20      10.10
    1   10.10   100        C      10.18      10.17
    2   10.20    50        N      10.20      10.10
    3   10.20    50        C      10.18      10.17
    4   10.20    50        Q      10.18      10.17
    5   10.15   100        N      10.20      10.10
    6   10.15   100        C      10.18      10.17
    7   10.15   100        Q      10.15      10.10
    8   10.23    60        N      10.32      10.31
    9   10.23    60        C      10.31      10.23
    10  10.23    60        Q      10.15      10.10
    11  10.40   200        N      10.32      10.31
    12  10.40   200        C      10.44      10.40
    13  10.40   200        Q      10.15      10.10
    """

    # check that passed collection is good
    if collection_name not in self.state_vars.names:
        raise KeyError(f'Collection with name {collection_name} is not in the list of available state variables')

    if not isinstance(self.state_vars[collection_name], _TickSequence):
        raise ValueError(
            f'State variable {collection_name} is not a tick collection! '
            'Only TickSet, TickSetUnordered, TickList and TickDeque objects are supported '
            'as data sources for join_with_collection'
        )

    # Work on a private copy: the special "__fixed_*" entries added below must not
    # leak into the caller's dictionary, otherwise reusing the same dictionary
    # in another call would be rejected by the special-name check.
    params = {} if params is None else dict(params)

    special_params = ('source', '__fixed_start_time', '__fixed_end_time')
    for sp_param in special_params:
        if sp_param in params:
            raise ValueError(
                f'Parameter name "{sp_param}" is special and cannot be used for params '
                'of join_with_collection function. Please, select a different name.'
            )

    # JOIN_WITH_COLLECTION_SUMMARY has START_TIME and END_TIME parameters with the precision of millisecond.
    # So, here we add a workaround on onetick.py side to support nsectime precision
    # "start" and "end" parameters of the EP are kept as they may be necessary
    # for performance reasons

    if start is not None:
        params['__fixed_start_time'] = start
        start = start - otp.Milli(1)

    if end is not None:
        params['__fixed_end_time'] = end
        end = end + otp.Milli(1)

    # prepare temporary file
    # ------------------------------------ #

    # TODO: this should be a common code somewhere
    collection_schema = {
        key: value
        for key, value in self.state_vars[collection_name].schema.items()
        if not self._check_key_is_reserved(key)
    }

    join_source_root = otp.DataSource(
        db=otp.config.default_db, tick_type="ANY", schema_policy="manual", schema=collection_schema,
    )
    if query_func is None:
        query_func = lambda source: source  # noqa

    converted_params = prepare_params(**params)

    # the exact time bounds are applied as filters below and are not passed to query_func
    fixed_start_time = converted_params.pop('__fixed_start_time', None)
    fixed_end_time = converted_params.pop('__fixed_end_time', None)

    sub_source = query_func(source=join_source_root, **converted_params)

    if fixed_start_time is not None:
        sub_source = sub_source[sub_source['TIMESTAMP'] >= fixed_start_time][0]
    if fixed_end_time is not None:
        sub_source = sub_source[sub_source['TIMESTAMP'] < fixed_end_time][0]

    sub_source = self._process_keep_time_param(keep_time, sub_source)

    # the fixed bounds stay in params here, so they are passed to the stored query as query parameters
    params_str = _columns_to_params_for_joins(params, query_params=True)

    sub_source_schema = sub_source.schema.copy()

    columns = {}
    columns.update(self._get_columns_with_prefix(sub_source, prefix))
    columns.update(self.columns(skip_meta_fields=True))

    res = self.copy(columns=columns)

    res._merge_tmp_otq(sub_source)
    query_name = sub_source._store_in_tmp_otq(
        res._tmp_otq, symbols='_NON_EXISTING_SYMBOL_', operation_suffix="join_with_collection"
    )
    # ------------------------------------ #
    default_fields_for_outer_join_str = _get_default_fields_for_outer_join_str(
        default_fields_for_outer_join, how, sub_source_schema
    )

    join_params = dict(
        collection_name=str(self.state_vars[collection_name]),
        otq_query=f'"THIS::{query_name}"',
        join_type=how.upper(),
        otq_query_params=params_str,
        default_fields_for_outer_join=default_fields_for_outer_join_str,
    )

    _fill_aux_params_for_joins(
        join_params, caching, end, prefix, start, symbol_name=None, timezone=None, join_with_collection=True
    )
    res.sink(otq.JoinWithCollectionSummary(**join_params))
    res._add_table()
    res.sink(otq.Passthrough(fields="TIMESTAMP", drop_fields=True))

    return res
520
+
521
+
522
+ def join_with_query(
523
+ self: 'Source',
524
+ query,
525
+ how="outer",
526
+ symbol=None,
527
+ params=None,
528
+ start=None,
529
+ end=None,
530
+ timezone=None,
531
+ prefix=None,
532
+ caching=None,
533
+ keep_time=None,
534
+ where=None,
535
+ default_fields_for_outer_join=None,
536
+ symbol_time=None,
537
+ concurrency=None,
538
+ process_query_async: bool = True,
539
+ **kwargs,
540
+ ) -> 'Source':
541
+ """
542
+ For each tick executes ``query``.
543
+
544
+ Parameters
545
+ ----------
546
+ query: callable, Source
547
+ Callable ``query`` should return :class:`Source`. This object will be evaluated by OneTick (not python)
548
+ for every tick. Note python code will be executed only once, so all python's conditional expressions
549
+ will be evaluated only once too.
550
+ Callable should have ``symbol`` parameter and the parameters with names
551
+ from ``params`` if they are specified in this method.
552
+
553
+ If ``query`` is a :class:`Source` object then it will be propagated as a query to OneTick.
554
+ how: 'inner', 'outer'
555
+ Type of join. If **inner**, then each tick is propagated
556
+ only if its ``query`` execution has a non-empty result.
557
+ params: dict
558
+ Mapping of the parameters' names and their values for the ``query``.
559
+ :py:class:`Columns <onetick.py.Column>` can be used as a value.
560
+ symbol: str, Operation, dict, Source, or Tuple[Union[str, Operation], Union[dict, Source]]
561
+ Symbol name to use in ``query``. In addition, symbol params can be passed along with symbol name.
562
+
563
+ Symbol name can be passed as a string or as an :class:`Operation`.
564
+
565
+ Symbol parameters can be passed as a dictionary. Also, the main :class:`Source` object,
566
+ or the object containing a symbol parameter list, can be used as a list of symbol parameter.
567
+ Special symbol parameters (`_PARAM_START_TIME_NANOS` and `_PARAM_END_TIME_NANOS`)
568
+ will be ignored and will not be propagated to ``query``.
569
+
570
+ ``symbol`` will be interpreted as a symbol name or as symbol parameters, depending on its type.
571
+ You can pass both as a tuple.
572
+
573
+ If symbol name is not passed, then symbol name from the main source is used.
574
+ start: :py:class:`otp.datetime <onetick.py.datetime>`, :py:class:`otp.Operation <onetick.py.Operation>`
575
+ Start time of ``query``.
576
+ By default, start time of the main source is used.
577
+ end: :py:class:`otp.datetime <onetick.py.datetime>`, :py:class:`otp.Operation <onetick.py.Operation>`
578
+ End time of ``query`` (note that it's non-inclusive).
579
+ By default, end time of the main source is used.
580
+ start_time:
581
+ .. deprecated:: 1.48.4
582
+ The same as ``start``.
583
+ end_time:
584
+ .. deprecated:: 1.48.4
585
+ The same as ``end``.
586
+ timezone : Optional, str
587
+ Timezone of ``query``.
588
+ By default, timezone of the main source is used.
589
+ prefix : str
590
+ Prefix for the names of joined tick fields.
591
+ caching : str
592
+ If `None` caching is disabled (default). You can specify caching by using values:
593
+
594
+ * 'cross_symbol': cache is the same for all symbols
595
+
596
+ * 'per_symbol': cache is different for each symbol.
597
+
598
+ .. note::
599
+ When parameter ``process_query_async`` is set to ``True`` (default), caching may work
600
+ unexpectedly, because ticks will be accumulated in batches and ``query`` will be processed
601
+ in different threads.
602
+ keep_time : str
603
+ Name for the joined timestamp column. `None` means no timestamp column will be joined.
604
+ where : Operation
605
+ Condition to filter ticks for which the result of the ``query`` will be joined.
606
+ default_fields_for_outer_join : dict
607
+ When you use outer join, all output ticks will have fields from the schema of the joined source.
608
+ If nothing was joined to a particular output tick, these fields will have default values for their type.
609
+ This parameter allows to override the values that would be added to ticks for which nothing was joined.
610
+ Dictionary keys should be field names, and dictionary values should be constants
611
+ or :class:`Operation` expressions
612
+ symbol_time : :py:class:`otp.datetime <onetick.py.datetime>`, :py:class:`otp.Operation <onetick.py.Operation>`
613
+ Time that will be used by Onetick to map the symbol with which ``query`` is executed to the reference data.
614
+ This parameter is only necessary if the query is expected to perform symbology conversions.
615
+ concurrency : int
616
+ Specifies number of threads for asynchronous processing of ``query`` per unbound symbol list.
617
+ By default, the number of threads is 1.
618
+ process_query_async: bool
619
+ Switches between synchronous and asynchronous execution of queries.
620
+
621
+ While asynchronous execution is generally much more effective,
622
+ in certain cases synchronous execution may still be preferred
623
+ (e.g., when there are a few input ticks, each initiating a memory-consuming query).
624
+
625
+ In asynchronous mode typically while parallel thread is processing the query,
626
+ EP accumulates some input ticks.
627
+
628
+
629
+ Returns
630
+ -------
631
+ :class:`Source`
632
+ Source with joined ticks from ``query``
633
+
634
+ See also
635
+ --------
636
+ **JOIN_WITH_QUERY** OneTick event processor
637
+
638
+ Examples
639
+ --------
640
+ >>> # OTdirective: snippet-name: Special functions.join with query.with an otp data source;
641
+ >>> d = otp.Ticks(Y=[-1])
642
+ >>> d = d.update(dict(Y=1), where=(d.Symbol.name == "a"))
643
+ >>> data = otp.Ticks(X=[1, 2],
644
+ ... S=["a", "b"])
645
+ >>> res = data.join_with_query(d, how='inner', symbol=data['S'])
646
+ >>> otp.run(res)[["X", "Y", "S"]]
647
+ X Y S
648
+ 0 1 1 a
649
+ 1 2 -1 b
650
+
651
+ >>> d = otp.Ticks(ADDED=[-1])
652
+ >>> d = d.update(dict(ADDED=1), where=(d.Symbol.name == "3")) # symbol name is always string
653
+ >>> data = otp.Ticks(A=[1, 2], B=[2, 4])
654
+ >>> res = data.join_with_query(d, how='inner', symbol=(data['A'] + data['B'])) # OTdirective: skip-snippet:;
655
+ >>> df = otp.run(res)
656
+ >>> df[["A", "B", "ADDED"]]
657
+ A B ADDED
658
+ 0 1 2 1
659
+ 1 2 4 -1
660
+
661
+ Constants as symbols are also supported:
662
+
663
+ >>> d = otp.Ticks(ADDED=[d.Symbol.name])
664
+ >>> data = otp.Ticks(A=[1, 2], B=[2, 4])
665
+ >>> res = data.join_with_query(d, how='inner', symbol=1) # OTdirective: skip-snippet:;
666
+ >>> df = otp.run(res)
667
+ >>> df[["A", "B", "ADDED"]]
668
+ A B ADDED
669
+ 0 1 2 1
670
+ 1 2 4 1
671
+
672
+ Function object as query is also supported (Note it will be executed only once in python's code):
673
+
674
+ >>> def func(symbol):
675
+ ... d = otp.Ticks(TYPE=["six"])
676
+ ... d = d.update(dict(TYPE="three"), where=(symbol.name == "3")) # symbol is always converted to string
677
+ ... d["TYPE"] = symbol['PREF'] + d["TYPE"] + symbol['POST']
678
+ ... return d
679
+ >>> # OTdirective: snippet-name: Special functions.join with query.with a function
680
+ >>> data = otp.Ticks(A=[1, 2], B=[2, 4])
681
+ >>> res = data.join_with_query(func, how='inner', symbol=(data['A'] + data['B'], dict(PREF="_", POST="$")))
682
+ >>> df = otp.run(res)
683
+ >>> df[["A", "B", "TYPE"]]
684
+ A B TYPE
685
+ 0 1 2 _three$
686
+ 1 2 4 _six$
687
+
688
+ It's possible to pass the source itself as a list of symbol parameters, which will make all of its fields
689
+ accessible through the "symbol" object:
690
+
691
+ >>> def func(symbol):
692
+ ... d = otp.Ticks(TYPE=["six"])
693
+ ... d["TYPE"] = symbol['PREF'] + d["TYPE"] + symbol['POST']
694
+ ... return d
695
+ >>> # OTdirective: snippet-name: 'Source' operations.join with query.source as symbol;
696
+ >>> data = otp.Ticks(A=[1, 2], B=[2, 4], PREF=["_", "$"], POST=["$", "_"])
697
+ >>> res = data.join_with_query(func, how='inner', symbol=data)
698
+ >>> df = otp.run(res)
699
+ >>> df[["A", "B", "TYPE"]]
700
+ A B TYPE
701
+ 0 1 2 _six$
702
+ 1 2 4 $six_
703
+
704
+ The examples above can be rewritten by using onetick query parameters instead of symbol parameters.
705
+ OTQ parameters are global for query, while symbol parameters can be redefined by bound symbols:
706
+
707
+ >>> def func(symbol, pref, post):
708
+ ... d = otp.Ticks(TYPE=["six"])
709
+ ... d = d.update(dict(TYPE="three"), where=(symbol.name == "3")) # symbol is always converted to string
710
+ ... d["TYPE"] = pref + d["TYPE"] + post
711
+ ... return d
712
+ >>> # OTdirective: snippet-name: Special functions.join with query.with a function that takes params;
713
+ >>> data = otp.Ticks(A=[1, 2], B=[2, 4])
714
+ >>> res = data.join_with_query(func, how='inner', symbol=(data['A'] + data['B']),
715
+ ... params=dict(pref="_", post="$"))
716
+ >>> df = otp.run(res)
717
+ >>> df[["A", "B", "TYPE"]]
718
+ A B TYPE
719
+ 0 1 2 _three$
720
+ 1 2 4 _six$
721
+
722
+ Some or all onetick query parameters can be column or expression also:
723
+
724
+ >>> def func(symbol, pref, post):
725
+ ... d = otp.Ticks(TYPE=["six"])
726
+ ... d = d.update(dict(TYPE="three"), where=(symbol.name == "3")) # symbol is always converted to string
727
+ ... d["TYPE"] = pref + d["TYPE"] + post
728
+ ... return d
729
+ >>> # OTdirective: snippet-name: Special functions.join with query.with a function that takes params from fields; # noqa
730
+ >>> data = otp.Ticks(A=[1, 2], B=[2, 4], PREF=["^", "_"], POST=["!", "$"])
731
+ >>> res = data.join_with_query(func, how='inner', symbol=(data['A'] + data['B']),
732
+ ... params=dict(pref=data["PREF"] + ".", post=data["POST"]))
733
+ >>> df = otp.run(res)
734
+ >>> df[["A", "B", "TYPE"]]
735
+ A B TYPE
736
+ 0 1 2 ^.three!
737
+ 1 2 4 _.six$
738
+
739
+ You can specify ``start`` and ``end`` time of the query, otherwise time interval of the main query will be used:
740
+
741
+ >>> # OTdirective: snippet-name: Special functions.join with query.passing start/end times;
742
+ >>> d = otp.Ticks(Y=[1, 2])
743
+ >>> data = otp.Ticks(X=[1, 2])
744
+ >>> start = otp.datetime(2003, 12, 1, 0, 0, 0, 1000)
745
+ >>> end = otp.datetime(2003, 12, 1, 0, 0, 0, 3000)
746
+ >>> res = data.join_with_query(d, how='inner', start=start, end=end)
747
+ >>> otp.run(res)
748
+ Time Y X
749
+ 0 2003-12-01 00:00:00.000 1 1
750
+ 1 2003-12-01 00:00:00.000 2 1
751
+ 2 2003-12-01 00:00:00.001 1 2
752
+ 3 2003-12-01 00:00:00.001 2 2
753
+
754
+ By default joined query inherits start and end time from the main query:
755
+
756
+ >>> joined_query = otp.Tick(JOINED_START_TIME=otp.meta_fields.start_time,
757
+ ... JOINED_END_TIME=otp.meta_fields.end_time)
758
+ >>> main_query = otp.Tick(A=1)
759
+ >>> data = main_query.join_with_query(joined_query)
760
+ >>> otp.run(data, start=otp.dt(2003, 12, 1), end=otp.dt(2003, 12, 4))
761
+ Time JOINED_START_TIME JOINED_END_TIME A
762
+ 0 2003-12-01 2003-12-01 2003-12-04 1
763
+
764
+ Parameters ``start`` and ``end`` can be used to change time interval for the joined query:
765
+
766
+ >>> data = main_query.join_with_query(joined_query, start=otp.dt(2024, 1, 1), end=otp.dt(2024, 1, 3))
767
+ >>> otp.run(data, start=otp.dt(2003, 12, 1), end=otp.dt(2003, 12, 4))
768
+ Time JOINED_START_TIME JOINED_END_TIME A
769
+ 0 2003-12-01 2024-01-01 2024-01-03 1
770
+
771
+ Note that query ``start`` time is inclusive, but query ``end`` time is not,
772
+ meaning that ticks with timestamps equal to the query end time will not be included:
773
+
774
+ >>> main_query = otp.Tick(A=1)
775
+ >>> joined_query = otp.Tick(DAY=0, bucket_interval=24*60*60)
776
+ >>> joined_query['DAY'] = joined_query['TIMESTAMP'].dt.day_of_month()
777
+ >>> otp.run(joined_query, start=otp.dt(2003, 12, 1), end=otp.dt(2003, 12, 5))
778
+ Time DAY
779
+ 0 2003-12-01 1
780
+ 1 2003-12-02 2
781
+ 2 2003-12-03 3
782
+ 3 2003-12-04 4
783
+
784
+ >>> joined_query = joined_query.last()
785
+ >>> data = main_query.join_with_query(joined_query,
786
+ ... start=otp.dt(2003, 12, 1), end=otp.dt(2003, 12, 4))
787
+ >>> otp.run(data)
788
+ Time DAY A
789
+ 0 2003-12-01 3 1
790
+
791
+ If you want to include such ticks, you can add one nanosecond to the query end time:
792
+
793
+ >>> data = main_query.join_with_query(joined_query,
794
+ ... start=otp.dt(2003, 12, 1), end=otp.dt(2003, 12, 4) + otp.Nano(1))
795
+ >>> otp.run(data)
796
+ Time DAY A
797
+ 0 2003-12-01 4 1
798
+
799
+ Use ``keep_time`` parameter to keep or rename original timestamp column:
800
+
801
+ >>> # OTdirective: snippet-name: Special functions.join with query.keep the timestamps of the joined ticks;
802
+ >>> d = otp.Ticks(Y=[1, 2])
803
+ >>> data = otp.Ticks(X=[1, 2])
804
+ >>> res = data.join_with_query(d, how='inner', keep_time="ORIG_TIME")
805
+ >>> otp.run(res)
806
+ Time Y ORIG_TIME X
807
+ 0 2003-12-01 00:00:00.000 1 2003-12-01 00:00:00.000 1
808
+ 1 2003-12-01 00:00:00.000 2 2003-12-01 00:00:00.001 1
809
+ 2 2003-12-01 00:00:00.001 1 2003-12-01 00:00:00.000 2
810
+ 3 2003-12-01 00:00:00.001 2 2003-12-01 00:00:00.001 2
811
+ """
812
+
813
+ # TODO: check if join_with_query checks schema of joined source against primary source,
814
+ # by itself or with process_by_group
815
+
816
+ if params is None:
817
+ params = {}
818
+
819
+ converted_symbol_name, symbol_param = _check_and_convert_symbol(symbol)
820
+
821
+ # default symbol name should be this: _SYMBOL_NAME if it is not empty else _NON_EXISTING_SYMBOL_
822
+ # this way we will force JWQ to substitute symbol with any symbol parameters we may have passed
823
+ # otherwise (if an empty symbol name is passed to JWQ), it will not substitute either symbol name
824
+ # or symbol parameters, and so symbol parameters may get lost
825
+ # see BDS-263
826
+ if converted_symbol_name is None:
827
+ converted_symbol_name = "CASE(_SYMBOL_NAME,'','_NON_EXISTING_SYMBOL',_SYMBOL_NAME)"
828
+
829
+ converted_symbol_param_columns, converted_symbol_param = _convert_symbol_param_and_columns(symbol_param)
830
+ if converted_symbol_param is None:
831
+ # we couldn't interpret "symbols" as either symbol name or symbol parameters
832
+ raise ValueError(
833
+ '"symbol" parameter has a wrong format! It should be a symbol name, a symbol parameter '
834
+ 'object (dict or Source), or a tuple containing both'
835
+ )
836
+
837
+ # adding symbol time
838
+ if '_PARAM_SYMBOL_TIME' in converted_symbol_param_columns.keys():
839
+ warnings.warn(
840
+ '"_PARAM_SYMBOL_TIME" explicitly passed among join_with_query symbol parameters! '
841
+ 'This is deprecated - please use symbol_time parameter instead. '
842
+ 'If you specify symbol_time parameter, it will override the explicitly passed value',
843
+ FutureWarning,
844
+ stacklevel=2,
845
+ )
846
+ if symbol_time is not None:
847
+ if ott.get_object_type(symbol_time) is not otp.nsectime and ott.get_object_type(symbol_time) is not str:
848
+ raise ValueError(
849
+ f'Parameter of type {ott.get_object_type(symbol_time)} passed as symbol_time! '
850
+ 'This parameter only supports datetime values or strings'
851
+ )
852
+ converted_symbol_param_columns['_PARAM_SYMBOL_TIME'] = symbol_time
853
+
854
+ # prepare temporary file
855
+ # ------------------------------------ #
856
+ converted_params = prepare_params(**params)
857
+
858
+ if isinstance(query, otp.Source):
859
+ sub_source = query
860
+ else:
861
+ # inspect function
862
+ # -------
863
+ sig = inspect.signature(query)
864
+ if "symbol" in sig.parameters:
865
+ if "symbol" in converted_params.keys():
866
+ raise AttributeError(
867
+ '"params" contains key "symbol", which is reserved for symbol parameters. '
868
+ 'Please, rename this parameter to another name'
869
+ )
870
+ converted_params["symbol"] = converted_symbol_param # type: ignore
871
+ sub_source = query(**converted_params)
872
+
873
+ sub_source = self._process_keep_time_param(keep_time, sub_source)
874
+
875
+ if not sub_source._is_unbound_required():
876
+ sub_source += otp.Empty()
877
+
878
+ params_str = _columns_to_params_for_joins(params, query_params=True)
879
+ symbol_params_str = _columns_to_params_for_joins(converted_symbol_param_columns)
880
+
881
+ sub_source_schema = sub_source.schema.copy()
882
+
883
+ columns = {}
884
+ columns.update(self._get_columns_with_prefix(sub_source, prefix))
885
+ columns.update(self.columns(skip_meta_fields=True))
886
+
887
+ res = self.copy(columns=columns)
888
+
889
+ res._merge_tmp_otq(sub_source)
890
+ query_name = sub_source._store_in_tmp_otq(
891
+ res._tmp_otq, symbols='_NON_EXISTING_SYMBOL_', operation_suffix="join_with_query"
892
+ ) # TODO: combine with _convert_symbol_to_string
893
+ # ------------------------------------ #
894
+
895
+ if where is not None and how != 'outer':
896
+ raise ValueError('The `where` parameter can be used only for outer join')
897
+
898
+ default_fields_for_outer_join_str = _get_default_fields_for_outer_join_str(
899
+ default_fields_for_outer_join, how, sub_source_schema
900
+ )
901
+
902
+ join_params = dict(
903
+ otq_query=f'"THIS::{query_name}"',
904
+ join_type=how.upper(),
905
+ otq_query_params=params_str,
906
+ symbol_params=symbol_params_str,
907
+ where=str(where._make_python_way_bool_expression()) if where is not None else '',
908
+ default_fields_for_outer_join=default_fields_for_outer_join_str,
909
+ process_query_asynchronously=process_query_async,
910
+ )
911
+ if concurrency is not None:
912
+ if type(concurrency) is not int or concurrency <= 0:
913
+ raise ValueError('Wrong value of concurrency parameter passed! concurrency should be a positive integer')
914
+ join_params['shared_thread_count'] = concurrency
915
+
916
+ start_time = kwargs.get('start_time', start)
917
+ end_time = kwargs.get('end_time', end)
918
+ _fill_aux_params_for_joins(join_params, caching, end_time, prefix, start_time, converted_symbol_name, timezone)
919
+ res.sink(otq.JoinWithQuery(**join_params))
920
+ res._add_table()
921
+ res.sink(otq.Passthrough(fields="TIMESTAMP", drop_fields=True))
922
+
923
+ return res
924
+
925
+
926
def point_in_time(
    self: 'Source',
    source: Union['Source', str],
    offsets: List[int],
    offset_type: Literal['time_msec', 'num_ticks'] = 'time_msec',
    input_ts_fields_to_propagate: Optional[List[str]] = None,
    symbol_date=None,
) -> 'Source':
    """
    Join ticks of the current source with ticks taken from another ``source``.

    For every input tick the joined ticks are the ones located at the requested offsets
    (expressed either in milliseconds or in number of ticks)
    relative to the timestamp of the input tick.

    One output tick may be produced per requested offset,
    so a single input tick can result in several output ticks.

    When ``source`` has no tick at a requested offset, no output tick is produced for that offset.

    Two extra fields are added to every output tick: **TICK_TIME** holds the original timestamp
    of the joined tick and **OFFSET** holds the offset that was used to find it.

    Note
    ----
    This method performs reasonably only when
    the set of input ticks' timestamps is relatively small.

    In other words, the number of points in time the user is interested in
    has to be quite small for the usage of this method to be justified.

    Parameters
    ----------
    source: :class:`Source` or str
        The source to join the data from, or a string with the path to the .otq file
        (in the latter case the schema can not be updated automatically with the fields of the joined query).
    offsets:
        List of integer offsets to apply to each timestamp.
    offset_type: 'time_msec' or 'num_ticks'
        Unit of the offsets: milliseconds or number of ticks.
    input_ts_fields_to_propagate:
        Names of the fields of the current source that should be propagated to the output.
        By default no fields (except **TIMESTAMP**) are propagated.
    symbol_date: :py:class:`otp.datetime <onetick.py.datetime>`
        Symbol date to set for the inner query built from ``source``.

    See also
    --------
    | **POINT_IN_TIME** OneTick event processor
    | :func:`onetick.py.PointInTime`
    | :func:`onetick.py.join_by_time`

    Examples
    --------

    Quotes and trades for testing:

    .. testcode::

        qte = otp.Ticks(ASK_PRICE=[20, 21, 22, 23, 24, 25], BID_PRICE=[20, 21, 22, 23, 24, 25])
        print(otp.run(qte))

    .. testoutput::

                             Time  ASK_PRICE  BID_PRICE
        0 2003-12-01 00:00:00.000         20         20
        1 2003-12-01 00:00:00.001         21         21
        2 2003-12-01 00:00:00.002         22         22
        3 2003-12-01 00:00:00.003         23         23
        4 2003-12-01 00:00:00.004         24         24
        5 2003-12-01 00:00:00.005         25         25

    .. testcode::

        trd = otp.Ticks(PRICE=[1, 3, 5], SIZE=[100, 300, 500], offset=[1, 3, 5])
        print(otp.run(trd))

    .. testoutput::

                             Time  PRICE  SIZE
        0 2003-12-01 00:00:00.001      1   100
        1 2003-12-01 00:00:00.003      3   300
        2 2003-12-01 00:00:00.005      5   500

    Joining each quote with first trade with equal or less timestamp:

    .. testcode::
        :skipif: not is_supported_point_in_time()

        data = qte.point_in_time(trd, offsets=[0])
        print(otp.run(data))

    .. testoutput::

                             Time  PRICE  SIZE               TICK_TIME  OFFSET
        0 2003-12-01 00:00:00.001      1   100 2003-12-01 00:00:00.001       0
        1 2003-12-01 00:00:00.002      1   100 2003-12-01 00:00:00.001       0
        2 2003-12-01 00:00:00.003      3   300 2003-12-01 00:00:00.003       0
        3 2003-12-01 00:00:00.004      3   300 2003-12-01 00:00:00.003       0
        4 2003-12-01 00:00:00.005      5   500 2003-12-01 00:00:00.005       0

    By default fields from the current source are not propagated,
    use parameter ``input_ts_fields_to_propagate`` to add them to the output:

    .. testcode::
        :skipif: not is_supported_point_in_time()

        data = qte.point_in_time(trd, offsets=[0], input_ts_fields_to_propagate=['ASK_PRICE', 'BID_PRICE'])
        print(otp.run(data))

    .. testoutput::

                             Time  ASK_PRICE  BID_PRICE  PRICE  SIZE               TICK_TIME  OFFSET
        0 2003-12-01 00:00:00.001         21         21      1   100 2003-12-01 00:00:00.001       0
        1 2003-12-01 00:00:00.002         22         22      1   100 2003-12-01 00:00:00.001       0
        2 2003-12-01 00:00:00.003         23         23      3   300 2003-12-01 00:00:00.003       0
        3 2003-12-01 00:00:00.004         24         24      3   300 2003-12-01 00:00:00.003       0
        4 2003-12-01 00:00:00.005         25         25      5   500 2003-12-01 00:00:00.005       0

    Note that first quote was not propagated, because it doesn't have corresponding trade.

    Offset may be positive or negative.
    If several offsets are specified, several output ticks may be generated for a single input tick:

    .. testcode::
        :skipif: not is_supported_point_in_time()

        data = qte.point_in_time(trd, offsets=[0, 1], input_ts_fields_to_propagate=['ASK_PRICE', 'BID_PRICE'])
        print(otp.run(data))

    .. testoutput::

                              Time  ASK_PRICE  BID_PRICE  PRICE  SIZE               TICK_TIME  OFFSET
        0  2003-12-01 00:00:00.000         20         20      1   100 2003-12-01 00:00:00.001       1
        1  2003-12-01 00:00:00.001         21         21      1   100 2003-12-01 00:00:00.001       0
        2  2003-12-01 00:00:00.001         21         21      1   100 2003-12-01 00:00:00.001       1
        3  2003-12-01 00:00:00.002         22         22      1   100 2003-12-01 00:00:00.001       0
        4  2003-12-01 00:00:00.002         22         22      3   300 2003-12-01 00:00:00.003       1
        5  2003-12-01 00:00:00.003         23         23      3   300 2003-12-01 00:00:00.003       0
        6  2003-12-01 00:00:00.003         23         23      3   300 2003-12-01 00:00:00.003       1
        7  2003-12-01 00:00:00.004         24         24      3   300 2003-12-01 00:00:00.003       0
        8  2003-12-01 00:00:00.004         24         24      5   500 2003-12-01 00:00:00.005       1
        9  2003-12-01 00:00:00.005         25         25      5   500 2003-12-01 00:00:00.005       0
        10 2003-12-01 00:00:00.005         25         25      5   500 2003-12-01 00:00:00.005       1

    By default the number of milliseconds is used as an offset.
    You can also specify the number of ticks as an offset:

    .. testcode::
        :skipif: not is_supported_point_in_time()

        data = qte.point_in_time(trd, offset_type='num_ticks', offsets=[-1, 1],
                                 input_ts_fields_to_propagate=['ASK_PRICE', 'BID_PRICE'])
        print(otp.run(data))

    .. testoutput::

                             Time  ASK_PRICE  BID_PRICE  PRICE  SIZE               TICK_TIME  OFFSET
        0 2003-12-01 00:00:00.000         20         20      1   100 2003-12-01 00:00:00.001       1
        1 2003-12-01 00:00:00.001         21         21      3   300 2003-12-01 00:00:00.003       1
        2 2003-12-01 00:00:00.002         22         22      3   300 2003-12-01 00:00:00.003       1
        3 2003-12-01 00:00:00.003         23         23      1   100 2003-12-01 00:00:00.001      -1
        4 2003-12-01 00:00:00.003         23         23      5   500 2003-12-01 00:00:00.005       1
        5 2003-12-01 00:00:00.004         24         24      1   100 2003-12-01 00:00:00.001      -1
        6 2003-12-01 00:00:00.004         24         24      5   500 2003-12-01 00:00:00.005       1
        7 2003-12-01 00:00:00.005         25         25      3   300 2003-12-01 00:00:00.003      -1
    """
    # The event processor is only available on some OneTick builds, so fail early otherwise.
    if not is_supported_point_in_time():
        raise RuntimeError('PointInTime event processor is not supported on this OneTick version')

    result = self.copy()

    if offset_type not in ('time_msec', 'num_ticks'):
        raise ValueError(f"Wrong value for parameter 'offset_type': {offset_type}")

    # A string is used as the query reference as-is;
    # a Source object is stored in the temporary .otq of the result and referenced through THIS::.
    source_is_otq_path = isinstance(source, str)
    if source_is_otq_path:
        otq_query = source
    else:
        stored_name = source._store_in_tmp_otq(
            result._tmp_otq,
            operation_suffix='point_in_time',
            # set default symbol, even if it's not set by user, symbol's value doesn't matter in this case
            symbols=otp.config.get('default_symbol', 'ANY'),
            symbol_date=symbol_date,
        )
        otq_query = f'THIS::{stored_name}'

    propagated_fields = input_ts_fields_to_propagate or []

    result.sink(
        otq.PointInTime(
            otq_query=otq_query,
            offset_type=offset_type.upper(),
            offsets=','.join(str(offset) for offset in offsets),
            input_ts_fields_to_propagate=','.join(str(field) for field in propagated_fields),
        )
    )

    # Rebuild the schema: keep only the propagated input fields,
    # then add the joined source's fields (when known) and the two fields added by the EP.
    kept_schema = {}
    if propagated_fields:
        kept_schema = {
            name: dtype
            for name, dtype in result.schema.items()
            if name in propagated_fields
        }
    result.schema.set(**kept_schema)
    if not source_is_otq_path:
        result.schema.update(**source.schema)
    result.schema.update(TICK_TIME=otp.nsectime, OFFSET=int)
    return result
1136
+
1137
+
1138
+ def join_with_snapshot(
1139
+ self: 'Source',
1140
+ snapshot_name='VALUE',
1141
+ snapshot_storage='memory',
1142
+ allow_snapshot_absence=False,
1143
+ join_keys=None,
1144
+ symbol_name_in_snapshot=None,
1145
+ database='',
1146
+ default_fields_for_outer_join=None,
1147
+ prefix_for_output_ticks='',
1148
+ snapshot_fields=None,
1149
+ ):
1150
+ """
1151
+ Saves last (at most) `n` ticks of each group of ticks from the input time series in global storage or
1152
+ in a memory mapped file under a specified snapshot name.
1153
+ Tick descriptor should be the same for all ticks saved into the snapshot.
1154
+ These ticks can then be read via :py:class:`ReadSnapshot <onetick.py.ReadSnapshot>` by using the name
1155
+ of the snapshot and the same symbol name (``<db_name>::<symbol>``) that were used by this method.
1156
+
1157
+ .. warning::
1158
+ You should update schema manually, if you want to use fields from snapshot in `onetick-py` query description
1159
+ before its execution.
1160
+
1161
+ That's due to the fact, that `onetick-py` can't identify a schema of data in a snapshot before making a query.
1162
+
1163
+ If you set ``default_fields_for_outer_join`` parameter, schema will be guessed from default fields values.
1164
+
1165
+ Parameters
1166
+ ----------
1167
+ snapshot_name: str
1168
+ The name that was specified in :py:meth:`onetick.py.Source.save_snapshot` as a ``snapshot_name`` during saving.
1169
+
1170
+ Default: `VALUE`
1171
+ snapshot_storage: str
1172
+ This parameter specifies the place of storage of the snapshot. Possible options are:
1173
+
1174
+ * `memory` - the snapshot is stored in the dynamic (heap) memory of the process
1175
+ that ran (or is still running) the :py:meth:`onetick.py.Source.save_snapshot` for the snapshot.
1176
+ * `memory_mapped_file` - the snapshot is stored in a memory mapped file.
1177
+ For each symbol to get the location of the snapshot in the file system, ``join_with_snapshot`` looks at
1178
+ the **SAVE_SNAPSHOT_DIR** parameter value in the locator section for the database of the symbol.
1179
+ In a specified directory it creates a new directory with the name of the snapshot and keeps
1180
+ the memory mapped file and some other helper files there.
1181
+
1182
+ Default: `memory`
1183
+ allow_snapshot_absence: bool
1184
+ If specified, the EP does not display an error about missing snapshot
1185
+ if the snapshot has not been saved or is still being saved.
1186
+
1187
+ Default: `False`
1188
+ join_keys: list, optional
1189
+ A list of names of attributes. A non-empty list causes input ticks to be joined only if all of them
1190
+ have matching values for all specified attributes.
1191
+ Currently, these fields need to match with ``group_by`` fields of the corresponding snapshot.
1192
+ symbol_name_in_snapshot: str, :class:`~onetick.py.Column` or :class:`~onetick.py.Operation`, optional
1193
+ Expression that evaluates to a string containing symbol name.
1194
+ Specified expression is reevaluated upon the arrival of each tick.
1195
+ If this parameter is empty, the input symbol name is used.
1196
+ database: str, optional
1197
+ The database to read the snapshot. If not specified database from the symbol is used.
1198
+ default_fields_for_outer_join: dict, optional
1199
+ A `dict` with field name as key and value, :class:`~onetick.py.Column` or :class:`~onetick.py.Operation`,
1200
+ which specifies the names and the values of the fields (also, optionally, the field type),
1201
+ used to form ticks to be joined with unmatched input ticks.
1202
+
1203
+ If you want to specify field type, pass tuple of field dtype and expression or value as dict item value.
1204
+
1205
+ This parameter is reevaluated upon the arrival of each tick.
1206
+
1207
+ It's also used for auto detecting snapshot schema for using fields from snapshot
1208
+ while building query via ``onetick-py``.
1209
+ prefix_for_output_ticks: str
1210
+ The prefix for the names of joined tick fields.
1211
+
1212
+ Default: `empty string`
1213
+ snapshot_fields: List[str], None
1214
+ Specifies list of fields from the snapshot to join with input ticks. When empty, all fields are included.
1215
+
1216
+ See also
1217
+ --------
1218
+ | **JOIN_WITH_SNAPSHOT** OneTick event processor
1219
+ | :py:class:`onetick.py.ReadSnapshot`
1220
+ | :py:class:`onetick.py.ShowSnapshotList`
1221
+ | :py:class:`onetick.py.FindSnapshotSymbols`
1222
+ | :py:meth:`onetick.py.Source.save_snapshot`
1223
+
1224
+ Examples
1225
+ --------
1226
+ Simple ticks join with snapshot:
1227
+
1228
+ >>> src = otp.Ticks(A=[1, 2])
1229
+ >>> src = src.join_with_snapshot(snapshot_name='some_snapshot') # doctest: +SKIP
1230
+ >>> otp.run(src) # doctest: +SKIP
1231
+ Time A X Y TICK_TIME
1232
+ 0 2003-12-01 00:00:00.000 1 1 4 2003-12-01 00:00:00.000
1233
+ 1 2003-12-01 00:00:00.000 1 2 5 2003-12-01 00:00:00.001
1234
+ 2 2003-12-01 00:00:00.001 2 1 4 2003-12-01 00:00:00.000
1235
+ 3 2003-12-01 00:00:00.001 2 2 5 2003-12-01 00:00:00.001
1236
+
1237
+ Add prefix ``T.`` for fields from snapshot:
1238
+
1239
+ >>> src = otp.Ticks(A=[1, 2])
1240
+ >>> src = src.join_with_snapshot(
1241
+ ... snapshot_name='some_snapshot', prefix_for_output_ticks='T.',
1242
+ ... ) # doctest: +SKIP
1243
+ >>> otp.run(src) # doctest: +SKIP
1244
+ Time A T.X T.Y T.TICK_TIME
1245
+ 0 2003-12-01 00:00:00.000 1 1 4 2003-12-01 00:00:00.000
1246
+ 1 2003-12-01 00:00:00.000 1 2 5 2003-12-01 00:00:00.001
1247
+ 2 2003-12-01 00:00:00.001 2 1 4 2003-12-01 00:00:00.000
1248
+ 3 2003-12-01 00:00:00.001 2 2 5 2003-12-01 00:00:00.001
1249
+
1250
+ To get only specific fields from snapshot use parameter ``snapshot_fields``:
1251
+
1252
+ >>> src = otp.Ticks(A=[1, 2])
1253
+ >>> src = src.join_with_snapshot(
1254
+ ... snapshot_name='some_snapshot', snapshot_fields=['Y'],
1255
+ ... ) # doctest: +SKIP
1256
+ >>> otp.run(src) # doctest: +SKIP
1257
+ Time A Y
1258
+ 0 2003-12-01 00:00:00.000 1 4
1259
+ 1 2003-12-01 00:00:00.000 1 5
1260
+ 2 2003-12-01 00:00:00.001 2 4
1261
+ 3 2003-12-01 00:00:00.001 2 5
1262
+
1263
+ Setting default values for snapshot fields for outer join via ``default_fields_for_outer_join``
1264
+ parameter with example of joining ticks with absent snapshot:
1265
+
1266
+ >>> src = otp.Ticks(A=[1, 2])
1267
+ >>> src = src.join_with_snapshot(
1268
+ ... snapshot_name='some_snapshot', allow_snapshot_absence=True,
1269
+ ... default_fields_for_outer_join={
1270
+ ... 'B': 'Some string',
1271
+ ... 'C': (float, src['A'] * 2),
1272
+ ... 'D': 50,
1273
+ ... },
1274
+ ... ) # doctest: +SKIP
1275
+ >>> otp.run(src) # doctest: +SKIP
1276
+ Time A B C D
1277
+ 0 2003-12-01 00:00:00.000 1 Some string 2.0 50.0
1278
+ 1 2003-12-01 00:00:00.001 2 Some string 2.0 50.0
1279
+
1280
+ In this case, schema for ``src`` object will be automatically detected from values for this parameter:
1281
+
1282
+ >>> src.schema # doctest: +SKIP
1283
+ {'A': <class 'int'>, 'B': <class 'str'>, 'C': <class 'float'>, 'D': <class 'int'>}
1284
+
1285
+
1286
+ You can join ticks from snapshot for each input tick for specified symbol name from string value or this tick
1287
+ via ``symbol_name_in_snapshot`` parameter.
1288
+
1289
+ Let's create snapshot with different symbol names inside:
1290
+
1291
+ >>> src = otp.Ticks(X=[1, 2, 3, 4], Y=['AAA', 'BBB', 'CCC', 'AAA'])
1292
+ >>> src = src.save_snapshot(
1293
+ ... snapshot_name='some_snapshot', num_ticks=5, keep_snapshot_after_query=True, symbol_name_field='Y',
1294
+ ... )
1295
+ >>> otp.run(src) # doctest: +SKIP
1296
+
1297
+ Now we can join input only with ticks from snapshot with specified symbol name:
1298
+
1299
+ >>> src = otp.Ticks(A=[1, 2])
1300
+ >>> src = src.join_with_snapshot(
1301
+ ... snapshot_name='some_snapshot', symbol_name_in_snapshot='AAA',
1302
+ ... ) # doctest: +SKIP
1303
+ >>> otp.run(src) # doctest: +SKIP
1304
+ Time A X TICK_TIME
1305
+ 0 2003-12-01 00:00:00.000 1 1 2003-12-01 00:00:00.000
1306
+ 1 2003-12-01 00:00:00.000 1 4 2003-12-01 00:00:00.003
1307
+ 2 2003-12-01 00:00:00.001 2 1 2003-12-01 00:00:00.000
1308
+ 3 2003-12-01 00:00:00.001 2 4 2003-12-01 00:00:00.003
1309
+
1310
+ Or we can join each tick with ticks from snapshot with symbol name from input ticks field:
1311
+
1312
+ >>> src = otp.Ticks(A=[1, 2], SYM=['AAA', 'CCC'])
1313
+ >>> src = src.join_with_snapshot(
1314
+ ... snapshot_name='some_snapshot', symbol_name_in_snapshot=src['SYM'],
1315
+ ... ) # doctest: +SKIP
1316
+ >>> otp.run(src) # doctest: +SKIP
1317
+ Time A SYM X TICK_TIME
1318
+ 0 2003-12-01 00:00:00.000 1 AAA 1 2003-12-01 00:00:00.000
1319
+ 1 2003-12-01 00:00:00.000 1 AAA 4 2003-12-01 00:00:00.003
1320
+ 2 2003-12-01 00:00:00.001 2 CCC 3 2003-12-01 00:00:00.002
1321
+ """
1322
+ kwargs = {}
1323
+
1324
+ if not hasattr(otq, "JoinWithSnapshot"):
1325
+ raise RuntimeError("Current version of OneTick doesn't support JOIN_WITH_SNAPSHOT EP")
1326
+
1327
+ if snapshot_storage not in ['memory', 'memory_mapped_file']:
1328
+ raise ValueError('`snapshot_storage` must be one of "memory", "memory_mapped_file"')
1329
+
1330
+ is_snapshot_fields_param_supported = is_join_with_snapshot_snapshot_fields_parameter_supported()
1331
+
1332
+ if snapshot_fields and not is_snapshot_fields_param_supported:
1333
+ raise RuntimeError(
1334
+ "Current version of OneTick doesn't support `snapshot_fields` parameter on JOIN_WITH_SNAPSHOT EP"
1335
+ )
1336
+
1337
+ snapshot_storage = snapshot_storage.upper()
1338
+
1339
+ if join_keys is None:
1340
+ join_keys_str = ''
1341
+ else:
1342
+ join_keys_str = ','.join(join_keys)
1343
+
1344
+ if symbol_name_in_snapshot is None:
1345
+ symbol_name_in_snapshot = ''
1346
+ elif isinstance(symbol_name_in_snapshot, _Operation):
1347
+ symbol_name_in_snapshot = str(symbol_name_in_snapshot)
1348
+
1349
+ if default_fields_for_outer_join is None:
1350
+ default_fields_for_outer_join = {}
1351
+
1352
+ default_fields_list = []
1353
+ snapshot_schema = {}
1354
+
1355
+ for field_name, field_value in default_fields_for_outer_join.items():
1356
+ if isinstance(field_value, tuple):
1357
+ field_type = field_value[0]
1358
+
1359
+ default_fields_list.append(
1360
+ f'{field_name} {ott.type2str(field_type)} = {ott.value2str(field_value[1])}',
1361
+ )
1362
+ else:
1363
+ if isinstance(field_value, _Operation):
1364
+ field_type = field_value.dtype
1365
+ else:
1366
+ field_type = type(field_value)
1367
+
1368
+ default_fields_list.append(f'{field_name} = {ott.value2str(field_value)}')
1369
+
1370
+ snapshot_schema[f'{prefix_for_output_ticks}{field_name}'] = field_type
1371
+
1372
+ default_fields_str = ','.join(default_fields_list)
1373
+
1374
+ if snapshot_fields is not None:
1375
+ kwargs['snapshot_fields'] = ','.join(snapshot_fields)
1376
+
1377
+ self.sink(
1378
+ otq.JoinWithSnapshot(
1379
+ snapshot_name=snapshot_name,
1380
+ snapshot_storage=snapshot_storage,
1381
+ allow_snapshot_absence=allow_snapshot_absence,
1382
+ join_keys=join_keys_str,
1383
+ symbol_name_in_snapshot=symbol_name_in_snapshot,
1384
+ database=database,
1385
+ default_fields_for_outer_join=default_fields_str,
1386
+ prefix_for_output_ticks=prefix_for_output_ticks,
1387
+ **kwargs,
1388
+ )
1389
+ )
1390
+
1391
+ self.schema.update(**snapshot_schema)
1392
+
1393
+ return self