onetick-py 1.177.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152) hide show
  1. locator_parser/__init__.py +0 -0
  2. locator_parser/acl.py +73 -0
  3. locator_parser/actions.py +262 -0
  4. locator_parser/common.py +368 -0
  5. locator_parser/io.py +43 -0
  6. locator_parser/locator.py +150 -0
  7. onetick/__init__.py +101 -0
  8. onetick/doc_utilities/__init__.py +3 -0
  9. onetick/doc_utilities/napoleon.py +40 -0
  10. onetick/doc_utilities/ot_doctest.py +140 -0
  11. onetick/doc_utilities/snippets.py +279 -0
  12. onetick/lib/__init__.py +4 -0
  13. onetick/lib/instance.py +141 -0
  14. onetick/py/__init__.py +293 -0
  15. onetick/py/_stack_info.py +89 -0
  16. onetick/py/_version.py +2 -0
  17. onetick/py/aggregations/__init__.py +11 -0
  18. onetick/py/aggregations/_base.py +648 -0
  19. onetick/py/aggregations/_docs.py +948 -0
  20. onetick/py/aggregations/compute.py +286 -0
  21. onetick/py/aggregations/functions.py +2216 -0
  22. onetick/py/aggregations/generic.py +104 -0
  23. onetick/py/aggregations/high_low.py +80 -0
  24. onetick/py/aggregations/num_distinct.py +83 -0
  25. onetick/py/aggregations/order_book.py +501 -0
  26. onetick/py/aggregations/other.py +1014 -0
  27. onetick/py/backports.py +26 -0
  28. onetick/py/cache.py +374 -0
  29. onetick/py/callback/__init__.py +5 -0
  30. onetick/py/callback/callback.py +276 -0
  31. onetick/py/callback/callbacks.py +131 -0
  32. onetick/py/compatibility.py +798 -0
  33. onetick/py/configuration.py +771 -0
  34. onetick/py/core/__init__.py +0 -0
  35. onetick/py/core/_csv_inspector.py +93 -0
  36. onetick/py/core/_internal/__init__.py +0 -0
  37. onetick/py/core/_internal/_manually_bound_value.py +6 -0
  38. onetick/py/core/_internal/_nodes_history.py +250 -0
  39. onetick/py/core/_internal/_op_utils/__init__.py +0 -0
  40. onetick/py/core/_internal/_op_utils/every_operand.py +9 -0
  41. onetick/py/core/_internal/_op_utils/is_const.py +10 -0
  42. onetick/py/core/_internal/_per_tick_scripts/tick_list_sort_template.script +121 -0
  43. onetick/py/core/_internal/_proxy_node.py +140 -0
  44. onetick/py/core/_internal/_state_objects.py +2312 -0
  45. onetick/py/core/_internal/_state_vars.py +93 -0
  46. onetick/py/core/_source/__init__.py +0 -0
  47. onetick/py/core/_source/_symbol_param.py +95 -0
  48. onetick/py/core/_source/schema.py +97 -0
  49. onetick/py/core/_source/source_methods/__init__.py +0 -0
  50. onetick/py/core/_source/source_methods/aggregations.py +809 -0
  51. onetick/py/core/_source/source_methods/applyers.py +296 -0
  52. onetick/py/core/_source/source_methods/columns.py +141 -0
  53. onetick/py/core/_source/source_methods/data_quality.py +301 -0
  54. onetick/py/core/_source/source_methods/debugs.py +272 -0
  55. onetick/py/core/_source/source_methods/drops.py +120 -0
  56. onetick/py/core/_source/source_methods/fields.py +619 -0
  57. onetick/py/core/_source/source_methods/filters.py +1002 -0
  58. onetick/py/core/_source/source_methods/joins.py +1413 -0
  59. onetick/py/core/_source/source_methods/merges.py +605 -0
  60. onetick/py/core/_source/source_methods/misc.py +1455 -0
  61. onetick/py/core/_source/source_methods/pandases.py +155 -0
  62. onetick/py/core/_source/source_methods/renames.py +356 -0
  63. onetick/py/core/_source/source_methods/sorts.py +183 -0
  64. onetick/py/core/_source/source_methods/switches.py +142 -0
  65. onetick/py/core/_source/source_methods/symbols.py +117 -0
  66. onetick/py/core/_source/source_methods/times.py +627 -0
  67. onetick/py/core/_source/source_methods/writes.py +986 -0
  68. onetick/py/core/_source/symbol.py +205 -0
  69. onetick/py/core/_source/tmp_otq.py +222 -0
  70. onetick/py/core/column.py +209 -0
  71. onetick/py/core/column_operations/__init__.py +0 -0
  72. onetick/py/core/column_operations/_methods/__init__.py +4 -0
  73. onetick/py/core/column_operations/_methods/_internal.py +28 -0
  74. onetick/py/core/column_operations/_methods/conversions.py +216 -0
  75. onetick/py/core/column_operations/_methods/methods.py +292 -0
  76. onetick/py/core/column_operations/_methods/op_types.py +160 -0
  77. onetick/py/core/column_operations/accessors/__init__.py +0 -0
  78. onetick/py/core/column_operations/accessors/_accessor.py +28 -0
  79. onetick/py/core/column_operations/accessors/decimal_accessor.py +104 -0
  80. onetick/py/core/column_operations/accessors/dt_accessor.py +537 -0
  81. onetick/py/core/column_operations/accessors/float_accessor.py +184 -0
  82. onetick/py/core/column_operations/accessors/str_accessor.py +1367 -0
  83. onetick/py/core/column_operations/base.py +1121 -0
  84. onetick/py/core/cut_builder.py +150 -0
  85. onetick/py/core/db_constants.py +20 -0
  86. onetick/py/core/eval_query.py +245 -0
  87. onetick/py/core/lambda_object.py +441 -0
  88. onetick/py/core/multi_output_source.py +232 -0
  89. onetick/py/core/per_tick_script.py +2256 -0
  90. onetick/py/core/query_inspector.py +464 -0
  91. onetick/py/core/source.py +1744 -0
  92. onetick/py/db/__init__.py +2 -0
  93. onetick/py/db/_inspection.py +1128 -0
  94. onetick/py/db/db.py +1327 -0
  95. onetick/py/db/utils.py +64 -0
  96. onetick/py/docs/__init__.py +0 -0
  97. onetick/py/docs/docstring_parser.py +112 -0
  98. onetick/py/docs/utils.py +81 -0
  99. onetick/py/functions.py +2398 -0
  100. onetick/py/license.py +190 -0
  101. onetick/py/log.py +88 -0
  102. onetick/py/math.py +935 -0
  103. onetick/py/misc.py +470 -0
  104. onetick/py/oqd/__init__.py +22 -0
  105. onetick/py/oqd/eps.py +1195 -0
  106. onetick/py/oqd/sources.py +325 -0
  107. onetick/py/otq.py +216 -0
  108. onetick/py/pyomd_mock.py +47 -0
  109. onetick/py/run.py +916 -0
  110. onetick/py/servers.py +173 -0
  111. onetick/py/session.py +1347 -0
  112. onetick/py/sources/__init__.py +19 -0
  113. onetick/py/sources/cache.py +167 -0
  114. onetick/py/sources/common.py +128 -0
  115. onetick/py/sources/csv.py +642 -0
  116. onetick/py/sources/custom.py +85 -0
  117. onetick/py/sources/data_file.py +305 -0
  118. onetick/py/sources/data_source.py +1045 -0
  119. onetick/py/sources/empty.py +94 -0
  120. onetick/py/sources/odbc.py +337 -0
  121. onetick/py/sources/order_book.py +271 -0
  122. onetick/py/sources/parquet.py +168 -0
  123. onetick/py/sources/pit.py +191 -0
  124. onetick/py/sources/query.py +495 -0
  125. onetick/py/sources/snapshots.py +419 -0
  126. onetick/py/sources/split_query_output_by_symbol.py +198 -0
  127. onetick/py/sources/symbology_mapping.py +123 -0
  128. onetick/py/sources/symbols.py +374 -0
  129. onetick/py/sources/ticks.py +825 -0
  130. onetick/py/sql.py +70 -0
  131. onetick/py/state.py +251 -0
  132. onetick/py/types.py +2131 -0
  133. onetick/py/utils/__init__.py +70 -0
  134. onetick/py/utils/acl.py +93 -0
  135. onetick/py/utils/config.py +186 -0
  136. onetick/py/utils/default.py +49 -0
  137. onetick/py/utils/file.py +38 -0
  138. onetick/py/utils/helpers.py +76 -0
  139. onetick/py/utils/locator.py +94 -0
  140. onetick/py/utils/perf.py +498 -0
  141. onetick/py/utils/query.py +49 -0
  142. onetick/py/utils/render.py +1374 -0
  143. onetick/py/utils/script.py +244 -0
  144. onetick/py/utils/temp.py +471 -0
  145. onetick/py/utils/types.py +120 -0
  146. onetick/py/utils/tz.py +84 -0
  147. onetick_py-1.177.0.dist-info/METADATA +137 -0
  148. onetick_py-1.177.0.dist-info/RECORD +152 -0
  149. onetick_py-1.177.0.dist-info/WHEEL +5 -0
  150. onetick_py-1.177.0.dist-info/entry_points.txt +2 -0
  151. onetick_py-1.177.0.dist-info/licenses/LICENSE +21 -0
  152. onetick_py-1.177.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,1413 @@
1
+ import inspect
2
+ import warnings
3
+ from datetime import datetime
4
+ from typing import TYPE_CHECKING, List, Optional, Union
5
+ from onetick.py.backports import Literal
6
+
7
+ from onetick import py as otp
8
+ from onetick.py import types as ott
9
+ from onetick.py.core._internal._state_objects import _TickSequence
10
+ from onetick.py.core._source._symbol_param import _SymbolParamSource
11
+ from onetick.py.core.column_operations._methods.op_types import are_strings
12
+ from onetick.py.core.column_operations.base import _Operation
13
+ from onetick.py.core.eval_query import prepare_params
14
+ from onetick.py.otq import otq
15
+ from onetick.py.compatibility import (
16
+ is_supported_point_in_time,
17
+ is_join_with_query_symbol_time_otq_supported,
18
+ is_join_with_snapshot_snapshot_fields_parameter_supported,
19
+ )
20
+
21
+ if TYPE_CHECKING:
22
+ from onetick.py.core.source import Source
23
+
24
+
25
+ def _process_keep_time_param(self: 'Source', keep_time, sub_source):
26
+ if keep_time == "TIMESTAMP":
27
+ raise ValueError("TIMESTAMP is reserved OneTick name, please, specify another one.")
28
+ if keep_time in self.columns():
29
+ raise ValueError(f"{keep_time} column is already presented.")
30
+ sub_source = sub_source.copy()
31
+ if keep_time:
32
+ sub_source[keep_time] = sub_source["Time"]
33
+ return sub_source
34
+
35
+
36
+ def _process_start_or_end_of_jwq(join_params, time, param_name):
37
+ if time is not None:
38
+ if isinstance(time, (datetime, otp.dt)):
39
+ join_params[f"{param_name}"] = ott.datetime2expr(time)
40
+ elif isinstance(time, _Operation):
41
+ join_params[f"{param_name}"] = str(time)
42
+ else:
43
+ raise ValueError(f"{param_name} should be datetime.datetime instance or OneTick expression")
44
+
45
+
46
+ def _columns_to_params_for_joins(columns, query_params=False):
47
+ """
48
+ Converts a dictionary of columns into a parameters string.
49
+ This is mainly used for join_with_query and join_with_collection.
50
+
51
+ query_params control whether resulting string should be considered query params or symbol params
52
+ (as it impacts some of the conversion rules)
53
+ """
54
+ params_list = []
55
+
56
+ def get_msecs_expression(value):
57
+ return f"tostring(GET_MSECS({str(value)}))"
58
+
59
+ for key, value in columns.items():
60
+ dtype = ott.get_object_type(value)
61
+ convert_rule = "'" + key + "=' + "
62
+
63
+ if key == '_PARAM_SYMBOL_TIME' and not query_params:
64
+ # this symbol parameter has to be formatted differently because Onetick treats this parameter
65
+ # in a special way
66
+ if dtype is otp.nsectime:
67
+ convert_rule += f'NSECTIME_FORMAT("%Y%m%d%H%M%S.%J",{ott.value2str(value)},_TIMEZONE)'
68
+ elif dtype is str:
69
+ if are_strings(getattr(value, "dtype", None)):
70
+ convert_rule += str(value)
71
+ else:
72
+ convert_rule += '"' + value + '"'
73
+ else:
74
+ raise ValueError('Parameter symbol_time has to be a datetime value!')
75
+
76
+ elif key == '_SYMBOL_TIME' and query_params:
77
+ # hack to support passing _SYMBOL_TIME to called query as a parameter
78
+ if dtype is otp.nsectime:
79
+ convert_rule += get_msecs_expression(ott.value2str(value))
80
+ elif dtype is str:
81
+ ns = f'PARSE_NSECTIME("%Y%m%d%H%M%S", {ott.value2str(value)}, _TIMEZONE)'
82
+ convert_rule += get_msecs_expression(ns)
83
+ elif dtype is otp.msectime:
84
+ # for backward compatibility
85
+ convert_rule += get_msecs_expression(value)
86
+ else:
87
+ raise ValueError('Parameter symbol_time has to be a datetime value!')
88
+
89
+ elif dtype is str:
90
+ if are_strings(getattr(value, "dtype", None)):
91
+ convert_rule += str(value)
92
+ else:
93
+ convert_rule += '"' + value + '"'
94
+ elif dtype is otp.msectime:
95
+ convert_rule += get_msecs_expression(value)
96
+ elif dtype is otp.nsectime:
97
+ if query_params:
98
+ # this can be used for query params but cannot be used for symbol params
99
+ # overall it's better
100
+ convert_rule += "'NSECTIME('+tostring(NSECTIME_TO_LONG(" + str(value) + "))+')'"
101
+ else:
102
+ # this matches the common way onetick converts nanoseconds to symbol parameters
103
+ convert_rule += (
104
+ get_msecs_expression(value) + "+'.'+SUBSTR(NSECTIME_FORMAT('%J'," + str(value) + ",_TIMEZONE),3,6)"
105
+ )
106
+ else:
107
+ if issubclass(dtype, float) or dtype is otp.decimal:
108
+ warnings.warn(f"Parameter '{key}' is of {dtype} type.\n"
109
+ "Parameters passed to query will have to be converted to string"
110
+ " so the precision may be lost (default precision of 8 will be used).\n"
111
+ "Use other types like integers or strings to pass parameters with higher precision.")
112
+ convert_rule += "tostring(" + ott.value2str(value) + ")"
113
+ params_list.append(convert_rule)
114
+ return "+','+".join(params_list)
115
+
116
+
117
def _check_and_convert_symbol(symbol):
    """
    Split the value of the 'symbol' function parameter into the OneTick string
    representation of the symbol name and the symbol parameters.

    "symbol" may hold a symbol name (string, operation, int or float constant),
    symbol parameters (any other object), or both of them as a tuple of two elements.

    Returns
    -------
    tuple
        ``(symbol_name, symbol_param)``. ``symbol_name`` is None when no name was passed,
        ``symbol_param`` is an empty dict when no parameters were passed.
    """
    params = {}

    # a pair is treated as (symbol name, symbol parameters)
    if isinstance(symbol, tuple) and len(symbol) == 2:
        symbol, params = symbol

    if isinstance(symbol, _Operation):  # TODO: PY-35
        return f"tostring({str(symbol)})", params
    if isinstance(symbol, str):
        return f"'{symbol}'", params
    if type(symbol) in (int, float):  # constant
        return f"tostring({symbol})", params

    # None is a valid value meaning "no symbol name"; anything else that is not a name
    # is taken as symbol parameters, unless parameters were already given in the tuple
    if symbol is not None and not params:
        params = symbol
    return None, params
147
+
148
+
149
def _convert_symbol_param_and_columns(symbol_param):
    """
    Create two objects from a symbol param (a dict, a Source or a _SymbolParamSource):

    1. Dictionary of columns used to generate the list of symbol parameters for the JOIN_WITH_QUERY EP
    2. _SymbolParamSource object to pass to the source function if necessary

    Fields ``_PARAM_START_TIME_NANOS`` and ``_PARAM_END_TIME_NANOS`` are dropped from both objects,
    a FutureWarning is issued for each of them found among the columns.

    Returns
    -------
    tuple
        ``(columns, symbol_param_source)``, or ``(None, None)`` if ``symbol_param`` has an unsupported type.
    """
    if isinstance(symbol_param, dict):
        all_columns = symbol_param
        param_source = _SymbolParamSource(
            **{name: ott.get_object_type(value) for name, value in symbol_param.items()}
        )
    elif isinstance(symbol_param, otp.Source):
        all_columns = {
            name: symbol_param[name] for name in symbol_param.columns(skip_meta_fields=True).keys()
        }
        param_source = symbol_param.to_symbol_param()
    elif isinstance(symbol_param, _SymbolParamSource):
        all_columns = {name: symbol_param[name] for name in symbol_param.schema.keys()}
        param_source = symbol_param
    else:
        return None, None

    # all the fields are passed to the joined query as symbol parameters,
    # except for the special ones that would override explicitly set parameters
    special_fields = ['_PARAM_START_TIME_NANOS', '_PARAM_END_TIME_NANOS']

    kept_columns = {}
    for name, value in all_columns.items():
        if name not in special_fields:
            kept_columns[name] = value
            continue
        warnings.warn(
            f'Special symbol parameter "{name}" was passed to the joined query! '
            'This parameter would be ignored. Please, use parameters of the `join_with_query` '
            'function itself to set it.',
            FutureWarning,
            stacklevel=2,
        )

    kept_schema = {
        name: dtype for name, dtype in param_source.schema.items() if name not in special_fields
    }
    return kept_columns, _SymbolParamSource(**kept_schema)
201
+
202
+
203
def _fill_time_param_for_jwq(join_params, start_time, end_time, timezone):
    """
    Fill ``join_params`` with the ``start_timestamp``, ``end_timestamp`` and ``timezone`` values.

    Time bounds that are None are not written. ``timezone`` is always written:
    the quoted ``timezone`` value if it is set, the ``_TIMEZONE`` expression otherwise.
    """
    bounds = ((start_time, "start_timestamp"), (end_time, "end_timestamp"))
    for bound, name in bounds:
        _process_start_or_end_of_jwq(join_params, bound, name)

    # the "_TIMEZONE" fallback may break something, need to test
    join_params["timezone"] = f"'{timezone}'" if timezone else "_TIMEZONE"
210
+
211
+
212
+ def _fill_aux_params_for_joins(
213
+ join_params, caching, end_time, prefix, start_time, symbol_name, timezone, for_join_with_collection=False
214
+ ):
215
+ if symbol_name and not for_join_with_collection:
216
+ join_params["symbol_name"] = symbol_name
217
+ if prefix is not None:
218
+ join_params["prefix_for_output_ticks"] = str(prefix)
219
+ if caching:
220
+ if for_join_with_collection:
221
+ supported = ("per_symbol",)
222
+ else:
223
+ supported = ("cross_symbol", "per_symbol")
224
+ if caching in supported:
225
+ join_params["caching_scope"] = caching
226
+ else:
227
+ raise ValueError(f"Unknown value for caching param, please use None or any of {supported}.")
228
+ _fill_time_param_for_jwq(join_params, start_time, end_time, timezone)
229
+ if for_join_with_collection:
230
+ del join_params['timezone']
231
+
232
+
233
+ def _get_default_fields_for_outer_join_str(default_fields_for_outer_join, how, sub_source_schema):
234
+ """
235
+ Default fields for outer join definition.
236
+ Used by join_with_query() and join_with_collection()
237
+ """
238
+ default_fields_for_outer_join_str = ''
239
+ if default_fields_for_outer_join:
240
+ if how != 'outer':
241
+ raise ValueError('The `default_fields_for_outer_join` parameter can be used only for outer join')
242
+ for field, expr in default_fields_for_outer_join.items():
243
+ if field not in sub_source_schema.keys():
244
+ raise KeyError(
245
+ f'Field {field} is specified in `default_fields_for_outer_join` parameter, '
246
+ 'but is not present in the joined source schema!'
247
+ )
248
+ if default_fields_for_outer_join_str != '':
249
+ default_fields_for_outer_join_str += ','
250
+ default_fields_for_outer_join_str += f'{field}={ott.value2str(expr)}'
251
+ return default_fields_for_outer_join_str
252
+
253
+
254
+ def _get_columns_with_prefix(self: 'Source', sub_source, prefix) -> dict:
255
+ sub_source_columns = sub_source.schema
256
+ if prefix is None:
257
+ prefix = ""
258
+ if not isinstance(prefix, str):
259
+ raise ValueError("Only string constants are supported for now.")
260
+ new_columns = {prefix + name: dtype for name, dtype in sub_source_columns.items()}
261
+ same_names = set(new_columns) & set(self.schema)
262
+ if same_names:
263
+ raise ValueError(f"After applying prefix some columns aren't unique: {', '.join(same_names)}.")
264
+ return new_columns
265
+
266
+
267
def join_with_collection(
    self: 'Source',
    collection_name,
    query_func=None,
    how="outer",
    params=None,
    start=None,
    end=None,
    prefix=None,
    caching=None,
    keep_time=None,
    default_fields_for_outer_join=None,
) -> 'Source':
    """
    For each tick uses ``query_func`` to join ticks from ``collection_name`` tick collection
    (tick set, unordered tick set, tick list, or tick deque).

    Parameters
    ----------
    collection_name: str
        Name of the collection state variable from which to join ticks. Collections are the following types:
        :py:class:`TickSet <onetick.py.core._internal._state_objects.TickSet>`,
        :py:class:`TickSetUnordered <onetick.py.core._internal._state_objects.TickSetUnordered>`,
        :py:class:`TickList <onetick.py.core._internal._state_objects.TickList>` and
        :py:class:`TickDeque <onetick.py.core._internal._state_objects.TickDeque>`.

    query_func: callable
        Callable ``query_func`` should return :class:`Source`. If passed, this query will be used on ticks
        from collection before joining them.
        In this case, ``query_func`` object will be evaluated by OneTick (not python)
        for every input tick. Note that python code will be executed only once,
        so all python's conditional expressions will be evaluated only once too.

        Callable should have ``source`` parameter. When callable is called, this parameter
        will have value of a :class:`Source` object representing ticks loaded directly from the collection.
        Any operation applied to this source will be applied to ticks from the collection
        before joining them.

        Also, callable should have the parameters with names
        from ``params`` if they are specified in this method.

        If ``query_func`` is not passed, then all ticks from the collection will be joined.
    how: 'inner', 'outer'
        Type of join. If **inner**, then output tick is propagated
        only if some ticks from the collection were joined to the input tick.
    params: dict
        Mapping of the parameters' names and their values for the ``query_func``.
        :py:class:`Columns <onetick.py.Column>` can be used as a value.
        Names ``source``, ``__fixed_start_time`` and ``__fixed_end_time`` are reserved and can't be used.
        The passed dictionary is not modified.
    start: :py:class:`otp.datetime <onetick.py.datetime>`, :py:class:`otp.Operation <onetick.py.Operation>`
        Start time to select ticks from collection.
        If specified, only ticks in collection that have higher or equal timestamp will be processed.
        If not passed, then there will be no lower time bound for the collection ticks.
        This means that even ticks with TIMESTAMP lower than _START_TIME of the main query will be joined.
    end: :py:class:`otp.datetime <onetick.py.datetime>`, :py:class:`otp.Operation <onetick.py.Operation>`
        End time to select ticks from collection.
        If specified, only ticks in collection that have lower timestamp will be processed.
        If not passed, then there will be no upper time bound for the collection ticks.
        This means that even ticks with TIMESTAMP higher than _END_TIME of the main query will be joined.
    prefix : str
        Prefix for the names of joined tick fields.
    caching : str
        If `None` caching is disabled. You can specify caching by using values:

            * 'per_symbol': cache is different for each symbol.
    keep_time : str
        Name for the joined timestamp column. `None` means no timestamp column will be joined.
    default_fields_for_outer_join : dict
        When you use outer join, all output ticks will have fields from the schema of the joined source.
        If nothing was joined to a particular output tick, these fields will have default values for their type.
        This parameter allows to override the values that would be added to ticks for which nothing was joined.
        Dictionary keys should be field names, and dictionary values should be constants
        or :class:`Operation` expressions

    Returns
    -------
    :class:`Source`
        Source with joined ticks from ``collection_name``

    Raises
    ------
    KeyError
        If ``collection_name`` is not among the state variables of the source.
    ValueError
        If the state variable is not a tick collection or if ``params`` uses a reserved name.

    See also
    --------
    **JOIN_WITH_COLLECTION_SUMMARY** OneTick event processor

    Examples
    --------
    >>> # OTdirective: snippet-name: Special functions.join with collection.without query;
    >>> src = otp.Tick(A=1)
    >>> src.state_vars['TICK_SET'] = otp.state.tick_set('LATEST_TICK', 'B', otp.eval(otp.Tick(B=1, C='STR')))
    >>> src = src.join_with_collection('TICK_SET')
    >>> otp.run(src)[["A", "B", "C"]]
       A  B    C
    0  1  1  STR

    >>> # OTdirective: snippet-name: Special functions.join with collection.with query and params;
    >>> src = otp.Ticks(A=[1, 2, 3, 4, 5],
    ...                 B=[2, 2, 3, 3, 3])
    >>> src.state_vars['TICK_LIST'] = otp.state.tick_list()
    >>> def fun(tick): tick.state_vars['TICK_LIST'].push_back(tick)
    >>> src = src.script(fun)
    >>>
    >>> def join_fun(source, param_b):
    ...     source = source.agg(dict(VALUE=otp.agg.sum(source['A'])))
    ...     source['VALUE'] = source['VALUE'] + param_b
    ...     return source
    >>>
    >>> src = src.join_with_collection('TICK_LIST', join_fun, params=dict(param_b=src['B']))
    >>> otp.run(src)[["A", "B", "VALUE"]]
       A  B  VALUE
    0  1  2      3
    1  2  2      5
    2  3  3      9
    3  4  3     13
    4  5  3     18

    Join last standing quote from each exchange to trades:

    >>> # OTdirective: snippet-name: Special functions.join with collection.standing quotes per exchange;
    >>> trd = otp.Ticks(offset=[1000, 2000, 3000, 4000, 5000],
    ...                 PRICE=[10.1, 10.2, 10.15, 10.23, 10.4],
    ...                 SIZE=[100, 50, 100, 60, 200])
    >>>
    >>> qte = otp.Ticks(offset=[500, 600, 1200, 2500, 3500, 3600, 4800],
    ...                 EXCHANGE=['N', 'C', 'Q', 'Q', 'C', 'N', 'C'],
    ...                 ASK_PRICE=[10.2, 10.18, 10.18, 10.15, 10.31, 10.32, 10.44],
    ...                 BID_PRICE=[10.1, 10.17, 10.17, 10.1, 10.23, 10.31, 10.4])
    >>>
    >>> trd['TICK_TYPE'] = 'TRD'
    >>> qte['TICK_TYPE'] = 'QTE'
    >>>
    >>> trd_qte = trd + qte
    >>> trd_qte.state_vars['LAST_QUOTE_PER_EXCHANGE'] = otp.state.tick_set(
    ...     'LATEST', 'EXCHANGE',
    ...     schema=['EXCHANGE', 'ASK_PRICE', 'BID_PRICE'])
    >>>
    >>> trd_qte = trd_qte.state_vars['LAST_QUOTE_PER_EXCHANGE'].update(where=trd_qte['TICK_TYPE'] == 'QTE',
    ...                                                                value_fields=['ASK_PRICE', 'BID_PRICE'])
    >>> trd = trd_qte.where(trd_qte['TICK_TYPE'] == 'TRD')
    >>> trd.drop(['ASK_PRICE', 'BID_PRICE', 'EXCHANGE'], inplace=True)
    >>> trd = trd.join_with_collection('LAST_QUOTE_PER_EXCHANGE')
    >>> otp.run(trd)[['PRICE', 'SIZE', 'EXCHANGE', 'ASK_PRICE', 'BID_PRICE']]
        PRICE  SIZE EXCHANGE  ASK_PRICE  BID_PRICE
    0   10.10   100        N      10.20      10.10
    1   10.10   100        C      10.18      10.17
    2   10.20    50        N      10.20      10.10
    3   10.20    50        C      10.18      10.17
    4   10.20    50        Q      10.18      10.17
    5   10.15   100        N      10.20      10.10
    6   10.15   100        C      10.18      10.17
    7   10.15   100        Q      10.15      10.10
    8   10.23    60        N      10.32      10.31
    9   10.23    60        C      10.31      10.23
    10  10.23    60        Q      10.15      10.10
    11  10.40   200        N      10.32      10.31
    12  10.40   200        C      10.44      10.40
    13  10.40   200        Q      10.15      10.10
    """

    # check that passed collection is good
    if collection_name not in self.state_vars.names:
        raise KeyError(f'Collection with name {collection_name} is not in the list of available state variables')

    if not isinstance(self.state_vars[collection_name], _TickSequence):
        raise ValueError(
            f'State variable {collection_name} is not a tick collection! '
            'Only TickSet, TickSetUnordered, TickList and TickDeque objects are supported '
            'as data sources for join_with_collection'
        )

    # Work on a private copy: the "__fixed_*" keys added below must not leak into the caller's
    # dictionary, otherwise reusing the same dictionary for another call fails the check below.
    params = {} if params is None else dict(params)

    special_params = ('source', '__fixed_start_time', '__fixed_end_time')
    for sp_param in special_params:
        if sp_param in params:
            raise ValueError(
                f'Parameter name "{sp_param}" is special and cannot be used for params '
                'of join_with_collection function. Please, select a different name.'
            )

    # JOIN_WITH_COLLECTION_SUMMARY has START_TIME and END_TIME parameters with the precision of millisecond.
    # So, here we add a workaround on onetick.py side to support nsectime precision
    # "start" and "end" parameters of the EP are kept as they may be necessary
    # for performance reasons

    if start is not None:
        params['__fixed_start_time'] = start
        start = start - otp.Milli(1)

    if end is not None:
        params['__fixed_end_time'] = end
        end = end + otp.Milli(1)

    # prepare temporary file
    # ------------------------------------ #

    # TODO: this should be a common code somewhere
    collection_schema = {
        key: value
        for key, value in self.state_vars[collection_name].schema.items()
        if not self._check_key_is_reserved(key)
    }

    join_source_root = otp.DataSource(
        db=otp.config.default_db, tick_type="ANY", schema_policy="manual", schema=collection_schema,
    )
    if query_func is None:
        query_func = lambda source: source  # noqa

    converted_params = prepare_params(**params)

    # the exact time bounds are applied here as filters, they are not arguments of ``query_func``
    fixed_start_time = converted_params.pop('__fixed_start_time', None)
    fixed_end_time = converted_params.pop('__fixed_end_time', None)

    sub_source = query_func(source=join_source_root, **converted_params)

    if fixed_start_time is not None:
        sub_source = sub_source[sub_source['TIMESTAMP'] >= fixed_start_time][0]
    if fixed_end_time is not None:
        sub_source = sub_source[sub_source['TIMESTAMP'] < fixed_end_time][0]

    sub_source = self._process_keep_time_param(keep_time, sub_source)

    # the "__fixed_*" entries stay in ``params`` here, so they are passed to the query as its parameters
    params_str = _columns_to_params_for_joins(params, query_params=True)

    sub_source_schema = sub_source.schema.copy()

    columns = {}
    columns.update(self._get_columns_with_prefix(sub_source, prefix))
    columns.update(self.columns(skip_meta_fields=True))

    res = self.copy(columns=columns)

    res._merge_tmp_otq(sub_source)
    query_name = sub_source._store_in_tmp_otq(
        res._tmp_otq, symbols='_NON_EXISTING_SYMBOL_', operation_suffix="join_with_collection"
    )
    # ------------------------------------ #
    default_fields_for_outer_join_str = _get_default_fields_for_outer_join_str(
        default_fields_for_outer_join, how, sub_source_schema
    )

    join_params = dict(
        collection_name=str(self.state_vars[collection_name]),
        otq_query=f'"THIS::{query_name}"',
        join_type=how.upper(),
        otq_query_params=params_str,
        default_fields_for_outer_join=default_fields_for_outer_join_str,
    )

    _fill_aux_params_for_joins(
        join_params, caching, end, prefix, start, symbol_name=None, timezone=None, for_join_with_collection=True
    )
    res.sink(otq.JoinWithCollectionSummary(**join_params))
    res._add_table()
    res.sink(otq.Passthrough(fields="TIMESTAMP", drop_fields=True))

    return res
529
+
530
+
531
def join_with_query(
    self: 'Source',
    query,
    how="outer",
    symbol=None,
    params=None,
    start=None,
    end=None,
    timezone=None,
    prefix=None,
    caching=None,
    keep_time=None,
    where=None,
    default_fields_for_outer_join=None,
    symbol_time=None,
    concurrency=None,
    process_query_async: bool = True,
    **kwargs,
) -> 'Source':
    """
    For each tick executes ``query``.

    Parameters
    ----------
    query: callable, Source
        Callable ``query`` should return :class:`Source`. This object will be evaluated by OneTick (not python)
        for every tick. Note python code will be executed only once, so all python's conditional expressions
        will be evaluated only once too.
        Callable should have ``symbol`` parameter and the parameters with names
        from ``params`` if they are specified in this method.

        If ``query`` is a :class:`Source` object then it will be propagated as a query to OneTick.
    how: 'inner', 'outer'
        Type of join. If **inner**, then each tick is propagated
        only if its ``query`` execution has a non-empty result.
    params: dict
        Mapping of the parameters' names and their values for the ``query``.
        :py:class:`Columns <onetick.py.Column>` can be used as a value.
    symbol: str, Operation, dict, Source, or Tuple[Union[str, Operation], Union[dict, Source]]
        Symbol name to use in ``query``. In addition, symbol params can be passed along with symbol name.

        Symbol name can be passed as a string or as an :class:`Operation`.

        Symbol parameters can be passed as a dictionary. Also, the main :class:`Source` object,
        or the object containing a symbol parameter list, can be used as a list of symbol parameters.
        Special symbol parameters (`_PARAM_START_TIME_NANOS` and `_PARAM_END_TIME_NANOS`)
        will be ignored and will not be propagated to ``query``.

        ``symbol`` will be interpreted as a symbol name or as symbol parameters, depending on its type.
        You can pass both as a tuple.

        If symbol name is not passed, then symbol name from the main source is used.
    start: :py:class:`otp.datetime <onetick.py.datetime>`, :py:class:`otp.Operation <onetick.py.Operation>`
        Start time of ``query``.
        By default, start time of the main source is used.
    end: :py:class:`otp.datetime <onetick.py.datetime>`, :py:class:`otp.Operation <onetick.py.Operation>`
        End time of ``query`` (note that it's non-inclusive).
        By default, end time of the main source is used.
    start_time:
        .. deprecated:: 1.48.4
            The same as ``start``. If both are passed, ``start_time`` takes precedence.
    end_time:
        .. deprecated:: 1.48.4
            The same as ``end``. If both are passed, ``end_time`` takes precedence.
    timezone : Optional, str
        Timezone of ``query``.
        By default, timezone of the main source is used.
    prefix : str
        Prefix for the names of joined tick fields.
    caching : str
        If `None` caching is disabled (default). You can specify caching by using values:

        * 'cross_symbol': cache is the same for all symbols

        * 'per_symbol': cache is different for each symbol.

        .. note::
            When parameter ``process_query_async`` is set to ``True`` (default), caching may work
            unexpectedly, because ticks will be accumulated in batches and ``query`` will be processed
            in different threads.
    keep_time : str
        Name for the joined timestamp column. `None` means no timestamp column will be joined.
    where : Operation
        Condition to filter ticks for which the result of the ``query`` will be joined.
        Can be used only with outer join.
    default_fields_for_outer_join : dict
        When you use outer join, all output ticks will have fields from the schema of the joined source.
        If nothing was joined to a particular output tick, these fields will have default values for their type.
        This parameter allows to override the values that would be added to ticks for which nothing was joined.
        Dictionary keys should be field names, and dictionary values should be constants
        or :class:`Operation` expressions
    symbol_time : :py:class:`otp.datetime <onetick.py.datetime>`, :py:class:`otp.Operation <onetick.py.Operation>`
        Time that will be used by Onetick to map the symbol with which ``query`` is executed to the reference data.
        This parameter is only necessary if the query is expected to perform symbology conversions.
    concurrency : int
        Specifies number of threads for asynchronous processing of ``query`` per unbound symbol list.
        By default, the number of threads is 1.
    process_query_async: bool
        Switches between synchronous and asynchronous execution of queries.

        While asynchronous execution is generally much more effective,
        in certain cases synchronous execution may still be preferred
        (e.g., when there are a few input ticks, each initiating a memory-consuming query).

        In asynchronous mode typically while parallel thread is processing the query,
        EP accumulates some input ticks.

    Returns
    -------
    :class:`Source`
        Source with joined ticks from ``query``

    Raises
    ------
    TypeError
        If a keyword argument other than the deprecated ``start_time`` / ``end_time`` aliases is passed.
    ValueError
        If ``symbol`` has a wrong format, if ``symbol_time`` is neither a datetime nor a string,
        if ``where`` is used with a non-outer join, or if ``concurrency`` is not a positive integer.
    AttributeError
        If ``query`` is a callable with a ``symbol`` parameter and ``params`` also contains the key ``symbol``.

    See also
    --------
    **JOIN_WITH_QUERY** OneTick event processor

    Examples
    --------
    >>> # OTdirective: snippet-name: Special functions.join with query.with an otp data source;
    >>> d = otp.Ticks(Y=[-1])
    >>> d = d.update(dict(Y=1), where=(d.Symbol.name == "a"))
    >>> data = otp.Ticks(X=[1, 2],
    ...                  S=["a", "b"])
    >>> res = data.join_with_query(d, how='inner', symbol=data['S'])
    >>> otp.run(res)[["X", "Y", "S"]]
       X  Y  S
    0  1  1  a
    1  2 -1  b

    >>> d = otp.Ticks(ADDED=[-1])
    >>> d = d.update(dict(ADDED=1), where=(d.Symbol.name == "3"))  # symbol name is always string
    >>> data = otp.Ticks(A=[1, 2], B=[2, 4])
    >>> res = data.join_with_query(d, how='inner', symbol=(data['A'] + data['B']))  # OTdirective: skip-snippet:;
    >>> df = otp.run(res)
    >>> df[["A", "B", "ADDED"]]
       A  B  ADDED
    0  1  2      1
    1  2  4     -1

    Constants as symbols are also supported:

    >>> d = otp.Ticks(ADDED=[d.Symbol.name])
    >>> data = otp.Ticks(A=[1, 2], B=[2, 4])
    >>> res = data.join_with_query(d, how='inner', symbol=1)  # OTdirective: skip-snippet:;
    >>> df = otp.run(res)
    >>> df[["A", "B", "ADDED"]]
       A  B ADDED
    0  1  2     1
    1  2  4     1

    Function object as query is also supported (Note it will be executed only once in python's code):

    >>> def func(symbol):
    ...     d = otp.Ticks(TYPE=["six"])
    ...     d = d.update(dict(TYPE="three"), where=(symbol.name == "3"))  # symbol is always converted to string
    ...     d["TYPE"] = symbol['PREF'] + d["TYPE"] + symbol['POST']
    ...     return d
    >>> # OTdirective: snippet-name: Special functions.join with query.with a function
    >>> data = otp.Ticks(A=[1, 2], B=[2, 4])
    >>> res = data.join_with_query(func, how='inner', symbol=(data['A'] + data['B'], dict(PREF="_", POST="$")))
    >>> df = otp.run(res)
    >>> df[["A", "B", "TYPE"]]
       A  B     TYPE
    0  1  2  _three$
    1  2  4    _six$

    It's possible to pass the source itself as a list of symbol parameters, which will make all of its fields
    accessible through the "symbol" object:

    >>> def func(symbol):
    ...     d = otp.Ticks(TYPE=["six"])
    ...     d["TYPE"] = symbol['PREF'] + d["TYPE"] + symbol['POST']
    ...     return d
    >>> # OTdirective: snippet-name: 'Source' operations.join with query.source as symbol;
    >>> data = otp.Ticks(A=[1, 2], B=[2, 4], PREF=["_", "$"], POST=["$", "_"])
    >>> res = data.join_with_query(func, how='inner', symbol=data)
    >>> df = otp.run(res)
    >>> df[["A", "B", "TYPE"]]
       A  B   TYPE
    0  1  2  _six$
    1  2  4  $six_

    The examples above can be rewritten by using onetick query parameters instead of symbol parameters.
    OTQ parameters are global for query, while symbol parameters can be redefined by bound symbols:

    >>> def func(symbol, pref, post):
    ...     d = otp.Ticks(TYPE=["six"])
    ...     d = d.update(dict(TYPE="three"), where=(symbol.name == "3"))  # symbol is always converted to string
    ...     d["TYPE"] = pref + d["TYPE"] + post
    ...     return d
    >>> # OTdirective: snippet-name: Special functions.join with query.with a function that takes params;
    >>> data = otp.Ticks(A=[1, 2], B=[2, 4])
    >>> res = data.join_with_query(func, how='inner', symbol=(data['A'] + data['B']),
    ...                            params=dict(pref="_", post="$"))
    >>> df = otp.run(res)
    >>> df[["A", "B", "TYPE"]]
       A  B     TYPE
    0  1  2  _three$
    1  2  4    _six$

    Some or all onetick query parameters can be column or expression also:

    >>> def func(symbol, pref, post):
    ...     d = otp.Ticks(TYPE=["six"])
    ...     d = d.update(dict(TYPE="three"), where=(symbol.name == "3"))  # symbol is always converted to string
    ...     d["TYPE"] = pref + d["TYPE"] + post
    ...     return d
    >>> # OTdirective: snippet-name: Special functions.join with query.with a function that takes params from fields; # noqa
    >>> data = otp.Ticks(A=[1, 2], B=[2, 4], PREF=["^", "_"], POST=["!", "$"])
    >>> res = data.join_with_query(func, how='inner', symbol=(data['A'] + data['B']),
    ...                            params=dict(pref=data["PREF"] + ".", post=data["POST"]))
    >>> df = otp.run(res)
    >>> df[["A", "B", "TYPE"]]
       A  B      TYPE
    0  1  2  ^.three!
    1  2  4    _.six$

    You can specify ``start`` and ``end`` time of the query, otherwise time interval of the main query will be used:

    >>> # OTdirective: snippet-name: Special functions.join with query.passing start/end times;
    >>> d = otp.Ticks(Y=[1, 2])
    >>> data = otp.Ticks(X=[1, 2])
    >>> start = otp.datetime(2003, 12, 1, 0, 0, 0, 1000)
    >>> end = otp.datetime(2003, 12, 1, 0, 0, 0, 3000)
    >>> res = data.join_with_query(d, how='inner', start=start, end=end)
    >>> otp.run(res)
                         Time  Y  X
    0 2003-12-01 00:00:00.000  1  1
    1 2003-12-01 00:00:00.000  2  1
    2 2003-12-01 00:00:00.001  1  2
    3 2003-12-01 00:00:00.001  2  2

    By default joined query inherits start and end time from the main query:

    >>> joined_query = otp.Tick(JOINED_START_TIME=otp.meta_fields.start_time,
    ...                         JOINED_END_TIME=otp.meta_fields.end_time)
    >>> main_query = otp.Tick(A=1)
    >>> data = main_query.join_with_query(joined_query)
    >>> otp.run(data, start=otp.dt(2003, 12, 1), end=otp.dt(2003, 12, 4))
            Time JOINED_START_TIME JOINED_END_TIME  A
    0 2003-12-01        2003-12-01      2003-12-04  1

    Parameters ``start`` and ``end`` can be used to change time interval for the joined query:

    >>> data = main_query.join_with_query(joined_query, start=otp.dt(2024, 1, 1), end=otp.dt(2024, 1, 3))
    >>> otp.run(data, start=otp.dt(2003, 12, 1), end=otp.dt(2003, 12, 4))
            Time JOINED_START_TIME JOINED_END_TIME  A
    0 2003-12-01        2024-01-01      2024-01-03  1

    Note that query ``start`` time is inclusive, but query ``end`` time is not,
    meaning that ticks with timestamps equal to the query end time will not be included:

    >>> main_query = otp.Tick(A=1)
    >>> joined_query = otp.Tick(DAY=0, bucket_interval=24*60*60)
    >>> joined_query['DAY'] = joined_query['TIMESTAMP'].dt.day_of_month()
    >>> otp.run(joined_query, start=otp.dt(2003, 12, 1), end=otp.dt(2003, 12, 5))
            Time  DAY
    0 2003-12-01    1
    1 2003-12-02    2
    2 2003-12-03    3
    3 2003-12-04    4

    >>> joined_query = joined_query.last()
    >>> data = main_query.join_with_query(joined_query,
    ...                                   start=otp.dt(2003, 12, 1), end=otp.dt(2003, 12, 4))
    >>> otp.run(data)
            Time  DAY  A
    0 2003-12-01    3  1

    If you want to include such ticks, you can add one nanosecond to the query end time:

    >>> data = main_query.join_with_query(joined_query,
    ...                                   start=otp.dt(2003, 12, 1), end=otp.dt(2003, 12, 4) + otp.Nano(1))
    >>> otp.run(data)
            Time  DAY  A
    0 2003-12-01    4  1

    Use ``keep_time`` parameter to keep or rename original timestamp column:

    >>> # OTdirective: snippet-name: Special functions.join with query.keep the timestamps of the joined ticks;
    >>> d = otp.Ticks(Y=[1, 2])
    >>> data = otp.Ticks(X=[1, 2])
    >>> res = data.join_with_query(d, how='inner', keep_time="ORIG_TIME")
    >>> otp.run(res)
                         Time  Y               ORIG_TIME  X
    0 2003-12-01 00:00:00.000  1 2003-12-01 00:00:00.000  1
    1 2003-12-01 00:00:00.000  2 2003-12-01 00:00:00.001  1
    2 2003-12-01 00:00:00.001  1 2003-12-01 00:00:00.000  2
    3 2003-12-01 00:00:00.001  2 2003-12-01 00:00:00.001  2
    """

    # **kwargs exists only to accept the deprecated ``start_time`` / ``end_time`` aliases.
    # Anything else is almost certainly a misspelled parameter name, so fail loudly
    # instead of silently dropping it and running the join with default values.
    unexpected_kwargs = sorted(set(kwargs) - {'start_time', 'end_time'})
    if unexpected_kwargs:
        raise TypeError(
            'join_with_query() got unexpected keyword argument(s): ' + ', '.join(unexpected_kwargs)
        )

    # TODO: check if join_with_query checks schema of joined source against primary source,
    # by itself or with process_by_group

    if params is None:
        params = {}

    converted_symbol_name, symbol_param = _check_and_convert_symbol(symbol)

    # default symbol name should be this: _SYMBOL_NAME if it is not empty else _NON_EXISTING_SYMBOL_
    # this way we will force JWQ to substitute symbol with any symbol parameters we may have passed
    # otherwise (if an empty symbol name is passed to JWQ), it will not substitute either symbol name
    # or symbol parameters, and so symbol parameters may get lost
    # see BDS-263
    if converted_symbol_name is None:
        converted_symbol_name = "CASE(_SYMBOL_NAME,'','_NON_EXISTING_SYMBOL',_SYMBOL_NAME)"

    converted_symbol_param_columns, converted_symbol_param = _convert_symbol_param_and_columns(symbol_param)
    if converted_symbol_param is None:
        # we couldn't interpret "symbols" as either symbol name or symbol parameters
        raise ValueError(
            '"symbol" parameter has a wrong format! It should be a symbol name, a symbol parameter '
            'object (dict or Source), or a tuple containing both'
        )

    # both legacy ways of passing symbol time still work, but users are steered to ``symbol_time``
    if '_PARAM_SYMBOL_TIME' in converted_symbol_param_columns:
        warnings.warn(
            '"_PARAM_SYMBOL_TIME" explicitly passed among join_with_query symbol parameters! '
            'This is deprecated - please use symbol_time parameter instead.',
            FutureWarning,
            stacklevel=2,
        )
    if '_SYMBOL_TIME' in params:
        warnings.warn(
            'Query parameter "_SYMBOL_TIME" passed to join_with_query! '
            'This is deprecated. Please use a dedicated `symbol_time` parameter.',
            FutureWarning,
            stacklevel=2,
        )

    # prepare temporary file
    # ------------------------------------ #
    converted_params = prepare_params(**params)

    if isinstance(query, otp.Source):
        sub_source = query
    else:
        # inspect function
        # -------
        sig = inspect.signature(query)
        if "symbol" in sig.parameters:
            if "symbol" in converted_params:
                raise AttributeError(
                    '"params" contains key "symbol", which is reserved for symbol parameters. '
                    'Please, rename this parameter to another name'
                )
            converted_params["symbol"] = converted_symbol_param  # type: ignore
        # the callable is invoked exactly once, here, to build the sub-query graph
        sub_source = query(**converted_params)

    sub_source = self._process_keep_time_param(keep_time, sub_source)

    if not sub_source._is_unbound_required():
        sub_source += otp.Empty()

    # adding symbol time
    if symbol_time is not None:
        if ott.get_object_type(symbol_time) is not otp.nsectime and ott.get_object_type(symbol_time) is not str:
            raise ValueError(
                f'Parameter of type {ott.get_object_type(symbol_time)} passed as symbol_time! '
                'This parameter only supports datetime values or strings'
            )
        if is_join_with_query_symbol_time_otq_supported():
            # copy first, so that the caller's ``params`` dict is not mutated
            params = params.copy()
            params['_SYMBOL_TIME'] = symbol_time
        else:
            converted_symbol_param_columns['_PARAM_SYMBOL_TIME'] = symbol_time

    params_str = _columns_to_params_for_joins(params, query_params=True)
    symbol_params_str = _columns_to_params_for_joins(converted_symbol_param_columns)

    sub_source_schema = sub_source.schema.copy()

    # columns of the main source are applied last, so they win on a name clash with joined columns
    columns = {}
    columns.update(self._get_columns_with_prefix(sub_source, prefix))
    columns.update(self.columns(skip_meta_fields=True))

    res = self.copy(columns=columns)

    res._merge_tmp_otq(sub_source)
    query_name = sub_source._store_in_tmp_otq(
        res._tmp_otq, symbols='_NON_EXISTING_SYMBOL_', operation_suffix="join_with_query"
    )  # TODO: combine with _convert_symbol_to_string
    # ------------------------------------ #

    if where is not None and how != 'outer':
        raise ValueError('The `where` parameter can be used only for outer join')

    default_fields_for_outer_join_str = _get_default_fields_for_outer_join_str(
        default_fields_for_outer_join, how, sub_source_schema
    )

    join_params = dict(
        otq_query=f'"THIS::{query_name}"',
        join_type=how.upper(),
        otq_query_params=params_str,
        symbol_params=symbol_params_str,
        where=str(where._make_python_way_bool_expression()) if where is not None else '',
        default_fields_for_outer_join=default_fields_for_outer_join_str,
        process_query_asynchronously=process_query_async,
    )
    if concurrency is not None:
        if not isinstance(concurrency, int) or concurrency <= 0:
            raise ValueError('Wrong value of concurrency parameter passed! concurrency should be a positive integer')
        join_params['shared_thread_count'] = concurrency

    # deprecated aliases take precedence over ``start`` / ``end`` when both are given
    start_time = kwargs.get('start_time', start)
    end_time = kwargs.get('end_time', end)
    _fill_aux_params_for_joins(join_params, caching, end_time, prefix, start_time, converted_symbol_name, timezone)
    res.sink(otq.JoinWithQuery(**join_params))
    res._add_table()
    res.sink(otq.Passthrough(fields="TIMESTAMP", drop_fields=True))

    return res
944
+
945
+
946
def point_in_time(
    self: 'Source',
    source: Union['Source', str],
    offsets: List[int],
    offset_type: Literal['time_msec', 'num_ticks'] = 'time_msec',
    input_ts_fields_to_propagate: Optional[List[str]] = None,
    symbol_date=None,
) -> 'Source':
    """
    This method joins ticks from current source with the ticks from another ``source``.

    Joined ticks are those that are offset by
    the specified number of milliseconds or by the specified number of ticks
    relative to the current source's tick timestamp.

    Output tick may be generated for each specified offset, so this method may output several ticks for each input tick.

    If another ``source`` doesn't have a tick with specified offset, then output tick is not generated.

    Fields **TICK_TIME** and **OFFSET** are also added to the output ticks,
    specifying original timestamp of the joined tick and the offset that it was specified to join by.

    Note
    ----
    In order for this method to have reasonable performance,
    the set of input ticks' timestamps has to be relatively small.

    In other words, the points in time, which the user is interested in,
    have to be quite few in order usage of this method to be justified.

    Parameters
    ----------
    source: :class:`Source` or str
        The source from which the data will be joined or the string with the path to the .otq file
        (note that in the latter case schema can not be updated automatically with the fields from the joined query).
    offsets:
        List of integers specifying offsets for each timestamp.
    offset_type: 'time_msec' or 'num_ticks'
        The type of offset: number of milliseconds or the number of ticks.
    input_ts_fields_to_propagate:
        The list of fields to propagate from the current source.
        By default no fields (except **TIMESTAMP**) are propagated.
    symbol_date: :py:class:`otp.datetime <onetick.py.datetime>`
        Symbol date that will be set for the ``source`` inner query.

    See also
    --------
    | **POINT_IN_TIME** OneTick event processor
    | :func:`onetick.py.PointInTime`
    | :func:`onetick.py.join_by_time`

    Examples
    --------

    Quotes and trades for testing:

    .. testcode::

       qte = otp.Ticks(ASK_PRICE=[20, 21, 22, 23, 24, 25], BID_PRICE=[20, 21, 22, 23, 24, 25])
       print(otp.run(qte))

    .. testoutput::

                            Time  ASK_PRICE  BID_PRICE
       0 2003-12-01 00:00:00.000         20         20
       1 2003-12-01 00:00:00.001         21         21
       2 2003-12-01 00:00:00.002         22         22
       3 2003-12-01 00:00:00.003         23         23
       4 2003-12-01 00:00:00.004         24         24
       5 2003-12-01 00:00:00.005         25         25

    .. testcode::

       trd = otp.Ticks(PRICE=[1, 3, 5], SIZE=[100, 300, 500], offset=[1, 3, 5])
       print(otp.run(trd))

    .. testoutput::

                            Time  PRICE  SIZE
       0 2003-12-01 00:00:00.001      1   100
       1 2003-12-01 00:00:00.003      3   300
       2 2003-12-01 00:00:00.005      5   500

    Joining each quote with first trade with equal or less timestamp:

    .. testcode::
       :skipif: not is_supported_point_in_time()

       data = qte.point_in_time(trd, offsets=[0])
       print(otp.run(data))

    .. testoutput::

                            Time  PRICE  SIZE               TICK_TIME  OFFSET
       0 2003-12-01 00:00:00.001      1   100 2003-12-01 00:00:00.001       0
       1 2003-12-01 00:00:00.002      1   100 2003-12-01 00:00:00.001       0
       2 2003-12-01 00:00:00.003      3   300 2003-12-01 00:00:00.003       0
       3 2003-12-01 00:00:00.004      3   300 2003-12-01 00:00:00.003       0
       4 2003-12-01 00:00:00.005      5   500 2003-12-01 00:00:00.005       0

    By default fields from the current source are not propagated,
    use parameter ``input_ts_fields_to_propagate`` to add them to the output:

    .. testcode::
       :skipif: not is_supported_point_in_time()

       data = qte.point_in_time(trd, offsets=[0], input_ts_fields_to_propagate=['ASK_PRICE', 'BID_PRICE'])
       print(otp.run(data))

    .. testoutput::

                            Time  ASK_PRICE  BID_PRICE  PRICE  SIZE               TICK_TIME  OFFSET
       0 2003-12-01 00:00:00.001         21         21      1   100 2003-12-01 00:00:00.001       0
       1 2003-12-01 00:00:00.002         22         22      1   100 2003-12-01 00:00:00.001       0
       2 2003-12-01 00:00:00.003         23         23      3   300 2003-12-01 00:00:00.003       0
       3 2003-12-01 00:00:00.004         24         24      3   300 2003-12-01 00:00:00.003       0
       4 2003-12-01 00:00:00.005         25         25      5   500 2003-12-01 00:00:00.005       0

    Note that first quote was not propagated, because it doesn't have corresponding trade.

    Offset may be positive or negative.
    If several offsets are specified, several output ticks may be generated for a single input tick:

    .. testcode::
       :skipif: not is_supported_point_in_time()

       data = qte.point_in_time(trd, offsets=[0, 1], input_ts_fields_to_propagate=['ASK_PRICE', 'BID_PRICE'])
       print(otp.run(data))

    .. testoutput::

                             Time  ASK_PRICE  BID_PRICE  PRICE  SIZE               TICK_TIME  OFFSET
       0  2003-12-01 00:00:00.000         20         20      1   100 2003-12-01 00:00:00.001       1
       1  2003-12-01 00:00:00.001         21         21      1   100 2003-12-01 00:00:00.001       0
       2  2003-12-01 00:00:00.001         21         21      1   100 2003-12-01 00:00:00.001       1
       3  2003-12-01 00:00:00.002         22         22      1   100 2003-12-01 00:00:00.001       0
       4  2003-12-01 00:00:00.002         22         22      3   300 2003-12-01 00:00:00.003       1
       5  2003-12-01 00:00:00.003         23         23      3   300 2003-12-01 00:00:00.003       0
       6  2003-12-01 00:00:00.003         23         23      3   300 2003-12-01 00:00:00.003       1
       7  2003-12-01 00:00:00.004         24         24      3   300 2003-12-01 00:00:00.003       0
       8  2003-12-01 00:00:00.004         24         24      5   500 2003-12-01 00:00:00.005       1
       9  2003-12-01 00:00:00.005         25         25      5   500 2003-12-01 00:00:00.005       0
       10 2003-12-01 00:00:00.005         25         25      5   500 2003-12-01 00:00:00.005       1

    By default the number of milliseconds is used as an offset.
    You can also specify the number of ticks as an offset:

    .. testcode::
       :skipif: not is_supported_point_in_time()

       data = qte.point_in_time(trd, offset_type='num_ticks', offsets=[-1, 1],
                                input_ts_fields_to_propagate=['ASK_PRICE', 'BID_PRICE'])
       print(otp.run(data))

    .. testoutput::

                            Time  ASK_PRICE  BID_PRICE  PRICE  SIZE               TICK_TIME  OFFSET
       0 2003-12-01 00:00:00.000         20         20      1   100 2003-12-01 00:00:00.001       1
       1 2003-12-01 00:00:00.001         21         21      3   300 2003-12-01 00:00:00.003       1
       2 2003-12-01 00:00:00.002         22         22      3   300 2003-12-01 00:00:00.003       1
       3 2003-12-01 00:00:00.003         23         23      1   100 2003-12-01 00:00:00.001      -1
       4 2003-12-01 00:00:00.003         23         23      5   500 2003-12-01 00:00:00.005       1
       5 2003-12-01 00:00:00.004         24         24      1   100 2003-12-01 00:00:00.001      -1
       6 2003-12-01 00:00:00.004         24         24      5   500 2003-12-01 00:00:00.005       1
       7 2003-12-01 00:00:00.005         25         25      3   300 2003-12-01 00:00:00.003      -1
    """
    if not is_supported_point_in_time():
        raise RuntimeError('PointInTime event processor is not supported on this OneTick version')

    result = self.copy()

    allowed_offset_types = ('time_msec', 'num_ticks')
    if offset_type not in allowed_offset_types:
        raise ValueError(f"Wrong value for parameter 'offset_type': {offset_type}")

    # a string is used verbatim as the query reference; a Source object is first stored
    # in the temporary .otq of the result and then referenced from there
    source_is_path = isinstance(source, str)
    if source_is_path:
        query_ref = source
    else:
        stored_name = source._store_in_tmp_otq(
            result._tmp_otq,
            operation_suffix='point_in_time',
            # set default symbol, even if it's not set by user, symbol's value doesn't matter in this case
            symbols=otp.config.get('default_symbol', 'ANY'),
            symbol_date=symbol_date,
        )
        query_ref = f'THIS::{stored_name}'

    propagated_fields = input_ts_fields_to_propagate or []

    result.sink(
        otq.PointInTime(
            otq_query=query_ref,
            offset_type=offset_type.upper(),
            offsets=','.join(str(offset) for offset in offsets),
            input_ts_fields_to_propagate=','.join(str(field) for field in propagated_fields),
        )
    )

    # only the explicitly propagated input fields stay in the schema
    # (nothing is kept when the list is empty)
    kept_schema = {
        name: dtype
        for name, dtype in result.schema.items()
        if name in propagated_fields
    }
    result.schema.set(**kept_schema)

    # the joined fields are known only when the joined query is a Source object
    if not source_is_path:
        result.schema.update(**source.schema)

    result.schema.update(TICK_TIME=otp.nsectime, OFFSET=int)
    return result
1156
+
1157
+
1158
+ def join_with_snapshot(
1159
+ self: 'Source',
1160
+ snapshot_name='VALUE',
1161
+ snapshot_storage='memory',
1162
+ allow_snapshot_absence=False,
1163
+ join_keys=None,
1164
+ symbol_name_in_snapshot=None,
1165
+ database='',
1166
+ default_fields_for_outer_join=None,
1167
+ prefix_for_output_ticks='',
1168
+ snapshot_fields=None,
1169
+ ):
1170
+ """
1171
+ Saves last (at most) `n` ticks of each group of ticks from the input time series in global storage or
1172
+ in a memory mapped file under a specified snapshot name.
1173
+ Tick descriptor should be the same for all ticks saved into the snapshot.
1174
+ These ticks can then be read via :py:class:`ReadSnapshot <onetick.py.ReadSnapshot>` by using the name
1175
+ of the snapshot and the same symbol name (``<db_name>::<symbol>``) that were used by this method.
1176
+
1177
+ .. warning::
1178
+ You should update schema manually, if you want to use fields from snapshot in `onetick-py` query description
1179
+ before its execution.
1180
+
1181
+ That's due to the fact, that `onetick-py` can't identify a schema of data in a snapshot before making a query.
1182
+
1183
+ If you set ``default_fields_for_outer_join`` parameter, schema will be guessed from default fields values.
1184
+
1185
+ Parameters
1186
+ ----------
1187
+ snapshot_name: str
1188
+ The name that was specified in :py:meth:`onetick.py.Source.save_snapshot` as a ``snapshot_name`` during saving.
1189
+
1190
+ Default: `VALUE`
1191
+ snapshot_storage: str
1192
+ This parameter specifies the place of storage of the snapshot. Possible options are:
1193
+
1194
+ * `memory` - the snapshot is stored in the dynamic (heap) memory of the process
1195
+ that ran (or is still running) the :py:meth:`onetick.py.Source.save_snapshot` for the snapshot.
1196
+ * `memory_mapped_file` - the snapshot is stored in a memory mapped file.
1197
+ For each symbol to get the location of the snapshot in the file system, ``join_with_snapshot`` looks at
1198
+ the **SAVE_SNAPSHOT_DIR** parameter value in the locator section for the database of the symbol.
1199
+ In a specified directory it creates a new directory with the name of the snapshot and keeps
1200
+ the memory mapped file and some other helper files there.
1201
+
1202
+ Default: `memory`
1203
+ allow_snapshot_absence: bool
1204
+ If specified, the EP does not display an error about missing snapshot
1205
+ if the snapshot has not been saved or is still being saved.
1206
+
1207
+ Default: `False`
1208
+ join_keys: list, optional
1209
+ A list of names of attributes. A non-empty list causes input ticks to be joined only if all of them
1210
+ have matching values for all specified attributes.
1211
+ Currently, these fields need to match with ``group_by`` fields of the corresponding snapshot.
1212
+ symbol_name_in_snapshot: str, :class:`~onetick.py.Column` or :class:`~onetick.py.Operation`, optional
1213
+ Expression that evaluates to a string containing symbol name.
1214
+ Specified expression is reevaluated upon the arrival of each tick.
1215
+ If this parameter is empty, the input symbol name is used.
1216
+ database: str, optional
1217
+ The database to read the snapshot. If not specified database from the symbol is used.
1218
+ default_fields_for_outer_join: dict, optional
1219
+ A `dict` with field name as key and value, :class:`~onetick.py.Column` or :class:`~onetick.py.Operation`,
1220
+ which specifies the names and the values of the fields (also, optionally, the field type),
1221
+ used to form ticks to be joined with unmatched input ticks.
1222
+
1223
+ If you want to specify field type, pass tuple of field dtype and expression or value as dict item value.
1224
+
1225
+ This parameter is reevaluated upon the arrival of each tick.
1226
+
1227
+ It's also used for auto detecting snapshot schema for using fields from snapshot
1228
+ while building query via ``onetick-py``.
1229
+ prefix_for_output_ticks: str
1230
+ The prefix for the names of joined tick fields.
1231
+
1232
+ Default: `empty string`
1233
+ snapshot_fields: List[str], None
1234
+ Specifies list of fields from the snapshot to join with input ticks. When empty, all fields are included.
1235
+
1236
+ See also
1237
+ --------
1238
+ | **JOIN_WITH_SNAPSHOT** OneTick event processor
1239
+ | :py:class:`onetick.py.ReadSnapshot`
1240
+ | :py:class:`onetick.py.ShowSnapshotList`
1241
+ | :py:class:`onetick.py.FindSnapshotSymbols`
1242
+ | :py:meth:`onetick.py.Source.save_snapshot`
1243
+
1244
+ Examples
1245
+ --------
1246
+ Simple ticks join with snapshot:
1247
+
1248
+ >>> src = otp.Ticks(A=[1, 2])
1249
+ >>> src = src.join_with_snapshot(snapshot_name='some_snapshot') # doctest: +SKIP
1250
+ >>> otp.run(src) # doctest: +SKIP
1251
+ Time A X Y TICK_TIME
1252
+ 0 2003-12-01 00:00:00.000 1 1 4 2003-12-01 00:00:00.000
1253
+ 1 2003-12-01 00:00:00.000 1 2 5 2003-12-01 00:00:00.001
1254
+ 2 2003-12-01 00:00:00.001 2 1 4 2003-12-01 00:00:00.000
1255
+ 3 2003-12-01 00:00:00.001 2 2 5 2003-12-01 00:00:00.001
1256
+
1257
+ Add prefix ``T.`` for fields from snapshot:
1258
+
1259
+ >>> src = otp.Ticks(A=[1, 2])
1260
+ >>> src = src.join_with_snapshot(
1261
+ ... snapshot_name='some_snapshot', prefix_for_output_ticks='T.',
1262
+ ... ) # doctest: +SKIP
1263
+ >>> otp.run(src) # doctest: +SKIP
1264
+ Time A T.X T.Y T.TICK_TIME
1265
+ 0 2003-12-01 00:00:00.000 1 1 4 2003-12-01 00:00:00.000
1266
+ 1 2003-12-01 00:00:00.000 1 2 5 2003-12-01 00:00:00.001
1267
+ 2 2003-12-01 00:00:00.001 2 1 4 2003-12-01 00:00:00.000
1268
+ 3 2003-12-01 00:00:00.001 2 2 5 2003-12-01 00:00:00.001
1269
+
1270
+ To get only specific fields from snapshot use parameter ``snapshot_fields``:
1271
+
1272
+ >>> src = otp.Ticks(A=[1, 2])
1273
+ >>> src = src.join_with_snapshot(
1274
+ ... snapshot_name='some_snapshot', snapshot_fields=['Y'],
1275
+ ... ) # doctest: +SKIP
1276
+ >>> otp.run(src) # doctest: +SKIP
1277
+ Time A Y
1278
+ 0 2003-12-01 00:00:00.000 1 4
1279
+ 1 2003-12-01 00:00:00.000 1 5
1280
+ 2 2003-12-01 00:00:00.001 2 4
1281
+ 3 2003-12-01 00:00:00.001 2 5
1282
+
1283
+ Setting default values for snapshot fields for outer join via ``default_fields_for_outer_join``
1284
+ parameter with example of joining ticks with absent snapshot:
1285
+
1286
+ >>> src = otp.Ticks(A=[1, 2])
1287
+ >>> src = src.join_with_snapshot(
1288
+ ... snapshot_name='some_snapshot', allow_snapshot_absence=True,
1289
+ ... default_fields_for_outer_join={
1290
+ ... 'B': 'Some string',
1291
+ ... 'C': (float, src['A'] * 2),
1292
+ ... 'D': 50,
1293
+ ... },
1294
+ ... ) # doctest: +SKIP
1295
+ >>> otp.run(src) # doctest: +SKIP
1296
+ Time A B C D
1297
+ 0 2003-12-01 00:00:00.000 1 Some string 2.0 50.0
1298
+ 1 2003-12-01 00:00:00.001 2 Some string 2.0 50.0
1299
+
1300
+ In this case, schema for ``src`` object will be automatically detected from values for this parameter:
1301
+
1302
+ >>> src.schema # doctest: +SKIP
1303
+ {'A': <class 'int'>, 'B': <class 'str'>, 'C': <class 'float'>, 'D': <class 'int'>}
1304
+
1305
+
1306
+ You can join each input tick with the snapshot ticks of a specific symbol name, taken from a string value or from a field of that tick
1307
+ via ``symbol_name_in_snapshot`` parameter.
1308
+
1309
+ Let's create snapshot with different symbol names inside:
1310
+
1311
+ >>> src = otp.Ticks(X=[1, 2, 3, 4], Y=['AAA', 'BBB', 'CCC', 'AAA'])
1312
+ >>> src = src.save_snapshot(
1313
+ ... snapshot_name='some_snapshot', num_ticks=5, keep_snapshot_after_query=True, symbol_name_field='Y',
1314
+ ... )
1315
+ >>> otp.run(src) # doctest: +SKIP
1316
+
1317
+ Now we can join input only with ticks from snapshot with specified symbol name:
1318
+
1319
+ >>> src = otp.Ticks(A=[1, 2])
1320
+ >>> src = src.join_with_snapshot(
1321
+ ... snapshot_name='some_snapshot', symbol_name_in_snapshot='AAA',
1322
+ ... ) # doctest: +SKIP
1323
+ >>> otp.run(src) # doctest: +SKIP
1324
+ Time A X TICK_TIME
1325
+ 0 2003-12-01 00:00:00.000 1 1 2003-12-01 00:00:00.000
1326
+ 1 2003-12-01 00:00:00.000 1 4 2003-12-01 00:00:00.003
1327
+ 2 2003-12-01 00:00:00.001 2 1 2003-12-01 00:00:00.000
1328
+ 3 2003-12-01 00:00:00.001 2 4 2003-12-01 00:00:00.003
1329
+
1330
+ Or we can join each tick with ticks from snapshot with symbol name from input ticks field:
1331
+
1332
+ >>> src = otp.Ticks(A=[1, 2], SYM=['AAA', 'CCC'])
1333
+ >>> src = src.join_with_snapshot(
1334
+ ... snapshot_name='some_snapshot', symbol_name_in_snapshot=src['SYM'],
1335
+ ... ) # doctest: +SKIP
1336
+ >>> otp.run(src) # doctest: +SKIP
1337
+ Time A SYM X TICK_TIME
1338
+ 0 2003-12-01 00:00:00.000 1 AAA 1 2003-12-01 00:00:00.000
1339
+ 1 2003-12-01 00:00:00.000 1 AAA 4 2003-12-01 00:00:00.003
1340
+ 2 2003-12-01 00:00:00.001 2 CCC 3 2003-12-01 00:00:00.002
1341
+ """
1342
+ kwargs = {}
1343
+
1344
+ if not hasattr(otq, "JoinWithSnapshot"):
1345
+ raise RuntimeError("Current version of OneTick doesn't support JOIN_WITH_SNAPSHOT EP")
1346
+
1347
+ if snapshot_storage not in ['memory', 'memory_mapped_file']:
1348
+ raise ValueError('`snapshot_storage` must be one of "memory", "memory_mapped_file"')
1349
+
1350
+ is_snapshot_fields_param_supported = is_join_with_snapshot_snapshot_fields_parameter_supported()
1351
+
1352
+ if snapshot_fields and not is_snapshot_fields_param_supported:
1353
+ raise RuntimeError(
1354
+ "Current version of OneTick doesn't support `snapshot_fields` parameter on JOIN_WITH_SNAPSHOT EP"
1355
+ )
1356
+
1357
+ snapshot_storage = snapshot_storage.upper()
1358
+
1359
+ if join_keys is None:
1360
+ join_keys_str = ''
1361
+ else:
1362
+ join_keys_str = ','.join(join_keys)
1363
+
1364
+ if symbol_name_in_snapshot is None:
1365
+ symbol_name_in_snapshot = ''
1366
+ elif isinstance(symbol_name_in_snapshot, _Operation):
1367
+ symbol_name_in_snapshot = str(symbol_name_in_snapshot)
1368
+
1369
+ if default_fields_for_outer_join is None:
1370
+ default_fields_for_outer_join = {}
1371
+
1372
+ default_fields_list = []
1373
+ snapshot_schema = {}
1374
+
1375
+ for field_name, field_value in default_fields_for_outer_join.items():
1376
+ if isinstance(field_value, tuple):
1377
+ field_type = field_value[0]
1378
+
1379
+ default_fields_list.append(
1380
+ f'{field_name} {ott.type2str(field_type)} = {ott.value2str(field_value[1])}',
1381
+ )
1382
+ else:
1383
+ if isinstance(field_value, _Operation):
1384
+ field_type = field_value.dtype
1385
+ else:
1386
+ field_type = type(field_value)
1387
+
1388
+ default_fields_list.append(f'{field_name} = {ott.value2str(field_value)}')
1389
+
1390
+ snapshot_schema[f'{prefix_for_output_ticks}{field_name}'] = field_type
1391
+
1392
+ default_fields_str = ','.join(default_fields_list)
1393
+
1394
+ if snapshot_fields is not None:
1395
+ kwargs['snapshot_fields'] = ','.join(snapshot_fields)
1396
+
1397
+ self.sink(
1398
+ otq.JoinWithSnapshot(
1399
+ snapshot_name=snapshot_name,
1400
+ snapshot_storage=snapshot_storage,
1401
+ allow_snapshot_absence=allow_snapshot_absence,
1402
+ join_keys=join_keys_str,
1403
+ symbol_name_in_snapshot=symbol_name_in_snapshot,
1404
+ database=database,
1405
+ default_fields_for_outer_join=default_fields_str,
1406
+ prefix_for_output_ticks=prefix_for_output_ticks,
1407
+ **kwargs,
1408
+ )
1409
+ )
1410
+
1411
+ self.schema.update(**snapshot_schema)
1412
+
1413
+ return self