onetick-py 1.162.2 (onetick_py-1.162.2-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152)
  1. locator_parser/__init__.py +0 -0
  2. locator_parser/acl.py +73 -0
  3. locator_parser/actions.py +266 -0
  4. locator_parser/common.py +365 -0
  5. locator_parser/io.py +41 -0
  6. locator_parser/locator.py +150 -0
  7. onetick/__init__.py +101 -0
  8. onetick/doc_utilities/__init__.py +3 -0
  9. onetick/doc_utilities/napoleon.py +40 -0
  10. onetick/doc_utilities/ot_doctest.py +140 -0
  11. onetick/doc_utilities/snippets.py +280 -0
  12. onetick/lib/__init__.py +4 -0
  13. onetick/lib/instance.py +138 -0
  14. onetick/py/__init__.py +290 -0
  15. onetick/py/_stack_info.py +89 -0
  16. onetick/py/_version.py +2 -0
  17. onetick/py/aggregations/__init__.py +11 -0
  18. onetick/py/aggregations/_base.py +645 -0
  19. onetick/py/aggregations/_docs.py +912 -0
  20. onetick/py/aggregations/compute.py +286 -0
  21. onetick/py/aggregations/functions.py +2216 -0
  22. onetick/py/aggregations/generic.py +104 -0
  23. onetick/py/aggregations/high_low.py +80 -0
  24. onetick/py/aggregations/num_distinct.py +83 -0
  25. onetick/py/aggregations/order_book.py +427 -0
  26. onetick/py/aggregations/other.py +1014 -0
  27. onetick/py/backports.py +26 -0
  28. onetick/py/cache.py +373 -0
  29. onetick/py/callback/__init__.py +5 -0
  30. onetick/py/callback/callback.py +275 -0
  31. onetick/py/callback/callbacks.py +131 -0
  32. onetick/py/compatibility.py +752 -0
  33. onetick/py/configuration.py +736 -0
  34. onetick/py/core/__init__.py +0 -0
  35. onetick/py/core/_csv_inspector.py +93 -0
  36. onetick/py/core/_internal/__init__.py +0 -0
  37. onetick/py/core/_internal/_manually_bound_value.py +6 -0
  38. onetick/py/core/_internal/_nodes_history.py +250 -0
  39. onetick/py/core/_internal/_op_utils/__init__.py +0 -0
  40. onetick/py/core/_internal/_op_utils/every_operand.py +9 -0
  41. onetick/py/core/_internal/_op_utils/is_const.py +10 -0
  42. onetick/py/core/_internal/_per_tick_scripts/tick_list_sort_template.script +121 -0
  43. onetick/py/core/_internal/_proxy_node.py +140 -0
  44. onetick/py/core/_internal/_state_objects.py +2307 -0
  45. onetick/py/core/_internal/_state_vars.py +87 -0
  46. onetick/py/core/_source/__init__.py +0 -0
  47. onetick/py/core/_source/_symbol_param.py +95 -0
  48. onetick/py/core/_source/schema.py +97 -0
  49. onetick/py/core/_source/source_methods/__init__.py +0 -0
  50. onetick/py/core/_source/source_methods/aggregations.py +810 -0
  51. onetick/py/core/_source/source_methods/applyers.py +296 -0
  52. onetick/py/core/_source/source_methods/columns.py +141 -0
  53. onetick/py/core/_source/source_methods/data_quality.py +301 -0
  54. onetick/py/core/_source/source_methods/debugs.py +270 -0
  55. onetick/py/core/_source/source_methods/drops.py +120 -0
  56. onetick/py/core/_source/source_methods/fields.py +619 -0
  57. onetick/py/core/_source/source_methods/filters.py +1001 -0
  58. onetick/py/core/_source/source_methods/joins.py +1393 -0
  59. onetick/py/core/_source/source_methods/merges.py +566 -0
  60. onetick/py/core/_source/source_methods/misc.py +1325 -0
  61. onetick/py/core/_source/source_methods/pandases.py +155 -0
  62. onetick/py/core/_source/source_methods/renames.py +356 -0
  63. onetick/py/core/_source/source_methods/sorts.py +183 -0
  64. onetick/py/core/_source/source_methods/switches.py +142 -0
  65. onetick/py/core/_source/source_methods/symbols.py +117 -0
  66. onetick/py/core/_source/source_methods/times.py +627 -0
  67. onetick/py/core/_source/source_methods/writes.py +702 -0
  68. onetick/py/core/_source/symbol.py +202 -0
  69. onetick/py/core/_source/tmp_otq.py +222 -0
  70. onetick/py/core/column.py +209 -0
  71. onetick/py/core/column_operations/__init__.py +0 -0
  72. onetick/py/core/column_operations/_methods/__init__.py +4 -0
  73. onetick/py/core/column_operations/_methods/_internal.py +28 -0
  74. onetick/py/core/column_operations/_methods/conversions.py +215 -0
  75. onetick/py/core/column_operations/_methods/methods.py +294 -0
  76. onetick/py/core/column_operations/_methods/op_types.py +150 -0
  77. onetick/py/core/column_operations/accessors/__init__.py +0 -0
  78. onetick/py/core/column_operations/accessors/_accessor.py +30 -0
  79. onetick/py/core/column_operations/accessors/decimal_accessor.py +92 -0
  80. onetick/py/core/column_operations/accessors/dt_accessor.py +464 -0
  81. onetick/py/core/column_operations/accessors/float_accessor.py +160 -0
  82. onetick/py/core/column_operations/accessors/str_accessor.py +1374 -0
  83. onetick/py/core/column_operations/base.py +1061 -0
  84. onetick/py/core/cut_builder.py +149 -0
  85. onetick/py/core/db_constants.py +20 -0
  86. onetick/py/core/eval_query.py +244 -0
  87. onetick/py/core/lambda_object.py +442 -0
  88. onetick/py/core/multi_output_source.py +193 -0
  89. onetick/py/core/per_tick_script.py +2253 -0
  90. onetick/py/core/query_inspector.py +465 -0
  91. onetick/py/core/source.py +1663 -0
  92. onetick/py/db/__init__.py +2 -0
  93. onetick/py/db/_inspection.py +1042 -0
  94. onetick/py/db/db.py +1423 -0
  95. onetick/py/db/utils.py +64 -0
  96. onetick/py/docs/__init__.py +0 -0
  97. onetick/py/docs/docstring_parser.py +112 -0
  98. onetick/py/docs/utils.py +81 -0
  99. onetick/py/functions.py +2354 -0
  100. onetick/py/license.py +188 -0
  101. onetick/py/log.py +88 -0
  102. onetick/py/math.py +947 -0
  103. onetick/py/misc.py +437 -0
  104. onetick/py/oqd/__init__.py +22 -0
  105. onetick/py/oqd/eps.py +1195 -0
  106. onetick/py/oqd/sources.py +325 -0
  107. onetick/py/otq.py +211 -0
  108. onetick/py/pyomd_mock.py +47 -0
  109. onetick/py/run.py +841 -0
  110. onetick/py/servers.py +173 -0
  111. onetick/py/session.py +1342 -0
  112. onetick/py/sources/__init__.py +19 -0
  113. onetick/py/sources/cache.py +167 -0
  114. onetick/py/sources/common.py +126 -0
  115. onetick/py/sources/csv.py +642 -0
  116. onetick/py/sources/custom.py +85 -0
  117. onetick/py/sources/data_file.py +305 -0
  118. onetick/py/sources/data_source.py +1049 -0
  119. onetick/py/sources/empty.py +94 -0
  120. onetick/py/sources/odbc.py +337 -0
  121. onetick/py/sources/order_book.py +238 -0
  122. onetick/py/sources/parquet.py +168 -0
  123. onetick/py/sources/pit.py +191 -0
  124. onetick/py/sources/query.py +495 -0
  125. onetick/py/sources/snapshots.py +419 -0
  126. onetick/py/sources/split_query_output_by_symbol.py +198 -0
  127. onetick/py/sources/symbology_mapping.py +123 -0
  128. onetick/py/sources/symbols.py +357 -0
  129. onetick/py/sources/ticks.py +825 -0
  130. onetick/py/sql.py +70 -0
  131. onetick/py/state.py +256 -0
  132. onetick/py/types.py +2056 -0
  133. onetick/py/utils/__init__.py +70 -0
  134. onetick/py/utils/acl.py +93 -0
  135. onetick/py/utils/config.py +186 -0
  136. onetick/py/utils/default.py +49 -0
  137. onetick/py/utils/file.py +38 -0
  138. onetick/py/utils/helpers.py +76 -0
  139. onetick/py/utils/locator.py +94 -0
  140. onetick/py/utils/perf.py +499 -0
  141. onetick/py/utils/query.py +49 -0
  142. onetick/py/utils/render.py +1139 -0
  143. onetick/py/utils/script.py +244 -0
  144. onetick/py/utils/temp.py +471 -0
  145. onetick/py/utils/types.py +118 -0
  146. onetick/py/utils/tz.py +82 -0
  147. onetick_py-1.162.2.dist-info/METADATA +148 -0
  148. onetick_py-1.162.2.dist-info/RECORD +152 -0
  149. onetick_py-1.162.2.dist-info/WHEEL +5 -0
  150. onetick_py-1.162.2.dist-info/entry_points.txt +2 -0
  151. onetick_py-1.162.2.dist-info/licenses/LICENSE +21 -0
  152. onetick_py-1.162.2.dist-info/top_level.txt +2 -0
onetick/py/aggregations/_base.py
@@ -0,0 +1,645 @@
+ from abc import ABC, abstractmethod
+ from typing import List, Dict, Union, TYPE_CHECKING, Tuple, Optional, Any
+ from copy import deepcopy
+ from functools import wraps
+ from collections import namedtuple
+ import pandas as pd
+
+ if TYPE_CHECKING:
+     from onetick.py.core.source import Source  # hack for annotations
+
+ from onetick.py.core.column import _Column
+ from onetick.py.core.column_operations.base import _Operation, OnetickParameter
+ from onetick.py.core._source._symbol_param import _SymbolParamColumn
+ from onetick.py import types as ott
+ from onetick.py import utils
+ from onetick.py.otq import otq
+
+
19
+ def validate(method):
+     """Wraps a schema getter with validation of the input and group-by columns
+     against ``src`` and of the output column name against the resulting schema."""
+
+     @wraps(method)
+     def inner(obj: '_Aggregation', src: 'Source', name):
+         obj.validate_input_columns(src)
+         for column in obj.group_by:
+             if str(column) not in src.schema or not isinstance(src[str(column)], _Column):
+                 raise KeyError(f"There is no '{column}' column to group by")
+         schema: Dict = method(obj, src=src, name=name)
+         if not obj.overwrite_output_field:
+             obj.validate_output_name(schema, name)
+         return schema
+
+     return inner
+
+
36
+ def operation_gb(method):
+     """Wraps an aggregation to materialize each group-by _Operation as a
+     temporary column and remove it after the aggregation."""
+
+     @wraps(method)
+     def inner(obj: '_Aggregation', src: 'Source', *args, **kwargs) -> 'Source':
+         inplace = kwargs.get('inplace')
+         res = src if inplace else src.copy()
+         src_schema = src.schema
+
+         gb_copy = obj.group_by.copy()
+         obj.group_by = []
+         for i, gb in enumerate(gb_copy):
+             if isinstance(gb, _Operation) and not isinstance(gb, _Column):
+                 name = f'GROUP_{i}'
+                 if name in src_schema:
+                     raise AttributeError(f"'{name}' column name is reserved for a group-by Operation, "
+                                          f"but it already exists in the current schema")
+                 res[name] = gb
+                 obj.group_by.append(res[name])
+             else:
+                 obj.group_by.append(gb)
+         res = method(obj, res, *args, **kwargs)
+
+         obj.group_by = gb_copy
+         return res
+     return inner
+
+
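# Editor's note: an illustrative sketch, not part of the package diff.
# Grouping by an expression instead of an existing column; the decorator above
# materializes X % 2 as a temporary GROUP_0 column and removes it afterwards.
# Assumes the public otp.Ticks / otp.agg.sum entry points and Source.agg().
import onetick.py as otp

data = otp.Ticks(X=[1, 2, 3, 4])
data = data.agg({'X_SUM': otp.agg.sum('X')}, group_by=[data['X'] % 2])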
64
+ def operation_replacer(method):
+     """
+     PY-378
+     Decorator that allows aggregation input columns to be specified as operations.
+     """
+
+     @wraps(method)
+     def inner(obj: '_Aggregation', src: 'Source', *args, **kwargs) -> 'Source':
+         inplace = kwargs.get('inplace')
+         res = src if inplace else src.copy()
+         tmp_columns = {}
+
+         aggrs = getattr(obj, 'aggrs', None)
+         if aggrs:
+             aggs = aggrs.values()
+         else:
+             name = args[0] if args else kwargs.get('name')
+             if type(obj.column_name) is _Operation and name is None:
+                 raise ValueError('Output field name must be specified when aggregating an operation')
+             aggs = [obj]
+
+         # Add the operation from each aggregation object to source `res` as a column
+         # and replace the *column_name* property in each aggregation object with this column's name.
+         for i, agg in enumerate(aggs):
+             if type(agg.column_name) is _Operation:
+                 tmp_name = f'__TMP_AGG_COLUMN_{i}__'
+                 res[tmp_name] = agg.column_name
+                 tmp_columns[tmp_name] = (agg, agg.column_name)
+                 agg.column_name = tmp_name
+
+         res = method(obj, res, *args, **kwargs)
+
+         if tmp_columns:
+             # Roll back all aggregation objects and source `res`:
+             # delete all temporary columns and change the *column_name* property back in aggregations.
+             to_drop = list(set(tmp_columns).intersection(res.schema))
+             if to_drop:
+                 res.drop(to_drop, inplace=True)
+             for agg, column_name in tmp_columns.values():
+                 agg.column_name = column_name
+
+         return res
+     return inner
+
+
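# Editor's note: an illustrative sketch, not part of the package diff.
# Aggregating an operation rather than a plain column; the dict key supplies
# the mandatory output field name. Assumes otp.Ticks / otp.agg.sum / Source.agg().
import onetick.py as otp

data = otp.Ticks(A=[1, 2], B=[3, 4])
data = data.agg({'AB_SUM': otp.agg.sum(data['A'] + data['B'])})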
109
+ def output_column_overwriter(method):
+     """
+     Allows outputting an aggregation into an existing field.
+     In this case the existing field is temporarily renamed.
+     """
+     @wraps(method)
+     def inner(obj: '_Aggregation', src: 'Source', *args, **kwargs) -> 'Source':
+         column_name = obj.column_name
+         name = args[0] if args else kwargs.get('name')
+         name = name or column_name
+
+         if not obj.overwrite_output_field or not name or name not in src.schema:
+             return method(obj, src, *args, **kwargs)
+
+         inplace = kwargs.get('inplace')
+         res = src if inplace else src.copy()
+
+         # rename the existing field to the temporary name
+         tmp_name = f'__TMP_AGG_COLUMN_{name}__'
+         res[tmp_name] = res[name]
+         res.drop(name, inplace=True)
+         # aggregate the renamed field
+         kwargs['name'] = name
+         obj.column_name = tmp_name
+
+         res = method(obj, res, *args, **kwargs)
+
+         # remove the temporary field
+         if tmp_name in res.schema:
+             res.drop(tmp_name, inplace=True)
+         obj.column_name = column_name
+
+         return res
+     return inner
+
+
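# Editor's note: an illustrative sketch, not part of the package diff.
# A running aggregation that writes its result back into an existing column,
# using the apply() method defined below. Assumes otp.Ticks and that
# otp.agg.sum forwards the overwrite_output_field parameter.
import onetick.py as otp

data = otp.Ticks(X=[1, 2, 3])
data = otp.agg.sum('X', running=True, overwrite_output_field=True).apply(data, name='X')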
145
+ def get_seconds_from_time_offset(time_offset):
+     if not isinstance(time_offset, ott.OTPBaseTimeOffset):
+         raise ValueError('Only DatePart objects can be passed to this function')
+
+     return int(pd.Timedelta(time_offset).total_seconds())
+
+
152
+ def get_bucket_interval_from_datepart(bucket_interval):
+     if not isinstance(bucket_interval, ott.OTPBaseTimeOffset):
+         raise ValueError('Only DatePart objects can be passed to this function')
+
+     if isinstance(bucket_interval, ott.ExpressionDefinedTimeOffset):
+         raise ValueError(f"An Operation as a DatePart isn't allowed: {str(bucket_interval.n)}")
+
+     # bucket_interval can be one of:
+     # otp.Milli, otp.Second, otp.Minute, otp.Hour, otp.Day, otp.Month.
+     # It is converted here, and the corresponding bucket_units value is set.
+
+     offset, datepart = bucket_interval.get_offset()
+     if datepart not in {'millisecond', 'second', 'minute', 'hour', 'day', 'month'}:
+         raise ValueError(f"Unsupported DatePart passed to bucket_interval: {datepart}")
+
+     if offset < 0:
+         raise ValueError(
+             f"Negative DateParts aren't allowed for bucket_interval: {offset} ({datepart})"
+         )
+
+     if datepart in {'millisecond', 'minute', 'hour'}:
+         # bucket_units can only be seconds, days, months or ticks,
+         # so other DateParts are converted to seconds
+         if datepart == 'millisecond':
+             offset, datepart = offset / 1000, 'second'
+         else:
+             offset, datepart = ott.Second(get_seconds_from_time_offset(bucket_interval)).get_offset()
+
+     return offset, f"{datepart}s"  # type: ignore[union-attr]
+
+
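# Editor's note: worked examples, not part of the package diff; they assume the
# otp.Minute / otp.Milli / otp.Day date-part helpers named in the comment above.
#
#     get_bucket_interval_from_datepart(otp.Minute(5))   # -> (300, 'seconds')
#     get_bucket_interval_from_datepart(otp.Milli(500))  # -> (0.5, 'seconds')
#     get_bucket_interval_from_datepart(otp.Day(1))      # -> (1, 'days')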
183
+ class _Aggregation(ABC):
+
+     @property
+     @abstractmethod
+     def NAME(self) -> str:
+         pass
+
+     @property
+     @abstractmethod
+     def EP(self) -> otq.EpBase:
+         pass
+
+     DEFAULT_OUTPUT_NAME = 'VALUE'
+
+     FIELDS_MAPPING = {
+         "column_name": "INPUT_FIELD_NAME",
+         "running": "IS_RUNNING_AGGR",
+         "all_fields": "ALL_FIELDS_FOR_SLIDING",
+         "bucket_interval": "BUCKET_INTERVAL",
+         "bucket_time": "BUCKET_TIME",
+         "bucket_units": "BUCKET_INTERVAL_UNITS",
+         "bucket_end_condition": "BUCKET_END_CRITERIA",
+         "end_condition_per_group": "BUCKET_END_PER_GROUP",
+         "boundary_tick_bucket": "BOUNDARY_TICK_BUCKET",
+         "group_by": "GROUP_BY",
+         "groups_to_display": "GROUPS_TO_DISPLAY",
+     }
+     FIELDS_DEFAULT = {
+         "running": False,
+         "all_fields": False,
+         "bucket_interval": 0,
+         "bucket_time": "BUCKET_END",
+         "bucket_units": "seconds",
+         "bucket_end_condition": None,
+         "end_condition_per_group": False,
+         "boundary_tick_bucket": "new",
+         "group_by": [],
+         "groups_to_display": "all",
+     }
+
+     FIELDS_TO_SKIP: List = []  # attrs listed here won't be used in self.__str__
+
+     output_field_type: Optional[type] = None  # None forces use of the input column's type
+     require_type: Optional[Tuple[type, ...]] = None
+     _validations_to_skip: List = []
+
+     def __init__(self,
+                  column: Union[str, _Column, _Operation],
+                  running: bool = False,
+                  all_fields: Union[bool, str] = False,
+                  bucket_interval: Union[int, ott.OTPBaseTimeOffset] = 0,
+                  bucket_time: str = "end",
+                  bucket_units: Union[str, None] = None,
+                  bucket_end_condition: Optional[_Operation] = None,
+                  end_condition_per_group: bool = False,
+                  boundary_tick_bucket: str = "new",
+                  group_by: Optional[Union[List, str, _Operation]] = None,
+                  groups_to_display: str = "all",
+                  overwrite_output_field: bool = False):
+         """
+         Base initializer that implements logic common to all aggregations
+         """
+         if type(column) is list:
+             column = ','.join(map(str, column))
+
+         column_name: Union[str, _Operation] = str(column)
+
+         if column_name == "Time":
+             # TODO: need to understand how to better work with alias
+             column_name = "TIMESTAMP"
+
+         if type(column) is _Operation:
+             column_name = column
+
+         if isinstance(bucket_interval, float):
+             if bucket_units is not None and bucket_units != 'seconds':
+                 raise ValueError('Float values for bucket_interval are only supported for seconds.')
+             if bucket_interval < 0.001:
+                 raise ValueError('Float values for bucket_interval less than 0.001 are not supported.')
+
+         if isinstance(bucket_interval, ott.OTPBaseTimeOffset):
+             bucket_interval, bucket_units = get_bucket_interval_from_datepart(bucket_interval)
+
+         if isinstance(all_fields, str) and all_fields == "when_ticks_exit_window":
+             if not running:
+                 raise ValueError("`all_fields` can't be set to 'when_ticks_exit_window' when `running=False`")
+
+             if not bucket_interval:
+                 raise ValueError(
+                     "`all_fields` can't be set to 'when_ticks_exit_window' when `bucket_interval` is zero"
+                 )
+
+             all_fields = all_fields.upper()
+
+         self.column_name = column_name
+         self.running = running
+         self.all_fields = all_fields
+         self.bucket_time = bucket_time
+
+         if isinstance(bucket_interval, _Operation):
+             if bucket_interval.dtype is bool:
+                 if bucket_end_condition is not None:
+                     raise ValueError(
+                         "Bucket end condition passed in both `bucket_interval` and `bucket_end_condition` parameters"
+                     )
+
+                 bucket_end_condition = bucket_interval
+                 bucket_interval = 0
+             elif isinstance(bucket_interval, OnetickParameter) and bucket_interval.dtype is int:
+                 bucket_interval = str(bucket_interval)
+             elif isinstance(bucket_interval, _SymbolParamColumn) and bucket_interval.dtype is int:
+                 bucket_interval = str(bucket_interval.expr)
+             else:
+                 raise ValueError("Bucket interval can only be a boolean otp.Operation or an integer otp.param")
+
+         self.bucket_interval = bucket_interval
+
+         if bucket_end_condition is None:
+             self.bucket_end_condition = None  # type: ignore
+         else:
+             self.bucket_end_condition = str(bucket_end_condition)
+
+         self.bucket_units = bucket_units
+         if self.bucket_units is None:
+             if self.bucket_end_condition:
+                 # allow omitting bucket_units if bucket_end_condition is set
+                 self.bucket_units = 'flexible'
+             else:
+                 # default value
+                 self.bucket_units = 'seconds'
+
+         self.end_condition_per_group = end_condition_per_group
+         self.boundary_tick_bucket = boundary_tick_bucket
+         self.large_ints = False
+         if isinstance(group_by, (_Operation, str)):
+             group_by = [group_by]
+         self.group_by = group_by or []
+         self.groups_to_display = groups_to_display
+         self.overwrite_output_field = overwrite_output_field
+
+         self._param_validation()
+         self.bucket_time = f'BUCKET_{self.bucket_time.upper()}'
+
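# Editor's note: an illustrative sketch, not part of the package diff.
# Two bucketing forms accepted by the initializer above: a DatePart, converted
# via get_bucket_interval_from_datepart(), and a boolean Operation, which
# becomes bucket_end_condition. Assumes otp.Ticks / otp.agg.sum / otp.Minute.
import onetick.py as otp

data = otp.Ticks(X=[1, 5, 2, 7])
five_minute_sum = otp.agg.sum('X', bucket_interval=otp.Minute(5))
flexible_sum = otp.agg.sum('X', bucket_interval=data['X'] > 4)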
326
+     @staticmethod
+     def _attr2str(value) -> str:
+         if isinstance(value, bool):
+             return 'true' if value else 'false'
+         if type(value) is list:
+             return ','.join(value)
+         return str(value)
+
+     @property
+     def ep_params(self) -> Dict:
+         """prepare params for self.__str__ and otq.EpBase"""
+         params = {}
+
+         for field, ep_param in self.FIELDS_MAPPING.items():
+             if field in self.FIELDS_TO_SKIP:
+                 continue
+
+             default_value = self.FIELDS_DEFAULT.get(field)
+             if getattr(self, field) != default_value:
+                 if field == 'group_by':
+                     params[ep_param] = ",".join(list(map(str, self.group_by)))
+                 else:
+                     params[ep_param] = getattr(self, field)
+         return params
+
+     def __str__(self):
+         params = [f'{k}={self._attr2str(v)}' for k, v in self.ep_params.items()]
+         return self.NAME + "(" + ",".join(params) + ")"
+
+     def to_ep(self, name: Optional[str]) -> otq.EpBase:
+         params = dict((k.lower(), v) for k, v in self.ep_params.items())
+         if 'output_field_name' not in self.FIELDS_TO_SKIP:
+             params['output_field_name'] = name
+         return self.EP(**params)
+
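# Editor's note: not part of the package diff. Only non-default parameters are
# emitted by ep_params, so a running 10-second sum over column X would render
# via __str__ roughly as
#
#     SUM(INPUT_FIELD_NAME=X,IS_RUNNING_AGGR=true,BUCKET_INTERVAL=10)
#
# with the exact EP name taken from the subclass's NAME attribute.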
361
+     @validate
+     def _get_common_schema(self, src: 'Source', name: str) -> Dict:
+         """return the data schema without output fields (those fields are added later)"""
+         schema = {}
+         for column in self.group_by:
+             schema[str(column)] = src.schema[str(column)]
+         if self.all_fields:
+             schema.update(src.schema)
+         return schema
+
+     def _modify_source(self, res: 'Source', **kwargs):
+         """
+         Modify the resulting source in place before sinking to the aggregation.
+         Can be overridden if needed.
+         """
+         pass
+
+     def _get_output_schema(self, src: 'Source', name: Optional[str] = None) -> Dict:
+         if not name or name in src.__class__.meta_fields:
+             return {}
+         return {
+             name: self.output_field_type or src.schema[self.column_name]
+         }
+
+     @operation_gb
+     @operation_replacer
+     @output_column_overwriter
+     def apply(self, src: 'Source', name: Optional[str] = None, inplace: bool = False) -> 'Source':
+         """
+         Applies the aggregation to a Source and sets the proper schema.
+
+         Parameters
+         ----------
+         src: Source
+             Source to apply the aggregation to.
+         name: str, optional
+             Name of the output column. If not specified, self.column_name is used.
+         inplace: bool
+             Modify the passed ``src`` object or return a modified copy.
+         """
+         if inplace:
+             res = src
+             src = src.copy()
+         else:
+             res = src.copy()
+         out_name = name or self.column_name
+         schema = self._get_common_schema(src, out_name)
+         # it's important to validate the input schema before sinking
+         self._modify_source(res)
+         res.sink(self.to_ep(name=str(out_name)))
+         schema.update(self._get_output_schema(src, str(out_name)))
+         res.schema.set(**schema)
+
+         if not self.all_fields:
+             # in this case we propagate only the resulting fields stored in res.schema (flexible schema case)
+             res._add_table(strict=True)
+         else:
+             # add a table to convert types in the schema, e.g. float to int
+             res._add_table(strict=False)
+         return res
+
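# Editor's note: an illustrative sketch, not part of the package diff.
# Basic apply() usage; assumes the public otp.Ticks / otp.agg.sum / otp.run
# entry points.
import onetick.py as otp

data = otp.Ticks(X=[1, 2, 3])
data = otp.agg.sum('X').apply(data, name='X_SUM')
df = otp.run(data)  # one bucket over the whole query range -> X_SUM == 6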
422
+     def validate_input_columns(self, src: 'Source'):
+         """checks that the columns used in the aggregation are present in the Source"""
+         if self.column_name not in src.schema:
+             raise TypeError(f"Aggregation `{self.NAME}` uses column `{self.column_name}` as input, which doesn't exist")
+         if not self.require_type:
+             return
+         dtype = src.schema[self.column_name]
+         base_dtype = ott.get_base_type(dtype)
+         for t in self.require_type:
+             # more generic types can be specified in self.require_type too
+             if dtype is t or base_dtype is t:
+                 return
+         raise TypeError(f"Aggregation `{self.NAME}` requires {self.require_type} types, got {dtype}")
+
+     @staticmethod
+     def validate_output_name(schema: Dict, name: Union[List, str]):
+         """checks that the aggregation won't output columns with the same names"""
+         if type(name) is not list:
+             name = [name]
+
+         same_fields = []
+         for n in name:
+             if n in schema:
+                 if '__long_nsec_' in n:
+                     same_fields.append(n.replace('__long_nsec_', ''))  # hack for large ints
+                 else:
+                     same_fields.append(n)
+         if same_fields:
+             raise ValueError("You are trying to propagate all fields and put the result into already existing fields: "
+                              f"'{', '.join(same_fields)}'")
+
+     def _param_validation(self):
+         """validate __init__ parameters"""
+         if self.running and self.bucket_time == "start":
+             raise ValueError("It is not allowed to set both running=True and bucket_time='start'")
+         if self.bucket_units == "flexible" and self.bucket_end_condition is None:
+             raise ValueError("bucket_units is set to 'flexible' but bucket_end_condition is not specified. "
+                              "Please specify bucket_end_condition.")
+         if self.bucket_units != "flexible" and self.bucket_end_condition is not None:
+             raise ValueError("bucket_end_condition can be used only with 'flexible' bucket_units. "
+                              "Please set bucket_units to 'flexible'.")
+
+         if self.bucket_time not in ['start', 'end']:
+             raise ValueError(f"'bucket_time' must be either 'start' or 'end', but '{self.bucket_time}' was passed")
+
+         valid_units = ("seconds", "ticks", "days", "months", "flexible")
+         if self.bucket_units not in valid_units:
+             raise ValueError("'bucket_units' can be one of the following: "
+                              f"'{', '.join(valid_units)}'; however, '{self.bucket_units}' was passed")
+
+         valid_boundary = {"new", "previous"}
+         if self.boundary_tick_bucket not in valid_boundary:
+             message = "'boundary_tick_bucket' can be one of the following: {}; however, {} was passed"
+             raise ValueError(message.format(', '.join(list(valid_boundary)), self.boundary_tick_bucket))
+
+         for column in self.group_by:
+             if not isinstance(column, (_Operation, str)):
+                 raise TypeError(f"Unsupported type '{column}' of a column to group by")
+
+         if self.groups_to_display not in ('all', 'event_in_last_bucket'):
+             raise ValueError("Parameter 'groups_to_display' can only be set to 'all' or 'event_in_last_bucket': "
+                              f"got '{self.groups_to_display}'")
+
+         if self.all_fields and not self.running and 'running_all_fields' not in self._validations_to_skip:
+             raise ValueError("It is not allowed to set all_fields=True for a non-running aggregation")
+
+         if not self.running and self.overwrite_output_field:
+             raise ValueError("Parameter 'overwrite_output_field' can only be used with running aggregations")
+
+     @property
+     def is_multi_column_aggregation(self):
+         return isinstance(self, _MultiColumnAggregation)
+
+     @property
+     def is_all_columns_aggregation(self):
+         return isinstance(self, _AllColumnsAggregation)
+
+
500
+ class _AggregationTSType(_Aggregation):
+
+     FIELDS_MAPPING = deepcopy(_Aggregation.FIELDS_MAPPING)
+     FIELDS_MAPPING['time_series_type'] = 'TIME_SERIES_TYPE'
+     FIELDS_DEFAULT = deepcopy(_Aggregation.FIELDS_DEFAULT)
+     FIELDS_DEFAULT['time_series_type'] = 'event_ts'
+
+     def __init__(self, column, time_series_type: str = "event_ts", *args, **kwargs):
+         """
+         Abstract class, inherited from _Aggregation, that implements common logic
+         for aggregations with the ability to select the time series type.
+
+         Parameters
+         ----------
+         column: see _Aggregation
+         time_series_type: "event_ts" or "state_ts", default="event_ts"
+             "state_ts":
+                 if there is a tick in the bucket with timestamp == bucket start:
+                     only ticks in the bucket are used for the calculation
+                 else:
+                     the latest tick from the previous bucket is also included in the current bucket
+             "event_ts": only ticks from the current bucket are used for calculations
+         args: see _Aggregation
+         kwargs: see _Aggregation
+         """
+         if time_series_type not in ["event_ts", "state_ts"]:
+             raise ValueError('time_series_type argument must be "event_ts" or "state_ts"')
+         self.time_series_type = time_series_type
+         super().__init__(column, *args, **kwargs)
+
+
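# Editor's note: an illustrative sketch, not part of the package diff.
# A 'state_ts' time series carries the latest tick of the previous bucket into
# a bucket that does not start with a tick. Assumes otp.agg.max forwards the
# time_series_type parameter.
import onetick.py as otp

state_max = otp.agg.max('PRICE', bucket_interval=60, time_series_type='state_ts')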
531
+ class _AggregationTSSelection(_Aggregation):
+
+     FIELDS_MAPPING = deepcopy(_Aggregation.FIELDS_MAPPING)
+     FIELDS_MAPPING['selection'] = 'SELECTION'
+     FIELDS_DEFAULT = deepcopy(_Aggregation.FIELDS_DEFAULT)
+     FIELDS_DEFAULT['selection'] = 'first'
+
+     def __init__(self, column, selection: str = "first", *args, **kwargs):
+         if selection not in ["first", "last"]:
+             raise ValueError(f'{self.__class__.__name__} selection argument must be "first" or "last"')
+         self.selection = selection
+         super().__init__(column, *args, **kwargs)
+
+
+ class _FloatAggregation(_Aggregation):
+     """
+     Aggregation that expects int or float as input
+     """
+
+     require_type = (int, float, ott._inf)
+
+
+ class _KeepTs(_Aggregation):
+
+     def __init__(self, *args, keep_timestamp=True, **kwargs):
+         super().__init__(*args, **kwargs)
+         self.keep_timestamp = keep_timestamp
+
+     @validate  # type: ignore
+     def _get_common_schema(self, src: 'Source', *args, **kwargs) -> Dict:
+         schema = src.schema.copy()
+         schema['TICK_TIME'] = ott.nsectime
+         return schema
+
+     def apply(self, src: 'Source', *args, **kwargs) -> 'Source':
+         res = super().apply(src=src, *args, **kwargs)
+         if self.keep_timestamp:
+             # TICK_TIME can be empty if the tick comes from the default_tick aggregation parameter
+             res['TICK_TIME'] = res.if_else(res['TICK_TIME'], res['TICK_TIME'], res['TIMESTAMP'])
+             res['TIMESTAMP'] = res['TICK_TIME']
+             res.drop('TICK_TIME', inplace=True)
+         return res
+
+
+ class _ExpectLargeInts(_Aggregation):
+     FIELDS_MAPPING = deepcopy(_Aggregation.FIELDS_MAPPING)
+     FIELDS_MAPPING['large_ints'] = 'EXPECT_LARGE_INTS'
+     FIELDS_MAPPING['null_int_val'] = 'NULL_INT_VAL'
+     FIELDS_DEFAULT = deepcopy(_Aggregation.FIELDS_DEFAULT)
+     FIELDS_DEFAULT['large_ints'] = False
+     FIELDS_DEFAULT['null_int_val'] = 0
+
+     def __init__(self, *args, large_ints=False, null_int_val=0, **kwargs):
+         super().__init__(*args, **kwargs)
+         if large_ints not in {True, False, utils.adaptive}:
+             raise ValueError(f"Wrong value for the {self.__class__.__name__} aggregation's"
+                              f" 'large_ints' parameter: {large_ints}")
+         if large_ints is utils.adaptive:
+             large_ints = 'IF_INPUT_VAL_IS_LONG_INTEGER'
+
+         if null_int_val and not large_ints:
+             raise ValueError(
+                 f"Wrong value for the {self.__class__.__name__} aggregation:"
+                 f" the 'null_int_val' parameter is set, but 'large_ints' is `False`"
+             )
+
+         self.large_ints = large_ints
+         self.null_int_val = null_int_val
+
+     def apply(self, src: 'Source', name: Optional[str] = None) -> 'Source':
+         out_name = name or self.column_name
+         res, col, convert_back = self._ts_to_long(src, str(out_name))
+         res = super().apply(res, col.tmp_out_column)
+         if not convert_back:
+             return res
+         return self._long_to_ts(res, col)
+
+     def _ts_to_long(self, src: 'Source', name: str) -> Tuple['Source', Any, bool]:
+         agg_columns = namedtuple('agg_columns', ('in_column', 'tmp_in_column', 'tmp_out_column', 'out_column'))
+         if src.schema[self.column_name] != ott.nsectime:
+             return src, agg_columns(self.column_name, self.column_name, name, name), False
+         self.large_ints = True
+         res = src.copy()
+         col = agg_columns(self.column_name, f'__long_nsec_{self.column_name}',
+                           f'__long_nsec_{name}', name)
+         res[col.tmp_in_column] = res[col.in_column].apply(int)
+         self.column_name = col.tmp_in_column
+         return res, col, True
+
+     def _long_to_ts(self, src: 'Source', col) -> 'Source':
+         res = src.copy()
+         res[col.out_column] = res[col.tmp_out_column].astype(ott.nsectime)
+         to_drop = []
+         for c in [col.tmp_out_column, col.tmp_in_column]:
+             if c in res.schema:
+                 to_drop.append(c)
+         if to_drop:
+             res.drop(to_drop, inplace=True)
+         self.column_name = col.in_column
+         return res
+
+
+ class _MultiColumnAggregation:
+     """
+     Helper class for identifying multi-column aggregations.
+     """
+     pass
+
+
+ class _AllColumnsAggregation(_MultiColumnAggregation):
+     """
+     Helper class for identifying aggregations that return all fields from the original ticks.
+     """
+     pass