onetick_py-1.177.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152)
  1. locator_parser/__init__.py +0 -0
  2. locator_parser/acl.py +73 -0
  3. locator_parser/actions.py +262 -0
  4. locator_parser/common.py +368 -0
  5. locator_parser/io.py +43 -0
  6. locator_parser/locator.py +150 -0
  7. onetick/__init__.py +101 -0
  8. onetick/doc_utilities/__init__.py +3 -0
  9. onetick/doc_utilities/napoleon.py +40 -0
  10. onetick/doc_utilities/ot_doctest.py +140 -0
  11. onetick/doc_utilities/snippets.py +279 -0
  12. onetick/lib/__init__.py +4 -0
  13. onetick/lib/instance.py +141 -0
  14. onetick/py/__init__.py +293 -0
  15. onetick/py/_stack_info.py +89 -0
  16. onetick/py/_version.py +2 -0
  17. onetick/py/aggregations/__init__.py +11 -0
  18. onetick/py/aggregations/_base.py +648 -0
  19. onetick/py/aggregations/_docs.py +948 -0
  20. onetick/py/aggregations/compute.py +286 -0
  21. onetick/py/aggregations/functions.py +2216 -0
  22. onetick/py/aggregations/generic.py +104 -0
  23. onetick/py/aggregations/high_low.py +80 -0
  24. onetick/py/aggregations/num_distinct.py +83 -0
  25. onetick/py/aggregations/order_book.py +501 -0
  26. onetick/py/aggregations/other.py +1014 -0
  27. onetick/py/backports.py +26 -0
  28. onetick/py/cache.py +374 -0
  29. onetick/py/callback/__init__.py +5 -0
  30. onetick/py/callback/callback.py +276 -0
  31. onetick/py/callback/callbacks.py +131 -0
  32. onetick/py/compatibility.py +798 -0
  33. onetick/py/configuration.py +771 -0
  34. onetick/py/core/__init__.py +0 -0
  35. onetick/py/core/_csv_inspector.py +93 -0
  36. onetick/py/core/_internal/__init__.py +0 -0
  37. onetick/py/core/_internal/_manually_bound_value.py +6 -0
  38. onetick/py/core/_internal/_nodes_history.py +250 -0
  39. onetick/py/core/_internal/_op_utils/__init__.py +0 -0
  40. onetick/py/core/_internal/_op_utils/every_operand.py +9 -0
  41. onetick/py/core/_internal/_op_utils/is_const.py +10 -0
  42. onetick/py/core/_internal/_per_tick_scripts/tick_list_sort_template.script +121 -0
  43. onetick/py/core/_internal/_proxy_node.py +140 -0
  44. onetick/py/core/_internal/_state_objects.py +2312 -0
  45. onetick/py/core/_internal/_state_vars.py +93 -0
  46. onetick/py/core/_source/__init__.py +0 -0
  47. onetick/py/core/_source/_symbol_param.py +95 -0
  48. onetick/py/core/_source/schema.py +97 -0
  49. onetick/py/core/_source/source_methods/__init__.py +0 -0
  50. onetick/py/core/_source/source_methods/aggregations.py +809 -0
  51. onetick/py/core/_source/source_methods/applyers.py +296 -0
  52. onetick/py/core/_source/source_methods/columns.py +141 -0
  53. onetick/py/core/_source/source_methods/data_quality.py +301 -0
  54. onetick/py/core/_source/source_methods/debugs.py +272 -0
  55. onetick/py/core/_source/source_methods/drops.py +120 -0
  56. onetick/py/core/_source/source_methods/fields.py +619 -0
  57. onetick/py/core/_source/source_methods/filters.py +1002 -0
  58. onetick/py/core/_source/source_methods/joins.py +1413 -0
  59. onetick/py/core/_source/source_methods/merges.py +605 -0
  60. onetick/py/core/_source/source_methods/misc.py +1455 -0
  61. onetick/py/core/_source/source_methods/pandases.py +155 -0
  62. onetick/py/core/_source/source_methods/renames.py +356 -0
  63. onetick/py/core/_source/source_methods/sorts.py +183 -0
  64. onetick/py/core/_source/source_methods/switches.py +142 -0
  65. onetick/py/core/_source/source_methods/symbols.py +117 -0
  66. onetick/py/core/_source/source_methods/times.py +627 -0
  67. onetick/py/core/_source/source_methods/writes.py +986 -0
  68. onetick/py/core/_source/symbol.py +205 -0
  69. onetick/py/core/_source/tmp_otq.py +222 -0
  70. onetick/py/core/column.py +209 -0
  71. onetick/py/core/column_operations/__init__.py +0 -0
  72. onetick/py/core/column_operations/_methods/__init__.py +4 -0
  73. onetick/py/core/column_operations/_methods/_internal.py +28 -0
  74. onetick/py/core/column_operations/_methods/conversions.py +216 -0
  75. onetick/py/core/column_operations/_methods/methods.py +292 -0
  76. onetick/py/core/column_operations/_methods/op_types.py +160 -0
  77. onetick/py/core/column_operations/accessors/__init__.py +0 -0
  78. onetick/py/core/column_operations/accessors/_accessor.py +28 -0
  79. onetick/py/core/column_operations/accessors/decimal_accessor.py +104 -0
  80. onetick/py/core/column_operations/accessors/dt_accessor.py +537 -0
  81. onetick/py/core/column_operations/accessors/float_accessor.py +184 -0
  82. onetick/py/core/column_operations/accessors/str_accessor.py +1367 -0
  83. onetick/py/core/column_operations/base.py +1121 -0
  84. onetick/py/core/cut_builder.py +150 -0
  85. onetick/py/core/db_constants.py +20 -0
  86. onetick/py/core/eval_query.py +245 -0
  87. onetick/py/core/lambda_object.py +441 -0
  88. onetick/py/core/multi_output_source.py +232 -0
  89. onetick/py/core/per_tick_script.py +2256 -0
  90. onetick/py/core/query_inspector.py +464 -0
  91. onetick/py/core/source.py +1744 -0
  92. onetick/py/db/__init__.py +2 -0
  93. onetick/py/db/_inspection.py +1128 -0
  94. onetick/py/db/db.py +1327 -0
  95. onetick/py/db/utils.py +64 -0
  96. onetick/py/docs/__init__.py +0 -0
  97. onetick/py/docs/docstring_parser.py +112 -0
  98. onetick/py/docs/utils.py +81 -0
  99. onetick/py/functions.py +2398 -0
  100. onetick/py/license.py +190 -0
  101. onetick/py/log.py +88 -0
  102. onetick/py/math.py +935 -0
  103. onetick/py/misc.py +470 -0
  104. onetick/py/oqd/__init__.py +22 -0
  105. onetick/py/oqd/eps.py +1195 -0
  106. onetick/py/oqd/sources.py +325 -0
  107. onetick/py/otq.py +216 -0
  108. onetick/py/pyomd_mock.py +47 -0
  109. onetick/py/run.py +916 -0
  110. onetick/py/servers.py +173 -0
  111. onetick/py/session.py +1347 -0
  112. onetick/py/sources/__init__.py +19 -0
  113. onetick/py/sources/cache.py +167 -0
  114. onetick/py/sources/common.py +128 -0
  115. onetick/py/sources/csv.py +642 -0
  116. onetick/py/sources/custom.py +85 -0
  117. onetick/py/sources/data_file.py +305 -0
  118. onetick/py/sources/data_source.py +1045 -0
  119. onetick/py/sources/empty.py +94 -0
  120. onetick/py/sources/odbc.py +337 -0
  121. onetick/py/sources/order_book.py +271 -0
  122. onetick/py/sources/parquet.py +168 -0
  123. onetick/py/sources/pit.py +191 -0
  124. onetick/py/sources/query.py +495 -0
  125. onetick/py/sources/snapshots.py +419 -0
  126. onetick/py/sources/split_query_output_by_symbol.py +198 -0
  127. onetick/py/sources/symbology_mapping.py +123 -0
  128. onetick/py/sources/symbols.py +374 -0
  129. onetick/py/sources/ticks.py +825 -0
  130. onetick/py/sql.py +70 -0
  131. onetick/py/state.py +251 -0
  132. onetick/py/types.py +2131 -0
  133. onetick/py/utils/__init__.py +70 -0
  134. onetick/py/utils/acl.py +93 -0
  135. onetick/py/utils/config.py +186 -0
  136. onetick/py/utils/default.py +49 -0
  137. onetick/py/utils/file.py +38 -0
  138. onetick/py/utils/helpers.py +76 -0
  139. onetick/py/utils/locator.py +94 -0
  140. onetick/py/utils/perf.py +498 -0
  141. onetick/py/utils/query.py +49 -0
  142. onetick/py/utils/render.py +1374 -0
  143. onetick/py/utils/script.py +244 -0
  144. onetick/py/utils/temp.py +471 -0
  145. onetick/py/utils/types.py +120 -0
  146. onetick/py/utils/tz.py +84 -0
  147. onetick_py-1.177.0.dist-info/METADATA +137 -0
  148. onetick_py-1.177.0.dist-info/RECORD +152 -0
  149. onetick_py-1.177.0.dist-info/WHEEL +5 -0
  150. onetick_py-1.177.0.dist-info/entry_points.txt +2 -0
  151. onetick_py-1.177.0.dist-info/licenses/LICENSE +21 -0
  152. onetick_py-1.177.0.dist-info/top_level.txt +2 -0
onetick/py/aggregations/_base.py
@@ -0,0 +1,648 @@
+ from abc import ABC, abstractmethod
+ from typing import List, Dict, Union, TYPE_CHECKING, Tuple, Optional, Any
+ from copy import deepcopy
+ from functools import wraps
+ from collections import namedtuple
+ import pandas as pd
+
+ if TYPE_CHECKING:
+     from onetick.py.core.source import Source  # hack for annotations
+
+ from onetick.py.core.column import _Column
+ from onetick.py.core.column_operations.base import _Operation, OnetickParameter
+ from onetick.py.core._source._symbol_param import _SymbolParamColumn
+ from onetick.py import types as ott
+ from onetick.py import utils
+ from onetick.py.otq import otq
+
+
+ def validate(method):
+     """wraps a schema getter with validation of the input columns against ``src`` and of the output column against the resulting schema"""
+
+     @wraps(method)
+     def inner(obj: '_Aggregation', src: 'Source', name):
+         obj.validate_input_columns(src)
+         for column in obj.group_by:
+             if str(column) not in src.schema or not isinstance(src[str(column)], _Column):
+                 raise KeyError(f"There is no '{column}' column to group by")
+         schema: Dict = method(obj, src=src, name=name)
+         if not obj.overwrite_output_field:
+             obj.validate_output_name(schema, name)
+         return schema
+
+     return inner
+
+
+ def operation_gb(method):
+     """wraps an aggregation to materialize group-by _Operations as temporary columns and remove them afterwards"""
+
+     @wraps(method)
+     def inner(obj: '_Aggregation', src: 'Source', *args, **kwargs) -> 'Source':
+         inplace = kwargs.get('inplace')
+         res = src if inplace else src.copy()
+         src_schema = src.schema
+
+         gb_copy = obj.group_by.copy()
+         obj.group_by = []
+         for i, gb in enumerate(gb_copy):
+             if isinstance(gb, _Operation) and not isinstance(gb, _Column):
+                 name = f'GROUP_{i}'
+                 if name in src_schema:
+                     raise AttributeError(f"'{name}' column name is reserved for group by Operation "
+                                          f"but it exists in the current schema")
+                 res[name] = gb
+                 obj.group_by.append(res[name])
+             else:
+                 obj.group_by.append(gb)
+         res = method(obj, res, *args, **kwargs)
+
+         obj.group_by = gb_copy
+         return res
+     return inner
+
+
+ def operation_replacer(method):
+     """
+     PY-378
+     Decorator that allows aggregation columns to be specified as operations.
+     """
+
+     @wraps(method)
+     def inner(obj: '_Aggregation', src: 'Source', *args, **kwargs) -> 'Source':
+         inplace = kwargs.get('inplace')
+         res = src if inplace else src.copy()
+         tmp_columns = {}
+
+         aggrs = getattr(obj, 'aggrs', None)
+         if aggrs:
+             aggs = aggrs.values()
+         else:
+             name = args[0] if args else kwargs.get('name')
+             # pylint: disable-next=unidiomatic-typecheck
+             if type(obj.column_name) is _Operation and name is None:
+                 raise ValueError('Output field name must be specified when aggregating an operation')
+             aggs = [obj]
+
+         # Add the operation from each aggregation object to source `res` as a column
+         # and replace the *column_name* property in each aggregation object with this column's name.
+         for i, agg in enumerate(aggs):
+             # pylint: disable-next=unidiomatic-typecheck
+             if type(agg.column_name) is _Operation:
+                 tmp_name = f'__TMP_AGG_COLUMN_{i}__'
+                 res[tmp_name] = agg.column_name
+                 tmp_columns[tmp_name] = (agg, agg.column_name)
+                 agg.column_name = tmp_name
+
+         res = method(obj, res, *args, **kwargs)
+
+         if tmp_columns:
+             # Roll back all aggregation objects and source `res`:
+             # delete all temporary columns and restore the *column_name* property in the aggregations.
+             to_drop = list(set(tmp_columns).intersection(res.schema))
+             if to_drop:
+                 res.drop(to_drop, inplace=True)
+             for agg, column_name in tmp_columns.values():
+                 agg.column_name = column_name
+
+         return res
+     return inner
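
operation_gb and operation_replacer share the same mutate-and-roll-back shape: stash state on the aggregation object, materialize temporary columns, run the wrapped method, then restore the state and drop the temporaries. A standalone sketch of that pattern (illustrative names, not onetick-py API):

    from functools import wraps

    def with_temporary_attr(attr, make_tmp_value):
        """Swap `attr` on the target object for the duration of the call, then restore it."""
        def decorator(method):
            @wraps(method)
            def inner(obj, *args, **kwargs):
                saved = getattr(obj, attr)
                setattr(obj, attr, make_tmp_value(saved))
                try:
                    return method(obj, *args, **kwargs)
                finally:
                    setattr(obj, attr, saved)  # unconditional rollback
            return inner
        return decorator

Note that the decorators above restore state without try/finally, so an exception inside the wrapped method can leave the aggregation object modified; the sketch makes the rollback unconditional.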
+
+
+ def output_column_overwriter(method):
+     """
+     Allows an aggregation to output into an already existing field
+     by temporarily renaming that field.
+     """
+     @wraps(method)
+     def inner(obj: '_Aggregation', src: 'Source', *args, **kwargs) -> 'Source':
+         column_name = obj.column_name
+         name = args[0] if args else kwargs.get('name')
+         name = name or column_name
+
+         if not obj.overwrite_output_field or not name or name not in src.schema:
+             return method(obj, src, *args, **kwargs)
+
+         inplace = kwargs.get('inplace')
+         res = src if inplace else src.copy()
+
+         # rename the existing field to a temporary name
+         tmp_name = f'__TMP_AGG_COLUMN_{name}__'
+         res[tmp_name] = res[name]
+         res.drop(name, inplace=True)
+         # aggregate the renamed field
+         kwargs['name'] = name
+         obj.column_name = tmp_name
+
+         res = method(obj, res, *args, **kwargs)
+
+         # remove the temporary field
+         if tmp_name in res.schema:
+             res.drop(tmp_name, inplace=True)
+         obj.column_name = column_name
+
+         return res
+     return inner
+
+
+ def get_seconds_from_time_offset(time_offset):
+     if not isinstance(time_offset, ott.OTPBaseTimeOffset):
+         raise ValueError('Only DatePart objects can be passed to this function')
+
+     return int(pd.Timedelta(time_offset).total_seconds())
+
+
+ def get_bucket_interval_from_datepart(bucket_interval):
+     if not isinstance(bucket_interval, ott.OTPBaseTimeOffset):
+         raise ValueError('Only DatePart objects can be passed to this function')
+
+     if isinstance(bucket_interval, ott.ExpressionDefinedTimeOffset):
+         raise ValueError(f"Operation as DatePart isn't allowed: {str(bucket_interval.n)}")
+
+     # bucket_interval can also be one of:
+     # otp.Milli, otp.Second, otp.Minute, otp.Hour, otp.Day, otp.Month
+     # bucket_interval will be converted and the corresponding bucket_units value will be set
+
+     offset, datepart = bucket_interval.get_offset()
+     if datepart not in {'millisecond', 'second', 'minute', 'hour', 'day', 'month'}:
+         raise ValueError(f"Unsupported DatePart passed to bucket_interval: {datepart}")
+
+     if offset < 0:
+         raise ValueError(
+             f"Negative DateParts aren't allowed for bucket_interval: {offset} ({datepart})"
+         )
+
+     if datepart in {'millisecond', 'minute', 'hour'}:
+         # bucket_units can only be seconds, days, months or ticks,
+         # so other DateParts are converted to seconds
+         if datepart == 'millisecond':
+             offset, datepart = offset / 1000, 'second'
+         else:
+             offset, datepart = ott.Second(get_seconds_from_time_offset(bucket_interval)).get_offset()
+
+     return offset, f"{datepart}s"  # type: ignore[union-attr]
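
Following the branches above, otp.Minute(5) resolves to (300, 'seconds'), otp.Milli(500) to (0.5, 'seconds'), and otp.Day(2) passes through as (2, 'days'). A self-contained sketch of the same arithmetic, with pandas standing in for the DatePart types:

    import pandas as pd

    # 5 minutes -> 300 seconds, as in get_seconds_from_time_offset
    assert int(pd.Timedelta(minutes=5).total_seconds()) == 300

    # 500 milliseconds -> 0.5 seconds, as in the 'millisecond' branch
    offset, datepart = 500 / 1000, 'second'
    assert (offset, f"{datepart}s") == (0.5, 'seconds')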
+
+
+ class _Aggregation(ABC):
+
+     @property
+     @abstractmethod
+     def NAME(self) -> str:
+         pass
+
+     @property
+     @abstractmethod
+     def EP(self) -> otq.EpBase:
+         pass
+
+     DEFAULT_OUTPUT_NAME = 'VALUE'
+
+     FIELDS_MAPPING = {
+         "column_name": "INPUT_FIELD_NAME",
+         "running": "IS_RUNNING_AGGR",
+         "all_fields": "ALL_FIELDS_FOR_SLIDING",
+         "bucket_interval": "BUCKET_INTERVAL",
+         "bucket_time": "BUCKET_TIME",
+         "bucket_units": "BUCKET_INTERVAL_UNITS",
+         "bucket_end_condition": "BUCKET_END_CRITERIA",
+         "end_condition_per_group": "BUCKET_END_PER_GROUP",
+         "boundary_tick_bucket": "BOUNDARY_TICK_BUCKET",
+         "group_by": "GROUP_BY",
+         "groups_to_display": "GROUPS_TO_DISPLAY",
+     }
+     FIELDS_DEFAULT = {
+         "running": False,
+         "all_fields": False,
+         "bucket_interval": 0,
+         "bucket_time": "BUCKET_END",
+         "bucket_units": "seconds",
+         "bucket_end_condition": None,
+         "end_condition_per_group": False,
+         "boundary_tick_bucket": "new",
+         "group_by": [],
+         "groups_to_display": "all",
+     }
+
+     FIELDS_TO_SKIP: List = []  # attrs listed here won't be used in self.__str__
+
+     output_field_type: Optional[type] = None  # None forces using the type of the input column
+     require_type: Optional[Tuple[type, ...]] = None
+     _validations_to_skip: List = []
+
+     def __init__(self,
+                  column: Union[str, _Column, _Operation],
+                  running: bool = False,
+                  all_fields: Union[bool, str] = False,
+                  bucket_interval: Union[int, ott.OTPBaseTimeOffset] = 0,
+                  bucket_time: str = "end",
+                  bucket_units: Union[str, None] = None,
+                  bucket_end_condition: Optional[_Operation] = None,
+                  end_condition_per_group: bool = False,
+                  boundary_tick_bucket: str = "new",
+                  group_by: Optional[Union[List, str, _Operation]] = None,
+                  groups_to_display: str = "all",
+                  overwrite_output_field: bool = False):
+         """
+         Base initializer that implements logic common to all aggregations
+         """
+         if isinstance(column, list):
+             column = ','.join(map(str, column))
+
+         column_name: Union[str, _Operation] = str(column)
+
+         if column_name == "Time":
+             # TODO: need to understand how to better work with the alias
+             column_name = "TIMESTAMP"
+
+         # pylint: disable-next=unidiomatic-typecheck
+         if type(column) is _Operation:
+             column_name = column
+
+         if isinstance(bucket_interval, float):
+             if bucket_units is not None and bucket_units != 'seconds':
+                 raise ValueError('Float values for bucket_interval are only supported for seconds.')
+             if bucket_interval < 0.001:
+                 raise ValueError('Float values for bucket_interval less than 0.001 are not supported.')
+
+         if isinstance(bucket_interval, ott.OTPBaseTimeOffset):
+             bucket_interval, bucket_units = get_bucket_interval_from_datepart(bucket_interval)
+
+         if isinstance(all_fields, str) and all_fields == "when_ticks_exit_window":
+             if not running:
+                 raise ValueError("`all_fields` can't be set to 'when_ticks_exit_window' when `running=False`")
+
+             if not bucket_interval:
+                 raise ValueError(
+                     "`all_fields` can't be set to 'when_ticks_exit_window' when `bucket_interval` is zero"
+                 )
+
+             all_fields = all_fields.upper()
+
+         self.column_name = column_name
+         self.running = running
+         self.all_fields = all_fields
+         self.bucket_time = bucket_time
+
+         if isinstance(bucket_interval, _Operation):
+             if bucket_interval.dtype is bool:
+                 if bucket_end_condition is not None:
+                     raise ValueError(
+                         "Bucket end condition passed in both `bucket_interval` and `bucket_end_condition` parameters"
+                     )
+
+                 bucket_end_condition = bucket_interval
+                 bucket_interval = 0
+             elif isinstance(bucket_interval, OnetickParameter) and bucket_interval.dtype is int:
+                 bucket_interval = str(bucket_interval)
+             elif isinstance(bucket_interval, _SymbolParamColumn) and bucket_interval.dtype is int:
+                 bucket_interval = str(bucket_interval.expr)
+             else:
+                 raise ValueError("Bucket interval can only be a boolean otp.Operation or an integer otp.param")
+
+         self.bucket_interval = bucket_interval
+
+         if bucket_end_condition is None:
+             self.bucket_end_condition = None  # type: ignore
+         else:
+             self.bucket_end_condition = str(bucket_end_condition)
+
+         self.bucket_units = bucket_units
+         if self.bucket_units is None:
+             if self.bucket_end_condition:
+                 # allow omitting bucket_units if bucket_end_condition is set
+                 self.bucket_units = 'flexible'
+             else:
+                 # default value
+                 self.bucket_units = 'seconds'
+
+         self.end_condition_per_group = end_condition_per_group
+         self.boundary_tick_bucket = boundary_tick_bucket
+         self.large_ints = False
+         if isinstance(group_by, (_Operation, str)):
+             group_by = [group_by]
+         self.group_by = group_by or []
+         self.groups_to_display = groups_to_display
+         self.overwrite_output_field = overwrite_output_field
+
+         self._param_validation()
+         self.bucket_time = f'BUCKET_{self.bucket_time.upper()}'
+
+     @staticmethod
+     def _attr2str(value) -> str:
+         if isinstance(value, bool):
+             return 'true' if value else 'false'
+         if isinstance(value, list):
+             return ','.join(value)
+         return str(value)
+
+     @property
+     def ep_params(self) -> Dict:
+         """prepare params for self.__str__ and otq.EpBase"""
+         params = {}
+
+         for field, ep_param in self.FIELDS_MAPPING.items():
+             if field in self.FIELDS_TO_SKIP:
+                 continue
+
+             default_value = self.FIELDS_DEFAULT.get(field)
+             if getattr(self, field) != default_value:
+                 if field == 'group_by':
+                     params[ep_param] = ",".join(list(map(str, self.group_by)))
+                 else:
+                     params[ep_param] = getattr(self, field)
+         return params
+
+     def __str__(self):
+         params = [f'{k}={self._attr2str(v)}' for k, v in self.ep_params.items()]
+         return self.NAME + "(" + ",".join(params) + ")"
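
Because ep_params emits only the parameters that differ from FIELDS_DEFAULT, a hypothetical running sum over a PRICE column with a 60-second bucket renders as SUM(INPUT_FIELD_NAME=PRICE,IS_RUNNING_AGGR=true,BUCKET_INTERVAL=60). A minimal standalone sketch of that rendering (the EP name SUM and the trimmed mapping are illustrative):

    FIELDS_MAPPING = {"column_name": "INPUT_FIELD_NAME", "running": "IS_RUNNING_AGGR",
                      "bucket_interval": "BUCKET_INTERVAL"}
    FIELDS_DEFAULT = {"running": False, "bucket_interval": 0}

    def render(name, **attrs):
        # emit only parameters that differ from their defaults, as ep_params does
        params = [f"{ep}={str(attrs[f]).lower() if isinstance(attrs[f], bool) else attrs[f]}"
                  for f, ep in FIELDS_MAPPING.items()
                  if attrs.get(f) != FIELDS_DEFAULT.get(f)]
        return name + "(" + ",".join(params) + ")"

    assert render("SUM", column_name="PRICE", running=True, bucket_interval=60) == \
        "SUM(INPUT_FIELD_NAME=PRICE,IS_RUNNING_AGGR=true,BUCKET_INTERVAL=60)"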
+
+     def to_ep(self, name: Optional[str]) -> otq.EpBase:
+         params = dict((k.lower(), v) for k, v in self.ep_params.items())
+         if 'output_field_name' not in self.FIELDS_TO_SKIP:
+             params['output_field_name'] = name
+         return self.EP(**params)
+
+     @validate
+     def _get_common_schema(self, src: 'Source', name: str) -> Dict:
+         """return the data schema without output fields (these fields are added later)"""
+         schema = {}
+         for column in self.group_by:
+             schema[str(column)] = src.schema[str(column)]
+         if self.all_fields:
+             schema.update(src.schema)
+         return schema
+
+     def _modify_source(self, res: 'Source', **kwargs):
+         """
+         Modify the resulting source in place before sinking to the aggregation.
+         Can be overridden if needed.
+         """
+         pass
+
+     def _get_output_schema(self, src: 'Source', name: Optional[str] = None) -> Dict:
+         if not name or name in src.__class__.meta_fields:
+             return {}
+         return {
+             name: self.output_field_type or src.schema[self.column_name]
+         }
+
+     @operation_gb
+     @operation_replacer
+     @output_column_overwriter
+     def apply(self, src: 'Source', name: Optional[str] = None, inplace: bool = False) -> 'Source':
+         """
+         Applies the aggregation to a Source and sets the proper schema.
+
+         Parameters
+         ----------
+         src: Source
+             Source to apply the aggregation to.
+         name: str, optional
+             Name of the output column. If not specified, self.column_name is used.
+         inplace: bool
+             Modify the passed ``src`` object or return a modified copy.
+         """
+         if inplace:
+             res = src
+             src = src.copy()
+         else:
+             res = src.copy()
+         out_name = name or self.column_name
+         schema = self._get_common_schema(src, out_name)
+         # it's important to validate the input schema before sinking
+         self._modify_source(res)
+         res.sink(self.to_ep(name=str(out_name)))
+         schema.update(self._get_output_schema(src, str(out_name)))
+         res.schema.set(**schema)
+
+         if not self.all_fields:
+             # in this case we propagate only the resulting fields stored in res.schema (flexible schema case)
+             res._add_table(strict=True)
+         else:
+             # adding a table to convert types in the schema, e.g. float to int
+             res._add_table(strict=False)
+         return res
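
A hypothetical end-to-end use of the apply contract; otp.Ticks, otp.agg.sum and otp.run are assumed here from the package's public modules (sources/ticks.py, aggregations/functions.py, run.py) and a configured session, so treat this as a sketch rather than verified API:

    import onetick.py as otp

    data = otp.Ticks(PRICE=[1.5, 2.5, 3.0])   # toy source with one PRICE column

    agg = otp.agg.sum('PRICE')                # an _Aggregation subclass instance
    data = agg.apply(data, name='PRICE_SUM')  # sinks the EP and installs the output schema

    df = otp.run(data)                        # one PRICE_SUM bucket over the query range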
+
+     def validate_input_columns(self, src: 'Source'):
+         """checks that the columns used in the aggregation are present in the Source"""
+         if self.column_name not in src.schema:
+             raise TypeError(f"Aggregation `{self.NAME}` uses column `{self.column_name}` as input, which doesn't exist")
+         if not self.require_type:
+             return
+         dtype = src.schema[self.column_name]
+         base_dtype = ott.get_base_type(dtype)
+         for t in self.require_type:
+             # more generic types can be specified in self.require_type too
+             if dtype is t or base_dtype is t:
+                 return
+         raise TypeError(f"Aggregation `{self.NAME}` requires {self.require_type} types, got {dtype}")
+
+     @staticmethod
+     def validate_output_name(schema: Dict, name: Union[List, str]):
+         """checks that the aggregation won't output columns with the same names"""
+         if not isinstance(name, list):
+             name = [name]
+
+         same_fields = []
+         for n in name:
+             if n in schema:
+                 if '__long_nsec_' in n:
+                     same_fields.append(n.replace('__long_nsec_', ''))  # hack for large ints
+                 else:
+                     same_fields.append(n)
+         if same_fields:
+             raise ValueError("You are trying to propagate all fields and put the result into already existing fields: "
+                              f"'{', '.join(same_fields)}'")
+
+     def _param_validation(self):
+         """validate __init__ parameters"""
+         if self.running and self.bucket_time == "start":
+             raise ValueError("It is not allowed to set both running=True and bucket_time='start'")
+         if self.bucket_units == "flexible" and self.bucket_end_condition is None:
+             raise ValueError("bucket_units is set to 'flexible' but bucket_end_condition is not specified. "
+                              "Please specify bucket_end_condition.")
+         if self.bucket_units != "flexible" and self.bucket_end_condition is not None:
+             raise ValueError("bucket_end_condition can be used only with 'flexible' bucket_units. "
+                              "Please set bucket_units to 'flexible'.")
+
+         if self.bucket_time not in ['start', 'end']:
+             raise ValueError(f"'bucket_time' must be either 'start' or 'end', but '{self.bucket_time}' was passed")
+
+         valid_units = ("seconds", "ticks", "days", "months", "flexible")
+         if self.bucket_units not in valid_units:
+             raise ValueError("'bucket_units' can be one of the following: "
+                              f"'{', '.join(valid_units)}'; however, '{self.bucket_units}' was passed")
+
+         valid_boundary = {"new", "previous"}
+         if self.boundary_tick_bucket not in valid_boundary:
+             message = "'boundary_tick_bucket' can be one of the following: {}; however, {} was passed"
+             raise ValueError(message.format(', '.join(list(valid_boundary)), self.boundary_tick_bucket))
+
+         for column in self.group_by:
+             if not isinstance(column, _Operation) and not isinstance(column, str):
+                 raise TypeError(f"Unsupported type '{column}' of a column to group by")
+
+         if self.groups_to_display not in ('all', 'event_in_last_bucket'):
+             raise ValueError("Parameter 'groups_to_display' can only be set to 'all' or 'event_in_last_bucket':"
+                              f" got '{self.groups_to_display}'")
+
+         if self.all_fields and not self.running and 'running_all_fields' not in self._validations_to_skip:
+             raise ValueError("It is not allowed to set all_fields to True for a non-running aggregation")
+
+         if not self.running and self.overwrite_output_field:
+             raise ValueError("Parameter 'overwrite_output_field' can only be used with running aggregations")
+
+     @property
+     def is_multi_column_aggregation(self):
+         return isinstance(self, _MultiColumnAggregation)
+
+     @property
+     def is_all_columns_aggregation(self):
+         return isinstance(self, _AllColumnsAggregation)
+
+
+ class _AggregationTSType(_Aggregation):
+
+     FIELDS_MAPPING = deepcopy(_Aggregation.FIELDS_MAPPING)
+     FIELDS_MAPPING['time_series_type'] = 'TIME_SERIES_TYPE'
+     FIELDS_DEFAULT = deepcopy(_Aggregation.FIELDS_DEFAULT)
+     FIELDS_DEFAULT['time_series_type'] = 'event_ts'
+
+     def __init__(self, column, time_series_type: str = "event_ts", *args, **kwargs):
+         """
+         Abstract class that implements common logic for aggregations
+         that can select the time series type; inherits from _Aggregation.
+
+         Parameters
+         ----------
+         column: see _Aggregation
+         time_series_type: "event_ts" or "state_ts", default="event_ts"
+             "state_ts":
+                 if there is a tick in the bucket with timestamp = bucket start:
+                     only ticks in the bucket are used for the calculation
+                 else:
+                     the latest tick from the previous bucket is also included in the current bucket
+             "event_ts": only ticks from the current bucket are used for calculations
+         args: see _Aggregation
+         kwargs: see _Aggregation
+         """
+         if time_series_type not in ["event_ts", "state_ts"]:
+             raise ValueError('time_series_type argument must be "event_ts" or "state_ts"')
+         self.time_series_type = time_series_type
+         super().__init__(column, *args, **kwargs)
+
+
+ class _AggregationTSSelection(_Aggregation):
+
+     FIELDS_MAPPING = deepcopy(_Aggregation.FIELDS_MAPPING)
+     FIELDS_MAPPING['selection'] = 'SELECTION'
+     FIELDS_DEFAULT = deepcopy(_Aggregation.FIELDS_DEFAULT)
+     FIELDS_DEFAULT['selection'] = 'first'
+
+     def __init__(self, column, selection: str = "first", *args, **kwargs):
+         if selection not in ["first", "last"]:
+             raise ValueError(f'{self.__class__.__name__} selection argument must be "first" or "last"')
+         self.selection = selection
+         super().__init__(column, *args, **kwargs)
+
+
+ class _FloatAggregation(_Aggregation):
+     """
+     Aggregation that expects int or float as input.
+     """
+
+     require_type = (int, float, ott._inf, ott.decimal)
+
+
+ class _KeepTs(_Aggregation):
+
+     def __init__(self, *args, keep_timestamp=True, **kwargs):
+         super().__init__(*args, **kwargs)
+         self.keep_timestamp = keep_timestamp
+
+     @validate  # type: ignore
+     def _get_common_schema(self, src: 'Source', *args, **kwargs) -> Dict:
+         schema = src.schema.copy()
+         schema['TICK_TIME'] = ott.nsectime
+         return schema
+
+     def apply(self, src: 'Source', *args, **kwargs) -> 'Source':
+         res = super().apply(src=src, *args, **kwargs)
+         if self.keep_timestamp:
+             # TICK_TIME can be empty if the tick comes from the default_tick aggregation parameter
+             res['TICK_TIME'] = res.if_else(res['TICK_TIME'], res['TICK_TIME'], res['TIMESTAMP'])
+             res['TIMESTAMP'] = res['TICK_TIME']
+             res.drop('TICK_TIME', inplace=True)
+         return res
+
+
+ class _ExpectLargeInts(_Aggregation):
+     FIELDS_MAPPING = deepcopy(_Aggregation.FIELDS_MAPPING)
+     FIELDS_MAPPING['large_ints'] = 'EXPECT_LARGE_INTS'
+     FIELDS_MAPPING['null_int_val'] = 'NULL_INT_VAL'
+     FIELDS_DEFAULT = deepcopy(_Aggregation.FIELDS_DEFAULT)
+     FIELDS_DEFAULT['large_ints'] = False
+     FIELDS_DEFAULT['null_int_val'] = 0
+
+     def __init__(self, *args, large_ints=False, null_int_val=0, **kwargs):
+         super().__init__(*args, **kwargs)
+         if large_ints not in {True, False, utils.adaptive}:
+             raise ValueError(f"Wrong value for {self.__class__.__name__} aggregation"
+                              f" 'large_ints' parameter: {large_ints}")
+         if large_ints is utils.adaptive:
+             large_ints = 'IF_INPUT_VAL_IS_LONG_INTEGER'
+
+         if null_int_val and not large_ints:
+             raise ValueError(
+                 f"Wrong value for {self.__class__.__name__} aggregation:"
+                 f" 'null_int_val' parameter is set, however 'large_ints' is `False`"
+             )
+
+         self.large_ints = large_ints
+         self.null_int_val = null_int_val
+
+     def apply(self, src: 'Source', name: Optional[str] = None) -> 'Source':
+         out_name = name or self.column_name
+         res, col, convert_back = self._ts_to_long(src, str(out_name))
+         res = super().apply(res, col.tmp_out_column)
+         if not convert_back:
+             return res
+         return self._long_to_ts(res, col)
+
+     def _ts_to_long(self, src: 'Source', name: str) -> Tuple['Source', Any, bool]:
+         agg_columns = namedtuple('agg_columns', ('in_column', 'tmp_in_column', 'tmp_out_column', 'out_column'))
+         if src.schema[self.column_name] != ott.nsectime:
+             return src, agg_columns(self.column_name, self.column_name, name, name), False
+         self.large_ints = True
+         res = src.copy()
+         col = agg_columns(self.column_name, f'__long_nsec_{self.column_name}',
+                           f'__long_nsec_{name}', name)
+         res[col.tmp_in_column] = res[col.in_column].apply(int)
+         self.column_name = col.tmp_in_column
+         return res, col, True
+
+     def _long_to_ts(self, src: 'Source', col) -> 'Source':
+         res = src.copy()
+         res[col.out_column] = res[col.tmp_out_column].astype(ott.nsectime)
+         to_drop = []
+         for c in [col.tmp_out_column, col.tmp_in_column]:
+             if c in res.schema:
+                 to_drop.append(c)
+         if to_drop:
+             res.drop(to_drop, inplace=True)
+         self.column_name = col.in_column
+         return res
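
The _ts_to_long/_long_to_ts pair exists because nanosecond timestamps must be aggregated as 64-bit integers (hence EXPECT_LARGE_INTS) and converted back afterwards. A standalone sketch of the same round trip in pandas (illustrative, not onetick-py API):

    import pandas as pd

    ts = pd.Series(pd.to_datetime(['2024-01-02 09:30:00.000000001',
                                   '2024-01-02 09:30:00.000000005']))

    as_int = ts.astype('int64')      # nsectime -> long, like _ts_to_long
    aggregated = as_int.max()        # aggregate in integer space, large ints preserved
    back = pd.Timestamp(aggregated)  # long -> nsectime, like _long_to_ts

    assert back == ts.iloc[1]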
+
+
+ class _MultiColumnAggregation:
+     """
+     Helper class for identifying multi-column aggregations.
+     """
+     pass
+
+
+ class _AllColumnsAggregation(_MultiColumnAggregation):
+     """
+     Helper class for identifying aggregations that return all fields from the original ticks.
+     """
+     pass
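
is_multi_column_aggregation and is_all_columns_aggregation in _Aggregation are plain isinstance checks against these two marker classes. A minimal illustration of the pattern (the OrderBookSnapshot subclass is hypothetical):

    class _MultiColumnAggregation:
        """Marker: the aggregation emits several output columns."""

    class _AllColumnsAggregation(_MultiColumnAggregation):
        """Marker: the aggregation propagates every input column."""

    class OrderBookSnapshot(_AllColumnsAggregation):  # hypothetical subclass
        pass

    agg = OrderBookSnapshot()
    assert isinstance(agg, _MultiColumnAggregation)  # is_multi_column_aggregation -> True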