onetick-py 1.162.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152) hide show
  1. locator_parser/__init__.py +0 -0
  2. locator_parser/acl.py +73 -0
  3. locator_parser/actions.py +266 -0
  4. locator_parser/common.py +365 -0
  5. locator_parser/io.py +41 -0
  6. locator_parser/locator.py +150 -0
  7. onetick/__init__.py +101 -0
  8. onetick/doc_utilities/__init__.py +3 -0
  9. onetick/doc_utilities/napoleon.py +40 -0
  10. onetick/doc_utilities/ot_doctest.py +140 -0
  11. onetick/doc_utilities/snippets.py +280 -0
  12. onetick/lib/__init__.py +4 -0
  13. onetick/lib/instance.py +138 -0
  14. onetick/py/__init__.py +290 -0
  15. onetick/py/_stack_info.py +89 -0
  16. onetick/py/_version.py +2 -0
  17. onetick/py/aggregations/__init__.py +11 -0
  18. onetick/py/aggregations/_base.py +645 -0
  19. onetick/py/aggregations/_docs.py +912 -0
  20. onetick/py/aggregations/compute.py +286 -0
  21. onetick/py/aggregations/functions.py +2216 -0
  22. onetick/py/aggregations/generic.py +104 -0
  23. onetick/py/aggregations/high_low.py +80 -0
  24. onetick/py/aggregations/num_distinct.py +83 -0
  25. onetick/py/aggregations/order_book.py +427 -0
  26. onetick/py/aggregations/other.py +1014 -0
  27. onetick/py/backports.py +26 -0
  28. onetick/py/cache.py +373 -0
  29. onetick/py/callback/__init__.py +5 -0
  30. onetick/py/callback/callback.py +275 -0
  31. onetick/py/callback/callbacks.py +131 -0
  32. onetick/py/compatibility.py +752 -0
  33. onetick/py/configuration.py +736 -0
  34. onetick/py/core/__init__.py +0 -0
  35. onetick/py/core/_csv_inspector.py +93 -0
  36. onetick/py/core/_internal/__init__.py +0 -0
  37. onetick/py/core/_internal/_manually_bound_value.py +6 -0
  38. onetick/py/core/_internal/_nodes_history.py +250 -0
  39. onetick/py/core/_internal/_op_utils/__init__.py +0 -0
  40. onetick/py/core/_internal/_op_utils/every_operand.py +9 -0
  41. onetick/py/core/_internal/_op_utils/is_const.py +10 -0
  42. onetick/py/core/_internal/_per_tick_scripts/tick_list_sort_template.script +121 -0
  43. onetick/py/core/_internal/_proxy_node.py +140 -0
  44. onetick/py/core/_internal/_state_objects.py +2307 -0
  45. onetick/py/core/_internal/_state_vars.py +87 -0
  46. onetick/py/core/_source/__init__.py +0 -0
  47. onetick/py/core/_source/_symbol_param.py +95 -0
  48. onetick/py/core/_source/schema.py +97 -0
  49. onetick/py/core/_source/source_methods/__init__.py +0 -0
  50. onetick/py/core/_source/source_methods/aggregations.py +810 -0
  51. onetick/py/core/_source/source_methods/applyers.py +296 -0
  52. onetick/py/core/_source/source_methods/columns.py +141 -0
  53. onetick/py/core/_source/source_methods/data_quality.py +301 -0
  54. onetick/py/core/_source/source_methods/debugs.py +270 -0
  55. onetick/py/core/_source/source_methods/drops.py +120 -0
  56. onetick/py/core/_source/source_methods/fields.py +619 -0
  57. onetick/py/core/_source/source_methods/filters.py +1001 -0
  58. onetick/py/core/_source/source_methods/joins.py +1393 -0
  59. onetick/py/core/_source/source_methods/merges.py +566 -0
  60. onetick/py/core/_source/source_methods/misc.py +1325 -0
  61. onetick/py/core/_source/source_methods/pandases.py +155 -0
  62. onetick/py/core/_source/source_methods/renames.py +356 -0
  63. onetick/py/core/_source/source_methods/sorts.py +183 -0
  64. onetick/py/core/_source/source_methods/switches.py +142 -0
  65. onetick/py/core/_source/source_methods/symbols.py +117 -0
  66. onetick/py/core/_source/source_methods/times.py +627 -0
  67. onetick/py/core/_source/source_methods/writes.py +702 -0
  68. onetick/py/core/_source/symbol.py +202 -0
  69. onetick/py/core/_source/tmp_otq.py +222 -0
  70. onetick/py/core/column.py +209 -0
  71. onetick/py/core/column_operations/__init__.py +0 -0
  72. onetick/py/core/column_operations/_methods/__init__.py +4 -0
  73. onetick/py/core/column_operations/_methods/_internal.py +28 -0
  74. onetick/py/core/column_operations/_methods/conversions.py +215 -0
  75. onetick/py/core/column_operations/_methods/methods.py +294 -0
  76. onetick/py/core/column_operations/_methods/op_types.py +150 -0
  77. onetick/py/core/column_operations/accessors/__init__.py +0 -0
  78. onetick/py/core/column_operations/accessors/_accessor.py +30 -0
  79. onetick/py/core/column_operations/accessors/decimal_accessor.py +92 -0
  80. onetick/py/core/column_operations/accessors/dt_accessor.py +464 -0
  81. onetick/py/core/column_operations/accessors/float_accessor.py +160 -0
  82. onetick/py/core/column_operations/accessors/str_accessor.py +1374 -0
  83. onetick/py/core/column_operations/base.py +1061 -0
  84. onetick/py/core/cut_builder.py +149 -0
  85. onetick/py/core/db_constants.py +20 -0
  86. onetick/py/core/eval_query.py +244 -0
  87. onetick/py/core/lambda_object.py +442 -0
  88. onetick/py/core/multi_output_source.py +193 -0
  89. onetick/py/core/per_tick_script.py +2253 -0
  90. onetick/py/core/query_inspector.py +465 -0
  91. onetick/py/core/source.py +1663 -0
  92. onetick/py/db/__init__.py +2 -0
  93. onetick/py/db/_inspection.py +1042 -0
  94. onetick/py/db/db.py +1423 -0
  95. onetick/py/db/utils.py +64 -0
  96. onetick/py/docs/__init__.py +0 -0
  97. onetick/py/docs/docstring_parser.py +112 -0
  98. onetick/py/docs/utils.py +81 -0
  99. onetick/py/functions.py +2354 -0
  100. onetick/py/license.py +188 -0
  101. onetick/py/log.py +88 -0
  102. onetick/py/math.py +947 -0
  103. onetick/py/misc.py +437 -0
  104. onetick/py/oqd/__init__.py +22 -0
  105. onetick/py/oqd/eps.py +1195 -0
  106. onetick/py/oqd/sources.py +325 -0
  107. onetick/py/otq.py +211 -0
  108. onetick/py/pyomd_mock.py +47 -0
  109. onetick/py/run.py +841 -0
  110. onetick/py/servers.py +173 -0
  111. onetick/py/session.py +1342 -0
  112. onetick/py/sources/__init__.py +19 -0
  113. onetick/py/sources/cache.py +167 -0
  114. onetick/py/sources/common.py +126 -0
  115. onetick/py/sources/csv.py +642 -0
  116. onetick/py/sources/custom.py +85 -0
  117. onetick/py/sources/data_file.py +305 -0
  118. onetick/py/sources/data_source.py +1049 -0
  119. onetick/py/sources/empty.py +94 -0
  120. onetick/py/sources/odbc.py +337 -0
  121. onetick/py/sources/order_book.py +238 -0
  122. onetick/py/sources/parquet.py +168 -0
  123. onetick/py/sources/pit.py +191 -0
  124. onetick/py/sources/query.py +495 -0
  125. onetick/py/sources/snapshots.py +419 -0
  126. onetick/py/sources/split_query_output_by_symbol.py +198 -0
  127. onetick/py/sources/symbology_mapping.py +123 -0
  128. onetick/py/sources/symbols.py +357 -0
  129. onetick/py/sources/ticks.py +825 -0
  130. onetick/py/sql.py +70 -0
  131. onetick/py/state.py +256 -0
  132. onetick/py/types.py +2056 -0
  133. onetick/py/utils/__init__.py +70 -0
  134. onetick/py/utils/acl.py +93 -0
  135. onetick/py/utils/config.py +186 -0
  136. onetick/py/utils/default.py +49 -0
  137. onetick/py/utils/file.py +38 -0
  138. onetick/py/utils/helpers.py +76 -0
  139. onetick/py/utils/locator.py +94 -0
  140. onetick/py/utils/perf.py +499 -0
  141. onetick/py/utils/query.py +49 -0
  142. onetick/py/utils/render.py +1139 -0
  143. onetick/py/utils/script.py +244 -0
  144. onetick/py/utils/temp.py +471 -0
  145. onetick/py/utils/types.py +118 -0
  146. onetick/py/utils/tz.py +82 -0
  147. onetick_py-1.162.2.dist-info/METADATA +148 -0
  148. onetick_py-1.162.2.dist-info/RECORD +152 -0
  149. onetick_py-1.162.2.dist-info/WHEEL +5 -0
  150. onetick_py-1.162.2.dist-info/entry_points.txt +2 -0
  151. onetick_py-1.162.2.dist-info/licenses/LICENSE +21 -0
  152. onetick_py-1.162.2.dist-info/top_level.txt +2 -0
@@ -0,0 +1,499 @@
1
+ import io
2
+ import os
3
+ import subprocess
4
+ import warnings
5
+ import dataclasses
6
+ from pathlib import Path
7
+ from dataclasses import dataclass
8
+ from typing import Optional, Union, Type, Tuple
9
+
10
+ import pandas as pd
11
+ import onetick.py as otp
12
+ from onetick.py.otq import otq
13
+ from onetick.py.backports import cache
14
+ from onetick.py.core import query_inspector
15
+ from . import adaptive
16
+
17
+
18
# Location of OneTick's measure_perf.exe tool.
# It is None in WebAPI mode or when the OTP_SKIP_OTQ_VALIDATION environment variable is set.
# see http://solutions.pages.soltest.onetick.com/iac/onetick-server/MeasurePerf.html
MEASURE_PERF = (
    None
    if otp.__webapi__ or os.getenv("OTP_SKIP_OTQ_VALIDATION", False)
    else Path(otp.__one_tick_bin_dir__) / 'measure_perf.exe'
)
23
+
24
+
25
@cache
def _get_allocation_lib() -> Optional[str]:
    """
    Build the command prefix that loads the allocation interceptors library.

    Returns
    -------
    ``LD_PRELOAD=<library>`` on non-Windows systems, ``<withdll.exe> -d:<library>`` on Windows,
    or None (after emitting a warning) when a required file is missing from OneTick's binary directory.
    """
    bin_dir = Path(otp.__one_tick_bin_dir__)
    on_windows = os.name == 'nt'
    lib_path = bin_dir / ('liballocation_interceptors' + ('.dll' if on_windows else '.so'))
    if not lib_path.exists():
        warnings.warn(f"Can't find file {lib_path}, memory statistics will not be calculated")
        return None
    if not on_windows:
        return f'LD_PRELOAD={lib_path}'
    # on Windows the library is injected with the help of withdll.exe
    injector = bin_dir / 'withdll.exe'
    if injector.exists():
        return f'{injector} -d:{lib_path}'
    warnings.warn(f"Can't find file {injector}, memory statistics will not be calculated")
    return None
40
+
41
+
42
def _run_measure_perf(otq_file: str, summary_file: str, context: Optional[str] = None):
    """
    Execute measure_perf.exe on ``otq_file`` and store the report in ``summary_file``.

    Parameters
    ----------
    otq_file: str
        path to the query to measure.
    summary_file: str
        path to the file where the summary will be written.
    context: str
        optional context passed to the tool with ``-context``.

    Raises
    ------
    RuntimeError
        in WebAPI mode, when the tool location is not configured, or when the tool file doesn't exist.
    subprocess.CalledProcessError
        when the tool exits with a non-zero code
        (except for the Windows special case handled below).
    """
    if otp.__webapi__:
        raise RuntimeError("Can't use measure_perf.exe in WebAPI mode.")
    if MEASURE_PERF is None:
        # Without this check the command line would start with the literal string "None".
        raise RuntimeError("Can't use measure_perf.exe: its location is not configured "
                           "(OTP_SKIP_OTQ_VALIDATION is set).")
    if not MEASURE_PERF.exists():
        raise RuntimeError(f"File {MEASURE_PERF} doesn't exist, can't execute it.")

    allocation_lib = _get_allocation_lib()
    cmd_parts = [allocation_lib] if allocation_lib else []
    cmd_parts.append(f'{MEASURE_PERF} -otq_file {otq_file} -summary_file {summary_file}')
    if context:
        cmd_parts.append(f'-context {context}')
    cmd = ' '.join(cmd_parts)

    try:
        # The shell is required: the prefix may be an environment assignment (LD_PRELOAD=...).
        subprocess.run(cmd, shell=True, check=True)
    except subprocess.CalledProcessError:
        if os.name == 'nt' and allocation_lib and Path(str(summary_file)).exists():
            # withdll.exe on Windows returns strange exit codes, but runs successfully anyway
            return
        raise
61
+
62
+
63
def measure_perf(src_or_otq: Union['otp.Source', str],
                 summary_file: Union[str, 'otp.utils.TmpFile', None] = None,
                 context: Union[str, Type[adaptive], None] = adaptive) -> Tuple[str, Union[str, 'otp.utils.TmpFile']]:
    """
    Run **measure_perf.exe** tool on some .otq file or :py:class:`onetick.py.Source`.
    The report is written to ``summary_file``.
    When ``summary_file`` is not specified,
    a temporary :py:class:`onetick.py.utils.temp.TmpFile` is created and returned instead.

    Parameters
    ----------
    src_or_otq: :py:class:`~onetick.py.Source` or str
        :py:class:`~onetick.py.Source` object or path to already existing .otq file.
    summary_file: str
        path to the resulting summary file.
        By default some temporary file name will be used.
    context: str
        context that will be used to run the query.
        By default the value of ``otp.config.context`` is used.

    Returns
    -------
    Returns tuple with the path to the generated query and path to the summary file.

    Examples
    --------
    >>> t = otp.Tick(A=1)
    >>> otq_file, summary_file = otp.perf.measure_perf(t)
    >>> with open(summary_file) as f:  # doctest: +ELLIPSIS
    ...    print(f.read())
    Running result of ...
    ...
    index,EP_name,tag,...
    ...
    """
    # a Source is first saved to an .otq file, a string is used as the query path as is
    otq_file = src_or_otq.to_otq() if isinstance(src_or_otq, otp.Source) else src_or_otq
    summary_file = summary_file or otp.utils.TmpFile()
    if context is adaptive:
        context = otp.config.context
    _run_measure_perf(otq_file, str(summary_file), context)  # type: ignore
    return otq_file, summary_file
105
+
106
+
107
@dataclass
class SummaryEntry:
    """
    Base class for summary entries: adds dict-like access and field introspection to a dataclass.
    """

    def __getitem__(self, item):
        """
        Return the value of the entry field called ``item``.
        """
        return getattr(self, item)

    def __setitem__(self, item, value):
        """
        Assign ``value`` to the entry field called ``item``.
        """
        setattr(self, item, value)

    def asdict(self) -> dict:
        """
        Convert the entry to a dictionary mapping field names to their values.
        """
        return dataclasses.asdict(self)

    def __iter__(self):
        """
        Yield a ``(name, value)`` tuple for each field of the entry.
        """
        yield from self.asdict().items()

    @classmethod
    @cache
    def fields(cls):
        """
        Get the dataclass field objects of the entry class.
        """
        return dataclasses.fields(cls)

    @classmethod
    @cache
    def field_names(cls):
        """
        Get the list of names of the entry class fields.
        """
        return [entry_field.name for entry_field in cls.fields()]
149
+
150
+
151
@dataclass
class DebugSummaryEntry:
    """
    Optional debug fields of a summary entry. Both are unset (None) by default.
    """
    #: internal stack info identifier used to look up the debug information
    stack_info: Optional[str] = None
    #: python traceback string pointing to the python code that created OneTick's EP
    traceback: Optional[str] = None
157
+
158
+
159
@dataclass
class _OrdinarySummaryEntry(SummaryEntry):
    """
    Fields of a single line of the ordinary performance summary, in the order they are declared here.
    """
    #: Sequential number of the EP in the report
    index: int
    #: Name of the EP
    EP_name: str
    #: EP full tag (scope will be added to the tag if there is any)
    tag: int
    #: Time elapsed for EP execution with its child nodes in microseconds
    running_time_with_children: int
    #: Individual time elapsed for EP execution in microseconds
    running_time: int
    #: Number of ticks processed by the EP
    processed_tick_events: int
    #: Number of tick descriptors processed by the EP
    processed_schema_events: int
    #: Presumably the number of timer events processed by the EP (not documented in the original code)
    processed_timer_events: int
    #: Maximal number of ticks accumulated by the EP during query execution
    #: This field is calculated only for aggregations (for example, EPs with a sliding window or GROUP_BY).
    #: For all other EPs, it has the value of 0.
    max_accumulated_ticks_count: int
    #: For continuous queries, each EP measures the latency in microseconds for all the ticks it has propagated.
    #: The latency of a tick is considered to be the difference between
    #: tick propagation host time and the timestamp of the tick.
    #: The maximum value of this latency (calculated by the EP)
    #: is reported by measure_perf.exe in the summary of that EP.
    #:
    #: The latency is calculated neither for aggregations with BUCKET_TIME=BUCKET_START
    #: (as ticks are propagated by overwritten timestamps that are equal to the bucket start) nor for their child EPs.
    #: For such cases, the following max_introduced_latency
    #: special values indicate the reason why the maximum introduced latency was not calculated:
    #:
    #: * -3 indicates that the EP is the culprit for latency calculation interruption
    #: * -2 indicates that the latency calculation for the EP is turned off because
    #:   its source EP's max_introduced_latency is -3
    #: * -1 indicates that the query is non-continuous
    max_introduced_latency: int
    #: There are EPs (like PRESORT, Aggregations, and others)
    #: that are allowed to propagate received ticks with some delay.
    #: This flag indicates if the EP introduces delay.
    ep_introduces_delay_flag: int
    #: The amount of memory allocated by EP and its child nodes.
    allocated_memory_with_children: int
    #: The amount of memory allocated by EP.
    allocated_memory: int
    #: The amount of memory unreleased by EP and its child nodes.
    #: The usual cause of non-zero unreleased memory is EP's and its child nodes' cached data.
    unreleased_memory_with_children: int
    #: The amount of memory unreleased by EP. The usual cause of non-zero unreleased memory is EP's cached data.
    unreleased_memory: int
    #: Peak memory utilization introduced by EP and its child nodes.
    peak_allocated_memory: int
211
+
212
+
213
@dataclass
class OrdinarySummaryEntry(DebugSummaryEntry, _OrdinarySummaryEntry):
    """
    Data class representing one line of the ordinary performance summary.
    """
219
+
220
+
221
@dataclass
class _PresortSummaryEntry(SummaryEntry):
    """
    Fields of a single line of the PRESORT EPs summary section.
    """
    #: Sequential number of the branch within the PRESORT EPs summary section
    index: int
    #: Name of the source EP of the combined PRESORT EP source branch this summary line describes
    presort_source_ep_name: str
    #: Name of the combined PRESORT EP
    presort_sink_ep_name: str
    #: Tag of the source EP of the combined PRESORT EP source branch this summary line describes
    presort_source_ep_tag: int
    #: Tag of the combined PRESORT EP
    presort_sink_ep_tag: int
    #: Maximum number of ticks accumulated by PRESORT for the located branch.
    max_accumulated_ticks_count: int
235
+
236
+
237
@dataclass
class PresortSummaryEntry(DebugSummaryEntry, _PresortSummaryEntry):
    """
    Data class representing one line of the PRESORT performance summary.
    """
243
+
244
+
245
@dataclass
class _CEPSummaryEntry(SummaryEntry):
    """
    Fields of a single line of the root EPs (CEP) summary section.
    """
    #: Sequential number of the root EP within the root EPs summary section
    index: int
    #: Name of the root EP this summary line describes
    sink_ep_name: str
    #: Tag of the root EP this summary line describes
    sink_ep_tag: int
    #: Mean of the latencies of all ticks passed through the node
    latencies_mean: float
    #: Standard deviation of the latencies of all ticks passed through the node
    latencies_standard_deviation: float
    #: Average slope of the linear regression function found by the least squares method,
    #: calculated for all latencies of all ticks passed through the root node.
    #: The regression function can be considered as a function describing some relationship
    #: between two variables: tick latency and tick arrival timestamp.
    latencies_average_slope: float
    #: Average variance of ticks latencies from the computed linear regression function.
    latencies_variance_from_regression_line: float
264
+
265
+
266
@dataclass
class CEPSummaryEntry(DebugSummaryEntry, _CEPSummaryEntry):
    """
    Data class representing one line of the CEP performance summary.
    """
272
+
273
+
274
class PerformanceSummary:
    """
    Parsed representation of one csv table of the performance summary.

    Subclasses configure the parsing with the class attributes:
    ``_entry_cls`` is the class used to build each entry,
    ``_entry_key`` is the name of the entry field used as the key of :attr:`entries_dict`,
    ``_ep_name_field`` is the name of the entry field holding the EP name.
    """
    _entry_cls: Optional[Type[SummaryEntry]] = None
    _entry_key: Optional[str] = None
    _ep_name_field: Optional[str] = None

    def __init__(self, text: Optional[str]):
        #: text of the summary (csv format)
        self.text = text
        #: pandas.DataFrame from the data of the summary
        if self.text:
            self.dataframe = pd.read_csv(io.StringIO(self.text))
        else:
            self.dataframe = pd.DataFrame()
        records = self.dataframe.to_dict('records')
        entry_cls = self._entry_cls
        #: list of corresponding entries objects
        self.entries = records if entry_cls is None else [entry_cls(**record) for record in records]
        key = self._entry_key
        #: mapping of EP tags to corresponding entry objects
        self.entries_dict = {} if key is None else {entry[key]: entry for entry in self.entries}

    def __iter__(self):
        """
        Iterate over list of summary :attr:`entries`.
        """
        yield from self.entries
299
+
300
+
301
class OrdinarySummary(PerformanceSummary):
    """
    The first section of the summary file.
    It contains the largest portion of the summary for graph nodes.
    """

    _entry_cls = OrdinarySummaryEntry
    _entry_key = 'tag'
    _ep_name_field = 'EP_name'
309
+
310
+
311
class PresortSummary(PerformanceSummary):
    """
    PRESORT EPs summary section: **measure_perf.exe** reports here, for each PRESORT source branch,
    the maximum number of ticks that PRESORT accumulated for that branch.

    Please note that there are some PRESORT EP types, like SYNCHRONIZE_TIME EP,
    that do not support performance measurement, yet.

    Each line of this section contains six fields:
    fields representing the location of the branch for which the report is printed
    and a field that contains the maximum number of ticks accumulated by PRESORT for this branch.

    The location of a branch is determined by the source and sink EP names and tags.
    """

    _entry_cls = PresortSummaryEntry
    _entry_key = 'presort_sink_ep_tag'
    _ep_name_field = 'presort_sink_ep_name'
330
+
331
+
332
class CEPSummary(PerformanceSummary):
    """
    The last summary type produced by **measure_perf.exe**:
    the latency summary for root EPs of the executed top-level query in CEP mode.

    In CEP mode each root EP measures tick arrival latency
    before processing the tick and propagating it to the sinks, down by the graph.

    Note that for non-CEP mode this summary is not printed at all.

    This section tries to shed some light on the relationship between two variables and to estimate it:

    * dependent variable - tick latency
    * independent variable - tick arrival time into the root node.

    The relationship is described with some statistical analysis metrics.

    Please note that these values are calculated across all ticks in all symbols processed by the query.

    Calculated stats for ROOT EPs are printed once the query is finished and there are no more ticks left to arrive.

    The summary contains the mean of latencies, standard deviation,
    average slope of linear regression function (calculated by the least squares method),
    and average variance from the regression function computed based on latency numbers of ticks
    that are passed through each root EP of a top-level query.

    One line is printed for each root node, with the fields containing values of each of the metrics above.
    This summary should be enough to determine slow consumer queries and try to debug and optimize those.
    """

    _entry_cls = CEPSummaryEntry
    _entry_key = 'sink_ep_tag'
    _ep_name_field = 'sink_ep_name'
366
+
367
+
368
class PerformanceSummaryFile:
    def __init__(self, summary_file: Union[str, os.PathLike]):
        """
        Class to read and parse ``summary_file`` that was generated by OneTick's measure_perf.exe

        Parsed result is accessible via public properties of the class.

        Parameters
        ----------
        summary_file:
            path to the summary file.

        Examples
        --------
        >>> t = otp.Tick(A=1)
        >>> otq_file, summary_file = otp.perf.measure_perf(t)
        >>> result = otp.perf.PerformanceSummaryFile(summary_file)
        >>> print(result.ordinary_summary.dataframe)  # doctest: +ELLIPSIS
           index      EP_name  tag ...
        0      0  PASSTHROUGH    0 ...
        ...
        """
        #: path to the summary file
        self.summary_file = Path(summary_file)
        #: the text of the summary file
        self.summary_text = self.summary_file.read_text()
        ordinary, presort, cep = self._parse()
        #: :class:`Ordinary summary <onetick.py.perf.OrdinarySummary>`
        self.ordinary_summary = ordinary
        #: :class:`Presort summary <onetick.py.perf.PresortSummary>`
        self.presort_summary = presort
        #: :class:`CEP summary <onetick.py.perf.CEPSummary>`
        self.cep_summary = cep

    def _parse(self):
        """
        Split the summary text into csv tables and parse each one with the matching summary class.

        A table starts at a line beginning with ``index,`` and lasts until the next such line or the end of the text.
        Returns a tuple of ordinary, presort and CEP summaries; a summary without a table is created empty.
        Raises ValueError when a table header doesn't match any of the known summary classes.
        """
        known_summaries = (OrdinarySummary, PresortSummary, CEPSummary)
        lines = self.summary_text.splitlines(keepends=True)

        # index of the header line of each table, keyed by the summary class that matches the header
        header_positions = {}
        for line_no, line in enumerate(lines):
            if not line.startswith('index,'):
                continue
            columns = set(line.strip().split(','))
            matched_cls = next(
                (cls for cls in known_summaries if columns.issubset(cls._entry_cls.field_names())),
                None,
            )
            if matched_cls is None:
                raise ValueError("Can't parse performance summary")
            header_positions[matched_cls] = line_no

        # each table ends where the next one starts, the last one lasts till the end of the text
        starts = list(header_positions.values())
        ends = starts[1:] + [None]
        parsed = {
            cls: cls(''.join(lines[start:end]))
            for (cls, start), end in zip(header_positions.items(), ends)
        }

        return tuple(parsed.get(cls) or cls(None) for cls in known_summaries)
430
+
431
+
432
def _get_query_nodes(otq_file: str):
    """
    Collect the EP name and stack info of every node of the query.

    ``otq_file`` may have the query name appended after ``::``.
    Returns a dictionary mapping each node tag
    to a dictionary with keys ``node_tag``, ``ep_name`` and ``stack_info``.
    """
    file_path, _, query_name = otq_file.partition('::')
    query_info = query_inspector.get_query_info(file_path, query_name)

    def describe(tag, node):
        ep_name, stack_info = node._get_ep_name_and_stack_info()
        return {'node_tag': tag, 'ep_name': ep_name, 'stack_info': stack_info}

    return {tag: describe(tag, node) for tag, node in query_info.nodes.items()}
443
+
444
+
445
class MeasurePerformance(PerformanceSummaryFile):
    def __init__(self, src_or_otq, summary_file=None, context=adaptive):
        """
        Class to run OneTick's measure_perf.exe on the specified query and parse the result.

        Additionally some debug information about the python location of event processor objects
        may be added to the result if
        :py:attr:`stack_info<onetick.py.configuration.Config.stack_info>`
        configuration parameter is set.

        Parsed result is accessible via public properties of the class.

        Parameters
        ----------
        src_or_otq: :py:class:`~onetick.py.Source` or str
            :py:class:`~onetick.py.Source` object or path to already existing .otq file.
        summary_file: str
            path to the resulting summary file.
            By default some temporary file name will be used.
        context: str
            context that will be used to run the query.

        Examples
        --------
        >>> t = otp.Tick(A=1)
        >>> result = otp.perf.MeasurePerformance(t)
        >>> print(result.ordinary_summary.dataframe)  # doctest: +ELLIPSIS
           index      EP_name  tag ...
        0      0  PASSTHROUGH    0 ...
        ...
        """

        otq_file, summary_path = measure_perf(src_or_otq, summary_file, context)
        self.otq_file = otq_file
        super().__init__(summary_path)
        self._query_nodes = _get_query_nodes(self.otq_file)
        for summary in (self.ordinary_summary, self.presort_summary, self.cep_summary):
            self._add_debug_info_to_summary(summary)

    def _add_debug_info_to_summary(self, summary):
        """
        Fill **stack_info** and **traceback** fields of the entries in ``summary``.

        Entries whose tag is not found among the query nodes are skipped with a warning,
        entries of the nodes without stack info are left untouched.
        """
        from onetick.py._stack_info import _get_traceback_with_id

        ep_name_field = summary._ep_name_field
        for tag, entry in summary.entries_dict.items():
            node_info = self._query_nodes.get(tag)
            if node_info is None:
                warnings.warn(f"Can't find node with tag {tag} in file {self.otq_file}")
                continue
            # sanity check: the part of the summary EP name before '/' must match the name from the query
            assert entry[ep_name_field].split('/')[0] == node_info['ep_name']
            stack_info_uuid = node_info['stack_info']
            if not stack_info_uuid:
                continue
            traceback = _get_traceback_with_id(stack_info_uuid)
            entry['stack_info'] = stack_info_uuid
            entry['traceback'] = traceback
@@ -0,0 +1,49 @@
1
+ import os
2
+
3
+ from .config import get_config_param
4
+
5
+
6
def abspath_to_query_by_otq_path(otq_path, query_path):
    """
    Find the absolute path to a query file.

    ``otq_path`` is a comma-separated list of base directories and
    ``query_path`` is a short query path with ``/`` as the separator.
    The first existing combination of a base directory and the query path is returned,
    FileNotFoundError is raised when there is none.
    """
    relative_parts = query_path.split("/")
    candidates = (
        os.path.join(os.path.abspath(base_dir), *relative_parts)
        for base_dir in otq_path.split(",")
    )
    found = next(filter(os.path.exists, candidates), None)
    if found is None:
        raise FileNotFoundError(f'Query "{query_path}" is not found')
    return found
20
+
21
+
22
def abspath_to_query_by_name(query_path):
    """
    Resolve ``query_path`` to an absolute path of a query file.

    An existing absolute path is returned as is.
    Otherwise the query is searched in the directories listed in the OTQ_FILE_PATH parameter
    of the config file that the ONE_TICK_CONFIG environment variable points to;
    ValueError is raised when that variable is not set.
    """
    if os.path.isabs(query_path) and os.path.exists(query_path):
        return query_path

    config_file = os.environ.get("ONE_TICK_CONFIG")
    if config_file is None:
        raise ValueError("ONE_TICK_CONFIG is not set!")

    otq_path = get_config_param(config_file, "OTQ_FILE_PATH")
    return abspath_to_query_by_otq_path(otq_path, query_path)
32
+
33
+
34
def query_to_path_and_name(path):
    """
    Split a OneTick-like ``path`` to a query into the file path and the query name.

    The query name is the part after the last ``::`` separator.
    Single colons (a Windows drive letter, a port in a URL) are part of the file path
    and never cause a split.

    Returns
    -------
    Tuple ``(query_path, query_name)``.
    ``query_name`` is None and ``query_path`` is the whole ``path``
    when there is no ``::`` separator or nothing precedes it.
    """
    query_path, separator, query_name = path.rpartition("::")
    if separator and query_path:
        return query_path, query_name
    return path, None