onetick-py 1.162.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152) hide show
  1. locator_parser/__init__.py +0 -0
  2. locator_parser/acl.py +73 -0
  3. locator_parser/actions.py +266 -0
  4. locator_parser/common.py +365 -0
  5. locator_parser/io.py +41 -0
  6. locator_parser/locator.py +150 -0
  7. onetick/__init__.py +101 -0
  8. onetick/doc_utilities/__init__.py +3 -0
  9. onetick/doc_utilities/napoleon.py +40 -0
  10. onetick/doc_utilities/ot_doctest.py +140 -0
  11. onetick/doc_utilities/snippets.py +280 -0
  12. onetick/lib/__init__.py +4 -0
  13. onetick/lib/instance.py +138 -0
  14. onetick/py/__init__.py +290 -0
  15. onetick/py/_stack_info.py +89 -0
  16. onetick/py/_version.py +2 -0
  17. onetick/py/aggregations/__init__.py +11 -0
  18. onetick/py/aggregations/_base.py +645 -0
  19. onetick/py/aggregations/_docs.py +912 -0
  20. onetick/py/aggregations/compute.py +286 -0
  21. onetick/py/aggregations/functions.py +2216 -0
  22. onetick/py/aggregations/generic.py +104 -0
  23. onetick/py/aggregations/high_low.py +80 -0
  24. onetick/py/aggregations/num_distinct.py +83 -0
  25. onetick/py/aggregations/order_book.py +427 -0
  26. onetick/py/aggregations/other.py +1014 -0
  27. onetick/py/backports.py +26 -0
  28. onetick/py/cache.py +373 -0
  29. onetick/py/callback/__init__.py +5 -0
  30. onetick/py/callback/callback.py +275 -0
  31. onetick/py/callback/callbacks.py +131 -0
  32. onetick/py/compatibility.py +752 -0
  33. onetick/py/configuration.py +736 -0
  34. onetick/py/core/__init__.py +0 -0
  35. onetick/py/core/_csv_inspector.py +93 -0
  36. onetick/py/core/_internal/__init__.py +0 -0
  37. onetick/py/core/_internal/_manually_bound_value.py +6 -0
  38. onetick/py/core/_internal/_nodes_history.py +250 -0
  39. onetick/py/core/_internal/_op_utils/__init__.py +0 -0
  40. onetick/py/core/_internal/_op_utils/every_operand.py +9 -0
  41. onetick/py/core/_internal/_op_utils/is_const.py +10 -0
  42. onetick/py/core/_internal/_per_tick_scripts/tick_list_sort_template.script +121 -0
  43. onetick/py/core/_internal/_proxy_node.py +140 -0
  44. onetick/py/core/_internal/_state_objects.py +2307 -0
  45. onetick/py/core/_internal/_state_vars.py +87 -0
  46. onetick/py/core/_source/__init__.py +0 -0
  47. onetick/py/core/_source/_symbol_param.py +95 -0
  48. onetick/py/core/_source/schema.py +97 -0
  49. onetick/py/core/_source/source_methods/__init__.py +0 -0
  50. onetick/py/core/_source/source_methods/aggregations.py +810 -0
  51. onetick/py/core/_source/source_methods/applyers.py +296 -0
  52. onetick/py/core/_source/source_methods/columns.py +141 -0
  53. onetick/py/core/_source/source_methods/data_quality.py +301 -0
  54. onetick/py/core/_source/source_methods/debugs.py +270 -0
  55. onetick/py/core/_source/source_methods/drops.py +120 -0
  56. onetick/py/core/_source/source_methods/fields.py +619 -0
  57. onetick/py/core/_source/source_methods/filters.py +1001 -0
  58. onetick/py/core/_source/source_methods/joins.py +1393 -0
  59. onetick/py/core/_source/source_methods/merges.py +566 -0
  60. onetick/py/core/_source/source_methods/misc.py +1325 -0
  61. onetick/py/core/_source/source_methods/pandases.py +155 -0
  62. onetick/py/core/_source/source_methods/renames.py +356 -0
  63. onetick/py/core/_source/source_methods/sorts.py +183 -0
  64. onetick/py/core/_source/source_methods/switches.py +142 -0
  65. onetick/py/core/_source/source_methods/symbols.py +117 -0
  66. onetick/py/core/_source/source_methods/times.py +627 -0
  67. onetick/py/core/_source/source_methods/writes.py +702 -0
  68. onetick/py/core/_source/symbol.py +202 -0
  69. onetick/py/core/_source/tmp_otq.py +222 -0
  70. onetick/py/core/column.py +209 -0
  71. onetick/py/core/column_operations/__init__.py +0 -0
  72. onetick/py/core/column_operations/_methods/__init__.py +4 -0
  73. onetick/py/core/column_operations/_methods/_internal.py +28 -0
  74. onetick/py/core/column_operations/_methods/conversions.py +215 -0
  75. onetick/py/core/column_operations/_methods/methods.py +294 -0
  76. onetick/py/core/column_operations/_methods/op_types.py +150 -0
  77. onetick/py/core/column_operations/accessors/__init__.py +0 -0
  78. onetick/py/core/column_operations/accessors/_accessor.py +30 -0
  79. onetick/py/core/column_operations/accessors/decimal_accessor.py +92 -0
  80. onetick/py/core/column_operations/accessors/dt_accessor.py +464 -0
  81. onetick/py/core/column_operations/accessors/float_accessor.py +160 -0
  82. onetick/py/core/column_operations/accessors/str_accessor.py +1374 -0
  83. onetick/py/core/column_operations/base.py +1061 -0
  84. onetick/py/core/cut_builder.py +149 -0
  85. onetick/py/core/db_constants.py +20 -0
  86. onetick/py/core/eval_query.py +244 -0
  87. onetick/py/core/lambda_object.py +442 -0
  88. onetick/py/core/multi_output_source.py +193 -0
  89. onetick/py/core/per_tick_script.py +2253 -0
  90. onetick/py/core/query_inspector.py +465 -0
  91. onetick/py/core/source.py +1663 -0
  92. onetick/py/db/__init__.py +2 -0
  93. onetick/py/db/_inspection.py +1042 -0
  94. onetick/py/db/db.py +1423 -0
  95. onetick/py/db/utils.py +64 -0
  96. onetick/py/docs/__init__.py +0 -0
  97. onetick/py/docs/docstring_parser.py +112 -0
  98. onetick/py/docs/utils.py +81 -0
  99. onetick/py/functions.py +2354 -0
  100. onetick/py/license.py +188 -0
  101. onetick/py/log.py +88 -0
  102. onetick/py/math.py +947 -0
  103. onetick/py/misc.py +437 -0
  104. onetick/py/oqd/__init__.py +22 -0
  105. onetick/py/oqd/eps.py +1195 -0
  106. onetick/py/oqd/sources.py +325 -0
  107. onetick/py/otq.py +211 -0
  108. onetick/py/pyomd_mock.py +47 -0
  109. onetick/py/run.py +841 -0
  110. onetick/py/servers.py +173 -0
  111. onetick/py/session.py +1342 -0
  112. onetick/py/sources/__init__.py +19 -0
  113. onetick/py/sources/cache.py +167 -0
  114. onetick/py/sources/common.py +126 -0
  115. onetick/py/sources/csv.py +642 -0
  116. onetick/py/sources/custom.py +85 -0
  117. onetick/py/sources/data_file.py +305 -0
  118. onetick/py/sources/data_source.py +1049 -0
  119. onetick/py/sources/empty.py +94 -0
  120. onetick/py/sources/odbc.py +337 -0
  121. onetick/py/sources/order_book.py +238 -0
  122. onetick/py/sources/parquet.py +168 -0
  123. onetick/py/sources/pit.py +191 -0
  124. onetick/py/sources/query.py +495 -0
  125. onetick/py/sources/snapshots.py +419 -0
  126. onetick/py/sources/split_query_output_by_symbol.py +198 -0
  127. onetick/py/sources/symbology_mapping.py +123 -0
  128. onetick/py/sources/symbols.py +357 -0
  129. onetick/py/sources/ticks.py +825 -0
  130. onetick/py/sql.py +70 -0
  131. onetick/py/state.py +256 -0
  132. onetick/py/types.py +2056 -0
  133. onetick/py/utils/__init__.py +70 -0
  134. onetick/py/utils/acl.py +93 -0
  135. onetick/py/utils/config.py +186 -0
  136. onetick/py/utils/default.py +49 -0
  137. onetick/py/utils/file.py +38 -0
  138. onetick/py/utils/helpers.py +76 -0
  139. onetick/py/utils/locator.py +94 -0
  140. onetick/py/utils/perf.py +499 -0
  141. onetick/py/utils/query.py +49 -0
  142. onetick/py/utils/render.py +1139 -0
  143. onetick/py/utils/script.py +244 -0
  144. onetick/py/utils/temp.py +471 -0
  145. onetick/py/utils/types.py +118 -0
  146. onetick/py/utils/tz.py +82 -0
  147. onetick_py-1.162.2.dist-info/METADATA +148 -0
  148. onetick_py-1.162.2.dist-info/RECORD +152 -0
  149. onetick_py-1.162.2.dist-info/WHEEL +5 -0
  150. onetick_py-1.162.2.dist-info/entry_points.txt +2 -0
  151. onetick_py-1.162.2.dist-info/licenses/LICENSE +21 -0
  152. onetick_py-1.162.2.dist-info/top_level.txt +2 -0
@@ -0,0 +1,1374 @@
1
+ from typing import Optional
2
+
3
+ from onetick.py import types as ott
4
+ from onetick.py import configuration, utils
5
+ from onetick.py.core.column_operations.accessors._accessor import _Accessor
6
+ from onetick.py.core.column_operations.base import _Operation
7
+ from onetick.py.backports import Literal
8
+ from onetick.py.docs.utils import alias
9
+ from onetick.py.compatibility import is_ilike_supported
10
+
11
+
12
+ def _get_onetick_bool_string(value: bool) -> str:
13
+ if value:
14
+ return '"true"'
15
+ return '"false"'
16
+
17
+
18
+ class _StrAccessor(_Accessor):
19
+ """ Accessor for string functions
20
+
21
+ >>> data = otp.Ticks(X=['some string'])
22
+ >>> data["Y"] = data["X"].str.<function_name>() # doctest: +SKIP
23
+ """
24
class Formatter(_Operation):
    """Operation whose expression text is produced by a ``formatter`` callable."""

    def __init__(self, dtype, formatter, op_params):
        # The operation function pairs the rendered expression with the
        # declared result type.
        def op_func(*args, **kwargs):
            rendered = formatter(*args, **kwargs)
            return rendered, dtype

        super().__init__(op_func=op_func, op_params=op_params, dtype=dtype)
31
+
32
def to_datetime(self,
                format='%Y/%m/%d %H:%M:%S.%J',
                timezone=None,
                unit: Optional[Literal['ms', 'ns']] = None):
    """
    Parse a formatted time string into the number of nanoseconds (datetime) since 1970/01/01 GMT.

    Parameters
    ----------
    format: str, Operation, Column
        Arbitrary characters are allowed in the format, but the character
        combinations below are interpreted specially

        %Y - Year (4 digits)

        %y - Year (2 digits)

        %m - Month (2 digits)

        %d - Day of month (2 digits)

        %H - Hours (2 digits, 24-hour format)

        %I - Hours (2 digits, 12-hour format)

        %M - Minutes (2 digits)

        %S - Seconds (2 digits)

        %J - Nanoseconds (9 digits)

        %p - AM/PM (2 characters)

        Any ``%f`` in the rendered format is rewritten to ``%J``.

    timezone: str | Operation | Column
        Timezone. The timezone of the query will be used if no ``timezone`` was passed.

    unit: str, optional
        If set, `format` and `timezone` are ignored.
        If equals to `ns`, constructs a nanosecond-granularity timestamp from a millisecond-granularity
        string. It has the following format: < milliseconds since 1970/01/01 GMT >.< fraction of a millisecond >.
        The fraction might have at most six digits. If the fraction is equal to zero,
        .< fraction of a millisecond > is optional.
        If equals to `ms`, constructs a millisecond-granularity timestamp from a millisecond-granularity
        string. It has the following format: < milliseconds since 1970/01/01 GMT >.

    Returns
    -------
    Operation
        :py:class:`nsectime <onetick.py.types.nsectime>` Operation obtained from the string

    Raises
    ------
    ValueError
        If ``unit`` is set to anything other than ``'ns'`` or ``'ms'``.

    Examples
    --------
    >>> # OTdirective: snippet-name: string.to timestamp;
    >>> data = otp.Tick(X='5/17/22-11:10:56.123456789')
    >>> data['Y'] = data['X'].str.to_datetime('%m/%d/%y-%H:%M:%S.%J', 'Europe/London')
    >>> otp.run(data)
            Time                           X                             Y
    0 2003-12-01  5/17/22-11:10:56.123456789 2022-05-17 06:10:56.123456789

    >>> data = otp.Ticks(A=['1693825877111.002001', '1693825877112'])
    >>> data['NSECTIME_A'] = data['A'].str.to_datetime(unit='ns')
    >>> otp.run(data)
                         Time                     A                    NSECTIME_A
    0 2003-12-01 00:00:00.000  1693825877111.002001 2023-09-04 07:11:17.111002001
    1 2003-12-01 00:00:00.001         1693825877112 2023-09-04 07:11:17.112000000

    >>> data = otp.Tick(A='1693825877111')
    >>> data['MSECTIME_A'] = data['A'].str.to_datetime(unit='ms')
    >>> otp.run(data)
            Time              A              MSECTIME_A
    0 2003-12-01  1693825877111 2023-09-04 07:11:17.111
    """
    if unit is not None:
        # Epoch-string mode: ``format`` and ``timezone`` are not consulted.
        if unit == 'ns':
            def ns_formatter(column):
                return f'MSEC_STR_TO_NSECTIME({ott.value2str(column)})'

            return _StrAccessor.Formatter(
                op_params=[self._base_column],
                dtype=ott.nsectime,
                formatter=ns_formatter,
            )
        if unit == 'ms':
            def ms_formatter(column):
                return f'GET_MSECS(MSEC_STR_TO_NSECTIME({ott.value2str(column)}))'

            return _StrAccessor.Formatter(
                op_params=[self._base_column],
                dtype=ott.msectime,
                formatter=ms_formatter,
            )
        raise ValueError(f'`{unit}` is unsupported value for `unit` parameter')

    # Format-string mode: the ``utils.default`` sentinel selects the configured timezone.
    if timezone is utils.default:
        timezone = configuration.config.tz

    def formatter(column, fmt, tz):
        column_str = ott.value2str(column)
        tz_str, format_str = self._preprocess_tz_and_format(tz, fmt)
        # Accept ``%f`` by mapping it onto ``%J``.
        format_str = format_str.replace('%f', '%J')
        return f'parse_nsectime({format_str},{column_str},{tz_str})'

    return _StrAccessor.Formatter(
        op_params=[self._base_column, format, timezone],
        dtype=ott.nsectime,
        formatter=formatter,
    )
133
+
134
# Publish ``to_datetime`` under the additional name ``strptime``; the
# ``doc_replacer`` callback swaps 'to_datetime' for 'strptime' in the text it is given.
# NOTE(review): ``alias`` is imported from onetick.py.docs.utils and is not
# visible here -- confirm how it applies ``doc_replacer``.
strptime = alias(to_datetime,
                 doc_replacer=lambda doc: doc.replace('to_datetime', 'strptime'))
136
+
137
def token(self, sep=" ", n=0):
    """
    Split the value into tokens on the delimiter ``sep``
    and return the token at position ``n`` (zero-based).

    An empty string is returned when there are too few tokens to reach position ``n``.

    Parameters
    ----------
    sep: str or Column or Operation
        The delimiter, which must be a single character used to split the string into tokens.
    n: int, Operation
        Token index to return. For a negative ``n``, count from the end instead of the beginning.
        If index is out of range, then empty string is returned.

    Returns
    -------
    Operation
        token at position ``n`` or empty string.

    Raises
    ------
    ValueError
        If ``sep`` is a Python string whose length is not exactly one.

    Examples
    --------
    >>> # OTdirective: snippet-name: string.token;
    >>> data = otp.Tick(X='US_COMP::TRD')
    >>> data['Y'] = data['X'].str.token(':', -1)
    >>> otp.run(data)
            Time             X    Y
    0 2003-12-01  US_COMP::TRD  TRD

    Other columns can be used as parameters too:

    >>> data = otp.Tick(X='US_COMP::TRD', SEP=':', N=-1)
    >>> data['Y'] = data['X'].str.token(data['SEP'], data['N'])
    >>> otp.run(data)
            Time             X  SEP   N    Y
    0 2003-12-01  US_COMP::TRD    :  -1  TRD

    If index is out of range, then empty string is returned:

    >>> data = otp.Tick(X='US_COMP::TRD')
    >>> data['Y'] = data['X'].str.token(':', 999)
    >>> otp.run(data)
            Time             X  Y
    0 2003-12-01  US_COMP::TRD
    """
    # Only literal separators can be validated here; columns and operations pass through.
    if isinstance(sep, str) and len(sep) != 1:
        raise ValueError("Function '.str.token()' expects parameter 'sep' to be a single character")

    def render(column, sep, n):
        column_str = ott.value2str(column)
        n_str = ott.value2str(n)
        sep_str = ott.value2str(sep)
        # The index is placed before the separator in the generated call.
        return f'token({column_str},{n_str},{sep_str})'

    return _StrAccessor.Formatter(
        op_params=[self._base_column, sep, n],
        dtype=self._base_column.dtype,
        formatter=render,
    )
189
+
190
def match(self, pat, case=True):
    r"""
    Test the text against the regular expression given in the ``pat`` parameter.

    Parameters
    ----------
    pat: str or Column or Operation
        A pattern specified via the POSIX extended regular expression syntax.
    case: bool
        If ``True``, then regular expression is case-sensitive.

    Returns
    -------
    Operation
        ``True`` if the match was successful, ``False`` otherwise.
        Note that boolean Operation is converted to float if added as a column.

    Examples
    --------
    >>> # OTdirective: snippet-name: string.match;
    >>> data = otp.Ticks(X=['hello', 'there were 77 ticks'])
    >>> data['Y'] = data['X'].str.match(r'\d\d')
    >>> otp.run(data)
                         Time                    X    Y
    0 2003-12-01 00:00:00.000                hello  0.0
    1 2003-12-01 00:00:00.001  there were 77 ticks  1.0

    Other columns can be used as parameter ``pat`` too:

    >>> data = otp.Tick(X='OneTick', PAT='onetick')
    >>> data['Y'] = data['X'].str.match(data['PAT'], case=False)
    >>> otp.run(data)
            Time        X      PAT    Y
    0 2003-12-01  OneTick  onetick  1.0

    ``match`` function can also be used as a filter.
    For example, to filter on-exchange continuous trading trades:

    >>> q = otp.DataSource('US_COMP', tick_type='TRD', symbols=['SPY'])  # doctest: +SKIP
    >>> q = q[['PRICE', 'SIZE', 'COND', 'EXCHANGE']]  # doctest: +SKIP
    >>> q, _ = q[q['COND'].str.match('^[^O6TUHILNRWZ47QMBCGPV]*$')]  # doctest: +SKIP
    >>> otp.run(q, start=otp.dt(2023, 5, 15, 9, 30), end=otp.dt(2023, 5, 15, 9, 30, 1))  # doctest: +SKIP
                                 Time    PRICE  SIZE  COND EXCHANGE
    0   2023-05-15 09:30:00.000776704  412.220   247              Z
    1   2023-05-15 09:30:00.019069440  412.230   100   F          K
    ..                            ...      ...   ...   ...      ...
    """
    # The generated call takes a "caseless" flag, i.e. the negation of ``case``.
    caseless = _get_onetick_bool_string(not case)

    def render(column, pat):
        column_str = ott.value2str(column)
        pat_str = ott.value2str(pat)
        return f'regex_match({column_str},{pat_str},{caseless})'

    return _StrAccessor.Formatter(
        op_params=[self._base_column, pat],
        dtype=bool,
        formatter=render,
    )
243
+
244
def len(self):
    """
    Return the length of a string.

    Returns
    -------
    Operation
        The length of the string.
        If a null-character (byte with value ``0``) is present in the string,
        its position (0-based) is returned.

    Examples
    --------
    >>> # OTdirective: snippet-name: string.len;
    >>> data = otp.Ticks(X=['hello', 'world!'])
    >>> data['LEN'] = data['X'].str.len()
    >>> otp.run(data)
                         Time       X  LEN
    0 2003-12-01 00:00:00.000   hello    5
    1 2003-12-01 00:00:00.001  world!    6
    """
    def render(column):
        return f'strlen({ott.value2str(column)})'

    return _StrAccessor.Formatter(
        op_params=[self._base_column],
        dtype=int,
        formatter=render,
    )
268
+
269
def contains(self, substr):
    """
    Tell whether the string contains ``substr``.

    Note
    ----
    This function does not support regular expressions.
    Use :func:`match` for this purpose.

    Parameters
    ----------
    substr: str or Column or Operation
        A substring to search for within the string.

    Returns
    -------
    Operation
        ``True`` if the string contains the substring, ``False`` otherwise.
        Note that boolean Operation is converted to float if added as a column.

    Examples
    --------
    >>> # OTdirective: snippet-name: string.contains;
    >>> data = otp.Ticks(X=['hello', 'world!'])
    >>> data['CONTAINS'] = data['X'].str.contains('hel')
    >>> otp.run(data)
                         Time       X  CONTAINS
    0 2003-12-01 00:00:00.000   hello       1.0
    1 2003-12-01 00:00:00.001  world!       0.0

    Other columns can be used as parameter ``substr`` too:

    >>> # OTdirective: snippet-name: string.contains another field;
    >>> data = otp.Ticks(X=['hello', 'big', 'world!'],
    ...                  Y=['hel', 'wor', 'wor'])
    >>> data['CONTAINS'] = data['X'].str.contains(data['Y'])
    >>> otp.run(data)
                         Time       X    Y  CONTAINS
    0 2003-12-01 00:00:00.000   hello  hel       1.0
    1 2003-12-01 00:00:00.001     big  wor       0.0
    2 2003-12-01 00:00:00.002  world!  wor       1.0

    This method can also be used for filtering:

    >>> # OTdirective: snippet-name: string.contains as a filter;
    >>> data = otp.Ticks(X=['Hello', 'World'])
    >>> with_substr, wo_substr = data[data['X'].str.contains('Hel')]
    >>> otp.run(with_substr)
            Time      X
    0 2003-12-01  Hello
    """
    def render(column, substr):
        haystack = ott.value2str(column)
        needle = ott.value2str(substr)
        # The generated expression treats any ``instr`` result above -1 as a hit.
        return f'instr({haystack}, {needle}) > -1'

    return _StrAccessor.Formatter(
        op_params=[self._base_column, substr],
        dtype=bool,
        formatter=render,
    )
325
+
326
def trim(self):
    """
    Strip white spaces from both ends of the string.

    Returns
    -------
    Operation
        Trimmed string

    See Also
    --------
    :meth:`ltrim`, :meth:`rtrim`

    Examples
    --------
    >>> # OTdirective: snippet-name: string.trim;
    >>> data = otp.Ticks(X=['  Hello', 'World  '])
    >>> data['X'] = data['X'].str.trim()
    >>> otp.run(data)
                         Time      X
    0 2003-12-01 00:00:00.000  Hello
    1 2003-12-01 00:00:00.001  World
    """
    def render(column):
        return f'trim({ott.value2str(column)})'

    # The result keeps the type of the source column.
    return _StrAccessor.Formatter(
        op_params=[self._base_column],
        dtype=self._base_column.dtype,
        formatter=render,
    )
352
+
353
def ltrim(self):
    """
    Strip the leading white spaces from a string.

    Returns
    -------
    Operation
        Trimmed string

    See Also
    --------
    :meth:`trim`, :meth:`rtrim`
    """
    def render(column):
        return f'ltrim({ott.value2str(column)})'

    # The result keeps the type of the source column.
    return _StrAccessor.Formatter(
        op_params=[self._base_column],
        dtype=self._base_column.dtype,
        formatter=render,
    )
369
+
370
def rtrim(self):
    """
    Strip the trailing white spaces from a string.

    Returns
    -------
    Operation
        Trimmed string

    See Also
    --------
    :meth:`ltrim`, :meth:`trim`
    """
    def render(column):
        return f'rtrim({ott.value2str(column)})'

    # The result keeps the type of the source column.
    return _StrAccessor.Formatter(
        op_params=[self._base_column],
        dtype=self._base_column.dtype,
        formatter=render,
    )
386
+
387
def lower(self):
    """
    Turn a string into lower case.

    Returns
    -------
    Operation
        Lowercased string

    Examples
    --------
    >>> # OTdirective: snippet-name: string.lower;
    >>> data = otp.Ticks(X=['HeLlO', 'wOrLd!'])
    >>> data['LOW'] = data['X'].str.lower()
    >>> otp.run(data)
                         Time       X     LOW
    0 2003-12-01 00:00:00.000   HeLlO   hello
    1 2003-12-01 00:00:00.001  wOrLd!  world!
    """
    def render(column):
        return f'lower({ott.value2str(column)})'

    # The result keeps the type of the source column.
    return _StrAccessor.Formatter(
        op_params=[self._base_column],
        dtype=self._base_column.dtype,
        formatter=render,
    )
409
+
410
def upper(self):
    """
    Turn a string into upper case.

    Returns
    -------
    Operation
        Uppercased string

    Examples
    --------
    >>> # OTdirective: snippet-name: string.upper;
    >>> data = otp.Ticks(X=['HeLlO', 'wOrLd!'])
    >>> data['UP'] = data['X'].str.upper()
    >>> otp.run(data)
                         Time       X      UP
    0 2003-12-01 00:00:00.000   HeLlO   HELLO
    1 2003-12-01 00:00:00.001  wOrLd!  WORLD!
    """
    def render(column):
        return f'upper({ott.value2str(column)})'

    # The result keeps the type of the source column.
    return _StrAccessor.Formatter(
        op_params=[self._base_column],
        dtype=self._base_column.dtype,
        formatter=render,
    )
432
+
433
def replace(self, pat, repl):
    """
    Find occurrences (case dependent) of ``pat`` and substitute ``repl`` for them.

    Parameters
    ----------
    pat: str or Column or Operation
        Pattern to replace.
    repl: str or Column or Operation
        Replacement string.

    Returns
    -------
    Operation
        String with ``pat`` replaced by ``repl``.

    Examples
    --------
    >>> # OTdirective: snippet-name: string.replace;
    >>> data = otp.Ticks(X=['A Table', 'A Chair', 'An Apple'])
    >>> data['Y'] = data['X'].str.replace('A', 'The')
    >>> otp.run(data)
                         Time         X             Y
    0 2003-12-01 00:00:00.000   A Table     The Table
    1 2003-12-01 00:00:00.001   A Chair     The Chair
    2 2003-12-01 00:00:00.002  An Apple  Then Thepple

    Other columns can be used as parameters too:

    >>> # OTdirective: snippet-name: string.replace from field;
    >>> data = otp.Ticks(X=['A Table', 'A Chair', 'An Apple'],
    ...                  PAT=['A', 'A', 'An'],
    ...                  REPL=['The', 'Their', 'My'])
    >>> data['Y'] = data['X'].str.replace(data['PAT'], data['REPL'])
    >>> otp.run(data)
                         Time         X  PAT   REPL            Y
    0 2003-12-01 00:00:00.000   A Table    A    The    The Table
    1 2003-12-01 00:00:00.001   A Chair    A  Their  Their Chair
    2 2003-12-01 00:00:00.002  An Apple   An     My     My Apple
    """
    # see, BDS-112: arguments that are not Python strings get trailing
    # white spaces stripped via ``.str.rtrim()`` before being used.
    pat, repl = (
        arg if isinstance(arg, str) else arg.str.rtrim()
        for arg in (pat, repl)
    )

    def render(column, pat, repl):
        column_str = ott.value2str(column)
        pat_str = ott.value2str(pat)
        repl_str = ott.value2str(repl)
        return f'replace({column_str}, {pat_str}, {repl_str})'

    return _StrAccessor.Formatter(
        op_params=[self._base_column, pat, repl],
        dtype=self._base_column.dtype,
        formatter=render,
    )
486
+
487
def regex_replace(self, pat, repl, *, replace_every=False, caseless=False):
    r"""
    Find text matching the regular expression ``pat`` and substitute ``repl`` for it.
    Matching is case dependent unless ``caseless`` is set.

    Parameters
    ----------
    pat: str or Column or Operation
        Pattern to replace specified via the POSIX extended regular expression syntax.
    repl: str or Column or Operation
        Replacement string. ``\0`` refers to the entire matched text. ``\1`` to ``\9`` refer
        to the text matched by the corresponding parenthesized group in ``pat``.
    replace_every: bool
        If ``replace_every`` flag is set to ``True``, all matches will be replaced, if ``False`` only the first one.
    caseless: bool
        If the ``caseless`` flag is set to ``True``, matching is case-insensitive.

    Returns
    -------
    Operation
        String with pattern ``pat`` replaced by ``repl``.

    See Also
    --------
    :meth:`extract`

    Examples
    --------
    >>> # OTdirective: snippet-name: string.regex replace;
    >>> data = otp.Ticks(X=['A Table', 'A Chair', 'An Apple'])
    >>> data['Y'] = data['X'].str.regex_replace('An? ', 'The ')
    >>> otp.run(data)
                         Time         X          Y
    0 2003-12-01 00:00:00.000   A Table  The Table
    1 2003-12-01 00:00:00.001   A Chair  The Chair
    2 2003-12-01 00:00:00.002  An Apple  The Apple

    Parameter ``replace_every`` will replace all occurrences of ``pat`` in the string:

    >>> # OTdirective: snippet-name: string.regex replace all;
    >>> data = otp.Ticks(X=['A Table, A Chair, An Apple'])
    >>> data['Y'] = data['X'].str.regex_replace('An? ', 'The ', replace_every=True)
    >>> otp.run(data)
            Time                           X                                Y
    0 2003-12-01  A Table, A Chair, An Apple  The Table, The Chair, The Apple

    Capturing groups in regular expressions is supported:

    >>> # OTdirective: snippet-name: string.regex groups;
    >>> data = otp.Ticks(X=['11/12/1992', '9/22/1993', '3/30/1991'])
    >>> data['Y'] = data['X'].str.regex_replace(r'(\d{1,2})/(\d{1,2})/', r'\2.\1.')
    >>> otp.run(data)
                         Time           X           Y
    0 2003-12-01 00:00:00.000  11/12/1992  12.11.1992
    1 2003-12-01 00:00:00.001   9/22/1993   22.9.1993
    2 2003-12-01 00:00:00.002   3/30/1991   30.3.1991
    """
    # Both flags are embedded in the expression as quoted OneTick booleans.
    every_flag = _get_onetick_bool_string(replace_every)
    caseless_flag = _get_onetick_bool_string(caseless)

    def render(column, pat, repl):
        column_str = ott.value2str(column)
        pat_str = ott.value2str(pat)
        repl_str = ott.value2str(repl)
        return f'regex_replace({column_str}, {pat_str}, {repl_str}, {every_flag}, {caseless_flag})'

    return _StrAccessor.Formatter(
        op_params=[self._base_column, pat, repl],
        dtype=self._base_column.dtype,
        formatter=render,
    )
551
+
552
def find(self, sub, start=0):
    """
    Locate ``sub`` in the string and return its index. If not found, returns ``-1``.

    Parameters
    ----------
    sub: str or Column or Operation
        Substring to find.
    start: int or Column or Operation
        Starting position to find.

    Returns
    -------
    Operation
        The starting position of the substring or ``-1`` if it is not found.

    Examples
    --------
    >>> data = otp.Ticks(X=['ananas', 'banana', 'potato'])
    >>> data['Y'] = data['X'].str.find('ana')  # OTdirective: snippet-name: string.find;
    >>> otp.run(data)
                         Time       X   Y
    0 2003-12-01 00:00:00.000  ananas   0
    1 2003-12-01 00:00:00.001  banana   1
    2 2003-12-01 00:00:00.002  potato  -1

    Other columns can be used as parameter ``sub`` too:

    >>> # OTdirective: snippet-name: string.find field value;
    >>> data = otp.Ticks(X=['Ananas', 'Banana', 'Potato'], sub=['Ana', 'anan', 'ato'])
    >>> data['Y'] = data['X'].str.find(data['sub'])
    >>> otp.run(data)
                         Time       X   sub  Y
    0 2003-12-01 00:00:00.000  Ananas   Ana  0
    1 2003-12-01 00:00:00.001  Banana  anan  1
    2 2003-12-01 00:00:00.002  Potato   ato  3

    Note that empty string will be found at the start of any string:

    >>> data = otp.Ticks(X=['string', ''])
    >>> data['Y'] = data['X'].str.find('')
    >>> otp.run(data)
                         Time       X  Y
    0 2003-12-01 00:00:00.000  string  0
    1 2003-12-01 00:00:00.001          0

    ``start`` parameter is used to find ``sub`` starting from selected position:

    >>> data = otp.Ticks(X=['ababab', 'abbbbb'])
    >>> data['Y'] = data['X'].str.find('ab', 1)
    >>> otp.run(data)
                         Time       X   Y
    0 2003-12-01 00:00:00.000  ababab   2
    1 2003-12-01 00:00:00.001  abbbbb  -1
    """
    def render(column, sub, start):
        sub_str = ott.value2str(sub)
        column_str = ott.value2str(column)
        # ``start`` is shifted up by one for LOCATE and the result is
        # shifted back down by one, keeping this method zero-based.
        start_str = ott.value2str(start + 1)
        return f'LOCATE({sub_str}, {column_str}, {start_str})-1'

    return _StrAccessor.Formatter(
        op_params=[self._base_column, sub, start],
        dtype=int,
        formatter=render,
    )
615
+
616
def repeat(self, repeats):
    """
    Concatenate ``repeats`` copies of a string.

    Note
    ----
    * Alternative for the ``repeat`` function is multiplication.
    * The returned string has the same type and maximum length as the original field.

    Parameters
    ----------
    repeats: int or Column or Operation
        Non-negative number of copies of the string.
        Repeating zero times results in empty string.
        Repeating negative number of times results in exception.

    Returns
    -------
    Operation
        String repeated ``repeats`` times.

    Examples
    --------
    >>> # OTdirective: snippet-name: string.repeat;
    >>> data = otp.Ticks(X=['Banana', 'Ananas', 'Apple'])
    >>> data['X'] = data['X'].str.repeat(3)
    >>> otp.run(data)
                         Time                   X
    0 2003-12-01 00:00:00.000  BananaBananaBanana
    1 2003-12-01 00:00:00.001  AnanasAnanasAnanas
    2 2003-12-01 00:00:00.002     AppleAppleApple

    Other columns can be used as parameter ``repeats`` too:

    >>> # OTdirective: snippet-name: string.repeat from a field;
    >>> data = otp.Ticks(X=['Banana', 'Ananas', 'Apple'], TIMES=[1, 3, 2])
    >>> data['Y'] = data['X'].str.repeat(data['TIMES'])
    >>> otp.run(data)
                         Time       X  TIMES                   Y
    0 2003-12-01 00:00:00.000  Banana      1              Banana
    1 2003-12-01 00:00:00.001  Ananas      3  AnanasAnanasAnanas
    2 2003-12-01 00:00:00.002   Apple      2          AppleApple

    The returned string has the same type and therefore the same maximum length as the original field:

    >>> data = otp.Ticks(X=[otp.string[9]('Banana')])
    >>> data['Y'] = data['X'].str.repeat(3)
    >>> data.schema
    {'X': string[9], 'Y': string[9]}
    >>> otp.run(data)
            Time       X          Y
    0 2003-12-01  Banana  BananaBan

    ``repeat`` does the same thing as multiplication by a non-negative int:

    >>> # OTdirective: snippet-name: string.repeat by multiplication;
    >>> data = otp.Ticks(X=['Banana'], N=[2])
    >>> data['X2'] = data['X'] * data['N']
    >>> data['X3'] = data['X'] * 3
    >>> otp.run(data)
            Time       X  N            X2                  X3
    0 2003-12-01  Banana  2  BananaBanana  BananaBananaBanana

    Multiplying by 0 results in empty string:

    >>> data = otp.Ticks(X=['Banana', 'Apple'])
    >>> data['Y'] = data['X'].str.repeat(0)
    >>> otp.run(data)
                         Time       X  Y
    0 2003-12-01 00:00:00.000  Banana
    1 2003-12-01 00:00:00.001   Apple
    """
    def render(column, repeats):
        column_str = ott.value2str(column)
        repeats_str = ott.value2str(repeats)
        return f'repeat({column_str}, {repeats_str})'

    # The result is declared with the type of the source column.
    return _StrAccessor.Formatter(
        op_params=[self._base_column, repeats],
        dtype=self._base_column.dtype,
        formatter=render,
    )
693
+
694
def extract(self, pat, rewrite=r"\0", caseless=False):
    r"""
    Return the first substring that matches the regular expression ``pat``.

    The optional ``rewrite`` template controls how the matched text and its
    parenthesized groups are arranged in the resulting string.

    Parameters
    ----------
    pat: str or Column or Operation
        Pattern to search for, written in the POSIX extended regular expression syntax.
    rewrite: str or Column or Operation
        Template that specifies how to arrange the matched text.
        ``\0`` stands for the entire matched text, ``\1`` to ``\9`` stand for the text
        matched by the corresponding parenthesized group in ``pat``.
        The ``\u`` and ``\l`` modifiers convert the case of the text matched by the
        corresponding group (e.g., ``\u1`` converts ``\1`` to uppercase).
    caseless: bool
        Set to ``True`` to make the matching case-insensitive.

    Returns
    -------
    Operation
        String matched by ``pat``, arranged as specified in ``rewrite``.

    See Also
    --------
    regex_replace

    Examples
    --------
    >>> # OTdirective: snippet-name: string.regex extract;
    >>> data = otp.Ticks(X=['Mr. Smith: +1348 +4781', 'Ms. Smith: +8971'])
    >>> data['TEL'] = data['X'].str.extract(r'\+\d{4}')
    >>> otp.run(data)
                         Time                       X    TEL
    0 2003-12-01 00:00:00.000  Mr. Smith: +1348 +4781  +1348
    1 2003-12-01 00:00:00.001        Ms. Smith: +8971  +8971

    The group to extract can be selected with the ``rewrite`` parameter:

    >>> # OTdirective: snippet-name: string.regex extract group;
    >>> data = otp.Ticks(X=['Mr. Smith: 1992/12/22', 'Ms. Smith: 1989/10/15'])
    >>> data['BIRTH_YEAR'] = data['X'].str.extract(r'(\d{4})/(\d{2})/(\d{2})', rewrite=r'birth year: \1')
    >>> otp.run(data)
                         Time                      X        BIRTH_YEAR
    0 2003-12-01 00:00:00.000  Mr. Smith: 1992/12/22  birth year: 1992
    1 2003-12-01 00:00:00.001  Ms. Smith: 1989/10/15  birth year: 1989

    Both ``pat`` and ``rewrite`` can be taken from columns:

    >>> # OTdirective: snippet-name: string.regex extract from field;
    >>> data = otp.Ticks(X=['Kelly, Mr. James', 'Wilkes, Mrs. James', 'Connolly, Miss. Kate'],
    ...                  PAT=['(Mrs?)\\.', '(Mrs?)\\.', '(Miss)\\.'],
    ...                  REWRITE=['Title 1: \\1', 'Title 2: \\1', 'Title 3: \\1'])
    >>> data['TITLE'] = data['X'].str.extract(data['PAT'], rewrite=data['REWRITE'])
    >>> otp.run(data)
                         Time                     X       PAT      REWRITE          TITLE
    0 2003-12-01 00:00:00.000      Kelly, Mr. James  (Mrs?)\.  Title 1: \1    Title 1: Mr
    1 2003-12-01 00:00:00.001    Wilkes, Mrs. James  (Mrs?)\.  Title 2: \1   Title 2: Mrs
    2 2003-12-01 00:00:00.002  Connolly, Miss. Kate  (Miss)\.  Title 3: \1  Title 3: Miss

    The case of the extracted text can be changed by adding ``l`` and ``u`` to the group reference:

    >>> # OTdirective: snippet-name: string.regex extract caseless;
    >>> data = otp.Ticks(NAME=['mr. BroWn', 'Ms. smITh'])
    >>> data['RESULT'] = data['NAME'].str.extract(r'(m)([rs]\. )([a-z])([a-z]*)', r'\u1\l2\u3\l4', caseless=True)
    >>> otp.run(data)
                         Time       NAME     RESULT
    0 2003-12-01 00:00:00.000  mr. BroWn  Mr. Brown
    1 2003-12-01 00:00:00.001  Ms. smITh  Ms. Smith
    """
    # the flag is converted once, the renderer below only embeds the resulting text
    caseless_flag = _get_onetick_bool_string(caseless)

    def _render(column, pat, rewrite):
        rendered = ', '.join(ott.value2str(arg) for arg in (column, pat, rewrite))
        return f'regex_extract({rendered}, {caseless_flag})'

    base = self._base_column
    return _StrAccessor.Formatter(
        op_params=[base, pat, rewrite],
        dtype=base.dtype,
        formatter=_render,
    )
773
+
774
def substr(self, start, n_bytes=None, rtrim=False):
    """
    Return ``n_bytes`` characters of the string starting from ``start``.

    A non-negative ``start`` is a 0-based position counted from the beginning of the string,
    a negative ``start`` is counted from the end of the string.
    When ``n_bytes`` is omitted, everything from ``start`` till the end of the string is returned.

    Parameters
    ----------
    start: int or Column or Operation
        Index of the first symbol of the substring.
    n_bytes: int or Column or Operation
        Number of bytes in the substring.
    rtrim: bool
        If set to ``True``, the original string is trimmed from the right side
        before the substring is taken; this can be useful with a negative ``start`` index.

    Returns
    -------
    Operation
        Substring of the string (``n_bytes`` long, starting at ``start``).

    Examples
    --------
    >>> # OTdirective: snippet-name: string.substring;
    >>> data = otp.Ticks(X=['abcdef', '12345 '], START_INDEX=[2, 1], N=[2, 3])
    >>> data['FIRST_3'] = data['X'].str.substr(0, 3)
    >>> data['LAST_3'] = data['X'].str.substr(-3, rtrim=True)
    >>> data['CENTER'] = data['X'].str.substr(data['START_INDEX'], data['N'])
    >>> otp.run(data)
                         Time       X  START_INDEX  N  FIRST_3  LAST_3  CENTER
    0 2003-12-01 00:00:00.000  abcdef            2  2      abc     def      cd
    1 2003-12-01 00:00:00.001  12345             1  3      123     345     234
    """
    source = self._base_column.str.rtrim() if rtrim else self._base_column

    if n_bytes is not None:
        params = [source, start, n_bytes]

        def _render(column, start, n_bytes):
            return f'substr({ott.value2str(column)}, {ott.value2str(start)}, {ott.value2str(n_bytes)})'
    else:
        # two-argument form: the substring runs till the end of the string
        params = [source, start]

        def _render(column, start):
            return f'substr({ott.value2str(column)}, {ott.value2str(start)})'

    # the result type is taken from the original column, not from the trimmed one
    return _StrAccessor.Formatter(
        op_params=params,
        dtype=self._base_column.dtype,
        formatter=_render,
    )
833
+
834
def get(self, i):
    """
    Return the character located at the 0-based position ``i``.

    An empty string is returned if the position is greater than or equal to the length of the string.

    Parameters
    ----------
    i: int or Column or Operation
        Index of the character to find.

    Returns
    -------
    Operation
        Single character at position ``i`` or an empty string.

    Examples
    --------
    >>> data = otp.Ticks(X=['abcdef', '12345 ', 'qw'], GET_INDEX=[2, 1, 0])
    >>> data['THIRD'] = data['X'].str.get(2)
    >>> data['FROM_INDEX'] = data['X'].str.get(data['GET_INDEX'])
    >>> otp.run(data)
                         Time       X  GET_INDEX  THIRD  FROM_INDEX
    0 2003-12-01 00:00:00.000  abcdef          2      c           c
    1 2003-12-01 00:00:00.001  12345           1      3           2
    2 2003-12-01 00:00:00.002      qw          0                  q

    The indexer syntax calls this method too:

    >>> data = otp.Ticks(X=['abcdef', '12345 ', 'qw'])
    >>> data['THIRD'] = data['X'].str[1]
    >>> otp.run(data)
                         Time       X  THIRD
    0 2003-12-01 00:00:00.000  abcdef      b
    1 2003-12-01 00:00:00.001  12345       2
    2 2003-12-01 00:00:00.002      qw      w
    """
    def _render(column, i):
        byte_at = f'BYTE_AT({ott.value2str(column)}, {ott.value2str(i)})'
        # a BYTE_AT result of -1 is mapped to an empty string, any other value goes through CHAR
        return f'CASE({byte_at},-1,"",CHAR({byte_at}))'

    return _StrAccessor.Formatter(
        op_params=[self._base_column, i],
        dtype=str,
        formatter=_render,
    )
873
+
874
def concat(self, other):
    """
    Return the string with ``other`` appended to it.

    Parameters
    ----------
    other: str or Column or Operation
        String to concatenate with.

    Returns
    -------
    Operation
        Result of the concatenation; it has the same type as the original column.

    Examples
    --------
    >>> data = otp.Ticks(X=['X1', 'X2', 'X3'], Y=['Y1', 'Y2', 'Y3'])
    >>> data['X_WITH_CONST_SUFFIX'] = data['X'].str.concat('_suffix')
    >>> data['X_WTH_Y'] = data['X'].str.concat(data['Y'])
    >>> otp.run(data)
                         Time   X   Y  X_WITH_CONST_SUFFIX  X_WTH_Y
    0 2003-12-01 00:00:00.000  X1  Y1            X1_suffix     X1Y1
    1 2003-12-01 00:00:00.001  X2  Y2            X2_suffix     X2Y2
    2 2003-12-01 00:00:00.002  X3  Y3            X3_suffix     X3Y3
    """
    def _render(column, other):
        return 'CONCAT({}, {})'.format(ott.value2str(column), ott.value2str(other))

    base = self._base_column
    return _StrAccessor.Formatter(op_params=[base, other], dtype=base.dtype, formatter=_render)
899
+
900
def insert(self, start, length, value):
    """
    Replace ``length`` characters of the string, beginning at ``start``, with ``value``.

    In other words, ``length`` characters are deleted from the string beginning at ``start``
    and then ``value`` is inserted into the string at the same position.

    Parameters
    ----------
    start: int or Column or Operation
        Position to remove from and to insert into.
        Judging by the examples below the position is 1-based.
    length: int or Column or Operation
        Number of characters to remove.
    value: str or Column or Operation
        String to insert.

    Returns
    -------
    Operation
        Modified string; it has the same type as the original column.

    Examples
    --------
    >>> data = otp.Ticks(X=['aaaaaaa', 'bbbbb', 'cccc'], Y=['ddd', 'ee', 'f'])
    >>> data['INSERTED_1'] = data['X'].str.insert(3, 1, 'X')
    >>> data['INSERTED_2'] = data['X'].str.insert(3, 2, 'X')
    >>> data['INSERTED_Y'] = data['X'].str.insert(3, 2, data['Y'])
    >>> otp.run(data)
                         Time        X    Y  INSERTED_1  INSERTED_2  INSERTED_Y
    0 2003-12-01 00:00:00.000  aaaaaaa  ddd     aaXaaaa      aaXaaa    aadddaaa
    1 2003-12-01 00:00:00.001    bbbbb   ee       bbXbb        bbXb       bbeeb
    2 2003-12-01 00:00:00.002     cccc    f        ccXc         ccX         ccf

    Insertion without removal:

    >>> data = otp.Ticks(X=['aaaaaaa', 'bbbbb', 'cccc'])
    >>> data['INSERTED'] = data['X'].str.insert(3, 0, 'X')
    >>> otp.run(data)
                         Time        X  INSERTED
    0 2003-12-01 00:00:00.000  aaaaaaa  aaXaaaaa
    1 2003-12-01 00:00:00.001    bbbbb    bbXbbb
    2 2003-12-01 00:00:00.002     cccc     ccXcc

    Removal without insertion:

    >>> data = otp.Ticks(X=['aaaaaaa', 'bbbbb', 'cccc'])
    >>> data['REMOVED'] = data['X'].str.insert(3, 2, '')
    >>> otp.run(data)
                         Time        X  REMOVED
    0 2003-12-01 00:00:00.000  aaaaaaa    aaaaa
    1 2003-12-01 00:00:00.001    bbbbb      bbb
    2 2003-12-01 00:00:00.002     cccc       cc
    """
    def _render(column, start, length, value):
        rendered = [ott.value2str(arg) for arg in (column, start, length, value)]
        return 'INSERT(' + ', '.join(rendered) + ')'

    base = self._base_column
    return _StrAccessor.Formatter(
        op_params=[base, start, length, value],
        dtype=base.dtype,
        formatter=_render,
    )
955
+
956
def first(self, count=1):
    """
    Return the first ``count`` symbols of the string.

    Parameters
    ----------
    count: int or Column or Operation
        Number of first symbols to return. Default: 1

    Returns
    -------
    Operation
        The leading ``count`` symbols; the whole string if it is shorter than ``count``.

    Examples
    --------
    >>> data = otp.Ticks(X=['abc', 'bac', 'cba'], Y=[3, 1, 10])
    >>> data['FIRST'] = data['X'].str.first()
    >>> data['FIRST_Y'] = data['X'].str.first(data['Y'])
    >>> otp.run(data)
                         Time    X   Y  FIRST  FIRST_Y
    0 2003-12-01 00:00:00.000  abc   3      a      abc
    1 2003-12-01 00:00:00.001  bac   1      b        b
    2 2003-12-01 00:00:00.002  cba  10      c      cba
    """
    def _render(column, count):
        return 'LEFT({}, {})'.format(ott.value2str(column), ott.value2str(count))

    # NOTE(review): the result is declared as plain ``str`` here while ``last``
    # keeps the dtype of the base column -- confirm that the difference is intended
    return _StrAccessor.Formatter(
        op_params=[self._base_column, count],
        dtype=str,
        formatter=_render,
    )
981
+
982
def last(self, count=1):
    """
    Return the last ``count`` symbols of the string.

    Parameters
    ----------
    count: int or Column or Operation
        Number of last symbols to return. Default: 1

    Returns
    -------
    Operation
        The trailing ``count`` symbols; the whole string if it is shorter than ``count``.
        The result has the same type as the original column.

    Examples
    --------
    >>> data = otp.Ticks(X=['abc', 'bac', 'cba'], Y=[3, 1, 9])
    >>> data['LAST'] = data['X'].str.last()
    >>> data['LAST_Y'] = data['X'].str.last(data['Y'])
    >>> otp.run(data)
                         Time    X  Y  LAST  LAST_Y
    0 2003-12-01 00:00:00.000  abc  3     c     abc
    1 2003-12-01 00:00:00.001  bac  1     c       c
    2 2003-12-01 00:00:00.002  cba  9     a     cba
    """
    # RIGHT function works strange with negative index
    # RIGHT_UTF8 works fine but it is not supported by old builds,
    # so the tail is cut with SUBSTR starting at max(length - count, 0)
    def _render(column, count):
        # ``count`` is wrapped in parentheses because it is spliced in right after
        # a binary minus: an unparenthesized compound expression (e.g. ``(A) - (B)``)
        # would otherwise be subtracted term by term instead of as a whole
        return 'SUBSTR({0}, MAX(STRLEN({0})-({1}), 0))'.format(ott.value2str(column), ott.value2str(count))

    return _StrAccessor.Formatter(
        op_params=[self._base_column, count],
        dtype=self._base_column.dtype,
        formatter=_render,
    )
1012
+
1013
def startswith(self, value):
    """
    Check whether the string begins with ``value``.

    Parameters
    ----------
    value: str or Column or Operation
        Prefix to look for at the beginning of the string.

    Returns
    -------
    Operation
        Boolean Operation; in the example below it is shown as a float once added as a column.

    Examples
    --------
    >>> data = otp.Ticks(X=['baaaa', 'bbbbb', 'cbbc'], Y=['ba', 'abb', 'c'])
    >>> data['STARTSWITH_CONST'] = data['X'].str.startswith('bb')
    >>> data['STARTSWITH_Y'] = data['X'].str.startswith(data['Y'])
    >>> otp.run(data)
                         Time      X    Y  STARTSWITH_CONST  STARTSWITH_Y
    0 2003-12-01 00:00:00.000  baaaa   ba               0.0           1.0
    1 2003-12-01 00:00:00.001  bbbbb  abb               1.0           0.0
    2 2003-12-01 00:00:00.002   cbbc    c               0.0           1.0
    """
    def _render(column, value):
        text = ott.value2str(column)
        prefix = ott.value2str(value)
        # compare the leading STRLEN(prefix) symbols of the string with the prefix itself
        return f'LEFT({text}, STRLEN({prefix}))={prefix}'

    return _StrAccessor.Formatter(
        op_params=[self._base_column, value],
        dtype=bool,
        formatter=_render,
    )
1041
+
1042
def endswith(self, value):
    """
    Check whether the string ends with ``value``.

    Parameters
    ----------
    value: str or Column or Operation
        Suffix to look for at the end of the string.

    Returns
    -------
    Operation
        Boolean Operation; in the example below it is shown as a float once added as a column.

    Examples
    --------
    >>> data = otp.Ticks(X=['baaaa', 'bbbbb', 'cbbc', 'c'], Y=['ba', 'bbb', 'c', 'cc'])
    >>> data['ENDSWITH_CONST'] = data['X'].str.endswith('bb')
    >>> data['ENDSWITH_Y'] = data['X'].str.endswith(data['Y'])
    >>> otp.run(data)
                         Time      X    Y  ENDSWITH_CONST  ENDSWITH_Y
    0 2003-12-01 00:00:00.000  baaaa   ba             0.0         0.0
    1 2003-12-01 00:00:00.001  bbbbb  bbb             1.0         1.0
    2 2003-12-01 00:00:00.002   cbbc    c             0.0         1.0
    3 2003-12-01 00:00:00.003      c   cc             0.0         0.0
    """
    # RIGHT function works strange with negative index
    # RIGHT_UTF8 works fine but it is not supported by old builds
    def _render(column, value):
        text = ott.value2str(column)
        suffix = ott.value2str(value)
        # the tail starts at max(len(text) - len(suffix), 0) and is compared with the suffix
        return f'SUBSTR({text}, MAX(STRLEN({text})-STRLEN({suffix}), 0))={suffix}'

    return _StrAccessor.Formatter(
        op_params=[self._base_column, value],
        dtype=bool,
        formatter=_render,
    )
1073
+
1074
def slice(self, start=None, stop=None):
    """
    Return the part of the string between positions ``start`` and ``stop``.

    As the examples below show, positions are 0-based, ``stop`` is exclusive
    and negative values are counted from the end of the string.

    Parameters
    ----------
    start: int or Column or Operation, optional
        Start position for slice operation.
    stop: int or Column or Operation, optional
        Stop position for slice operation.

    Returns
    -------
    Operation
        Sliced string; it has the same type as the original column.

    Raises
    ------
    ValueError
        If neither ``start`` nor ``stop`` is set.

    Examples
    --------
    >>> data = otp.Ticks(X=['12345', 'abcde', 'qwerty'], START=[3, 0, 1], STOP=[4, 3, 3])
    >>> data['START_1_SLICE'] = data['X'].str.slice(start=1)
    >>> data['STOP_2_SLICE'] = data['X'].str.slice(stop=2)
    >>> data['SLICE_FROM_COLUMNS'] = data['X'].str.slice(start=data['START'], stop=data['STOP'])
    >>> otp.run(data)
                         Time       X  START  STOP  START_1_SLICE  STOP_2_SLICE  SLICE_FROM_COLUMNS
    0 2003-12-01 00:00:00.000   12345      3     4           2345            12                   4
    1 2003-12-01 00:00:00.001   abcde      0     3           bcde            ab                 abc
    2 2003-12-01 00:00:00.002  qwerty      1     3          werty            qw                  we

    Parameters can be negative:

    >>> data = otp.Ticks(X=['12345', 'abcde', 'qwerty'])
    >>> data['START_SLICE'] = data['X'].str.slice(start=-3)
    >>> data['STOP_SLICE'] = data['X'].str.slice(stop=-1)
    >>> data['START_STOP_SLICE'] = data['X'].str.slice(start=-3, stop=-1)
    >>> otp.run(data)
                         Time       X  START_SLICE  STOP_SLICE  START_STOP_SLICE
    0 2003-12-01 00:00:00.000   12345          345        1234                34
    1 2003-12-01 00:00:00.001   abcde          cde        abcd                cd
    2 2003-12-01 00:00:00.002  qwerty          rty       qwert                rt

    It is possible to use syntax with indexer to call this method:

    >>> data = otp.Ticks(X=['12345', 'abcde', 'qwerty'])
    >>> data['START_SLICE'] = data['X'].str[1:]
    >>> data['STOP_SLICE'] = data['X'].str[:3]
    >>> data['START_STOP_SLICE'] = data['X'].str[1:3]
    >>> otp.run(data)
                         Time       X  START_SLICE  STOP_SLICE  START_STOP_SLICE
    0 2003-12-01 00:00:00.000   12345         2345         123                23
    1 2003-12-01 00:00:00.001   abcde         bcde         abc                bc
    2 2003-12-01 00:00:00.002  qwerty        werty         qwe                we
    """
    if start is None and stop is None:
        raise ValueError("At least one of the `start` or `stop` parameters should be set.")
    # Every branch defines a formatter with the same (x, start, stop) signature,
    # because all three values are always passed in ``op_params`` below.
    # NOTE(review): CASE(expr, value, a, b) is presumably "a if expr equals value else b" -- confirm.
    if start is None:
        # only the right boundary is set: the result is a prefix of the string
        def formatter(x, start, stop):
            x = ott.value2str(x)
            stop_str = ott.value2str(stop)
            len_x = f'STRLEN({x})'
            # a non-negative stop is used as the prefix length,
            # a negative stop is added to the string length (not going below 0)
            return (f'CASE({stop_str}>=0,1,'
                    f'SUBSTR({x},0,{stop_str}),'
                    f'SUBSTR({x},0,MAX(0,{len_x}+{stop_str})))')
    elif stop is None:
        # only the left boundary is set: the result is a suffix of the string
        def formatter(x, start, stop):
            x = ott.value2str(x)
            len_x = f'STRLEN({x})'
            # we need this workaround because simple RIGHT and SUBSTR with negative start parameter work strange
            # SUBSTR_UTF8 works fine but it is not supported by old builds
            x_corrected = f'LEFT({x},{len_x})'
            # SUBSTR returns '' when ABS(second parameter) >= STRLEN
            return f'SUBSTR({x_corrected},MAX({ott.value2str(start)},-{len_x}))'
    else:
        # both boundaries are set: cut the left part first, then the right part of the remainder
        def formatter(x, start, stop):
            x = ott.value2str(x)
            stop_str = ott.value2str(stop)
            len_x = f'STRLEN({x})'
            # we need this workaround because simple RIGHT and SUBSTR with negative start parameter work strange
            # SUBSTR_UTF8 works fine but it is not supported by old builds
            x_corrected = f'LEFT({x},{len_x})'
            # y is x after cutting the left part (we need to cut the right part of it)
            # SUBSTR returns '' when ABS(second parameter) >= STRLEN
            y = f'SUBSTR({x_corrected},MAX({ott.value2str(start)},-{len_x}))'
            len_y = f'STRLEN({y})'
            len_cut = f'({len_x}-{len_y})'  # length of already cut part (the left one)
            # a non-negative stop is shifted by the length of the cut part so that it refers to y,
            # a negative stop is relative to the end of the string and is kept as is
            stop_for_y = f'CASE({stop_str}>=0,1,{stop_str}-{len_cut},{stop_str})'
            # same prefix logic as in the "stop only" branch, applied to y
            return (f'CASE({stop_for_y}>=0,1,'
                    f'SUBSTR({y},0,{stop_for_y}),'
                    f'SUBSTR({y},0,MAX(0,{len_y}+{stop_for_y})))')
    return _StrAccessor.Formatter(op_params=[self._base_column, start, stop],
                                  dtype=self._base_column.dtype,
                                  formatter=formatter)
1160
+
1161
def __getitem__(self, item):
    """
    Support the indexer syntax: ``str[i]`` calls ``get`` and ``str[a:b]`` calls ``slice``.

    Raises
    ------
    ValueError
        If the slice has a ``step``.
    """
    # anything that is not a slice object is treated as a single position
    if not isinstance(item, slice):
        return self.get(item)
    if item.step is not None:
        raise ValueError("`step` parameter is not supported.")
    return self.slice(start=item.start, stop=item.stop)
1167
+
1168
def like(self, pattern):
    r"""
    Check whether the value matches the SQL-like ``pattern``.

    Parameters
    ----------
    pattern: str or symbol parameter (:py:class:`~onetick.py.core._source._symbol_param._SymbolParamColumn`)
        Pattern to match the value with.
        Besides usual text characters the pattern can contain two special ones:

        * ``%`` represents zero or more characters
        * ``_`` represents a single character

        Use backslash ``\`` character to escape these special characters.

    Returns
    -------
    Operation
        ``True`` if the match was successful, ``False`` otherwise.
        Note that boolean Operation is converted to float if added as a column.

    Raises
    ------
    ValueError
        If ``pattern`` is neither a string nor a symbol parameter.

    Examples
    --------

    Use ``%`` character to specify any number of characters:

    >>> data = otp.Ticks(X=['a', 'ab', 'b_', 'b%'])
    >>> data['LIKE'] = data['X'].str.like('a%')
    >>> otp.run(data)
                         Time   X  LIKE
    0 2003-12-01 00:00:00.000   a   1.0
    1 2003-12-01 00:00:00.001  ab   1.0
    2 2003-12-01 00:00:00.002  b_   0.0
    3 2003-12-01 00:00:00.003  b%   0.0

    Use ``_`` special character to specify a single character:

    >>> data = otp.Ticks(X=['a', 'ab', 'b_', 'b%'])
    >>> data['LIKE'] = data['X'].str.like('a_')
    >>> otp.run(data)
                         Time   X  LIKE
    0 2003-12-01 00:00:00.000   a   0.0
    1 2003-12-01 00:00:00.001  ab   1.0
    2 2003-12-01 00:00:00.002  b_   0.0
    3 2003-12-01 00:00:00.003  b%   0.0

    Use backslash ``\`` character to escape special characters:

    >>> data = otp.Ticks(X=['a', 'ab', 'b_', 'b%'])
    >>> data['LIKE'] = data['X'].str.like(r'b\_')
    >>> otp.run(data)
                         Time   X  LIKE
    0 2003-12-01 00:00:00.000   a   0.0
    1 2003-12-01 00:00:00.001  ab   0.0
    2 2003-12-01 00:00:00.002  b_   1.0
    3 2003-12-01 00:00:00.003  b%   0.0

    This function can be used to filter out ticks:

    >>> data = otp.Ticks(X=['a', 'ab', 'b_', 'b%'])
    >>> data, _ = data[data['X'].str.like('a%')]
    >>> otp.run(data)
                         Time   X
    0 2003-12-01 00:00:00.000   a
    1 2003-12-01 00:00:00.001  ab

    ``pattern`` can only be a constant expression, like string or symbol parameter:

    >>> data = otp.Ticks(X=['a', 'ab', 'b_', 'b%'])
    >>> data['LIKE'] = data['X'].str.like(data.Symbol['PATTERN', str])
    >>> otp.run(data, symbols=otp.Tick(SYMBOL_NAME='COMMON::AAPL', PATTERN='_'))['COMMON::AAPL']
                         Time   X  LIKE
    0 2003-12-01 00:00:00.000   a   1.0
    1 2003-12-01 00:00:00.001  ab   0.0
    2 2003-12-01 00:00:00.002  b_   0.0
    3 2003-12-01 00:00:00.003  b%   0.0
    """
    from onetick.py.core._source._symbol_param import _SymbolParamColumn

    # only constant expressions are accepted: plain strings and symbol parameters
    is_constant = isinstance(pattern, (str, _SymbolParamColumn))
    if not is_constant:
        raise ValueError('like() function expects parameter to be a constant expression')

    def _render(column, pattern):
        return '{} LIKE {}'.format(ott.value2str(column), ott.value2str(pattern))

    return _StrAccessor.Formatter(
        op_params=[self._base_column, pattern],
        dtype=bool,
        formatter=_render,
    )
1253
+
1254
def ilike(self, pattern):
    r"""
    Check whether the value matches the SQL-like ``pattern``, ignoring the case of the characters.

    Parameters
    ----------
    pattern: str or symbol parameter (:py:class:`~onetick.py.core._source._symbol_param._SymbolParamColumn`)
        Pattern to match the value with.
        Besides usual text characters the pattern can contain two special ones:

        * ``%`` represents zero or more characters
        * ``_`` represents a single character

        Use backslash ``\`` character to escape these special characters.

    Returns
    -------
    Operation
        ``True`` if the match was successful, ``False`` otherwise.
        Note that boolean Operation is converted to float if added as a column.

    Raises
    ------
    ValueError
        If ``pattern`` is neither a string nor a symbol parameter.

    Examples
    --------

    Use ``%`` character to specify any number of characters:

    .. testcode::
       :skipif: not is_ilike_supported()

       data = otp.Ticks(X=['a', 'ab', 'Ab', 'b_'])
       data['LIKE'] = data['X'].str.ilike('a%')
       df = otp.run(data)
       print(df)

    .. testoutput::

                            Time   X  LIKE
       0 2003-12-01 00:00:00.000   a   1.0
       1 2003-12-01 00:00:00.001  ab   1.0
       2 2003-12-01 00:00:00.002  Ab   1.0
       3 2003-12-01 00:00:00.003  b_   0.0

    Use ``_`` special character to specify a single character:

    .. testcode::
       :skipif: not is_ilike_supported()

       data = otp.Ticks(X=['a', 'ab', 'Ab', 'b_'])
       data['LIKE'] = data['X'].str.ilike('a_')
       df = otp.run(data)
       print(df)

    .. testoutput::

                            Time   X  LIKE
       0 2003-12-01 00:00:00.000   a   0.0
       1 2003-12-01 00:00:00.001  ab   1.0
       2 2003-12-01 00:00:00.002  Ab   1.0
       3 2003-12-01 00:00:00.003  b_   0.0

    Use backslash ``\`` character to escape special characters:

    .. testcode::
       :skipif: not is_ilike_supported()

       data = otp.Ticks(X=['a', 'ab', 'bb', 'b_'])
       data['LIKE'] = data['X'].str.ilike(r'b\_')
       df = otp.run(data)
       print(df)

    .. testoutput::

                            Time   X  LIKE
       0 2003-12-01 00:00:00.000   a   0.0
       1 2003-12-01 00:00:00.001  ab   0.0
       2 2003-12-01 00:00:00.002  bb   0.0
       3 2003-12-01 00:00:00.003  b_   1.0

    This function can be used to filter out ticks:

    .. testcode::
       :skipif: not is_ilike_supported()

       data = otp.Ticks(X=['a', 'ab', 'Ab', 'b_'])
       data, _ = data[data['X'].str.ilike('a%')]
       df = otp.run(data)
       print(df)

    .. testoutput::

                            Time   X
       0 2003-12-01 00:00:00.000   a
       1 2003-12-01 00:00:00.001  ab
       2 2003-12-01 00:00:00.002  Ab

    ``pattern`` can only be a constant expression, like string or symbol parameter:

    .. testcode::
       :skipif: not is_ilike_supported()

       data = otp.Ticks(X=['a', 'ab', 'A', 'b_'])
       data['LIKE'] = data['X'].str.ilike(data.Symbol['PATTERN', str])
       df = otp.run(data, symbols=otp.Tick(SYMBOL_NAME='COMMON::AAPL', PATTERN='_'))['COMMON::AAPL']
       print(df)

    .. testoutput::

                            Time   X  LIKE
       0 2003-12-01 00:00:00.000   a   1.0
       1 2003-12-01 00:00:00.001  ab   0.0
       2 2003-12-01 00:00:00.002   A   1.0
       3 2003-12-01 00:00:00.003  b_   0.0
    """
    from onetick.py.core._source._symbol_param import _SymbolParamColumn

    # only constant expressions are accepted: plain strings and symbol parameters
    is_constant = isinstance(pattern, (str, _SymbolParamColumn))
    if not is_constant:
        raise ValueError('ilike() function expects parameter to be a constant expression')

    def _render(column, pattern):
        return '{} ILIKE {}'.format(ott.value2str(column), ott.value2str(pattern))

    return _StrAccessor.Formatter(
        op_params=[self._base_column, pattern],
        dtype=bool,
        formatter=_render,
    )