onetick-py 1.177.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152) hide show
  1. locator_parser/__init__.py +0 -0
  2. locator_parser/acl.py +73 -0
  3. locator_parser/actions.py +262 -0
  4. locator_parser/common.py +368 -0
  5. locator_parser/io.py +43 -0
  6. locator_parser/locator.py +150 -0
  7. onetick/__init__.py +101 -0
  8. onetick/doc_utilities/__init__.py +3 -0
  9. onetick/doc_utilities/napoleon.py +40 -0
  10. onetick/doc_utilities/ot_doctest.py +140 -0
  11. onetick/doc_utilities/snippets.py +279 -0
  12. onetick/lib/__init__.py +4 -0
  13. onetick/lib/instance.py +141 -0
  14. onetick/py/__init__.py +293 -0
  15. onetick/py/_stack_info.py +89 -0
  16. onetick/py/_version.py +2 -0
  17. onetick/py/aggregations/__init__.py +11 -0
  18. onetick/py/aggregations/_base.py +648 -0
  19. onetick/py/aggregations/_docs.py +948 -0
  20. onetick/py/aggregations/compute.py +286 -0
  21. onetick/py/aggregations/functions.py +2216 -0
  22. onetick/py/aggregations/generic.py +104 -0
  23. onetick/py/aggregations/high_low.py +80 -0
  24. onetick/py/aggregations/num_distinct.py +83 -0
  25. onetick/py/aggregations/order_book.py +501 -0
  26. onetick/py/aggregations/other.py +1014 -0
  27. onetick/py/backports.py +26 -0
  28. onetick/py/cache.py +374 -0
  29. onetick/py/callback/__init__.py +5 -0
  30. onetick/py/callback/callback.py +276 -0
  31. onetick/py/callback/callbacks.py +131 -0
  32. onetick/py/compatibility.py +798 -0
  33. onetick/py/configuration.py +771 -0
  34. onetick/py/core/__init__.py +0 -0
  35. onetick/py/core/_csv_inspector.py +93 -0
  36. onetick/py/core/_internal/__init__.py +0 -0
  37. onetick/py/core/_internal/_manually_bound_value.py +6 -0
  38. onetick/py/core/_internal/_nodes_history.py +250 -0
  39. onetick/py/core/_internal/_op_utils/__init__.py +0 -0
  40. onetick/py/core/_internal/_op_utils/every_operand.py +9 -0
  41. onetick/py/core/_internal/_op_utils/is_const.py +10 -0
  42. onetick/py/core/_internal/_per_tick_scripts/tick_list_sort_template.script +121 -0
  43. onetick/py/core/_internal/_proxy_node.py +140 -0
  44. onetick/py/core/_internal/_state_objects.py +2312 -0
  45. onetick/py/core/_internal/_state_vars.py +93 -0
  46. onetick/py/core/_source/__init__.py +0 -0
  47. onetick/py/core/_source/_symbol_param.py +95 -0
  48. onetick/py/core/_source/schema.py +97 -0
  49. onetick/py/core/_source/source_methods/__init__.py +0 -0
  50. onetick/py/core/_source/source_methods/aggregations.py +809 -0
  51. onetick/py/core/_source/source_methods/applyers.py +296 -0
  52. onetick/py/core/_source/source_methods/columns.py +141 -0
  53. onetick/py/core/_source/source_methods/data_quality.py +301 -0
  54. onetick/py/core/_source/source_methods/debugs.py +272 -0
  55. onetick/py/core/_source/source_methods/drops.py +120 -0
  56. onetick/py/core/_source/source_methods/fields.py +619 -0
  57. onetick/py/core/_source/source_methods/filters.py +1002 -0
  58. onetick/py/core/_source/source_methods/joins.py +1413 -0
  59. onetick/py/core/_source/source_methods/merges.py +605 -0
  60. onetick/py/core/_source/source_methods/misc.py +1455 -0
  61. onetick/py/core/_source/source_methods/pandases.py +155 -0
  62. onetick/py/core/_source/source_methods/renames.py +356 -0
  63. onetick/py/core/_source/source_methods/sorts.py +183 -0
  64. onetick/py/core/_source/source_methods/switches.py +142 -0
  65. onetick/py/core/_source/source_methods/symbols.py +117 -0
  66. onetick/py/core/_source/source_methods/times.py +627 -0
  67. onetick/py/core/_source/source_methods/writes.py +986 -0
  68. onetick/py/core/_source/symbol.py +205 -0
  69. onetick/py/core/_source/tmp_otq.py +222 -0
  70. onetick/py/core/column.py +209 -0
  71. onetick/py/core/column_operations/__init__.py +0 -0
  72. onetick/py/core/column_operations/_methods/__init__.py +4 -0
  73. onetick/py/core/column_operations/_methods/_internal.py +28 -0
  74. onetick/py/core/column_operations/_methods/conversions.py +216 -0
  75. onetick/py/core/column_operations/_methods/methods.py +292 -0
  76. onetick/py/core/column_operations/_methods/op_types.py +160 -0
  77. onetick/py/core/column_operations/accessors/__init__.py +0 -0
  78. onetick/py/core/column_operations/accessors/_accessor.py +28 -0
  79. onetick/py/core/column_operations/accessors/decimal_accessor.py +104 -0
  80. onetick/py/core/column_operations/accessors/dt_accessor.py +537 -0
  81. onetick/py/core/column_operations/accessors/float_accessor.py +184 -0
  82. onetick/py/core/column_operations/accessors/str_accessor.py +1367 -0
  83. onetick/py/core/column_operations/base.py +1121 -0
  84. onetick/py/core/cut_builder.py +150 -0
  85. onetick/py/core/db_constants.py +20 -0
  86. onetick/py/core/eval_query.py +245 -0
  87. onetick/py/core/lambda_object.py +441 -0
  88. onetick/py/core/multi_output_source.py +232 -0
  89. onetick/py/core/per_tick_script.py +2256 -0
  90. onetick/py/core/query_inspector.py +464 -0
  91. onetick/py/core/source.py +1744 -0
  92. onetick/py/db/__init__.py +2 -0
  93. onetick/py/db/_inspection.py +1128 -0
  94. onetick/py/db/db.py +1327 -0
  95. onetick/py/db/utils.py +64 -0
  96. onetick/py/docs/__init__.py +0 -0
  97. onetick/py/docs/docstring_parser.py +112 -0
  98. onetick/py/docs/utils.py +81 -0
  99. onetick/py/functions.py +2398 -0
  100. onetick/py/license.py +190 -0
  101. onetick/py/log.py +88 -0
  102. onetick/py/math.py +935 -0
  103. onetick/py/misc.py +470 -0
  104. onetick/py/oqd/__init__.py +22 -0
  105. onetick/py/oqd/eps.py +1195 -0
  106. onetick/py/oqd/sources.py +325 -0
  107. onetick/py/otq.py +216 -0
  108. onetick/py/pyomd_mock.py +47 -0
  109. onetick/py/run.py +916 -0
  110. onetick/py/servers.py +173 -0
  111. onetick/py/session.py +1347 -0
  112. onetick/py/sources/__init__.py +19 -0
  113. onetick/py/sources/cache.py +167 -0
  114. onetick/py/sources/common.py +128 -0
  115. onetick/py/sources/csv.py +642 -0
  116. onetick/py/sources/custom.py +85 -0
  117. onetick/py/sources/data_file.py +305 -0
  118. onetick/py/sources/data_source.py +1045 -0
  119. onetick/py/sources/empty.py +94 -0
  120. onetick/py/sources/odbc.py +337 -0
  121. onetick/py/sources/order_book.py +271 -0
  122. onetick/py/sources/parquet.py +168 -0
  123. onetick/py/sources/pit.py +191 -0
  124. onetick/py/sources/query.py +495 -0
  125. onetick/py/sources/snapshots.py +419 -0
  126. onetick/py/sources/split_query_output_by_symbol.py +198 -0
  127. onetick/py/sources/symbology_mapping.py +123 -0
  128. onetick/py/sources/symbols.py +374 -0
  129. onetick/py/sources/ticks.py +825 -0
  130. onetick/py/sql.py +70 -0
  131. onetick/py/state.py +251 -0
  132. onetick/py/types.py +2131 -0
  133. onetick/py/utils/__init__.py +70 -0
  134. onetick/py/utils/acl.py +93 -0
  135. onetick/py/utils/config.py +186 -0
  136. onetick/py/utils/default.py +49 -0
  137. onetick/py/utils/file.py +38 -0
  138. onetick/py/utils/helpers.py +76 -0
  139. onetick/py/utils/locator.py +94 -0
  140. onetick/py/utils/perf.py +498 -0
  141. onetick/py/utils/query.py +49 -0
  142. onetick/py/utils/render.py +1374 -0
  143. onetick/py/utils/script.py +244 -0
  144. onetick/py/utils/temp.py +471 -0
  145. onetick/py/utils/types.py +120 -0
  146. onetick/py/utils/tz.py +84 -0
  147. onetick_py-1.177.0.dist-info/METADATA +137 -0
  148. onetick_py-1.177.0.dist-info/RECORD +152 -0
  149. onetick_py-1.177.0.dist-info/WHEEL +5 -0
  150. onetick_py-1.177.0.dist-info/entry_points.txt +2 -0
  151. onetick_py-1.177.0.dist-info/licenses/LICENSE +21 -0
  152. onetick_py-1.177.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,1367 @@
1
+ from typing import Optional
2
+
3
+ from onetick.py import types as ott
4
+ from onetick.py import configuration, utils
5
+ from onetick.py.core.column_operations.accessors._accessor import _Accessor
6
+ from onetick.py.core.column_operations.base import _Operation
7
+ from onetick.py.backports import Literal
8
+ from onetick.py.docs.utils import alias
9
+ from onetick.py.compatibility import is_ilike_supported
10
+
11
+
12
+ def _get_onetick_bool_string(value: bool) -> str:
13
+ if value:
14
+ return '"true"'
15
+ return '"false"'
16
+
17
+
18
+ class _StrAccessor(_Accessor):
19
+ """ Accessor for string functions
20
+
21
+ >>> data = otp.Ticks(X=['some string'])
22
+ >>> data["Y"] = data["X"].str.<function_name>() # doctest: +SKIP
23
+ """
24
+
25
def to_datetime(self,
                format='%Y/%m/%d %H:%M:%S.%J',
                timezone=None,
                unit: Optional[Literal['ms', 'ns']] = None):
    """
    Converts the formatted time to the number of nanoseconds (datetime) since 1970/01/01 GMT.

    Parameters
    ----------
    format: str, Operation, Column
        Layout of the string. It might contain any characters, but the following
        combinations of characters have special meanings

        %Y - Year (4 digits)

        %y - Year (2 digits)

        %m - Month (2 digits)

        %d - Day of month (2 digits)

        %H - Hours (2 digits, 24-hour format)

        %I - Hours (2 digits, 12-hour format)

        %M - Minutes (2 digits)

        %S - Seconds (2 digits)

        %J - Nanoseconds (9 digits)

        %p - AM/PM (2 characters)

    timezone: str | Operation | Column
        Timezone. The timezone of the query will be used if no ``timezone`` was passed.

    unit: str, optional
        If set, `format` and `timezone` are ignored.
        If equals to `ns`, constructs a nanosecond-granularity timestamp from a millisecond-granularity
        string. It has the following format: < milliseconds since 1970/01/01 GMT >.< fraction of a millisecond >.
        The fraction might have at most six digits. If the fraction is equal to zero,
        .< fraction of a millisecond > is optional.
        If equals to `ms`, constructs a millisecond-granularity timestamp from a millisecond-granularity
        string. It has the following format: < milliseconds since 1970/01/01 GMT >.

    Returns
    -------
    Operation
        :py:class:`nsectime <onetick.py.types.nsectime>` Operation obtained from the string
        (the Operation is built with the ``msectime`` dtype when ``unit='ms'``).

    Raises
    ------
    ValueError
        If ``unit`` is neither ``None``, ``'ns'`` nor ``'ms'``.

    Examples
    --------
    >>> # OTdirective: snippet-name: string.to timestamp;
    >>> data = otp.Tick(X='5/17/22-11:10:56.123456789')
    >>> data['Y'] = data['X'].str.to_datetime('%m/%d/%y-%H:%M:%S.%J', 'Europe/London')
    >>> otp.run(data)
            Time                           X                             Y
    0 2003-12-01  5/17/22-11:10:56.123456789 2022-05-17 06:10:56.123456789

    >>> data = otp.Ticks(A=['1693825877111.002001', '1693825877112'])
    >>> data['NSECTIME_A'] = data['A'].str.to_datetime(unit='ns')
    >>> otp.run(data)
                         Time                     A                    NSECTIME_A
    0 2003-12-01 00:00:00.000  1693825877111.002001 2023-09-04 07:11:17.111002001
    1 2003-12-01 00:00:00.001         1693825877112 2023-09-04 07:11:17.112000000

    >>> data = otp.Tick(A='1693825877111')
    >>> data['MSECTIME_A'] = data['A'].str.to_datetime(unit='ms')
    >>> otp.run(data)
            Time              A              MSECTIME_A
    0 2003-12-01  1693825877111 2023-09-04 07:11:17.111
    """
    # Epoch-string modes: only the column itself is an operand here.
    if unit == 'ns':
        return _StrAccessor.Formatter(
            op_params=[self._base_column],
            dtype=ott.nsectime,
            formatter=lambda column: f'MSEC_STR_TO_NSECTIME({ott.value2str(column)})',
        )
    if unit == 'ms':
        return _StrAccessor.Formatter(
            op_params=[self._base_column],
            dtype=ott.msectime,
            formatter=lambda column: f'GET_MSECS(MSEC_STR_TO_NSECTIME({ott.value2str(column)}))',
        )
    if unit is not None:
        raise ValueError(f'`{unit}` is unsupported value for `unit` parameter')

    # Format-based mode: the explicit "default" marker is swapped for the configured timezone.
    if timezone is utils.default:
        timezone = configuration.config.tz

    def _render_parse(column, fmt, tz):
        column_str = ott.value2str(column)
        tz_str, format_str = self._preprocess_tz_and_format(tz, fmt)
        # '%f' in the rendered format argument is rewritten to '%J'
        format_str = format_str.replace('%f', '%J')
        return f'parse_nsectime({format_str},{column_str},{tz_str})'

    return _StrAccessor.Formatter(
        op_params=[self._base_column, format, timezone],
        dtype=ott.nsectime,
        formatter=_render_parse,
    )
126
+
127
# Publish ``to_datetime`` under the additional name ``strptime``.
# ``doc_replacer`` receives documentation text and swaps every 'to_datetime' for 'strptime'
# (presumably so the alias' docstring and examples show the alias name; see ``alias``).
strptime = alias(to_datetime,
                 doc_replacer=lambda doc: doc.replace('to_datetime', 'strptime'))
129
+
130
def token(self, sep=" ", n=0):
    """
    Breaks the value into tokens based on the delimiter ``sep``
    and returns token at position ``n`` (zero-based).

    If there are not enough tokens to get the one at position ``n``, then empty string is returned.

    Parameters
    ----------
    sep: str or Column or Operation
        The delimiter, which must be a single character used to split the string into tokens.
    n: int, Operation
        Token index to return. For a negative ``n``, count from the end instead of the beginning.
        If index is out of range, then empty string is returned.

    Returns
    -------
    Operation
        token at position ``n`` or empty string.

    Raises
    ------
    ValueError
        If ``sep`` is a Python string whose length is not exactly one character.

    Examples
    --------
    >>> # OTdirective: snippet-name: string.token;
    >>> data = otp.Tick(X='US_COMP::TRD')
    >>> data['Y'] = data['X'].str.token(':', -1)
    >>> otp.run(data)
            Time             X    Y
    0 2003-12-01  US_COMP::TRD  TRD

    Other columns can be used as parameters too:

    >>> data = otp.Tick(X='US_COMP::TRD', SEP=':', N=-1)
    >>> data['Y'] = data['X'].str.token(data['SEP'], data['N'])
    >>> otp.run(data)
            Time             X SEP  N    Y
    0 2003-12-01  US_COMP::TRD   : -1  TRD

    If index is out of range, then empty string is returned:

    >>> data = otp.Tick(X='US_COMP::TRD')
    >>> data['Y'] = data['X'].str.token(':', 999)
    >>> otp.run(data)
            Time             X Y
    0 2003-12-01  US_COMP::TRD
    """
    # Only literal separators can be validated here; Column/Operation values are passed through.
    literal_sep = isinstance(sep, str)
    if literal_sep and len(sep) != 1:
        raise ValueError("Function '.str.token()' expects parameter 'sep' to be a single character")

    def _render_token(column, sep, n):
        value_str = ott.value2str(column)
        index_str = ott.value2str(n)
        sep_str = ott.value2str(sep)
        # the rendered call takes the index before the separator
        return f'token({value_str},{index_str},{sep_str})'

    return _StrAccessor.Formatter(
        op_params=[self._base_column, sep, n],
        dtype=self._base_column.dtype,
        formatter=_render_token,
    )
182
+
183
def match(self, pat, case=True):
    r"""
    Match the text against a regular expression specified in the ``pat`` parameter.

    Parameters
    ----------
    pat: str or Column or Operation
        A pattern specified via the POSIX extended regular expression syntax.
    case: bool
        If ``True``, then regular expression is case-sensitive.

    Returns
    -------
    Operation
        ``True`` if the match was successful, ``False`` otherwise.
        Note that boolean Operation is converted to float if added as a column.

    Examples
    --------
    >>> # OTdirective: snippet-name: string.match;
    >>> data = otp.Ticks(X=['hello', 'there were 77 ticks'])
    >>> data['Y'] = data['X'].str.match(r'\d\d')
    >>> otp.run(data)
                         Time                    X    Y
    0 2003-12-01 00:00:00.000                hello  0.0
    1 2003-12-01 00:00:00.001  there were 77 ticks  1.0

    Other columns can be used as parameter ``pat`` too:

    >>> data = otp.Tick(X='OneTick', PAT='onetick')
    >>> data['Y'] = data['X'].str.match(data['PAT'], case=False)
    >>> otp.run(data)
            Time        X      PAT    Y
    0 2003-12-01  OneTick  onetick  1.0

    ``match`` function can also be used as a filter.
    For example, to filter on-exchange continuous trading trades:

    >>> q = otp.DataSource('US_COMP', tick_type='TRD', symbols=['SPY'])  # doctest: +SKIP
    >>> q = q[['PRICE', 'SIZE', 'COND', 'EXCHANGE']]  # doctest: +SKIP
    >>> q = q.where(q['COND'].str.match('^[^O6TUHILNRWZ47QMBCGPV]*$'))  # doctest: +SKIP
    >>> otp.run(q, start=otp.dt(2023, 5, 15, 9, 30), end=otp.dt(2023, 5, 15, 9, 30, 1))  # doctest: +SKIP
                                Time    PRICE  SIZE  COND EXCHANGE
    0  2023-05-15 09:30:00.000776704  412.220   247              Z
    1  2023-05-15 09:30:00.019069440  412.230   100     F        K
    ..                           ...      ...   ...   ...      ...
    """
    # The rendered call takes a "caseless" flag, i.e. the inverse of ``case``.
    caseless = _get_onetick_bool_string(not case)

    def _render_match(column, pat):
        subject_str = ott.value2str(column)
        pattern_str = ott.value2str(pat)
        return f'regex_match({subject_str},{pattern_str},{caseless})'

    return _StrAccessor.Formatter(
        op_params=[self._base_column, pat],
        dtype=bool,
        formatter=_render_match,
    )
236
+
237
def len(self):
    """
    Get the length of a string.

    Returns
    -------
    Operation
        The length of the string.
        If a null-character (byte with value ``0``) is present in the string,
        its position (0-based) is returned.

    Examples
    --------
    >>> # OTdirective: snippet-name: string.len;
    >>> data = otp.Ticks(X=['hello', 'world!'])
    >>> data['LEN'] = data['X'].str.len()
    >>> otp.run(data)
                         Time       X  LEN
    0 2003-12-01 00:00:00.000   hello    5
    1 2003-12-01 00:00:00.001  world!    6
    """
    def _render_strlen(column):
        value_str = ott.value2str(column)
        return f'strlen({value_str})'

    return _StrAccessor.Formatter(
        op_params=[self._base_column],
        dtype=int,
        formatter=_render_strlen,
    )
261
+
262
def contains(self, substr):
    """
    Check if the string contains ``substr``.

    Note
    ----
    This function does not support regular expressions.
    Use :func:`match` for this purpose.

    Parameters
    ----------
    substr: str or Column or Operation
        A substring to search for within the string.

    Returns
    -------
    Operation
        ``True`` if the string contains the substring, ``False`` otherwise.
        Note that boolean Operation is converted to float if added as a column.

    Examples
    --------
    >>> # OTdirective: snippet-name: string.contains;
    >>> data = otp.Ticks(X=['hello', 'world!'])
    >>> data['CONTAINS'] = data['X'].str.contains('hel')
    >>> otp.run(data)
                         Time       X  CONTAINS
    0 2003-12-01 00:00:00.000   hello       1.0
    1 2003-12-01 00:00:00.001  world!       0.0

    Other columns can be used as parameter ``substr`` too:

    >>> # OTdirective: snippet-name: string.contains another field;
    >>> data = otp.Ticks(X=['hello', 'big', 'world!'],
    ...                  Y=['hel', 'wor', 'wor'])
    >>> data['CONTAINS'] = data['X'].str.contains(data['Y'])
    >>> otp.run(data)
                         Time       X    Y  CONTAINS
    0 2003-12-01 00:00:00.000   hello  hel       1.0
    1 2003-12-01 00:00:00.001     big  wor       0.0
    2 2003-12-01 00:00:00.002  world!  wor       1.0

    This method can also be used for filtering:

    >>> # OTdirective: snippet-name: string.contains as a filter;
    >>> data = otp.Ticks(X=['Hello', 'World'])
    >>> with_substr, wo_substr = data[data['X'].str.contains('Hel')]
    >>> otp.run(with_substr)
            Time      X
    0 2003-12-01  Hello
    """
    def _render_contains(column, substr):
        haystack_str = ott.value2str(column)
        needle_str = ott.value2str(substr)
        # rendered as a comparison of the instr() result against -1
        return f'instr({haystack_str}, {needle_str}) > -1'

    return _StrAccessor.Formatter(
        op_params=[self._base_column, substr],
        dtype=bool,
        formatter=_render_contains,
    )
318
+
319
def trim(self):
    """
    Removes white spaces from both sides of the string.

    Returns
    -------
    Operation
        Trimmed string

    See Also
    --------
    :meth:`ltrim`, :meth:`rtrim`

    Examples
    --------
    >>> # OTdirective: snippet-name: string.trim;
    >>> data = otp.Ticks(X=['  Hello', 'World  '])
    >>> data['X'] = data['X'].str.trim()
    >>> otp.run(data)
                         Time      X
    0 2003-12-01 00:00:00.000  Hello
    1 2003-12-01 00:00:00.001  World
    """
    def _render_trim(column):
        value_str = ott.value2str(column)
        return f'trim({value_str})'

    # The result keeps the dtype of the column the accessor was created from.
    return _StrAccessor.Formatter(
        op_params=[self._base_column],
        dtype=self._base_column.dtype,
        formatter=_render_trim,
    )
345
+
346
def ltrim(self):
    """
    Removes the leading white spaces from a string.

    Returns
    -------
    Operation
        Trimmed string

    See Also
    --------
    :meth:`trim`, :meth:`rtrim`
    """
    def _render_ltrim(column):
        value_str = ott.value2str(column)
        return f'ltrim({value_str})'

    # The result keeps the dtype of the column the accessor was created from.
    return _StrAccessor.Formatter(
        op_params=[self._base_column],
        dtype=self._base_column.dtype,
        formatter=_render_ltrim,
    )
362
+
363
def rtrim(self):
    """
    Removes the trailing white spaces from a string.

    Returns
    -------
    Operation
        Trimmed string

    See Also
    --------
    :meth:`ltrim`, :meth:`trim`
    """
    def _render_rtrim(column):
        value_str = ott.value2str(column)
        return f'rtrim({value_str})'

    # The result keeps the dtype of the column the accessor was created from.
    return _StrAccessor.Formatter(
        op_params=[self._base_column],
        dtype=self._base_column.dtype,
        formatter=_render_rtrim,
    )
379
+
380
def lower(self):
    """
    Convert a string to lower case.

    Returns
    -------
    Operation
        Lowercased string

    Examples
    --------
    >>> # OTdirective: snippet-name: string.lower;
    >>> data = otp.Ticks(X=['HeLlO', 'wOrLd!'])
    >>> data['LOW'] = data['X'].str.lower()
    >>> otp.run(data)
                         Time       X     LOW
    0 2003-12-01 00:00:00.000   HeLlO   hello
    1 2003-12-01 00:00:00.001  wOrLd!  world!
    """
    def _render_lower(column):
        value_str = ott.value2str(column)
        return f'lower({value_str})'

    # The result keeps the dtype of the column the accessor was created from.
    return _StrAccessor.Formatter(
        op_params=[self._base_column],
        dtype=self._base_column.dtype,
        formatter=_render_lower,
    )
402
+
403
def upper(self):
    """
    Converts a string to upper case.

    Returns
    -------
    Operation
        Uppercased string

    Examples
    --------
    >>> # OTdirective: snippet-name: string.upper;
    >>> data = otp.Ticks(X=['HeLlO', 'wOrLd!'])
    >>> data['UP'] = data['X'].str.upper()
    >>> otp.run(data)
                         Time       X      UP
    0 2003-12-01 00:00:00.000   HeLlO   HELLO
    1 2003-12-01 00:00:00.001  wOrLd!  WORLD!
    """
    def _render_upper(column):
        value_str = ott.value2str(column)
        return f'upper({value_str})'

    # The result keeps the dtype of the column the accessor was created from.
    return _StrAccessor.Formatter(
        op_params=[self._base_column],
        dtype=self._base_column.dtype,
        formatter=_render_upper,
    )
425
+
426
def replace(self, pat, repl):
    """
    Search for occurrences (case dependent) of ``pat`` and replace with ``repl``.

    Parameters
    ----------
    pat: str or Column or Operation
        Pattern to replace.
    repl: str or Column or Operation
        Replacement string.

    Returns
    -------
    Operation
        String with ``pat`` replaced by ``repl``.

    Examples
    --------
    >>> # OTdirective: snippet-name: string.replace;
    >>> data = otp.Ticks(X=['A Table', 'A Chair', 'An Apple'])
    >>> data['Y'] = data['X'].str.replace('A', 'The')
    >>> otp.run(data)
                         Time         X             Y
    0 2003-12-01 00:00:00.000   A Table     The Table
    1 2003-12-01 00:00:00.001   A Chair     The Chair
    2 2003-12-01 00:00:00.002  An Apple  Then Thepple

    Other columns can be used as parameters too:

    >>> # OTdirective: snippet-name: string.replace from field;
    >>> data = otp.Ticks(X=['A Table', 'A Chair', 'An Apple'],
    ...                  PAT=['A', 'A', 'An'],
    ...                  REPL=['The', 'Their', 'My'])
    >>> data['Y'] = data['X'].str.replace(data['PAT'], data['REPL'])
    >>> otp.run(data)
                         Time         X PAT   REPL            Y
    0 2003-12-01 00:00:00.000   A Table   A    The    The Table
    1 2003-12-01 00:00:00.001   A Chair   A  Their  Their Chair
    2 2003-12-01 00:00:00.002  An Apple  An     My     My Apple
    """
    # see, BDS-112
    # Anything that is not a Python string literal is right-trimmed before use.
    if not isinstance(pat, str):
        pat = pat.str.rtrim()
    if not isinstance(repl, str):
        repl = repl.str.rtrim()

    def _render_replace(column, pat, repl):
        value_str = ott.value2str(column)
        pattern_str = ott.value2str(pat)
        replacement_str = ott.value2str(repl)
        return f'replace({value_str}, {pattern_str}, {replacement_str})'

    return _StrAccessor.Formatter(
        op_params=[self._base_column, pat, repl],
        dtype=self._base_column.dtype,
        formatter=_render_replace,
    )
479
+
480
def regex_replace(self, pat, repl, *, replace_every=False, caseless=False):
    r"""
    Search for occurrences of the regular expression ``pat`` and replace with ``repl``.
    Matching is case-sensitive unless ``caseless`` is set to ``True``.

    Parameters
    ----------
    pat: str or Column or Operation
        Pattern to replace specified via the POSIX extended regular expression syntax.
    repl: str or Column or Operation
        Replacement string. ``\0`` refers to the entire matched text. ``\1`` to ``\9`` refer
        to the text matched by the corresponding parenthesized group in ``pat``.
    replace_every: bool
        If ``replace_every`` flag is set to ``True``, all matches will be replaced, if ``False`` only the first one.
    caseless: bool
        If the ``caseless`` flag is set to ``True``, matching is case-insensitive.

    Returns
    -------
    Operation
        String with pattern ``pat`` replaced by ``repl``.

    See Also
    --------
    :meth:`extract`

    Examples
    --------
    >>> # OTdirective: snippet-name: string.regex replace;
    >>> data = otp.Ticks(X=['A Table', 'A Chair', 'An Apple'])
    >>> data['Y'] = data['X'].str.regex_replace('An? ', 'The ')
    >>> otp.run(data)
                         Time         X          Y
    0 2003-12-01 00:00:00.000   A Table  The Table
    1 2003-12-01 00:00:00.001   A Chair  The Chair
    2 2003-12-01 00:00:00.002  An Apple  The Apple

    Parameter ``replace_every`` will replace all occurrences of ``pat`` in the string:

    >>> # OTdirective: snippet-name: string.regex replace all;
    >>> data = otp.Ticks(X=['A Table, A Chair, An Apple'])
    >>> data['Y'] = data['X'].str.regex_replace('An? ', 'The ', replace_every=True)
    >>> otp.run(data)
            Time                           X                                Y
    0 2003-12-01  A Table, A Chair, An Apple  The Table, The Chair, The Apple

    Capturing groups in regular expressions is supported:

    >>> # OTdirective: snippet-name: string.regex groups;
    >>> data = otp.Ticks(X=['11/12/1992', '9/22/1993', '3/30/1991'])
    >>> data['Y'] = data['X'].str.regex_replace(r'(\d{1,2})/(\d{1,2})/', r'\2.\1.')
    >>> otp.run(data)
                         Time           X           Y
    0 2003-12-01 00:00:00.000  11/12/1992  12.11.1992
    1 2003-12-01 00:00:00.001   9/22/1993   22.9.1993
    2 2003-12-01 00:00:00.002   3/30/1991   30.3.1991
    """
    # Both flags are embedded in the rendered call as quoted boolean literals.
    every_flag = _get_onetick_bool_string(replace_every)
    caseless_flag = _get_onetick_bool_string(caseless)

    def _render_regex_replace(column, pat, repl):
        value_str = ott.value2str(column)
        pattern_str = ott.value2str(pat)
        replacement_str = ott.value2str(repl)
        return (f'regex_replace({value_str}, {pattern_str},'
                f' {replacement_str}, {every_flag}, {caseless_flag})')

    return _StrAccessor.Formatter(
        op_params=[self._base_column, pat, repl],
        dtype=self._base_column.dtype,
        formatter=_render_regex_replace,
    )
544
+
545
def find(self, sub, start=0):
    """
    Find the index of ``sub`` in the string. If not found, returns ``-1``.

    Parameters
    ----------
    sub: str or Column or Operation
        Substring to find.
    start: int or Column or Operation
        Starting position to find.

    Returns
    -------
    Operation
        The starting position of the substring or ``-1`` if it is not found.

    Examples
    --------
    >>> data = otp.Ticks(X=['ananas', 'banana', 'potato'])
    >>> data['Y'] = data['X'].str.find('ana')  # OTdirective: snippet-name: string.find;
    >>> otp.run(data)
                         Time       X  Y
    0 2003-12-01 00:00:00.000  ananas  0
    1 2003-12-01 00:00:00.001  banana  1
    2 2003-12-01 00:00:00.002  potato -1

    Other columns can be used as parameter ``sub`` too:

    >>> # OTdirective: snippet-name: string.find field value;
    >>> data = otp.Ticks(X=['Ananas', 'Banana', 'Potato'], sub=['Ana', 'anan', 'ato'])
    >>> data['Y'] = data['X'].str.find(data['sub'])
    >>> otp.run(data)
                         Time       X   sub  Y
    0 2003-12-01 00:00:00.000  Ananas   Ana  0
    1 2003-12-01 00:00:00.001  Banana  anan  1
    2 2003-12-01 00:00:00.002  Potato   ato  3

    Note that empty string will be found at the start of any string:

    >>> data = otp.Ticks(X=['string', ''])
    >>> data['Y'] = data['X'].str.find('')
    >>> otp.run(data)
                         Time       X  Y
    0 2003-12-01 00:00:00.000  string  0
    1 2003-12-01 00:00:00.001          0

    ``start`` parameter is used to find ``sub`` starting from selected position:

    >>> data = otp.Ticks(X=['ababab', 'abbbbb'])
    >>> data['Y'] = data['X'].str.find('ab', 1)
    >>> otp.run(data)
                         Time       X  Y
    0 2003-12-01 00:00:00.000  ababab  2
    1 2003-12-01 00:00:00.001  abbbbb -1
    """
    def _render_find(column, sub, start):
        needle_str = ott.value2str(sub)
        haystack_str = ott.value2str(column)
        # the start position is shifted by +1 for the call and the result by -1,
        # which keeps both zero-based on the Python side
        shifted_start_str = ott.value2str(start + 1)
        return f'LOCATE({needle_str}, {haystack_str}, {shifted_start_str})-1'

    return _StrAccessor.Formatter(
        op_params=[self._base_column, sub, start],
        dtype=int,
        formatter=_render_find,
    )
608
+
609
def repeat(self, repeats):
    """
    Duplicate a string ``repeats`` times.

    Note
    ----
    * Alternative for the ``repeat`` function is multiplication.
    * The returned string has the same type and maximum length as the original field.

    Parameters
    ----------
    repeats: int or Column or Operation
        Non-negative number of copies of the string.
        Repeating zero times results in empty string.
        Repeating negative number of times results in exception.

    Returns
    -------
    Operation
        String repeated ``repeats`` times.

    Examples
    --------
    >>> # OTdirective: snippet-name: string.repeat;
    >>> data = otp.Ticks(X=['Banana', 'Ananas', 'Apple'])
    >>> data['X'] = data['X'].str.repeat(3)
    >>> otp.run(data)
                         Time                   X
    0 2003-12-01 00:00:00.000  BananaBananaBanana
    1 2003-12-01 00:00:00.001  AnanasAnanasAnanas
    2 2003-12-01 00:00:00.002     AppleAppleApple

    Other columns can be used as parameter ``repeats`` too:

    >>> # OTdirective: snippet-name: string.repeat from a field;
    >>> data = otp.Ticks(X=['Banana', 'Ananas', 'Apple'], TIMES=[1, 3, 2])
    >>> data['Y'] = data['X'].str.repeat(data['TIMES'])
    >>> otp.run(data)
                         Time       X  TIMES                   Y
    0 2003-12-01 00:00:00.000  Banana      1              Banana
    1 2003-12-01 00:00:00.001  Ananas      3  AnanasAnanasAnanas
    2 2003-12-01 00:00:00.002   Apple      2          AppleApple

    The returned string has the same type and therefore the same maximum length as the original field:

    >>> data = otp.Ticks(X=[otp.string[9]('Banana')])
    >>> data['Y'] = data['X'].str.repeat(3)
    >>> data.schema
    {'X': string[9], 'Y': string[9]}
    >>> otp.run(data)
            Time       X          Y
    0 2003-12-01  Banana  BananaBan

    ``repeat`` does the same thing as multiplication by a non-negative int:

    >>> # OTdirective: snippet-name: string.repeat by multiplication;
    >>> data = otp.Ticks(X=['Banana'], N=[2])
    >>> data['X2'] = data['X'] * data['N']
    >>> data['X3'] = data['X'] * 3
    >>> otp.run(data)
            Time       X  N            X2                  X3
    0 2003-12-01  Banana  2  BananaBanana  BananaBananaBanana

    Repeating 0 times results in empty string:

    >>> data = otp.Ticks(X=['Banana', 'Apple'])
    >>> data['Y'] = data['X'].str.repeat(0)
    >>> otp.run(data)
                         Time       X Y
    0 2003-12-01 00:00:00.000  Banana
    1 2003-12-01 00:00:00.001   Apple
    """
    def _render_repeat(column, repeats):
        value_str = ott.value2str(column)
        count_str = ott.value2str(repeats)
        return f'repeat({value_str}, {count_str})'

    # The result is declared with the dtype of the original column.
    return _StrAccessor.Formatter(
        op_params=[self._base_column, repeats],
        dtype=self._base_column.dtype,
        formatter=_render_repeat,
    )
686
+
687
def extract(self, pat, rewrite=r"\0", caseless=False):
    r"""
    Find the first match of the regular expression ``pat`` in the string and return it.

    The optional ``rewrite`` template controls how the matched text and its parenthesized
    groups are laid out in the returned string.

    Parameters
    ----------
    pat: str or Column or Operation
        Pattern to search for, written in the POSIX extended regular expression syntax.
    rewrite: str or Column or Operation
        Template that specifies how to arrange the matched text. ``\0`` refers to the entire matched text.
        ``\1`` to ``\9`` refer to the text matched by the corresponding parenthesized group in ``pat``.
        ``\u`` and ``\l`` modifiers within the ``rewrite`` string convert the case of the text that
        matches the corresponding parenthesized group (e.g., ``\u1`` converts ``\1`` to uppercase).
    caseless: bool
        If set to ``True``, matching is case-insensitive.

    Returns
    -------
    Operation
        String matched by ``pat``, formatted as specified in ``rewrite``.

    See Also
    --------
    regex_replace

    Examples
    --------
    >>> # OTdirective: snippet-name: string.regex extract;
    >>> data = otp.Ticks(X=['Mr. Smith: +1348 +4781', 'Ms. Smith: +8971'])
    >>> data['TEL'] = data['X'].str.extract(r'\+\d{4}')
    >>> otp.run(data)
    Time X TEL
    0 2003-12-01 00:00:00.000 Mr. Smith: +1348 +4781 +1348
    1 2003-12-01 00:00:00.001 Ms. Smith: +8971 +8971

    You can specify the group to extract in the ``rewrite`` parameter:

    >>> # OTdirective: snippet-name: string.regex extract group;
    >>> data = otp.Ticks(X=['Mr. Smith: 1992/12/22', 'Ms. Smith: 1989/10/15'])
    >>> data['BIRTH_YEAR'] = data['X'].str.extract(r'(\d{4})/(\d{2})/(\d{2})', rewrite=r'birth year: \1')
    >>> otp.run(data)
    Time X BIRTH_YEAR
    0 2003-12-01 00:00:00.000 Mr. Smith: 1992/12/22 birth year: 1992
    1 2003-12-01 00:00:00.001 Ms. Smith: 1989/10/15 birth year: 1989

    You can use a column as a ``rewrite`` or ``pat`` parameter:

    >>> # OTdirective: snippet-name: string.regex extract from field;
    >>> data = otp.Ticks(X=['Kelly, Mr. James', 'Wilkes, Mrs. James', 'Connolly, Miss. Kate'],
    ...                  PAT=['(Mrs?)\\.', '(Mrs?)\\.', '(Miss)\\.'],
    ...                  REWRITE=['Title 1: \\1', 'Title 2: \\1', 'Title 3: \\1'])
    >>> data['TITLE'] = data['X'].str.extract(data['PAT'], rewrite=data['REWRITE'])
    >>> otp.run(data)
    Time X PAT REWRITE TITLE
    0 2003-12-01 00:00:00.000 Kelly, Mr. James (Mrs?)\. Title 1: \1 Title 1: Mr
    1 2003-12-01 00:00:00.001 Wilkes, Mrs. James (Mrs?)\. Title 2: \1 Title 2: Mrs
    2 2003-12-01 00:00:00.002 Connolly, Miss. Kate (Miss)\. Title 3: \1 Title 3: Miss

    Case of the extracted string can be changed by adding ``l`` and ``u`` to extract group:

    >>> # OTdirective: snippet-name: string.regex extract caseless;
    >>> data = otp.Ticks(NAME=['mr. BroWn', 'Ms. smITh'])
    >>> data['RESULT'] = data['NAME'].str.extract(r'(m)([rs]\. )([a-z])([a-z]*)', r'\u1\l2\u3\l4', caseless=True)
    >>> otp.run(data)
    Time NAME RESULT
    0 2003-12-01 00:00:00.000 mr. BroWn Mr. Brown
    1 2003-12-01 00:00:00.001 Ms. smITh Ms. Smith
    """
    # The flag is converted once, up front; the formatter only embeds the resulting text.
    caseless_flag = _get_onetick_bool_string(caseless)

    def _render(column, pat, rewrite):
        rendered_args = ', '.join(ott.value2str(arg) for arg in (column, pat, rewrite))
        return f'regex_extract({rendered_args}, {caseless_flag})'

    return _StrAccessor.Formatter(
        op_params=[self._base_column, pat, rewrite],
        dtype=self._base_column.dtype,
        formatter=_render,
    )
766
+
767
def substr(self, start, n_bytes=None, rtrim=False):
    """
    Return ``n_bytes`` bytes of the string starting from position ``start``.

    For a non-negative ``start`` the position is counted from the beginning of the string (0-based).
    For a negative ``start`` the position is counted from the end of the string.
    If the ``n_bytes`` parameter is omitted, the part of the string
    starting at ``start`` till the end of the string is returned.

    Parameters
    ----------
    start: int or Column or Operation
        Index of the first symbol of the substring.
    n_bytes: int or Column or Operation
        Number of bytes in the substring.
    rtrim: bool
        If set to ``True``, the original string is trimmed from the right side
        before taking the substring; this can be useful with a negative ``start`` index.

    Returns
    -------
    Operation
        Substring of the string (``n_bytes`` long, starting at ``start``).

    Examples
    --------
    >>> # OTdirective: snippet-name: string.substring;
    >>> data = otp.Ticks(X=['abcdef', '12345 '], START_INDEX=[2, 1], N=[2, 3])
    >>> data['FIRST_3'] = data['X'].str.substr(0, 3)
    >>> data['LAST_3'] = data['X'].str.substr(-3, rtrim=True)
    >>> data['CENTER'] = data['X'].str.substr(data['START_INDEX'], data['N'])
    >>> otp.run(data)
    Time X START_INDEX N FIRST_3 LAST_3 CENTER
    0 2003-12-01 00:00:00.000 abcdef 2 2 abc def cd
    1 2003-12-01 00:00:00.001 12345 1 3 123 345 234
    """
    source = self._base_column.str.rtrim() if rtrim else self._base_column

    if n_bytes is not None:
        def _bounded(column, start, n_bytes):
            rendered_args = ', '.join(ott.value2str(arg) for arg in (column, start, n_bytes))
            return f'substr({rendered_args})'

        return _StrAccessor.Formatter(
            op_params=[source, start, n_bytes],
            dtype=self._base_column.dtype,
            formatter=_bounded,
        )

    def _to_end(column, start):
        rendered_args = ', '.join(ott.value2str(arg) for arg in (column, start))
        return f'substr({rendered_args})'

    return _StrAccessor.Formatter(
        op_params=[source, start],
        dtype=self._base_column.dtype,
        formatter=_to_end,
    )
826
+
827
def get(self, i):
    """
    Return the character located at the 0-based position ``i``,
    or an empty string if the position is greater than or equal to the length.

    Parameters
    ----------
    i: int or Column or Operation
        Index of the character to find.

    Examples
    --------
    >>> data = otp.Ticks(X=['abcdef', '12345 ', 'qw'], GET_INDEX=[2, 1, 0])
    >>> data['THIRD'] = data['X'].str.get(2)
    >>> data['FROM_INDEX'] = data['X'].str.get(data['GET_INDEX'])
    >>> otp.run(data)
    Time X GET_INDEX THIRD FROM_INDEX
    0 2003-12-01 00:00:00.000 abcdef 2 c c
    1 2003-12-01 00:00:00.001 12345 1 3 2
    2 2003-12-01 00:00:00.002 qw 0 q

    It is possible to use syntax with indexer to call this method:

    >>> data = otp.Ticks(X=['abcdef', '12345 ', 'qw'])
    >>> data['THIRD'] = data['X'].str[1]
    >>> otp.run(data)
    Time X THIRD
    0 2003-12-01 00:00:00.000 abcdef b
    1 2003-12-01 00:00:00.001 12345 2
    2 2003-12-01 00:00:00.002 qw w
    """
    def _char_at(column, i):
        byte_expr = 'BYTE_AT({}, {})'.format(ott.value2str(column), ott.value2str(i))
        # A BYTE_AT result of -1 is mapped to an empty string; any other value goes through CHAR.
        return f'CASE({byte_expr},-1,"",CHAR({byte_expr}))'

    return _StrAccessor.Formatter(
        op_params=[self._base_column, i],
        dtype=str,
        formatter=_char_at,
    )
866
+
867
def concat(self, other):
    """
    Return the string with ``other`` appended to it.

    Parameters
    ----------
    other: str or Column or Operation
        String to concatenate with.

    Examples
    --------
    >>> data = otp.Ticks(X=['X1', 'X2', 'X3'], Y=['Y1', 'Y2', 'Y3'])
    >>> data['X_WITH_CONST_SUFFIX'] = data['X'].str.concat('_suffix')
    >>> data['X_WTH_Y'] = data['X'].str.concat(data['Y'])
    >>> otp.run(data)
    Time X Y X_WITH_CONST_SUFFIX X_WTH_Y
    0 2003-12-01 00:00:00.000 X1 Y1 X1_suffix X1Y1
    1 2003-12-01 00:00:00.001 X2 Y2 X2_suffix X2Y2
    2 2003-12-01 00:00:00.002 X3 Y3 X3_suffix X3Y3
    """
    def _join(column, other):
        left = ott.value2str(column)
        right = ott.value2str(other)
        return f'CONCAT({left}, {right})'

    return _StrAccessor.Formatter(
        op_params=[self._base_column, other],
        dtype=self._base_column.dtype,
        formatter=_join,
    )
892
+
893
def insert(self, start, length, value):
    """
    Replace ``length`` characters of the string, beginning at ``start``, with ``value``.

    In other words, ``length`` characters are deleted from the string beginning at ``start``,
    and ``value`` is inserted into the string beginning at ``start``.
    As the examples below show, positions are counted from 1.

    Parameters
    ----------
    start: int or Column or Operation
        Position to remove from and to insert into.
    length: int or Column or Operation
        Number of characters to remove.
    value: str or Column or Operation
        String to insert.

    Examples
    --------
    >>> data = otp.Ticks(X=['aaaaaaa', 'bbbbb', 'cccc'], Y=['ddd', 'ee', 'f'])
    >>> data['INSERTED_1'] = data['X'].str.insert(3, 1, 'X')
    >>> data['INSERTED_2'] = data['X'].str.insert(3, 2, 'X')
    >>> data['INSERTED_Y'] = data['X'].str.insert(3, 2, data['Y'])
    >>> otp.run(data)
    Time X Y INSERTED_1 INSERTED_2 INSERTED_Y
    0 2003-12-01 00:00:00.000 aaaaaaa ddd aaXaaaa aaXaaa aadddaaa
    1 2003-12-01 00:00:00.001 bbbbb ee bbXbb bbXb bbeeb
    2 2003-12-01 00:00:00.002 cccc f ccXc ccX ccf

    It is possible to insert without removal:

    >>> data = otp.Ticks(X=['aaaaaaa', 'bbbbb', 'cccc'])
    >>> data['INSERTED'] = data['X'].str.insert(3, 0, 'X')
    >>> otp.run(data)
    Time X INSERTED
    0 2003-12-01 00:00:00.000 aaaaaaa aaXaaaaa
    1 2003-12-01 00:00:00.001 bbbbb bbXbbb
    2 2003-12-01 00:00:00.002 cccc ccXcc

    It is possible to remove without insertion:

    >>> data = otp.Ticks(X=['aaaaaaa', 'bbbbb', 'cccc'])
    >>> data['REMOVED'] = data['X'].str.insert(3, 2, '')
    >>> otp.run(data)
    Time X REMOVED
    0 2003-12-01 00:00:00.000 aaaaaaa aaaaa
    1 2003-12-01 00:00:00.001 bbbbb bbb
    2 2003-12-01 00:00:00.002 cccc cc
    """
    def _render(column, start, length, value):
        rendered_args = ', '.join(ott.value2str(arg) for arg in (column, start, length, value))
        return f'INSERT({rendered_args})'

    return _StrAccessor.Formatter(
        op_params=[self._base_column, start, length, value],
        dtype=self._base_column.dtype,
        formatter=_render,
    )
948
+
949
def first(self, count=1):
    """
    Return the first ``count`` symbols of the string.

    Parameters
    ----------
    count: int or Column or Operation
        Number of first symbols to return. Default: 1

    Examples
    --------
    >>> data = otp.Ticks(X=['abc', 'bac', 'cba'], Y=[3, 1, 10])
    >>> data['FIRST'] = data['X'].str.first()
    >>> data['FIRST_Y'] = data['X'].str.first(data['Y'])
    >>> otp.run(data)
    Time X Y FIRST FIRST_Y
    0 2003-12-01 00:00:00.000 abc 3 a abc
    1 2003-12-01 00:00:00.001 bac 1 b b
    2 2003-12-01 00:00:00.002 cba 10 c cba
    """
    def _left(column, count):
        return 'LEFT({}, {})'.format(ott.value2str(column), ott.value2str(count))

    return _StrAccessor.Formatter(
        op_params=[self._base_column, count],
        dtype=str,
        formatter=_left,
    )
974
+
975
def last(self, count=1):
    """
    Return the last ``count`` symbols of the string.

    Parameters
    ----------
    count: int or Column or Operation
        Number of last symbols to return. Default: 1

    Examples
    --------
    >>> data = otp.Ticks(X=['abc', 'bac', 'cba'], Y=[3, 1, 9])
    >>> data['LAST'] = data['X'].str.last()
    >>> data['LAST_Y'] = data['X'].str.last(data['Y'])
    >>> otp.run(data)
    Time X Y LAST LAST_Y
    0 2003-12-01 00:00:00.000 abc 3 c abc
    1 2003-12-01 00:00:00.001 bac 1 c c
    2 2003-12-01 00:00:00.002 cba 9 a cba
    """
    def _tail(column, count):
        column_expr = ott.value2str(column)
        count_expr = ott.value2str(count)
        # SUBSTR from a clamped offset is used on purpose:
        # the RIGHT function works strangely with a negative index,
        # and RIGHT_UTF8 works fine but is not supported by old builds.
        return f'SUBSTR({column_expr}, MAX(STRLEN({column_expr})-{count_expr}, 0))'

    return _StrAccessor.Formatter(
        op_params=[self._base_column, count],
        dtype=self._base_column.dtype,
        formatter=_tail,
    )
1005
+
1006
def startswith(self, value):
    """
    Check whether the string begins with ``value``.

    Parameters
    ----------
    value: str or Column or Operation
        String that is expected at the beginning.

    Examples
    --------
    >>> data = otp.Ticks(X=['baaaa', 'bbbbb', 'cbbc'], Y=['ba', 'abb', 'c'])
    >>> data['STARTSWITH_CONST'] = data['X'].str.startswith('bb')
    >>> data['STARTSWITH_Y'] = data['X'].str.startswith(data['Y'])
    >>> otp.run(data)
    Time X Y STARTSWITH_CONST STARTSWITH_Y
    0 2003-12-01 00:00:00.000 baaaa ba 0.0 1.0
    1 2003-12-01 00:00:00.001 bbbbb abb 1.0 0.0
    2 2003-12-01 00:00:00.002 cbbc c 0.0 1.0
    """
    def _prefix_matches(column, value):
        column_expr = ott.value2str(column)
        value_expr = ott.value2str(value)
        # Compare the leading STRLEN(value) symbols of the column with the value itself.
        return f'LEFT({column_expr}, STRLEN({value_expr}))={value_expr}'

    return _StrAccessor.Formatter(
        op_params=[self._base_column, value],
        dtype=bool,
        formatter=_prefix_matches,
    )
1034
+
1035
def endswith(self, value):
    """
    Check whether the string ends with ``value``.

    Parameters
    ----------
    value: str or Column or Operation
        String that is expected at the end.

    Examples
    --------
    >>> data = otp.Ticks(X=['baaaa', 'bbbbb', 'cbbc', 'c'], Y=['ba', 'bbb', 'c', 'cc'])
    >>> data['ENDSWITH_CONST'] = data['X'].str.endswith('bb')
    >>> data['ENDSWITH_Y'] = data['X'].str.endswith(data['Y'])
    >>> otp.run(data)
    Time X Y ENDSWITH_CONST ENDSWITH_Y
    0 2003-12-01 00:00:00.000 baaaa ba 0.0 0.0
    1 2003-12-01 00:00:00.001 bbbbb bbb 1.0 1.0
    2 2003-12-01 00:00:00.002 cbbc c 0.0 1.0
    3 2003-12-01 00:00:00.003 c cc 0.0 0.0
    """
    def _suffix_matches(column, value):
        column_expr = ott.value2str(column)
        value_expr = ott.value2str(value)
        # SUBSTR from a clamped offset is used on purpose:
        # the RIGHT function works strangely with a negative index,
        # and RIGHT_UTF8 works fine but is not supported by old builds.
        return f'SUBSTR({column_expr}, MAX(STRLEN({column_expr})-STRLEN({value_expr}), 0))={value_expr}'

    return _StrAccessor.Formatter(
        op_params=[self._base_column, value],
        dtype=bool,
        formatter=_suffix_matches,
    )
1066
+
1067
def slice(self, start=None, stop=None):
    """
    Return the part of the string between positions ``start`` and ``stop``.

    Parameters
    ----------
    start: int or Column or Operation, optional
        Start position for slice operation.
    stop: int or Column or Operation, optional
        Stop position for slice operation.

    Raises
    ------
    ValueError
        If neither ``start`` nor ``stop`` is specified.

    Examples
    --------
    >>> data = otp.Ticks(X=['12345', 'abcde', 'qwerty'], START=[3, 0, 1], STOP=[4, 3, 3])
    >>> data['START_1_SLICE'] = data['X'].str.slice(start=1)
    >>> data['STOP_2_SLICE'] = data['X'].str.slice(stop=2)
    >>> data['SLICE_FROM_COLUMNS'] = data['X'].str.slice(start=data['START'], stop=data['STOP'])
    >>> otp.run(data)
    Time X START STOP START_1_SLICE STOP_2_SLICE SLICE_FROM_COLUMNS
    0 2003-12-01 00:00:00.000 12345 3 4 2345 12 4
    1 2003-12-01 00:00:00.001 abcde 0 3 bcde ab abc
    2 2003-12-01 00:00:00.002 qwerty 1 3 werty qw we

    Parameters can be negative:

    >>> data = otp.Ticks(X=['12345', 'abcde', 'qwerty'])
    >>> data['START_SLICE'] = data['X'].str.slice(start=-3)
    >>> data['STOP_SLICE'] = data['X'].str.slice(stop=-1)
    >>> data['START_STOP_SLICE'] = data['X'].str.slice(start=-3, stop=-1)
    >>> otp.run(data)
    Time X START_SLICE STOP_SLICE START_STOP_SLICE
    0 2003-12-01 00:00:00.000 12345 345 1234 34
    1 2003-12-01 00:00:00.001 abcde cde abcd cd
    2 2003-12-01 00:00:00.002 qwerty rty qwert rt

    It is possible to use syntax with indexer to call this method:

    >>> data = otp.Ticks(X=['12345', 'abcde', 'qwerty'])
    >>> data['START_SLICE'] = data['X'].str[1:]
    >>> data['STOP_SLICE'] = data['X'].str[:3]
    >>> data['START_STOP_SLICE'] = data['X'].str[1:3]
    >>> otp.run(data)
    Time X START_SLICE STOP_SLICE START_STOP_SLICE
    0 2003-12-01 00:00:00.000 12345 2345 123 23
    1 2003-12-01 00:00:00.001 abcde bcde abc bc
    2 2003-12-01 00:00:00.002 qwerty werty qwe we
    """
    # The shape of the generated expression is chosen from the arguments given by the caller,
    # not from whatever values are later handed to the formatter.
    has_start = start is not None
    has_stop = stop is not None
    if not (has_start or has_stop):
        raise ValueError("At least one of the `start` or `stop` parameters should be set.")

    def _drop_head(text_expr, start_expr):
        # Expression for `text_expr` without the part that precedes `start_expr`.
        # We need the LEFT(...) workaround because simple RIGHT and SUBSTR
        # with a negative start parameter work strangely;
        # SUBSTR_UTF8 works fine but it is not supported by old builds.
        # SUBSTR returns '' when ABS(second parameter) >= STRLEN, hence the MAX(...) clamp.
        text_len = f'STRLEN({text_expr})'
        return f'SUBSTR(LEFT({text_expr},{text_len}),MAX({start_expr},-{text_len}))'

    def _drop_tail(text_expr, stop_expr):
        # Expression for `text_expr` cut at `stop_expr`;
        # a negative stop is counted from the end of the text.
        return (f'CASE({stop_expr}>=0,1,'
                f'SUBSTR({text_expr},0,{stop_expr}),'
                f'SUBSTR({text_expr},0,MAX(0,STRLEN({text_expr})+{stop_expr})))')

    def formatter(x, start, stop):
        x = ott.value2str(x)
        if not has_start:
            return _drop_tail(x, ott.value2str(stop))
        if not has_stop:
            return _drop_head(x, ott.value2str(start))

        stop_str = ott.value2str(stop)
        # y is x after cutting the left part (the right part of it still has to be cut)
        y = _drop_head(x, ott.value2str(start))
        # length of the already cut part (the left one)
        len_cut = f'(STRLEN({x})-STRLEN({y}))'
        # a non-negative stop refers to x, so it is shifted to refer to y
        stop_for_y = f'CASE({stop_str}>=0,1,{stop_str}-{len_cut},{stop_str})'
        return _drop_tail(y, stop_for_y)

    return _StrAccessor.Formatter(
        op_params=[self._base_column, start, stop],
        dtype=self._base_column.dtype,
        formatter=formatter,
    )
1153
+
1154
def __getitem__(self, item):
    """
    Support the indexer syntax: ``.str[i]`` and ``.str[start:stop]``.

    A slice object is forwarded to ``self.slice`` (a slice with a ``step`` is rejected
    with ``ValueError``); any other ``item`` is forwarded to ``self.get``.
    """
    if not isinstance(item, slice):
        return self.get(item)
    if item.step is not None:
        raise ValueError("`step` parameter is not supported.")
    return self.slice(start=item.start, stop=item.stop)
1160
+
1161
def like(self, pattern):
    r"""
    Check whether the value matches the SQL-like ``pattern``.

    Parameters
    ----------
    pattern: str or symbol parameter (:py:class:`~onetick.py.core._source._symbol_param._SymbolParamColumn`)
        Pattern to match the value with.
        The pattern can contain usual text characters and two special ones:

        * ``%`` represents zero or more characters
        * ``_`` represents a single character

        Use backslash ``\`` character to escape these special characters.

    Returns
    -------
    Operation
        ``True`` if the match was successful, ``False`` otherwise.
        Note that boolean Operation is converted to float if added as a column.

    Raises
    ------
    ValueError
        If ``pattern`` is neither a string nor a symbol parameter.

    Examples
    --------

    Use ``%`` character to specify any number of characters:

    >>> data = otp.Ticks(X=['a', 'ab', 'b_', 'b%'])
    >>> data['LIKE'] = data['X'].str.like('a%')
    >>> otp.run(data)
    Time X LIKE
    0 2003-12-01 00:00:00.000 a 1.0
    1 2003-12-01 00:00:00.001 ab 1.0
    2 2003-12-01 00:00:00.002 b_ 0.0
    3 2003-12-01 00:00:00.003 b% 0.0

    Use ``_`` special character to specify a single character:

    >>> data = otp.Ticks(X=['a', 'ab', 'b_', 'b%'])
    >>> data['LIKE'] = data['X'].str.like('a_')
    >>> otp.run(data)
    Time X LIKE
    0 2003-12-01 00:00:00.000 a 0.0
    1 2003-12-01 00:00:00.001 ab 1.0
    2 2003-12-01 00:00:00.002 b_ 0.0
    3 2003-12-01 00:00:00.003 b% 0.0

    Use backslash ``\`` character to escape special characters:

    >>> data = otp.Ticks(X=['a', 'ab', 'b_', 'b%'])
    >>> data['LIKE'] = data['X'].str.like(r'b\_')
    >>> otp.run(data)
    Time X LIKE
    0 2003-12-01 00:00:00.000 a 0.0
    1 2003-12-01 00:00:00.001 ab 0.0
    2 2003-12-01 00:00:00.002 b_ 1.0
    3 2003-12-01 00:00:00.003 b% 0.0

    This function can be used to filter out ticks:

    >>> data = otp.Ticks(X=['a', 'ab', 'b_', 'b%'])
    >>> data = data.where(data['X'].str.like('a%'))
    >>> otp.run(data)
    Time X
    0 2003-12-01 00:00:00.000 a
    1 2003-12-01 00:00:00.001 ab

    ``pattern`` can only be a constant expression, like string or symbol parameter:

    >>> data = otp.Ticks(X=['a', 'ab', 'b_', 'b%'])
    >>> data['LIKE'] = data['X'].str.like(data.Symbol['PATTERN', str])
    >>> otp.run(data, symbols=otp.Tick(SYMBOL_NAME='COMMON::AAPL', PATTERN='_'))['COMMON::AAPL']
    Time X LIKE
    0 2003-12-01 00:00:00.000 a 1.0
    1 2003-12-01 00:00:00.001 ab 0.0
    2 2003-12-01 00:00:00.002 b_ 0.0
    3 2003-12-01 00:00:00.003 b% 0.0
    """
    from onetick.py.core._source._symbol_param import _SymbolParamColumn

    constant_types = (str, _SymbolParamColumn)
    if not isinstance(pattern, constant_types):
        raise ValueError('like() function expects parameter to be a constant expression')

    def _render(column, pattern):
        subject = ott.value2str(column)
        template = ott.value2str(pattern)
        return f'{subject} LIKE {template}'

    return _StrAccessor.Formatter(
        op_params=[self._base_column, pattern],
        dtype=bool,
        formatter=_render,
    )
1246
+
1247
def ilike(self, pattern):
    r"""
    Check whether the value matches the SQL-like ``pattern``, ignoring the case of characters.

    Parameters
    ----------
    pattern: str or symbol parameter (:py:class:`~onetick.py.core._source._symbol_param._SymbolParamColumn`)
        Pattern to match the value with.
        The pattern can contain usual text characters and two special ones:

        * ``%`` represents zero or more characters
        * ``_`` represents a single character

        Use backslash ``\`` character to escape these special characters.

    Returns
    -------
    Operation
        ``True`` if the match was successful, ``False`` otherwise.
        Note that boolean Operation is converted to float if added as a column.

    Raises
    ------
    ValueError
        If ``pattern`` is neither a string nor a symbol parameter.

    Examples
    --------

    Use ``%`` character to specify any number of characters:

    .. testcode::
       :skipif: not is_ilike_supported()

       data = otp.Ticks(X=['a', 'ab', 'Ab', 'b_'])
       data['LIKE'] = data['X'].str.ilike('a%')
       df = otp.run(data)
       print(df)

    .. testoutput::

       Time X LIKE
       0 2003-12-01 00:00:00.000 a 1.0
       1 2003-12-01 00:00:00.001 ab 1.0
       2 2003-12-01 00:00:00.002 Ab 1.0
       3 2003-12-01 00:00:00.003 b_ 0.0

    Use ``_`` special character to specify a single character:

    .. testcode::
       :skipif: not is_ilike_supported()

       data = otp.Ticks(X=['a', 'ab', 'Ab', 'b_'])
       data['LIKE'] = data['X'].str.ilike('a_')
       df = otp.run(data)
       print(df)

    .. testoutput::

       Time X LIKE
       0 2003-12-01 00:00:00.000 a 0.0
       1 2003-12-01 00:00:00.001 ab 1.0
       2 2003-12-01 00:00:00.002 Ab 1.0
       3 2003-12-01 00:00:00.003 b_ 0.0

    Use backslash ``\`` character to escape special characters:

    .. testcode::
       :skipif: not is_ilike_supported()

       data = otp.Ticks(X=['a', 'ab', 'bb', 'b_'])
       data['LIKE'] = data['X'].str.ilike(r'b\_')
       df = otp.run(data)
       print(df)

    .. testoutput::

       Time X LIKE
       0 2003-12-01 00:00:00.000 a 0.0
       1 2003-12-01 00:00:00.001 ab 0.0
       2 2003-12-01 00:00:00.002 bb 0.0
       3 2003-12-01 00:00:00.003 b_ 1.0

    This function can be used to filter out ticks:

    .. testcode::
       :skipif: not is_ilike_supported()

       data = otp.Ticks(X=['a', 'ab', 'Ab', 'b_'])
       data = data.where(data['X'].str.ilike('a%'))
       df = otp.run(data)
       print(df)

    .. testoutput::

       Time X
       0 2003-12-01 00:00:00.000 a
       1 2003-12-01 00:00:00.001 ab
       2 2003-12-01 00:00:00.002 Ab

    ``pattern`` can only be a constant expression, like string or symbol parameter:

    .. testcode::
       :skipif: not is_ilike_supported()

       data = otp.Ticks(X=['a', 'ab', 'A', 'b_'])
       data['LIKE'] = data['X'].str.ilike(data.Symbol['PATTERN', str])
       df = otp.run(data, symbols=otp.Tick(SYMBOL_NAME='COMMON::AAPL', PATTERN='_'))['COMMON::AAPL']
       print(df)

    .. testoutput::

       Time X LIKE
       0 2003-12-01 00:00:00.000 a 1.0
       1 2003-12-01 00:00:00.001 ab 0.0
       2 2003-12-01 00:00:00.002 A 1.0
       3 2003-12-01 00:00:00.003 b_ 0.0
    """
    from onetick.py.core._source._symbol_param import _SymbolParamColumn

    constant_types = (str, _SymbolParamColumn)
    if not isinstance(pattern, constant_types):
        raise ValueError('ilike() function expects parameter to be a constant expression')

    def _render(column, pattern):
        subject = ott.value2str(column)
        template = ott.value2str(pattern)
        return f'{subject} ILIKE {template}'

    return _StrAccessor.Formatter(
        op_params=[self._base_column, pattern],
        dtype=bool,
        formatter=_render,
    )