pixeltable 0.3.14__py3-none-any.whl → 0.4.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +292 -105
- pixeltable/catalog/column.py +10 -8
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/insertable_table.py +25 -20
- pixeltable/catalog/schema_object.py +3 -6
- pixeltable/catalog/table.py +245 -189
- pixeltable/catalog/table_version.py +319 -201
- pixeltable/catalog/table_version_handle.py +15 -2
- pixeltable/catalog/table_version_path.py +60 -21
- pixeltable/catalog/view.py +14 -5
- pixeltable/dataframe.py +11 -9
- pixeltable/env.py +2 -4
- pixeltable/exec/in_memory_data_node.py +1 -1
- pixeltable/exec/sql_node.py +20 -11
- pixeltable/exprs/column_property_ref.py +15 -6
- pixeltable/exprs/column_ref.py +32 -11
- pixeltable/exprs/comparison.py +1 -1
- pixeltable/exprs/row_builder.py +4 -6
- pixeltable/exprs/rowid_ref.py +8 -0
- pixeltable/exprs/similarity_expr.py +1 -0
- pixeltable/func/query_template_function.py +1 -1
- pixeltable/functions/gemini.py +166 -33
- pixeltable/functions/math.py +63 -0
- pixeltable/functions/string.py +212 -58
- pixeltable/globals.py +7 -4
- pixeltable/index/base.py +5 -0
- pixeltable/index/btree.py +5 -0
- pixeltable/index/embedding_index.py +5 -0
- pixeltable/io/external_store.py +8 -29
- pixeltable/io/label_studio.py +1 -1
- pixeltable/io/parquet.py +4 -4
- pixeltable/io/table_data_conduit.py +0 -31
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_30.py +6 -11
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/util.py +3 -9
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +5 -1
- pixeltable/plan.py +4 -4
- pixeltable/share/packager.py +207 -15
- pixeltable/share/publish.py +2 -2
- pixeltable/store.py +31 -13
- pixeltable/utils/dbms.py +1 -1
- {pixeltable-0.3.14.dist-info → pixeltable-0.4.0rc1.dist-info}/METADATA +1 -1
- {pixeltable-0.3.14.dist-info → pixeltable-0.4.0rc1.dist-info}/RECORD +50 -49
- {pixeltable-0.3.14.dist-info → pixeltable-0.4.0rc1.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.14.dist-info → pixeltable-0.4.0rc1.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.14.dist-info → pixeltable-0.4.0rc1.dist-info}/entry_points.txt +0 -0
pixeltable/functions/string.py
CHANGED
|
@@ -12,8 +12,13 @@ t.select(t.str_col.capitalize()).collect()
|
|
|
12
12
|
"""
|
|
13
13
|
|
|
14
14
|
import builtins
|
|
15
|
+
import re
|
|
16
|
+
import textwrap
|
|
17
|
+
from string import whitespace
|
|
15
18
|
from typing import Any, Optional
|
|
16
19
|
|
|
20
|
+
import sqlalchemy as sql
|
|
21
|
+
|
|
17
22
|
import pixeltable as pxt
|
|
18
23
|
from pixeltable.utils.code import local_public_names
|
|
19
24
|
|
|
@@ -28,6 +33,11 @@ def capitalize(self: str) -> str:
|
|
|
28
33
|
return self.capitalize()
|
|
29
34
|
|
|
30
35
|
|
|
36
|
+
@capitalize.to_sql
|
|
37
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
38
|
+
return sql.func.concat(sql.func.upper(sql.func.left(self, 1)), sql.func.lower(sql.func.right(self, -1)))
|
|
39
|
+
|
|
40
|
+
|
|
31
41
|
@pxt.udf(is_method=True)
|
|
32
42
|
def casefold(self: str) -> str:
|
|
33
43
|
"""
|
|
@@ -53,26 +63,47 @@ def center(self: str, width: int, fillchar: str = ' ') -> str:
|
|
|
53
63
|
|
|
54
64
|
|
|
55
65
|
@pxt.udf(is_method=True)
|
|
56
|
-
def contains(self: str,
|
|
66
|
+
def contains(self: str, substr: str, case: bool = True) -> bool:
|
|
57
67
|
"""
|
|
58
|
-
Test if string contains
|
|
68
|
+
Test if string contains a substring.
|
|
59
69
|
|
|
60
70
|
Args:
|
|
61
|
-
|
|
71
|
+
substr: string literal or regular expression
|
|
62
72
|
case: if False, ignore case
|
|
63
|
-
flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
|
|
64
|
-
regex: if True, treat pattern as a regular expression
|
|
65
73
|
"""
|
|
66
|
-
if
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
+
if case:
|
|
75
|
+
return substr in self
|
|
76
|
+
else:
|
|
77
|
+
return substr.lower() in self.lower()
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@contains.to_sql
|
|
81
|
+
def _(
|
|
82
|
+
self: sql.ColumnElement, substr: sql.ColumnElement, case: Optional[sql.ColumnElement] = None
|
|
83
|
+
) -> sql.ColumnElement:
|
|
84
|
+
# Replace all occurrences of `%`, `_`, and `\` with escaped versions
|
|
85
|
+
escaped_substr = sql.func.regexp_replace(substr, r'(%|_|\\)', r'\\\1', 'g')
|
|
86
|
+
if case is None:
|
|
87
|
+
# Default `case` is True, so we do a case-sensitive comparison
|
|
88
|
+
return self.like(sql.func.concat('%', escaped_substr, '%'))
|
|
74
89
|
else:
|
|
75
|
-
|
|
90
|
+
# Toggle case-sensitivity based on the value of `case`
|
|
91
|
+
return sql.case(
|
|
92
|
+
(case, self.like(sql.func.concat('%', escaped_substr, '%'))),
|
|
93
|
+
else_=sql.func.lower(self).like(sql.func.concat('%', sql.func.lower(escaped_substr), '%')),
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@pxt.udf(is_method=True)
|
|
98
|
+
def contains_re(self: str, pattern: str, flags: int = 0) -> bool:
|
|
99
|
+
"""
|
|
100
|
+
Test if string contains a regular expression pattern.
|
|
101
|
+
|
|
102
|
+
Args:
|
|
103
|
+
pattern: regular expression pattern
|
|
104
|
+
flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
|
|
105
|
+
"""
|
|
106
|
+
return bool(re.search(pattern, self, flags))
|
|
76
107
|
|
|
77
108
|
|
|
78
109
|
@pxt.udf(is_method=True)
|
|
@@ -84,22 +115,27 @@ def count(self: str, pattern: str, flags: int = 0) -> int:
|
|
|
84
115
|
pattern: string literal or regular expression
|
|
85
116
|
flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
|
|
86
117
|
"""
|
|
87
|
-
import re
|
|
88
|
-
|
|
89
118
|
return builtins.len(re.findall(pattern, self, flags))
|
|
90
119
|
|
|
91
120
|
|
|
92
121
|
@pxt.udf(is_method=True)
|
|
93
|
-
def endswith(self: str,
|
|
122
|
+
def endswith(self: str, substr: str) -> bool:
|
|
94
123
|
"""
|
|
95
124
|
Return `True` if the string ends with the specified suffix, otherwise return `False`.
|
|
96
125
|
|
|
97
126
|
Equivalent to [`str.endswith()`](https://docs.python.org/3/library/stdtypes.html#str.endswith).
|
|
98
127
|
|
|
99
128
|
Args:
|
|
100
|
-
|
|
129
|
+
substr: string literal
|
|
101
130
|
"""
|
|
102
|
-
return self.endswith(
|
|
131
|
+
return self.endswith(substr)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
@endswith.to_sql
|
|
135
|
+
def _(self: sql.ColumnElement, substr: sql.ColumnElement) -> sql.ColumnElement:
|
|
136
|
+
# Replace all occurrences of `%`, `_`, and `\` with escaped versions
|
|
137
|
+
escaped_substr = sql.func.regexp_replace(substr, r'(%|_|\\)', r'\\\1', 'g')
|
|
138
|
+
return self.like(sql.func.concat('%', escaped_substr))
|
|
103
139
|
|
|
104
140
|
|
|
105
141
|
@pxt.udf(is_method=True)
|
|
@@ -113,13 +149,11 @@ def fill(self: str, width: int, **kwargs: Any) -> str:
|
|
|
113
149
|
width: Maximum line width.
|
|
114
150
|
kwargs: Additional keyword arguments to pass to `textwrap.fill()`.
|
|
115
151
|
"""
|
|
116
|
-
import textwrap
|
|
117
|
-
|
|
118
152
|
return textwrap.fill(self, width, **kwargs)
|
|
119
153
|
|
|
120
154
|
|
|
121
155
|
@pxt.udf(is_method=True)
|
|
122
|
-
def find(self: str, substr: str, start:
|
|
156
|
+
def find(self: str, substr: str, start: int = 0, end: Optional[int] = None) -> int:
|
|
123
157
|
"""
|
|
124
158
|
Return the lowest index in string where `substr` is found within the slice `s[start:end]`.
|
|
125
159
|
|
|
@@ -133,6 +167,23 @@ def find(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] =
|
|
|
133
167
|
return self.find(substr, start, end)
|
|
134
168
|
|
|
135
169
|
|
|
170
|
+
@find.to_sql
|
|
171
|
+
def _(
|
|
172
|
+
self: sql.ColumnElement,
|
|
173
|
+
substr: sql.ColumnElement,
|
|
174
|
+
start: sql.ColumnElement,
|
|
175
|
+
end: Optional[sql.ColumnElement] = None,
|
|
176
|
+
) -> sql.ColumnElement:
|
|
177
|
+
sl = pxt.functions.string.slice._to_sql(self, start, end)
|
|
178
|
+
if sl is None:
|
|
179
|
+
return None
|
|
180
|
+
|
|
181
|
+
strpos = sql.func.strpos(sl, substr)
|
|
182
|
+
return sql.case(
|
|
183
|
+
(strpos == 0, -1), (start >= 0, strpos + start - 1), else_=strpos + sql.func.char_length(self) + start - 1
|
|
184
|
+
)
|
|
185
|
+
|
|
186
|
+
|
|
136
187
|
@pxt.udf(is_method=True)
|
|
137
188
|
def findall(self: str, pattern: str, flags: int = 0) -> list:
|
|
138
189
|
"""
|
|
@@ -144,8 +195,6 @@ def findall(self: str, pattern: str, flags: int = 0) -> list:
|
|
|
144
195
|
pattern: regular expression pattern
|
|
145
196
|
flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
|
|
146
197
|
"""
|
|
147
|
-
import re
|
|
148
|
-
|
|
149
198
|
return re.findall(pattern, self, flags)
|
|
150
199
|
|
|
151
200
|
|
|
@@ -171,8 +220,6 @@ def fullmatch(self: str, pattern: str, case: bool = True, flags: int = 0) -> boo
|
|
|
171
220
|
case: if False, ignore case
|
|
172
221
|
flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
|
|
173
222
|
"""
|
|
174
|
-
import re
|
|
175
|
-
|
|
176
223
|
if not case:
|
|
177
224
|
flags |= re.IGNORECASE
|
|
178
225
|
_ = bool(re.fullmatch(pattern, self, flags))
|
|
@@ -180,7 +227,7 @@ def fullmatch(self: str, pattern: str, case: bool = True, flags: int = 0) -> boo
|
|
|
180
227
|
|
|
181
228
|
|
|
182
229
|
@pxt.udf(is_method=True)
|
|
183
|
-
def index(self: str, substr: str, start:
|
|
230
|
+
def index(self: str, substr: str, start: int = 0, end: Optional[int] = None) -> int:
|
|
184
231
|
"""
|
|
185
232
|
Return the lowest index in string where `substr` is found within the slice `[start:end]`.
|
|
186
233
|
Raises ValueError if `substr` is not found.
|
|
@@ -330,6 +377,11 @@ def len(self: str) -> int:
|
|
|
330
377
|
return builtins.len(self)
|
|
331
378
|
|
|
332
379
|
|
|
380
|
+
@len.to_sql
|
|
381
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
382
|
+
return sql.func.char_length(self)
|
|
383
|
+
|
|
384
|
+
|
|
333
385
|
@pxt.udf(is_method=True)
|
|
334
386
|
def ljust(self: str, width: int, fillchar: str = ' ') -> str:
|
|
335
387
|
"""
|
|
@@ -355,6 +407,11 @@ def lower(self: str) -> str:
|
|
|
355
407
|
return self.lower()
|
|
356
408
|
|
|
357
409
|
|
|
410
|
+
@lower.to_sql
|
|
411
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
412
|
+
return sql.func.lower(self)
|
|
413
|
+
|
|
414
|
+
|
|
358
415
|
@pxt.udf(is_method=True)
|
|
359
416
|
def lstrip(self: str, chars: Optional[str] = None) -> str:
|
|
360
417
|
"""
|
|
@@ -369,6 +426,11 @@ def lstrip(self: str, chars: Optional[str] = None) -> str:
|
|
|
369
426
|
return self.lstrip(chars)
|
|
370
427
|
|
|
371
428
|
|
|
429
|
+
@lstrip.to_sql
|
|
430
|
+
def _(self: sql.ColumnElement, chars: Optional[sql.ColumnElement] = None) -> sql.ColumnElement:
|
|
431
|
+
return sql.func.ltrim(self, chars if chars is not None else whitespace)
|
|
432
|
+
|
|
433
|
+
|
|
372
434
|
@pxt.udf(is_method=True)
|
|
373
435
|
def match(self: str, pattern: str, case: bool = True, flags: int = 0) -> bool:
|
|
374
436
|
"""
|
|
@@ -379,8 +441,6 @@ def match(self: str, pattern: str, case: bool = True, flags: int = 0) -> bool:
|
|
|
379
441
|
case: if False, ignore case
|
|
380
442
|
flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
|
|
381
443
|
"""
|
|
382
|
-
import re
|
|
383
|
-
|
|
384
444
|
if not case:
|
|
385
445
|
flags |= re.IGNORECASE
|
|
386
446
|
return bool(re.match(pattern, self, flags))
|
|
@@ -440,9 +500,12 @@ def removeprefix(self: str, prefix: str) -> str:
|
|
|
440
500
|
"""
|
|
441
501
|
Remove prefix. If the prefix is not present, returns string.
|
|
442
502
|
"""
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
503
|
+
return self.removeprefix(prefix)
|
|
504
|
+
|
|
505
|
+
|
|
506
|
+
@removeprefix.to_sql
|
|
507
|
+
def _(self: sql.ColumnElement, prefix: sql.ColumnElement) -> sql.ColumnElement:
|
|
508
|
+
return sql.case((startswith._to_sql(self, prefix), sql.func.right(self, -sql.func.char_length(prefix))), else_=self)
|
|
446
509
|
|
|
447
510
|
|
|
448
511
|
@pxt.udf(is_method=True)
|
|
@@ -450,9 +513,12 @@ def removesuffix(self: str, suffix: str) -> str:
|
|
|
450
513
|
"""
|
|
451
514
|
Remove suffix. If the suffix is not present, returns string.
|
|
452
515
|
"""
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
516
|
+
return self.removesuffix(suffix)
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
@removesuffix.to_sql
|
|
520
|
+
def _(self: sql.ColumnElement, suffix: sql.ColumnElement) -> sql.ColumnElement:
|
|
521
|
+
return sql.case((endswith._to_sql(self, suffix), sql.func.left(self, -sql.func.char_length(suffix))), else_=self)
|
|
456
522
|
|
|
457
523
|
|
|
458
524
|
@pxt.udf(is_method=True)
|
|
@@ -463,32 +529,65 @@ def repeat(self: str, n: int) -> str:
|
|
|
463
529
|
return self * n
|
|
464
530
|
|
|
465
531
|
|
|
532
|
+
@repeat.to_sql
|
|
533
|
+
def _(self: sql.ColumnElement, n: sql.ColumnElement) -> sql.ColumnElement:
|
|
534
|
+
return sql.func.repeat(self, n.cast(sql.types.INT))
|
|
535
|
+
|
|
536
|
+
|
|
466
537
|
@pxt.udf(is_method=True)
|
|
467
|
-
def replace(
|
|
468
|
-
self: str, pattern: str, repl: str, n: int = -1, case: bool = True, flags: int = 0, regex: bool = False
|
|
469
|
-
) -> str:
|
|
538
|
+
def replace(self: str, substr: str, repl: str, n: Optional[int] = None) -> str:
|
|
470
539
|
"""
|
|
471
|
-
Replace occurrences of `
|
|
540
|
+
Replace occurrences of `substr` with `repl`.
|
|
472
541
|
|
|
473
|
-
Equivalent to [`str.replace()`](https://docs.python.org/3/library/stdtypes.html#str.replace)
|
|
474
|
-
[`re.sub()`](https://docs.python.org/3/library/re.html#re.sub), depending on the value of regex.
|
|
542
|
+
Equivalent to [`str.replace()`](https://docs.python.org/3/library/stdtypes.html#str.replace).
|
|
475
543
|
|
|
476
544
|
Args:
|
|
477
|
-
|
|
545
|
+
substr: string literal
|
|
478
546
|
repl: replacement string
|
|
479
|
-
n: number of replacements to make (
|
|
480
|
-
|
|
547
|
+
n: number of replacements to make (if `None`, replace all occurrences)
|
|
548
|
+
"""
|
|
549
|
+
return self.replace(substr, repl, n or -1)
|
|
550
|
+
|
|
551
|
+
|
|
552
|
+
@replace.to_sql
|
|
553
|
+
def _(
|
|
554
|
+
self: sql.ColumnElement, substr: sql.ColumnElement, repl: sql.ColumnElement, n: Optional[sql.ColumnElement] = None
|
|
555
|
+
) -> sql.ColumnElement:
|
|
556
|
+
if n is not None:
|
|
557
|
+
return None # SQL does not support bounding the number of replacements
|
|
558
|
+
|
|
559
|
+
return sql.func.replace(self, substr, repl)
|
|
560
|
+
|
|
561
|
+
|
|
562
|
+
@pxt.udf(is_method=True)
|
|
563
|
+
def replace_re(self: str, pattern: str, repl: str, n: Optional[int] = None, flags: int = 0) -> str:
|
|
564
|
+
"""
|
|
565
|
+
Replace occurrences of a regular expression pattern with `repl`.
|
|
566
|
+
|
|
567
|
+
Equivalent to [`re.sub()`](https://docs.python.org/3/library/re.html#re.sub).
|
|
568
|
+
|
|
569
|
+
Args:
|
|
570
|
+
pattern: regular expression pattern
|
|
571
|
+
repl: replacement string
|
|
572
|
+
n: number of replacements to make (if `None`, replace all occurrences)
|
|
481
573
|
flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
|
|
482
|
-
regex: if True, treat pattern as a regular expression
|
|
483
574
|
"""
|
|
484
|
-
|
|
485
|
-
import re
|
|
575
|
+
return re.sub(pattern, repl, self, count=(n or 0), flags=flags)
|
|
486
576
|
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
577
|
+
|
|
578
|
+
@pxt.udf(is_method=True)
|
|
579
|
+
def reverse(self: str) -> str:
|
|
580
|
+
"""
|
|
581
|
+
Return a reversed copy of the string.
|
|
582
|
+
|
|
583
|
+
Equivalent to `str[::-1]`.
|
|
584
|
+
"""
|
|
585
|
+
return self[::-1]
|
|
586
|
+
|
|
587
|
+
|
|
588
|
+
@reverse.to_sql
|
|
589
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
590
|
+
return sql.func.reverse(self)
|
|
492
591
|
|
|
493
592
|
|
|
494
593
|
@pxt.udf(is_method=True)
|
|
@@ -556,6 +655,11 @@ def rstrip(self: str, chars: Optional[str] = None) -> str:
|
|
|
556
655
|
return self.rstrip(chars)
|
|
557
656
|
|
|
558
657
|
|
|
658
|
+
@rstrip.to_sql
|
|
659
|
+
def _(self: sql.ColumnElement, chars: Optional[sql.ColumnElement] = None) -> sql.ColumnElement:
|
|
660
|
+
return sql.func.rtrim(self, chars if chars is not None else whitespace)
|
|
661
|
+
|
|
662
|
+
|
|
559
663
|
@pxt.udf(is_method=True)
|
|
560
664
|
def slice(self: str, start: Optional[int] = None, stop: Optional[int] = None, step: Optional[int] = None) -> str:
|
|
561
665
|
"""
|
|
@@ -569,6 +673,41 @@ def slice(self: str, start: Optional[int] = None, stop: Optional[int] = None, st
|
|
|
569
673
|
return self[start:stop:step]
|
|
570
674
|
|
|
571
675
|
|
|
676
|
+
@slice.to_sql
|
|
677
|
+
def _(
|
|
678
|
+
self: sql.ColumnElement,
|
|
679
|
+
start: Optional[sql.ColumnElement] = None,
|
|
680
|
+
stop: Optional[sql.ColumnElement] = None,
|
|
681
|
+
step: Optional[sql.ColumnElement] = None,
|
|
682
|
+
) -> sql.ColumnElement:
|
|
683
|
+
if step is not None:
|
|
684
|
+
return None
|
|
685
|
+
|
|
686
|
+
if start is not None:
|
|
687
|
+
start = start.cast(sql.types.INT) # Postgres won't accept a BIGINT
|
|
688
|
+
start = sql.case(
|
|
689
|
+
(start >= 0, start + 1), # SQL is 1-based, Python is 0-based
|
|
690
|
+
else_=sql.func.char_length(self) + start + 1, # negative index
|
|
691
|
+
)
|
|
692
|
+
start = sql.func.greatest(start, 1)
|
|
693
|
+
|
|
694
|
+
if stop is not None:
|
|
695
|
+
stop = stop.cast(sql.types.INT) # Postgres won't accept a BIGINT
|
|
696
|
+
stop = sql.case(
|
|
697
|
+
(stop >= 0, stop + 1), # SQL is 1-based, Python is 0-based
|
|
698
|
+
else_=sql.func.char_length(self) + stop + 1, # negative index
|
|
699
|
+
)
|
|
700
|
+
stop = sql.func.greatest(stop, 0)
|
|
701
|
+
|
|
702
|
+
if start is None:
|
|
703
|
+
if stop is None:
|
|
704
|
+
return self
|
|
705
|
+
return sql.func.substr(self, 1, stop)
|
|
706
|
+
if stop is None:
|
|
707
|
+
return sql.func.substr(self, start)
|
|
708
|
+
return sql.func.substr(self, start, sql.func.greatest(stop - start, 0))
|
|
709
|
+
|
|
710
|
+
|
|
572
711
|
@pxt.udf(is_method=True)
|
|
573
712
|
def slice_replace(
|
|
574
713
|
self: str, start: Optional[int] = None, stop: Optional[int] = None, repl: Optional[str] = None
|
|
@@ -585,16 +724,23 @@ def slice_replace(
|
|
|
585
724
|
|
|
586
725
|
|
|
587
726
|
@pxt.udf(is_method=True)
|
|
588
|
-
def startswith(self: str,
|
|
727
|
+
def startswith(self: str, substr: str) -> int:
|
|
589
728
|
"""
|
|
590
|
-
Return `True` if string starts with `
|
|
729
|
+
Return `True` if string starts with `substr`, otherwise return `False`.
|
|
591
730
|
|
|
592
731
|
Equivalent to [`str.startswith()`](https://docs.python.org/3/library/stdtypes.html#str.startswith).
|
|
593
732
|
|
|
594
733
|
Args:
|
|
595
|
-
|
|
734
|
+
substr: string literal
|
|
596
735
|
"""
|
|
597
|
-
return self.startswith(
|
|
736
|
+
return self.startswith(substr)
|
|
737
|
+
|
|
738
|
+
|
|
739
|
+
@startswith.to_sql
|
|
740
|
+
def _(self: sql.ColumnElement, substr: sql.ColumnElement) -> sql.ColumnElement:
|
|
741
|
+
# Replace all occurrences of `%`, `_`, and `\` with escaped versions
|
|
742
|
+
escaped_substr = sql.func.regexp_replace(substr, r'(%|_|\\)', r'\\\1', 'g')
|
|
743
|
+
return self.like(sql.func.concat(escaped_substr, '%'))
|
|
598
744
|
|
|
599
745
|
|
|
600
746
|
@pxt.udf(is_method=True)
|
|
@@ -610,6 +756,11 @@ def strip(self: str, chars: Optional[str] = None) -> str:
|
|
|
610
756
|
return self.strip(chars)
|
|
611
757
|
|
|
612
758
|
|
|
759
|
+
@strip.to_sql
|
|
760
|
+
def _(self: sql.ColumnElement, chars: Optional[sql.ColumnElement] = None) -> sql.ColumnElement:
|
|
761
|
+
return sql.func.trim(self, chars if chars is not None else whitespace)
|
|
762
|
+
|
|
763
|
+
|
|
613
764
|
@pxt.udf(is_method=True)
|
|
614
765
|
def swapcase(self: str) -> str:
|
|
615
766
|
"""
|
|
@@ -641,6 +792,11 @@ def upper(self: str) -> str:
|
|
|
641
792
|
return self.upper()
|
|
642
793
|
|
|
643
794
|
|
|
795
|
+
@upper.to_sql
|
|
796
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
797
|
+
return sql.func.upper(self)
|
|
798
|
+
|
|
799
|
+
|
|
644
800
|
@pxt.udf(is_method=True)
|
|
645
801
|
def wrap(self: str, width: int, **kwargs: Any) -> list[str]:
|
|
646
802
|
"""
|
|
@@ -653,8 +809,6 @@ def wrap(self: str, width: int, **kwargs: Any) -> list[str]:
|
|
|
653
809
|
width: Maximum line width.
|
|
654
810
|
kwargs: Additional keyword arguments to pass to `textwrap.fill()`.
|
|
655
811
|
"""
|
|
656
|
-
import textwrap
|
|
657
|
-
|
|
658
812
|
return textwrap.wrap(self, width, **kwargs)
|
|
659
813
|
|
|
660
814
|
|
pixeltable/globals.py
CHANGED
|
@@ -272,7 +272,7 @@ def create_view(
|
|
|
272
272
|
if col_name in [c.name for c in tbl_version_path.columns()]:
|
|
273
273
|
raise excs.Error(
|
|
274
274
|
f'Column {col_name!r} already exists in the base table '
|
|
275
|
-
f'{tbl_version_path.get_column(col_name).tbl.
|
|
275
|
+
f'{tbl_version_path.get_column(col_name).tbl.name}.'
|
|
276
276
|
)
|
|
277
277
|
|
|
278
278
|
return Catalog.get().create_view(
|
|
@@ -422,7 +422,10 @@ def get_table(path: str) -> catalog.Table:
|
|
|
422
422
|
>>> tbl = pxt.get_table('my_snapshot')
|
|
423
423
|
"""
|
|
424
424
|
path_obj = catalog.Path(path)
|
|
425
|
-
|
|
425
|
+
tbl = Catalog.get().get_table(path_obj)
|
|
426
|
+
tv = tbl._tbl_version.get()
|
|
427
|
+
_logger.debug(f'get_table(): tbl={tv.id}:{tv.effective_version} sa_tbl={id(tv.store_tbl.sa_tbl):x} tv={id(tv):x}')
|
|
428
|
+
return tbl
|
|
426
429
|
|
|
427
430
|
|
|
428
431
|
def move(path: str, new_path: str) -> None:
|
|
@@ -493,8 +496,8 @@ def drop_table(
|
|
|
493
496
|
if isinstance(table, catalog.Table):
|
|
494
497
|
# if we're dropping a table by handle, we first need to get the current path, then drop the S lock on
|
|
495
498
|
# the Table record, and then get X locks in the correct order (first containing directory, then table)
|
|
496
|
-
with
|
|
497
|
-
tbl_path = table._path
|
|
499
|
+
with Catalog.get().begin_xact(for_write=False):
|
|
500
|
+
tbl_path = table._path()
|
|
498
501
|
else:
|
|
499
502
|
assert isinstance(table, str)
|
|
500
503
|
tbl_path = table
|
pixeltable/index/base.py
CHANGED
|
@@ -41,6 +41,11 @@ class IndexBase(abc.ABC):
|
|
|
41
41
|
"""Create the index on the index value column"""
|
|
42
42
|
pass
|
|
43
43
|
|
|
44
|
+
@abc.abstractmethod
|
|
45
|
+
def drop_index(self, index_name: str, index_value_col: catalog.Column) -> None:
|
|
46
|
+
"""Drop the index on the index value column"""
|
|
47
|
+
pass
|
|
48
|
+
|
|
44
49
|
@classmethod
|
|
45
50
|
@abc.abstractmethod
|
|
46
51
|
def display_name(cls) -> str:
|
pixeltable/index/btree.py
CHANGED
|
@@ -59,6 +59,11 @@ class BtreeIndex(IndexBase):
|
|
|
59
59
|
conn = Env.get().conn
|
|
60
60
|
idx.create(bind=conn)
|
|
61
61
|
|
|
62
|
+
def drop_index(self, index_name: str, index_value_col: 'catalog.Column') -> None:
|
|
63
|
+
"""Drop the index on the index value column"""
|
|
64
|
+
# TODO: implement
|
|
65
|
+
raise NotImplementedError()
|
|
66
|
+
|
|
62
67
|
@classmethod
|
|
63
68
|
def display_name(cls) -> str:
|
|
64
69
|
return 'btree'
|
|
pixeltable/index/embedding_index.py
CHANGED
|
@@ -148,6 +148,11 @@ class EmbeddingIndex(IndexBase):
|
|
|
148
148
|
conn = Env.get().conn
|
|
149
149
|
idx.create(bind=conn)
|
|
150
150
|
|
|
151
|
+
def drop_index(self, index_name: str, index_value_col: catalog.Column) -> None:
|
|
152
|
+
"""Drop the index on the index value column"""
|
|
153
|
+
# TODO: implement
|
|
154
|
+
raise NotImplementedError()
|
|
155
|
+
|
|
151
156
|
def similarity_clause(self, val_column: catalog.Column, item: Any) -> sql.ColumnElement:
|
|
152
157
|
"""Create a ColumnElement that represents '<val_column> <op> <item>'"""
|
|
153
158
|
assert isinstance(item, (str, PIL.Image.Image))
|
pixeltable/io/external_store.py
CHANGED
|
@@ -3,7 +3,6 @@ from __future__ import annotations
|
|
|
3
3
|
import abc
|
|
4
4
|
import itertools
|
|
5
5
|
import logging
|
|
6
|
-
import time
|
|
7
6
|
from dataclasses import dataclass
|
|
8
7
|
from typing import Any, Optional
|
|
9
8
|
from uuid import UUID
|
|
@@ -11,7 +10,7 @@ from uuid import UUID
|
|
|
11
10
|
import pixeltable.exceptions as excs
|
|
12
11
|
import pixeltable.type_system as ts
|
|
13
12
|
from pixeltable import Column, Table
|
|
14
|
-
from pixeltable.catalog import TableVersion
|
|
13
|
+
from pixeltable.catalog import TableVersion
|
|
15
14
|
|
|
16
15
|
_logger = logging.getLogger('pixeltable')
|
|
17
16
|
|
|
@@ -32,15 +31,11 @@ class ExternalStore(abc.ABC):
|
|
|
32
31
|
|
|
33
32
|
@abc.abstractmethod
|
|
34
33
|
def link(self, tbl_version: TableVersion) -> None:
|
|
35
|
-
"""
|
|
36
|
-
Called by `TableVersion.link()` to implement store-specific logic.
|
|
37
|
-
"""
|
|
34
|
+
"""Creates store-specific metadata needed to implement sync()."""
|
|
38
35
|
|
|
39
36
|
@abc.abstractmethod
|
|
40
37
|
def unlink(self, tbl_version: TableVersion) -> None:
|
|
41
|
-
"""
|
|
42
|
-
Called by `TableVersion.unlink()` to implement store-specific logic.
|
|
43
|
-
"""
|
|
38
|
+
"""Removes store-specific metadata created in link()."""
|
|
44
39
|
|
|
45
40
|
@abc.abstractmethod
|
|
46
41
|
def get_local_columns(self) -> list[Column]:
|
|
@@ -111,17 +106,10 @@ class Project(ExternalStore, abc.ABC):
|
|
|
111
106
|
|
|
112
107
|
if len(stored_proxies_needed) > 0:
|
|
113
108
|
_logger.info(f'Creating stored proxies for columns: {[col.name for col in stored_proxies_needed]}')
|
|
114
|
-
# Create stored proxies for columns that need one
|
|
115
|
-
|
|
116
|
-
tbl_version.version += 1
|
|
117
|
-
preceding_schema_version = tbl_version.schema_version
|
|
118
|
-
tbl_version.schema_version = tbl_version.version
|
|
119
|
-
proxy_cols = [self.create_stored_proxy(tbl_version, col) for col in stored_proxies_needed]
|
|
109
|
+
# Create stored proxies for columns that need one
|
|
110
|
+
proxy_cols = [self.create_stored_proxy(col) for col in stored_proxies_needed]
|
|
120
111
|
# Add the columns; this will also update table metadata.
|
|
121
|
-
tbl_version.
|
|
122
|
-
# We don't need to retain `UpdateStatus` since the stored proxies are intended to be
|
|
123
|
-
# invisible to the user.
|
|
124
|
-
tbl_version._update_md(time.time(), preceding_schema_version=preceding_schema_version)
|
|
112
|
+
tbl_version.add_columns(proxy_cols, print_stats=False, on_error='ignore')
|
|
125
113
|
|
|
126
114
|
def unlink(self, tbl_version: TableVersion) -> None:
|
|
127
115
|
# Determine which stored proxies can be deleted. (A stored proxy can be deleted if it is not referenced by
|
|
@@ -132,15 +120,10 @@ class Project(ExternalStore, abc.ABC):
|
|
|
132
120
|
deletions_needed = deletions_needed.difference(set(store.stored_proxies.values()))
|
|
133
121
|
if len(deletions_needed) > 0:
|
|
134
122
|
_logger.info(f'Removing stored proxies for columns: {[col.name for col in deletions_needed]}')
|
|
135
|
-
# Delete stored proxies that are no longer needed.
|
|
136
|
-
tbl_version.version += 1
|
|
137
|
-
preceding_schema_version = tbl_version.schema_version
|
|
138
|
-
tbl_version.schema_version = tbl_version.version
|
|
139
123
|
tbl_version._drop_columns(deletions_needed)
|
|
140
124
|
self.stored_proxies.clear()
|
|
141
|
-
tbl_version._update_md(time.time(), preceding_schema_version=preceding_schema_version)
|
|
142
125
|
|
|
143
|
-
def create_stored_proxy(self,
|
|
126
|
+
def create_stored_proxy(self, col: Column) -> Column:
|
|
144
127
|
"""
|
|
145
128
|
Creates a proxy column for the specified column. The proxy column will be created in the specified
|
|
146
129
|
`TableVersion`.
|
|
@@ -158,12 +141,7 @@ class Project(ExternalStore, abc.ABC):
|
|
|
158
141
|
# Once `destination` is implemented, it can be replaced with a simple `ColumnRef`.
|
|
159
142
|
computed_with=exprs.ColumnRef(col).apply(lambda x: x, col_type=col.col_type),
|
|
160
143
|
stored=True,
|
|
161
|
-
col_id=tbl_version.next_col_id,
|
|
162
|
-
sa_col_type=col.col_type.to_sa_type(),
|
|
163
|
-
schema_version_add=tbl_version.schema_version,
|
|
164
144
|
)
|
|
165
|
-
proxy_col.tbl = TableVersionHandle(tbl_version.id, tbl_version.effective_version, tbl_version=tbl_version)
|
|
166
|
-
tbl_version.next_col_id += 1
|
|
167
145
|
self.stored_proxies[col] = proxy_col
|
|
168
146
|
return proxy_col
|
|
169
147
|
|
|
@@ -213,6 +191,7 @@ class Project(ExternalStore, abc.ABC):
|
|
|
213
191
|
external (import or export) columns.
|
|
214
192
|
If validation fails, an exception will be raised. If validation succeeds, a new mapping will be returned
|
|
215
193
|
in which the Pixeltable column names are resolved to the corresponding `Column` objects.
|
|
194
|
+
TODO: return columns as names or qualified ids
|
|
216
195
|
"""
|
|
217
196
|
from pixeltable import exprs
|
|
218
197
|
|
pixeltable/io/label_studio.py
CHANGED
|
@@ -577,7 +577,7 @@ class LabelStudioProject(Project):
|
|
|
577
577
|
else:
|
|
578
578
|
local_annotations_column = next(k for k, v in col_mapping.items() if v == ANNOTATIONS_COLUMN)
|
|
579
579
|
if local_annotations_column not in t._schema:
|
|
580
|
-
t.add_columns({local_annotations_column: ts.
|
|
580
|
+
t.add_columns({local_annotations_column: ts.Json})
|
|
581
581
|
|
|
582
582
|
resolved_col_mapping = cls.validate_columns(
|
|
583
583
|
t, config.export_columns, {ANNOTATIONS_COLUMN: ts.JsonType(nullable=True)}, col_mapping
|
pixeltable/io/parquet.py
CHANGED
|
@@ -14,7 +14,7 @@ import PIL.Image
|
|
|
14
14
|
|
|
15
15
|
import pixeltable as pxt
|
|
16
16
|
import pixeltable.exceptions as excs
|
|
17
|
-
from pixeltable.
|
|
17
|
+
from pixeltable.catalog import Catalog
|
|
18
18
|
from pixeltable.utils.transactional_directory import transactional_directory
|
|
19
19
|
|
|
20
20
|
if typing.TYPE_CHECKING:
|
|
@@ -87,7 +87,7 @@ def export_parquet(
|
|
|
87
87
|
current_value_batch: dict[str, deque] = {k: deque() for k in df.schema}
|
|
88
88
|
current_byte_estimate = 0
|
|
89
89
|
|
|
90
|
-
with
|
|
90
|
+
with Catalog.get().begin_xact(for_write=False):
|
|
91
91
|
for data_row in df._exec():
|
|
92
92
|
for (col_name, col_type), e in zip(df.schema.items(), df._select_list_exprs):
|
|
93
93
|
val = data_row[e.slot_idx]
|
|
@@ -112,11 +112,11 @@ def export_parquet(
|
|
|
112
112
|
length = len(val)
|
|
113
113
|
elif col_type.is_string_type():
|
|
114
114
|
length = len(val)
|
|
115
|
-
elif col_type.is_video_type():
|
|
115
|
+
elif col_type.is_video_type() or col_type.is_audio_type():
|
|
116
116
|
if data_row.file_paths is not None and data_row.file_paths[e.slot_idx] is not None:
|
|
117
117
|
val = data_row.file_paths[e.slot_idx]
|
|
118
118
|
else:
|
|
119
|
-
raise excs.Error(f'unknown video type {type(val)}')
|
|
119
|
+
raise excs.Error(f'unknown audio/video type {type(val)}')
|
|
120
120
|
length = len(val)
|
|
121
121
|
elif col_type.is_json_type():
|
|
122
122
|
val = json.dumps(val)
|