pixeltable 0.3.15__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -1
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +9 -1
- pixeltable/catalog/catalog.py +559 -134
- pixeltable/catalog/column.py +36 -32
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/globals.py +12 -0
- pixeltable/catalog/insertable_table.py +30 -25
- pixeltable/catalog/schema_object.py +9 -6
- pixeltable/catalog/table.py +334 -267
- pixeltable/catalog/table_version.py +358 -241
- pixeltable/catalog/table_version_handle.py +18 -2
- pixeltable/catalog/table_version_path.py +86 -16
- pixeltable/catalog/view.py +47 -23
- pixeltable/dataframe.py +198 -19
- pixeltable/env.py +6 -4
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +1 -1
- pixeltable/exec/exec_node.py +2 -0
- pixeltable/exec/expr_eval/evaluators.py +4 -1
- pixeltable/exec/expr_eval/expr_eval_node.py +4 -4
- pixeltable/exec/in_memory_data_node.py +1 -1
- pixeltable/exec/sql_node.py +188 -22
- pixeltable/exprs/column_property_ref.py +16 -6
- pixeltable/exprs/column_ref.py +33 -11
- pixeltable/exprs/comparison.py +1 -1
- pixeltable/exprs/data_row.py +5 -3
- pixeltable/exprs/expr.py +11 -4
- pixeltable/exprs/literal.py +2 -0
- pixeltable/exprs/row_builder.py +4 -6
- pixeltable/exprs/rowid_ref.py +8 -0
- pixeltable/exprs/similarity_expr.py +1 -0
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +5 -3
- pixeltable/func/tools.py +12 -2
- pixeltable/func/udf.py +2 -2
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/anthropic.py +19 -45
- pixeltable/functions/deepseek.py +19 -38
- pixeltable/functions/fireworks.py +9 -18
- pixeltable/functions/gemini.py +2 -3
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/llama_cpp.py +6 -6
- pixeltable/functions/mistralai.py +16 -53
- pixeltable/functions/ollama.py +1 -1
- pixeltable/functions/openai.py +82 -165
- pixeltable/functions/string.py +212 -58
- pixeltable/functions/together.py +22 -80
- pixeltable/globals.py +10 -4
- pixeltable/index/base.py +5 -0
- pixeltable/index/btree.py +5 -0
- pixeltable/index/embedding_index.py +5 -0
- pixeltable/io/external_store.py +10 -31
- pixeltable/io/label_studio.py +5 -5
- pixeltable/io/parquet.py +2 -2
- pixeltable/io/table_data_conduit.py +1 -32
- pixeltable/metadata/__init__.py +11 -2
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_30.py +6 -11
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/util.py +3 -9
- pixeltable/metadata/notes.py +3 -0
- pixeltable/metadata/schema.py +13 -1
- pixeltable/plan.py +135 -12
- pixeltable/share/packager.py +138 -14
- pixeltable/share/publish.py +2 -2
- pixeltable/store.py +19 -13
- pixeltable/type_system.py +30 -0
- pixeltable/utils/dbms.py +1 -1
- pixeltable/utils/formatter.py +64 -42
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0.dist-info}/METADATA +2 -1
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0.dist-info}/RECORD +78 -73
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0.dist-info}/entry_points.txt +0 -0
pixeltable/functions/string.py
CHANGED
|
@@ -12,8 +12,13 @@ t.select(t.str_col.capitalize()).collect()
|
|
|
12
12
|
"""
|
|
13
13
|
|
|
14
14
|
import builtins
|
|
15
|
+
import re
|
|
16
|
+
import textwrap
|
|
17
|
+
from string import whitespace
|
|
15
18
|
from typing import Any, Optional
|
|
16
19
|
|
|
20
|
+
import sqlalchemy as sql
|
|
21
|
+
|
|
17
22
|
import pixeltable as pxt
|
|
18
23
|
from pixeltable.utils.code import local_public_names
|
|
19
24
|
|
|
@@ -28,6 +33,11 @@ def capitalize(self: str) -> str:
|
|
|
28
33
|
return self.capitalize()
|
|
29
34
|
|
|
30
35
|
|
|
36
|
+
@capitalize.to_sql
|
|
37
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
38
|
+
return sql.func.concat(sql.func.upper(sql.func.left(self, 1)), sql.func.lower(sql.func.right(self, -1)))
|
|
39
|
+
|
|
40
|
+
|
|
31
41
|
@pxt.udf(is_method=True)
|
|
32
42
|
def casefold(self: str) -> str:
|
|
33
43
|
"""
|
|
@@ -53,26 +63,47 @@ def center(self: str, width: int, fillchar: str = ' ') -> str:
|
|
|
53
63
|
|
|
54
64
|
|
|
55
65
|
@pxt.udf(is_method=True)
|
|
56
|
-
def contains(self: str,
|
|
66
|
+
def contains(self: str, substr: str, case: bool = True) -> bool:
|
|
57
67
|
"""
|
|
58
|
-
Test if string contains
|
|
68
|
+
Test if string contains a substring.
|
|
59
69
|
|
|
60
70
|
Args:
|
|
61
|
-
|
|
71
|
+
substr: string literal or regular expression
|
|
62
72
|
case: if False, ignore case
|
|
63
|
-
flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
|
|
64
|
-
regex: if True, treat pattern as a regular expression
|
|
65
73
|
"""
|
|
66
|
-
if
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
+
if case:
|
|
75
|
+
return substr in self
|
|
76
|
+
else:
|
|
77
|
+
return substr.lower() in self.lower()
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@contains.to_sql
|
|
81
|
+
def _(
|
|
82
|
+
self: sql.ColumnElement, substr: sql.ColumnElement, case: Optional[sql.ColumnElement] = None
|
|
83
|
+
) -> sql.ColumnElement:
|
|
84
|
+
# Replace all occurrences of `%`, `_`, and `\` with escaped versions
|
|
85
|
+
escaped_substr = sql.func.regexp_replace(substr, r'(%|_|\\)', r'\\\1', 'g')
|
|
86
|
+
if case is None:
|
|
87
|
+
# Default `case` is True, so we do a case-sensitive comparison
|
|
88
|
+
return self.like(sql.func.concat('%', escaped_substr, '%'))
|
|
74
89
|
else:
|
|
75
|
-
|
|
90
|
+
# Toggle case-sensitivity based on the value of `case`
|
|
91
|
+
return sql.case(
|
|
92
|
+
(case, self.like(sql.func.concat('%', escaped_substr, '%'))),
|
|
93
|
+
else_=sql.func.lower(self).like(sql.func.concat('%', sql.func.lower(escaped_substr), '%')),
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@pxt.udf(is_method=True)
|
|
98
|
+
def contains_re(self: str, pattern: str, flags: int = 0) -> bool:
|
|
99
|
+
"""
|
|
100
|
+
Test if string contains a regular expression pattern.
|
|
101
|
+
|
|
102
|
+
Args:
|
|
103
|
+
pattern: regular expression pattern
|
|
104
|
+
flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
|
|
105
|
+
"""
|
|
106
|
+
return bool(re.search(pattern, self, flags))
|
|
76
107
|
|
|
77
108
|
|
|
78
109
|
@pxt.udf(is_method=True)
|
|
@@ -84,22 +115,27 @@ def count(self: str, pattern: str, flags: int = 0) -> int:
|
|
|
84
115
|
pattern: string literal or regular expression
|
|
85
116
|
flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
|
|
86
117
|
"""
|
|
87
|
-
import re
|
|
88
|
-
|
|
89
118
|
return builtins.len(re.findall(pattern, self, flags))
|
|
90
119
|
|
|
91
120
|
|
|
92
121
|
@pxt.udf(is_method=True)
|
|
93
|
-
def endswith(self: str,
|
|
122
|
+
def endswith(self: str, substr: str) -> bool:
|
|
94
123
|
"""
|
|
95
124
|
Return `True` if the string ends with the specified suffix, otherwise return `False`.
|
|
96
125
|
|
|
97
126
|
Equivalent to [`str.endswith()`](https://docs.python.org/3/library/stdtypes.html#str.endswith).
|
|
98
127
|
|
|
99
128
|
Args:
|
|
100
|
-
|
|
129
|
+
substr: string literal
|
|
101
130
|
"""
|
|
102
|
-
return self.endswith(
|
|
131
|
+
return self.endswith(substr)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
@endswith.to_sql
|
|
135
|
+
def _(self: sql.ColumnElement, substr: sql.ColumnElement) -> sql.ColumnElement:
|
|
136
|
+
# Replace all occurrences of `%`, `_`, and `\` with escaped versions
|
|
137
|
+
escaped_substr = sql.func.regexp_replace(substr, r'(%|_|\\)', r'\\\1', 'g')
|
|
138
|
+
return self.like(sql.func.concat('%', escaped_substr))
|
|
103
139
|
|
|
104
140
|
|
|
105
141
|
@pxt.udf(is_method=True)
|
|
@@ -113,13 +149,11 @@ def fill(self: str, width: int, **kwargs: Any) -> str:
|
|
|
113
149
|
width: Maximum line width.
|
|
114
150
|
kwargs: Additional keyword arguments to pass to `textwrap.fill()`.
|
|
115
151
|
"""
|
|
116
|
-
import textwrap
|
|
117
|
-
|
|
118
152
|
return textwrap.fill(self, width, **kwargs)
|
|
119
153
|
|
|
120
154
|
|
|
121
155
|
@pxt.udf(is_method=True)
|
|
122
|
-
def find(self: str, substr: str, start:
|
|
156
|
+
def find(self: str, substr: str, start: int = 0, end: Optional[int] = None) -> int:
|
|
123
157
|
"""
|
|
124
158
|
Return the lowest index in string where `substr` is found within the slice `s[start:end]`.
|
|
125
159
|
|
|
@@ -133,6 +167,23 @@ def find(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] =
|
|
|
133
167
|
return self.find(substr, start, end)
|
|
134
168
|
|
|
135
169
|
|
|
170
|
+
@find.to_sql
|
|
171
|
+
def _(
|
|
172
|
+
self: sql.ColumnElement,
|
|
173
|
+
substr: sql.ColumnElement,
|
|
174
|
+
start: sql.ColumnElement,
|
|
175
|
+
end: Optional[sql.ColumnElement] = None,
|
|
176
|
+
) -> sql.ColumnElement:
|
|
177
|
+
sl = pxt.functions.string.slice._to_sql(self, start, end)
|
|
178
|
+
if sl is None:
|
|
179
|
+
return None
|
|
180
|
+
|
|
181
|
+
strpos = sql.func.strpos(sl, substr)
|
|
182
|
+
return sql.case(
|
|
183
|
+
(strpos == 0, -1), (start >= 0, strpos + start - 1), else_=strpos + sql.func.char_length(self) + start - 1
|
|
184
|
+
)
|
|
185
|
+
|
|
186
|
+
|
|
136
187
|
@pxt.udf(is_method=True)
|
|
137
188
|
def findall(self: str, pattern: str, flags: int = 0) -> list:
|
|
138
189
|
"""
|
|
@@ -144,8 +195,6 @@ def findall(self: str, pattern: str, flags: int = 0) -> list:
|
|
|
144
195
|
pattern: regular expression pattern
|
|
145
196
|
flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
|
|
146
197
|
"""
|
|
147
|
-
import re
|
|
148
|
-
|
|
149
198
|
return re.findall(pattern, self, flags)
|
|
150
199
|
|
|
151
200
|
|
|
@@ -171,8 +220,6 @@ def fullmatch(self: str, pattern: str, case: bool = True, flags: int = 0) -> boo
|
|
|
171
220
|
case: if False, ignore case
|
|
172
221
|
flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
|
|
173
222
|
"""
|
|
174
|
-
import re
|
|
175
|
-
|
|
176
223
|
if not case:
|
|
177
224
|
flags |= re.IGNORECASE
|
|
178
225
|
_ = bool(re.fullmatch(pattern, self, flags))
|
|
@@ -180,7 +227,7 @@ def fullmatch(self: str, pattern: str, case: bool = True, flags: int = 0) -> boo
|
|
|
180
227
|
|
|
181
228
|
|
|
182
229
|
@pxt.udf(is_method=True)
|
|
183
|
-
def index(self: str, substr: str, start:
|
|
230
|
+
def index(self: str, substr: str, start: int = 0, end: Optional[int] = None) -> int:
|
|
184
231
|
"""
|
|
185
232
|
Return the lowest index in string where `substr` is found within the slice `[start:end]`.
|
|
186
233
|
Raises ValueError if `substr` is not found.
|
|
@@ -330,6 +377,11 @@ def len(self: str) -> int:
|
|
|
330
377
|
return builtins.len(self)
|
|
331
378
|
|
|
332
379
|
|
|
380
|
+
@len.to_sql
|
|
381
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
382
|
+
return sql.func.char_length(self)
|
|
383
|
+
|
|
384
|
+
|
|
333
385
|
@pxt.udf(is_method=True)
|
|
334
386
|
def ljust(self: str, width: int, fillchar: str = ' ') -> str:
|
|
335
387
|
"""
|
|
@@ -355,6 +407,11 @@ def lower(self: str) -> str:
|
|
|
355
407
|
return self.lower()
|
|
356
408
|
|
|
357
409
|
|
|
410
|
+
@lower.to_sql
|
|
411
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
412
|
+
return sql.func.lower(self)
|
|
413
|
+
|
|
414
|
+
|
|
358
415
|
@pxt.udf(is_method=True)
|
|
359
416
|
def lstrip(self: str, chars: Optional[str] = None) -> str:
|
|
360
417
|
"""
|
|
@@ -369,6 +426,11 @@ def lstrip(self: str, chars: Optional[str] = None) -> str:
|
|
|
369
426
|
return self.lstrip(chars)
|
|
370
427
|
|
|
371
428
|
|
|
429
|
+
@lstrip.to_sql
|
|
430
|
+
def _(self: sql.ColumnElement, chars: Optional[sql.ColumnElement] = None) -> sql.ColumnElement:
|
|
431
|
+
return sql.func.ltrim(self, chars if chars is not None else whitespace)
|
|
432
|
+
|
|
433
|
+
|
|
372
434
|
@pxt.udf(is_method=True)
|
|
373
435
|
def match(self: str, pattern: str, case: bool = True, flags: int = 0) -> bool:
|
|
374
436
|
"""
|
|
@@ -379,8 +441,6 @@ def match(self: str, pattern: str, case: bool = True, flags: int = 0) -> bool:
|
|
|
379
441
|
case: if False, ignore case
|
|
380
442
|
flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
|
|
381
443
|
"""
|
|
382
|
-
import re
|
|
383
|
-
|
|
384
444
|
if not case:
|
|
385
445
|
flags |= re.IGNORECASE
|
|
386
446
|
return bool(re.match(pattern, self, flags))
|
|
@@ -440,9 +500,12 @@ def removeprefix(self: str, prefix: str) -> str:
|
|
|
440
500
|
"""
|
|
441
501
|
Remove prefix. If the prefix is not present, returns string.
|
|
442
502
|
"""
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
503
|
+
return self.removeprefix(prefix)
|
|
504
|
+
|
|
505
|
+
|
|
506
|
+
@removeprefix.to_sql
|
|
507
|
+
def _(self: sql.ColumnElement, prefix: sql.ColumnElement) -> sql.ColumnElement:
|
|
508
|
+
return sql.case((startswith._to_sql(self, prefix), sql.func.right(self, -sql.func.char_length(prefix))), else_=self)
|
|
446
509
|
|
|
447
510
|
|
|
448
511
|
@pxt.udf(is_method=True)
|
|
@@ -450,9 +513,12 @@ def removesuffix(self: str, suffix: str) -> str:
|
|
|
450
513
|
"""
|
|
451
514
|
Remove suffix. If the suffix is not present, returns string.
|
|
452
515
|
"""
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
516
|
+
return self.removesuffix(suffix)
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
@removesuffix.to_sql
|
|
520
|
+
def _(self: sql.ColumnElement, suffix: sql.ColumnElement) -> sql.ColumnElement:
|
|
521
|
+
return sql.case((endswith._to_sql(self, suffix), sql.func.left(self, -sql.func.char_length(suffix))), else_=self)
|
|
456
522
|
|
|
457
523
|
|
|
458
524
|
@pxt.udf(is_method=True)
|
|
@@ -463,32 +529,65 @@ def repeat(self: str, n: int) -> str:
|
|
|
463
529
|
return self * n
|
|
464
530
|
|
|
465
531
|
|
|
532
|
+
@repeat.to_sql
|
|
533
|
+
def _(self: sql.ColumnElement, n: sql.ColumnElement) -> sql.ColumnElement:
|
|
534
|
+
return sql.func.repeat(self, n.cast(sql.types.INT))
|
|
535
|
+
|
|
536
|
+
|
|
466
537
|
@pxt.udf(is_method=True)
|
|
467
|
-
def replace(
|
|
468
|
-
self: str, pattern: str, repl: str, n: int = -1, case: bool = True, flags: int = 0, regex: bool = False
|
|
469
|
-
) -> str:
|
|
538
|
+
def replace(self: str, substr: str, repl: str, n: Optional[int] = None) -> str:
|
|
470
539
|
"""
|
|
471
|
-
Replace occurrences of `
|
|
540
|
+
Replace occurrences of `substr` with `repl`.
|
|
472
541
|
|
|
473
|
-
Equivalent to [`str.replace()`](https://docs.python.org/3/library/stdtypes.html#str.replace)
|
|
474
|
-
[`re.sub()`](https://docs.python.org/3/library/re.html#re.sub), depending on the value of regex.
|
|
542
|
+
Equivalent to [`str.replace()`](https://docs.python.org/3/library/stdtypes.html#str.replace).
|
|
475
543
|
|
|
476
544
|
Args:
|
|
477
|
-
|
|
545
|
+
substr: string literal
|
|
478
546
|
repl: replacement string
|
|
479
|
-
n: number of replacements to make (
|
|
480
|
-
|
|
547
|
+
n: number of replacements to make (if `None`, replace all occurrences)
|
|
548
|
+
"""
|
|
549
|
+
return self.replace(substr, repl, n or -1)
|
|
550
|
+
|
|
551
|
+
|
|
552
|
+
@replace.to_sql
|
|
553
|
+
def _(
|
|
554
|
+
self: sql.ColumnElement, substr: sql.ColumnElement, repl: sql.ColumnElement, n: Optional[sql.ColumnElement] = None
|
|
555
|
+
) -> sql.ColumnElement:
|
|
556
|
+
if n is not None:
|
|
557
|
+
return None # SQL does not support bounding the number of replacements
|
|
558
|
+
|
|
559
|
+
return sql.func.replace(self, substr, repl)
|
|
560
|
+
|
|
561
|
+
|
|
562
|
+
@pxt.udf(is_method=True)
|
|
563
|
+
def replace_re(self: str, pattern: str, repl: str, n: Optional[int] = None, flags: int = 0) -> str:
|
|
564
|
+
"""
|
|
565
|
+
Replace occurrences of a regular expression pattern with `repl`.
|
|
566
|
+
|
|
567
|
+
Equivalent to [`re.sub()`](https://docs.python.org/3/library/re.html#re.sub).
|
|
568
|
+
|
|
569
|
+
Args:
|
|
570
|
+
pattern: regular expression pattern
|
|
571
|
+
repl: replacement string
|
|
572
|
+
n: number of replacements to make (if `None`, replace all occurrences)
|
|
481
573
|
flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
|
|
482
|
-
regex: if True, treat pattern as a regular expression
|
|
483
574
|
"""
|
|
484
|
-
|
|
485
|
-
import re
|
|
575
|
+
return re.sub(pattern, repl, self, count=(n or 0), flags=flags)
|
|
486
576
|
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
577
|
+
|
|
578
|
+
@pxt.udf(is_method=True)
|
|
579
|
+
def reverse(self: str) -> str:
|
|
580
|
+
"""
|
|
581
|
+
Return a reversed copy of the string.
|
|
582
|
+
|
|
583
|
+
Equivalent to `str[::-1]`.
|
|
584
|
+
"""
|
|
585
|
+
return self[::-1]
|
|
586
|
+
|
|
587
|
+
|
|
588
|
+
@reverse.to_sql
|
|
589
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
590
|
+
return sql.func.reverse(self)
|
|
492
591
|
|
|
493
592
|
|
|
494
593
|
@pxt.udf(is_method=True)
|
|
@@ -556,6 +655,11 @@ def rstrip(self: str, chars: Optional[str] = None) -> str:
|
|
|
556
655
|
return self.rstrip(chars)
|
|
557
656
|
|
|
558
657
|
|
|
658
|
+
@rstrip.to_sql
|
|
659
|
+
def _(self: sql.ColumnElement, chars: Optional[sql.ColumnElement] = None) -> sql.ColumnElement:
|
|
660
|
+
return sql.func.rtrim(self, chars if chars is not None else whitespace)
|
|
661
|
+
|
|
662
|
+
|
|
559
663
|
@pxt.udf(is_method=True)
|
|
560
664
|
def slice(self: str, start: Optional[int] = None, stop: Optional[int] = None, step: Optional[int] = None) -> str:
|
|
561
665
|
"""
|
|
@@ -569,6 +673,41 @@ def slice(self: str, start: Optional[int] = None, stop: Optional[int] = None, st
|
|
|
569
673
|
return self[start:stop:step]
|
|
570
674
|
|
|
571
675
|
|
|
676
|
+
@slice.to_sql
|
|
677
|
+
def _(
|
|
678
|
+
self: sql.ColumnElement,
|
|
679
|
+
start: Optional[sql.ColumnElement] = None,
|
|
680
|
+
stop: Optional[sql.ColumnElement] = None,
|
|
681
|
+
step: Optional[sql.ColumnElement] = None,
|
|
682
|
+
) -> sql.ColumnElement:
|
|
683
|
+
if step is not None:
|
|
684
|
+
return None
|
|
685
|
+
|
|
686
|
+
if start is not None:
|
|
687
|
+
start = start.cast(sql.types.INT) # Postgres won't accept a BIGINT
|
|
688
|
+
start = sql.case(
|
|
689
|
+
(start >= 0, start + 1), # SQL is 1-based, Python is 0-based
|
|
690
|
+
else_=sql.func.char_length(self) + start + 1, # negative index
|
|
691
|
+
)
|
|
692
|
+
start = sql.func.greatest(start, 1)
|
|
693
|
+
|
|
694
|
+
if stop is not None:
|
|
695
|
+
stop = stop.cast(sql.types.INT) # Postgres won't accept a BIGINT
|
|
696
|
+
stop = sql.case(
|
|
697
|
+
(stop >= 0, stop + 1), # SQL is 1-based, Python is 0-based
|
|
698
|
+
else_=sql.func.char_length(self) + stop + 1, # negative index
|
|
699
|
+
)
|
|
700
|
+
stop = sql.func.greatest(stop, 0)
|
|
701
|
+
|
|
702
|
+
if start is None:
|
|
703
|
+
if stop is None:
|
|
704
|
+
return self
|
|
705
|
+
return sql.func.substr(self, 1, stop)
|
|
706
|
+
if stop is None:
|
|
707
|
+
return sql.func.substr(self, start)
|
|
708
|
+
return sql.func.substr(self, start, sql.func.greatest(stop - start, 0))
|
|
709
|
+
|
|
710
|
+
|
|
572
711
|
@pxt.udf(is_method=True)
|
|
573
712
|
def slice_replace(
|
|
574
713
|
self: str, start: Optional[int] = None, stop: Optional[int] = None, repl: Optional[str] = None
|
|
@@ -585,16 +724,23 @@ def slice_replace(
|
|
|
585
724
|
|
|
586
725
|
|
|
587
726
|
@pxt.udf(is_method=True)
|
|
588
|
-
def startswith(self: str,
|
|
727
|
+
def startswith(self: str, substr: str) -> int:
|
|
589
728
|
"""
|
|
590
|
-
Return `True` if string starts with `
|
|
729
|
+
Return `True` if string starts with `substr`, otherwise return `False`.
|
|
591
730
|
|
|
592
731
|
Equivalent to [`str.startswith()`](https://docs.python.org/3/library/stdtypes.html#str.startswith).
|
|
593
732
|
|
|
594
733
|
Args:
|
|
595
|
-
|
|
734
|
+
substr: string literal
|
|
596
735
|
"""
|
|
597
|
-
return self.startswith(
|
|
736
|
+
return self.startswith(substr)
|
|
737
|
+
|
|
738
|
+
|
|
739
|
+
@startswith.to_sql
|
|
740
|
+
def _(self: sql.ColumnElement, substr: sql.ColumnElement) -> sql.ColumnElement:
|
|
741
|
+
# Replace all occurrences of `%`, `_`, and `\` with escaped versions
|
|
742
|
+
escaped_substr = sql.func.regexp_replace(substr, r'(%|_|\\)', r'\\\1', 'g')
|
|
743
|
+
return self.like(sql.func.concat(escaped_substr, '%'))
|
|
598
744
|
|
|
599
745
|
|
|
600
746
|
@pxt.udf(is_method=True)
|
|
@@ -610,6 +756,11 @@ def strip(self: str, chars: Optional[str] = None) -> str:
|
|
|
610
756
|
return self.strip(chars)
|
|
611
757
|
|
|
612
758
|
|
|
759
|
+
@strip.to_sql
|
|
760
|
+
def _(self: sql.ColumnElement, chars: Optional[sql.ColumnElement] = None) -> sql.ColumnElement:
|
|
761
|
+
return sql.func.trim(self, chars if chars is not None else whitespace)
|
|
762
|
+
|
|
763
|
+
|
|
613
764
|
@pxt.udf(is_method=True)
|
|
614
765
|
def swapcase(self: str) -> str:
|
|
615
766
|
"""
|
|
@@ -641,6 +792,11 @@ def upper(self: str) -> str:
|
|
|
641
792
|
return self.upper()
|
|
642
793
|
|
|
643
794
|
|
|
795
|
+
@upper.to_sql
|
|
796
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
797
|
+
return sql.func.upper(self)
|
|
798
|
+
|
|
799
|
+
|
|
644
800
|
@pxt.udf(is_method=True)
|
|
645
801
|
def wrap(self: str, width: int, **kwargs: Any) -> list[str]:
|
|
646
802
|
"""
|
|
@@ -653,8 +809,6 @@ def wrap(self: str, width: int, **kwargs: Any) -> list[str]:
|
|
|
653
809
|
width: Maximum line width.
|
|
654
810
|
kwargs: Additional keyword arguments to pass to `textwrap.fill()`.
|
|
655
811
|
"""
|
|
656
|
-
import textwrap
|
|
657
|
-
|
|
658
812
|
return textwrap.wrap(self, width, **kwargs)
|
|
659
813
|
|
|
660
814
|
|
pixeltable/functions/together.py
CHANGED
|
@@ -7,7 +7,7 @@ the [Working with Together AI](https://pixeltable.readme.io/docs/together-ai) tu
|
|
|
7
7
|
|
|
8
8
|
import base64
|
|
9
9
|
import io
|
|
10
|
-
from typing import TYPE_CHECKING, Callable, Optional, TypeVar
|
|
10
|
+
from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar
|
|
11
11
|
|
|
12
12
|
import numpy as np
|
|
13
13
|
import PIL.Image
|
|
@@ -50,21 +50,7 @@ def _retry(fn: Callable[..., T]) -> Callable[..., T]:
|
|
|
50
50
|
|
|
51
51
|
|
|
52
52
|
@pxt.udf(resource_pool='request-rate:together:chat')
|
|
53
|
-
async def completions(
|
|
54
|
-
prompt: str,
|
|
55
|
-
*,
|
|
56
|
-
model: str,
|
|
57
|
-
max_tokens: Optional[int] = None,
|
|
58
|
-
stop: Optional[list] = None,
|
|
59
|
-
temperature: Optional[float] = None,
|
|
60
|
-
top_p: Optional[float] = None,
|
|
61
|
-
top_k: Optional[int] = None,
|
|
62
|
-
repetition_penalty: Optional[float] = None,
|
|
63
|
-
logprobs: Optional[int] = None,
|
|
64
|
-
echo: Optional[bool] = None,
|
|
65
|
-
n: Optional[int] = None,
|
|
66
|
-
safety_model: Optional[str] = None,
|
|
67
|
-
) -> dict:
|
|
53
|
+
async def completions(prompt: str, *, model: str, model_kwargs: Optional[dict[str, Any]] = None) -> dict:
|
|
68
54
|
"""
|
|
69
55
|
Generate completions based on a given prompt using a specified model.
|
|
70
56
|
|
|
@@ -82,8 +68,8 @@ async def completions(
|
|
|
82
68
|
Args:
|
|
83
69
|
prompt: A string providing context for the model to complete.
|
|
84
70
|
model: The name of the model to query.
|
|
85
|
-
|
|
86
|
-
|
|
71
|
+
model_kwargs: Additional keyword arguments for the Together `completions` API.
|
|
72
|
+
For details on the available parameters, see: <https://docs.together.ai/reference/completions-1>
|
|
87
73
|
|
|
88
74
|
Returns:
|
|
89
75
|
A dictionary containing the response and other metadata.
|
|
@@ -94,41 +80,16 @@ async def completions(
|
|
|
94
80
|
|
|
95
81
|
>>> tbl.add_computed_column(response=completions(tbl.prompt, model='mistralai/Mixtral-8x7B-v0.1'))
|
|
96
82
|
"""
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
stop=stop,
|
|
102
|
-
temperature=temperature,
|
|
103
|
-
top_p=top_p,
|
|
104
|
-
top_k=top_k,
|
|
105
|
-
repetition_penalty=repetition_penalty,
|
|
106
|
-
logprobs=logprobs,
|
|
107
|
-
echo=echo,
|
|
108
|
-
n=n,
|
|
109
|
-
safety_model=safety_model,
|
|
110
|
-
)
|
|
83
|
+
if model_kwargs is None:
|
|
84
|
+
model_kwargs = {}
|
|
85
|
+
|
|
86
|
+
result = await _together_client().completions.create(prompt=prompt, model=model, **model_kwargs)
|
|
111
87
|
return result.dict()
|
|
112
88
|
|
|
113
89
|
|
|
114
90
|
@pxt.udf(resource_pool='request-rate:together:chat')
|
|
115
91
|
async def chat_completions(
|
|
116
|
-
messages: list[dict[str, str]],
|
|
117
|
-
*,
|
|
118
|
-
model: str,
|
|
119
|
-
max_tokens: Optional[int] = None,
|
|
120
|
-
stop: Optional[list[str]] = None,
|
|
121
|
-
temperature: Optional[float] = None,
|
|
122
|
-
top_p: Optional[float] = None,
|
|
123
|
-
top_k: Optional[int] = None,
|
|
124
|
-
repetition_penalty: Optional[float] = None,
|
|
125
|
-
logprobs: Optional[int] = None,
|
|
126
|
-
echo: Optional[bool] = None,
|
|
127
|
-
n: Optional[int] = None,
|
|
128
|
-
safety_model: Optional[str] = None,
|
|
129
|
-
response_format: Optional[dict] = None,
|
|
130
|
-
tools: Optional[dict] = None,
|
|
131
|
-
tool_choice: Optional[dict] = None,
|
|
92
|
+
messages: list[dict[str, str]], *, model: str, model_kwargs: Optional[dict[str, Any]] = None
|
|
132
93
|
) -> dict:
|
|
133
94
|
"""
|
|
134
95
|
Generate chat completions based on a given prompt using a specified model.
|
|
@@ -147,8 +108,8 @@ async def chat_completions(
|
|
|
147
108
|
Args:
|
|
148
109
|
messages: A list of messages comprising the conversation so far.
|
|
149
110
|
model: The name of the model to query.
|
|
150
|
-
|
|
151
|
-
|
|
111
|
+
model_kwargs: Additional keyword arguments for the Together `chat/completions` API.
|
|
112
|
+
For details on the available parameters, see: <https://docs.together.ai/reference/chat-completions-1>
|
|
152
113
|
|
|
153
114
|
Returns:
|
|
154
115
|
A dictionary containing the response and other metadata.
|
|
@@ -160,23 +121,10 @@ async def chat_completions(
|
|
|
160
121
|
>>> messages = [{'role': 'user', 'content': tbl.prompt}]
|
|
161
122
|
... tbl.add_computed_column(response=chat_completions(messages, model='mistralai/Mixtral-8x7B-v0.1'))
|
|
162
123
|
"""
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
stop=stop,
|
|
168
|
-
temperature=temperature,
|
|
169
|
-
top_p=top_p,
|
|
170
|
-
top_k=top_k,
|
|
171
|
-
repetition_penalty=repetition_penalty,
|
|
172
|
-
logprobs=logprobs,
|
|
173
|
-
echo=echo,
|
|
174
|
-
n=n,
|
|
175
|
-
safety_model=safety_model,
|
|
176
|
-
response_format=response_format,
|
|
177
|
-
tools=tools,
|
|
178
|
-
tool_choice=tool_choice,
|
|
179
|
-
)
|
|
124
|
+
if model_kwargs is None:
|
|
125
|
+
model_kwargs = {}
|
|
126
|
+
|
|
127
|
+
result = await _together_client().chat.completions.create(messages=messages, model=model, **model_kwargs)
|
|
180
128
|
return result.dict()
|
|
181
129
|
|
|
182
130
|
|
|
@@ -236,14 +184,7 @@ def _(model: str) -> ts.ArrayType:
|
|
|
236
184
|
|
|
237
185
|
@pxt.udf(resource_pool='request-rate:together:images')
|
|
238
186
|
async def image_generations(
|
|
239
|
-
prompt: str,
|
|
240
|
-
*,
|
|
241
|
-
model: str,
|
|
242
|
-
steps: Optional[int] = None,
|
|
243
|
-
seed: Optional[int] = None,
|
|
244
|
-
height: Optional[int] = None,
|
|
245
|
-
width: Optional[int] = None,
|
|
246
|
-
negative_prompt: Optional[str] = None,
|
|
187
|
+
prompt: str, *, model: str, model_kwargs: Optional[dict[str, Any]] = None
|
|
247
188
|
) -> PIL.Image.Image:
|
|
248
189
|
"""
|
|
249
190
|
Generate images based on a given prompt using a specified model.
|
|
@@ -262,8 +203,8 @@ async def image_generations(
|
|
|
262
203
|
Args:
|
|
263
204
|
prompt: A description of the desired images.
|
|
264
205
|
model: The model to use for image generation.
|
|
265
|
-
|
|
266
|
-
|
|
206
|
+
model_kwargs: Additional keyword args for the Together `images/generations` API.
|
|
207
|
+
For details on the available parameters, see: <https://docs.together.ai/reference/post_images-generations>
|
|
267
208
|
|
|
268
209
|
Returns:
|
|
269
210
|
The generated image.
|
|
@@ -276,9 +217,10 @@ async def image_generations(
|
|
|
276
217
|
... response=image_generations(tbl.prompt, model='stabilityai/stable-diffusion-xl-base-1.0')
|
|
277
218
|
... )
|
|
278
219
|
"""
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
220
|
+
if model_kwargs is None:
|
|
221
|
+
model_kwargs = {}
|
|
222
|
+
|
|
223
|
+
result = await _together_client().images.generate(prompt=prompt, model=model, **model_kwargs)
|
|
282
224
|
if result.data[0].b64_json is not None:
|
|
283
225
|
b64_bytes = base64.b64decode(result.data[0].b64_json)
|
|
284
226
|
img = PIL.Image.open(io.BytesIO(b64_bytes))
|
pixeltable/globals.py
CHANGED
|
@@ -249,13 +249,17 @@ def create_view(
|
|
|
249
249
|
where: Optional[exprs.Expr] = None
|
|
250
250
|
if isinstance(base, catalog.Table):
|
|
251
251
|
tbl_version_path = base._tbl_version_path
|
|
252
|
+
sample_clause = None
|
|
252
253
|
elif isinstance(base, DataFrame):
|
|
253
254
|
base._validate_mutable('create_view', allow_select=True)
|
|
254
255
|
if len(base._from_clause.tbls) > 1:
|
|
255
256
|
raise excs.Error('Cannot create a view of a join')
|
|
256
257
|
tbl_version_path = base._from_clause.tbls[0]
|
|
257
258
|
where = base.where_clause
|
|
259
|
+
sample_clause = base.sample_clause
|
|
258
260
|
select_list = base.select_list
|
|
261
|
+
if sample_clause is not None and not is_snapshot and not sample_clause.is_repeatable:
|
|
262
|
+
raise excs.Error('Non-snapshot views cannot be created with non-fractional or stratified sampling')
|
|
259
263
|
else:
|
|
260
264
|
raise excs.Error('`base` must be an instance of `Table` or `DataFrame`')
|
|
261
265
|
assert isinstance(base, (catalog.Table, DataFrame))
|
|
@@ -272,7 +276,7 @@ def create_view(
|
|
|
272
276
|
if col_name in [c.name for c in tbl_version_path.columns()]:
|
|
273
277
|
raise excs.Error(
|
|
274
278
|
f'Column {col_name!r} already exists in the base table '
|
|
275
|
-
f'{tbl_version_path.get_column(col_name).tbl.
|
|
279
|
+
f'{tbl_version_path.get_column(col_name).tbl.name}.'
|
|
276
280
|
)
|
|
277
281
|
|
|
278
282
|
return Catalog.get().create_view(
|
|
@@ -280,6 +284,7 @@ def create_view(
|
|
|
280
284
|
tbl_version_path,
|
|
281
285
|
select_list=select_list,
|
|
282
286
|
where=where,
|
|
287
|
+
sample_clause=sample_clause,
|
|
283
288
|
additional_columns=additional_columns,
|
|
284
289
|
is_snapshot=is_snapshot,
|
|
285
290
|
iterator=iterator,
|
|
@@ -422,7 +427,8 @@ def get_table(path: str) -> catalog.Table:
|
|
|
422
427
|
>>> tbl = pxt.get_table('my_snapshot')
|
|
423
428
|
"""
|
|
424
429
|
path_obj = catalog.Path(path)
|
|
425
|
-
|
|
430
|
+
tbl = Catalog.get().get_table(path_obj)
|
|
431
|
+
return tbl
|
|
426
432
|
|
|
427
433
|
|
|
428
434
|
def move(path: str, new_path: str) -> None:
|
|
@@ -493,8 +499,8 @@ def drop_table(
|
|
|
493
499
|
if isinstance(table, catalog.Table):
|
|
494
500
|
# if we're dropping a table by handle, we first need to get the current path, then drop the S lock on
|
|
495
501
|
# the Table record, and then get X locks in the correct order (first containing directory, then table)
|
|
496
|
-
with
|
|
497
|
-
tbl_path = table._path
|
|
502
|
+
with Catalog.get().begin_xact(for_write=False):
|
|
503
|
+
tbl_path = table._path()
|
|
498
504
|
else:
|
|
499
505
|
assert isinstance(table, str)
|
|
500
506
|
tbl_path = table
|
pixeltable/index/base.py
CHANGED
|
@@ -41,6 +41,11 @@ class IndexBase(abc.ABC):
|
|
|
41
41
|
"""Create the index on the index value column"""
|
|
42
42
|
pass
|
|
43
43
|
|
|
44
|
+
@abc.abstractmethod
|
|
45
|
+
def drop_index(self, index_name: str, index_value_col: catalog.Column) -> None:
|
|
46
|
+
"""Drop the index on the index value column"""
|
|
47
|
+
pass
|
|
48
|
+
|
|
44
49
|
@classmethod
|
|
45
50
|
@abc.abstractmethod
|
|
46
51
|
def display_name(cls) -> str:
|