pixeltable 0.3.15__py3-none-any.whl → 0.4.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (58)
  1. pixeltable/__version__.py +2 -2
  2. pixeltable/catalog/catalog.py +296 -105
  3. pixeltable/catalog/column.py +10 -8
  4. pixeltable/catalog/dir.py +1 -2
  5. pixeltable/catalog/insertable_table.py +25 -20
  6. pixeltable/catalog/schema_object.py +3 -6
  7. pixeltable/catalog/table.py +261 -189
  8. pixeltable/catalog/table_version.py +333 -202
  9. pixeltable/catalog/table_version_handle.py +15 -2
  10. pixeltable/catalog/table_version_path.py +60 -14
  11. pixeltable/catalog/view.py +38 -6
  12. pixeltable/dataframe.py +196 -18
  13. pixeltable/env.py +4 -4
  14. pixeltable/exec/__init__.py +1 -1
  15. pixeltable/exec/expr_eval/evaluators.py +4 -1
  16. pixeltable/exec/in_memory_data_node.py +1 -1
  17. pixeltable/exec/sql_node.py +171 -22
  18. pixeltable/exprs/column_property_ref.py +15 -6
  19. pixeltable/exprs/column_ref.py +32 -11
  20. pixeltable/exprs/comparison.py +1 -1
  21. pixeltable/exprs/data_row.py +5 -3
  22. pixeltable/exprs/expr.py +7 -0
  23. pixeltable/exprs/literal.py +2 -0
  24. pixeltable/exprs/row_builder.py +4 -6
  25. pixeltable/exprs/rowid_ref.py +8 -0
  26. pixeltable/exprs/similarity_expr.py +1 -0
  27. pixeltable/func/query_template_function.py +1 -1
  28. pixeltable/func/tools.py +1 -1
  29. pixeltable/functions/gemini.py +0 -1
  30. pixeltable/functions/string.py +212 -58
  31. pixeltable/globals.py +12 -4
  32. pixeltable/index/base.py +5 -0
  33. pixeltable/index/btree.py +5 -0
  34. pixeltable/index/embedding_index.py +5 -0
  35. pixeltable/io/external_store.py +8 -29
  36. pixeltable/io/label_studio.py +1 -1
  37. pixeltable/io/parquet.py +2 -2
  38. pixeltable/io/table_data_conduit.py +0 -31
  39. pixeltable/metadata/__init__.py +11 -2
  40. pixeltable/metadata/converters/convert_13.py +2 -2
  41. pixeltable/metadata/converters/convert_30.py +6 -11
  42. pixeltable/metadata/converters/convert_35.py +9 -0
  43. pixeltable/metadata/converters/convert_36.py +38 -0
  44. pixeltable/metadata/converters/util.py +3 -9
  45. pixeltable/metadata/notes.py +2 -0
  46. pixeltable/metadata/schema.py +8 -1
  47. pixeltable/plan.py +221 -14
  48. pixeltable/share/packager.py +137 -13
  49. pixeltable/share/publish.py +2 -2
  50. pixeltable/store.py +19 -13
  51. pixeltable/utils/dbms.py +1 -1
  52. pixeltable/utils/formatter.py +64 -42
  53. pixeltable/utils/sample.py +25 -0
  54. {pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc2.dist-info}/METADATA +2 -1
  55. {pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc2.dist-info}/RECORD +58 -55
  56. {pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc2.dist-info}/LICENSE +0 -0
  57. {pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc2.dist-info}/WHEEL +0 -0
  58. {pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc2.dist-info}/entry_points.txt +0 -0
pixeltable/functions/string.py CHANGED
@@ -12,8 +12,13 @@ t.select(t.str_col.capitalize()).collect()
12
12
  """
13
13
 
14
14
  import builtins
15
+ import re
16
+ import textwrap
17
+ from string import whitespace
15
18
  from typing import Any, Optional
16
19
 
20
+ import sqlalchemy as sql
21
+
17
22
  import pixeltable as pxt
18
23
  from pixeltable.utils.code import local_public_names
19
24
 
@@ -28,6 +33,11 @@ def capitalize(self: str) -> str:
28
33
  return self.capitalize()
29
34
 
30
35
 
36
+ @capitalize.to_sql
37
+ def _(self: sql.ColumnElement) -> sql.ColumnElement:
38
+ return sql.func.concat(sql.func.upper(sql.func.left(self, 1)), sql.func.lower(sql.func.right(self, -1)))
39
+
40
+
31
41
  @pxt.udf(is_method=True)
32
42
  def casefold(self: str) -> str:
33
43
  """
@@ -53,26 +63,47 @@ def center(self: str, width: int, fillchar: str = ' ') -> str:
53
63
 
54
64
 
55
65
  @pxt.udf(is_method=True)
56
- def contains(self: str, pattern: str, case: bool = True, flags: int = 0, regex: bool = True) -> bool:
66
+ def contains(self: str, substr: str, case: bool = True) -> bool:
57
67
  """
58
- Test if string contains pattern or regex.
68
+ Test if string contains a substring.
59
69
 
60
70
  Args:
61
- pattern: string literal or regular expression
71
+ substr: string literal or regular expression
62
72
  case: if False, ignore case
63
- flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
64
- regex: if True, treat pattern as a regular expression
65
73
  """
66
- if regex:
67
- import re
68
-
69
- if not case:
70
- flags |= re.IGNORECASE
71
- return bool(re.search(pattern, self, flags))
72
- elif case:
73
- return pattern in self
74
+ if case:
75
+ return substr in self
76
+ else:
77
+ return substr.lower() in self.lower()
78
+
79
+
80
+ @contains.to_sql
81
+ def _(
82
+ self: sql.ColumnElement, substr: sql.ColumnElement, case: Optional[sql.ColumnElement] = None
83
+ ) -> sql.ColumnElement:
84
+ # Replace all occurrences of `%`, `_`, and `\` with escaped versions
85
+ escaped_substr = sql.func.regexp_replace(substr, r'(%|_|\\)', r'\\\1', 'g')
86
+ if case is None:
87
+ # Default `case` is True, so we do a case-sensitive comparison
88
+ return self.like(sql.func.concat('%', escaped_substr, '%'))
74
89
  else:
75
- return pattern.lower() in self.lower()
90
+ # Toggle case-sensitivity based on the value of `case`
91
+ return sql.case(
92
+ (case, self.like(sql.func.concat('%', escaped_substr, '%'))),
93
+ else_=sql.func.lower(self).like(sql.func.concat('%', sql.func.lower(escaped_substr), '%')),
94
+ )
95
+
96
+
97
+ @pxt.udf(is_method=True)
98
+ def contains_re(self: str, pattern: str, flags: int = 0) -> bool:
99
+ """
100
+ Test if string contains a regular expression pattern.
101
+
102
+ Args:
103
+ pattern: regular expression pattern
104
+ flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
105
+ """
106
+ return bool(re.search(pattern, self, flags))
76
107
 
77
108
 
78
109
  @pxt.udf(is_method=True)
@@ -84,22 +115,27 @@ def count(self: str, pattern: str, flags: int = 0) -> int:
84
115
  pattern: string literal or regular expression
85
116
  flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
86
117
  """
87
- import re
88
-
89
118
  return builtins.len(re.findall(pattern, self, flags))
90
119
 
91
120
 
92
121
  @pxt.udf(is_method=True)
93
- def endswith(self: str, pattern: str) -> bool:
122
+ def endswith(self: str, substr: str) -> bool:
94
123
  """
95
124
  Return `True` if the string ends with the specified suffix, otherwise return `False`.
96
125
 
97
126
  Equivalent to [`str.endswith()`](https://docs.python.org/3/library/stdtypes.html#str.endswith).
98
127
 
99
128
  Args:
100
- pattern: string literal
129
+ substr: string literal
101
130
  """
102
- return self.endswith(pattern)
131
+ return self.endswith(substr)
132
+
133
+
134
+ @endswith.to_sql
135
+ def _(self: sql.ColumnElement, substr: sql.ColumnElement) -> sql.ColumnElement:
136
+ # Replace all occurrences of `%`, `_`, and `\` with escaped versions
137
+ escaped_substr = sql.func.regexp_replace(substr, r'(%|_|\\)', r'\\\1', 'g')
138
+ return self.like(sql.func.concat('%', escaped_substr))
103
139
 
104
140
 
105
141
  @pxt.udf(is_method=True)
@@ -113,13 +149,11 @@ def fill(self: str, width: int, **kwargs: Any) -> str:
113
149
  width: Maximum line width.
114
150
  kwargs: Additional keyword arguments to pass to `textwrap.fill()`.
115
151
  """
116
- import textwrap
117
-
118
152
  return textwrap.fill(self, width, **kwargs)
119
153
 
120
154
 
121
155
  @pxt.udf(is_method=True)
122
- def find(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] = None) -> int:
156
+ def find(self: str, substr: str, start: int = 0, end: Optional[int] = None) -> int:
123
157
  """
124
158
  Return the lowest index in string where `substr` is found within the slice `s[start:end]`.
125
159
 
@@ -133,6 +167,23 @@ def find(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] =
133
167
  return self.find(substr, start, end)
134
168
 
135
169
 
170
+ @find.to_sql
171
+ def _(
172
+ self: sql.ColumnElement,
173
+ substr: sql.ColumnElement,
174
+ start: sql.ColumnElement,
175
+ end: Optional[sql.ColumnElement] = None,
176
+ ) -> sql.ColumnElement:
177
+ sl = pxt.functions.string.slice._to_sql(self, start, end)
178
+ if sl is None:
179
+ return None
180
+
181
+ strpos = sql.func.strpos(sl, substr)
182
+ return sql.case(
183
+ (strpos == 0, -1), (start >= 0, strpos + start - 1), else_=strpos + sql.func.char_length(self) + start - 1
184
+ )
185
+
186
+
136
187
  @pxt.udf(is_method=True)
137
188
  def findall(self: str, pattern: str, flags: int = 0) -> list:
138
189
  """
@@ -144,8 +195,6 @@ def findall(self: str, pattern: str, flags: int = 0) -> list:
144
195
  pattern: regular expression pattern
145
196
  flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
146
197
  """
147
- import re
148
-
149
198
  return re.findall(pattern, self, flags)
150
199
 
151
200
 
@@ -171,8 +220,6 @@ def fullmatch(self: str, pattern: str, case: bool = True, flags: int = 0) -> boo
171
220
  case: if False, ignore case
172
221
  flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
173
222
  """
174
- import re
175
-
176
223
  if not case:
177
224
  flags |= re.IGNORECASE
178
225
  _ = bool(re.fullmatch(pattern, self, flags))
@@ -180,7 +227,7 @@ def fullmatch(self: str, pattern: str, case: bool = True, flags: int = 0) -> boo
180
227
 
181
228
 
182
229
  @pxt.udf(is_method=True)
183
- def index(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] = None) -> int:
230
+ def index(self: str, substr: str, start: int = 0, end: Optional[int] = None) -> int:
184
231
  """
185
232
  Return the lowest index in string where `substr` is found within the slice `[start:end]`.
186
233
  Raises ValueError if `substr` is not found.
@@ -330,6 +377,11 @@ def len(self: str) -> int:
330
377
  return builtins.len(self)
331
378
 
332
379
 
380
+ @len.to_sql
381
+ def _(self: sql.ColumnElement) -> sql.ColumnElement:
382
+ return sql.func.char_length(self)
383
+
384
+
333
385
  @pxt.udf(is_method=True)
334
386
  def ljust(self: str, width: int, fillchar: str = ' ') -> str:
335
387
  """
@@ -355,6 +407,11 @@ def lower(self: str) -> str:
355
407
  return self.lower()
356
408
 
357
409
 
410
+ @lower.to_sql
411
+ def _(self: sql.ColumnElement) -> sql.ColumnElement:
412
+ return sql.func.lower(self)
413
+
414
+
358
415
  @pxt.udf(is_method=True)
359
416
  def lstrip(self: str, chars: Optional[str] = None) -> str:
360
417
  """
@@ -369,6 +426,11 @@ def lstrip(self: str, chars: Optional[str] = None) -> str:
369
426
  return self.lstrip(chars)
370
427
 
371
428
 
429
+ @lstrip.to_sql
430
+ def _(self: sql.ColumnElement, chars: Optional[sql.ColumnElement] = None) -> sql.ColumnElement:
431
+ return sql.func.ltrim(self, chars if chars is not None else whitespace)
432
+
433
+
372
434
  @pxt.udf(is_method=True)
373
435
  def match(self: str, pattern: str, case: bool = True, flags: int = 0) -> bool:
374
436
  """
@@ -379,8 +441,6 @@ def match(self: str, pattern: str, case: bool = True, flags: int = 0) -> bool:
379
441
  case: if False, ignore case
380
442
  flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
381
443
  """
382
- import re
383
-
384
444
  if not case:
385
445
  flags |= re.IGNORECASE
386
446
  return bool(re.match(pattern, self, flags))
@@ -440,9 +500,12 @@ def removeprefix(self: str, prefix: str) -> str:
440
500
  """
441
501
  Remove prefix. If the prefix is not present, returns string.
442
502
  """
443
- if self.startswith(prefix):
444
- return self[builtins.len(prefix) :]
445
- return self
503
+ return self.removeprefix(prefix)
504
+
505
+
506
+ @removeprefix.to_sql
507
+ def _(self: sql.ColumnElement, prefix: sql.ColumnElement) -> sql.ColumnElement:
508
+ return sql.case((startswith._to_sql(self, prefix), sql.func.right(self, -sql.func.char_length(prefix))), else_=self)
446
509
 
447
510
 
448
511
  @pxt.udf(is_method=True)
@@ -450,9 +513,12 @@ def removesuffix(self: str, suffix: str) -> str:
450
513
  """
451
514
  Remove suffix. If the suffix is not present, returns string.
452
515
  """
453
- if self.endswith(suffix):
454
- return self[: -builtins.len(suffix)]
455
- return self
516
+ return self.removesuffix(suffix)
517
+
518
+
519
+ @removesuffix.to_sql
520
+ def _(self: sql.ColumnElement, suffix: sql.ColumnElement) -> sql.ColumnElement:
521
+ return sql.case((endswith._to_sql(self, suffix), sql.func.left(self, -sql.func.char_length(suffix))), else_=self)
456
522
 
457
523
 
458
524
  @pxt.udf(is_method=True)
@@ -463,32 +529,65 @@ def repeat(self: str, n: int) -> str:
463
529
  return self * n
464
530
 
465
531
 
532
+ @repeat.to_sql
533
+ def _(self: sql.ColumnElement, n: sql.ColumnElement) -> sql.ColumnElement:
534
+ return sql.func.repeat(self, n.cast(sql.types.INT))
535
+
536
+
466
537
  @pxt.udf(is_method=True)
467
- def replace(
468
- self: str, pattern: str, repl: str, n: int = -1, case: bool = True, flags: int = 0, regex: bool = False
469
- ) -> str:
538
+ def replace(self: str, substr: str, repl: str, n: Optional[int] = None) -> str:
470
539
  """
471
- Replace occurrences of `pattern` with `repl`.
540
+ Replace occurrences of `substr` with `repl`.
472
541
 
473
- Equivalent to [`str.replace()`](https://docs.python.org/3/library/stdtypes.html#str.replace) or
474
- [`re.sub()`](https://docs.python.org/3/library/re.html#re.sub), depending on the value of regex.
542
+ Equivalent to [`str.replace()`](https://docs.python.org/3/library/stdtypes.html#str.replace).
475
543
 
476
544
  Args:
477
- pattern: string literal or regular expression
545
+ substr: string literal
478
546
  repl: replacement string
479
- n: number of replacements to make (-1 for all)
480
- case: if False, ignore case
547
+ n: number of replacements to make (if `None`, replace all occurrences)
548
+ """
549
+ return self.replace(substr, repl, n or -1)
550
+
551
+
552
+ @replace.to_sql
553
+ def _(
554
+ self: sql.ColumnElement, substr: sql.ColumnElement, repl: sql.ColumnElement, n: Optional[sql.ColumnElement] = None
555
+ ) -> sql.ColumnElement:
556
+ if n is not None:
557
+ return None # SQL does not support bounding the number of replacements
558
+
559
+ return sql.func.replace(self, substr, repl)
560
+
561
+
562
+ @pxt.udf(is_method=True)
563
+ def replace_re(self: str, pattern: str, repl: str, n: Optional[int] = None, flags: int = 0) -> str:
564
+ """
565
+ Replace occurrences of a regular expression pattern with `repl`.
566
+
567
+ Equivalent to [`re.sub()`](https://docs.python.org/3/library/re.html#re.sub).
568
+
569
+ Args:
570
+ pattern: regular expression pattern
571
+ repl: replacement string
572
+ n: number of replacements to make (if `None`, replace all occurrences)
481
573
  flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
482
- regex: if True, treat pattern as a regular expression
483
574
  """
484
- if regex:
485
- import re
575
+ return re.sub(pattern, repl, self, count=(n or 0), flags=flags)
486
576
 
487
- if not case:
488
- flags |= re.IGNORECASE
489
- return re.sub(pattern, repl, self, count=(0 if n == -1 else n), flags=flags)
490
- else:
491
- return self.replace(pattern, repl, n)
577
+
578
+ @pxt.udf(is_method=True)
579
+ def reverse(self: str) -> str:
580
+ """
581
+ Return a reversed copy of the string.
582
+
583
+ Equivalent to `str[::-1]`.
584
+ """
585
+ return self[::-1]
586
+
587
+
588
+ @reverse.to_sql
589
+ def _(self: sql.ColumnElement) -> sql.ColumnElement:
590
+ return sql.func.reverse(self)
492
591
 
493
592
 
494
593
  @pxt.udf(is_method=True)
@@ -556,6 +655,11 @@ def rstrip(self: str, chars: Optional[str] = None) -> str:
556
655
  return self.rstrip(chars)
557
656
 
558
657
 
658
+ @rstrip.to_sql
659
+ def _(self: sql.ColumnElement, chars: Optional[sql.ColumnElement] = None) -> sql.ColumnElement:
660
+ return sql.func.rtrim(self, chars if chars is not None else whitespace)
661
+
662
+
559
663
  @pxt.udf(is_method=True)
560
664
  def slice(self: str, start: Optional[int] = None, stop: Optional[int] = None, step: Optional[int] = None) -> str:
561
665
  """
@@ -569,6 +673,41 @@ def slice(self: str, start: Optional[int] = None, stop: Optional[int] = None, st
569
673
  return self[start:stop:step]
570
674
 
571
675
 
676
+ @slice.to_sql
677
+ def _(
678
+ self: sql.ColumnElement,
679
+ start: Optional[sql.ColumnElement] = None,
680
+ stop: Optional[sql.ColumnElement] = None,
681
+ step: Optional[sql.ColumnElement] = None,
682
+ ) -> sql.ColumnElement:
683
+ if step is not None:
684
+ return None
685
+
686
+ if start is not None:
687
+ start = start.cast(sql.types.INT) # Postgres won't accept a BIGINT
688
+ start = sql.case(
689
+ (start >= 0, start + 1), # SQL is 1-based, Python is 0-based
690
+ else_=sql.func.char_length(self) + start + 1, # negative index
691
+ )
692
+ start = sql.func.greatest(start, 1)
693
+
694
+ if stop is not None:
695
+ stop = stop.cast(sql.types.INT) # Postgres won't accept a BIGINT
696
+ stop = sql.case(
697
+ (stop >= 0, stop + 1), # SQL is 1-based, Python is 0-based
698
+ else_=sql.func.char_length(self) + stop + 1, # negative index
699
+ )
700
+ stop = sql.func.greatest(stop, 0)
701
+
702
+ if start is None:
703
+ if stop is None:
704
+ return self
705
+ return sql.func.substr(self, 1, stop)
706
+ if stop is None:
707
+ return sql.func.substr(self, start)
708
+ return sql.func.substr(self, start, sql.func.greatest(stop - start, 0))
709
+
710
+
572
711
  @pxt.udf(is_method=True)
573
712
  def slice_replace(
574
713
  self: str, start: Optional[int] = None, stop: Optional[int] = None, repl: Optional[str] = None
@@ -585,16 +724,23 @@ def slice_replace(
585
724
 
586
725
 
587
726
  @pxt.udf(is_method=True)
588
- def startswith(self: str, pattern: str) -> int:
727
+ def startswith(self: str, substr: str) -> int:
589
728
  """
590
- Return `True` if string starts with `pattern`, otherwise return `False`.
729
+ Return `True` if string starts with `substr`, otherwise return `False`.
591
730
 
592
731
  Equivalent to [`str.startswith()`](https://docs.python.org/3/library/stdtypes.html#str.startswith).
593
732
 
594
733
  Args:
595
- pattern: string literal
734
+ substr: string literal
596
735
  """
597
- return self.startswith(pattern)
736
+ return self.startswith(substr)
737
+
738
+
739
+ @startswith.to_sql
740
+ def _(self: sql.ColumnElement, substr: sql.ColumnElement) -> sql.ColumnElement:
741
+ # Replace all occurrences of `%`, `_`, and `\` with escaped versions
742
+ escaped_substr = sql.func.regexp_replace(substr, r'(%|_|\\)', r'\\\1', 'g')
743
+ return self.like(sql.func.concat(escaped_substr, '%'))
598
744
 
599
745
 
600
746
  @pxt.udf(is_method=True)
@@ -610,6 +756,11 @@ def strip(self: str, chars: Optional[str] = None) -> str:
610
756
  return self.strip(chars)
611
757
 
612
758
 
759
+ @strip.to_sql
760
+ def _(self: sql.ColumnElement, chars: Optional[sql.ColumnElement] = None) -> sql.ColumnElement:
761
+ return sql.func.trim(self, chars if chars is not None else whitespace)
762
+
763
+
613
764
  @pxt.udf(is_method=True)
614
765
  def swapcase(self: str) -> str:
615
766
  """
@@ -641,6 +792,11 @@ def upper(self: str) -> str:
641
792
  return self.upper()
642
793
 
643
794
 
795
+ @upper.to_sql
796
+ def _(self: sql.ColumnElement) -> sql.ColumnElement:
797
+ return sql.func.upper(self)
798
+
799
+
644
800
  @pxt.udf(is_method=True)
645
801
  def wrap(self: str, width: int, **kwargs: Any) -> list[str]:
646
802
  """
@@ -653,8 +809,6 @@ def wrap(self: str, width: int, **kwargs: Any) -> list[str]:
653
809
  width: Maximum line width.
654
810
  kwargs: Additional keyword arguments to pass to `textwrap.fill()`.
655
811
  """
656
- import textwrap
657
-
658
812
  return textwrap.wrap(self, width, **kwargs)
659
813
 
660
814
 
pixeltable/globals.py CHANGED
@@ -249,13 +249,17 @@ def create_view(
249
249
  where: Optional[exprs.Expr] = None
250
250
  if isinstance(base, catalog.Table):
251
251
  tbl_version_path = base._tbl_version_path
252
+ sample_clause = None
252
253
  elif isinstance(base, DataFrame):
253
254
  base._validate_mutable('create_view', allow_select=True)
254
255
  if len(base._from_clause.tbls) > 1:
255
256
  raise excs.Error('Cannot create a view of a join')
256
257
  tbl_version_path = base._from_clause.tbls[0]
257
258
  where = base.where_clause
259
+ sample_clause = base.sample_clause
258
260
  select_list = base.select_list
261
+ if sample_clause is not None and not is_snapshot and not sample_clause.is_repeatable:
262
+ raise excs.Error('Non-snapshot views cannot be created with non-fractional or stratified sampling')
259
263
  else:
260
264
  raise excs.Error('`base` must be an instance of `Table` or `DataFrame`')
261
265
  assert isinstance(base, (catalog.Table, DataFrame))
@@ -272,7 +276,7 @@ def create_view(
272
276
  if col_name in [c.name for c in tbl_version_path.columns()]:
273
277
  raise excs.Error(
274
278
  f'Column {col_name!r} already exists in the base table '
275
- f'{tbl_version_path.get_column(col_name).tbl.get().name}.'
279
+ f'{tbl_version_path.get_column(col_name).tbl.name}.'
276
280
  )
277
281
 
278
282
  return Catalog.get().create_view(
@@ -280,6 +284,7 @@ def create_view(
280
284
  tbl_version_path,
281
285
  select_list=select_list,
282
286
  where=where,
287
+ sample_clause=sample_clause,
283
288
  additional_columns=additional_columns,
284
289
  is_snapshot=is_snapshot,
285
290
  iterator=iterator,
@@ -422,7 +427,10 @@ def get_table(path: str) -> catalog.Table:
422
427
  >>> tbl = pxt.get_table('my_snapshot')
423
428
  """
424
429
  path_obj = catalog.Path(path)
425
- return Catalog.get().get_table(path_obj)
430
+ tbl = Catalog.get().get_table(path_obj)
431
+ tv = tbl._tbl_version.get()
432
+ _logger.debug(f'get_table(): tbl={tv.id}:{tv.effective_version} sa_tbl={id(tv.store_tbl.sa_tbl):x} tv={id(tv):x}')
433
+ return tbl
426
434
 
427
435
 
428
436
  def move(path: str, new_path: str) -> None:
@@ -493,8 +501,8 @@ def drop_table(
493
501
  if isinstance(table, catalog.Table):
494
502
  # if we're dropping a table by handle, we first need to get the current path, then drop the S lock on
495
503
  # the Table record, and then get X locks in the correct order (first containing directory, then table)
496
- with Env.get().begin_xact():
497
- tbl_path = table._path
504
+ with Catalog.get().begin_xact(for_write=False):
505
+ tbl_path = table._path()
498
506
  else:
499
507
  assert isinstance(table, str)
500
508
  tbl_path = table
pixeltable/index/base.py CHANGED
@@ -41,6 +41,11 @@ class IndexBase(abc.ABC):
41
41
  """Create the index on the index value column"""
42
42
  pass
43
43
 
44
+ @abc.abstractmethod
45
+ def drop_index(self, index_name: str, index_value_col: catalog.Column) -> None:
46
+ """Drop the index on the index value column"""
47
+ pass
48
+
44
49
  @classmethod
45
50
  @abc.abstractmethod
46
51
  def display_name(cls) -> str:
pixeltable/index/btree.py CHANGED
@@ -59,6 +59,11 @@ class BtreeIndex(IndexBase):
59
59
  conn = Env.get().conn
60
60
  idx.create(bind=conn)
61
61
 
62
+ def drop_index(self, index_name: str, index_value_col: 'catalog.Column') -> None:
63
+ """Drop the index on the index value column"""
64
+ # TODO: implement
65
+ raise NotImplementedError()
66
+
62
67
  @classmethod
63
68
  def display_name(cls) -> str:
64
69
  return 'btree'
pixeltable/index/embedding_index.py CHANGED
@@ -148,6 +148,11 @@ class EmbeddingIndex(IndexBase):
148
148
  conn = Env.get().conn
149
149
  idx.create(bind=conn)
150
150
 
151
+ def drop_index(self, index_name: str, index_value_col: catalog.Column) -> None:
152
+ """Drop the index on the index value column"""
153
+ # TODO: implement
154
+ raise NotImplementedError()
155
+
151
156
  def similarity_clause(self, val_column: catalog.Column, item: Any) -> sql.ColumnElement:
152
157
  """Create a ColumnElement that represents '<val_column> <op> <item>'"""
153
158
  assert isinstance(item, (str, PIL.Image.Image))
pixeltable/io/external_store.py CHANGED
@@ -3,7 +3,6 @@ from __future__ import annotations
3
3
  import abc
4
4
  import itertools
5
5
  import logging
6
- import time
7
6
  from dataclasses import dataclass
8
7
  from typing import Any, Optional
9
8
  from uuid import UUID
@@ -11,7 +10,7 @@ from uuid import UUID
11
10
  import pixeltable.exceptions as excs
12
11
  import pixeltable.type_system as ts
13
12
  from pixeltable import Column, Table
14
- from pixeltable.catalog import TableVersion, TableVersionHandle
13
+ from pixeltable.catalog import TableVersion
15
14
 
16
15
  _logger = logging.getLogger('pixeltable')
17
16
 
@@ -32,15 +31,11 @@ class ExternalStore(abc.ABC):
32
31
 
33
32
  @abc.abstractmethod
34
33
  def link(self, tbl_version: TableVersion) -> None:
35
- """
36
- Called by `TableVersion.link()` to implement store-specific logic.
37
- """
34
+ """Creates store-specific metadata needed to implement sync()."""
38
35
 
39
36
  @abc.abstractmethod
40
37
  def unlink(self, tbl_version: TableVersion) -> None:
41
- """
42
- Called by `TableVersion.unlink()` to implement store-specific logic.
43
- """
38
+ """Removes store-specific metadata created in link()."""
44
39
 
45
40
  @abc.abstractmethod
46
41
  def get_local_columns(self) -> list[Column]:
@@ -111,17 +106,10 @@ class Project(ExternalStore, abc.ABC):
111
106
 
112
107
  if len(stored_proxies_needed) > 0:
113
108
  _logger.info(f'Creating stored proxies for columns: {[col.name for col in stored_proxies_needed]}')
114
- # Create stored proxies for columns that need one. Increment the schema version
115
- # accordingly.
116
- tbl_version.version += 1
117
- preceding_schema_version = tbl_version.schema_version
118
- tbl_version.schema_version = tbl_version.version
119
- proxy_cols = [self.create_stored_proxy(tbl_version, col) for col in stored_proxies_needed]
109
+ # Create stored proxies for columns that need one
110
+ proxy_cols = [self.create_stored_proxy(col) for col in stored_proxies_needed]
120
111
  # Add the columns; this will also update table metadata.
121
- tbl_version._add_columns(proxy_cols, print_stats=False, on_error='ignore')
122
- # We don't need to retain `UpdateStatus` since the stored proxies are intended to be
123
- # invisible to the user.
124
- tbl_version._update_md(time.time(), preceding_schema_version=preceding_schema_version)
112
+ tbl_version.add_columns(proxy_cols, print_stats=False, on_error='ignore')
125
113
 
126
114
  def unlink(self, tbl_version: TableVersion) -> None:
127
115
  # Determine which stored proxies can be deleted. (A stored proxy can be deleted if it is not referenced by
@@ -132,15 +120,10 @@ class Project(ExternalStore, abc.ABC):
132
120
  deletions_needed = deletions_needed.difference(set(store.stored_proxies.values()))
133
121
  if len(deletions_needed) > 0:
134
122
  _logger.info(f'Removing stored proxies for columns: {[col.name for col in deletions_needed]}')
135
- # Delete stored proxies that are no longer needed.
136
- tbl_version.version += 1
137
- preceding_schema_version = tbl_version.schema_version
138
- tbl_version.schema_version = tbl_version.version
139
123
  tbl_version._drop_columns(deletions_needed)
140
124
  self.stored_proxies.clear()
141
- tbl_version._update_md(time.time(), preceding_schema_version=preceding_schema_version)
142
125
 
143
- def create_stored_proxy(self, tbl_version: TableVersion, col: Column) -> Column:
126
+ def create_stored_proxy(self, col: Column) -> Column:
144
127
  """
145
128
  Creates a proxy column for the specified column. The proxy column will be created in the specified
146
129
  `TableVersion`.
@@ -158,12 +141,7 @@ class Project(ExternalStore, abc.ABC):
158
141
  # Once `destination` is implemented, it can be replaced with a simple `ColumnRef`.
159
142
  computed_with=exprs.ColumnRef(col).apply(lambda x: x, col_type=col.col_type),
160
143
  stored=True,
161
- col_id=tbl_version.next_col_id,
162
- sa_col_type=col.col_type.to_sa_type(),
163
- schema_version_add=tbl_version.schema_version,
164
144
  )
165
- proxy_col.tbl = TableVersionHandle(tbl_version.id, tbl_version.effective_version, tbl_version=tbl_version)
166
- tbl_version.next_col_id += 1
167
145
  self.stored_proxies[col] = proxy_col
168
146
  return proxy_col
169
147
 
@@ -213,6 +191,7 @@ class Project(ExternalStore, abc.ABC):
213
191
  external (import or export) columns.
214
192
  If validation fails, an exception will be raised. If validation succeeds, a new mapping will be returned
215
193
  in which the Pixeltable column names are resolved to the corresponding `Column` objects.
194
+ TODO: return columns as names or qualified ids
216
195
  """
217
196
  from pixeltable import exprs
218
197
 
pixeltable/io/label_studio.py CHANGED
@@ -577,7 +577,7 @@ class LabelStudioProject(Project):
577
577
  else:
578
578
  local_annotations_column = next(k for k, v in col_mapping.items() if v == ANNOTATIONS_COLUMN)
579
579
  if local_annotations_column not in t._schema:
580
- t.add_columns({local_annotations_column: ts.JsonType(nullable=True)})
580
+ t.add_columns({local_annotations_column: ts.Json})
581
581
 
582
582
  resolved_col_mapping = cls.validate_columns(
583
583
  t, config.export_columns, {ANNOTATIONS_COLUMN: ts.JsonType(nullable=True)}, col_mapping
pixeltable/io/parquet.py CHANGED
@@ -14,7 +14,7 @@ import PIL.Image
14
14
 
15
15
  import pixeltable as pxt
16
16
  import pixeltable.exceptions as excs
17
- from pixeltable.env import Env
17
+ from pixeltable.catalog import Catalog
18
18
  from pixeltable.utils.transactional_directory import transactional_directory
19
19
 
20
20
  if typing.TYPE_CHECKING:
@@ -87,7 +87,7 @@ def export_parquet(
87
87
  current_value_batch: dict[str, deque] = {k: deque() for k in df.schema}
88
88
  current_byte_estimate = 0
89
89
 
90
- with Env.get().begin_xact():
90
+ with Catalog.get().begin_xact(for_write=False):
91
91
  for data_row in df._exec():
92
92
  for (col_name, col_type), e in zip(df.schema.items(), df._select_list_exprs):
93
93
  val = data_row[e.slot_idx]