pixeltable 0.3.12__py3-none-any.whl → 0.3.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. pixeltable/__init__.py +2 -27
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/catalog.py +9 -7
  4. pixeltable/catalog/column.py +6 -2
  5. pixeltable/catalog/dir.py +2 -1
  6. pixeltable/catalog/insertable_table.py +11 -0
  7. pixeltable/catalog/schema_object.py +2 -1
  8. pixeltable/catalog/table.py +27 -38
  9. pixeltable/catalog/table_version.py +19 -0
  10. pixeltable/catalog/table_version_path.py +7 -0
  11. pixeltable/catalog/view.py +31 -0
  12. pixeltable/dataframe.py +50 -7
  13. pixeltable/env.py +1 -1
  14. pixeltable/exceptions.py +20 -2
  15. pixeltable/exec/aggregation_node.py +14 -0
  16. pixeltable/exec/cache_prefetch_node.py +1 -1
  17. pixeltable/exec/expr_eval/evaluators.py +0 -4
  18. pixeltable/exec/expr_eval/expr_eval_node.py +1 -2
  19. pixeltable/exec/sql_node.py +3 -2
  20. pixeltable/exprs/column_ref.py +42 -17
  21. pixeltable/exprs/data_row.py +3 -0
  22. pixeltable/exprs/globals.py +1 -1
  23. pixeltable/exprs/literal.py +11 -1
  24. pixeltable/exprs/rowid_ref.py +4 -1
  25. pixeltable/exprs/similarity_expr.py +1 -1
  26. pixeltable/func/function.py +1 -1
  27. pixeltable/func/udf.py +1 -1
  28. pixeltable/functions/__init__.py +2 -0
  29. pixeltable/functions/anthropic.py +1 -1
  30. pixeltable/functions/bedrock.py +130 -0
  31. pixeltable/functions/date.py +185 -0
  32. pixeltable/functions/gemini.py +22 -20
  33. pixeltable/functions/globals.py +1 -16
  34. pixeltable/functions/huggingface.py +7 -6
  35. pixeltable/functions/image.py +15 -16
  36. pixeltable/functions/json.py +2 -1
  37. pixeltable/functions/math.py +40 -0
  38. pixeltable/functions/mistralai.py +3 -2
  39. pixeltable/functions/openai.py +9 -8
  40. pixeltable/functions/string.py +1 -2
  41. pixeltable/functions/together.py +4 -3
  42. pixeltable/functions/video.py +2 -2
  43. pixeltable/globals.py +26 -9
  44. pixeltable/io/datarows.py +4 -3
  45. pixeltable/io/hf_datasets.py +2 -2
  46. pixeltable/io/label_studio.py +17 -17
  47. pixeltable/io/pandas.py +29 -16
  48. pixeltable/io/parquet.py +2 -0
  49. pixeltable/io/table_data_conduit.py +8 -2
  50. pixeltable/metadata/__init__.py +1 -1
  51. pixeltable/metadata/converters/convert_19.py +2 -2
  52. pixeltable/metadata/converters/convert_34.py +21 -0
  53. pixeltable/metadata/notes.py +1 -0
  54. pixeltable/plan.py +12 -5
  55. pixeltable/share/__init__.py +1 -1
  56. pixeltable/share/packager.py +219 -119
  57. pixeltable/share/publish.py +61 -16
  58. pixeltable/store.py +45 -20
  59. pixeltable/type_system.py +46 -2
  60. pixeltable/utils/arrow.py +8 -2
  61. pixeltable/utils/pytorch.py +4 -0
  62. {pixeltable-0.3.12.dist-info → pixeltable-0.3.14.dist-info}/METADATA +2 -4
  63. {pixeltable-0.3.12.dist-info → pixeltable-0.3.14.dist-info}/RECORD +66 -63
  64. {pixeltable-0.3.12.dist-info → pixeltable-0.3.14.dist-info}/WHEEL +1 -1
  65. {pixeltable-0.3.12.dist-info → pixeltable-0.3.14.dist-info}/LICENSE +0 -0
  66. {pixeltable-0.3.12.dist-info → pixeltable-0.3.14.dist-info}/entry_points.txt +0 -0
@@ -1,32 +1,35 @@
1
1
  """
2
2
  Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
3
3
  that wrap various endpoints from the Google Gemini API. In order to use them, you must
4
- first `pip install google-generativeai` and configure your Gemini credentials, as described in
4
+ first `pip install google-genai` and configure your Gemini credentials, as described in
5
5
  the [Working with Gemini](https://pixeltable.readme.io/docs/working-with-gemini) tutorial.
6
6
  """
7
7
 
8
- from typing import Optional
8
+ from typing import TYPE_CHECKING, Optional
9
9
 
10
10
  import pixeltable as pxt
11
11
  from pixeltable import env
12
12
 
13
+ if TYPE_CHECKING:
14
+ from google import genai
15
+
13
16
 
14
17
  @env.register_client('gemini')
15
- def _(api_key: str) -> None:
16
- import google.generativeai as genai
18
+ def _(api_key: str) -> 'genai.client.Client':
19
+ from google import genai
17
20
 
18
- genai.configure(api_key=api_key)
21
+ return genai.client.Client(api_key=api_key)
19
22
 
20
23
 
21
- def _ensure_loaded() -> None:
22
- env.Env.get().get_client('gemini')
24
+ def _genai_client() -> 'genai.client.Client':
25
+ return env.Env.get().get_client('gemini')
23
26
 
24
27
 
25
28
  @pxt.udf(resource_pool='request-rate:gemini')
26
29
  async def generate_content(
27
30
  contents: str,
28
31
  *,
29
- model_name: str,
32
+ model: str,
30
33
  candidate_count: Optional[int] = None,
31
34
  stop_sequences: Optional[list[str]] = None,
32
35
  max_output_tokens: Optional[int] = None,
@@ -48,11 +51,11 @@ async def generate_content(
48
51
 
49
52
  __Requirements:__
50
53
 
51
- - `pip install google-generativeai`
54
+ - `pip install google-genai`
52
55
 
53
56
  Args:
54
57
  contents: The input content to generate from.
55
- model_name: The name of the model to use.
58
+ model: The name of the model to use.
56
59
 
57
60
  For details on the other parameters, see: <https://ai.google.dev/gemini-api/docs>
58
61
 
@@ -63,14 +66,12 @@ async def generate_content(
63
66
  Add a computed column that applies the model `gemini-1.5-flash`
64
67
  to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
65
68
 
66
- >>> tbl.add_computed_column(response=generate_content(tbl.prompt, model_name='gemini-1.5-flash'))
69
+ >>> tbl.add_computed_column(response=generate_content(tbl.prompt, model='gemini-1.5-flash'))
67
70
  """
68
- env.Env.get().require_package('google.generativeai')
69
- _ensure_loaded()
70
- import google.generativeai as genai
71
+ env.Env.get().require_package('google.genai')
72
+ from google.genai import types
71
73
 
72
- model = genai.GenerativeModel(model_name=model_name)
73
- gc = genai.GenerationConfig(
74
+ config = types.GenerateContentConfig(
74
75
  candidate_count=candidate_count,
75
76
  stop_sequences=stop_sequences,
76
77
  max_output_tokens=max_output_tokens,
@@ -82,10 +83,11 @@ async def generate_content(
82
83
  presence_penalty=presence_penalty,
83
84
  frequency_penalty=frequency_penalty,
84
85
  )
85
- response = await model.generate_content_async(contents, generation_config=gc)
86
- return response.to_dict()
86
+
87
+ response = await _genai_client().aio.models.generate_content(model=model, contents=contents, config=config)
88
+ return response.model_dump()
87
89
 
88
90
 
89
91
  @generate_content.resource_pool
90
- def _(model_name: str) -> str:
91
- return f'request-rate:gemini:{model_name}'
92
+ def _(model: str) -> str:
93
+ return f'request-rate:gemini:{model}'
@@ -49,22 +49,7 @@ def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
49
49
  allows_window=True,
50
50
  # Allow counting non-null values of any type
51
51
  # TODO: should we have an "Any" type that can be used here?
52
- type_substitutions=tuple(
53
- {T: Optional[t]} # type: ignore[misc]
54
- for t in (
55
- ts.String,
56
- ts.Int,
57
- ts.Float,
58
- ts.Bool,
59
- ts.Timestamp,
60
- ts.Array,
61
- ts.Json,
62
- ts.Image,
63
- ts.Video,
64
- ts.Audio,
65
- ts.Document,
66
- )
67
- ),
52
+ type_substitutions=tuple({T: Optional[t]} for t in ts.ALL_PIXELTABLE_TYPES), # type: ignore[misc]
68
53
  )
69
54
  class count(func.Aggregator, typing.Generic[T]):
70
55
  def __init__(self) -> None:
@@ -13,6 +13,7 @@ import PIL.Image
13
13
 
14
14
  import pixeltable as pxt
15
15
  import pixeltable.exceptions as excs
16
+ import pixeltable.type_system as ts
16
17
  from pixeltable import env
17
18
  from pixeltable.func import Batch
18
19
  from pixeltable.functions.util import normalize_image_mode, resolve_torch_device
@@ -61,14 +62,14 @@ def sentence_transformer(
61
62
 
62
63
 
63
64
  @sentence_transformer.conditional_return_type
64
- def _(model_id: str) -> pxt.ArrayType:
65
+ def _(model_id: str) -> ts.ArrayType:
65
66
  try:
66
67
  from sentence_transformers import SentenceTransformer
67
68
 
68
69
  model = _lookup_model(model_id, SentenceTransformer)
69
- return pxt.ArrayType((model.get_sentence_embedding_dimension(),), dtype=pxt.FloatType(), nullable=False)
70
+ return ts.ArrayType((model.get_sentence_embedding_dimension(),), dtype=ts.FloatType(), nullable=False)
70
71
  except ImportError:
71
- return pxt.ArrayType((None,), dtype=pxt.FloatType(), nullable=False)
72
+ return ts.ArrayType((None,), dtype=ts.FloatType(), nullable=False)
72
73
 
73
74
 
74
75
  @pxt.udf
@@ -199,14 +200,14 @@ def _(image: Batch[PIL.Image.Image], *, model_id: str) -> Batch[pxt.Array[(None,
199
200
 
200
201
 
201
202
  @clip.conditional_return_type
202
- def _(model_id: str) -> pxt.ArrayType:
203
+ def _(model_id: str) -> ts.ArrayType:
203
204
  try:
204
205
  from transformers import CLIPModel
205
206
 
206
207
  model = _lookup_model(model_id, CLIPModel.from_pretrained)
207
- return pxt.ArrayType((model.config.projection_dim,), dtype=pxt.FloatType(), nullable=False)
208
+ return ts.ArrayType((model.config.projection_dim,), dtype=ts.FloatType(), nullable=False)
208
209
  except ImportError:
209
- return pxt.ArrayType((None,), dtype=pxt.FloatType(), nullable=False)
210
+ return ts.ArrayType((None,), dtype=ts.FloatType(), nullable=False)
210
211
 
211
212
 
212
213
  @pxt.udf(batch_size=4)
@@ -16,6 +16,7 @@ from typing import Optional
16
16
  import PIL.Image
17
17
 
18
18
  import pixeltable as pxt
19
+ import pixeltable.type_system as ts
19
20
  from pixeltable.exprs import Expr
20
21
  from pixeltable.utils.code import local_public_names
21
22
 
@@ -88,10 +89,10 @@ def convert(self: PIL.Image.Image, mode: str) -> PIL.Image.Image:
88
89
 
89
90
 
90
91
  @convert.conditional_return_type
91
- def _(self: Expr, mode: str) -> pxt.ColumnType:
92
+ def _(self: Expr, mode: str) -> ts.ColumnType:
92
93
  input_type = self.col_type
93
- assert isinstance(input_type, pxt.ImageType)
94
- return pxt.ImageType(size=input_type.size, mode=mode, nullable=input_type.nullable)
94
+ assert isinstance(input_type, ts.ImageType)
95
+ return ts.ImageType(size=input_type.size, mode=mode, nullable=input_type.nullable)
95
96
 
96
97
 
97
98
  # Image.crop()
@@ -108,14 +109,12 @@ def crop(self: PIL.Image.Image, box: tuple[int, int, int, int]) -> PIL.Image.Ima
108
109
 
109
110
 
110
111
  @crop.conditional_return_type
111
- def _(self: Expr, box: tuple[int, int, int, int]) -> pxt.ColumnType:
112
+ def _(self: Expr, box: tuple[int, int, int, int]) -> ts.ColumnType:
112
113
  input_type = self.col_type
113
- assert isinstance(input_type, pxt.ImageType)
114
+ assert isinstance(input_type, ts.ImageType)
114
115
  if (isinstance(box, (list, tuple))) and len(box) == 4 and all(isinstance(x, int) for x in box):
115
- return pxt.ImageType(
116
- size=(box[2] - box[0], box[3] - box[1]), mode=input_type.mode, nullable=input_type.nullable
117
- )
118
- return pxt.ImageType(mode=input_type.mode, nullable=input_type.nullable) # we can't compute the size statically
116
+ return ts.ImageType(size=(box[2] - box[0], box[3] - box[1]), mode=input_type.mode, nullable=input_type.nullable)
117
+ return ts.ImageType(mode=input_type.mode, nullable=input_type.nullable) # we can't compute the size statically
119
118
 
120
119
 
121
120
  # Image.getchannel()
@@ -134,10 +133,10 @@ def getchannel(self: PIL.Image.Image, channel: int) -> PIL.Image.Image:
134
133
 
135
134
 
136
135
  @getchannel.conditional_return_type
137
- def _(self: Expr) -> pxt.ColumnType:
136
+ def _(self: Expr) -> ts.ColumnType:
138
137
  input_type = self.col_type
139
- assert isinstance(input_type, pxt.ImageType)
140
- return pxt.ImageType(size=input_type.size, mode='L', nullable=input_type.nullable)
138
+ assert isinstance(input_type, ts.ImageType)
139
+ return ts.ImageType(size=input_type.size, mode='L', nullable=input_type.nullable)
141
140
 
142
141
 
143
142
  @pxt.udf(is_method=True)
@@ -183,10 +182,10 @@ def resize(self: PIL.Image.Image, size: tuple[int, int]) -> PIL.Image.Image:
183
182
 
184
183
 
185
184
  @resize.conditional_return_type
186
- def _(self: Expr, size: tuple[int, int]) -> pxt.ColumnType:
185
+ def _(self: Expr, size: tuple[int, int]) -> ts.ColumnType:
187
186
  input_type = self.col_type
188
- assert isinstance(input_type, pxt.ImageType)
189
- return pxt.ImageType(size=size, mode=input_type.mode, nullable=input_type.nullable)
187
+ assert isinstance(input_type, ts.ImageType)
188
+ return ts.ImageType(size=size, mode=input_type.mode, nullable=input_type.nullable)
190
189
 
191
190
 
192
191
  # Image.rotate()
@@ -237,7 +236,7 @@ def transpose(self: PIL.Image.Image, method: int) -> PIL.Image.Image:
237
236
  @rotate.conditional_return_type
238
237
  @effect_spread.conditional_return_type
239
238
  @transpose.conditional_return_type
240
- def _(self: Expr) -> pxt.ColumnType:
239
+ def _(self: Expr) -> ts.ColumnType:
241
240
  return self.col_type
242
241
 
243
242
 
@@ -4,9 +4,10 @@ Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
4
4
  Example:
5
5
  ```python
6
6
  import pixeltable as pxt
7
+ import pixeltable.functions as pxtf
7
8
 
8
9
  t = pxt.get_table(...)
9
- t.select(pxt.functions.json.make_list()).collect()
10
+ t.select(pxtf.json.make_list(t.json_col)).collect()
10
11
  ```
11
12
  """
12
13
 
@@ -1,3 +1,15 @@
1
+ """
2
+ Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs) for mathematical operations.
3
+
4
+ Example:
5
+ ```python
6
+ import pixeltable as pxt
7
+
8
+ t = pxt.get_table(...)
9
+ t.select(t.float_col.floor()).collect()
10
+ ```
11
+ """
12
+
1
13
  import builtins
2
14
  import math
3
15
  from typing import Optional
@@ -10,6 +22,11 @@ from pixeltable.utils.code import local_public_names
10
22
 
11
23
  @pxt.udf(is_method=True)
12
24
  def abs(self: float) -> float:
25
+ """
26
+ Return the absolute value of the given number.
27
+
28
+ Equivalent to Python [`builtins.abs()`](https://docs.python.org/3/library/functions.html#abs).
29
+ """
13
30
  return builtins.abs(self)
14
31
 
15
32
 
@@ -20,6 +37,14 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
20
37
 
21
38
  @pxt.udf(is_method=True)
22
39
  def ceil(self: float) -> float:
40
+ """
41
+ Return the ceiling of the given number.
42
+
43
+ Equivalent to Python [`float(math.ceil(self))`](https://docs.python.org/3/library/math.html#math.ceil) if `self`
44
+ is finite, or `self` itself if `self` is infinite. (This is slightly different from the default behavior of
45
+ `math.ceil(self)`, which always returns an `int` and raises an error if `self` is infinite. The behavior in
46
+ Pixeltable generalizes the Python operator and is chosen to align with the SQL standard.)
47
+ """
23
48
  # This ensures the same behavior as SQL
24
49
  if math.isfinite(self):
25
50
  return float(math.ceil(self))
@@ -34,6 +59,14 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
34
59
 
35
60
  @pxt.udf(is_method=True)
36
61
  def floor(self: float) -> float:
62
+ """
63
+ Return the floor of the given number.
64
+
65
+ Equivalent to Python [`float(math.floor(self))`](https://docs.python.org/3/library/math.html#math.floor) if `self`
66
+ is finite, or `self` itself if `self` is infinite. (This is slightly different from the default behavior of
67
+ `math.floor(self)`, which always returns an `int` and raises an error if `self` is infinite. The behavior of
68
+ Pixeltable generalizes the Python operator and is chosen to align with the SQL standard.)
69
+ """
37
70
  # This ensures the same behavior as SQL
38
71
  if math.isfinite(self):
39
72
  return float(math.floor(self))
@@ -48,6 +81,13 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
48
81
 
49
82
  @pxt.udf(is_method=True)
50
83
  def round(self: float, digits: Optional[int] = None) -> float:
84
+ """
85
+ Round a number to a given precision in decimal digits.
86
+
87
+ Equivalent to Python [`builtins.round(self, digits or 0)`](https://docs.python.org/3/library/functions.html#round).
88
+ Note that if `digits` is not specified, the behavior matches `builtins.round(self, 0)` rather than
89
+ `builtins.round(self)`; this ensures that the return type is always `float` (as in SQL) rather than `int`.
90
+ """
51
91
  # Set digits explicitly to 0 to guarantee a return type of float; this ensures the same behavior as SQL
52
92
  return builtins.round(self, digits or 0)
53
93
 
@@ -10,6 +10,7 @@ from typing import TYPE_CHECKING, Optional, TypeVar, Union
10
10
  import numpy as np
11
11
 
12
12
  import pixeltable as pxt
13
+ import pixeltable.type_system as ts
13
14
  from pixeltable.env import Env, register_client
14
15
  from pixeltable.func.signature import Batch
15
16
  from pixeltable.utils.code import local_public_names
@@ -176,9 +177,9 @@ async def embeddings(input: Batch[str], *, model: str) -> Batch[pxt.Array[(None,
176
177
 
177
178
 
178
179
  @embeddings.conditional_return_type
179
- def _(model: str) -> pxt.ArrayType:
180
+ def _(model: str) -> ts.ArrayType:
180
181
  dimensions = _embedding_dimensions_cache.get(model) # `None` if unknown model
181
- return pxt.ArrayType((dimensions,), dtype=pxt.FloatType())
182
+ return ts.ArrayType((dimensions,), dtype=ts.FloatType())
182
183
 
183
184
 
184
185
  _T = TypeVar('_T')
@@ -21,6 +21,7 @@ import numpy as np
21
21
  import PIL
22
22
 
23
23
  import pixeltable as pxt
24
+ import pixeltable.type_system as ts
24
25
  from pixeltable import env, exprs
25
26
  from pixeltable.func import Batch, Tools
26
27
  from pixeltable.utils.code import local_public_names
@@ -666,13 +667,13 @@ async def embeddings(
666
667
 
667
668
 
668
669
  @embeddings.conditional_return_type
669
- def _(model: str, dimensions: Optional[int] = None) -> pxt.ArrayType:
670
+ def _(model: str, dimensions: Optional[int] = None) -> ts.ArrayType:
670
671
  if dimensions is None:
671
672
  if model not in _embedding_dimensions_cache:
672
673
  # TODO: find some other way to retrieve a sample
673
- return pxt.ArrayType((None,), dtype=pxt.FloatType(), nullable=False)
674
+ return ts.ArrayType((None,), dtype=ts.FloatType(), nullable=False)
674
675
  dimensions = _embedding_dimensions_cache.get(model)
675
- return pxt.ArrayType((dimensions,), dtype=pxt.FloatType(), nullable=False)
676
+ return ts.ArrayType((dimensions,), dtype=ts.FloatType(), nullable=False)
676
677
 
677
678
 
678
679
  #####################################
@@ -738,17 +739,17 @@ async def image_generations(
738
739
 
739
740
 
740
741
  @image_generations.conditional_return_type
741
- def _(size: Optional[str] = None) -> pxt.ImageType:
742
+ def _(size: Optional[str] = None) -> ts.ImageType:
742
743
  if size is None:
743
- return pxt.ImageType(size=(1024, 1024))
744
+ return ts.ImageType(size=(1024, 1024))
744
745
  x_pos = size.find('x')
745
746
  if x_pos == -1:
746
- return pxt.ImageType()
747
+ return ts.ImageType()
747
748
  try:
748
749
  width, height = int(size[:x_pos]), int(size[x_pos + 1 :])
749
750
  except ValueError:
750
- return pxt.ImageType()
751
- return pxt.ImageType(size=(width, height))
751
+ return ts.ImageType()
752
+ return ts.ImageType(size=(width, height))
752
753
 
753
754
 
754
755
  #####################################
@@ -5,10 +5,9 @@ It closely follows the Pandas `pandas.Series.str` API.
5
5
  Example:
6
6
  ```python
7
7
  import pixeltable as pxt
8
- from pixeltable.functions import string as pxt_str
9
8
 
10
9
  t = pxt.get_table(...)
11
- t.select(pxt_str.capitalize(t.str_col)).collect()
10
+ t.select(t.str_col.capitalize()).collect()
12
11
  ```
13
12
  """
14
13
 
@@ -16,6 +16,7 @@ import tenacity
16
16
 
17
17
  import pixeltable as pxt
18
18
  import pixeltable.exceptions as excs
19
+ import pixeltable.type_system as ts
19
20
  from pixeltable import env
20
21
  from pixeltable.func import Batch
21
22
  from pixeltable.utils.code import local_public_names
@@ -225,12 +226,12 @@ async def embeddings(input: Batch[str], *, model: str) -> Batch[pxt.Array[(None,
225
226
 
226
227
 
227
228
  @embeddings.conditional_return_type
228
- def _(model: str) -> pxt.ArrayType:
229
+ def _(model: str) -> ts.ArrayType:
229
230
  if model not in _embedding_dimensions_cache:
230
231
  # TODO: find some other way to retrieve a sample
231
- return pxt.ArrayType((None,), dtype=pxt.FloatType())
232
+ return ts.ArrayType((None,), dtype=ts.FloatType())
232
233
  dimensions = _embedding_dimensions_cache[model]
233
- return pxt.ArrayType((dimensions,), dtype=pxt.FloatType())
234
+ return ts.ArrayType((dimensions,), dtype=ts.FloatType())
234
235
 
235
236
 
236
237
  @pxt.udf(resource_pool='request-rate:together:images')
@@ -4,10 +4,10 @@ Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
4
4
  Example:
5
5
  ```python
6
6
  import pixeltable as pxt
7
- from pixeltable.functions import video as pxt_video
7
+ import pixeltable.functions as pxtf
8
8
 
9
9
  t = pxt.get_table(...)
10
- t.select(pxt_video.extract_audio(t.video_col)).collect()
10
+ t.select(pxtf.video.extract_audio(t.video_col)).collect()
11
11
  ```
12
12
  """
13
13
 
pixeltable/globals.py CHANGED
@@ -2,7 +2,6 @@ from __future__ import annotations
2
2
 
3
3
  import logging
4
4
  import os
5
- import urllib.parse
6
5
  from pathlib import Path
7
6
  from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Optional, Union
8
7
 
@@ -372,6 +371,31 @@ def create_snapshot(
372
371
  )
373
372
 
374
373
 
374
+ def create_replica(destination: str, source: Union[str, catalog.Table]) -> Optional[catalog.Table]:
375
+ """
376
+ Create a replica of a table. Can be used either to create a remote replica of a local table, or to create a local
377
+ replica of a remote table. A given table can have at most one replica per Pixeltable instance.
378
+
379
+ Args:
380
+ destination: Path where the replica will be created. Can be either a local path such as `'my_dir.my_table'`, or
381
+ a remote URI such as `'pxt://username/mydir.my_table'`.
382
+ source: Path to the source table, or (if the source table is a local table) a handle to the source table.
383
+ """
384
+ remote_dest = destination.startswith('pxt://')
385
+ remote_source = isinstance(source, str) and source.startswith('pxt://')
386
+ if remote_dest == remote_source:
387
+ raise excs.Error('Exactly one of `destination` or `source` must be a remote URI.')
388
+
389
+ if remote_dest:
390
+ if isinstance(source, str):
391
+ source = get_table(source)
392
+ share.push_replica(destination, source)
393
+ return None
394
+ else:
395
+ assert isinstance(source, str)
396
+ return share.pull_replica(destination, source)
397
+
398
+
375
399
  def get_table(path: str) -> catalog.Table:
376
400
  """Get a handle to an existing table, view, or snapshot.
377
401
 
@@ -470,7 +494,7 @@ def drop_table(
470
494
  # if we're dropping a table by handle, we first need to get the current path, then drop the S lock on
471
495
  # the Table record, and then get X locks in the correct order (first containing directory, then table)
472
496
  with Env.get().begin_xact():
473
- tbl_path = table._path()
497
+ tbl_path = table._path
474
498
  else:
475
499
  assert isinstance(table, str)
476
500
  tbl_path = table
@@ -627,13 +651,6 @@ def _extract_paths(
627
651
  return result
628
652
 
629
653
 
630
- def publish_snapshot(dest_uri: str, table: catalog.Table) -> None:
631
- parsed_uri = urllib.parse.urlparse(dest_uri)
632
- if parsed_uri.scheme != 'pxt':
633
- raise excs.Error(f'Invalid Pixeltable URI (does not start with pxt://): {dest_uri}')
634
- share.publish_snapshot(dest_uri, table)
635
-
636
-
637
654
  def list_dirs(path: str = '', recursive: bool = True) -> list[str]:
638
655
  """List the directories in a directory.
639
656
 
pixeltable/io/datarows.py CHANGED
@@ -3,13 +3,14 @@ from __future__ import annotations
3
3
  from typing import Any, Iterable, Optional, Union
4
4
 
5
5
  import pixeltable as pxt
6
+ import pixeltable.type_system as ts
6
7
  from pixeltable import exceptions as excs
7
8
 
8
9
 
9
10
  def _infer_schema_from_rows(
10
11
  rows: Iterable[dict[str, Any]], schema_overrides: dict[str, Any], primary_key: list[str]
11
- ) -> dict[str, pxt.ColumnType]:
12
- schema: dict[str, pxt.ColumnType] = {}
12
+ ) -> dict[str, ts.ColumnType]:
13
+ schema: dict[str, ts.ColumnType] = {}
13
14
  cols_with_nones: set[str] = set()
14
15
 
15
16
  for n, row in enumerate(rows):
@@ -23,7 +24,7 @@ def _infer_schema_from_rows(
23
24
  elif value is not None:
24
25
  # If `col_name` is not in `schema_overrides`, then we infer its type from the data.
25
26
  # The column type will always be nullable by default.
26
- col_type = pxt.ColumnType.infer_literal_type(value, nullable=col_name not in primary_key)
27
+ col_type = ts.ColumnType.infer_literal_type(value, nullable=col_name not in primary_key)
27
28
  if col_type is None:
28
29
  raise excs.Error(
29
30
  f'Could not infer type for column `{col_name}`; the value in row {n} '
@@ -31,8 +31,8 @@ _hf_to_pxt: dict[str, ts.ColumnType] = {
31
31
  'timestamp[s]': ts.TimestampType(nullable=True),
32
32
  'timestamp[ms]': ts.TimestampType(nullable=True), # HF dataset iterator converts timestamps to datetime.datetime
33
33
  'timestamp[us]': ts.TimestampType(nullable=True),
34
- 'date32': ts.StringType(nullable=True), # date32 is not supported in pixeltable, use string
35
- 'date64': ts.StringType(nullable=True), # date64 is not supported in pixeltable, use string
34
+ 'date32': ts.DateType(nullable=True),
35
+ 'date64': ts.DateType(nullable=True),
36
36
  }
37
37
 
38
38
 
@@ -11,7 +11,7 @@ import label_studio_sdk # type: ignore[import-untyped]
11
11
  import PIL.Image
12
12
  from requests.exceptions import HTTPError
13
13
 
14
- import pixeltable as pxt
14
+ import pixeltable.type_system as ts
15
15
  from pixeltable import Column, Table, env, exceptions as excs
16
16
  from pixeltable.config import Config
17
17
  from pixeltable.exprs import ColumnRef, DataRow, Expr
@@ -89,21 +89,21 @@ class LabelStudioProject(Project):
89
89
  def __project_config(self) -> '_LabelStudioConfig':
90
90
  return self.__parse_project_config(self.project_params['label_config'])
91
91
 
92
- def get_export_columns(self) -> dict[str, pxt.ColumnType]:
92
+ def get_export_columns(self) -> dict[str, ts.ColumnType]:
93
93
  """
94
94
  The data keys and preannotation fields specified in this Label Studio project.
95
95
  """
96
96
  return self.__project_config.export_columns
97
97
 
98
- def get_import_columns(self) -> dict[str, pxt.ColumnType]:
98
+ def get_import_columns(self) -> dict[str, ts.ColumnType]:
99
99
  """
100
100
  Always contains a single entry:
101
101
 
102
102
  ```
103
- {"annotations": pxt.JsonType(nullable=True)}
103
+ {"annotations": ts.JsonType(nullable=True)}
104
104
  ```
105
105
  """
106
- return {ANNOTATIONS_COLUMN: pxt.JsonType(nullable=True)}
106
+ return {ANNOTATIONS_COLUMN: ts.JsonType(nullable=True)}
107
107
 
108
108
  def sync(self, t: Table, export_data: bool, import_data: bool) -> SyncStatus:
109
109
  _logger.info(
@@ -412,8 +412,8 @@ class LabelStudioProject(Project):
412
412
  # TODO(aaron-siegel): Simplify this once propagation is properly implemented in batch_update
413
413
  ancestor = t
414
414
  while local_annotations_col not in ancestor._tbl_version.get().cols:
415
- assert ancestor._base is not None
416
- ancestor = ancestor._base
415
+ assert ancestor._base_table is not None
416
+ ancestor = ancestor._base_table
417
417
  update_status = ancestor.batch_update(updates)
418
418
  env.Env.get().console_logger.info(f'Updated annotation(s) from {len(updates)} task(s) in {self}.')
419
419
  return SyncStatus(pxt_rows_updated=update_status.num_rows, num_excs=update_status.num_excs)
@@ -577,10 +577,10 @@ class LabelStudioProject(Project):
577
577
  else:
578
578
  local_annotations_column = next(k for k, v in col_mapping.items() if v == ANNOTATIONS_COLUMN)
579
579
  if local_annotations_column not in t._schema:
580
- t.add_columns({local_annotations_column: pxt.JsonType(nullable=True)})
580
+ t.add_columns({local_annotations_column: ts.JsonType(nullable=True)})
581
581
 
582
582
  resolved_col_mapping = cls.validate_columns(
583
- t, config.export_columns, {ANNOTATIONS_COLUMN: pxt.JsonType(nullable=True)}, col_mapping
583
+ t, config.export_columns, {ANNOTATIONS_COLUMN: ts.JsonType(nullable=True)}, col_mapping
584
584
  )
585
585
 
586
586
  # Perform some additional validation
@@ -649,7 +649,7 @@ class LabelStudioProject(Project):
649
649
  @dataclass(frozen=True)
650
650
  class _DataKey:
651
651
  name: Optional[str] # The 'name' attribute of the data key; may differ from the field name
652
- column_type: pxt.ColumnType
652
+ column_type: ts.ColumnType
653
653
 
654
654
 
655
655
  @dataclass(frozen=True)
@@ -673,18 +673,18 @@ class _LabelStudioConfig:
673
673
  )
674
674
 
675
675
  @property
676
- def export_columns(self) -> dict[str, pxt.ColumnType]:
676
+ def export_columns(self) -> dict[str, ts.ColumnType]:
677
677
  data_key_cols = {key_id: key_info.column_type for key_id, key_info in self.data_keys.items()}
678
- rl_cols = {name: pxt.JsonType() for name in self.rectangle_labels}
678
+ rl_cols = {name: ts.JsonType() for name in self.rectangle_labels}
679
679
  return {**data_key_cols, **rl_cols}
680
680
 
681
681
 
682
682
  ANNOTATIONS_COLUMN = 'annotations'
683
683
  _PAGE_SIZE = 100 # This is the default used in the LS SDK
684
684
  _LS_TAG_MAP = {
685
- 'header': pxt.StringType(),
686
- 'text': pxt.StringType(),
687
- 'image': pxt.ImageType(),
688
- 'video': pxt.VideoType(),
689
- 'audio': pxt.AudioType(),
685
+ 'header': ts.StringType(),
686
+ 'text': ts.StringType(),
687
+ 'image': ts.ImageType(),
688
+ 'video': ts.VideoType(),
689
+ 'audio': ts.AudioType(),
690
690
  }