pixeltable 0.3.13__py3-none-any.whl → 0.3.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. pixeltable/__init__.py +2 -2
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/catalog.py +9 -7
  4. pixeltable/catalog/column.py +6 -2
  5. pixeltable/catalog/dir.py +2 -1
  6. pixeltable/catalog/insertable_table.py +1 -1
  7. pixeltable/catalog/schema_object.py +2 -1
  8. pixeltable/catalog/table.py +12 -8
  9. pixeltable/catalog/table_version.py +19 -0
  10. pixeltable/catalog/table_version_path.py +7 -0
  11. pixeltable/catalog/view.py +3 -3
  12. pixeltable/dataframe.py +48 -5
  13. pixeltable/env.py +1 -1
  14. pixeltable/exec/aggregation_node.py +14 -0
  15. pixeltable/exec/cache_prefetch_node.py +1 -1
  16. pixeltable/exec/expr_eval/expr_eval_node.py +1 -1
  17. pixeltable/exprs/column_ref.py +42 -17
  18. pixeltable/exprs/data_row.py +3 -0
  19. pixeltable/exprs/globals.py +1 -1
  20. pixeltable/exprs/literal.py +11 -1
  21. pixeltable/exprs/rowid_ref.py +4 -1
  22. pixeltable/exprs/similarity_expr.py +1 -1
  23. pixeltable/func/function.py +1 -1
  24. pixeltable/functions/__init__.py +1 -0
  25. pixeltable/functions/date.py +185 -0
  26. pixeltable/functions/gemini.py +22 -20
  27. pixeltable/functions/globals.py +1 -16
  28. pixeltable/functions/json.py +2 -1
  29. pixeltable/functions/math.py +40 -0
  30. pixeltable/functions/string.py +1 -2
  31. pixeltable/functions/video.py +2 -2
  32. pixeltable/globals.py +26 -9
  33. pixeltable/io/hf_datasets.py +2 -2
  34. pixeltable/io/pandas.py +16 -4
  35. pixeltable/io/parquet.py +2 -0
  36. pixeltable/metadata/__init__.py +1 -1
  37. pixeltable/metadata/converters/convert_34.py +21 -0
  38. pixeltable/metadata/notes.py +1 -0
  39. pixeltable/plan.py +12 -5
  40. pixeltable/share/__init__.py +1 -1
  41. pixeltable/share/packager.py +219 -119
  42. pixeltable/share/publish.py +61 -16
  43. pixeltable/store.py +45 -20
  44. pixeltable/type_system.py +46 -2
  45. pixeltable/utils/arrow.py +8 -2
  46. pixeltable/utils/pytorch.py +4 -0
  47. {pixeltable-0.3.13.dist-info → pixeltable-0.3.14.dist-info}/METADATA +2 -4
  48. {pixeltable-0.3.13.dist-info → pixeltable-0.3.14.dist-info}/RECORD +51 -49
  49. {pixeltable-0.3.13.dist-info → pixeltable-0.3.14.dist-info}/LICENSE +0 -0
  50. {pixeltable-0.3.13.dist-info → pixeltable-0.3.14.dist-info}/WHEEL +0 -0
  51. {pixeltable-0.3.13.dist-info → pixeltable-0.3.14.dist-info}/entry_points.txt +0 -0
@@ -50,6 +50,9 @@ class Literal(Expr):
50
50
  assert isinstance(self.val, datetime.datetime)
51
51
  default_tz = Env.get().default_time_zone
52
52
  return f"'{self.val.astimezone(default_tz).isoformat()}'"
53
+ if self.col_type.is_date_type():
54
+ assert isinstance(self.val, datetime.date)
55
+ return f"'{self.val.isoformat()}'"
53
56
  if self.col_type.is_array_type():
54
57
  assert isinstance(self.val, np.ndarray)
55
58
  return str(self.val.tolist())
@@ -82,6 +85,10 @@ class Literal(Expr):
82
85
  # stored as UTC in the database)
83
86
  encoded_val = self.val.isoformat()
84
87
  return {'val': encoded_val, 'val_t': self.col_type._type.name, **super()._as_dict()}
88
+ elif self.col_type.is_date_type():
89
+ assert isinstance(self.val, datetime.date)
90
+ encoded_val = self.val.isoformat()
91
+ return {'val': encoded_val, 'val_t': self.col_type._type.name, **super()._as_dict()}
85
92
  elif self.col_type.is_array_type():
86
93
  assert isinstance(self.val, np.ndarray)
87
94
  return {'val': self.val.tolist(), 'val_t': self.col_type._type.name, **super()._as_dict()}
@@ -96,7 +103,10 @@ class Literal(Expr):
96
103
  assert 'val' in d
97
104
  if 'val_t' in d:
98
105
  val_t = d['val_t']
99
- if val_t == ts.ColumnType.Type.TIMESTAMP.name:
106
+ if val_t == ts.ColumnType.Type.DATE.name:
107
+ dt = datetime.date.fromisoformat(d['val'])
108
+ return cls(dt)
109
+ elif val_t == ts.ColumnType.Type.TIMESTAMP.name:
100
110
  dt = datetime.datetime.fromisoformat(d['val'])
101
111
  assert dt.tzinfo == datetime.timezone.utc # Must be UTC in the database
102
112
  return cls(dt)
@@ -30,7 +30,7 @@ class RowidRef(Expr):
30
30
 
31
31
  def __init__(
32
32
  self,
33
- tbl: catalog.TableVersionHandle,
33
+ tbl: Optional[catalog.TableVersionHandle],
34
34
  idx: int,
35
35
  tbl_id: Optional[UUID] = None,
36
36
  normalized_base_id: Optional[UUID] = None,
@@ -98,6 +98,9 @@ class RowidRef(Expr):
98
98
  def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
99
99
  tbl = self.tbl.get() if self.tbl is not None else catalog.Catalog.get().get_tbl_version(self.tbl_id, None)
100
100
  rowid_cols = tbl.store_tbl.rowid_columns()
101
+ assert self.rowid_component_idx <= len(rowid_cols), (
102
+ f'{self.rowid_component_idx} not consistent with {rowid_cols}'
103
+ )
101
104
  return rowid_cols[self.rowid_component_idx]
102
105
 
103
106
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
@@ -26,7 +26,7 @@ class SimilarityExpr(Expr):
26
26
  from pixeltable import index
27
27
 
28
28
  # determine index to use
29
- idx_dict = ColumnRef.find_embedding_index(col_ref.col, idx_name, 'similarity')
29
+ idx_dict = col_ref.find_embedding_index(idx_name, 'similarity')
30
30
  assert len(idx_dict) == 1
31
31
  self.idx_info = next(iter(idx_dict.values()))
32
32
  idx = self.idx_info.idx
@@ -514,7 +514,7 @@ class InvalidFunction(Function):
514
514
  def _as_dict(self) -> dict:
515
515
  """
516
516
  Here we write out (verbatim) the original metadata that failed to load (and that resulted in the
517
- InvalidFunction). Note that the InvalidFunction itself is never serlialized, so there is no corresponding
517
+ InvalidFunction). Note that the InvalidFunction itself is never serialized, so there is no corresponding
518
518
  from_dict() method.
519
519
  """
520
520
  return self.fn_dict
@@ -6,6 +6,7 @@ from . import (
6
6
  anthropic,
7
7
  audio,
8
8
  bedrock,
9
+ date,
9
10
  deepseek,
10
11
  fireworks,
11
12
  gemini,
@@ -0,0 +1,185 @@
1
+ """
2
+ Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs) for `DateType`.
3
+
4
+ Usage example:
5
+ ```python
6
+ import pixeltable as pxt
7
+
8
+ t = pxt.get_table(...)
9
+ t.select(t.date_col.year, t.date_col.weekday()).collect()
10
+ ```
11
+ """
12
+
13
+ from datetime import date, timedelta
14
+
15
+ import sqlalchemy as sql
16
+
17
+ import pixeltable as pxt
18
+ from pixeltable.utils.code import local_public_names
19
+
20
+ _SQL_ZERO = sql.literal(0)
21
+
22
+ # NOT YET SUPPORTED date +/- integer
23
+ # NOT YET SUPPORTED date1 - date2 -> integer
24
+ # NOT YET SUPPORTED timestamp(date)
25
+ # NOT YET SUPPORTED date(timestamp)
26
+
27
+
28
+ @pxt.udf(is_property=True)
29
+ def year(self: date) -> int:
30
+ """
31
+ Between [`MINYEAR`](https://docs.python.org/3/library/datetime.html#datetime.MINYEAR) and
32
+ [`MAXYEAR`](https://docs.python.org/3/library/datetime.html#datetime.MAXYEAR) inclusive.
33
+
34
+ Equivalent to [`date.year`](https://docs.python.org/3/library/datetime.html#datetime.date.year).
35
+ """
36
+ return self.year
37
+
38
+
39
+ @year.to_sql
40
+ def _(self: sql.ColumnElement) -> sql.ColumnElement:
41
+ return sql.extract('year', self)
42
+
43
+
44
+ @pxt.udf(is_property=True)
45
+ def month(self: date) -> int:
46
+ """
47
+ Between 1 and 12 inclusive.
48
+
49
+ Equivalent to [`date.month`](https://docs.python.org/3/library/datetime.html#datetime.date.month).
50
+ """
51
+ return self.month
52
+
53
+
54
+ @month.to_sql
55
+ def _(self: sql.ColumnElement) -> sql.ColumnElement:
56
+ return sql.extract('month', self)
57
+
58
+
59
+ @pxt.udf(is_property=True)
60
+ def day(self: date) -> int:
61
+ """
62
+ Between 1 and the number of days in the given month of the given year.
63
+
64
+ Equivalent to [`date.day`](https://docs.python.org/3/library/datetime.html#datetime.date.day).
65
+ """
66
+ return self.day
67
+
68
+
69
+ @day.to_sql
70
+ def _(self: sql.ColumnElement) -> sql.ColumnElement:
71
+ return sql.extract('day', self)
72
+
73
+
74
+ @pxt.udf(is_method=True)
75
+ def make_date(year: int, month: int, day: int) -> date:
76
+ """
77
+ Create a date.
78
+
79
+ Equivalent to [`date()`](https://docs.python.org/3/library/datetime.html#datetime.date).
80
+ """
81
+ return date(year, month, day)
82
+
83
+
84
+ @make_date.to_sql
85
+ def _(year: sql.ColumnElement, month: sql.ColumnElement, day: sql.ColumnElement) -> sql.ColumnElement:
86
+ return sql.func.make_date(sql.cast(year, sql.Integer), sql.cast(month, sql.Integer), sql.cast(day, sql.Integer))
87
+
88
+
89
+ @pxt.udf(is_method=True)
90
+ def weekday(self: date) -> int:
91
+ """
92
+ Between 0 (Monday) and 6 (Sunday) inclusive.
93
+
94
+ Equivalent to [`date.weekday()`](https://docs.python.org/3/library/datetime.html#datetime.date.weekday).
95
+ """
96
+ return self.weekday()
97
+
98
+
99
+ @weekday.to_sql
100
+ def _(self: sql.ColumnElement) -> sql.ColumnElement:
101
+ return sql.extract('isodow', self) - 1
102
+
103
+
104
+ @pxt.udf(is_method=True)
105
+ def isoweekday(self: date) -> int:
106
+ """
107
+ Return the day of the week as an integer, where Monday is 1 and Sunday is 7.
108
+
109
+ Equivalent to [`date.isoweekday()`](https://docs.python.org/3/library/datetime.html#datetime.date.isoweekday).
110
+ """
111
+ return self.isoweekday()
112
+
113
+
114
+ @isoweekday.to_sql
115
+ def _(self: sql.ColumnElement) -> sql.ColumnElement:
116
+ return sql.extract('isodow', self)
117
+
118
+
119
+ @pxt.udf(is_method=True)
120
+ def isocalendar(self: date) -> dict:
121
+ """
122
+ Return a dictionary with three entries: `'year'`, `'week'`, and `'weekday'`.
123
+
124
+ Equivalent to
125
+ [`date.isocalendar()`](https://docs.python.org/3/library/datetime.html#datetime.date.isocalendar).
126
+ """
127
+ iso_year, iso_week, iso_weekday = self.isocalendar()
128
+ return {'year': iso_year, 'week': iso_week, 'weekday': iso_weekday}
129
+
130
+
131
+ @pxt.udf(is_method=True)
132
+ def isoformat(self: date, sep: str = 'T', timespec: str = 'auto') -> str:
133
+ """
134
+ Return a string representing the date in ISO 8601 format (`YYYY-MM-DD`).
135
+
136
+ Equivalent to [`date.isoformat()`](https://docs.python.org/3/library/datetime.html#datetime.date.isoformat).
137
+
138
+ Args:
139
+ sep: Separator between date and time. Currently ignored, because the result has no time component.
140
+ timespec: The number of additional terms in the output. See the
141
+ [`date.isoformat()`](https://docs.python.org/3/library/datetime.html#datetime.date.isoformat)
142
+ documentation for more details. Currently ignored, because the result has no time component.
143
+ """
144
+ return self.isoformat()
145
+
146
+
147
+ @pxt.udf(is_method=True)
148
+ def toordinal(self: date) -> int:
149
+ """
150
+ Return the proleptic Gregorian ordinal of the date, where January 1 of year 1 has ordinal 1.
151
+
152
+ Equivalent to [`date.toordinal()`](https://docs.python.org/3/library/datetime.html#datetime.date.toordinal).
153
+ """
154
+ return self.toordinal()
155
+
156
+
157
+ @pxt.udf(is_method=True)
158
+ def strftime(self: date, format: str) -> str:
159
+ """
160
+ Return a string representing the date, controlled by an explicit format string.
161
+
162
+ Equivalent to [`date.strftime()`](https://docs.python.org/3/library/datetime.html#datetime.date.strftime).
163
+
164
+ Args:
165
+ format: The format string to control the output. For a complete list of formatting directives, see
166
+ [`strftime()` and `strptime()` Behavior](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior).
167
+ """
168
+ return self.strftime(format)
169
+
170
+
171
+ @pxt.udf(is_method=True)
172
+ def add_days(self: date, n: int) -> date:
173
+ """
174
+ Add `n` days to the date.
175
+
176
+ Equivalent to [`date + timedelta(days=n)`](https://docs.python.org/3/library/datetime.html#datetime.timedelta).
177
+ """
178
+ return self + timedelta(days=n)
179
+
180
+
181
+ __all__ = local_public_names(__name__)
182
+
183
+
184
+ def __dir__() -> list[str]:
185
+ return __all__
@@ -1,32 +1,35 @@
1
1
  """
2
2
  Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
3
3
  that wrap various endpoints from the Google Gemini API. In order to use them, you must
4
- first `pip install google-generativeai` and configure your Gemini credentials, as described in
4
+ first `pip install google-genai` and configure your Gemini credentials, as described in
5
5
  the [Working with Gemini](https://pixeltable.readme.io/docs/working-with-gemini) tutorial.
6
6
  """
7
7
 
8
- from typing import Optional
8
+ from typing import TYPE_CHECKING, Optional
9
9
 
10
10
  import pixeltable as pxt
11
11
  from pixeltable import env
12
12
 
13
+ if TYPE_CHECKING:
14
+ from google import genai
15
+
13
16
 
14
17
  @env.register_client('gemini')
15
- def _(api_key: str) -> None:
16
- import google.generativeai as genai
18
+ def _(api_key: str) -> 'genai.client.Client':
19
+ from google import genai
17
20
 
18
- genai.configure(api_key=api_key)
21
+ return genai.client.Client(api_key=api_key)
19
22
 
20
23
 
21
- def _ensure_loaded() -> None:
22
- env.Env.get().get_client('gemini')
24
+ def _genai_client() -> 'genai.client.Client':
25
+ return env.Env.get().get_client('gemini')
23
26
 
24
27
 
25
28
  @pxt.udf(resource_pool='request-rate:gemini')
26
29
  async def generate_content(
27
30
  contents: str,
28
31
  *,
29
- model_name: str,
32
+ model: str,
30
33
  candidate_count: Optional[int] = None,
31
34
  stop_sequences: Optional[list[str]] = None,
32
35
  max_output_tokens: Optional[int] = None,
@@ -48,11 +51,11 @@ async def generate_content(
48
51
 
49
52
  __Requirements:__
50
53
 
51
- - `pip install google-generativeai`
54
+ - `pip install google-genai`
52
55
 
53
56
  Args:
54
57
  contents: The input content to generate from.
55
- model_name: The name of the model to use.
58
+ model: The name of the model to use.
56
59
 
57
60
  For details on the other parameters, see: <https://ai.google.dev/gemini-api/docs>
58
61
 
@@ -63,14 +66,12 @@ async def generate_content(
63
66
  Add a computed column that applies the model `gemini-1.5-flash`
64
67
  to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
65
68
 
66
- >>> tbl.add_computed_column(response=generate_content(tbl.prompt, model_name='gemini-1.5-flash'))
69
+ >>> tbl.add_computed_column(response=generate_content(tbl.prompt, model='gemini-1.5-flash'))
67
70
  """
68
- env.Env.get().require_package('google.generativeai')
69
- _ensure_loaded()
70
- import google.generativeai as genai
71
+ env.Env.get().require_package('google.genai')
72
+ from google.genai import types
71
73
 
72
- model = genai.GenerativeModel(model_name=model_name)
73
- gc = genai.GenerationConfig(
74
+ config = types.GenerateContentConfig(
74
75
  candidate_count=candidate_count,
75
76
  stop_sequences=stop_sequences,
76
77
  max_output_tokens=max_output_tokens,
@@ -82,10 +83,11 @@ async def generate_content(
82
83
  presence_penalty=presence_penalty,
83
84
  frequency_penalty=frequency_penalty,
84
85
  )
85
- response = await model.generate_content_async(contents, generation_config=gc)
86
- return response.to_dict()
86
+
87
+ response = await _genai_client().aio.models.generate_content(model=model, contents=contents, config=config)
88
+ return response.model_dump()
87
89
 
88
90
 
89
91
  @generate_content.resource_pool
90
- def _(model_name: str) -> str:
91
- return f'request-rate:gemini:{model_name}'
92
+ def _(model: str) -> str:
93
+ return f'request-rate:gemini:{model}'
@@ -49,22 +49,7 @@ def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
49
49
  allows_window=True,
50
50
  # Allow counting non-null values of any type
51
51
  # TODO: should we have an "Any" type that can be used here?
52
- type_substitutions=tuple(
53
- {T: Optional[t]} # type: ignore[misc]
54
- for t in (
55
- ts.String,
56
- ts.Int,
57
- ts.Float,
58
- ts.Bool,
59
- ts.Timestamp,
60
- ts.Array,
61
- ts.Json,
62
- ts.Image,
63
- ts.Video,
64
- ts.Audio,
65
- ts.Document,
66
- )
67
- ),
52
+ type_substitutions=tuple({T: Optional[t]} for t in ts.ALL_PIXELTABLE_TYPES), # type: ignore[misc]
68
53
  )
69
54
  class count(func.Aggregator, typing.Generic[T]):
70
55
  def __init__(self) -> None:
@@ -4,9 +4,10 @@ Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
4
4
  Example:
5
5
  ```python
6
6
  import pixeltable as pxt
7
+ import pixeltable.functions as pxtf
7
8
 
8
9
  t = pxt.get_table(...)
9
- t.select(pxt.functions.json.make_list()).collect()
10
+ t.select(pxtf.json.make_list(t.json_col)).collect()
10
11
  ```
11
12
  """
12
13
 
@@ -1,3 +1,15 @@
1
+ """
2
+ Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs) for mathematical operations.
3
+
4
+ Example:
5
+ ```python
6
+ import pixeltable as pxt
7
+
8
+ t = pxt.get_table(...)
9
+ t.select(t.float_col.floor()).collect()
10
+ ```
11
+ """
12
+
1
13
  import builtins
2
14
  import math
3
15
  from typing import Optional
@@ -10,6 +22,11 @@ from pixeltable.utils.code import local_public_names
10
22
 
11
23
  @pxt.udf(is_method=True)
12
24
  def abs(self: float) -> float:
25
+ """
26
+ Return the absolute value of the given number.
27
+
28
+ Equivalent to Python [`builtins.abs()`](https://docs.python.org/3/library/functions.html#abs).
29
+ """
13
30
  return builtins.abs(self)
14
31
 
15
32
 
@@ -20,6 +37,14 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
20
37
 
21
38
  @pxt.udf(is_method=True)
22
39
  def ceil(self: float) -> float:
40
+ """
41
+ Return the ceiling of the given number.
42
+
43
+ Equivalent to Python [`float(math.ceil(self))`](https://docs.python.org/3/library/math.html#math.ceil) if `self`
44
+ is finite, or `self` itself if `self` is infinite. (This is slightly different from the default behavior of
45
+ `math.ceil(self)`, which always returns an `int` and raises an error if `self` is infinite. The behavior in
46
+ Pixeltable generalizes the Python operator and is chosen to align with the SQL standard.)
47
+ """
23
48
  # This ensures the same behavior as SQL
24
49
  if math.isfinite(self):
25
50
  return float(math.ceil(self))
@@ -34,6 +59,14 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
34
59
 
35
60
  @pxt.udf(is_method=True)
36
61
  def floor(self: float) -> float:
62
+ """
63
+ Return the floor of the given number.
64
+
65
+ Equivalent to Python [`float(math.floor(self))`](https://docs.python.org/3/library/math.html#math.floor) if `self`
66
+ is finite, or `self` itself if `self` is infinite. (This is slightly different from the default behavior of
67
+ `math.floor(self)`, which always returns an `int` and raises an error if `self` is infinite. The behavior of
68
+ Pixeltable generalizes the Python operator and is chosen to align with the SQL standard.)
69
+ """
37
70
  # This ensures the same behavior as SQL
38
71
  if math.isfinite(self):
39
72
  return float(math.floor(self))
@@ -48,6 +81,13 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
48
81
 
49
82
  @pxt.udf(is_method=True)
50
83
  def round(self: float, digits: Optional[int] = None) -> float:
84
+ """
85
+ Round a number to a given precision in decimal digits.
86
+
87
+ Equivalent to Python [`builtins.round(self, digits or 0)`](https://docs.python.org/3/library/functions.html#round).
88
+ Note that if `digits` is not specified, the behavior matches `builtins.round(self, 0)` rather than
89
+ `builtins.round(self)`; this ensures that the return type is always `float` (as in SQL) rather than `int`.
90
+ """
51
91
  # Set digits explicitly to 0 to guarantee a return type of float; this ensures the same behavior as SQL
52
92
  return builtins.round(self, digits or 0)
53
93
 
@@ -5,10 +5,9 @@ It closely follows the Pandas `pandas.Series.str` API.
5
5
  Example:
6
6
  ```python
7
7
  import pixeltable as pxt
8
- from pixeltable.functions import string as pxt_str
9
8
 
10
9
  t = pxt.get_table(...)
11
- t.select(pxt_str.capitalize(t.str_col)).collect()
10
+ t.select(t.str_col.capitalize()).collect()
12
11
  ```
13
12
  """
14
13
 
@@ -4,10 +4,10 @@ Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
4
4
  Example:
5
5
  ```python
6
6
  import pixeltable as pxt
7
- from pixeltable.functions import video as pxt_video
7
+ import pixeltable.functions as pxtf
8
8
 
9
9
  t = pxt.get_table(...)
10
- t.select(pxt_video.extract_audio(t.video_col)).collect()
10
+ t.select(pxtf.video.extract_audio(t.video_col)).collect()
11
11
  ```
12
12
  """
13
13
 
pixeltable/globals.py CHANGED
@@ -2,7 +2,6 @@ from __future__ import annotations
2
2
 
3
3
  import logging
4
4
  import os
5
- import urllib.parse
6
5
  from pathlib import Path
7
6
  from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Optional, Union
8
7
 
@@ -372,6 +371,31 @@ def create_snapshot(
372
371
  )
373
372
 
374
373
 
374
+ def create_replica(destination: str, source: Union[str, catalog.Table]) -> Optional[catalog.Table]:
375
+ """
376
+ Create a replica of a table. Can be used either to create a remote replica of a local table, or to create a local
377
+ replica of a remote table. A given table can have at most one replica per Pixeltable instance.
378
+
379
+ Args:
380
+ destination: Path where the replica will be created. Can be either a local path such as `'my_dir.my_table'`, or
381
+ a remote URI such as `'pxt://username/mydir.my_table'`.
382
+ source: Path to the source table, or (if the source table is a local table) a handle to the source table.
383
+ """
384
+ remote_dest = destination.startswith('pxt://')
385
+ remote_source = isinstance(source, str) and source.startswith('pxt://')
386
+ if remote_dest == remote_source:
387
+ raise excs.Error('Exactly one of `destination` or `source` must be a remote URI.')
388
+
389
+ if remote_dest:
390
+ if isinstance(source, str):
391
+ source = get_table(source)
392
+ share.push_replica(destination, source)
393
+ return None
394
+ else:
395
+ assert isinstance(source, str)
396
+ return share.pull_replica(destination, source)
397
+
398
+
375
399
  def get_table(path: str) -> catalog.Table:
376
400
  """Get a handle to an existing table, view, or snapshot.
377
401
 
@@ -470,7 +494,7 @@ def drop_table(
470
494
  # if we're dropping a table by handle, we first need to get the current path, then drop the S lock on
471
495
  # the Table record, and then get X locks in the correct order (first containing directory, then table)
472
496
  with Env.get().begin_xact():
473
- tbl_path = table._path()
497
+ tbl_path = table._path
474
498
  else:
475
499
  assert isinstance(table, str)
476
500
  tbl_path = table
@@ -627,13 +651,6 @@ def _extract_paths(
627
651
  return result
628
652
 
629
653
 
630
- def publish_snapshot(dest_uri: str, table: catalog.Table) -> None:
631
- parsed_uri = urllib.parse.urlparse(dest_uri)
632
- if parsed_uri.scheme != 'pxt':
633
- raise excs.Error(f'Invalid Pixeltable URI (does not start with pxt://): {dest_uri}')
634
- share.publish_snapshot(dest_uri, table)
635
-
636
-
637
654
  def list_dirs(path: str = '', recursive: bool = True) -> list[str]:
638
655
  """List the directories in a directory.
639
656
 
@@ -31,8 +31,8 @@ _hf_to_pxt: dict[str, ts.ColumnType] = {
31
31
  'timestamp[s]': ts.TimestampType(nullable=True),
32
32
  'timestamp[ms]': ts.TimestampType(nullable=True), # HF dataset iterator converts timestamps to datetime.datetime
33
33
  'timestamp[us]': ts.TimestampType(nullable=True),
34
- 'date32': ts.StringType(nullable=True), # date32 is not supported in pixeltable, use string
35
- 'date64': ts.StringType(nullable=True), # date64 is not supported in pixeltable, use string
34
+ 'date32': ts.DateType(nullable=True),
35
+ 'date64': ts.DateType(nullable=True),
36
36
  }
37
37
 
38
38
 
pixeltable/io/pandas.py CHANGED
@@ -9,6 +9,7 @@ from pandas.api.types import is_datetime64_any_dtype, is_extension_array_dtype
9
9
  import pixeltable as pxt
10
10
  import pixeltable.exceptions as excs
11
11
  import pixeltable.type_system as ts
12
+ from pixeltable.env import Env
12
13
 
13
14
 
14
15
  def import_pandas(
@@ -209,14 +210,25 @@ def _df_row_to_pxt_row(
209
210
  nval = bool(val)
210
211
  elif pxt_type.is_string_type():
211
212
  nval = str(val)
213
+ elif pxt_type.is_date_type():
214
+ if pd.isnull(val):
215
+ # pandas has the bespoke 'NaT' value for a missing timestamp
216
+ # This is not supported by postgres, and must be converted to None
217
+ nval = None
218
+ else:
219
+ nval = pd.Timestamp(val).date()
212
220
  elif pxt_type.is_timestamp_type():
213
221
  if pd.isnull(val):
214
- # pandas has the bespoke 'NaT' type for a missing timestamp; postgres is very
215
- # much not-ok with it. (But if we convert it to None and then load out the
216
- # table contents as a pandas DataFrame, it will correctly restore the 'NaT'!)
222
+ # pandas has the bespoke 'NaT' value for a missing timestamp
223
+ # This is not supported by postgres, and must be converted to None
217
224
  nval = None
218
225
  else:
219
- nval = pd.Timestamp(val).to_pydatetime()
226
+ tval = pd.Timestamp(val)
227
+ # pandas supports tz-aware and naive timestamps.
228
+ if tval.tz is None:
229
+ nval = pd.Timestamp(tval).tz_localize(tz=Env.get().default_time_zone)
230
+ else:
231
+ nval = tval.astimezone(Env.get().default_time_zone)
220
232
  else:
221
233
  nval = val
222
234
  pxt_row[pxt_name] = nval
pixeltable/io/parquet.py CHANGED
@@ -127,6 +127,8 @@ def export_parquet(
127
127
  length = 8
128
128
  elif col_type.is_bool_type():
129
129
  length = 1
130
+ elif col_type.is_date_type():
131
+ length = 4
130
132
  elif col_type.is_timestamp_type():
131
133
  val = val.astimezone(datetime.timezone.utc)
132
134
  length = 8
@@ -16,7 +16,7 @@ _console_logger = ConsoleLogger(logging.getLogger('pixeltable'))
16
16
 
17
17
 
18
18
  # current version of the metadata; this is incremented whenever the metadata schema changes
19
- VERSION = 34
19
+ VERSION = 35
20
20
 
21
21
 
22
22
  def create_system_info(engine: sql.engine.Engine) -> None:
@@ -0,0 +1,21 @@
1
+ from typing import Any, Optional
2
+
3
+ import sqlalchemy as sql
4
+
5
+ from pixeltable.metadata import register_converter
6
+ from pixeltable.metadata.converters.util import convert_table_md
7
+
8
+
9
+ @register_converter(version=34)
10
+ def _(engine: sql.engine.Engine) -> None:
11
+ convert_table_md(engine, substitution_fn=__substitute_md)
12
+
13
+
14
+ def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
15
+ if isinstance(v, dict) and '_classname' in v and v['_classname'] == 'ColumnRef':
16
+ # Add reference_tbl to ColumnRef; for historical metadata it is always equal to tbl
17
+ assert 'reference_tbl' not in v
18
+ v['reference_tbl'] = None
19
+ return k, v
20
+
21
+ return None
@@ -2,6 +2,7 @@
2
2
  # rather than as a comment, so that the existence of a description can be enforced by
3
3
  # the unit tests when new versions are added.
4
4
  VERSION_NOTES = {
5
+ 35: 'Track reference_tbl in ColumnRef',
5
6
  34: 'Set default value for is_pk field in column metadata to False',
6
7
  33: 'Add is_replica field to table metadata',
7
8
  32: 'Add the lock_dummy BIGINT column to the dirs table',