PyPI - pixeltable - Versions diffs - 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl - Mend

pixeltable 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (99) hide show

pixeltable/__init__.py +18 -9
pixeltable/__version__.py +3 -0
pixeltable/catalog/column.py +31 -50
pixeltable/catalog/insertable_table.py +7 -6
pixeltable/catalog/table.py +171 -57
pixeltable/catalog/table_version.py +417 -140
pixeltable/catalog/table_version_path.py +2 -2
pixeltable/dataframe.py +239 -121
pixeltable/env.py +82 -16
pixeltable/exec/__init__.py +2 -1
pixeltable/exec/cache_prefetch_node.py +1 -1
pixeltable/exec/data_row_batch.py +6 -7
pixeltable/exec/expr_eval_node.py +28 -28
pixeltable/exec/in_memory_data_node.py +11 -7
pixeltable/exec/sql_scan_node.py +7 -6
pixeltable/exprs/__init__.py +4 -3
pixeltable/exprs/column_ref.py +9 -0
pixeltable/exprs/comparison.py +3 -3
pixeltable/exprs/data_row.py +5 -1
pixeltable/exprs/expr.py +15 -7
pixeltable/exprs/function_call.py +17 -15
pixeltable/exprs/image_member_access.py +9 -28
pixeltable/exprs/in_predicate.py +96 -0
pixeltable/exprs/inline_array.py +13 -11
pixeltable/exprs/inline_dict.py +15 -13
pixeltable/exprs/literal.py +16 -4
pixeltable/exprs/row_builder.py +15 -41
pixeltable/exprs/similarity_expr.py +65 -0
pixeltable/ext/__init__.py +5 -0
pixeltable/ext/functions/yolox.py +92 -0
pixeltable/func/__init__.py +0 -2
pixeltable/func/aggregate_function.py +18 -15
pixeltable/func/callable_function.py +57 -13
pixeltable/func/expr_template_function.py +20 -3
pixeltable/func/function.py +35 -4
pixeltable/func/globals.py +24 -14
pixeltable/func/signature.py +23 -27
pixeltable/func/udf.py +13 -12
pixeltable/functions/__init__.py +8 -8
pixeltable/functions/eval.py +7 -8
pixeltable/functions/huggingface.py +64 -17
pixeltable/functions/openai.py +36 -3
pixeltable/functions/pil/image.py +61 -64
pixeltable/functions/together.py +21 -0
pixeltable/functions/util.py +11 -0
pixeltable/globals.py +425 -0
pixeltable/index/__init__.py +2 -0
pixeltable/index/base.py +51 -0
pixeltable/index/embedding_index.py +168 -0
pixeltable/io/__init__.py +3 -0
pixeltable/{utils → io}/hf_datasets.py +48 -17
pixeltable/io/pandas.py +148 -0
pixeltable/{utils → io}/parquet.py +58 -33
pixeltable/iterators/__init__.py +1 -1
pixeltable/iterators/base.py +4 -0
pixeltable/iterators/document.py +218 -97
pixeltable/iterators/video.py +8 -9
pixeltable/metadata/__init__.py +7 -3
pixeltable/metadata/converters/convert_12.py +3 -0
pixeltable/metadata/converters/convert_13.py +41 -0
pixeltable/metadata/schema.py +45 -22
pixeltable/plan.py +15 -51
pixeltable/store.py +38 -41
pixeltable/tool/create_test_db_dump.py +39 -4
pixeltable/type_system.py +47 -96
pixeltable/utils/documents.py +42 -12
pixeltable/utils/http_server.py +70 -0
{pixeltable-0.2.4.dist-info → pixeltable-0.2.6.dist-info}/METADATA +14 -10
pixeltable-0.2.6.dist-info/RECORD +119 -0
{pixeltable-0.2.4.dist-info → pixeltable-0.2.6.dist-info}/WHEEL +1 -1
pixeltable/client.py +0 -604
pixeltable/exprs/image_similarity_predicate.py +0 -58
pixeltable/func/batched_function.py +0 -53
pixeltable/tests/conftest.py +0 -177
pixeltable/tests/functions/test_fireworks.py +0 -42
pixeltable/tests/functions/test_functions.py +0 -60
pixeltable/tests/functions/test_huggingface.py +0 -158
pixeltable/tests/functions/test_openai.py +0 -152
pixeltable/tests/functions/test_together.py +0 -111
pixeltable/tests/test_audio.py +0 -65
pixeltable/tests/test_catalog.py +0 -27
pixeltable/tests/test_client.py +0 -21
pixeltable/tests/test_component_view.py +0 -370
pixeltable/tests/test_dataframe.py +0 -439
pixeltable/tests/test_dirs.py +0 -107
pixeltable/tests/test_document.py +0 -120
pixeltable/tests/test_exprs.py +0 -805
pixeltable/tests/test_function.py +0 -324
pixeltable/tests/test_migration.py +0 -43
pixeltable/tests/test_nos.py +0 -54
pixeltable/tests/test_snapshot.py +0 -208
pixeltable/tests/test_table.py +0 -1267
pixeltable/tests/test_transactional_directory.py +0 -42
pixeltable/tests/test_types.py +0 -22
pixeltable/tests/test_video.py +0 -159
pixeltable/tests/test_view.py +0 -530
pixeltable/tests/utils.py +0 -408
pixeltable-0.2.4.dist-info/RECORD +0 -132
{pixeltable-0.2.4.dist-info → pixeltable-0.2.6.dist-info}/LICENSE +0 -0

pixeltable/catalog/table_version_path.py CHANGED Viewed

@@ -101,8 +101,8 @@ class TableVersionPath:
         return DataFrame(self).__getitem__(index)
     def columns(self) -> List[Column]:
-        """Return all columns visible in this tbl version path, including columns from bases"""
-        result = self.tbl_version.cols.copy()
+        """Return all user columns visible in this tbl version path, including columns from bases"""
+        result = list(self.tbl_version.cols_by_name.values())
         if self.base is not None:
             base_cols = self.base.columns()
             # we only include base columns that don't conflict with one of our column names

pixeltable/dataframe.py CHANGED Viewed

@@ -11,6 +11,8 @@ import traceback
 from pathlib import Path
 from typing import List, Optional, Any, Dict, Generator, Tuple, Set
+import PIL.Image
+import cv2
 import pandas as pd
 import pandas.io.formats.style
 import sqlalchemy as sql
@@ -24,37 +26,20 @@ from pixeltable.catalog import is_valid_identifier
 from pixeltable.env import Env
 from pixeltable.plan import Planner
 from pixeltable.type_system import ColumnType
+from pixeltable.utils.http_server import get_file_uri
-__all__ = [
-    'DataFrame'
-]
+__all__ = ['DataFrame']
 _logger = logging.getLogger('pixeltable')
-def _format_img(img: object) -> str:
-    """
-    Create <img> tag for Image object.
-    """
-    assert isinstance(img, Image.Image), f'Wrong type: {type(img)}'
-    with io.BytesIO() as buffer:
-        img.save(buffer, 'jpeg')
-        img_base64 = base64.b64encode(buffer.getvalue()).decode()
-        return f'<div style="width:200px;"><img src="data:image/jpeg;base64,{img_base64}" width="200" /></div>'
 def _create_source_tag(file_path: str) -> str:
-    abs_path = Path(file_path)
-    assert abs_path.is_absolute()
-    src_url = f'{Env.get().http_address}/{abs_path}'
+    src_url = get_file_uri(Env.get().http_address, file_path)
     mime = mimetypes.guess_type(src_url)[0]
     # if mime is None, the attribute string would not be valid html.
     mime_attr = f'type="{mime}"' if mime is not None else ''
     return f'<source src="{src_url}" {mime_attr} />'
-def _format_video(file_path: str) -> str:
-    return f'<video controls>{_create_source_tag(file_path)}</video>'
-def _format_audio(file_path: str) -> str:
-    return f'<audio controls>{_create_source_tag(file_path)}</audio>'
 class DataFrameResultSet:
     def __init__(self, rows: List[List[Any]], col_names: List[str], col_types: List[ColumnType]):
@@ -62,9 +47,10 @@ class DataFrameResultSet:
         self._col_names = col_names
         self._col_types = col_types
         self._formatters = {
-            ts.ImageType: _format_img,
-            ts.VideoType: _format_video,
-            ts.AudioType: _format_audio,
+            ts.ImageType: self._format_img,
+            ts.VideoType: self._format_video,
+            ts.AudioType: self._format_audio,
+            ts.DocumentType: self._format_document,
         }
     def __len__(self) -> int:
@@ -85,9 +71,7 @@ class DataFrameResultSet:
             for col_name, col_type in zip(self._col_names, self._col_types)
             if col_type.__class__ in self._formatters
         }
-        # TODO: why does mypy complain about formatters having an incorrect type?
-        return self.to_pandas().to_html(formatters=formatters, escape=False, index=False)  # type: ignore[arg-type]
+        return self.to_pandas().to_html(formatters=formatters, escape=False, index=False)
     def __str__(self) -> str:
         return self.to_pandas().to_string()
@@ -102,6 +86,100 @@ class DataFrameResultSet:
     def _row_to_dict(self, row_idx: int) -> Dict[str, Any]:
         return {self._col_names[i]: self._rows[row_idx][i] for i in range(len(self._col_names))}
+    # Formatters
+    def _format_img(self, img: Image.Image) -> str:
+        """
+        Create <img> tag for Image object.
+        """
+        assert isinstance(img, Image.Image), f'Wrong type: {type(img)}'
+        # Try to make it look decent in a variety of display scenarios
+        if len(self._rows) > 1:
+            width = 240  # Multiple rows: display small images
+        elif len(self._col_names) > 1:
+            width = 480  # Multiple columns: display medium images
+        else:
+            width = 640  # A single image: larger display
+        with io.BytesIO() as buffer:
+            img.save(buffer, 'jpeg')
+            img_base64 = base64.b64encode(buffer.getvalue()).decode()
+            return f"""
+            <div class="pxt_image" style="width:{width}px;">
+                <img src="data:image/jpeg;base64,{img_base64}" width="{width}" />
+            </div>
+            """
+    def _format_video(self, file_path: str) -> str:
+        thumb_tag = ''
+        # Attempt to extract the first frame of the video to use as a thumbnail,
+        # so that the notebook can be exported as HTML and viewed in contexts where
+        # the video itself is not accessible.
+        # TODO(aaron-siegel): If the video is backed by a concrete external URL,
+        # should we link to that instead?
+        video_reader = cv2.VideoCapture(str(file_path))
+        if video_reader.isOpened():
+            status, img_array = video_reader.read()
+            if status:
+                img_array = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB)
+                thumb = PIL.Image.fromarray(img_array)
+                with io.BytesIO() as buffer:
+                    thumb.save(buffer, 'jpeg')
+                    thumb_base64 = base64.b64encode(buffer.getvalue()).decode()
+                    thumb_tag = f'poster="data:image/jpeg;base64,{thumb_base64}"'
+            video_reader.release()
+        if len(self._rows) > 1:
+            width = 320
+        elif len(self._col_names) > 1:
+            width = 480
+        else:
+            width = 800
+        return f"""
+        <div class="pxt_video" style="width:{width}px;">
+            <video controls width="{width}" {thumb_tag}>
+                {_create_source_tag(file_path)}
+            </video>
+        </div>
+        """
+    def _format_document(self, file_path: str) -> str:
+        max_width = max_height = 320
+        # by default, file path will be shown as a link
+        inner_element = file_path
+        # try generating a thumbnail for different types and use that if successful
+        if file_path.lower().endswith('.pdf'):
+            try:
+                import fitz
+                doc = fitz.open(file_path)
+                p = doc.get_page_pixmap(0)
+                while p.width > max_width or p.height > max_height:
+                    # shrink(1) will halve each dimension
+                    p.shrink(1)
+                data = p.tobytes(output='jpeg')
+                thumb_base64 = base64.b64encode(data).decode()
+                img_src = f'data:image/jpeg;base64,{thumb_base64}'
+                inner_element = f"""
+                    <img style="object-fit: contain; border: 1px solid black;" src="{img_src}" />
+                """
+            except:
+                logging.warning(f'Failed to produce PDF thumbnail {file_path}. Make sure you have PyMuPDF installed.')
+        return f"""
+        <div class="pxt_document" style="width:{max_width}px;">
+            <a href="{get_file_uri(Env.get().http_address, file_path)}">
+                {inner_element}
+            </a>
+        </div>
+        """
+    def _format_audio(self, file_path: str) -> str:
+        return f"""
+        <div class="pxt_audio">
+            <audio controls>
+                {_create_source_tag(file_path)}
+            </audio>
+        </div>
+        """
     def __getitem__(self, index: Any) -> Any:
         if isinstance(index, str):
             if index not in self._col_names:
@@ -141,52 +219,53 @@ class DataFrameResultSetIterator:
         return row
-# TODO: remove this; it's only here as a reminder that we still need to call release() in the current implementation
-class AnalysisInfo:
-    def __init__(self, tbl: catalog.TableVersion):
-        self.tbl = tbl
-        # output of the SQL scan stage
-        self.sql_scan_output_exprs: List[exprs.Expr] = []
-        # output of the agg stage
-        self.agg_output_exprs: List[exprs.Expr] = []
-        # Where clause of the Select stmt of the SQL scan stage
-        self.sql_where_clause: Optional[sql.ClauseElement] = None
-        # filter predicate applied to input rows of the SQL scan stage
-        self.filter: Optional[exprs.Predicate] = None
-        self.similarity_clause: Optional[exprs.ImageSimilarityPredicate] = None
-        self.agg_fn_calls: List[exprs.FunctionCall] = []  # derived from unique_exprs
-        self.has_frame_col: bool = False  # True if we're referencing the frame col
-        self.evaluator: Optional[exprs.Evaluator] = None
-        self.sql_scan_eval_ctx: List[exprs.Expr] = []  # needed to materialize output of SQL scan stage
-        self.agg_eval_ctx: List[exprs.Expr] = []  # needed to materialize output of agg stage
-        self.filter_eval_ctx: List[exprs.Expr] = []
-        self.group_by_eval_ctx: List[exprs.Expr] = []
-    def finalize_exec(self) -> None:
-        """
-        Call release() on all collected Exprs.
-        """
-        exprs.Expr.release_list(self.sql_scan_output_exprs)
-        exprs.Expr.release_list(self.agg_output_exprs)
-        if self.filter is not None:
-            self.filter.release()
+# # TODO: remove this; it's only here as a reminder that we still need to call release() in the current implementation
+# class AnalysisInfo:
+#     def __init__(self, tbl: catalog.TableVersion):
+#         self.tbl = tbl
+#         # output of the SQL scan stage
+#         self.sql_scan_output_exprs: List[exprs.Expr] = []
+#         # output of the agg stage
+#         self.agg_output_exprs: List[exprs.Expr] = []
+#         # Where clause of the Select stmt of the SQL scan stage
+#         self.sql_where_clause: Optional[sql.ClauseElement] = None
+#         # filter predicate applied to input rows of the SQL scan stage
+#         self.filter: Optional[exprs.Predicate] = None
+#         self.similarity_clause: Optional[exprs.ImageSimilarityPredicate] = None
+#         self.agg_fn_calls: List[exprs.FunctionCall] = []  # derived from unique_exprs
+#         self.has_frame_col: bool = False  # True if we're referencing the frame col
+#
+#         self.evaluator: Optional[exprs.Evaluator] = None
+#         self.sql_scan_eval_ctx: List[exprs.Expr] = []  # needed to materialize output of SQL scan stage
+#         self.agg_eval_ctx: List[exprs.Expr] = []  # needed to materialize output of agg stage
+#         self.filter_eval_ctx: List[exprs.Expr] = []
+#         self.group_by_eval_ctx: List[exprs.Expr] = []
+#
+#     def finalize_exec(self) -> None:
+#         """
+#         Call release() on all collected Exprs.
+#         """
+#         exprs.Expr.release_list(self.sql_scan_output_exprs)
+#         exprs.Expr.release_list(self.agg_output_exprs)
+#         if self.filter is not None:
+#             self.filter.release()
 class DataFrame:
     def __init__(
-            self, tbl: catalog.TableVersionPath,
-            select_list: Optional[List[Tuple[exprs.Expr, Optional[str]]]] = None,
-            where_clause: Optional[exprs.Predicate] = None,
-            group_by_clause: Optional[List[exprs.Expr]] = None,
-            grouping_tbl: Optional[catalog.TableVersion] = None,
-            order_by_clause: Optional[List[Tuple[exprs.Expr, bool]]] = None,  # List[(expr, asc)]
-            limit: Optional[int] = None):
+        self,
+        tbl: catalog.TableVersionPath,
+        select_list: Optional[List[Tuple[exprs.Expr, Optional[str]]]] = None,
+        where_clause: Optional[exprs.Predicate] = None,
+        group_by_clause: Optional[List[exprs.Expr]] = None,
+        grouping_tbl: Optional[catalog.TableVersion] = None,
+        order_by_clause: Optional[List[Tuple[exprs.Expr, bool]]] = None,  # List[(expr, asc)]
+        limit: Optional[int] = None,
+    ):
         self.tbl = tbl
         # select list logic
-        DataFrame._select_list_check_rep(select_list) # check select list without expansion
+        DataFrame._select_list_check_rep(select_list)  # check select list without expansion
         # exprs contain execution state and therefore cannot be shared
         select_list = copy.deepcopy(select_list)
         select_list_exprs, column_names = DataFrame._normalize_select_list(tbl, select_list)
@@ -205,12 +284,12 @@ class DataFrame:
         self.limit_val = limit
     @classmethod
-    def _select_list_check_rep(cls,
+    def _select_list_check_rep(
+        cls,
         select_list: Optional[List[Tuple[exprs.Expr, Optional[str]]]],
     ) -> None:
-        """Validate basic select list types.
-        """
-        if select_list is None: # basic check for valid select list
+        """Validate basic select list types."""
+        if select_list is None:  # basic check for valid select list
             return
         assert len(select_list) > 0
@@ -223,13 +302,14 @@ class DataFrame:
                 assert is_valid_identifier(ent[1])
     @classmethod
-    def _normalize_select_list(cls,
+    def _normalize_select_list(
+        cls,
         tbl: catalog.TableVersionPath,
         select_list: Optional[List[Tuple[exprs.Expr, Optional[str]]]],
     ) -> Tuple[List[exprs.Expr], List[str]]:
         """
         Expand select list information with all columns and their names
-        Returns:
+        Returns:
             a pair composed of the list of expressions and the list of corresponding names
         """
         if select_list is None:
@@ -237,9 +317,9 @@ class DataFrame:
         else:
             expanded_list = select_list
-        out_exprs : List[exprs.Expr] = []
-        out_names : List[str] = [] # keep track of order
-        seen_out_names : set[str] = set() # use to check for duplicates in loop, avoid square complexity
+        out_exprs: List[exprs.Expr] = []
+        out_names: List[str] = []  # keep track of order
+        seen_out_names: set[str] = set()  # use to check for duplicates in loop, avoid square complexity
         for i, (expr, name) in enumerate(expanded_list):
             if name is None:
                 # use default, add suffix if needed so default adds no duplicates
@@ -248,13 +328,13 @@ class DataFrame:
                     column_name = default_name
                     if default_name in seen_out_names:
                         # already used, then add suffix until unique name is found
-                        for j in range(1, len(out_names)+1):
+                        for j in range(1, len(out_names) + 1):
                             column_name = f'{default_name}_{j}'
                             if column_name not in seen_out_names:
                                 break
-                else: # no default name, eg some expressions
+                else:  # no default name, eg some expressions
                     column_name = f'col_{i}'
-            else: # user provided name, no attempt to rename
+            else:  # user provided name, no attempt to rename
                 column_name = name
             out_exprs.append(expr)
@@ -282,9 +362,13 @@ class DataFrame:
         for item in self._select_list_exprs:
             item.bind_rel_paths(None)
         plan = Planner.create_query_plan(
-            self.tbl, self._select_list_exprs, where_clause=self.where_clause, group_by_clause=group_by_clause,
+            self.tbl,
+            self._select_list_exprs,
+            where_clause=self.where_clause,
+            group_by_clause=group_by_clause,
             order_by_clause=self.order_by_clause if self.order_by_clause is not None else [],
-            limit=self.limit_val if self.limit_val is not None else 0)  # limit_val == 0: no limit_val
+            limit=self.limit_val if self.limit_val is not None else 0,
+        )  # limit_val == 0: no limit_val
         with Env.get().engine.begin() as conn:
             plan.ctx.conn = conn
@@ -330,12 +414,10 @@ class DataFrame:
                 result_row = [data_row[e.slot_idx] for e in self._select_list_exprs]
                 result_rows.append(result_row)
         except excs.ExprEvalError as e:
-            msg = (f'In row {e.row_num} the {e.expr_msg} encountered exception '
-                   f'{type(e.exc).__name__}:\n{str(e.exc)}')
+            msg = f'In row {e.row_num} the {e.expr_msg} encountered exception ' f'{type(e.exc).__name__}:\n{str(e.exc)}'
             if len(e.input_vals) > 0:
                 input_msgs = [
-                    f"'{d}' = {d.col_type.print_value(e.input_vals[i])}"
-                    for i, d in enumerate(e.expr.dependencies())
+                    f"'{d}' = {d.col_type.print_value(e.input_vals[i])}" for i, d in enumerate(e.expr.dependencies())
                 ]
                 msg += f'\nwith {", ".join(input_msgs)}'
             assert e.exc_tb is not None
@@ -355,6 +437,7 @@ class DataFrame:
     def count(self) -> int:
         from pixeltable.plan import Planner
         stmt = Planner.create_count_stmt(self.tbl, self.where_clause)
         with Env.get().engine.connect() as conn:
             result: int = conn.execute(stmt).scalar_one()
@@ -380,9 +463,9 @@ class DataFrame:
         if self.order_by_clause is not None:
             heading_vals.append('Order By')
             heading_vals.extend([''] * (len(self.order_by_clause) - 1))
-            info_vals.extend([
-                f'{e[0].display_str(inline=False)} {"asc" if e[1] else "desc"}' for e in self.order_by_clause
-            ])
+            info_vals.extend(
+                [f'{e[0].display_str(inline=False)} {"asc" if e[1] else "desc"}' for e in self.order_by_clause]
+            )
         if self.limit_val is not None:
             heading_vals.append('Limit')
             info_vals.append(str(self.limit_val))
@@ -396,9 +479,12 @@ class DataFrame:
         pd_df = self._description()
         # white-space: pre-wrap: print \n as newline
         # th: center-align headings
-        return pd_df.style.set_properties(**{'white-space': 'pre-wrap', 'text-align': 'left'}) \
-            .set_table_styles([dict(selector='th', props=[('text-align', 'center')])]) \
-            .hide(axis='index').hide(axis='columns')
+        return (
+            pd_df.style.set_properties(**{'white-space': 'pre-wrap', 'text-align': 'left'})
+            .set_table_styles([dict(selector='th', props=[('text-align', 'center')])])
+            .hide(axis='index')
+            .hide(axis='columns')
+        )
     def describe(self) -> None:
         """
@@ -409,6 +495,7 @@ class DataFrame:
         try:
             __IPYTHON__
             from IPython.display import display
             display(self._description_html())
         except NameError:
             print(self.__repr__())
@@ -419,16 +506,16 @@ class DataFrame:
     def _repr_html_(self) -> str:
         return self._description_html()._repr_html_()
-    def select(self, *items: Any, **named_items : Any) -> DataFrame:
+    def select(self, *items: Any, **named_items: Any) -> DataFrame:
         if self.select_list is not None:
             raise excs.Error(f'Select list already specified')
-        for (name, _) in named_items.items():
+        for name, _ in named_items.items():
             if not isinstance(name, str) or not is_valid_identifier(name):
                 raise excs.Error(f'Invalid name: {name}')
         base_list = [(expr, None) for expr in items] + [(expr, k) for (k, expr) in named_items.items()]
         if len(base_list) == 0:
             raise excs.Error(f'Empty select list')
         # analyze select list; wrap literals with the corresponding expressions
         select_list = []
         for raw_expr, name in base_list:
@@ -457,13 +544,25 @@ class DataFrame:
             seen.add(name)
         return DataFrame(
-            self.tbl, select_list=select_list, where_clause=self.where_clause, group_by_clause=self.group_by_clause,
-            grouping_tbl=self.grouping_tbl, order_by_clause=self.order_by_clause, limit=self.limit_val)
+            self.tbl,
+            select_list=select_list,
+            where_clause=self.where_clause,
+            group_by_clause=self.group_by_clause,
+            grouping_tbl=self.grouping_tbl,
+            order_by_clause=self.order_by_clause,
+            limit=self.limit_val,
+        )
     def where(self, pred: exprs.Predicate) -> DataFrame:
         return DataFrame(
-            self.tbl, select_list=self.select_list, where_clause=pred, group_by_clause=self.group_by_clause,
-            grouping_tbl=self.grouping_tbl, order_by_clause=self.order_by_clause, limit=self.limit_val)
+            self.tbl,
+            select_list=self.select_list,
+            where_clause=pred,
+            group_by_clause=self.group_by_clause,
+            grouping_tbl=self.grouping_tbl,
+            order_by_clause=self.order_by_clause,
+            limit=self.limit_val,
+        )
     def group_by(self, *grouping_items: Any) -> DataFrame:
         """Add a group-by clause to this DataFrame.
@@ -490,8 +589,14 @@ class DataFrame:
         if grouping_tbl is None:
             group_by_clause = list(grouping_items)
         return DataFrame(
-            self.tbl, select_list=self.select_list, where_clause=self.where_clause, group_by_clause=group_by_clause,
-            grouping_tbl=grouping_tbl, order_by_clause=self.order_by_clause, limit=self.limit_val)
+            self.tbl,
+            select_list=self.select_list,
+            where_clause=self.where_clause,
+            group_by_clause=group_by_clause,
+            grouping_tbl=grouping_tbl,
+            order_by_clause=self.order_by_clause,
+            limit=self.limit_val,
+        )
     def order_by(self, *expr_list: exprs.Expr, asc: bool = True) -> DataFrame:
         for e in expr_list:
@@ -500,16 +605,26 @@ class DataFrame:
         order_by_clause = self.order_by_clause if self.order_by_clause is not None else []
         order_by_clause.extend([(e.copy(), asc) for e in expr_list])
         return DataFrame(
-            self.tbl, select_list=self.select_list, where_clause=self.where_clause,
-            group_by_clause=self.group_by_clause, grouping_tbl=self.grouping_tbl, order_by_clause=order_by_clause,
-            limit=self.limit_val)
+            self.tbl,
+            select_list=self.select_list,
+            where_clause=self.where_clause,
+            group_by_clause=self.group_by_clause,
+            grouping_tbl=self.grouping_tbl,
+            order_by_clause=order_by_clause,
+            limit=self.limit_val,
+        )
     def limit(self, n: int) -> DataFrame:
         assert n is not None and isinstance(n, int)
         return DataFrame(
-            self.tbl, select_list=self.select_list, where_clause=self.where_clause,
-            group_by_clause=self.group_by_clause, grouping_tbl=self.grouping_tbl, order_by_clause=self.order_by_clause,
-            limit=n)
+            self.tbl,
+            select_list=self.select_list,
+            where_clause=self.where_clause,
+            group_by_clause=self.group_by_clause,
+            grouping_tbl=self.grouping_tbl,
+            order_by_clause=self.order_by_clause,
+            limit=n,
+        )
     def __getitem__(self, index: object) -> DataFrame:
         """
@@ -527,24 +642,27 @@ class DataFrame:
         if isinstance(index, list):
             return self.select(*index)
         raise TypeError(f'Invalid index type: {type(index)}')
     def _as_dict(self) -> Dict[str, Any]:
-        """
-            Returns:
-                Dictionary representing this dataframe.
+        """
+        Returns:
+            Dictionary representing this dataframe.
         """
         tbl_versions = self.tbl.get_tbl_versions()
         d = {
             '_classname': 'DataFrame',
             'tbl_ids': [str(t.id) for t in tbl_versions],
             'tbl_versions': [t.version for t in tbl_versions],
-            'select_list':
-                [(e.as_dict(), name) for (e, name) in self.select_list] if self.select_list is not None else None,
+            'select_list': [(e.as_dict(), name) for (e, name) in self.select_list]
+            if self.select_list is not None
+            else None,
             'where_clause': self.where_clause.as_dict() if self.where_clause is not None else None,
-            'group_by_clause':
-                [e.as_dict() for e in self.group_by_clause] if self.group_by_clause is not None else None,
-            'order_by_clause':
-                [(e.as_dict(), asc) for (e,asc) in self.order_by_clause] if self.order_by_clause is not None else None,
+            'group_by_clause': [e.as_dict() for e in self.group_by_clause]
+            if self.group_by_clause is not None
+            else None,
+            'order_by_clause': [(e.as_dict(), asc) for (e, asc) in self.order_by_clause]
+            if self.order_by_clause is not None
+            else None,
             'limit_val': self.limit_val,
         }
         return d
@@ -571,7 +689,7 @@ class DataFrame:
         summary_string = json.dumps(self._as_dict())
         cache_key = hashlib.sha256(summary_string.encode()).hexdigest()
-        dest_path = (Env.get().dataset_cache_dir / f'coco_{cache_key}')
+        dest_path = Env.get().dataset_cache_dir / f'coco_{cache_key}'
         if dest_path.exists():
             assert dest_path.is_dir()
             data_file_path = dest_path / 'data.json'
@@ -616,14 +734,14 @@ class DataFrame:
         Env.get().require_package('torch')
         Env.get().require_package('torchvision')
-        from pixeltable.utils.parquet import save_parquet # pylint: disable=import-outside-toplevel
-        from pixeltable.utils.pytorch import PixeltablePytorchDataset # pylint: disable=import-outside-toplevel
+        from pixeltable.io.parquet import save_parquet  # pylint: disable=import-outside-toplevel
+        from pixeltable.utils.pytorch import PixeltablePytorchDataset  # pylint: disable=import-outside-toplevel
-        summary_string = json.dumps(self._as_dict())
+        summary_string = json.dumps(self._as_dict())
         cache_key = hashlib.sha256(summary_string.encode()).hexdigest()
-        dest_path = (Env.get().dataset_cache_dir / f'df_{cache_key}').with_suffix('.parquet') # pylint: disable = protected-access
-        if dest_path.exists(): # fast path: use cache
+        dest_path = (Env.get().dataset_cache_dir / f'df_{cache_key}').with_suffix('.parquet')  # pylint: disable = protected-access
+        if dest_path.exists():  # fast path: use cache
             assert dest_path.is_dir()
         else:
             save_parquet(self, dest_path)

pixeltable 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl

Potentially problematic release.

pixeltable 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl