PyPI - pixeltable - Versions diffs - 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl - Mend

pixeltable 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (110)

pixeltable/__init__.py +20 -9
pixeltable/__version__.py +3 -0
pixeltable/catalog/column.py +23 -7
pixeltable/catalog/insertable_table.py +32 -19
pixeltable/catalog/table.py +210 -20
pixeltable/catalog/table_version.py +272 -111
pixeltable/catalog/table_version_path.py +6 -1
pixeltable/dataframe.py +184 -110
pixeltable/datatransfer/__init__.py +1 -0
pixeltable/datatransfer/label_studio.py +526 -0
pixeltable/datatransfer/remote.py +113 -0
pixeltable/env.py +213 -79
pixeltable/exec/__init__.py +2 -1
pixeltable/exec/data_row_batch.py +6 -7
pixeltable/exec/expr_eval_node.py +28 -28
pixeltable/exec/sql_scan_node.py +7 -6
pixeltable/exprs/__init__.py +4 -3
pixeltable/exprs/column_ref.py +11 -2
pixeltable/exprs/comparison.py +39 -1
pixeltable/exprs/data_row.py +7 -0
pixeltable/exprs/expr.py +26 -19
pixeltable/exprs/function_call.py +17 -18
pixeltable/exprs/globals.py +14 -2
pixeltable/exprs/image_member_access.py +9 -28
pixeltable/exprs/in_predicate.py +96 -0
pixeltable/exprs/inline_array.py +13 -11
pixeltable/exprs/inline_dict.py +15 -13
pixeltable/exprs/row_builder.py +7 -1
pixeltable/exprs/similarity_expr.py +67 -0
pixeltable/ext/functions/whisperx.py +30 -0
pixeltable/ext/functions/yolox.py +16 -0
pixeltable/func/__init__.py +0 -2
pixeltable/func/aggregate_function.py +5 -2
pixeltable/func/callable_function.py +57 -13
pixeltable/func/expr_template_function.py +14 -3
pixeltable/func/function.py +35 -4
pixeltable/func/signature.py +5 -15
pixeltable/func/udf.py +8 -12
pixeltable/functions/fireworks.py +9 -4
pixeltable/functions/huggingface.py +48 -5
pixeltable/functions/openai.py +49 -11
pixeltable/functions/pil/image.py +61 -64
pixeltable/functions/together.py +32 -6
pixeltable/functions/util.py +0 -43
pixeltable/functions/video.py +46 -8
pixeltable/globals.py +443 -0
pixeltable/index/__init__.py +1 -0
pixeltable/index/base.py +9 -2
pixeltable/index/btree.py +54 -0
pixeltable/index/embedding_index.py +91 -15
pixeltable/io/__init__.py +4 -0
pixeltable/io/globals.py +59 -0
pixeltable/{utils → io}/hf_datasets.py +48 -17
pixeltable/io/pandas.py +148 -0
pixeltable/{utils → io}/parquet.py +58 -33
pixeltable/iterators/__init__.py +1 -1
pixeltable/iterators/base.py +8 -4
pixeltable/iterators/document.py +225 -93
pixeltable/iterators/video.py +16 -9
pixeltable/metadata/__init__.py +8 -4
pixeltable/metadata/converters/convert_12.py +3 -0
pixeltable/metadata/converters/convert_13.py +41 -0
pixeltable/metadata/converters/convert_14.py +13 -0
pixeltable/metadata/converters/convert_15.py +29 -0
pixeltable/metadata/converters/util.py +63 -0
pixeltable/metadata/schema.py +12 -6
pixeltable/plan.py +11 -24
pixeltable/store.py +16 -23
pixeltable/tool/create_test_db_dump.py +49 -14
pixeltable/type_system.py +27 -58
pixeltable/utils/coco.py +94 -0
pixeltable/utils/documents.py +42 -12
pixeltable/utils/http_server.py +70 -0
pixeltable-0.2.7.dist-info/METADATA +137 -0
pixeltable-0.2.7.dist-info/RECORD +126 -0
{pixeltable-0.2.5.dist-info → pixeltable-0.2.7.dist-info}/WHEEL +1 -1
pixeltable/client.py +0 -600
pixeltable/exprs/image_similarity_predicate.py +0 -58
pixeltable/func/batched_function.py +0 -53
pixeltable/func/nos_function.py +0 -202
pixeltable/tests/conftest.py +0 -171
pixeltable/tests/ext/test_yolox.py +0 -21
pixeltable/tests/functions/test_fireworks.py +0 -43
pixeltable/tests/functions/test_functions.py +0 -60
pixeltable/tests/functions/test_huggingface.py +0 -158
pixeltable/tests/functions/test_openai.py +0 -162
pixeltable/tests/functions/test_together.py +0 -112
pixeltable/tests/test_audio.py +0 -65
pixeltable/tests/test_catalog.py +0 -27
pixeltable/tests/test_client.py +0 -21
pixeltable/tests/test_component_view.py +0 -379
pixeltable/tests/test_dataframe.py +0 -440
pixeltable/tests/test_dirs.py +0 -107
pixeltable/tests/test_document.py +0 -120
pixeltable/tests/test_exprs.py +0 -802
pixeltable/tests/test_function.py +0 -332
pixeltable/tests/test_index.py +0 -138
pixeltable/tests/test_migration.py +0 -44
pixeltable/tests/test_nos.py +0 -54
pixeltable/tests/test_snapshot.py +0 -231
pixeltable/tests/test_table.py +0 -1343
pixeltable/tests/test_transactional_directory.py +0 -42
pixeltable/tests/test_types.py +0 -52
pixeltable/tests/test_video.py +0 -159
pixeltable/tests/test_view.py +0 -535
pixeltable/tests/utils.py +0 -442
pixeltable/utils/clip.py +0 -18
pixeltable-0.2.5.dist-info/METADATA +0 -128
pixeltable-0.2.5.dist-info/RECORD +0 -139
{pixeltable-0.2.5.dist-info → pixeltable-0.2.7.dist-info}/LICENSE +0 -0

pixeltable/catalog/table_version_path.py CHANGED Viewed

@@ -106,9 +106,14 @@ class TableVersionPath:
         if self.base is not None:
             base_cols = self.base.columns()
             # we only include base columns that don't conflict with one of our column names
-            result.extend([c for c in base_cols if c.name not in self.tbl_version.cols_by_name])
+            result.extend(c for c in base_cols if c.name not in self.tbl_version.cols_by_name)
         return result
+    def cols_by_name(self) -> dict[str, Column]:
+        """Return a dict of all user columns visible in this tbl version path, including columns from bases"""
+        cols = self.columns()
+        return {col.name: col for col in cols}
     def get_column(self, name: str, include_bases: bool = True) -> Optional[Column]:
         """Return the column with the given name, or None if not found"""
         col = self.tbl_version.cols_by_name.get(name)

pixeltable/dataframe.py CHANGED Viewed

@@ -26,18 +26,15 @@ from pixeltable.catalog import is_valid_identifier
 from pixeltable.env import Env
 from pixeltable.plan import Planner
 from pixeltable.type_system import ColumnType
+from pixeltable.utils.http_server import get_file_uri
-__all__ = [
-    'DataFrame'
-]
+__all__ = ['DataFrame']
 _logger = logging.getLogger('pixeltable')
 def _create_source_tag(file_path: str) -> str:
-    abs_path = Path(file_path)
-    assert abs_path.is_absolute()
-    src_url = f'{Env.get().http_address}/{abs_path}'
+    src_url = get_file_uri(Env.get().http_address, file_path)
     mime = mimetypes.guess_type(src_url)[0]
     # if mime is None, the attribute string would not be valid html.
     mime_attr = f'type="{mime}"' if mime is not None else ''
@@ -45,7 +42,6 @@ def _create_source_tag(file_path: str) -> str:
 class DataFrameResultSet:
     def __init__(self, rows: List[List[Any]], col_names: List[str], col_types: List[ColumnType]):
         self._rows = rows
         self._col_names = col_names
@@ -54,6 +50,7 @@ class DataFrameResultSet:
             ts.ImageType: self._format_img,
             ts.VideoType: self._format_video,
             ts.AudioType: self._format_audio,
+            ts.DocumentType: self._format_document,
         }
     def __len__(self) -> int:
@@ -90,7 +87,6 @@ class DataFrameResultSet:
         return {self._col_names[i]: self._rows[row_idx][i] for i in range(len(self._col_names))}
     # Formatters
     def _format_img(self, img: Image.Image) -> str:
         """
         Create <img> tag for Image object.
@@ -106,14 +102,14 @@ class DataFrameResultSet:
         with io.BytesIO() as buffer:
             img.save(buffer, 'jpeg')
             img_base64 = base64.b64encode(buffer.getvalue()).decode()
-            return f'''
-            <div style="width:{width}px;">
+            return f"""
+            <div class="pxt_image" style="width:{width}px;">
                 <img src="data:image/jpeg;base64,{img_base64}" width="{width}" />
             </div>
-            '''
+            """
     def _format_video(self, file_path: str) -> str:
-        thumb_tag = ""
+        thumb_tag = ''
         # Attempt to extract the first frame of the video to use as a thumbnail,
         # so that the notebook can be exported as HTML and viewed in contexts where
         # the video itself is not accessible.
@@ -136,16 +132,53 @@ class DataFrameResultSet:
             width = 480
         else:
             width = 800
-        return f'''
-        <div style="width:{width}px;">
+        return f"""
+        <div class="pxt_video" style="width:{width}px;">
             <video controls width="{width}" {thumb_tag}>
                 {_create_source_tag(file_path)}
             </video>
         </div>
-        '''
+        """
+    def _format_document(self, file_path: str) -> str:
+        max_width = max_height = 320
+        # by default, file path will be shown as a link
+        inner_element = file_path
+        # try generating a thumbnail for different types and use that if successful
+        if file_path.lower().endswith('.pdf'):
+            try:
+                import fitz
+                doc = fitz.open(file_path)
+                p = doc.get_page_pixmap(0)
+                while p.width > max_width or p.height > max_height:
+                    # shrink(1) will halve each dimension
+                    p.shrink(1)
+                data = p.tobytes(output='jpeg')
+                thumb_base64 = base64.b64encode(data).decode()
+                img_src = f'data:image/jpeg;base64,{thumb_base64}'
+                inner_element = f"""
+                    <img style="object-fit: contain; border: 1px solid black;" src="{img_src}" />
+                """
+            except:
+                logging.warning(f'Failed to produce PDF thumbnail {file_path}. Make sure you have PyMuPDF installed.')
+        return f"""
+        <div class="pxt_document" style="width:{max_width}px;">
+            <a href="{get_file_uri(Env.get().http_address, file_path)}">
+                {inner_element}
+            </a>
+        </div>
+        """
     def _format_audio(self, file_path: str) -> str:
-        return f'<audio controls>{_create_source_tag(file_path)}</audio>'
+        return f"""
+        <div class="pxt_audio">
+            <audio controls>
+                {_create_source_tag(file_path)}
+            </audio>
+        </div>
+        """
     def __getitem__(self, index: Any) -> Any:
         if isinstance(index, str):
@@ -186,51 +219,53 @@ class DataFrameResultSetIterator:
         return row
-# TODO: remove this; it's only here as a reminder that we still need to call release() in the current implementation
-class AnalysisInfo:
-    def __init__(self, tbl: catalog.TableVersion):
-        self.tbl = tbl
-        # output of the SQL scan stage
-        self.sql_scan_output_exprs: List[exprs.Expr] = []
-        # output of the agg stage
-        self.agg_output_exprs: List[exprs.Expr] = []
-        # Where clause of the Select stmt of the SQL scan stage
-        self.sql_where_clause: Optional[sql.ClauseElement] = None
-        # filter predicate applied to input rows of the SQL scan stage
-        self.filter: Optional[exprs.Predicate] = None
-        self.similarity_clause: Optional[exprs.ImageSimilarityPredicate] = None
-        self.agg_fn_calls: List[exprs.FunctionCall] = []  # derived from unique_exprs
-        self.has_frame_col: bool = False  # True if we're referencing the frame col
-        self.evaluator: Optional[exprs.Evaluator] = None
-        self.sql_scan_eval_ctx: List[exprs.Expr] = []  # needed to materialize output of SQL scan stage
-        self.agg_eval_ctx: List[exprs.Expr] = []  # needed to materialize output of agg stage
-        self.filter_eval_ctx: List[exprs.Expr] = []
-        self.group_by_eval_ctx: List[exprs.Expr] = []
-    def finalize_exec(self) -> None:
-        """
-        Call release() on all collected Exprs.
-        """
-        exprs.Expr.release_list(self.sql_scan_output_exprs)
-        exprs.Expr.release_list(self.agg_output_exprs)
-        if self.filter is not None:
-            self.filter.release()
+# # TODO: remove this; it's only here as a reminder that we still need to call release() in the current implementation
+# class AnalysisInfo:
+#     def __init__(self, tbl: catalog.TableVersion):
+#         self.tbl = tbl
+#         # output of the SQL scan stage
+#         self.sql_scan_output_exprs: List[exprs.Expr] = []
+#         # output of the agg stage
+#         self.agg_output_exprs: List[exprs.Expr] = []
+#         # Where clause of the Select stmt of the SQL scan stage
+#         self.sql_where_clause: Optional[sql.ClauseElement] = None
+#         # filter predicate applied to input rows of the SQL scan stage
+#         self.filter: Optional[exprs.Predicate] = None
+#         self.similarity_clause: Optional[exprs.ImageSimilarityPredicate] = None
+#         self.agg_fn_calls: List[exprs.FunctionCall] = []  # derived from unique_exprs
+#         self.has_frame_col: bool = False  # True if we're referencing the frame col
+#
+#         self.evaluator: Optional[exprs.Evaluator] = None
+#         self.sql_scan_eval_ctx: List[exprs.Expr] = []  # needed to materialize output of SQL scan stage
+#         self.agg_eval_ctx: List[exprs.Expr] = []  # needed to materialize output of agg stage
+#         self.filter_eval_ctx: List[exprs.Expr] = []
+#         self.group_by_eval_ctx: List[exprs.Expr] = []
+#
+#     def finalize_exec(self) -> None:
+#         """
+#         Call release() on all collected Exprs.
+#         """
+#         exprs.Expr.release_list(self.sql_scan_output_exprs)
+#         exprs.Expr.release_list(self.agg_output_exprs)
+#         if self.filter is not None:
+#             self.filter.release()
 class DataFrame:
     def __init__(
-            self, tbl: catalog.TableVersionPath,
-            select_list: Optional[List[Tuple[exprs.Expr, Optional[str]]]] = None,
-            where_clause: Optional[exprs.Predicate] = None,
-            group_by_clause: Optional[List[exprs.Expr]] = None,
-            grouping_tbl: Optional[catalog.TableVersion] = None,
-            order_by_clause: Optional[List[Tuple[exprs.Expr, bool]]] = None,  # List[(expr, asc)]
-            limit: Optional[int] = None):
+        self,
+        tbl: catalog.TableVersionPath,
+        select_list: Optional[List[Tuple[exprs.Expr, Optional[str]]]] = None,
+        where_clause: Optional[exprs.Predicate] = None,
+        group_by_clause: Optional[List[exprs.Expr]] = None,
+        grouping_tbl: Optional[catalog.TableVersion] = None,
+        order_by_clause: Optional[List[Tuple[exprs.Expr, bool]]] = None,  # List[(expr, asc)]
+        limit: Optional[int] = None,
+    ):
         self.tbl = tbl
         # select list logic
-        DataFrame._select_list_check_rep(select_list) # check select list without expansion
+        DataFrame._select_list_check_rep(select_list)  # check select list without expansion
         # exprs contain execution state and therefore cannot be shared
         select_list = copy.deepcopy(select_list)
         select_list_exprs, column_names = DataFrame._normalize_select_list(tbl, select_list)
@@ -249,12 +284,12 @@ class DataFrame:
         self.limit_val = limit
     @classmethod
-    def _select_list_check_rep(cls,
+    def _select_list_check_rep(
+        cls,
         select_list: Optional[List[Tuple[exprs.Expr, Optional[str]]]],
     ) -> None:
-        """Validate basic select list types.
-        """
-        if select_list is None: # basic check for valid select list
+        """Validate basic select list types."""
+        if select_list is None:  # basic check for valid select list
             return
         assert len(select_list) > 0
@@ -267,13 +302,14 @@ class DataFrame:
                 assert is_valid_identifier(ent[1])
     @classmethod
-    def _normalize_select_list(cls,
+    def _normalize_select_list(
+        cls,
         tbl: catalog.TableVersionPath,
         select_list: Optional[List[Tuple[exprs.Expr, Optional[str]]]],
     ) -> Tuple[List[exprs.Expr], List[str]]:
         """
         Expand select list information with all columns and their names
-        Returns:
+        Returns:
             a pair composed of the list of expressions and the list of corresponding names
         """
         if select_list is None:
@@ -281,9 +317,9 @@ class DataFrame:
         else:
             expanded_list = select_list
-        out_exprs : List[exprs.Expr] = []
-        out_names : List[str] = [] # keep track of order
-        seen_out_names : set[str] = set() # use to check for duplicates in loop, avoid square complexity
+        out_exprs: List[exprs.Expr] = []
+        out_names: List[str] = []  # keep track of order
+        seen_out_names: set[str] = set()  # use to check for duplicates in loop, avoid square complexity
         for i, (expr, name) in enumerate(expanded_list):
             if name is None:
                 # use default, add suffix if needed so default adds no duplicates
@@ -292,13 +328,13 @@ class DataFrame:
                     column_name = default_name
                     if default_name in seen_out_names:
                         # already used, then add suffix until unique name is found
-                        for j in range(1, len(out_names)+1):
+                        for j in range(1, len(out_names) + 1):
                             column_name = f'{default_name}_{j}'
                             if column_name not in seen_out_names:
                                 break
-                else: # no default name, eg some expressions
+                else:  # no default name, eg some expressions
                     column_name = f'col_{i}'
-            else: # user provided name, no attempt to rename
+            else:  # user provided name, no attempt to rename
                 column_name = name
             out_exprs.append(expr)
@@ -326,9 +362,13 @@ class DataFrame:
         for item in self._select_list_exprs:
             item.bind_rel_paths(None)
         plan = Planner.create_query_plan(
-            self.tbl, self._select_list_exprs, where_clause=self.where_clause, group_by_clause=group_by_clause,
+            self.tbl,
+            self._select_list_exprs,
+            where_clause=self.where_clause,
+            group_by_clause=group_by_clause,
             order_by_clause=self.order_by_clause if self.order_by_clause is not None else [],
-            limit=self.limit_val if self.limit_val is not None else 0)  # limit_val == 0: no limit_val
+            limit=self.limit_val if self.limit_val is not None else 0,
+        )  # limit_val == 0: no limit_val
         with Env.get().engine.begin() as conn:
             plan.ctx.conn = conn
@@ -374,12 +414,10 @@ class DataFrame:
                 result_row = [data_row[e.slot_idx] for e in self._select_list_exprs]
                 result_rows.append(result_row)
         except excs.ExprEvalError as e:
-            msg = (f'In row {e.row_num} the {e.expr_msg} encountered exception '
-                   f'{type(e.exc).__name__}:\n{str(e.exc)}')
+            msg = f'In row {e.row_num} the {e.expr_msg} encountered exception ' f'{type(e.exc).__name__}:\n{str(e.exc)}'
             if len(e.input_vals) > 0:
                 input_msgs = [
-                    f"'{d}' = {d.col_type.print_value(e.input_vals[i])}"
-                    for i, d in enumerate(e.expr.dependencies())
+                    f"'{d}' = {d.col_type.print_value(e.input_vals[i])}" for i, d in enumerate(e.expr.dependencies())
                 ]
                 msg += f'\nwith {", ".join(input_msgs)}'
             assert e.exc_tb is not None
@@ -399,6 +437,7 @@ class DataFrame:
     def count(self) -> int:
         from pixeltable.plan import Planner
         stmt = Planner.create_count_stmt(self.tbl, self.where_clause)
         with Env.get().engine.connect() as conn:
             result: int = conn.execute(stmt).scalar_one()
@@ -424,9 +463,9 @@ class DataFrame:
         if self.order_by_clause is not None:
             heading_vals.append('Order By')
             heading_vals.extend([''] * (len(self.order_by_clause) - 1))
-            info_vals.extend([
-                f'{e[0].display_str(inline=False)} {"asc" if e[1] else "desc"}' for e in self.order_by_clause
-            ])
+            info_vals.extend(
+                [f'{e[0].display_str(inline=False)} {"asc" if e[1] else "desc"}' for e in self.order_by_clause]
+            )
         if self.limit_val is not None:
             heading_vals.append('Limit')
             info_vals.append(str(self.limit_val))
@@ -440,9 +479,12 @@ class DataFrame:
         pd_df = self._description()
         # white-space: pre-wrap: print \n as newline
         # th: center-align headings
-        return pd_df.style.set_properties(**{'white-space': 'pre-wrap', 'text-align': 'left'}) \
-            .set_table_styles([dict(selector='th', props=[('text-align', 'center')])]) \
-            .hide(axis='index').hide(axis='columns')
+        return (
+            pd_df.style.set_properties(**{'white-space': 'pre-wrap', 'text-align': 'left'})
+            .set_table_styles([dict(selector='th', props=[('text-align', 'center')])])
+            .hide(axis='index')
+            .hide(axis='columns')
+        )
     def describe(self) -> None:
         """
@@ -453,6 +495,7 @@ class DataFrame:
         try:
             __IPYTHON__
             from IPython.display import display
             display(self._description_html())
         except NameError:
             print(self.__repr__())
@@ -463,16 +506,16 @@ class DataFrame:
     def _repr_html_(self) -> str:
         return self._description_html()._repr_html_()
-    def select(self, *items: Any, **named_items : Any) -> DataFrame:
+    def select(self, *items: Any, **named_items: Any) -> DataFrame:
         if self.select_list is not None:
             raise excs.Error(f'Select list already specified')
-        for (name, _) in named_items.items():
+        for name, _ in named_items.items():
             if not isinstance(name, str) or not is_valid_identifier(name):
                 raise excs.Error(f'Invalid name: {name}')
         base_list = [(expr, None) for expr in items] + [(expr, k) for (k, expr) in named_items.items()]
         if len(base_list) == 0:
             raise excs.Error(f'Empty select list')
         # analyze select list; wrap literals with the corresponding expressions
         select_list = []
         for raw_expr, name in base_list:
@@ -501,13 +544,25 @@ class DataFrame:
             seen.add(name)
         return DataFrame(
-            self.tbl, select_list=select_list, where_clause=self.where_clause, group_by_clause=self.group_by_clause,
-            grouping_tbl=self.grouping_tbl, order_by_clause=self.order_by_clause, limit=self.limit_val)
+            self.tbl,
+            select_list=select_list,
+            where_clause=self.where_clause,
+            group_by_clause=self.group_by_clause,
+            grouping_tbl=self.grouping_tbl,
+            order_by_clause=self.order_by_clause,
+            limit=self.limit_val,
+        )
     def where(self, pred: exprs.Predicate) -> DataFrame:
         return DataFrame(
-            self.tbl, select_list=self.select_list, where_clause=pred, group_by_clause=self.group_by_clause,
-            grouping_tbl=self.grouping_tbl, order_by_clause=self.order_by_clause, limit=self.limit_val)
+            self.tbl,
+            select_list=self.select_list,
+            where_clause=pred,
+            group_by_clause=self.group_by_clause,
+            grouping_tbl=self.grouping_tbl,
+            order_by_clause=self.order_by_clause,
+            limit=self.limit_val,
+        )
     def group_by(self, *grouping_items: Any) -> DataFrame:
         """Add a group-by clause to this DataFrame.
@@ -534,8 +589,14 @@ class DataFrame:
         if grouping_tbl is None:
             group_by_clause = list(grouping_items)
         return DataFrame(
-            self.tbl, select_list=self.select_list, where_clause=self.where_clause, group_by_clause=group_by_clause,
-            grouping_tbl=grouping_tbl, order_by_clause=self.order_by_clause, limit=self.limit_val)
+            self.tbl,
+            select_list=self.select_list,
+            where_clause=self.where_clause,
+            group_by_clause=group_by_clause,
+            grouping_tbl=grouping_tbl,
+            order_by_clause=self.order_by_clause,
+            limit=self.limit_val,
+        )
     def order_by(self, *expr_list: exprs.Expr, asc: bool = True) -> DataFrame:
         for e in expr_list:
@@ -544,16 +605,26 @@ class DataFrame:
         order_by_clause = self.order_by_clause if self.order_by_clause is not None else []
         order_by_clause.extend([(e.copy(), asc) for e in expr_list])
         return DataFrame(
-            self.tbl, select_list=self.select_list, where_clause=self.where_clause,
-            group_by_clause=self.group_by_clause, grouping_tbl=self.grouping_tbl, order_by_clause=order_by_clause,
-            limit=self.limit_val)
+            self.tbl,
+            select_list=self.select_list,
+            where_clause=self.where_clause,
+            group_by_clause=self.group_by_clause,
+            grouping_tbl=self.grouping_tbl,
+            order_by_clause=order_by_clause,
+            limit=self.limit_val,
+        )
     def limit(self, n: int) -> DataFrame:
         assert n is not None and isinstance(n, int)
         return DataFrame(
-            self.tbl, select_list=self.select_list, where_clause=self.where_clause,
-            group_by_clause=self.group_by_clause, grouping_tbl=self.grouping_tbl, order_by_clause=self.order_by_clause,
-            limit=n)
+            self.tbl,
+            select_list=self.select_list,
+            where_clause=self.where_clause,
+            group_by_clause=self.group_by_clause,
+            grouping_tbl=self.grouping_tbl,
+            order_by_clause=self.order_by_clause,
+            limit=n,
+        )
     def __getitem__(self, index: object) -> DataFrame:
         """
@@ -571,24 +642,27 @@ class DataFrame:
         if isinstance(index, list):
             return self.select(*index)
         raise TypeError(f'Invalid index type: {type(index)}')
     def _as_dict(self) -> Dict[str, Any]:
-        """
-            Returns:
-                Dictionary representing this dataframe.
+        """
+        Returns:
+            Dictionary representing this dataframe.
         """
         tbl_versions = self.tbl.get_tbl_versions()
         d = {
             '_classname': 'DataFrame',
             'tbl_ids': [str(t.id) for t in tbl_versions],
             'tbl_versions': [t.version for t in tbl_versions],
-            'select_list':
-                [(e.as_dict(), name) for (e, name) in self.select_list] if self.select_list is not None else None,
+            'select_list': [(e.as_dict(), name) for (e, name) in self.select_list]
+            if self.select_list is not None
+            else None,
             'where_clause': self.where_clause.as_dict() if self.where_clause is not None else None,
-            'group_by_clause':
-                [e.as_dict() for e in self.group_by_clause] if self.group_by_clause is not None else None,
-            'order_by_clause':
-                [(e.as_dict(), asc) for (e,asc) in self.order_by_clause] if self.order_by_clause is not None else None,
+            'group_by_clause': [e.as_dict() for e in self.group_by_clause]
+            if self.group_by_clause is not None
+            else None,
+            'order_by_clause': [(e.as_dict(), asc) for (e, asc) in self.order_by_clause]
+            if self.order_by_clause is not None
+            else None,
             'limit_val': self.limit_val,
         }
         return d
@@ -615,7 +689,7 @@ class DataFrame:
         summary_string = json.dumps(self._as_dict())
         cache_key = hashlib.sha256(summary_string.encode()).hexdigest()
-        dest_path = (Env.get().dataset_cache_dir / f'coco_{cache_key}')
+        dest_path = Env.get().dataset_cache_dir / f'coco_{cache_key}'
         if dest_path.exists():
             assert dest_path.is_dir()
             data_file_path = dest_path / 'data.json'
@@ -660,14 +734,14 @@ class DataFrame:
         Env.get().require_package('torch')
         Env.get().require_package('torchvision')
-        from pixeltable.utils.parquet import save_parquet # pylint: disable=import-outside-toplevel
-        from pixeltable.utils.pytorch import PixeltablePytorchDataset # pylint: disable=import-outside-toplevel
+        from pixeltable.io.parquet import save_parquet  # pylint: disable=import-outside-toplevel
+        from pixeltable.utils.pytorch import PixeltablePytorchDataset  # pylint: disable=import-outside-toplevel
-        summary_string = json.dumps(self._as_dict())
+        summary_string = json.dumps(self._as_dict())
         cache_key = hashlib.sha256(summary_string.encode()).hexdigest()
-        dest_path = (Env.get().dataset_cache_dir / f'df_{cache_key}').with_suffix('.parquet') # pylint: disable = protected-access
-        if dest_path.exists(): # fast path: use cache
+        dest_path = (Env.get().dataset_cache_dir / f'df_{cache_key}').with_suffix('.parquet')  # pylint: disable = protected-access
+        if dest_path.exists():  # fast path: use cache
             assert dest_path.is_dir()
         else:
             save_parquet(self, dest_path)

pixeltable/datatransfer/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+from .remote import Remote

pixeltable 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

Potentially problematic release.

pixeltable 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl