PyPI - pixeltable - Versions diffs - 0.2.5__py3-none-any.whl → 0.2.6__py3-none-any.whl - Mend

pixeltable 0.2.5__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (87) hide show

pixeltable/__init__.py +18 -9
pixeltable/__version__.py +3 -0
pixeltable/catalog/column.py +9 -5
pixeltable/catalog/insertable_table.py +0 -2
pixeltable/catalog/table.py +16 -8
pixeltable/catalog/table_version.py +3 -2
pixeltable/dataframe.py +184 -110
pixeltable/env.py +69 -18
pixeltable/exec/__init__.py +2 -1
pixeltable/exec/data_row_batch.py +6 -7
pixeltable/exec/expr_eval_node.py +28 -28
pixeltable/exec/sql_scan_node.py +7 -6
pixeltable/exprs/__init__.py +4 -3
pixeltable/exprs/column_ref.py +9 -0
pixeltable/exprs/expr.py +15 -7
pixeltable/exprs/function_call.py +17 -15
pixeltable/exprs/image_member_access.py +9 -28
pixeltable/exprs/in_predicate.py +96 -0
pixeltable/exprs/inline_array.py +13 -11
pixeltable/exprs/inline_dict.py +15 -13
pixeltable/exprs/row_builder.py +7 -1
pixeltable/exprs/similarity_expr.py +65 -0
pixeltable/func/__init__.py +0 -2
pixeltable/func/aggregate_function.py +3 -0
pixeltable/func/callable_function.py +57 -13
pixeltable/func/expr_template_function.py +11 -2
pixeltable/func/function.py +35 -4
pixeltable/func/signature.py +5 -15
pixeltable/func/udf.py +6 -10
pixeltable/functions/huggingface.py +23 -4
pixeltable/functions/openai.py +34 -1
pixeltable/functions/pil/image.py +61 -64
pixeltable/functions/together.py +21 -0
pixeltable/globals.py +425 -0
pixeltable/index/base.py +3 -1
pixeltable/index/embedding_index.py +87 -14
pixeltable/io/__init__.py +3 -0
pixeltable/{utils → io}/hf_datasets.py +48 -17
pixeltable/io/pandas.py +148 -0
pixeltable/{utils → io}/parquet.py +58 -33
pixeltable/iterators/__init__.py +1 -1
pixeltable/iterators/base.py +4 -0
pixeltable/iterators/document.py +218 -97
pixeltable/iterators/video.py +8 -9
pixeltable/metadata/__init__.py +7 -3
pixeltable/metadata/converters/convert_12.py +3 -0
pixeltable/metadata/converters/convert_13.py +41 -0
pixeltable/plan.py +2 -19
pixeltable/store.py +2 -2
pixeltable/tool/create_test_db_dump.py +32 -13
pixeltable/type_system.py +13 -54
pixeltable/utils/documents.py +42 -12
pixeltable/utils/http_server.py +70 -0
{pixeltable-0.2.5.dist-info → pixeltable-0.2.6.dist-info}/METADATA +10 -7
pixeltable-0.2.6.dist-info/RECORD +119 -0
{pixeltable-0.2.5.dist-info → pixeltable-0.2.6.dist-info}/WHEEL +1 -1
pixeltable/client.py +0 -600
pixeltable/exprs/image_similarity_predicate.py +0 -58
pixeltable/func/batched_function.py +0 -53
pixeltable/tests/conftest.py +0 -171
pixeltable/tests/ext/test_yolox.py +0 -21
pixeltable/tests/functions/test_fireworks.py +0 -43
pixeltable/tests/functions/test_functions.py +0 -60
pixeltable/tests/functions/test_huggingface.py +0 -158
pixeltable/tests/functions/test_openai.py +0 -162
pixeltable/tests/functions/test_together.py +0 -112
pixeltable/tests/test_audio.py +0 -65
pixeltable/tests/test_catalog.py +0 -27
pixeltable/tests/test_client.py +0 -21
pixeltable/tests/test_component_view.py +0 -379
pixeltable/tests/test_dataframe.py +0 -440
pixeltable/tests/test_dirs.py +0 -107
pixeltable/tests/test_document.py +0 -120
pixeltable/tests/test_exprs.py +0 -802
pixeltable/tests/test_function.py +0 -332
pixeltable/tests/test_index.py +0 -138
pixeltable/tests/test_migration.py +0 -44
pixeltable/tests/test_nos.py +0 -54
pixeltable/tests/test_snapshot.py +0 -231
pixeltable/tests/test_table.py +0 -1343
pixeltable/tests/test_transactional_directory.py +0 -42
pixeltable/tests/test_types.py +0 -52
pixeltable/tests/test_video.py +0 -159
pixeltable/tests/test_view.py +0 -535
pixeltable/tests/utils.py +0 -442
pixeltable-0.2.5.dist-info/RECORD +0 -139
{pixeltable-0.2.5.dist-info → pixeltable-0.2.6.dist-info}/LICENSE +0 -0

pixeltable/env.py CHANGED Viewed

@@ -23,19 +23,24 @@ from tqdm import TqdmWarning
 import pixeltable.exceptions as excs
 from pixeltable import metadata
+from pixeltable.utils.http_server import make_server
 class Env:
     """
     Store for runtime globals.
     """
     _instance: Optional[Env] = None
     _log_fmt_str = '%(asctime)s %(levelname)s %(name)s %(filename)s:%(lineno)d: %(message)s'
     @classmethod
     def get(cls) -> Env:
         if cls._instance is None:
-            cls._instance = Env()
+            env = Env()
+            env._set_up()
+            env._upgrade_metadata()
+            cls._instance = env
         return cls._instance
     def __init__(self):
@@ -46,7 +51,7 @@ class Env:
         self._log_dir: Optional[Path] = None  # log files
         self._tmp_dir: Optional[Path] = None  # any tmp files
         self._sa_engine: Optional[sql.engine.base.Engine] = None
-        self._pgdata_dir : Optional[Path] = None
+        self._pgdata_dir: Optional[Path] = None
         self._db_name: Optional[str] = None
         self._db_server: Optional[pgserver.PostgresServer] = None
         self._db_url: Optional[str] = None
@@ -56,7 +61,7 @@ class Env:
         self._installed_packages: Dict[str, Optional[List[int]]] = {}
         self._nos_client: Optional[Any] = None
         self._spacy_nlp: Optional[Any] = None  # spacy.Language
-        self._httpd: Optional[socketserver.TCPServer] = None
+        self._httpd: Optional[http.server.ThreadingHTTPServer] = None
         self._http_address: Optional[str] = None
         self._registered_clients: dict[str, Any] = {}
@@ -94,13 +99,43 @@ class Env:
         assert self._http_address is not None
         return self._http_address
+    def configure_logging(
+        self,
+        *,
+        to_stdout: Optional[bool] = None,
+        level: Optional[int] = None,
+        add: Optional[str] = None,
+        remove: Optional[str] = None,
+    ) -> None:
+        """Configure logging.
+        Args:
+            to_stdout: if True, also log to stdout
+            level: default log level
+            add: comma-separated list of 'module name:log level' pairs; ex.: add='video:10'
+            remove: comma-separated list of module names
+        """
+        if to_stdout is not None:
+            self.log_to_stdout(to_stdout)
+        if level is not None:
+            self.set_log_level(level)
+        if add is not None:
+            for module, level in [t.split(':') for t in add.split(',')]:
+                self.set_module_log_level(module, int(level))
+        if remove is not None:
+            for module in remove.split(','):
+                self.set_module_log_level(module, None)
+        if to_stdout is None and level is None and add is None and remove is None:
+            self.print_log_config()
     def print_log_config(self) -> None:
         print(f'logging to {self._logfilename}')
         print(f'{"" if self._log_to_stdout else "not "}logging to stdout')
         print(f'default log level: {logging.getLevelName(self._default_log_level)}')
         print(
             f'module log levels: '
-            f'{",".join([name + ":" + logging.getLevelName(val) for name, val in self._module_log_level.items()])}')
+            f'{",".join([name + ":" + logging.getLevelName(val) for name, val in self._module_log_level.items()])}'
+        )
     def log_to_stdout(self, enable: bool = True) -> None:
         self._log_to_stdout = enable
@@ -135,10 +170,14 @@ class Env:
         else:
             return False
-    def set_up(self, echo: bool = False, reinit_db: bool = False) -> None:
+    def _set_up(self, echo: bool = False, reinit_db: bool = False) -> None:
         if self._initialized:
             return
+        # Disable spurious warnings
+        warnings.simplefilter('ignore', category=TqdmWarning)
+        os.environ['TOKENIZERS_PARALLELISM'] = 'false'
         self._initialized = True
         home = Path(os.environ.get('PIXELTABLE_HOME', str(Path.home() / '.pixeltable')))
         assert self._home is None or self._home == home
@@ -204,6 +243,14 @@ class Env:
         av_logger.addHandler(av_fh)
         av_logger.propagate = False
+        # configure web-server logging
+        http_logfilename = self._logfilename.replace('.log', '_http.log')
+        http_fh = logging.FileHandler(self._log_dir / http_logfilename, mode='w')
+        http_fh.setFormatter(logging.Formatter(self._log_fmt_str))
+        http_logger = logging.getLogger('pixeltable.http.server')
+        http_logger.addHandler(http_fh)
+        http_logger.propagate = False
         # empty tmp dir
         for path in glob.glob(f'{self._tmp_dir}/*'):
             os.remove(path)
@@ -224,6 +271,7 @@ class Env:
             create_database(self.db_url)
             self._sa_engine = sql.create_engine(self.db_url, echo=echo, future=True)
             from pixeltable.metadata import schema
             schema.Base.metadata.create_all(self._sa_engine)
             metadata.create_system_info(self._sa_engine)
             # enable pgvector
@@ -240,14 +288,12 @@ class Env:
         self._set_up_runtime()
         self.log_to_stdout(False)
-        # Disable spurious warnings
-        warnings.simplefilter("ignore", category=TqdmWarning)
-    def upgrade_metadata(self) -> None:
+    def _upgrade_metadata(self) -> None:
         metadata.upgrade_md(self._sa_engine)
     def _create_nos_client(self) -> None:
         import nos
         self._logger.info('connecting to NOS')
         nos.init(logging_level=logging.DEBUG)
         self._nos_client = nos.client.InferenceClient()
@@ -256,6 +302,7 @@ class Env:
         # now that we have a client, we can create the module
         import importlib
         try:
             importlib.import_module('pixeltable.functions.nos')
             # it's already been created
@@ -263,6 +310,7 @@ class Env:
         except ImportError:
             pass
         from pixeltable.functions.util import create_nos_modules
         _ = create_nos_modules()
     def get_client(self, name: str, init: Callable, environ: Optional[str] = None) -> Any:
@@ -296,16 +344,13 @@ class Env:
         """
         The http server root is the file system root.
         eg: /home/media/foo.mp4 is located at http://127.0.0.1:{port}/home/media/foo.mp4
+        in windows, the server will translate paths like http://127.0.0.1:{port}/c:/media/foo.mp4
         This arrangement enables serving media hosted within _home,
         as well as external media inserted into pixeltable or produced by pixeltable.
         The port is chosen dynamically to prevent conflicts.
         """
         # Port 0 means OS picks one for us.
-        address = ("127.0.0.1", 0)
-        class FixedRootHandler(http.server.SimpleHTTPRequestHandler):
-            def __init__(self, *args, **kwargs):
-                super().__init__(*args, directory='/', **kwargs)
-        self._httpd = socketserver.TCPServer(address, FixedRootHandler)
+        self._httpd = make_server('127.0.0.1', 0)
         port = self._httpd.server_address[1]
         self._http_address = f'http://127.0.0.1:{port}'
@@ -336,10 +381,12 @@ class Env:
         check('sentence_transformers')
         check('yolox')
         check('boto3')
+        check('fitz')  # pymupdf
         check('pyarrow')
         check('spacy')  # TODO: deal with en-core-web-sm
         if self.is_installed_package('spacy'):
             import spacy
             self._spacy_nlp = spacy.load('en_core_web_sm')
         check('tiktoken')
         check('openai')
@@ -348,6 +395,7 @@ class Env:
         check('nos')
         if self.is_installed_package('nos'):
             self._create_nos_client()
+        check('openpyxl')
     def require_package(self, package: str, min_version: Optional[List[int]] = None) -> None:
         assert package in self._installed_packages
@@ -365,9 +413,12 @@ class Env:
         if len(min_version) < len(installed_version):
             normalized_min_version = min_version + [0] * (len(installed_version) - len(min_version))
         if any([a < b for a, b in zip(installed_version, normalized_min_version)]):
-            raise excs.Error((
-                f'The installed version of package {package} is {".".join([str[v] for v in installed_version])}, '
-                f'but version  >={".".join([str[v] for v in min_version])} is required'))
+            raise excs.Error(
+                (
+                    f'The installed version of package {package} is {".".join([str[v] for v in installed_version])}, '
+                    f'but version  >={".".join([str[v] for v in min_version])} is required'
+                )
+            )
     def num_tmp_files(self) -> int:
         return len(glob.glob(f'{self._tmp_dir}/*'))
@@ -412,4 +463,4 @@ class Env:
     @property
     def spacy_nlp(self) -> Any:
         assert self._spacy_nlp is not None
-        return self._spacy_nlp
+        return self._spacy_nlp

pixeltable/exec/__init__.py CHANGED Viewed

@@ -6,4 +6,5 @@ from .exec_node import ExecNode
 from .expr_eval_node import ExprEvalNode
 from .in_memory_data_node import InMemoryDataNode
 from .sql_scan_node import SqlScanNode
-from .media_validation_node import MediaValidationNode
+from .media_validation_node import MediaValidationNode
+from .data_row_batch import DataRowBatch

pixeltable/exec/data_row_batch.py CHANGED Viewed

@@ -14,9 +14,8 @@ class DataRowBatch:
     Contains the metadata needed to initialize DataRows.
     """
-    def __init__(self, tbl: catalog.TableVersion, row_builder: exprs.RowBuilder, len: int = 0):
-        self.tbl_id = tbl.id
-        self.tbl_version = tbl.version
+    def __init__(self, tbl: Optional[catalog.TableVersion], row_builder: exprs.RowBuilder, len: int = 0):
+        self.tbl = tbl
         self.row_builder = row_builder
         self.img_slot_idxs = [e.slot_idx for e in row_builder.unique_exprs if e.col_type.is_image_type()]
         # non-image media slots
@@ -42,9 +41,10 @@ class DataRowBatch:
     def set_row_ids(self, row_ids: List[int]) -> None:
         """Sets pks for rows in batch"""
+        assert self.tbl is not None
         assert len(row_ids) == len(self.rows)
         for row, row_id in zip(self.rows, row_ids):
-            row.set_pk((row_id, self.tbl_version))
+            row.set_pk((row_id, self.tbl))
     def __len__(self) -> int:
         return len(self.rows)
@@ -57,6 +57,7 @@ class DataRowBatch:
             flushed_slot_idxs: Optional[List[int]] = None
     ) -> None:
         """Flushes images in the given range of rows."""
+        assert self.tbl is not None
         if stored_img_info is None:
             stored_img_info = []
         if flushed_slot_idxs is None:
@@ -67,12 +68,10 @@ class DataRowBatch:
             idx_range = slice(0, len(self.rows))
         for row in self.rows[idx_range]:
             for info in stored_img_info:
-                filepath = str(MediaStore.prepare_media_path(self.tbl_id, info.col.id, self.tbl_version))
+                filepath = str(MediaStore.prepare_media_path(self.tbl.id, info.col.id, self.tbl.version))
                 row.flush_img(info.slot_idx, filepath)
             for slot_idx in flushed_slot_idxs:
                 row.flush_img(slot_idx)
-        #_logger.debug(
-            #f'flushed images in range {idx_range}: slot_idxs={flushed_slot_idxs} stored_img_info={stored_img_info}')
     def __iter__(self) -> Iterator[exprs.DataRow]:
         return DataRowBatchIterator(self)

pixeltable/exec/expr_eval_node.py CHANGED Viewed

@@ -1,20 +1,20 @@
-import sys
-import warnings
-from typing import List, Optional, Tuple
-from dataclasses import dataclass, field
 import logging
+import sys
 import time
+import warnings
+from dataclasses import dataclass
+from typing import List, Optional
 from tqdm import tqdm, TqdmWarning
+import pixeltable.exprs as exprs
+from pixeltable.func import CallableFunction
 from .data_row_batch import DataRowBatch
 from .exec_node import ExecNode
-import pixeltable.exprs as exprs
-import pixeltable.func as func
 _logger = logging.getLogger('pixeltable')
 class ExprEvalNode(ExecNode):
     """Materializes expressions
     """
@@ -22,7 +22,7 @@ class ExprEvalNode(ExecNode):
     class Cohort:
         """List of exprs that form an evaluation context and contain calls to at most one external function"""
         exprs: List[exprs.Expr]
-        ext_function: Optional[func.BatchedFunction]
+        batched_fn: Optional[CallableFunction]
         segment_ctxs: List[exprs.RowBuilder.EvalCtx]
         target_slot_idxs: List[int]
         batch_size: int = 8
@@ -63,12 +63,12 @@ class ExprEvalNode(ExecNode):
         if self.pbar is not None:
             self.pbar.close()
-    def _get_batched_fn(self, expr: exprs.Expr) -> Optional[func.BatchedFunction]:
-        if not isinstance(expr, exprs.FunctionCall):
-            return None
-        return expr.fn if isinstance(expr.fn, func.BatchedFunction) else None
+    def _get_batched_fn(self, expr: exprs.Expr) -> Optional[CallableFunction]:
+        if isinstance(expr, exprs.FunctionCall) and isinstance(expr.fn, CallableFunction) and expr.fn.is_batched:
+            return expr.fn
+        return None
-    def _is_ext_call(self, expr: exprs.Expr) -> bool:
+    def _is_batched_fn_call(self, expr: exprs.Expr) -> bool:
         return self._get_batched_fn(expr) is not None
     def _create_cohorts(self) -> None:
@@ -76,14 +76,14 @@ class ExprEvalNode(ExecNode):
         # break up all_exprs into cohorts such that each cohort contains calls to at most one external function;
         # seed the cohorts with only the ext fn calls
         cohorts: List[List[exprs.Expr]] = []
-        current_ext_function: Optional[func.BatchedFunction] = None
+        current_batched_fn: Optional[CallableFunction] = None
         for e in all_exprs:
-            if not self._is_ext_call(e):
+            if not self._is_batched_fn_call(e):
                 continue
-            if current_ext_function is None or current_ext_function != e.fn:
+            if current_batched_fn is None or current_batched_fn != e.fn:
                 # create a new cohort
                 cohorts.append([])
-                current_ext_function = e.fn
+                current_batched_fn = e.fn
             cohorts[-1].append(e)
         # expand the cohorts to include all exprs that are in the same evaluation context as the external calls;
@@ -115,18 +115,18 @@ class ExprEvalNode(ExecNode):
             assert len(cohort) > 0
             # create the first segment here, so we can avoid checking for an empty list in the loop
             segments = [[cohort[0]]]
-            is_ext_segment = self._is_ext_call(cohort[0])
-            ext_fn: Optional[func.BatchedFunction] = self._get_batched_fn(cohort[0])
+            is_batched_segment = self._is_batched_fn_call(cohort[0])
+            batched_fn: Optional[CallableFunction] = self._get_batched_fn(cohort[0])
             for e in cohort[1:]:
-                if self._is_ext_call(e):
+                if self._is_batched_fn_call(e):
                     segments.append([e])
-                    is_ext_segment = True
-                    ext_fn = self._get_batched_fn(e)
+                    is_batched_segment = True
+                    batched_fn = self._get_batched_fn(e)
                 else:
-                    if is_ext_segment:
+                    if is_batched_segment:
                         # start a new segment
                         segments.append([])
-                        is_ext_segment = False
+                        is_batched_segment = False
                     segments[-1].append(e)
             # we create the EvalCtxs manually because create_eval_ctx() would repeat the dependencies of each segment
@@ -135,21 +135,21 @@ class ExprEvalNode(ExecNode):
                     slot_idxs=[e.slot_idx for e in s], exprs=s, target_slot_idxs=[], target_exprs=[])
                 for s in segments
             ]
-            cohort_info = self.Cohort(cohort, ext_fn, segment_ctxs, target_slot_idxs[i])
+            cohort_info = self.Cohort(cohort, batched_fn, segment_ctxs, target_slot_idxs[i])
             self.cohorts.append(cohort_info)
     def _exec_cohort(self, cohort: Cohort, rows: DataRowBatch) -> None:
         """Compute the cohort for the entire input batch by dividing it up into sub-batches"""
         batch_start_idx = 0  # start row of the current sub-batch
         # for multi-resolution models, we re-assess the correct ext fn batch size for each input batch
-        ext_batch_size = cohort.ext_function.get_batch_size() if cohort.ext_function is not None else None
+        ext_batch_size = cohort.batched_fn.get_batch_size() if cohort.batched_fn is not None else None
         if ext_batch_size is not None:
             cohort.batch_size = ext_batch_size
         while batch_start_idx < len(rows):
             num_batch_rows = min(cohort.batch_size, len(rows) - batch_start_idx)
             for segment_ctx in cohort.segment_ctxs:
-                if not self._is_ext_call(segment_ctx.exprs[0]):
+                if not self._is_batched_fn_call(segment_ctx.exprs[0]):
                     # compute batch row-wise
                     for row_idx in range(batch_start_idx, batch_start_idx + num_batch_rows):
                         self.row_builder.eval(
@@ -193,7 +193,7 @@ class ExprEvalNode(ExecNode):
                             for k in kwarg_batches.keys()
                         }
                         start_ts = time.perf_counter()
-                        result_batch = fn_call.fn.invoke(call_args, call_kwargs)
+                        result_batch = fn_call.fn.exec_batch(*call_args, **call_kwargs)
                         self.ctx.profile.eval_time[fn_call.slot_idx] += time.perf_counter() - start_ts
                         self.ctx.profile.eval_count[fn_call.slot_idx] += num_ext_batch_rows

pixeltable/exec/sql_scan_node.py CHANGED Viewed

@@ -21,7 +21,6 @@ class SqlScanNode(ExecNode):
             select_list: Iterable[exprs.Expr],
             where_clause: Optional[exprs.Expr] = None, filter: Optional[exprs.Predicate] = None,
             order_by_items: Optional[List[Tuple[exprs.Expr, bool]]] = None,
-            similarity_clause: Optional[exprs.ImageSimilarityPredicate] = None,
             limit: int = 0, set_pk: bool = False, exact_version_only: Optional[List[catalog.TableVersion]] = None
     ):
         """
@@ -77,15 +76,17 @@ class SqlScanNode(ExecNode):
         # the number of tables that need to be joined to the target table
         for rowid_ref in [e for e, _ in order_by_items if isinstance(e, exprs.RowidRef)]:
             rowid_ref.set_tbl(tbl)
-        order_by_clause = [e.sql_expr().desc() if not asc else e.sql_expr() for e, asc in order_by_items]
+        order_by_clause: List[sql.ClauseElement] = []
+        for e, asc in order_by_items:
+            if isinstance(e, exprs.SimilarityExpr):
+                order_by_clause.append(e.as_order_by_clause(asc))
+            else:
+                order_by_clause.append(e.sql_expr().desc() if not asc else e.sql_expr())
         if where_clause is not None:
             sql_where_clause = where_clause.sql_expr()
             assert sql_where_clause is not None
             self.stmt = self.stmt.where(sql_where_clause)
-        if similarity_clause is not None:
-            self.stmt = self.stmt.order_by(
-                similarity_clause.img_col_ref.col.sa_idx_col.l2_distance(similarity_clause.embedding()))
         if len(order_by_clause) > 0:
             self.stmt = self.stmt.order_by(*order_by_clause)
         elif target.id in row_builder.unstored_iter_args:
@@ -201,7 +202,7 @@ class SqlScanNode(ExecNode):
                 self.row_builder.eval(output_row, self.filter_eval_ctx, profile=self.ctx.profile)
                 if output_row[self.filter.slot_idx]:
                     needs_row = True
-                    if self.limit is not None and len(output_batch) >= self.limit:
+                    if self.limit > 0 and len(output_batch) >= self.limit:
                         self.has_more_rows = False
                         break
                 else:

pixeltable/exprs/__init__.py CHANGED Viewed

@@ -6,9 +6,10 @@ from .comparison import Comparison
 from .compound_predicate import CompoundPredicate
 from .data_row import DataRow
 from .expr import Expr
+from .expr_set import ExprSet
 from .function_call import FunctionCall
 from .image_member_access import ImageMemberAccess
-from .image_similarity_predicate import ImageSimilarityPredicate
+from .in_predicate import InPredicate
 from .inline_array import InlineArray
 from .inline_dict import InlineDict
 from .is_null import IsNull
@@ -16,9 +17,9 @@ from .json_mapper import JsonMapper
 from .json_path import RELATIVE_PATH_ROOT, JsonPath
 from .literal import Literal
 from .object_ref import ObjectRef
-from  .variable import Variable
 from .predicate import Predicate
 from .row_builder import RowBuilder, ColumnSlotIdx, ExecProfile
 from .rowid_ref import RowidRef
-from .expr_set import ExprSet
+from .similarity_expr import SimilarityExpr
 from .type_cast import TypeCast
+from .variable import Variable

pixeltable/exprs/column_ref.py CHANGED Viewed

@@ -63,6 +63,15 @@ class ColumnRef(Expr):
         return super().__getattr__(name)
+    def similarity(self, other: Any) -> Expr:
+        if isinstance(other, Expr):
+            raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not an expression')
+        item = Expr.from_object(other)
+        if item is None or not(item.col_type.is_string_type() or item.col_type.is_image_type()):
+            raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not a {type(other)}')
+        from .similarity_expr import SimilarityExpr
+        return SimilarityExpr(self, item)
     def default_column_name(self) -> Optional[str]:
         return str(self)

pixeltable/exprs/expr.py CHANGED Viewed

@@ -60,9 +60,9 @@ class Expr(abc.ABC):
         # index of the expr's value in the data row:
         # - set for all materialized exprs
-        # - -1: not executable
+        # - None: not executable
         # - not set for subexprs that don't need to be materialized because the parent can be materialized via SQL
-        self.slot_idx = -1
+        self.slot_idx: Optional[int] = None
         self.components: List[Expr] = []  # the subexprs that are needed to construct this expr
     def dependencies(self) -> List[Expr]:
@@ -110,6 +110,11 @@ class Expr(abc.ABC):
                 return False
         return self._equals(other)
+    def _equals(self, other: Expr) -> bool:
+        # we already compared the type and components in equals(); subclasses that require additional comparisons
+        # override this
+        return True
     def _id_attrs(self) -> List[Tuple[str, Any]]:
         """Returns attribute name/value pairs that are used to construct the instance id.
@@ -148,7 +153,7 @@ class Expr(abc.ABC):
         cls = self.__class__
         result = cls.__new__(cls)
         result.__dict__.update(self.__dict__)
-        result.slot_idx = -1
+        result.slot_idx = None
         result.components = [c.copy() for c in self.components]
         return result
@@ -313,10 +318,6 @@ class Expr(abc.ABC):
             return InlineArray(tuple(o))
         return None
-    @abc.abstractmethod
-    def _equals(self, other: Expr) -> bool:
-        pass
     @abc.abstractmethod
     def sql_expr(self) -> Optional[sql.ClauseElement]:
         """
@@ -396,6 +397,13 @@ class Expr(abc.ABC):
     def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
         assert False, 'not implemented'
+    def isin(self, value_set: Any) -> 'pixeltable.exprs.InPredicate':
+        from .in_predicate import InPredicate
+        if isinstance(value_set, Expr):
+            return InPredicate(self, value_set_expr=value_set)
+        else:
+            return InPredicate(self, value_set_literal=value_set)
     def astype(self, new_type: ts.ColumnType) -> 'pixeltable.exprs.TypeCast':
         from pixeltable.exprs import TypeCast
         return TypeCast(self, new_type)

pixeltable/exprs/function_call.py CHANGED Viewed

@@ -28,7 +28,7 @@ class FunctionCall(Expr):
         if group_by_clause is None:
             group_by_clause = []
         signature = fn.signature
-        super().__init__(signature.get_return_type(bound_args))
+        super().__init__(fn.call_return_type(bound_args))
         self.fn = fn
         self.is_method_call = is_method_call
         self.check_args(signature, bound_args)
@@ -46,9 +46,9 @@ class FunctionCall(Expr):
         # Tuple[int, Any]:
         # - for Exprs: (index into components, None)
-        # - otherwise: (-1, val)
-        self.args: List[Tuple[int, Any]] = []
-        self.kwargs: Dict[str, Tuple[int, Any]] = {}
+        # - otherwise: (None, val)
+        self.args: List[Tuple[Optional[int], Optional[Any]]] = []
+        self.kwargs: Dict[str, Tuple[Optional[int], Optional[Any]]] = {}
         # we record the types of non-variable parameters for runtime type checks
         self.arg_types: List[ts.ColumnType] = []
@@ -62,7 +62,7 @@ class FunctionCall(Expr):
                 self.args.append((len(self.components), None))
                 self.components.append(arg.copy())
             else:
-                self.args.append((-1, arg))
+                self.args.append((None, arg))
             if param.kind != inspect.Parameter.VAR_POSITIONAL and param.kind != inspect.Parameter.VAR_KEYWORD:
                 self.arg_types.append(signature.parameters[param.name].col_type)
@@ -74,7 +74,7 @@ class FunctionCall(Expr):
                 self.kwargs[param_name] = (len(self.components), None)
                 self.components.append(arg.copy())
             else:
-                self.kwargs[param_name] = (-1, arg)
+                self.kwargs[param_name] = (None, arg)
             if fn.py_signature.parameters[param_name].kind != inspect.Parameter.VAR_KEYWORD:
                 self.kwarg_types[param_name] = signature.parameters[param_name].col_type
@@ -215,12 +215,12 @@ class FunctionCall(Expr):
     def _print_args(self, start_idx: int = 0, inline: bool = True) -> str:
         arg_strs = [
-            str(arg) if idx == -1 else str(self.components[idx]) for idx, arg in self.args[start_idx:]
+            str(arg) if idx is None else str(self.components[idx]) for idx, arg in self.args[start_idx:]
         ]
         def print_arg(arg: Any) -> str:
             return f"'{arg}'" if isinstance(arg, str) else str(arg)
         arg_strs.extend([
-            f'{param_name}={print_arg(arg) if idx == -1 else str(self.components[idx])}'
+            f'{param_name}={print_arg(arg) if idx is None else str(self.components[idx])}'
             for param_name, (idx, arg) in self.kwargs.items()
         ])
         if len(self.order_by) > 0:
@@ -287,7 +287,7 @@ class FunctionCall(Expr):
         """Return args and kwargs, constructed for data_row"""
         kwargs: Dict[str, Any] = {}
         for param_name, (component_idx, arg) in self.kwargs.items():
-            val = arg if component_idx == -1 else data_row[self.components[component_idx].slot_idx]
+            val = arg if component_idx is None else data_row[self.components[component_idx].slot_idx]
             param = self.fn.signature.parameters[param_name]
             if param.kind == inspect.Parameter.VAR_KEYWORD:
                 # expand **kwargs parameter
@@ -298,7 +298,7 @@ class FunctionCall(Expr):
         args: List[Any] = []
         for param_idx, (component_idx, arg) in enumerate(self.args):
-            val = arg if component_idx == -1 else data_row[self.components[component_idx].slot_idx]
+            val = arg if component_idx is None else data_row[self.components[component_idx].slot_idx]
             param = self.fn.signature.parameters_by_pos[param_idx]
             if param.kind == inspect.Parameter.VAR_POSITIONAL:
                 # expand *args parameter
@@ -333,7 +333,8 @@ class FunctionCall(Expr):
             # TODO: can we get rid of this extra copy?
             fn_expr = self.components[self.fn_expr_idx]
             data_row[self.slot_idx] = data_row[fn_expr.slot_idx]
-        elif isinstance(self.fn, func.CallableFunction):
+        elif isinstance(self.fn, func.CallableFunction) and not self.fn.is_batched:
+            # optimization: avoid additional level of indirection we'd get from calling Function.exec()
             data_row[self.slot_idx] = self.fn.py_fn(*args, **kwargs)
         elif self.is_window_fn_call:
             if self.has_group_by():
@@ -348,9 +349,10 @@ class FunctionCall(Expr):
                 self.aggregator = self.fn.agg_cls(**self.agg_init_args)
             self.aggregator.update(*args)
             data_row[self.slot_idx] = self.aggregator.value()
-        else:
-            assert self.is_agg_fn_call
+        elif self.is_agg_fn_call:
             data_row[self.slot_idx] = self.aggregator.value()
+        else:
+            data_row[self.slot_idx] = self.fn.exec(*args, **kwargs)
     def _as_dict(self) -> Dict:
         result = {
@@ -369,9 +371,9 @@ class FunctionCall(Expr):
         # reassemble bound args
         fn = func.Function.from_dict(d['fn'])
         param_names = list(fn.signature.parameters.keys())
-        bound_args = {param_names[i]: arg if idx == -1 else components[idx] for i, (idx, arg) in enumerate(d['args'])}
+        bound_args = {param_names[i]: arg if idx is None else components[idx] for i, (idx, arg) in enumerate(d['args'])}
         bound_args.update(
-            {param_name: val if idx == -1 else components[idx] for param_name, (idx, val) in d['kwargs'].items()})
+            {param_name: val if idx is None else components[idx] for param_name, (idx, val) in d['kwargs'].items()})
         group_by_exprs = components[d['group_by_start_idx']:d['group_by_stop_idx']]
         order_by_exprs = components[d['order_by_start_idx']:]
         fn_call = cls(

pixeltable 0.2.5__py3-none-any.whl → 0.2.6__py3-none-any.whl

Potentially problematic release.

pixeltable 0.2.5__py3-none-any.whl → 0.2.6__py3-none-any.whl