PyPI - pixeltable - Versions diffs - 0.1.0__py3-none-any.whl → 0.2.4__py3-none-any.whl - Mend

pixeltable 0.1.0__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (147)

pixeltable/__init__.py +34 -6
pixeltable/catalog/__init__.py +13 -0
pixeltable/catalog/catalog.py +159 -0
pixeltable/catalog/column.py +200 -0
pixeltable/catalog/dir.py +32 -0
pixeltable/catalog/globals.py +33 -0
pixeltable/catalog/insertable_table.py +191 -0
pixeltable/catalog/named_function.py +36 -0
pixeltable/catalog/path.py +58 -0
pixeltable/catalog/path_dict.py +139 -0
pixeltable/catalog/schema_object.py +39 -0
pixeltable/catalog/table.py +581 -0
pixeltable/catalog/table_version.py +749 -0
pixeltable/catalog/table_version_path.py +133 -0
pixeltable/catalog/view.py +203 -0
pixeltable/client.py +590 -30
pixeltable/dataframe.py +540 -349
pixeltable/env.py +359 -45
pixeltable/exceptions.py +12 -21
pixeltable/exec/__init__.py +9 -0
pixeltable/exec/aggregation_node.py +78 -0
pixeltable/exec/cache_prefetch_node.py +116 -0
pixeltable/exec/component_iteration_node.py +79 -0
pixeltable/exec/data_row_batch.py +95 -0
pixeltable/exec/exec_context.py +22 -0
pixeltable/exec/exec_node.py +61 -0
pixeltable/exec/expr_eval_node.py +217 -0
pixeltable/exec/in_memory_data_node.py +69 -0
pixeltable/exec/media_validation_node.py +43 -0
pixeltable/exec/sql_scan_node.py +225 -0
pixeltable/exprs/__init__.py +24 -0
pixeltable/exprs/arithmetic_expr.py +102 -0
pixeltable/exprs/array_slice.py +71 -0
pixeltable/exprs/column_property_ref.py +77 -0
pixeltable/exprs/column_ref.py +105 -0
pixeltable/exprs/comparison.py +77 -0
pixeltable/exprs/compound_predicate.py +98 -0
pixeltable/exprs/data_row.py +195 -0
pixeltable/exprs/expr.py +586 -0
pixeltable/exprs/expr_set.py +39 -0
pixeltable/exprs/function_call.py +380 -0
pixeltable/exprs/globals.py +69 -0
pixeltable/exprs/image_member_access.py +115 -0
pixeltable/exprs/image_similarity_predicate.py +58 -0
pixeltable/exprs/inline_array.py +107 -0
pixeltable/exprs/inline_dict.py +101 -0
pixeltable/exprs/is_null.py +38 -0
pixeltable/exprs/json_mapper.py +121 -0
pixeltable/exprs/json_path.py +159 -0
pixeltable/exprs/literal.py +54 -0
pixeltable/exprs/object_ref.py +41 -0
pixeltable/exprs/predicate.py +44 -0
pixeltable/exprs/row_builder.py +355 -0
pixeltable/exprs/rowid_ref.py +94 -0
pixeltable/exprs/type_cast.py +53 -0
pixeltable/exprs/variable.py +45 -0
pixeltable/func/__init__.py +9 -0
pixeltable/func/aggregate_function.py +194 -0
pixeltable/func/batched_function.py +53 -0
pixeltable/func/callable_function.py +69 -0
pixeltable/func/expr_template_function.py +82 -0
pixeltable/func/function.py +110 -0
pixeltable/func/function_registry.py +227 -0
pixeltable/func/globals.py +36 -0
pixeltable/func/nos_function.py +202 -0
pixeltable/func/signature.py +166 -0
pixeltable/func/udf.py +163 -0
pixeltable/functions/__init__.py +52 -103
pixeltable/functions/eval.py +216 -0
pixeltable/functions/fireworks.py +34 -0
pixeltable/functions/huggingface.py +120 -0
pixeltable/functions/image.py +16 -0
pixeltable/functions/openai.py +256 -0
pixeltable/functions/pil/image.py +148 -7
pixeltable/functions/string.py +13 -0
pixeltable/functions/together.py +122 -0
pixeltable/functions/util.py +41 -0
pixeltable/functions/video.py +62 -0
pixeltable/iterators/__init__.py +3 -0
pixeltable/iterators/base.py +48 -0
pixeltable/iterators/document.py +311 -0
pixeltable/iterators/video.py +89 -0
pixeltable/metadata/__init__.py +54 -0
pixeltable/metadata/converters/convert_10.py +18 -0
pixeltable/metadata/schema.py +211 -0
pixeltable/plan.py +656 -0
pixeltable/store.py +418 -182
pixeltable/tests/conftest.py +146 -88
pixeltable/tests/functions/test_fireworks.py +42 -0
pixeltable/tests/functions/test_functions.py +60 -0
pixeltable/tests/functions/test_huggingface.py +158 -0
pixeltable/tests/functions/test_openai.py +152 -0
pixeltable/tests/functions/test_together.py +111 -0
pixeltable/tests/test_audio.py +65 -0
pixeltable/tests/test_catalog.py +27 -0
pixeltable/tests/test_client.py +14 -14
pixeltable/tests/test_component_view.py +370 -0
pixeltable/tests/test_dataframe.py +439 -0
pixeltable/tests/test_dirs.py +78 -62
pixeltable/tests/test_document.py +120 -0
pixeltable/tests/test_exprs.py +592 -135
pixeltable/tests/test_function.py +297 -67
pixeltable/tests/test_migration.py +43 -0
pixeltable/tests/test_nos.py +54 -0
pixeltable/tests/test_snapshot.py +208 -0
pixeltable/tests/test_table.py +1195 -263
pixeltable/tests/test_transactional_directory.py +42 -0
pixeltable/tests/test_types.py +5 -11
pixeltable/tests/test_video.py +151 -34
pixeltable/tests/test_view.py +530 -0
pixeltable/tests/utils.py +320 -45
pixeltable/tool/create_test_db_dump.py +149 -0
pixeltable/tool/create_test_video.py +81 -0
pixeltable/type_system.py +445 -124
pixeltable/utils/__init__.py +17 -46
pixeltable/utils/arrow.py +98 -0
pixeltable/utils/clip.py +12 -15
pixeltable/utils/coco.py +136 -0
pixeltable/utils/documents.py +39 -0
pixeltable/utils/filecache.py +195 -0
pixeltable/utils/help.py +11 -0
pixeltable/utils/hf_datasets.py +157 -0
pixeltable/utils/media_store.py +76 -0
pixeltable/utils/parquet.py +167 -0
pixeltable/utils/pytorch.py +91 -0
pixeltable/utils/s3.py +13 -0
pixeltable/utils/sql.py +17 -0
pixeltable/utils/transactional_directory.py +35 -0
pixeltable-0.2.4.dist-info/LICENSE +18 -0
pixeltable-0.2.4.dist-info/METADATA +127 -0
pixeltable-0.2.4.dist-info/RECORD +132 -0
{pixeltable-0.1.0.dist-info → pixeltable-0.2.4.dist-info}/WHEEL +1 -1
pixeltable/catalog.py +0 -1421
pixeltable/exprs.py +0 -1745
pixeltable/function.py +0 -269
pixeltable/functions/clip.py +0 -10
pixeltable/functions/pil/__init__.py +0 -23
pixeltable/functions/tf.py +0 -21
pixeltable/index.py +0 -57
pixeltable/tests/test_dict.py +0 -24
pixeltable/tests/test_functions.py +0 -11
pixeltable/tests/test_tf.py +0 -69
pixeltable/tf.py +0 -33
pixeltable/utils/tf.py +0 -33
pixeltable/utils/video.py +0 -32
pixeltable-0.1.0.dist-info/METADATA +0 -34
pixeltable-0.1.0.dist-info/RECORD +0 -36

pixeltable/tests/utils.py CHANGED Viewed

@@ -1,16 +1,34 @@
 import datetime
 import glob
+import json
 import os
+from collections import namedtuple
 from pathlib import Path
-from typing import Dict, Any, List
+from typing import Any, Dict, List, Optional, Set
 import numpy as np
 import pandas as pd
+import pytest
-import pixeltable as pt
+import pixeltable as pxt
+import pixeltable.type_system as ts
 from pixeltable import catalog
-from pixeltable.type_system import ColumnType, StringType, IntType, FloatType, BoolType, TimestampType
-from pixeltable.function import Function
+from pixeltable.catalog.globals import UpdateStatus
+from pixeltable.dataframe import DataFrameResultSet
+from pixeltable.env import Env
+from pixeltable.type_system import (
+    ArrayType,
+    BoolType,
+    ColumnType,
+    FloatType,
+    ImageType,
+    IntType,
+    JsonType,
+    StringType,
+    TimestampType,
+    VideoType,
+)
 def make_default_type(t: ColumnType.Type) -> ColumnType:
     if t == ColumnType.Type.STRING:
@@ -25,14 +43,19 @@ def make_default_type(t: ColumnType.Type) -> ColumnType:
         return TimestampType()
     assert False
-def make_tbl(db: pt.Db, name: str = 'test', col_names: List[str] = ['c1']) -> pt.MutableTable:
-    schema: List[catalog.Column] = []
+def make_tbl(cl: pxt.Client, name: str = 'test', col_names: Optional[List[str]] = None) -> catalog.InsertableTable:
+    if col_names is None:
+        col_names = ['c1']
+    schema: Dict[str, ts.ColumnType] = {}
     for i, col_name in enumerate(col_names):
-        schema.append(catalog.Column(f'{col_name}', make_default_type(ColumnType.Type(i % 5))))
-    return db.create_table(name, schema)
+        schema[f'{col_name}'] = make_default_type(ColumnType.Type(i % 5))
+    return cl.create_table(name, schema)
-def create_table_data(t: catalog.Table, col_names: List[str] = [], num_rows: int = 10) -> pd.DataFrame:
+def create_table_data(t: catalog.Table, col_names: Optional[List[str]] = None, num_rows: int = 10) -> List[Dict[str, Any]]:
+    if col_names is None:
+        col_names = []
     data: Dict[str, Any] = {}
     sample_dict = {
         'detections': [{
             'id': '637e8e073b28441a453564cf',
@@ -70,38 +93,138 @@ def create_table_data(t: catalog.Table, col_names: List[str] = [], num_rows: int
     }
     if len(col_names) == 0:
-        col_names = [c.name for c in t.columns]
+        col_names = [c.name for c in t.columns() if not c.is_computed]
+    col_types = t.column_types()
     for col_name in col_names:
-        col = t.cols_by_name[col_name]
+        col_type = col_types[col_name]
         col_data: Any = None
-        if col.col_type.is_string_type():
+        if col_type.is_string_type():
             col_data = ['test string'] * num_rows
-        if col.col_type.is_int_type():
-            col_data = np.random.randint(0, 100, size=num_rows)
-        if col.col_type.is_float_type():
-            col_data = np.random.random(size=num_rows) * 100
-        if col.col_type.is_bool_type():
+        if col_type.is_int_type():
+            col_data = np.random.randint(0, 100, size=num_rows).tolist()
+        if col_type.is_float_type():
+            col_data = (np.random.random(size=num_rows) * 100).tolist()
+        if col_type.is_bool_type():
             col_data = np.random.randint(0, 2, size=num_rows)
             col_data = [False if i == 0 else True for i in col_data]
-        if col.col_type.is_timestamp_type():
-            col_data = datetime.datetime.now()
-        if col.col_type.is_json_type():
+        if col_type.is_timestamp_type():
+            col_data = [datetime.datetime.now()] * num_rows
+        if col_type.is_json_type():
             col_data = [sample_dict] * num_rows
-        # TODO: implement this
-        assert not col.col_type.is_image_type()
-        assert not col.col_type.is_array_type()
-        data[col.name] = col_data
-    return pd.DataFrame(data=data)
+        if col_type.is_array_type():
+            col_data = [np.ones(col_type.shape, dtype=col_type.numpy_dtype()) for i in range(num_rows)]
+        if col_type.is_image_type():
+            image_path = get_image_files()[0]
+            col_data = [image_path for i in range(num_rows)]
+        if col_type.is_video_type():
+            video_path = get_video_files()[0]
+            col_data = [video_path for i in range(num_rows)]
+        data[col_name] = col_data
+    rows = [{col_name: data[col_name][i] for col_name in col_names} for i in range(num_rows)]
+    return rows
+def create_test_tbl(client: pxt.Client, name: str = 'test_tbl') -> catalog.Table:
+    schema = {
+        'c1': StringType(nullable=False),
+        'c1n': StringType(nullable=True),
+        'c2': IntType(nullable=False),
+        'c3': FloatType(nullable=False),
+        'c4': BoolType(nullable=False),
+        'c5': TimestampType(nullable=False),
+        'c6': JsonType(nullable=False),
+        'c7': JsonType(nullable=False),
+    }
+    t = client.create_table(name, schema, primary_key='c2')
+    t.add_column(c8=[[1, 2, 3], [4, 5, 6]])
+    num_rows = 100
+    d1 = {
+        'f1': 'test string 1',
+        'f2': 1,
+        'f3': 1.0,
+        'f4': True,
+        'f5': [1.0, 2.0, 3.0, 4.0],
+        'f6': {
+            'f7': 'test string 2',
+            'f8': [1.0, 2.0, 3.0, 4.0],
+        },
+    }
+    d2 = [d1, d1]
-def read_data_file(dir_name: str, file_name: str, path_col_names: List[str] = []) -> pd.DataFrame:
+    c1_data = [f'test string {i}' for i in range(num_rows)]
+    c2_data = [i for i in range(num_rows)]
+    c3_data = [float(i) for i in range(num_rows)]
+    c4_data = [bool(i % 2) for i in range(num_rows)]
+    c5_data = [datetime.datetime.now()] * num_rows
+    c6_data = []
+    for i in range(num_rows):
+        d = {
+            'f1': f'test string {i}',
+            'f2': i,
+            'f3': float(i),
+            'f4': bool(i % 2),
+            'f5': [1.0, 2.0, 3.0, 4.0],
+            'f6': {
+                'f7': 'test string 2',
+                'f8': [1.0, 2.0, 3.0, 4.0],
+            },
+        }
+        c6_data.append(d)
+    c7_data = [d2] * num_rows
+    rows = [
+        {
+            'c1': c1_data[i],
+            'c1n': c1_data[i] if i % 10 != 0 else None,
+            'c2': c2_data[i],
+            'c3': c3_data[i],
+            'c4': c4_data[i],
+            'c5': c5_data[i],
+            'c6': c6_data[i],
+            'c7': c7_data[i],
+        }
+        for i in range(num_rows)
+    ]
+    t.insert(rows)
+    return t
+def create_all_datatypes_tbl(test_client: pxt.Client) -> catalog.Table:
+    """ Creates a table with all supported datatypes.
+    """
+    schema = {
+        'row_id': IntType(nullable=False), # used for row selection
+        'c_array': ArrayType(shape=(10,),  dtype=FloatType(), nullable=True),
+        'c_bool': BoolType(nullable=True),
+        'c_float': FloatType(nullable=True),
+        'c_image': ImageType(nullable=True),
+        'c_int': IntType(nullable=True),
+        'c_json': JsonType(nullable=True),
+        'c_string': StringType(nullable=True),
+        'c_timestamp': TimestampType(nullable=True),
+        'c_video': VideoType(nullable=True),
+    }
+    tbl = test_client.create_table('all_datatype_tbl', schema)
+    example_rows = create_table_data(tbl, num_rows=11)
+    for i,r in enumerate(example_rows):
+        r['row_id'] = i # row_id
+    tbl.insert(example_rows)
+    return tbl
+def read_data_file(dir_name: str, file_name: str, path_col_names: Optional[List[str]] = None) -> List[Dict[str, Any]]:
     """
     Locate dir_name, create df out of file_name.
-    transform columns 'file_name' to column 'file_path' with absolute paths
     path_col_names: col names in csv file that contain file names; those will be converted to absolute paths
     by adding the path to 'file_name' as a prefix.
+    Returns:
+        tuple of (list of rows, list of column names)
     """
-    glob_result = glob.glob(f'{os.getcwd()}/**/{dir_name}', recursive=True)
+    if path_col_names is None:
+        path_col_names = []
+    tests_dir = os.path.dirname(__file__) # search with respect to tests/ dir
+    glob_result = glob.glob(f'{tests_dir}/**/{dir_name}', recursive=True)
     assert len(glob_result) == 1, f'Could not find {dir_name}'
     abs_path = Path(glob_result[0])
     data_file_path = abs_path / file_name
@@ -110,24 +233,176 @@ def read_data_file(dir_name: str, file_name: str, path_col_names: List[str] = []
     for col_name in path_col_names:
         assert col_name in df.columns
         df[col_name] = df.apply(lambda r: str(abs_path / r[col_name]), axis=1)
-        return df
+    return df.to_dict(orient='records')
+def get_video_files(include_bad_video: bool = False) -> List[str]:
+    tests_dir = os.path.dirname(__file__) # search with respect to tests/ dir
+    glob_result = glob.glob(f'{tests_dir}/**/videos/*', recursive=True)
+    if not include_bad_video:
+        glob_result = [f for f in glob_result if 'bad_video' not in f]
+    half_res = [f for f in glob_result if 'half_res' in f or 'bad_video' in f]
+    return half_res
-def get_video_files() -> List[str]:
-    glob_result = glob.glob(f'{os.getcwd()}/**/videos/*.mp4', recursive=True)
+def get_test_video_files() -> List[str]:
+    tests_dir = os.path.dirname(__file__) # search with respect to tests/ dir
+    glob_result = glob.glob(f'{tests_dir}/**/test_videos/*', recursive=True)
     return glob_result
+def get_image_files(include_bad_image: bool = False) -> List[str]:
+    tests_dir = os.path.dirname(__file__) # search with respect to tests/ dir
+    glob_result = glob.glob(f'{tests_dir}/**/imagenette2-160/*', recursive=True)
+    if not include_bad_image:
+        glob_result = [f for f in glob_result if 'bad_image' not in f]
+    return glob_result
+def get_audio_files(include_bad_audio: bool = False) -> List[str]:
+    tests_dir = os.path.dirname(__file__)
+    glob_result = glob.glob(f'{tests_dir}/**/audio/*', recursive=True)
+    if not include_bad_audio:
+        glob_result = [f for f in glob_result if 'bad_audio' not in f]
+    return glob_result
+def get_documents() -> List[str]:
+    tests_dir = os.path.dirname(__file__)
+    # for now, we can only handle .html and .md
+    return [p for p in glob.glob(f'{tests_dir}/**/documents/*', recursive=True) if not p.endswith('.pdf')]
+def get_sentences(n: int = 100) -> List[str]:
+    tests_dir = os.path.dirname(__file__)
+    path = glob.glob(f'{tests_dir}/**/jeopardy.json', recursive=True)[0]
+    with open(path, 'r', encoding='utf8') as f:
+        questions_list = json.load(f)
+    # this dataset contains \' around the questions
+    return [q['question'].replace("'", '') for q in questions_list[:n]]
+def assert_resultset_eq(r1: DataFrameResultSet, r2: DataFrameResultSet) -> None:
+    assert len(r1) == len(r2)
+    assert len(r1.column_names()) == len(r2.column_names())  # we don't care about the actual column names
+    r1_pd = r1.to_pandas()
+    r2_pd = r2.to_pandas()
+    for i in range(len(r1.column_names())):
+        # only compare column values
+        s1 = r1_pd.iloc[:, i]
+        s2 = r2_pd.iloc[:, i]
+        if s1.dtype == np.float64:
+            assert np.allclose(s1, s2)
+        else:
+            assert s1.equals(s2)
+def skip_test_if_not_installed(package) -> None:
+    if not Env.get().is_installed_package(package):
+        pytest.skip(f'Package `{package}` is not installed.')
+def validate_update_status(status: UpdateStatus, expected_rows: Optional[int] = None) -> None:
+    assert status.num_excs == 0
+    if expected_rows is not None:
+        assert status.num_rows == expected_rows
+def make_test_arrow_table(output_path: Path) -> None:
+    import pyarrow as pa
+    value_dict = {
+        'c_id': [1, 2, 3, 4, 5],
+        'c_int64': [-10, -20, -30, -40, None],
+        'c_int32': [-1, -2, -3, -4, None],
+        'c_float32': [1.1, 2.2, 3.3, 4.4, None],
+        'c_string': ['aaa', 'bbb', 'ccc', 'ddd', None],
+        'c_boolean': [True, False, True, False, None],
+        'c_timestamp': [
+            datetime.datetime(2012, 1, 1, 12, 0, 0, 25),
+            datetime.datetime(2012, 1, 2, 12, 0, 0, 25),
+            datetime.datetime(2012, 1, 3, 12, 0, 0, 25),
+            datetime.datetime(2012, 1, 4, 12, 0, 0, 25),
+            None,
+        ],
+        # The pyarrow fixed_shape_tensor type does not support NULLs (currently can write them but not read them)
+        # So, no nulls in this column
+        'c_array_float32': [
+            [
+                1.0,
+                2.0,
+            ],
+            [
+                10.0,
+                20.0,
+            ],
+            [
+                100.0,
+                200.0,
+            ],
+            [
+                1000.0,
+                2000.0,
+            ],
+            [10000.0, 20000.0],
+        ],
+    }
+    arr_size = len(value_dict['c_array_float32'][0])
+    tensor_type = pa.fixed_shape_tensor(pa.float32(), (arr_size,))
+    schema = pa.schema(
+        [
+            ('c_id', pa.int32()),
+            ('c_int64', pa.int64()),
+            ('c_int32', pa.int32()),
+            ('c_float32', pa.float32()),
+            ('c_string', pa.string()),
+            ('c_boolean', pa.bool_()),
+            ('c_timestamp', pa.timestamp('us')),
+            ('c_array_float32', tensor_type),
+        ]
+    )
+    test_table = pa.Table.from_pydict(value_dict, schema=schema)
+    pa.parquet.write_table(test_table, str(output_path / 'test.parquet'))
+def assert_hf_dataset_equal(hf_dataset: 'datasets.Dataset', df: pxt.DataFrame, split_column_name: str) -> None:
+    import datasets
+    assert df.count() == hf_dataset.num_rows
+    assert set(df.get_column_names()) == (set(hf_dataset.features.keys()) | {split_column_name})
+    # immutable so we can use it as in a set
+    DatasetTuple = namedtuple('DatasetTuple', ' '.join(hf_dataset.features.keys()))
+    acc_dataset: Set[DatasetTuple] = set()
+    for tup in hf_dataset:
+        immutable_tup = {}
+        for k in tup:
+            if isinstance(tup[k], list):
+                immutable_tup[k] = tuple(tup[k])
+            else:
+                immutable_tup[k] = tup[k]
+        acc_dataset.add(DatasetTuple(**immutable_tup))
+    for tup in df.collect():
+        assert tup[split_column_name] in hf_dataset.split._name
+        encoded_tup = {}
+        for column_name, value in tup.items():
+            if column_name == split_column_name:
+                continue
+            feature_type = hf_dataset.features[column_name]
+            if isinstance(feature_type, datasets.ClassLabel):
+                assert value in feature_type.names
+                # must use the index of the class label as the value to
+                # compare with dataset iteration output.
+                value = feature_type.encode_example(value)
+            elif isinstance(feature_type, datasets.Sequence):
+                assert feature_type.feature.dtype == 'float32', 'may need to add more types'
+                value = tuple([float(x) for x in value])
+            encoded_tup[column_name] = value
+        check_tup = DatasetTuple(**encoded_tup)
+        assert check_tup in acc_dataset
-class SumAggregator:
-    def __init__(self):
-        self.sum = 0
-    @classmethod
-    def make_aggregator(cls) -> 'SumAggregator':
-        return cls()
-    def update(self, val: int) -> None:
-        self.sum += val
-    def value(self) -> int:
-        return self.sum
-sum_uda = Function(
-    IntType(), [IntType()],
-    init_fn=SumAggregator.make_aggregator, update_fn=SumAggregator.update, value_fn=SumAggregator.value)
+SAMPLE_IMAGE_URL = \
+    'https://raw.githubusercontent.com/pixeltable/pixeltable/master/docs/source/data/images/000000000009.jpg'

pixeltable/tool/create_test_db_dump.py ADDED Viewed

@@ -0,0 +1,149 @@
+import datetime
+import json
+import logging
+import os
+import pathlib
+import subprocess
+import pgserver
+import toml
+import pixeltable as pxt
+import pixeltable.metadata as metadata
+from pixeltable.env import Env
+from pixeltable.type_system import \
+    StringType, IntType, FloatType, BoolType, TimestampType, JsonType
+_logger = logging.getLogger('pixeltable')
+class Dumper:
+    def __init__(self, output_dir='target', db_name='pxtdump') -> None:
+        self.output_dir = pathlib.Path(output_dir)
+        shared_home = pathlib.Path(os.environ.get('PIXELTABLE_HOME', '~/.pixeltable')).expanduser()
+        mock_home_dir = self.output_dir / '.pixeltable'
+        mock_home_dir.mkdir(parents=True, exist_ok=True)
+        os.environ['PIXELTABLE_HOME'] = str(mock_home_dir)
+        os.environ['PIXELTABLE_CONFIG'] = str(shared_home / 'config.yaml')
+        os.environ['PIXELTABLE_DB'] = db_name
+        os.environ['PIXELTABLE_PGDATA'] = str(shared_home / 'pgdata')
+        Env.get().set_up(reinit_db=True)
+        self.cl = pxt.Client()
+        self.cl.logging(level=logging.DEBUG, to_stdout=True)
+    def dump_db(self) -> None:
+        md_version = metadata.VERSION
+        dump_file = self.output_dir / f'pixeltable-v{md_version:03d}-test.dump.gz'
+        _logger.info(f'Creating database dump at: {dump_file}')
+        pg_package_dir = os.path.dirname(pgserver.__file__)
+        pg_dump_binary = f'{pg_package_dir}/pginstall/bin/pg_dump'
+        _logger.info(f'Using pg_dump binary at: {pg_dump_binary}')
+        with open(dump_file, 'wb') as dump:
+            pg_dump_process = subprocess.Popen(
+                [pg_dump_binary, Env.get().db_url, '-U', 'postgres', '-Fc'],
+                stdout=subprocess.PIPE
+            )
+            subprocess.run(
+                ["gzip", "-9"],
+                stdin=pg_dump_process.stdout,
+                stdout=dump,
+                check=True
+            )
+        info_file = self.output_dir / f'pixeltable-v{md_version:03d}-test-info.toml'
+        git_sha = subprocess.check_output(['git', 'rev-parse', 'HEAD']).decode('ascii').strip()
+        user = os.environ.get('USER', os.environ.get('USERNAME'))
+        info_dict = {'pixeltable-dump': {
+            'metadata-version': md_version,
+            'git-sha': git_sha,
+            'datetime': datetime.datetime.utcnow(),
+            'user': user
+        }}
+        with open(info_file, 'w') as info:
+            toml.dump(info_dict, info)
+    # TODO: Add additional features to the test DB dump (ideally it should exercise
+    # every major pixeltable DB feature)
+    def create_tables(self) -> None:
+        schema = {
+            'c1': StringType(nullable=False),
+            'c1n': StringType(nullable=True),
+            'c2': IntType(nullable=False),
+            'c3': FloatType(nullable=False),
+            'c4': BoolType(nullable=False),
+            'c5': TimestampType(nullable=False),
+            'c6': JsonType(nullable=False),
+            'c7': JsonType(nullable=False),
+        }
+        t = self.cl.create_table('sample_table', schema, primary_key='c2')
+        t.add_column(c8=[[1, 2, 3], [4, 5, 6]])
+        # Add columns for .astype converters to ensure they're persisted properly
+        t.add_column(c2_as_float=t.c2.astype(FloatType()))
+        # Add columns for .apply
+        t.add_column(c2_to_string=t.c2.apply(str))
+        t.add_column(c6_to_string=t.c6.apply(json.dumps))
+        t.add_column(c6_back_to_json=t.c6_to_string.apply(json.loads))
+        num_rows = 100
+        d1 = {
+            'f1': 'test string 1',
+            'f2': 1,
+            'f3': 1.0,
+            'f4': True,
+            'f5': [1.0, 2.0, 3.0, 4.0],
+            'f6': {
+                'f7': 'test string 2',
+                'f8': [1.0, 2.0, 3.0, 4.0],
+            },
+        }
+        d2 = [d1, d1]
+        c1_data = [f'test string {i}' for i in range(num_rows)]
+        c2_data = [i for i in range(num_rows)]
+        c3_data = [float(i) for i in range(num_rows)]
+        c4_data = [bool(i % 2) for i in range(num_rows)]
+        c5_data = [datetime.datetime.now()] * num_rows
+        c6_data = []
+        for i in range(num_rows):
+            d = {
+                'f1': f'test string {i}',
+                'f2': i,
+                'f3': float(i),
+                'f4': bool(i % 2),
+                'f5': [1.0, 2.0, 3.0, 4.0],
+                'f6': {
+                    'f7': 'test string 2',
+                    'f8': [1.0, 2.0, 3.0, 4.0],
+                },
+            }
+            c6_data.append(d)
+        c7_data = [d2] * num_rows
+        rows = [
+            {
+                'c1': c1_data[i],
+                'c1n': c1_data[i] if i % 10 != 0 else None,
+                'c2': c2_data[i],
+                'c3': c3_data[i],
+                'c4': c4_data[i],
+                'c5': c5_data[i],
+                'c6': c6_data[i],
+                'c7': c7_data[i],
+            }
+            for i in range(num_rows)
+        ]
+        t.insert(rows)
+def main() -> None:
+    _logger.info("Creating pixeltable test artifact.")
+    dumper = Dumper()
+    dumper.create_tables()
+    dumper.dump_db()
+if __name__ == "__main__":
+    main()

pixeltable/tool/create_test_video.py ADDED Viewed

@@ -0,0 +1,81 @@
+import av
+import PIL.Image
+import PIL.ImageDraw
+import PIL.ImageFont
+from pathlib import Path
+from typing import Optional
+import tempfile
+import math
+def create_test_video(
+    frame_count: int,
+    frame_rate: float = 1.0,
+    frame_width: int = 224,
+    aspect_ratio: str = '16:9',
+    frame_height: Optional[int] = None,
+    output_path: Optional[Path] = None,
+    font_file: str = '/Library/Fonts/Arial Unicode.ttf',
+) -> Path:
+    """
+        Creates an .mp4 video file such as the ones in /tests/data/test_videos
+        The video contains a frame number in each frame (for visual sanity check).
+        Args:
+            frame_count: Number of frames to create
+            frame_rate: Frame rate of the video
+            frame_width (int): Width in pixels of the video frame. Note: cost of decoding increases dramatically
+                with frame width * frame height.
+            aspect_ratio: Aspect ratio (width/height) of the video frames string of form 'width:height'
+            frame_height: Height of the video frame, if given, aspect_ratio is ignored
+            output_path: Path to save the video file
+            font_file: Path to the font file used for text.
+    """
+    if output_path is None:
+        output_path = Path(tempfile.NamedTemporaryFile(suffix='.mp4', delete=False).name)
+    parts = [int(p) for p in aspect_ratio.split(':')]
+    assert len(parts) == 2
+    aspect_ratio = parts[0] / parts[1]
+    if frame_height is None:
+        frame_height = math.ceil(frame_width / aspect_ratio)
+    frame_size = (frame_width, frame_height)
+    font_size = min(frame_height, frame_width) // 4
+    font = PIL.ImageFont.truetype(font=font_file, size=font_size)
+    font_fill = 0xFFFFFF  # white
+    frame_color = 0xFFFFFF - font_fill  # black
+    # Create a video container
+    container = av.open(str(output_path), mode='w')
+    # Add a video stream
+    stream = container.add_stream('h264', rate=frame_rate)
+    stream.width, stream.height = frame_size
+    stream.pix_fmt = 'yuv420p'
+    for frame_number in range(frame_count):
+        # Create an image with a number in it
+        image = PIL.Image.new('RGB', frame_size, color=frame_color)
+        draw = PIL.ImageDraw.Draw(image)
+        # Optionally, add a font here if you have one
+        text = str(frame_number)
+        _, _, text_width, text_height = draw.textbbox((0, 0), text, font=font)
+        text_position = ((frame_size[0] - text_width) // 2, (frame_size[1] - text_height) // 2)
+        draw.text(text_position, text, font=font, fill=font_fill)
+        # Convert the PIL image to an AVFrame
+        frame = av.VideoFrame.from_image(image)
+        # Encode and write the frame
+        for packet in stream.encode(frame):
+            container.mux(packet)
+    # Flush and close the stream
+    for packet in stream.encode():
+        container.mux(packet)
+    container.close()
+    return output_path

pixeltable 0.1.0__py3-none-any.whl → 0.2.4__py3-none-any.whl

Potentially problematic release.

pixeltable 0.1.0__py3-none-any.whl → 0.2.4__py3-none-any.whl