pixeltable 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (56)
  1. pixeltable/catalog/column.py +25 -48
  2. pixeltable/catalog/insertable_table.py +7 -4
  3. pixeltable/catalog/table.py +163 -57
  4. pixeltable/catalog/table_version.py +416 -140
  5. pixeltable/catalog/table_version_path.py +2 -2
  6. pixeltable/client.py +0 -4
  7. pixeltable/dataframe.py +65 -21
  8. pixeltable/env.py +16 -1
  9. pixeltable/exec/cache_prefetch_node.py +1 -1
  10. pixeltable/exec/in_memory_data_node.py +11 -7
  11. pixeltable/exprs/comparison.py +3 -3
  12. pixeltable/exprs/data_row.py +5 -1
  13. pixeltable/exprs/literal.py +16 -4
  14. pixeltable/exprs/row_builder.py +8 -40
  15. pixeltable/ext/__init__.py +5 -0
  16. pixeltable/ext/functions/yolox.py +92 -0
  17. pixeltable/func/aggregate_function.py +15 -15
  18. pixeltable/func/expr_template_function.py +9 -1
  19. pixeltable/func/globals.py +24 -14
  20. pixeltable/func/signature.py +18 -12
  21. pixeltable/func/udf.py +7 -2
  22. pixeltable/functions/__init__.py +8 -8
  23. pixeltable/functions/eval.py +7 -8
  24. pixeltable/functions/huggingface.py +47 -19
  25. pixeltable/functions/openai.py +2 -2
  26. pixeltable/functions/util.py +11 -0
  27. pixeltable/index/__init__.py +2 -0
  28. pixeltable/index/base.py +49 -0
  29. pixeltable/index/embedding_index.py +95 -0
  30. pixeltable/metadata/schema.py +45 -22
  31. pixeltable/plan.py +15 -34
  32. pixeltable/store.py +38 -41
  33. pixeltable/tests/conftest.py +5 -11
  34. pixeltable/tests/ext/test_yolox.py +21 -0
  35. pixeltable/tests/functions/test_fireworks.py +1 -0
  36. pixeltable/tests/functions/test_huggingface.py +2 -2
  37. pixeltable/tests/functions/test_openai.py +15 -5
  38. pixeltable/tests/functions/test_together.py +1 -0
  39. pixeltable/tests/test_component_view.py +14 -5
  40. pixeltable/tests/test_dataframe.py +19 -18
  41. pixeltable/tests/test_exprs.py +99 -102
  42. pixeltable/tests/test_function.py +51 -43
  43. pixeltable/tests/test_index.py +138 -0
  44. pixeltable/tests/test_migration.py +2 -1
  45. pixeltable/tests/test_snapshot.py +24 -1
  46. pixeltable/tests/test_table.py +101 -25
  47. pixeltable/tests/test_types.py +30 -0
  48. pixeltable/tests/test_video.py +16 -16
  49. pixeltable/tests/test_view.py +5 -0
  50. pixeltable/tests/utils.py +43 -9
  51. pixeltable/tool/create_test_db_dump.py +16 -0
  52. pixeltable/type_system.py +37 -45
  53. {pixeltable-0.2.4.dist-info → pixeltable-0.2.5.dist-info}/METADATA +5 -4
  54. {pixeltable-0.2.4.dist-info → pixeltable-0.2.5.dist-info}/RECORD +56 -49
  55. {pixeltable-0.2.4.dist-info → pixeltable-0.2.5.dist-info}/LICENSE +0 -0
  56. {pixeltable-0.2.4.dist-info → pixeltable-0.2.5.dist-info}/WHEEL +0 -0
@@ -18,7 +18,7 @@ from pixeltable import exceptions as excs
18
18
  from pixeltable.iterators import FrameIterator
19
19
  from pixeltable.tests.utils import \
20
20
  make_tbl, create_table_data, read_data_file, get_video_files, get_audio_files, get_image_files, get_documents, \
21
- assert_resultset_eq, assert_hf_dataset_equal, make_test_arrow_table
21
+ assert_resultset_eq, assert_hf_dataset_equal, make_test_arrow_table, validate_update_status
22
22
  from pixeltable.tests.utils import skip_test_if_not_installed
23
23
  from pixeltable.type_system import \
24
24
  StringType, IntType, FloatType, TimestampType, ImageType, VideoType, JsonType, BoolType, ArrayType, AudioType, \
@@ -41,6 +41,21 @@ class TestTable:
41
41
  def add1(a: int) -> int:
42
42
  return a + 1
43
43
 
44
+ @pxt.uda(
45
+ update_types=[IntType()], value_type=IntType(), requires_order_by=True,
46
+ allows_window=True)
47
+ class window_fn:
48
+ def __init__(self):
49
+ pass
50
+ def update(self, i: int) -> None:
51
+ pass
52
+ def value(self) -> int:
53
+ return 1
54
+
55
+ @pxt.expr_udf(param_types=[IntType(nullable=False)])
56
+ def add1(a: int) -> int:
57
+ return a + 1
58
+
44
59
  def test_create(self, test_client: pxt.Client) -> None:
45
60
  cl = test_client
46
61
  cl.create_dir('dir1')
@@ -56,7 +71,7 @@ class TestTable:
56
71
  with pytest.raises(excs.Error):
57
72
  _ = cl.create_table('1test', schema)
58
73
  with pytest.raises(excs.Error):
59
- _ = catalog.Column('1c', StringType())
74
+ _ = cl.create_table('bad name', schema={'c1': StringType()})
60
75
  with pytest.raises(excs.Error):
61
76
  _ = cl.create_table('test', schema)
62
77
  with pytest.raises(excs.Error):
@@ -299,10 +314,6 @@ class TestTable:
299
314
  cl.create_table('test', {'c1': {'type': StringType(), 'stored': 'true'}})
300
315
  assert '"stored" must be a bool' in str(exc_info.value)
301
316
 
302
- with pytest.raises(excs.Error) as exc_info:
303
- cl.create_table('test', {'c1': {'type': StringType(), 'indexed': 'true'}})
304
- assert '"indexed" must be a bool' in str(exc_info.value)
305
-
306
317
  with pytest.raises(excs.Error) as exc_info:
307
318
  cl.create_table('test', {'c1': StringType()}, primary_key='c2')
308
319
  assert 'primary key column c2 not found' in str(exc_info.value).lower()
@@ -501,18 +512,8 @@ class TestTable:
501
512
  # a non-materialized column that refers to another non-materialized column
502
513
  view.add_column(c4=view.c2.rotate(60), stored=False)
503
514
 
504
- @pxt.uda(
505
- name='window_fn', update_types=[IntType()], value_type=IntType(), requires_order_by = True,
506
- allows_window = True)
507
- class WindowFnAggregator:
508
- def __init__(self):
509
- pass
510
- def update(self, i: int) -> None:
511
- pass
512
- def value(self) -> int:
513
- return 1
514
515
  # cols computed with window functions are stored by default
515
- view.add_column(c5=window_fn(view.frame_idx, 1, group_by=view.video))
516
+ view.add_column(c5=self.window_fn(view.frame_idx, 1, group_by=view.video))
516
517
 
517
518
  # reload to make sure that metadata gets restored correctly
518
519
  cl = pxt.Client(reload=True)
@@ -553,6 +554,23 @@ class TestTable:
553
554
  cl.drop_table('test_tbl')
554
555
  assert MediaStore.count(view.get_id()) == 0
555
556
 
557
+ def test_insert_nulls(self, test_client: pxt.Client) -> None:
558
+ cl = test_client
559
+ schema = {
560
+ 'c1': StringType(nullable=True),
561
+ 'c2': IntType(nullable=True),
562
+ 'c3': FloatType(nullable=True),
563
+ 'c4': BoolType(nullable=True),
564
+ 'c5': ArrayType((2, 3), dtype=IntType(), nullable=True),
565
+ 'c6': JsonType(nullable=True),
566
+ 'c7': ImageType(nullable=True),
567
+ 'c8': VideoType(nullable=True),
568
+ }
569
+ t = cl.create_table('test1', schema)
570
+ status = t.insert(c1='abc')
571
+ assert status.num_rows == 1
572
+ assert status.num_excs == 0
573
+
556
574
  def test_insert(self, test_client: pxt.Client) -> None:
557
575
  cl = test_client
558
576
  schema = {
@@ -650,7 +668,63 @@ class TestTable:
650
668
  t2 = cl.get_table('test')
651
669
  _ = t2.show(n=0)
652
670
 
653
- def test_update(self, test_tbl: pxt.Table, indexed_img_tbl: pxt.Table) -> None:
671
+ def test_batch_update(self, test_tbl: pxt.Table) -> None:
672
+ t = test_tbl
673
+ validate_update_status(
674
+ t.batch_update([{'c1': '1', 'c2': 1}, {'c1': '2', 'c2': 2}]),
675
+ expected_rows=2)
676
+ assert t.where(t.c2 == 1).collect()[0]['c1'] == '1'
677
+ assert t.where(t.c2 == 2).collect()[0]['c1'] == '2'
678
+ validate_update_status(
679
+ t.batch_update([{'c1': 'one', '_rowid': (1,)}, {'c1': 'two', '_rowid': (2,)}]),
680
+ expected_rows=2)
681
+ assert t.where(t.c2 == 1).collect()[0]['c1'] == 'one'
682
+ assert t.where(t.c2 == 2).collect()[0]['c1'] == 'two'
683
+
684
+ cl = pxt.Client()
685
+ # test composite primary key
686
+ schema = {'c1': StringType(), 'c2': IntType(), 'c3': FloatType()}
687
+ t = cl.create_table('composite', schema=schema, primary_key=['c1', 'c2'])
688
+ rows = [{'c1': str(i), 'c2': i, 'c3': float(i)} for i in range(10)]
689
+ validate_update_status(t.insert(rows), expected_rows=10)
690
+
691
+ validate_update_status(
692
+ t.batch_update([{'c1': '1', 'c2': 1, 'c3': 2.0}, {'c1': '2', 'c2': 2, 'c3': 3.0}]),
693
+ expected_rows=2)
694
+
695
+ with pytest.raises(excs.Error) as exc_info:
696
+ # can't mix _rowid with primary key
697
+ _ = t.batch_update([{'c1': '1', 'c2': 1, 'c3': 2.0, '_rowid': (1,)}])
698
+ assert 'c1 is a primary key column' in str(exc_info.value).lower()
699
+
700
+ with pytest.raises(excs.Error) as exc_info:
701
+ # bad literal
702
+ _ = t.batch_update([{'c2': 1, 'c3': 'a'}])
703
+ assert "'a' is not a valid literal" in str(exc_info.value).lower()
704
+
705
+ with pytest.raises(excs.Error) as exc_info:
706
+ # missing primary key column
707
+ t.batch_update([{'c1': '1', 'c3': 2.0}])
708
+ assert 'primary key columns (c2) missing' in str(exc_info.value).lower()
709
+
710
+ # table without primary key
711
+ t2 = cl.create_table('no_pk', schema=schema)
712
+ validate_update_status(t2.insert(rows), expected_rows=10)
713
+ with pytest.raises(excs.Error) as exc_info:
714
+ _ = t2.batch_update([{'c1': '1', 'c2': 1, 'c3': 2.0}])
715
+ assert 'must have primary key for batch update' in str(exc_info.value).lower()
716
+
717
+ # updating with _rowid still works
718
+ validate_update_status(
719
+ t2.batch_update([{'c1': 'one', '_rowid': (1,)}, {'c1': 'two', '_rowid': (2,)}]),
720
+ expected_rows=2)
721
+ assert t2.where(t2.c2 == 1).collect()[0]['c1'] == 'one'
722
+ assert t2.where(t2.c2 == 2).collect()[0]['c1'] == 'two'
723
+ with pytest.raises(AssertionError):
724
+ # some rows are missing rowids
725
+ _ = t2.batch_update([{'c1': 'one', '_rowid': (1,)}, {'c1': 'two'}])
726
+
727
+ def test_update(self, test_tbl: pxt.Table, small_img_tbl) -> None:
654
728
  t = test_tbl
655
729
  # update every type with a literal
656
730
  test_cases = [
@@ -755,7 +829,7 @@ class TestTable:
755
829
  t.update({'c3': 1.0}, where=lambda c2: c2 == 10)
756
830
  assert 'Predicate' in str(excinfo.value)
757
831
 
758
- img_t = indexed_img_tbl
832
+ img_t = small_img_tbl
759
833
 
760
834
  # can't update image col
761
835
  with pytest.raises(excs.Error) as excinfo:
@@ -785,7 +859,7 @@ class TestTable:
785
859
  r2 = t.where(t.c2 < 5).select(t.c3, t.c10, t.d1, t.d2).order_by(t.c2).show(0)
786
860
  assert_resultset_eq(r1, r2)
787
861
 
788
- def test_delete(self, test_tbl: pxt.Table, indexed_img_tbl: pxt.Table) -> None:
862
+ def test_delete(self, test_tbl: pxt.Table, small_img_tbl) -> None:
789
863
  t = test_tbl
790
864
 
791
865
  cnt = t.where(t.c3 < 10.0).count()
@@ -813,7 +887,7 @@ class TestTable:
813
887
  t.delete(where=lambda c2: c2 == 10)
814
888
  assert 'Predicate' in str(excinfo.value)
815
889
 
816
- img_t = indexed_img_tbl
890
+ img_t = small_img_tbl
817
891
  # similarity search is not supported
818
892
  with pytest.raises(excs.Error) as excinfo:
819
893
  img_t.delete(where=img_t.img.nearest('car'))
@@ -979,6 +1053,10 @@ class TestTable:
979
1053
  t2.revert()
980
1054
  assert MediaStore.count(t2.get_id()) == t2.count() * stores_img_col
981
1055
 
1056
+ @pxt.udf(return_type=ImageType(), param_types=[ImageType()])
1057
+ def img_fn_with_exc(img: PIL.Image.Image) -> PIL.Image.Image:
1058
+ raise RuntimeError
1059
+
982
1060
  def test_computed_img_cols(self, test_client: pxt.Client) -> None:
983
1061
  cl = test_client
984
1062
  schema = {'img': ImageType(nullable=False)}
@@ -996,10 +1074,7 @@ class TestTable:
996
1074
 
997
1075
  # computed img col with exceptions
998
1076
  t = cl.create_table('test3', schema)
999
- @pxt.udf(return_type=ImageType(), param_types=[ImageType()])
1000
- def f(img: PIL.Image.Image) -> PIL.Image.Image:
1001
- raise RuntimeError
1002
- t.add_column(c3=f(t.img), stored=True)
1077
+ t.add_column(c3=self.img_fn_with_exc(t.img), stored=True)
1003
1078
  rows = read_data_file('imagenette2-160', 'manifest.csv', ['img'])
1004
1079
  rows = [{'img': r['img']} for r in rows[:20]]
1005
1080
  t.insert(rows, fail_on_exception=False)
@@ -1211,6 +1286,7 @@ class TestTable:
1211
1286
  check_rename(t, 'c1_renamed', 'c1')
1212
1287
 
1213
1288
  # revert() works
1289
+ _ = t.select(t.c1_renamed).collect()
1214
1290
  t.revert()
1215
1291
  _ = t.select(t.c1).collect()
1216
1292
  #check_rename(t, 'c1', 'c1_renamed')
@@ -1,3 +1,7 @@
1
+ import datetime
2
+ from copy import copy
3
+ from typing import List, Dict, Optional
4
+
1
5
  from pixeltable.type_system import \
2
6
  ColumnType, StringType, IntType, BoolType, ImageType, InvalidType, FloatType, TimestampType, JsonType, ArrayType
3
7
 
@@ -20,3 +24,29 @@ class TestTypes:
20
24
  t_serialized = t.serialize()
21
25
  t_deserialized = ColumnType.deserialize(t_serialized)
22
26
  assert t == t_deserialized
27
+
28
+ def test_from_python_type(self) -> None:
29
+ test_cases = {
30
+ str: StringType(),
31
+ int: IntType(),
32
+ float: FloatType(),
33
+ bool: BoolType(),
34
+ datetime.date: TimestampType(),
35
+ datetime.datetime: TimestampType(),
36
+ list: JsonType(),
37
+ dict: JsonType(),
38
+ list[int]: JsonType(),
39
+ list[dict[str, int]]: JsonType(),
40
+ dict[int, str]: JsonType(),
41
+ dict[dict[str, int], list[int]]: JsonType(),
42
+ List: JsonType(),
43
+ Dict: JsonType(),
44
+ List[int]: JsonType(),
45
+ List[Dict[str, int]]: JsonType(),
46
+ Dict[int, str]: JsonType()
47
+ }
48
+ for py_type, pxt_type in test_cases.items():
49
+ assert ColumnType.from_python_type(py_type) == pxt_type
50
+ opt_pxt_type = copy(pxt_type)
51
+ opt_pxt_type.nullable = True
52
+ assert ColumnType.from_python_type(Optional[py_type]) == opt_pxt_type
@@ -107,6 +107,18 @@ class TestVideo:
107
107
  base_t.insert({'video': p} for p in video_filepaths)
108
108
  _ = view_t[view_t.c1, view_t.c2, view_t.c3, view_t.c4].show(0)
109
109
 
110
+ # window function that simply passes through the frame
111
+ @pxt.uda(
112
+ update_types=[ImageType()], value_type=ImageType(),
113
+ requires_order_by=True, allows_std_agg=False, allows_window=True)
114
+ class agg_fn:
115
+ def __init__(self):
116
+ self.img = None
117
+ def update(self, frame: PIL.Image.Image) -> None:
118
+ self.img = frame
119
+ def value(self) -> PIL.Image.Image:
120
+ return self.img
121
+
110
122
  def test_make_video(self, test_client: pxt.Client) -> None:
111
123
  video_filepaths = get_video_files()
112
124
  cl = test_client
@@ -131,29 +143,17 @@ class TestVideo:
131
143
  make_video(view_t.pos, view_t.frame),
132
144
  make_video(view_t.pos - 1, view_t.transformed)).group_by(base_t).show()
133
145
 
134
- # window function that simply passes through the frame
135
- @pxt.uda(
136
- update_types=[ImageType()], value_type=ImageType(), name='agg_fn',
137
- requires_order_by=True, allows_std_agg=False, allows_window=True)
138
- class WindowAgg:
139
- def __init__(self):
140
- self.img = None
141
- def update(self, frame: PIL.Image.Image) -> None:
142
- self.img = frame
143
- def value(self) -> PIL.Image.Image:
144
- return self.img
145
-
146
146
  # make sure it works
147
- _ = view_t.select(agg_fn(view_t.pos, view_t.frame, group_by=base_t)).show()
148
- status = view_t.add_column(agg=agg_fn(view_t.pos, view_t.frame, group_by=base_t))
147
+ _ = view_t.select(self.agg_fn(view_t.pos, view_t.frame, group_by=base_t)).show()
148
+ status = view_t.add_column(agg=self.agg_fn(view_t.pos, view_t.frame, group_by=base_t))
149
149
  assert status.num_excs == 0
150
150
  _ = view_t.select(make_video(view_t.pos, view_t.agg)).group_by(base_t).show()
151
151
 
152
152
  # image cols computed with a window function currently need to be stored
153
153
  with pytest.raises(excs.Error):
154
- view_t.add_column(agg2=agg_fn(view_t.pos, view_t.frame, group_by=base_t), stored=False)
154
+ view_t.add_column(agg2=self.agg_fn(view_t.pos, view_t.frame, group_by=base_t), stored=False)
155
155
 
156
156
  # reload from store
157
157
  cl = pxt.Client(reload=True)
158
158
  base_t, view_t = cl.get_table(base_t.get_name()), cl.get_table(view_t.get_name())
159
- _ = view_t.select(agg_fn(view_t.pos, view_t.frame, group_by=base_t)).show()
159
+ _ = view_t.select(self.agg_fn(view_t.pos, view_t.frame, group_by=base_t)).show()
@@ -1,3 +1,4 @@
1
+ import datetime
1
2
  import logging
2
3
 
3
4
  import PIL
@@ -414,6 +415,10 @@ class TestView:
414
415
  v.order_by(v.c2).show(0),
415
416
  t.where(t.c2 < 10).order_by(t.c2).show(0))
416
417
 
418
+ # create views with filters containing date and datetime
419
+ _ = cl.create_view('test_view_2', t, filter=t.c5 >= datetime.date.today())
420
+ _ = cl.create_view('test_view_3', t, filter=t.c5 < datetime.datetime.now())
421
+
417
422
  def test_view_of_snapshot(self, test_client: pxt.Client) -> None:
418
423
  """Test view over a snapshot"""
419
424
  cl = test_client
pixeltable/tests/utils.py CHANGED
@@ -6,6 +6,7 @@ from collections import namedtuple
6
6
  from pathlib import Path
7
7
  from typing import Any, Dict, List, Optional, Set
8
8
 
9
+ import PIL.Image
9
10
  import numpy as np
10
11
  import pandas as pd
11
12
  import pytest
@@ -16,6 +17,7 @@ from pixeltable import catalog
16
17
  from pixeltable.catalog.globals import UpdateStatus
17
18
  from pixeltable.dataframe import DataFrameResultSet
18
19
  from pixeltable.env import Env
20
+ from pixeltable.functions.huggingface import clip_image, clip_text
19
21
  from pixeltable.type_system import (
20
22
  ArrayType,
21
23
  BoolType,
@@ -43,6 +45,7 @@ def make_default_type(t: ColumnType.Type) -> ColumnType:
43
45
  return TimestampType()
44
46
  assert False
45
47
 
48
+
46
49
  def make_tbl(cl: pxt.Client, name: str = 'test', col_names: Optional[List[str]] = None) -> catalog.InsertableTable:
47
50
  if col_names is None:
48
51
  col_names = ['c1']
@@ -51,7 +54,9 @@ def make_tbl(cl: pxt.Client, name: str = 'test', col_names: Optional[List[str]]
51
54
  schema[f'{col_name}'] = make_default_type(ColumnType.Type(i % 5))
52
55
  return cl.create_table(name, schema)
53
56
 
54
- def create_table_data(t: catalog.Table, col_names: Optional[List[str]] = None, num_rows: int = 10) -> List[Dict[str, Any]]:
57
+
58
+ def create_table_data(t: catalog.Table, col_names: Optional[List[str]] = None, num_rows: int = 10) -> List[
59
+ Dict[str, Any]]:
55
60
  if col_names is None:
56
61
  col_names = []
57
62
  data: Dict[str, Any] = {}
@@ -124,6 +129,7 @@ def create_table_data(t: catalog.Table, col_names: Optional[List[str]] = None, n
124
129
  rows = [{col_name: data[col_name][i] for col_name in col_names} for i in range(num_rows)]
125
130
  return rows
126
131
 
132
+
127
133
  def create_test_tbl(client: pxt.Client, name: str = 'test_tbl') -> catalog.Table:
128
134
  schema = {
129
135
  'c1': StringType(nullable=False),
@@ -189,12 +195,25 @@ def create_test_tbl(client: pxt.Client, name: str = 'test_tbl') -> catalog.Table
189
195
  t.insert(rows)
190
196
  return t
191
197
 
198
+
199
+ def create_img_tbl(cl: pxt.Client, name: str = 'test_img_tbl') -> catalog.Table:
200
+ schema = {
201
+ 'img': ImageType(nullable=False),
202
+ 'category': StringType(nullable=False),
203
+ 'split': StringType(nullable=False),
204
+ }
205
+ tbl = cl.create_table(name, schema)
206
+ rows = read_data_file('imagenette2-160', 'manifest.csv', ['img'])
207
+ tbl.insert(rows)
208
+ return tbl
209
+
210
+
192
211
  def create_all_datatypes_tbl(test_client: pxt.Client) -> catalog.Table:
193
212
  """ Creates a table with all supported datatypes.
194
213
  """
195
214
  schema = {
196
- 'row_id': IntType(nullable=False), # used for row selection
197
- 'c_array': ArrayType(shape=(10,), dtype=FloatType(), nullable=True),
215
+ 'row_id': IntType(nullable=False), # used for row selection
216
+ 'c_array': ArrayType(shape=(10,), dtype=FloatType(), nullable=True),
198
217
  'c_bool': BoolType(nullable=True),
199
218
  'c_float': FloatType(nullable=True),
200
219
  'c_image': ImageType(nullable=True),
@@ -207,12 +226,13 @@ def create_all_datatypes_tbl(test_client: pxt.Client) -> catalog.Table:
207
226
  tbl = test_client.create_table('all_datatype_tbl', schema)
208
227
  example_rows = create_table_data(tbl, num_rows=11)
209
228
 
210
- for i,r in enumerate(example_rows):
211
- r['row_id'] = i # row_id
229
+ for i, r in enumerate(example_rows):
230
+ r['row_id'] = i # row_id
212
231
 
213
232
  tbl.insert(example_rows)
214
233
  return tbl
215
234
 
235
+
216
236
  def read_data_file(dir_name: str, file_name: str, path_col_names: Optional[List[str]] = None) -> List[Dict[str, Any]]:
217
237
  """
218
238
  Locate dir_name, create df out of file_name.
@@ -223,7 +243,7 @@ def read_data_file(dir_name: str, file_name: str, path_col_names: Optional[List[
223
243
  """
224
244
  if path_col_names is None:
225
245
  path_col_names = []
226
- tests_dir = os.path.dirname(__file__) # search with respect to tests/ dir
246
+ tests_dir = os.path.dirname(__file__) # search with respect to tests/ dir
227
247
  glob_result = glob.glob(f'{tests_dir}/**/{dir_name}', recursive=True)
228
248
  assert len(glob_result) == 1, f'Could not find {dir_name}'
229
249
  abs_path = Path(glob_result[0])
@@ -235,8 +255,9 @@ def read_data_file(dir_name: str, file_name: str, path_col_names: Optional[List[
235
255
  df[col_name] = df.apply(lambda r: str(abs_path / r[col_name]), axis=1)
236
256
  return df.to_dict(orient='records')
237
257
 
258
+
238
259
  def get_video_files(include_bad_video: bool = False) -> List[str]:
239
- tests_dir = os.path.dirname(__file__) # search with respect to tests/ dir
260
+ tests_dir = os.path.dirname(__file__) # search with respect to tests/ dir
240
261
  glob_result = glob.glob(f'{tests_dir}/**/videos/*', recursive=True)
241
262
  if not include_bad_video:
242
263
  glob_result = [f for f in glob_result if 'bad_video' not in f]
@@ -244,18 +265,21 @@ def get_video_files(include_bad_video: bool = False) -> List[str]:
244
265
  half_res = [f for f in glob_result if 'half_res' in f or 'bad_video' in f]
245
266
  return half_res
246
267
 
268
+
247
269
  def get_test_video_files() -> List[str]:
248
- tests_dir = os.path.dirname(__file__) # search with respect to tests/ dir
270
+ tests_dir = os.path.dirname(__file__) # search with respect to tests/ dir
249
271
  glob_result = glob.glob(f'{tests_dir}/**/test_videos/*', recursive=True)
250
272
  return glob_result
251
273
 
274
+
252
275
  def get_image_files(include_bad_image: bool = False) -> List[str]:
253
- tests_dir = os.path.dirname(__file__) # search with respect to tests/ dir
276
+ tests_dir = os.path.dirname(__file__) # search with respect to tests/ dir
254
277
  glob_result = glob.glob(f'{tests_dir}/**/imagenette2-160/*', recursive=True)
255
278
  if not include_bad_image:
256
279
  glob_result = [f for f in glob_result if 'bad_image' not in f]
257
280
  return glob_result
258
281
 
282
+
259
283
  def get_audio_files(include_bad_audio: bool = False) -> List[str]:
260
284
  tests_dir = os.path.dirname(__file__)
261
285
  glob_result = glob.glob(f'{tests_dir}/**/audio/*', recursive=True)
@@ -263,11 +287,13 @@ def get_audio_files(include_bad_audio: bool = False) -> List[str]:
263
287
  glob_result = [f for f in glob_result if 'bad_audio' not in f]
264
288
  return glob_result
265
289
 
290
+
266
291
  def get_documents() -> List[str]:
267
292
  tests_dir = os.path.dirname(__file__)
268
293
  # for now, we can only handle .html and .md
269
294
  return [p for p in glob.glob(f'{tests_dir}/**/documents/*', recursive=True) if not p.endswith('.pdf')]
270
295
 
296
+
271
297
  def get_sentences(n: int = 100) -> List[str]:
272
298
  tests_dir = os.path.dirname(__file__)
273
299
  path = glob.glob(f'{tests_dir}/**/jeopardy.json', recursive=True)[0]
@@ -403,6 +429,14 @@ def assert_hf_dataset_equal(hf_dataset: 'datasets.Dataset', df: pxt.DataFrame, s
403
429
  check_tup = DatasetTuple(**encoded_tup)
404
430
  assert check_tup in acc_dataset
405
431
 
432
+ @pxt.expr_udf
433
+ def img_embed(img: PIL.Image.Image) -> np.ndarray:
434
+ return clip_image(img, model_id='openai/clip-vit-base-patch32')
435
+
436
+ @pxt.expr_udf
437
+ def text_embed(txt: str) -> np.ndarray:
438
+ return clip_text(txt, model_id='openai/clip-vit-base-patch32')
406
439
 
407
440
  SAMPLE_IMAGE_URL = \
408
441
  'https://raw.githubusercontent.com/pixeltable/pixeltable/master/docs/source/data/images/000000000009.jpg'
442
+
@@ -136,6 +136,22 @@ class Dumper:
136
136
  for i in range(num_rows)
137
137
  ]
138
138
  t.insert(rows)
139
+ self.cl.create_dir('views')
140
+ v = self.cl.create_view('views.sample_view', t, filter=(t.c2 < 50))
141
+ _ = self.cl.create_view('views.sample_snapshot', t, filter=(t.c2 >= 75), is_snapshot=True)
142
+ # Computed column using a library function
143
+ v['str_format'] = pxt.functions.string.str_format('{0} {key}', t.c1, key=t.c1)
144
+ # Computed column using a bespoke udf
145
+ v['test_udf'] = test_udf(t.c2)
146
+ # astype
147
+ v['astype'] = t.c1.astype(pxt.FloatType())
148
+ # computed column using a stored function
149
+ v['stored'] = t.c1.apply(lambda x: f'Hello, {x}', col_type=pxt.StringType())
150
+
151
+
152
+ @pxt.udf
153
+ def test_udf(n: int) -> int:
154
+ return n + 1
139
155
 
140
156
 
141
157
  def main() -> None: