pixeltable 0.1.2__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (140) hide show
  1. pixeltable/__init__.py +21 -4
  2. pixeltable/catalog/__init__.py +13 -0
  3. pixeltable/catalog/catalog.py +159 -0
  4. pixeltable/catalog/column.py +200 -0
  5. pixeltable/catalog/dir.py +32 -0
  6. pixeltable/catalog/globals.py +33 -0
  7. pixeltable/catalog/insertable_table.py +191 -0
  8. pixeltable/catalog/named_function.py +36 -0
  9. pixeltable/catalog/path.py +58 -0
  10. pixeltable/catalog/path_dict.py +139 -0
  11. pixeltable/catalog/schema_object.py +39 -0
  12. pixeltable/catalog/table.py +581 -0
  13. pixeltable/catalog/table_version.py +749 -0
  14. pixeltable/catalog/table_version_path.py +133 -0
  15. pixeltable/catalog/view.py +203 -0
  16. pixeltable/client.py +520 -31
  17. pixeltable/dataframe.py +540 -349
  18. pixeltable/env.py +373 -48
  19. pixeltable/exceptions.py +12 -21
  20. pixeltable/exec/__init__.py +9 -0
  21. pixeltable/exec/aggregation_node.py +78 -0
  22. pixeltable/exec/cache_prefetch_node.py +113 -0
  23. pixeltable/exec/component_iteration_node.py +79 -0
  24. pixeltable/exec/data_row_batch.py +95 -0
  25. pixeltable/exec/exec_context.py +22 -0
  26. pixeltable/exec/exec_node.py +61 -0
  27. pixeltable/exec/expr_eval_node.py +217 -0
  28. pixeltable/exec/in_memory_data_node.py +69 -0
  29. pixeltable/exec/media_validation_node.py +43 -0
  30. pixeltable/exec/sql_scan_node.py +225 -0
  31. pixeltable/exprs/__init__.py +24 -0
  32. pixeltable/exprs/arithmetic_expr.py +102 -0
  33. pixeltable/exprs/array_slice.py +71 -0
  34. pixeltable/exprs/column_property_ref.py +77 -0
  35. pixeltable/exprs/column_ref.py +105 -0
  36. pixeltable/exprs/comparison.py +77 -0
  37. pixeltable/exprs/compound_predicate.py +98 -0
  38. pixeltable/exprs/data_row.py +187 -0
  39. pixeltable/exprs/expr.py +586 -0
  40. pixeltable/exprs/expr_set.py +39 -0
  41. pixeltable/exprs/function_call.py +380 -0
  42. pixeltable/exprs/globals.py +69 -0
  43. pixeltable/exprs/image_member_access.py +115 -0
  44. pixeltable/exprs/image_similarity_predicate.py +58 -0
  45. pixeltable/exprs/inline_array.py +107 -0
  46. pixeltable/exprs/inline_dict.py +101 -0
  47. pixeltable/exprs/is_null.py +38 -0
  48. pixeltable/exprs/json_mapper.py +121 -0
  49. pixeltable/exprs/json_path.py +159 -0
  50. pixeltable/exprs/literal.py +54 -0
  51. pixeltable/exprs/object_ref.py +41 -0
  52. pixeltable/exprs/predicate.py +44 -0
  53. pixeltable/exprs/row_builder.py +355 -0
  54. pixeltable/exprs/rowid_ref.py +94 -0
  55. pixeltable/exprs/type_cast.py +53 -0
  56. pixeltable/exprs/variable.py +45 -0
  57. pixeltable/func/__init__.py +9 -0
  58. pixeltable/func/aggregate_function.py +194 -0
  59. pixeltable/func/batched_function.py +53 -0
  60. pixeltable/func/callable_function.py +69 -0
  61. pixeltable/func/expr_template_function.py +82 -0
  62. pixeltable/func/function.py +110 -0
  63. pixeltable/func/function_registry.py +227 -0
  64. pixeltable/func/globals.py +36 -0
  65. pixeltable/func/nos_function.py +202 -0
  66. pixeltable/func/signature.py +166 -0
  67. pixeltable/func/udf.py +163 -0
  68. pixeltable/functions/__init__.py +52 -103
  69. pixeltable/functions/eval.py +216 -0
  70. pixeltable/functions/fireworks.py +61 -0
  71. pixeltable/functions/huggingface.py +120 -0
  72. pixeltable/functions/image.py +16 -0
  73. pixeltable/functions/openai.py +88 -0
  74. pixeltable/functions/pil/image.py +148 -7
  75. pixeltable/functions/string.py +13 -0
  76. pixeltable/functions/together.py +27 -0
  77. pixeltable/functions/util.py +41 -0
  78. pixeltable/functions/video.py +62 -0
  79. pixeltable/iterators/__init__.py +3 -0
  80. pixeltable/iterators/base.py +48 -0
  81. pixeltable/iterators/document.py +311 -0
  82. pixeltable/iterators/video.py +89 -0
  83. pixeltable/metadata/__init__.py +54 -0
  84. pixeltable/metadata/converters/convert_10.py +18 -0
  85. pixeltable/metadata/schema.py +211 -0
  86. pixeltable/plan.py +656 -0
  87. pixeltable/store.py +413 -182
  88. pixeltable/tests/conftest.py +143 -86
  89. pixeltable/tests/test_audio.py +65 -0
  90. pixeltable/tests/test_catalog.py +27 -0
  91. pixeltable/tests/test_client.py +14 -14
  92. pixeltable/tests/test_component_view.py +372 -0
  93. pixeltable/tests/test_dataframe.py +433 -0
  94. pixeltable/tests/test_dirs.py +78 -62
  95. pixeltable/tests/test_document.py +117 -0
  96. pixeltable/tests/test_exprs.py +591 -135
  97. pixeltable/tests/test_function.py +297 -67
  98. pixeltable/tests/test_functions.py +283 -1
  99. pixeltable/tests/test_migration.py +43 -0
  100. pixeltable/tests/test_nos.py +54 -0
  101. pixeltable/tests/test_snapshot.py +208 -0
  102. pixeltable/tests/test_table.py +1086 -258
  103. pixeltable/tests/test_transactional_directory.py +42 -0
  104. pixeltable/tests/test_types.py +5 -11
  105. pixeltable/tests/test_video.py +149 -34
  106. pixeltable/tests/test_view.py +530 -0
  107. pixeltable/tests/utils.py +186 -45
  108. pixeltable/tool/create_test_db_dump.py +149 -0
  109. pixeltable/type_system.py +490 -133
  110. pixeltable/utils/__init__.py +17 -46
  111. pixeltable/utils/clip.py +12 -15
  112. pixeltable/utils/coco.py +136 -0
  113. pixeltable/utils/documents.py +39 -0
  114. pixeltable/utils/filecache.py +195 -0
  115. pixeltable/utils/help.py +11 -0
  116. pixeltable/utils/media_store.py +76 -0
  117. pixeltable/utils/parquet.py +126 -0
  118. pixeltable/utils/pytorch.py +172 -0
  119. pixeltable/utils/s3.py +13 -0
  120. pixeltable/utils/sql.py +17 -0
  121. pixeltable/utils/transactional_directory.py +35 -0
  122. pixeltable-0.2.1.dist-info/LICENSE +18 -0
  123. pixeltable-0.2.1.dist-info/METADATA +119 -0
  124. pixeltable-0.2.1.dist-info/RECORD +125 -0
  125. {pixeltable-0.1.2.dist-info → pixeltable-0.2.1.dist-info}/WHEEL +1 -1
  126. pixeltable/catalog.py +0 -1421
  127. pixeltable/exprs.py +0 -1745
  128. pixeltable/function.py +0 -269
  129. pixeltable/functions/clip.py +0 -10
  130. pixeltable/functions/pil/__init__.py +0 -23
  131. pixeltable/functions/tf.py +0 -21
  132. pixeltable/index.py +0 -57
  133. pixeltable/tests/test_dict.py +0 -24
  134. pixeltable/tests/test_tf.py +0 -69
  135. pixeltable/tf.py +0 -33
  136. pixeltable/utils/tf.py +0 -33
  137. pixeltable/utils/video.py +0 -32
  138. pixeltable-0.1.2.dist-info/LICENSE +0 -201
  139. pixeltable-0.1.2.dist-info/METADATA +0 -89
  140. pixeltable-0.1.2.dist-info/RECORD +0 -37
@@ -0,0 +1,77 @@
1
+ from __future__ import annotations
2
+ from typing import Optional, List, Any, Dict, Tuple
3
+
4
+ import sqlalchemy as sql
5
+
6
+ from .globals import ComparisonOperator
7
+ from .expr import Expr
8
+ from .predicate import Predicate
9
+ from .data_row import DataRow
10
+ from .row_builder import RowBuilder
11
+ import pixeltable.catalog as catalog
12
+
13
+
class Comparison(Predicate):
    """Predicate that compares two expressions with a ComparisonOperator.

    The left and right operands are kept, in that order, in self.components.
    """

    def __init__(self, operator: ComparisonOperator, op1: Expr, op2: Expr):
        super().__init__()
        self.operator = operator
        self.components = [op1, op2]
        self.id = self._create_id()

    @property
    def _op1(self) -> Expr:
        """Left-hand operand."""
        return self.components[0]

    @property
    def _op2(self) -> Expr:
        """Right-hand operand."""
        return self.components[1]

    def __str__(self) -> str:
        return f'{self._op1} {self.operator} {self._op2}'

    def _equals(self, other: Comparison) -> bool:
        # only the operator is compared here; presumably the base class takes care
        # of comparing the operands in self.components -- TODO confirm
        return self.operator == other.operator

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        return super()._id_attrs() + [('operator', self.operator.value)]

    def _comparator(self) -> Any:
        """Return a two-argument callable implementing self.operator, or None if it is not recognized."""
        candidates = (
            (ComparisonOperator.LT, lambda lhs, rhs: lhs < rhs),
            (ComparisonOperator.LE, lambda lhs, rhs: lhs <= rhs),
            (ComparisonOperator.EQ, lambda lhs, rhs: lhs == rhs),
            (ComparisonOperator.NE, lambda lhs, rhs: lhs != rhs),
            (ComparisonOperator.GT, lambda lhs, rhs: lhs > rhs),
            (ComparisonOperator.GE, lambda lhs, rhs: lhs >= rhs),
        )
        for candidate, compare in candidates:
            if self.operator == candidate:
                return compare
        return None

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        """Build the SQL comparison; returns None if either operand has no SQL expression."""
        left = self._op1.sql_expr()
        right = self._op2.sql_expr()
        if left is None or right is None:
            return None
        compare = self._comparator()
        if compare is None:
            return None
        return compare(left, right)

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """Compare the operands' values in data_row and store the result in this expression's slot."""
        compare = self._comparator()
        if compare is None:
            # unrecognized operator: the slot is left untouched
            return
        lhs = data_row[self._op1.slot_idx]
        rhs = data_row[self._op2.slot_idx]
        data_row[self.slot_idx] = compare(lhs, rhs)

    def _as_dict(self) -> Dict:
        return {'operator': self.operator.value, **super()._as_dict()}

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        """Rebuild a Comparison from its dict form and its two operand expressions."""
        assert 'operator' in d
        left, right = components[0], components[1]
        return cls(ComparisonOperator(d['operator']), left, right)
77
+
@@ -0,0 +1,98 @@
1
+ from __future__ import annotations
2
+ from typing import Optional, List, Any, Dict, Tuple, Callable
3
+ import operator
4
+
5
+ import sqlalchemy as sql
6
+
7
+ from .expr import Expr
8
+ from .globals import LogicalOperator
9
+ from .predicate import Predicate
10
+ from .data_row import DataRow
11
+ from .row_builder import RowBuilder
12
+ import pixeltable.catalog as catalog
13
+
14
+
class CompoundPredicate(Predicate):
    """Logical combination (AND / OR / NOT) of predicates.

    The operand predicates are kept in self.components. Nested predicates that use
    the same operator are flattened into a single operand list.
    """

    def __init__(self, operator: LogicalOperator, operands: List[Predicate]):
        super().__init__()
        self.operator = operator
        # operands are stored in self.components
        is_negation = self.operator == LogicalOperator.NOT
        if is_negation:
            assert len(operands) == 1
            self.components = operands
        else:
            assert len(operands) > 1
            self.operands: List[Predicate] = []
            # appends to self.components, which is presumably initialized by the
            # base-class constructor -- TODO confirm
            for operand in operands:
                self._merge_operand(operand)

        self.id = self._create_id()

    def __str__(self) -> str:
        if self.operator == LogicalOperator.NOT:
            return f'~({self.components[0]})'
        separator = f' {self.operator} '
        parts = [f'({e})' for e in self.components]
        return separator.join(parts)

    @classmethod
    def make_conjunction(cls, operands: List[Predicate]) -> Optional[Predicate]:
        """Combine operands with AND: None for no operands, the operand itself for exactly one."""
        count = len(operands)
        if count == 0:
            return None
        if count == 1:
            return operands[0]
        return CompoundPredicate(LogicalOperator.AND, operands)

    def _merge_operand(self, operand: Predicate) -> None:
        """
        Merge this operand, if possible, otherwise simply record it.
        """
        same_kind = isinstance(operand, CompoundPredicate) and operand.operator == self.operator
        if not same_kind:
            self.components.append(operand)
            return
        # same operator: absorb the operand's own components recursively
        for nested in operand.components:
            self._merge_operand(nested)

    def _equals(self, other: CompoundPredicate) -> bool:
        return self.operator == other.operator

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        return super()._id_attrs() + [('operator', self.operator.value)]

    def split_conjuncts(
            self, condition: Callable[[Predicate], bool]) -> Tuple[List[Predicate], Optional[Predicate]]:
        """Split the operands of an AND by condition.

        Returns the operands that satisfy condition and the conjunction of the
        remaining ones (None if there are none). OR and NOT predicates defer to
        the base-class implementation.
        """
        if self.operator == LogicalOperator.OR or self.operator == LogicalOperator.NOT:
            return super().split_conjuncts(condition)
        selected = list(filter(condition, self.components))
        remaining = [op for op in self.components if not condition(op)]
        return (selected, self.make_conjunction(remaining))

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        """Build the SQL clause; returns None if any operand has no SQL expression."""
        clauses = [op.sql_expr() for op in self.components]
        for clause in clauses:
            if clause is None:
                return None
        if self.operator == LogicalOperator.NOT:
            assert len(clauses) == 1
            return sql.not_(clauses[0])
        assert len(clauses) > 1
        if self.operator == LogicalOperator.AND:
            combine = sql.and_
        else:
            combine = sql.or_
        return combine(*clauses)

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """Combine the operands' values in data_row and store the result in this expression's slot."""
        if self.operator == LogicalOperator.NOT:
            data_row[self.slot_idx] = not data_row[self.components[0].slot_idx]
            return
        if self.operator == LogicalOperator.AND:
            # True is the neutral element for AND
            result = True
            combine = operator.and_
        else:
            # False is the neutral element for OR
            result = False
            combine = operator.or_
        for op in self.components:
            result = combine(result, data_row[op.slot_idx])
        data_row[self.slot_idx] = result

    def _as_dict(self) -> Dict:
        return {'operator': self.operator.value, **super()._as_dict()}

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        """Rebuild a CompoundPredicate from its dict form and its operand expressions."""
        assert 'operator' in d
        logical_op = LogicalOperator(d['operator'])
        return cls(logical_op, components)
98
+
@@ -0,0 +1,187 @@
from __future__ import annotations

import io
import urllib
import urllib.parse
import urllib.request
from typing import Optional, List, Any, Tuple

import PIL
import PIL.Image
import numpy as np
8
+
9
+
class DataRow:
    """
    Encapsulates all data and execution state needed by RowBuilder and DataRowBatch:
    - state for in-memory computation
    - state for storing the data
    This is not meant to be a black-box abstraction.

    In-memory representations by column type:
    - StringType: str
    - IntType: int
    - FloatType: float
    - BoolType: bool
    - TimestampType: datetime.datetime
    - JsonType: json-serializable object
    - ArrayType: numpy.ndarray
    - ImageType: PIL.Image.Image
    - VideoType: local path if available, otherwise url
    """

    def __init__(self, size: int, img_slot_idxs: List[int], media_slot_idxs: List[int], array_slot_idxs: List[int]):
        """
        Args:
            size: number of slots in this row
            img_slot_idxs: indices of the slots that hold images
            media_slot_idxs: indices of the slots that hold media other than images
            array_slot_idxs: indices of the slots that hold arrays
        """
        self.vals: List[Any] = [None] * size  # in-memory cell values; exceptions are kept in self.excs
        self.has_val = [False] * size
        self.excs: List[Optional[Exception]] = [None] * size

        # control structures that are shared across all DataRows in a batch
        self.img_slot_idxs = img_slot_idxs
        self.media_slot_idxs = media_slot_idxs  # all media types aside from image
        self.array_slot_idxs = array_slot_idxs

        # the primary key of a store row is a sequence of ints (the number is different for table vs view)
        self.pk: Optional[Tuple[int, ...]] = None

        # file_urls:
        # - stored url of file for media in vals[i]
        # - None if vals[i] is not media type
        # - not None if file_paths[i] is not None
        self.file_urls: List[Optional[str]] = [None] * size

        # file_paths:
        # - local path of media file in vals[i]; points to the file cache if file_urls[i] is remote
        # - None if vals[i] is not a media type or if there is no local file yet for file_urls[i]
        self.file_paths: List[Optional[str]] = [None] * size

    def clear(self) -> None:
        """Reset all per-row state; the slot index lists are left untouched."""
        size = len(self.vals)
        self.vals = [None] * size
        self.has_val = [False] * size
        self.excs = [None] * size
        self.pk = None
        self.file_urls = [None] * size
        self.file_paths = [None] * size

    def copy(self, target: 'DataRow') -> None:
        """Create a copy of the contents of this DataRow in target
        The copy shares the cell values, but not the control structures (eg, self.has_val), because these
        need to be independently updateable.
        """
        target.vals = self.vals.copy()
        target.has_val = self.has_val.copy()
        target.excs = self.excs.copy()
        target.pk = self.pk
        target.file_urls = self.file_urls.copy()
        target.file_paths = self.file_paths.copy()

    def set_pk(self, pk: Tuple[int, ...]) -> None:
        self.pk = pk

    def has_exc(self, slot_idx: int) -> bool:
        """Return True if an exception was recorded for this slot."""
        return self.excs[slot_idx] is not None

    def get_exc(self, slot_idx: int) -> Exception:
        """Return the exception recorded for this slot; the slot must not hold a value."""
        assert self.has_val[slot_idx] is False
        assert self.excs[slot_idx] is not None
        return self.excs[slot_idx]

    def set_exc(self, slot_idx: int, exc: Exception) -> None:
        """Record an exception for this slot and drop any value (and file references) it held."""
        assert self.excs[slot_idx] is None
        self.excs[slot_idx] = exc

        if self.has_val[slot_idx]:
            # eg. during validation, where contents of file is found invalid
            self.has_val[slot_idx] = False
            self.vals[slot_idx] = None
            self.file_paths[slot_idx] = None
            self.file_urls[slot_idx] = None

    def __getitem__(self, index: int) -> Any:
        """Returns in-memory value, ie, what is needed for expr evaluation"""
        assert self.has_val[index], index

        if self.file_urls[index] is not None and index in self.img_slot_idxs:
            # if we need to load this from a file, it should have been materialized locally
            assert self.file_paths[index] is not None
            if self.vals[index] is None:
                # image slots assigned by path/url are loaded lazily on first access
                self.vals[index] = PIL.Image.open(self.file_paths[index])

        return self.vals[index]

    def get_stored_val(self, index: int) -> Any:
        """Return the value that gets stored in the db"""
        assert self.excs[index] is None
        assert self.has_val[index]

        if self.file_urls[index] is not None and (index in self.img_slot_idxs or index in self.media_slot_idxs):
            # if this is an image or other media type we want to store, we should have a url
            return self.file_urls[index]

        if self.vals[index] is not None and index in self.array_slot_idxs:
            # arrays are stored in the binary format written by numpy.save()
            assert isinstance(self.vals[index], np.ndarray)
            np_array = self.vals[index]
            buffer = io.BytesIO()
            np.save(buffer, np_array)
            return buffer.getvalue()

        return self.vals[index]

    def __setitem__(self, idx: int, val: Any) -> None:
        """Assign in-memory cell value
        This allows overwriting
        """
        assert self.excs[idx] is None

        if (idx in self.img_slot_idxs or idx in self.media_slot_idxs) and isinstance(val, str):
            # this is either a local file path or a URL
            parsed = urllib.parse.urlparse(val)
            if parsed.scheme == '' or parsed.scheme == 'file':
                # local file path
                assert self.file_urls[idx] is None and self.file_paths[idx] is None
                if parsed.scheme == '':
                    self.file_urls[idx] = urllib.parse.urljoin('file:', urllib.request.pathname2url(parsed.path))
                else:
                    self.file_urls[idx] = val
                self.file_paths[idx] = urllib.parse.unquote(parsed.path)
            else:
                # URL
                assert self.file_urls[idx] is None
                self.file_urls[idx] = val

            if idx in self.media_slot_idxs:
                # non-image media: the in-memory value is the local path if there is one, otherwise the url
                self.vals[idx] = self.file_paths[idx] if self.file_paths[idx] is not None else self.file_urls[idx]
            # image slots keep vals[idx] unchanged here; __getitem__ opens the file on demand
        elif idx in self.array_slot_idxs and isinstance(val, bytes):
            # stored arrays arrive in the binary format produced by get_stored_val()
            self.vals[idx] = np.load(io.BytesIO(val))
        else:
            self.vals[idx] = val
        self.has_val[idx] = True

    def set_file_path(self, idx: int, path: str) -> None:
        """Augment an existing url with a local file path"""
        assert self.has_val[idx]
        assert idx in self.img_slot_idxs or idx in self.media_slot_idxs
        self.file_paths[idx] = path
        if idx in self.media_slot_idxs:
            self.vals[idx] = path

    def flush_img(self, index: int, filepath: Optional[str] = None) -> None:
        """Discard the in-memory value and save it to a local file, if filepath is not None

        If the slot already has a local file, the in-memory value is simply dropped.
        If it has no file and filepath is None, the cell content is discarded altogether.
        Any exception raised while saving propagates and leaves the row unchanged.
        """
        if self.vals[index] is None:
            return
        assert self.excs[index] is None
        if self.file_paths[index] is None:
            if filepath is not None:
                # we want to save this to a file; write it before recording the path/url, so that
                # a failed save does not leave this row pointing at a file that was never written
                # NOTE(review): the file is always written as JPEG; confirm that callers only flush
                # images in a mode that Pillow can encode as JPEG
                file_url = urllib.parse.urljoin('file:', urllib.request.pathname2url(filepath))
                self.vals[index].save(filepath, format='JPEG')
                self.file_paths[index] = filepath
                self.file_urls[index] = file_url
            else:
                # we discard the content of this cell
                self.has_val[index] = False
        # else: we already have a file for this image, nothing left to do
        self.vals[index] = None
187
+