pixeltable 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (139)
  1. pixeltable/__init__.py +34 -6
  2. pixeltable/catalog/__init__.py +13 -0
  3. pixeltable/catalog/catalog.py +159 -0
  4. pixeltable/catalog/column.py +200 -0
  5. pixeltable/catalog/dir.py +32 -0
  6. pixeltable/catalog/globals.py +33 -0
  7. pixeltable/catalog/insertable_table.py +191 -0
  8. pixeltable/catalog/named_function.py +36 -0
  9. pixeltable/catalog/path.py +58 -0
  10. pixeltable/catalog/path_dict.py +139 -0
  11. pixeltable/catalog/schema_object.py +39 -0
  12. pixeltable/catalog/table.py +581 -0
  13. pixeltable/catalog/table_version.py +749 -0
  14. pixeltable/catalog/table_version_path.py +133 -0
  15. pixeltable/catalog/view.py +203 -0
  16. pixeltable/client.py +520 -30
  17. pixeltable/dataframe.py +540 -349
  18. pixeltable/env.py +373 -45
  19. pixeltable/exceptions.py +12 -21
  20. pixeltable/exec/__init__.py +9 -0
  21. pixeltable/exec/aggregation_node.py +78 -0
  22. pixeltable/exec/cache_prefetch_node.py +113 -0
  23. pixeltable/exec/component_iteration_node.py +79 -0
  24. pixeltable/exec/data_row_batch.py +95 -0
  25. pixeltable/exec/exec_context.py +22 -0
  26. pixeltable/exec/exec_node.py +61 -0
  27. pixeltable/exec/expr_eval_node.py +217 -0
  28. pixeltable/exec/in_memory_data_node.py +69 -0
  29. pixeltable/exec/media_validation_node.py +43 -0
  30. pixeltable/exec/sql_scan_node.py +225 -0
  31. pixeltable/exprs/__init__.py +24 -0
  32. pixeltable/exprs/arithmetic_expr.py +102 -0
  33. pixeltable/exprs/array_slice.py +71 -0
  34. pixeltable/exprs/column_property_ref.py +77 -0
  35. pixeltable/exprs/column_ref.py +105 -0
  36. pixeltable/exprs/comparison.py +77 -0
  37. pixeltable/exprs/compound_predicate.py +98 -0
  38. pixeltable/exprs/data_row.py +187 -0
  39. pixeltable/exprs/expr.py +586 -0
  40. pixeltable/exprs/expr_set.py +39 -0
  41. pixeltable/exprs/function_call.py +380 -0
  42. pixeltable/exprs/globals.py +69 -0
  43. pixeltable/exprs/image_member_access.py +115 -0
  44. pixeltable/exprs/image_similarity_predicate.py +58 -0
  45. pixeltable/exprs/inline_array.py +107 -0
  46. pixeltable/exprs/inline_dict.py +101 -0
  47. pixeltable/exprs/is_null.py +38 -0
  48. pixeltable/exprs/json_mapper.py +121 -0
  49. pixeltable/exprs/json_path.py +159 -0
  50. pixeltable/exprs/literal.py +54 -0
  51. pixeltable/exprs/object_ref.py +41 -0
  52. pixeltable/exprs/predicate.py +44 -0
  53. pixeltable/exprs/row_builder.py +355 -0
  54. pixeltable/exprs/rowid_ref.py +94 -0
  55. pixeltable/exprs/type_cast.py +53 -0
  56. pixeltable/exprs/variable.py +45 -0
  57. pixeltable/func/__init__.py +9 -0
  58. pixeltable/func/aggregate_function.py +194 -0
  59. pixeltable/func/batched_function.py +53 -0
  60. pixeltable/func/callable_function.py +69 -0
  61. pixeltable/func/expr_template_function.py +82 -0
  62. pixeltable/func/function.py +110 -0
  63. pixeltable/func/function_registry.py +227 -0
  64. pixeltable/func/globals.py +36 -0
  65. pixeltable/func/nos_function.py +202 -0
  66. pixeltable/func/signature.py +166 -0
  67. pixeltable/func/udf.py +163 -0
  68. pixeltable/functions/__init__.py +52 -103
  69. pixeltable/functions/eval.py +216 -0
  70. pixeltable/functions/fireworks.py +61 -0
  71. pixeltable/functions/huggingface.py +120 -0
  72. pixeltable/functions/image.py +16 -0
  73. pixeltable/functions/openai.py +88 -0
  74. pixeltable/functions/pil/image.py +148 -7
  75. pixeltable/functions/string.py +13 -0
  76. pixeltable/functions/together.py +27 -0
  77. pixeltable/functions/util.py +41 -0
  78. pixeltable/functions/video.py +62 -0
  79. pixeltable/iterators/__init__.py +3 -0
  80. pixeltable/iterators/base.py +48 -0
  81. pixeltable/iterators/document.py +311 -0
  82. pixeltable/iterators/video.py +89 -0
  83. pixeltable/metadata/__init__.py +54 -0
  84. pixeltable/metadata/converters/convert_10.py +18 -0
  85. pixeltable/metadata/schema.py +211 -0
  86. pixeltable/plan.py +656 -0
  87. pixeltable/store.py +413 -182
  88. pixeltable/tests/conftest.py +143 -87
  89. pixeltable/tests/test_audio.py +65 -0
  90. pixeltable/tests/test_catalog.py +27 -0
  91. pixeltable/tests/test_client.py +14 -14
  92. pixeltable/tests/test_component_view.py +372 -0
  93. pixeltable/tests/test_dataframe.py +433 -0
  94. pixeltable/tests/test_dirs.py +78 -62
  95. pixeltable/tests/test_document.py +117 -0
  96. pixeltable/tests/test_exprs.py +591 -135
  97. pixeltable/tests/test_function.py +297 -67
  98. pixeltable/tests/test_functions.py +283 -1
  99. pixeltable/tests/test_migration.py +43 -0
  100. pixeltable/tests/test_nos.py +54 -0
  101. pixeltable/tests/test_snapshot.py +208 -0
  102. pixeltable/tests/test_table.py +1085 -262
  103. pixeltable/tests/test_transactional_directory.py +42 -0
  104. pixeltable/tests/test_types.py +5 -11
  105. pixeltable/tests/test_video.py +149 -34
  106. pixeltable/tests/test_view.py +530 -0
  107. pixeltable/tests/utils.py +186 -45
  108. pixeltable/tool/create_test_db_dump.py +149 -0
  109. pixeltable/type_system.py +490 -126
  110. pixeltable/utils/__init__.py +17 -46
  111. pixeltable/utils/clip.py +12 -15
  112. pixeltable/utils/coco.py +136 -0
  113. pixeltable/utils/documents.py +39 -0
  114. pixeltable/utils/filecache.py +195 -0
  115. pixeltable/utils/help.py +11 -0
  116. pixeltable/utils/media_store.py +76 -0
  117. pixeltable/utils/parquet.py +126 -0
  118. pixeltable/utils/pytorch.py +172 -0
  119. pixeltable/utils/s3.py +13 -0
  120. pixeltable/utils/sql.py +17 -0
  121. pixeltable/utils/transactional_directory.py +35 -0
  122. pixeltable-0.2.0.dist-info/LICENSE +18 -0
  123. pixeltable-0.2.0.dist-info/METADATA +117 -0
  124. pixeltable-0.2.0.dist-info/RECORD +125 -0
  125. {pixeltable-0.1.1.dist-info → pixeltable-0.2.0.dist-info}/WHEEL +1 -1
  126. pixeltable/catalog.py +0 -1421
  127. pixeltable/exprs.py +0 -1745
  128. pixeltable/function.py +0 -269
  129. pixeltable/functions/clip.py +0 -10
  130. pixeltable/functions/pil/__init__.py +0 -23
  131. pixeltable/functions/tf.py +0 -21
  132. pixeltable/index.py +0 -57
  133. pixeltable/tests/test_dict.py +0 -24
  134. pixeltable/tests/test_tf.py +0 -69
  135. pixeltable/tf.py +0 -33
  136. pixeltable/utils/tf.py +0 -33
  137. pixeltable/utils/video.py +0 -32
  138. pixeltable-0.1.1.dist-info/METADATA +0 -31
  139. pixeltable-0.1.1.dist-info/RECORD +0 -36
pixeltable/function.py DELETED
@@ -1,269 +0,0 @@
1
- import sys
2
- from typing import Optional, Callable, Dict, List, Any
3
- import importlib
4
- import sqlalchemy as sql
5
- import cloudpickle
6
-
7
- from pixeltable.type_system import ColumnType
8
- from pixeltable import store
9
- from pixeltable.env import Env
10
- from pixeltable import exceptions as exc
11
- #from pixeltable import function_pickle
12
-
13
-
14
- class Function:
15
- """
16
- A Function's executable function is specified either directly or as module/symbol.
17
- In the former case, the function needs to be pickled and stored for serialization.
18
- In the latter case, the executable function is resolved in init().
19
- self.id is only set for non-module functions that are in the backing store.
20
- """
21
- def __init__(
22
- self, return_type: ColumnType, param_types: Optional[List[ColumnType]], id: Optional[int] = None,
23
- module_name: Optional[str] = None, eval_symbol: Optional[str] = None, init_symbol: Optional[str] = None,
24
- update_symbol: Optional[str] = None, value_symbol: Optional[str] = None,
25
- eval_fn: Optional[Callable] = None, init_fn: Optional[Callable] = None,
26
- update_fn: Optional[Callable] = None, value_fn: Optional[Callable] = None,
27
- order_by: List[int] = []
28
- ):
29
- has_agg_symbols = init_symbol is not None and update_symbol is not None and value_symbol is not None
30
- has_agg_fns = init_fn is not None and update_fn is not None and value_fn is not None
31
- assert (module_name is not None) == (eval_symbol is not None or has_agg_symbols)
32
- assert (module_name is None) == (eval_fn is not None or has_agg_fns)
33
- # exactly one of the 4 scenarios (is agg fn x is library fn) is specified
34
- assert int(eval_symbol is not None) + int(eval_fn is not None) + int(has_agg_symbols) + int(has_agg_fns) == 1
35
- self.return_type = return_type
36
- self.param_types = param_types
37
- self.id = id
38
- self.module_name = module_name
39
- self.eval_symbol = eval_symbol
40
- self.eval_fn = eval_fn
41
- self.init_symbol = init_symbol
42
- self.init_fn = init_fn
43
- self.update_symbol = update_symbol
44
- self.update_fn = update_fn
45
- self.value_symbol = value_symbol
46
- self.value_fn = value_fn
47
-
48
- if module_name is not None:
49
- # resolve module_name and symbol
50
- module = importlib.import_module(module_name)
51
- if eval_symbol is not None:
52
- self.eval_fn = self._resolve_symbol(module, eval_symbol)
53
- if init_symbol is not None:
54
- self.init_fn = self._resolve_symbol(module, init_symbol)
55
- if update_symbol is not None:
56
- self.update_fn = self._resolve_symbol(module, update_symbol)
57
- if value_symbol is not None:
58
- self.value_fn = self._resolve_symbol(module, value_symbol)
59
-
60
- if len(order_by) > 0:
61
- if self.init_fn is None:
62
- raise exc.Error(f'order_by parameter only valid for aggregate functions')
63
- for idx in order_by:
64
- if not isinstance(idx, int) or idx >= len(param_types):
65
- raise exc.Error(f'order_by element not a valid index into param_types: {idx}')
66
- self.order_by = order_by
67
-
68
- def _resolve_symbol(self, module: Any, symbol: str) -> object:
69
- obj = module
70
- for el in symbol.split('.'):
71
- obj = getattr(obj, el)
72
- return obj
73
-
74
- @property
75
- def is_aggregate(self) -> bool:
76
- return self.init_fn is not None
77
-
78
- @property
79
- def is_library_function(self) -> bool:
80
- return self.module_name is not None
81
-
82
- def __call__(self, *args: object) -> 'pixeltable.exprs.FunctionCall':
83
- from pixeltable import exprs
84
- return exprs.FunctionCall(self, args)
85
-
86
- def __eq__(self, other: object) -> bool:
87
- if not isinstance(other, self.__class__):
88
- return False
89
- return self.return_type == other.return_type and self.param_types == other.param_types \
90
- and self.id == other.id and self.module_name == other.module_name \
91
- and self.eval_symbol == other.eval_symbol and self.init_symbol == other.init_symbol \
92
- and self.update_symbol == other.update_symbol and self.value_symbol == other.value_symbol \
93
- and self.eval_fn == other.eval_fn and self.init_fn == other.init_fn \
94
- and self.update_fn == other.update_fn and self.value_fn == other.value_fn
95
-
96
-
97
- def as_dict(self) -> Dict:
98
- if self.module_name is None and self.id is None:
99
- # this is not a library function and the absence of an assigned id indicates that it's not in the store yet
100
- FunctionRegistry.get().create_function(self)
101
- assert self.id is not None
102
- return {
103
- 'return_type': self.return_type.as_dict(),
104
- 'param_types': [t.as_dict() for t in self.param_types] if self.param_types is not None else None,
105
- 'id': self.id,
106
- 'module_name': self.module_name,
107
- 'eval_symbol': self.eval_symbol,
108
- 'init_symbol': self.init_symbol,
109
- 'update_symbol': self.update_symbol,
110
- 'value_symbol': self.value_symbol,
111
- }
112
-
113
- @classmethod
114
- def from_dict(cls, d: Dict) -> 'Function':
115
- assert 'return_type' in d
116
- return_type = ColumnType.from_dict(d['return_type'])
117
- assert 'param_types' in d
118
- if d['param_types'] is None:
119
- param_types = None
120
- else:
121
- param_types = [ColumnType.from_dict(type_dict) for type_dict in d['param_types']]
122
- assert 'id' in d
123
- assert 'module_name' in d
124
- assert 'eval_symbol' in d and 'init_symbol' in d and 'update_symbol' in d and 'value_symbol' in d
125
-
126
- if d['id'] is not None:
127
- assert d['module_name'] is None
128
- return FunctionRegistry.get().get_function(d['id'])
129
- else:
130
- return cls(
131
- return_type, param_types, module_name=d['module_name'], eval_symbol=d['eval_symbol'],
132
- init_symbol=d['init_symbol'], update_symbol=d['update_symbol'], value_symbol=d['value_symbol'])
133
-
134
-
135
- class FunctionRegistry:
136
- """
137
- A central registry for all Functions. Handles interactions with the backing store.
138
- Function are loaded from the store on demand.
139
- """
140
- _instance: Optional['FunctionRegistry'] = None
141
-
142
- @classmethod
143
- def get(cls) -> 'FunctionRegistry':
144
- if cls._instance is None:
145
- cls._instance = FunctionRegistry()
146
- return cls._instance
147
-
148
- def __init__(self):
149
- self.fns_by_id: Dict[int, Function] = {}
150
-
151
- def clear_cache(self) -> None:
152
- """
153
- Useful during testing
154
- """
155
- self.fns_by_id: Dict[int, Function] = {}
156
-
157
- def get_function(self, id: int) -> Function:
158
- if id not in self.fns_by_id:
159
- stmt = sql.select(
160
- store.Function.name, store.Function.return_type, store.Function.param_types,
161
- store.Function.eval_obj, store.Function.init_obj, store.Function.update_obj, store.Function.value_obj) \
162
- .where(store.Function.id == id)
163
- with Env.get().engine.begin() as conn:
164
- rows = conn.execute(stmt)
165
- row = next(rows)
166
- name = row[0]
167
- return_type = ColumnType.deserialize(row[1])
168
- param_types = ColumnType.deserialize_list(row[2])
169
- eval_fn = cloudpickle.loads(row[3]) if row[3] is not None else None
170
- # TODO: are these checks needed?
171
- if row[3] is not None and eval_fn is None:
172
- raise exc.Error(f'Could not load eval_fn for function {name}')
173
- init_fn = cloudpickle.loads(row[4]) if row[4] is not None else None
174
- if row[4] is not None and init_fn is None:
175
- raise exc.Error(f'Could not load init_fn for aggregate function {name}')
176
- update_fn = cloudpickle.loads(row[5]) if row[5] is not None else None
177
- if row[5] is not None and update_fn is None:
178
- raise exc.Error(f'Could not load update_fn for aggregate function {name}')
179
- value_fn = cloudpickle.loads(row[6]) if row[6] is not None else None
180
- if row[6] is not None and value_fn is None:
181
- raise exc.Error(f'Could not load value_fn for aggregate function {name}')
182
-
183
- func = Function(
184
- return_type, param_types, id=id, eval_fn=eval_fn, init_fn=init_fn, update_fn=update_fn,
185
- value_fn=value_fn)
186
- self.fns_by_id[id] = func
187
- assert id in self.fns_by_id
188
- return self.fns_by_id[id]
189
-
190
- def create_function(
191
- self, fn: Function, db_id: Optional[int] = None, dir_id: Optional[int] = None,
192
- name: Optional[str] = None
193
- ) -> None:
194
- with Env.get().engine.begin() as conn:
195
- eval_fn_str = cloudpickle.dumps(fn.eval_fn) if fn.eval_fn is not None else None
196
- init_fn_str = cloudpickle.dumps(fn.init_fn) if fn.init_fn is not None else None
197
- update_fn_str = cloudpickle.dumps(fn.update_fn) if fn.update_fn is not None else None
198
- value_fn_str = cloudpickle.dumps(fn.value_fn) if fn.value_fn is not None else None
199
- res = conn.execute(
200
- sql.insert(store.Function.__table__)
201
- .values(
202
- db_id=db_id, dir_id=dir_id, name=name, return_type=fn.return_type.serialize(),
203
- param_types=ColumnType.serialize_list(fn.param_types),
204
- eval_obj=eval_fn_str, init_obj=init_fn_str, update_obj=update_fn_str, value_obj=value_fn_str))
205
- fn.id = res.inserted_primary_key[0]
206
- self.fns_by_id[fn.id] = fn
207
-
208
- def update_function(
209
- self, id: int, eval_fn: Optional[Callable] = None, init_fn: Optional[Callable] = None,
210
- update_fn: Optional[Callable] = None, value_fn: Optional[Callable] = None
211
- ) -> None:
212
- """
213
- Updates the callable for the function with the given id in the store and in the cache, if present.
214
- """
215
- with Env.get().engine.begin() as conn:
216
- updates = {}
217
- if eval_fn is not None:
218
- updates[store.Function.eval_obj] = cloudpickle.dumps(eval_fn)
219
- if init_fn is not None:
220
- updates[store.Function.init_obj] = cloudpickle.dumps(init_fn)
221
- if update_fn is not None:
222
- updates[store.Function.update_obj] = cloudpickle.dumps(update_fn)
223
- if value_fn is not None:
224
- updates[store.Function.value_obj] = cloudpickle.dumps(value_fn)
225
- conn.execute(
226
- sql.update(store.Function.__table__)
227
- .values(updates)
228
- .where(store.Function.id == id))
229
- if id in self.fns_by_id:
230
- if eval_fn is not None:
231
- self.fns_by_id[id].eval_fn = eval_fn
232
- if init_fn is not None:
233
- self.fns_by_id[id].init_fn = init_fn
234
- if update_fn is not None:
235
- self.fns_by_id[id].update_fn = update_fn
236
- if value_fn is not None:
237
- self.fns_by_id[id].value_fn = value_fn
238
-
239
- def delete_function(self, id: int) -> None:
240
- assert id is not None
241
- with Env.get().engine.begin() as conn:
242
- conn.execute(
243
- sql.delete(store.Function.__table__)
244
- .where(store.Function.id == id))
245
-
246
-
247
- # def create_module_list() -> None:
248
- # """
249
- # Generate file standard_modules.py, which contains a list of modules available after 'import pixeltable'.
250
- # These are the modules we don't want to pickle.
251
- # TODO: move this elsewhere?
252
- # """
253
- # with open('standard_modules.py', 'w') as f:
254
- # f.write('module_names = set([\n ')
255
- # line_len = 0
256
- # module_names = sys.modules.keys()
257
- # for name in module_names:
258
- # str = f"'{name}', "
259
- # line_len += len(str)
260
- # if line_len >= 80:
261
- # f.write('\n ')
262
- # line_len = 4 # spaces
263
- # f.write(str)
264
- # f.write('\n])')
265
-
266
-
267
- # make create_module_list() callable from the commandline
268
- if __name__ == '__main__':
269
- globals()[sys.argv[1]]()
pixeltable/functions/clip.py DELETED
@@ -1,10 +0,0 @@
1
- from pixeltable.type_system import StringType, ImageType, ArrayType, ColumnType
2
- from pixeltable.function import Function
3
-
4
-
5
- encode_image = Function(
6
- ArrayType((512,), ColumnType.Type.FLOAT), [ImageType()],
7
- module_name='pixeltable.utils.clip', eval_symbol='encode_image')
8
- encode_text = Function(
9
- ArrayType((512,), ColumnType.Type.FLOAT), [StringType()],
10
- module_name = 'pixeltable.utils.clip', eval_symbol = 'encode_text')
pixeltable/functions/pil/__init__.py DELETED
@@ -1,23 +0,0 @@
1
- import numpy as np
2
- import PIL
3
-
4
- from pixeltable.type_system import ImageType, ArrayType, ColumnType
5
- from pixeltable.function import Function
6
- from pixeltable.exceptions import Error
7
-
8
- def _draw_boxes(img: PIL.Image.Image, boxes: np.ndarray) -> PIL.Image.Image:
9
- if len(boxes.shape) != 2 or boxes.shape[1] != 4:
10
- raise Error(f'draw(): boxes needs to have shape (None, 4) but instead has shape {boxes.shape}')
11
- result = img.copy()
12
- d = PIL.ImageDraw.Draw(result)
13
- for i in range(boxes.shape[0]):
14
- d.rectangle(list(boxes[i]), width=3)
15
- return result
16
-
17
- draw_boxes = Function(
18
- ImageType(), [ImageType(), ArrayType((None, 6), dtype=ColumnType.Type.FLOAT)],
19
- module_name=__name__, eval_symbol='_draw_boxes')
20
-
21
- __all__ = [
22
- draw_boxes,
23
- ]
pixeltable/functions/tf.py DELETED
@@ -1,21 +0,0 @@
1
- from typing import Callable, Any, Optional, Tuple
2
- import os
3
- os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
4
- import tensorflow as tf
5
-
6
-
7
- from pixeltable.type_system import ColumnType, ArrayType
8
- import pixeltable.utils.tf as tf_utils
9
- from pixeltable.function import Function
10
-
11
- def TFModelFunction(
12
- model: tf.keras.Model, param_type: ColumnType, output_shape: Optional[Tuple[int, ...]] = None,
13
- preprocess: Optional[Callable] = None) -> Function:
14
- def predict(arg: Any) -> Any:
15
- tensor = tf_utils.to_tensor(arg, param_type)
16
- tensor = tf.expand_dims(tensor, axis=0)
17
- if preprocess is not None:
18
- tensor = preprocess(tensor)
19
- output = model.predict(tensor)
20
- return output[0]
21
- return Function(ArrayType(output_shape, dtype=ColumnType.DType.FLOAT32), [param_type], eval_fn=predict)
pixeltable/index.py DELETED
@@ -1,57 +0,0 @@
1
- from typing import List, Set
2
- import PIL.Image
3
- import hnswlib
4
- import numpy as np
5
-
6
- from pixeltable.env import Env
7
-
8
- class VectorIndex:
9
- def __init__(self, name: str, dim: int, idx: hnswlib.Index):
10
- self.name = name
11
- self.dim = dim
12
- self.idx = idx
13
-
14
- @classmethod
15
- def create(cls, name: str, dim: int) -> 'VectorIndex':
16
- idx = hnswlib.Index(space='cosine', dim=dim)
17
- idx.init_index(max_elements=1000, M=64, ef_construction=200)
18
- filename = cls._filename(name)
19
- idx.save_index(filename)
20
- return VectorIndex(name, dim, idx)
21
-
22
- @classmethod
23
- def load(cls, name: str, dim: int) -> 'VectorIndex':
24
- idx = hnswlib.Index(space='cosine', dim=dim)
25
- filename = cls._filename(name)
26
- idx.load_index(filename)
27
- return VectorIndex(name, dim, idx)
28
-
29
- def insert(self, data: np.ndarray, rowids: np.ndarray) -> None:
30
- assert data.shape[0] == rowids.shape[0]
31
- total = self.idx.element_count + data.shape[0]
32
- if total > self.idx.max_elements:
33
- self.idx.resize_index(int(total * 1.1))
34
- self.idx.add_items(data, rowids)
35
- filename = self._filename(self.name)
36
- self.idx.save_index(filename)
37
-
38
- def search(self, embed: np.ndarray, num_nn: int, valid_rowids: Set[int]) -> List[int]:
39
- """
40
- Returns rowids of k nearest neighbors.
41
- """
42
- assert embed.shape == (512,)
43
- k = num_nn
44
- while True:
45
- nn, _ = self.idx.knn_query(embed.reshape(1, 512), k)
46
- # tolist(): make sure result contains ints, not uint64
47
- result = [rowid for rowid in nn.squeeze().tolist() if rowid in valid_rowids]
48
- if len(result) >= num_nn:
49
- return list(result[:num_nn])
50
- if k >= self.idx.element_count:
51
- # nothing left to look for
52
- return result
53
- k *= 2
54
-
55
- @classmethod
56
- def _filename(cls, name: str) -> str:
57
- return str(Env.get().nnidx_dir / f'idx_{name}')
pixeltable/tests/test_dict.py DELETED
@@ -1,24 +0,0 @@
1
- import sqlalchemy as sql
2
- import pytest
3
-
4
- from pixeltable import catalog
5
- from pixeltable.type_system import IntType, JsonType
6
- from pixeltable.tests.utils import make_tbl, create_table_data, read_data_file
7
-
8
- from pixeltable.env import Env
9
-
10
- class TestExprs:
11
- def test_basic(self, test_tbl: catalog.Table) -> None:
12
- t = test_tbl
13
- res = t[t.c6].show()
14
- stmt = sql.select(sql.func.jsonb_path_query(t.cols_by_name['c6'].sa_col, '$.detections[*].bounding_box'))
15
- #stmt = sql.select(t.cols_by_name['c2'].sa_col['iscrowd']).where(sql.cast(t.cols_by_name['c2'].sa_col['iscrowd'], sql.Integer) == 0)
16
- #stmt = sql.select(t.cols_by_name['c2'].sa_col['iscrowd']).where(t.cols_by_name['c2'].sa_col['supercategory'] == '"furniture"')
17
- #stmt = sql.select(t.cols_by_name['c2'].sa_col['bounding_box', 0]).where(t.cols_by_name['c2'].sa_col['supercategory'] == '"furniture"')
18
- #stmt = sql.select(t.cols_by_name['c2'].sa_col['bounding_box', 0]).where(t.cols_by_name['c2'].sa_col['supercategory'].astext == 'furniture')
19
- with Env.get().engine.connect() as conn:
20
- result = conn.execute(stmt)
21
- for row in result:
22
- print(row)
23
- print(res)
24
-
pixeltable/tests/test_tf.py DELETED
@@ -1,69 +0,0 @@
1
- import os
2
- os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
3
- import tensorflow as tf
4
-
5
- from pixeltable import catalog
6
- from pixeltable.functions import dict_map
7
- from pixeltable.functions.tf import TFModelFunction
8
- from pixeltable.type_system import ImageType
9
- import pixeltable.tf
10
-
11
-
12
- class TestTf:
13
- def verify_type(self, df: pixeltable.DataFrame, ds: tf.data.Dataset) -> None:
14
- """
15
- Verify that column types of df matches element spec of ds.
16
- """
17
- result_set = df.show(1)
18
- assert isinstance(ds.element_spec, tuple)
19
- assert len(result_set.col_types) == len(ds.element_spec)
20
- for i in range(len(result_set.col_types)):
21
- col_type = result_set.col_types[i]
22
- if col_type.is_scalar_type():
23
- assert ds.element_spec[i].shape == ()
24
- if col_type.is_image_type():
25
- assert ds.element_spec[i].shape == (col_type.height, col_type.width, col_type.num_channels)
26
-
27
- def test_basic(self, img_tbl: catalog.Table) -> None:
28
- # TODO: test all data types
29
- t = img_tbl
30
- m = t[t.category].categorical_map()
31
- # image types with increasingly specific dimensions
32
- df = t[t.img, dict_map(t.category, m)]
33
- ds = pixeltable.tf.to_dataset(df)
34
- self.verify_type(df, ds)
35
- for row in ds:
36
- pass
37
-
38
- df = t[t.img.resize((224, 224))]
39
- ds = pixeltable.tf.to_dataset(df)
40
- self.verify_type(df, ds)
41
- for row in ds:
42
- pass
43
-
44
- df = t[t.img.convert('RGB').resize((224, 224)), dict_map(t.category, m)]
45
- ds = pixeltable.tf.to_dataset(df)
46
- self.verify_type(df, ds)
47
- for row in ds:
48
- pass
49
-
50
- df = t[[{'a': t.img.convert('RGB').resize((224, 224)), 'b': dict_map(t.category, m)}]]
51
- ds = pixeltable.tf.to_dataset(df).batch(32)
52
- for row in ds:
53
- pass
54
-
55
- df = t[[t.img.convert('RGB').resize((224, 224)), {'b': dict_map(t.category, m)}]]
56
- ds = pixeltable.tf.to_dataset(df).batch(32)
57
- for row in ds:
58
- pass
59
-
60
-
61
- def test_model_fn(self, img_tbl: catalog.Table) -> None:
62
- model = tf.keras.applications.resnet50.ResNet50()
63
- preprocess = tf.keras.applications.resnet50.preprocess_input
64
- model_udf = TFModelFunction(
65
- model, ImageType(size=(224, 224), mode=ImageType.Mode.RGB), output_shape=(1000,),preprocess=preprocess)
66
- t = img_tbl
67
- df = t[model_udf(t.img)]
68
- res = df.show(1)
69
- _ = res._repr_html_()
pixeltable/tf.py DELETED
@@ -1,33 +0,0 @@
1
- from typing import Any, Tuple, List, Generator
2
- import os
3
- os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
4
- import tensorflow as tf
5
-
6
- from pixeltable.dataframe import DataFrame
7
- import pixeltable.utils.tf as tf_utils
8
-
9
-
10
- def to_dataset(df: DataFrame) -> tf.data.Dataset:
11
- """
12
- Convert df result set to tf.data.Dataset.
13
- """
14
- result_set = df.show(0)
15
- # construct output signature
16
- if len(result_set.col_types) == 1 and result_set.col_types[0].is_json_type():
17
- # we're only getting one column back and it's a dict: make the dataset type a dict
18
- type_spec = result_set.col_types[0].to_tf()
19
- else:
20
- tf_types = [t.to_tf() for t in result_set.col_types]
21
- type_spec = tuple(tf_types)
22
-
23
- def f() -> Generator[Tuple[Any, ...], None, None]:
24
- if isinstance(type_spec, dict):
25
- json_type = result_set.col_types[0]
26
- for row in result_set.rows:
27
- res = {k: tf_utils.to_tensor(v, json_type.type_spec[k]) for k, v in row[0].items()}
28
- yield res
29
- else:
30
- for row in result_set.rows:
31
- tf_row = [tf_utils.to_tensor(val, result_set.col_types[i]) for i, val in enumerate(row)]
32
- yield tuple(tf_row)
33
- return tf.data.Dataset.from_generator(f, output_signature=type_spec)
pixeltable/utils/tf.py DELETED
@@ -1,33 +0,0 @@
1
- from typing import Any, Tuple
2
- import os
3
- os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
4
- import tensorflow as tf
5
-
6
- from pixeltable.type_system import ColumnType, JsonType
7
-
8
- TF_DTYPES = {
9
- # ColumnType.DType.INT8: tf.int8,
10
- # ColumnType.DType.INT16: tf.int16,
11
- # ColumnType.DType.INT32: tf.int32,
12
- # ColumnType.DType.INT64: tf.int64,
13
- # ColumnType.DType.UINT8: tf.uint8,
14
- # ColumnType.DType.UINT16: tf.uint16,
15
- # ColumnType.DType.UINT32: tf.uint32,
16
- # ColumnType.DType.UINT64: tf.uint64,
17
- ColumnType.Type.INT: tf.int64,
18
- ColumnType.Type.FLOAT: tf.float32
19
- }
20
-
21
- def to_tensor(v: Any, t: ColumnType) -> tf.Tensor:
22
- if t.is_scalar_type():
23
- return tf.convert_to_tensor(v)
24
- if t.is_image_type():
25
- # convert PIL.Image.Image to tf.Tensor
26
- return tf.cast(tf.keras.utils.img_to_array(v), tf.uint8)
27
- if t.is_array_type():
28
- return tf.convert_to_tensor(v, dtype=TF_DTYPES[t.dtype])
29
- if t.is_json_type():
30
- assert isinstance(v, dict)
31
- assert isinstance(t, JsonType)
32
- assert t.type_spec is not None
33
- return {key: to_tensor(val, t.type_spec[key]) for key, val in v.items()}
pixeltable/utils/video.py DELETED
@@ -1,32 +0,0 @@
1
- from typing import List
2
- import ffmpeg
3
- import glob
4
- from pathlib import Path
5
-
6
- from pixeltable.exceptions import OperationalError
7
-
8
-
9
- def extract_frames(video_path_str: str, output_path_prefix: str, fps: int = 0) -> List[str]:
10
- """
11
- Extract frames at given fps as jpg files (fps == 0: all frames).
12
- Returns list of frame file paths.
13
- """
14
- video_path = Path(video_path_str)
15
- if not video_path.exists():
16
- raise OperationalError(f'File not found: {video_path_str}')
17
- if not video_path.is_file():
18
- raise OperationalError(f'Not a file: {video_path_str}')
19
- output_path_str = f'{output_path_prefix}_%07d.jpg'
20
- s = ffmpeg.input(video_path)
21
- if fps > 0:
22
- s = s.filter('fps', fps)
23
- s = s.output(output_path_str, loglevel='quiet')
24
- try:
25
- s.run()
26
- except ffmpeg.Error:
27
- raise OperationalError(f'ffmpeg exception')
28
-
29
- # collect generated files
30
- frame_paths = glob.glob(f'{output_path_prefix}_*.jpg')
31
- frame_paths.sort()
32
- return frame_paths
pixeltable-0.1.1.dist-info/METADATA DELETED
@@ -1,31 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: pixeltable
3
- Version: 0.1.1
4
- Summary:
5
- Author: Marcel Kornacker
6
- Author-email: marcelk@gmail.com
7
- Requires-Python: >=3.9,<4.0
8
- Classifier: Programming Language :: Python :: 3
9
- Classifier: Programming Language :: Python :: 3.9
10
- Classifier: Programming Language :: Python :: 3.10
11
- Classifier: Programming Language :: Python :: 3.11
12
- Requires-Dist: cloudpickle (>=2.2.1,<3.0.0)
13
- Requires-Dist: ffmpeg-python (>=0.2.0,<0.3.0)
14
- Requires-Dist: ftfy (>=6.1.1,<7.0.0)
15
- Requires-Dist: hnswlib (>=0.6.2,<0.7.0)
16
- Requires-Dist: jmespath (>=1.0.1,<2.0.0)
17
- Requires-Dist: numpy (>=1.24.1,<2.0.0)
18
- Requires-Dist: opencv-python-headless (>=4.7.0.68,<5.0.0.0)
19
- Requires-Dist: pandas (>=1.5.3,<2.0.0)
20
- Requires-Dist: pillow (>=9.4.0,<10.0.0)
21
- Requires-Dist: psycopg2-binary (>=2.9.5,<3.0.0)
22
- Requires-Dist: regex (>=2022.10.31,<2023.0.0)
23
- Requires-Dist: sqlalchemy (>=1.4.41,<2.0.0)
24
- Requires-Dist: sqlalchemy-utils (>=0.39.0,<0.40.0)
25
- Requires-Dist: tqdm (>=4.64.1,<5.0.0)
26
- Description-Content-Type: text/markdown
27
-
28
- # Pixeltable
29
-
30
- Pixeltable is a Python library that exposes image and video data via a table interface.
31
-
pixeltable-0.1.1.dist-info/RECORD DELETED
@@ -1,36 +0,0 @@
1
- pixeltable/__init__.py,sha256=fFMx_T8IYfOL2J2WmVjOu__TNx19J0pSgtlERNJVmD0,281
2
- pixeltable/catalog.py,sha256=gxjSkf852f4ZdqguyQ0k-q9fNN1ezDl0S86GS26Pk9c,65851
3
- pixeltable/client.py,sha256=KoPRDLQm9KaBdURDU99rL6R4EzYL6GmzzWZHYS79W_I,1091
4
- pixeltable/dataframe.py,sha256=pFPOfK9UjW3JnFq-j-omw5UQGYxppxdJzBG5TiinExc,20828
5
- pixeltable/env.py,sha256=Qn8lvRHj6uT88fYYV9opIi4ZPOiJi_hUeViH--1imvs,2809
6
- pixeltable/exceptions.py,sha256=C-OjDMhDLBwG9yJ0vbugk61JwGPsxjsVU7NV-03HOo0,312
7
- pixeltable/exprs.py,sha256=SjTjg6BzjvKaqtMGxjUZMhqnl6RL50gCHxjeGUYzVqk,68598
8
- pixeltable/function.py,sha256=TWqhUTF87ZmlL1RC7EvBAT2oMPSfsnJQq-eR5q99Pg4,12275
9
- pixeltable/functions/__init__.py,sha256=CIpacjTdO6k_9pfeFFYW5fUCHqdoa47Rqm7rIMHZnw0,4733
10
- pixeltable/functions/clip.py,sha256=2iSkVFhp1zBPNgM1LaLdO115jctjKEZ1lF-ARgMJoUE,436
11
- pixeltable/functions/pil/__init__.py,sha256=mZS5tp_D1Y_15UsurLtawYMrAbTzvl-NXJrPHqNSj00,763
12
- pixeltable/functions/pil/image.py,sha256=79XgGkgyCbwo6n84qe5UNBUS4PCy5hFYzOTnZL439cE,475
13
- pixeltable/functions/tf.py,sha256=XSeOU1cD3ZT9PTB0SbO69chuNNsMXO-ewdDlO7sSBfM,830
14
- pixeltable/index.py,sha256=EQsjXvwpU72RwgXnEv7II47DZuYhGSKExqHzH583ZDs,2009
15
- pixeltable/store.py,sha256=VpSyUSphpocg5fCm0aVrJ2GHuTvNOMpzbC1B9PO0Ilk,7966
16
- pixeltable/tests/conftest.py,sha256=2WNdzlYA9ts3qUa1asu4xgf0E-gKrCD6AqVs0Uc8s3o,4010
17
- pixeltable/tests/test_client.py,sha256=K_ayeutgaLyODAhEgEd2-f2RsOU-C-xgl9oklc52SCE,552
18
- pixeltable/tests/test_dict.py,sha256=dqm2QKiQk4CCbyHfRVGAQ0b4BRmsS8DN6qT3ZgTltV0,1213
19
- pixeltable/tests/test_dirs.py,sha256=lvllfBzqJKuu6w3k6P9T8O3r6bt5iIIqf6TJj05lKmY,3184
20
- pixeltable/tests/test_exprs.py,sha256=Cb13yq5BWQEa0SLQMGwyu-JDg-Njf__vpr-j1dQMbCk,13866
21
- pixeltable/tests/test_function.py,sha256=He46y8NJrszyFE0WAaDnTFRQ_1HnQBGhTMKEG2cbN50,3369
22
- pixeltable/tests/test_functions.py,sha256=vxKPiaiK-14ZB6rX9p1fHLn90cWjg3hOa4KwNMzC56o,302
23
- pixeltable/tests/test_table.py,sha256=OXBQFOD2b0Pi9sPMryxsOpxLCwZ3lRfQm6TurMs4i0g,13794
24
- pixeltable/tests/test_tf.py,sha256=xpLTIxej6PSWer2G7I4X_67d3Mqr1izM6618AetbY-w,2503
25
- pixeltable/tests/test_types.py,sha256=zr8CxZYvozOay3pO26KEmSLtAVzbYBzF-Lr3JA81qUo,1162
26
- pixeltable/tests/test_video.py,sha256=l3EMMn8gvcW_L3Ko9EcNJ66-4n3qC2hXim97dgOxOcg,1603
27
- pixeltable/tests/utils.py,sha256=QRKuAmdrCEGSg7ABmP5T90e3EaROJuLvE2364-lK4MM,4690
28
- pixeltable/tf.py,sha256=ZEaR_TRyfJ2opml69p4QSPzKDdGr2MLHVbnbJkYTF1c,1289
29
- pixeltable/type_system.py,sha256=_KTiPjrvdOloiKuP1z3iL5P40Qzst9AlggbicMp1W64,17376
30
- pixeltable/utils/__init__.py,sha256=QyqUCCNp1_A1cBJWzkL-nCjYUR8Cd4E0u4il7ruQR-c,1763
31
- pixeltable/utils/clip.py,sha256=6kGbnGj8F1cETrZYQDU_l2qCRhc6uOlJ9EoLHucab9o,628
32
- pixeltable/utils/tf.py,sha256=ZDrkaWutMAqIOx3G-uR5uYuaFkSRJaXyt53CJmwnM-Y,1143
33
- pixeltable/utils/video.py,sha256=KzkCROl3z2OVZILmakw-90uDvCqXFrREZiaNVnh62wI,1007
34
- pixeltable-0.1.1.dist-info/METADATA,sha256=489b7eIM89bc3iVT9uw5TLWjXzrQzi_76KevRCb7lws,1106
35
- pixeltable-0.1.1.dist-info/WHEEL,sha256=vVCvjcmxuUltf8cYhJ0sJMRDLr1XsPuxEId8YDzbyCY,88
36
- pixeltable-0.1.1.dist-info/RECORD,,