pixeltable-0.3.1-py3-none-any.whl → pixeltable-0.3.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (147)
  1. pixeltable/__init__.py +64 -11
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -1
  4. pixeltable/catalog/catalog.py +50 -27
  5. pixeltable/catalog/column.py +27 -11
  6. pixeltable/catalog/dir.py +6 -4
  7. pixeltable/catalog/globals.py +8 -1
  8. pixeltable/catalog/insertable_table.py +25 -15
  9. pixeltable/catalog/named_function.py +10 -6
  10. pixeltable/catalog/path.py +3 -2
  11. pixeltable/catalog/path_dict.py +8 -6
  12. pixeltable/catalog/schema_object.py +2 -1
  13. pixeltable/catalog/table.py +123 -103
  14. pixeltable/catalog/table_version.py +292 -143
  15. pixeltable/catalog/table_version_path.py +8 -5
  16. pixeltable/catalog/view.py +68 -27
  17. pixeltable/dataframe.py +102 -72
  18. pixeltable/env.py +39 -23
  19. pixeltable/exec/__init__.py +2 -2
  20. pixeltable/exec/aggregation_node.py +10 -4
  21. pixeltable/exec/cache_prefetch_node.py +5 -3
  22. pixeltable/exec/component_iteration_node.py +9 -8
  23. pixeltable/exec/data_row_batch.py +21 -10
  24. pixeltable/exec/exec_context.py +10 -3
  25. pixeltable/exec/exec_node.py +23 -12
  26. pixeltable/exec/expr_eval/evaluators.py +18 -17
  27. pixeltable/exec/expr_eval/expr_eval_node.py +29 -16
  28. pixeltable/exec/expr_eval/globals.py +33 -11
  29. pixeltable/exec/expr_eval/row_buffer.py +5 -6
  30. pixeltable/exec/expr_eval/schedulers.py +170 -42
  31. pixeltable/exec/in_memory_data_node.py +8 -7
  32. pixeltable/exec/row_update_node.py +15 -5
  33. pixeltable/exec/sql_node.py +56 -27
  34. pixeltable/exprs/__init__.py +2 -2
  35. pixeltable/exprs/arithmetic_expr.py +57 -26
  36. pixeltable/exprs/array_slice.py +1 -1
  37. pixeltable/exprs/column_property_ref.py +2 -1
  38. pixeltable/exprs/column_ref.py +20 -15
  39. pixeltable/exprs/comparison.py +6 -2
  40. pixeltable/exprs/compound_predicate.py +1 -3
  41. pixeltable/exprs/data_row.py +2 -2
  42. pixeltable/exprs/expr.py +101 -72
  43. pixeltable/exprs/expr_dict.py +2 -1
  44. pixeltable/exprs/expr_set.py +3 -1
  45. pixeltable/exprs/function_call.py +39 -41
  46. pixeltable/exprs/globals.py +1 -0
  47. pixeltable/exprs/in_predicate.py +2 -2
  48. pixeltable/exprs/inline_expr.py +20 -17
  49. pixeltable/exprs/json_mapper.py +4 -2
  50. pixeltable/exprs/json_path.py +12 -18
  51. pixeltable/exprs/literal.py +5 -9
  52. pixeltable/exprs/method_ref.py +1 -0
  53. pixeltable/exprs/object_ref.py +1 -1
  54. pixeltable/exprs/row_builder.py +31 -16
  55. pixeltable/exprs/rowid_ref.py +14 -5
  56. pixeltable/exprs/similarity_expr.py +11 -6
  57. pixeltable/exprs/sql_element_cache.py +1 -1
  58. pixeltable/exprs/type_cast.py +24 -9
  59. pixeltable/ext/__init__.py +1 -0
  60. pixeltable/ext/functions/__init__.py +1 -0
  61. pixeltable/ext/functions/whisperx.py +2 -2
  62. pixeltable/ext/functions/yolox.py +11 -11
  63. pixeltable/func/aggregate_function.py +17 -13
  64. pixeltable/func/callable_function.py +6 -6
  65. pixeltable/func/expr_template_function.py +15 -14
  66. pixeltable/func/function.py +16 -16
  67. pixeltable/func/function_registry.py +11 -8
  68. pixeltable/func/globals.py +4 -2
  69. pixeltable/func/query_template_function.py +12 -13
  70. pixeltable/func/signature.py +18 -9
  71. pixeltable/func/tools.py +10 -17
  72. pixeltable/func/udf.py +106 -11
  73. pixeltable/functions/__init__.py +21 -2
  74. pixeltable/functions/anthropic.py +21 -15
  75. pixeltable/functions/fireworks.py +63 -5
  76. pixeltable/functions/gemini.py +13 -3
  77. pixeltable/functions/globals.py +18 -6
  78. pixeltable/functions/huggingface.py +20 -38
  79. pixeltable/functions/image.py +7 -3
  80. pixeltable/functions/json.py +1 -0
  81. pixeltable/functions/llama_cpp.py +1 -4
  82. pixeltable/functions/mistralai.py +31 -20
  83. pixeltable/functions/ollama.py +4 -18
  84. pixeltable/functions/openai.py +214 -109
  85. pixeltable/functions/replicate.py +11 -10
  86. pixeltable/functions/string.py +70 -7
  87. pixeltable/functions/timestamp.py +21 -8
  88. pixeltable/functions/together.py +66 -52
  89. pixeltable/functions/video.py +1 -0
  90. pixeltable/functions/vision.py +14 -11
  91. pixeltable/functions/whisper.py +2 -1
  92. pixeltable/globals.py +61 -28
  93. pixeltable/index/__init__.py +1 -1
  94. pixeltable/index/btree.py +5 -3
  95. pixeltable/index/embedding_index.py +15 -14
  96. pixeltable/io/__init__.py +1 -1
  97. pixeltable/io/external_store.py +30 -25
  98. pixeltable/io/fiftyone.py +6 -14
  99. pixeltable/io/globals.py +33 -27
  100. pixeltable/io/hf_datasets.py +3 -2
  101. pixeltable/io/label_studio.py +80 -71
  102. pixeltable/io/pandas.py +33 -9
  103. pixeltable/io/parquet.py +10 -13
  104. pixeltable/iterators/__init__.py +1 -0
  105. pixeltable/iterators/audio.py +205 -0
  106. pixeltable/iterators/document.py +19 -8
  107. pixeltable/iterators/image.py +6 -24
  108. pixeltable/iterators/string.py +3 -6
  109. pixeltable/iterators/video.py +1 -7
  110. pixeltable/metadata/__init__.py +9 -2
  111. pixeltable/metadata/converters/convert_10.py +2 -2
  112. pixeltable/metadata/converters/convert_15.py +1 -5
  113. pixeltable/metadata/converters/convert_16.py +2 -4
  114. pixeltable/metadata/converters/convert_17.py +2 -4
  115. pixeltable/metadata/converters/convert_18.py +2 -4
  116. pixeltable/metadata/converters/convert_19.py +2 -5
  117. pixeltable/metadata/converters/convert_20.py +1 -4
  118. pixeltable/metadata/converters/convert_21.py +4 -6
  119. pixeltable/metadata/converters/convert_22.py +1 -0
  120. pixeltable/metadata/converters/convert_23.py +5 -5
  121. pixeltable/metadata/converters/convert_24.py +12 -13
  122. pixeltable/metadata/converters/convert_26.py +23 -0
  123. pixeltable/metadata/converters/util.py +3 -4
  124. pixeltable/metadata/notes.py +1 -0
  125. pixeltable/metadata/schema.py +13 -2
  126. pixeltable/plan.py +173 -98
  127. pixeltable/store.py +42 -26
  128. pixeltable/type_system.py +130 -85
  129. pixeltable/utils/arrow.py +1 -7
  130. pixeltable/utils/coco.py +16 -17
  131. pixeltable/utils/code.py +1 -1
  132. pixeltable/utils/console_output.py +44 -0
  133. pixeltable/utils/description_helper.py +7 -7
  134. pixeltable/utils/documents.py +3 -1
  135. pixeltable/utils/filecache.py +13 -8
  136. pixeltable/utils/http_server.py +9 -8
  137. pixeltable/utils/media_store.py +2 -1
  138. pixeltable/utils/pytorch.py +11 -14
  139. pixeltable/utils/s3.py +1 -0
  140. pixeltable/utils/sql.py +1 -0
  141. pixeltable/utils/transactional_directory.py +2 -2
  142. {pixeltable-0.3.1.dist-info → pixeltable-0.3.3.dist-info}/METADATA +7 -8
  143. pixeltable-0.3.3.dist-info/RECORD +163 -0
  144. pixeltable-0.3.1.dist-info/RECORD +0 -160
  145. {pixeltable-0.3.1.dist-info → pixeltable-0.3.3.dist-info}/LICENSE +0 -0
  146. {pixeltable-0.3.1.dist-info → pixeltable-0.3.3.dist-info}/WHEEL +0 -0
  147. {pixeltable-0.3.1.dist-info → pixeltable-0.3.3.dist-info}/entry_points.txt +0 -0
@@ -25,12 +25,13 @@ if TYPE_CHECKING:
25
25
 
26
26
 
27
27
  @env.register_client('together')
28
- def _(api_key: str) -> 'together.Together':
28
+ def _(api_key: str) -> 'together.AsyncTogether':
29
29
  import together
30
- return together.Together(api_key=api_key)
31
30
 
31
+ return together.AsyncTogether(api_key=api_key)
32
32
 
33
- def _together_client() -> 'together.Together':
33
+
34
+ def _together_client() -> 'together.AsyncTogether':
34
35
  return env.Env.get().get_client('together')
35
36
 
36
37
 
@@ -39,6 +40,7 @@ T = TypeVar('T')
39
40
 
40
41
  def _retry(fn: Callable[..., T]) -> Callable[..., T]:
41
42
  import together
43
+
42
44
  return tenacity.retry(
43
45
  retry=tenacity.retry_if_exception_type(together.error.RateLimitError),
44
46
  wait=tenacity.wait_random_exponential(multiplier=1, max=60),
@@ -46,8 +48,8 @@ def _retry(fn: Callable[..., T]) -> Callable[..., T]:
46
48
  )(fn)
47
49
 
48
50
 
49
- @pxt.udf
50
- def completions(
51
+ @pxt.udf(resource_pool='request-rate:together:chat')
52
+ async def completions(
51
53
  prompt: str,
52
54
  *,
53
55
  model: str,
@@ -68,6 +70,10 @@ def completions(
68
70
  Equivalent to the Together AI `completions` API endpoint.
69
71
  For additional details, see: [https://docs.together.ai/reference/completions-1](https://docs.together.ai/reference/completions-1)
70
72
 
73
+ Request throttling:
74
+ Applies the rate limit set in the config (section `together.rate_limits`, key `chat`). If no rate
75
+ limit is configured, uses a default of 600 RPM.
76
+
71
77
  __Requirements:__
72
78
 
73
79
  - `pip install together`
@@ -85,29 +91,27 @@ def completions(
85
91
  Add a computed column that applies the model `mistralai/Mixtral-8x7B-v0.1` to an existing Pixeltable column `tbl.prompt`
86
92
  of the table `tbl`:
87
93
 
88
- >>> tbl['response'] = completions(tbl.prompt, model='mistralai/Mixtral-8x7B-v0.1')
94
+ >>> tbl.add_computed_column(response=completions(tbl.prompt, model='mistralai/Mixtral-8x7B-v0.1'))
89
95
  """
90
- return (
91
- _retry(_together_client().completions.create)(
92
- prompt=prompt,
93
- model=model,
94
- max_tokens=max_tokens,
95
- stop=stop,
96
- temperature=temperature,
97
- top_p=top_p,
98
- top_k=top_k,
99
- repetition_penalty=repetition_penalty,
100
- logprobs=logprobs,
101
- echo=echo,
102
- n=n,
103
- safety_model=safety_model,
104
- )
105
- .dict()
96
+ result = await _together_client().completions.create(
97
+ prompt=prompt,
98
+ model=model,
99
+ max_tokens=max_tokens,
100
+ stop=stop,
101
+ temperature=temperature,
102
+ top_p=top_p,
103
+ top_k=top_k,
104
+ repetition_penalty=repetition_penalty,
105
+ logprobs=logprobs,
106
+ echo=echo,
107
+ n=n,
108
+ safety_model=safety_model,
106
109
  )
110
+ return result.dict()
107
111
 
108
112
 
109
- @pxt.udf
110
- def chat_completions(
113
+ @pxt.udf(resource_pool='request-rate:together:chat')
114
+ async def chat_completions(
111
115
  messages: list[dict[str, str]],
112
116
  *,
113
117
  model: str,
@@ -131,6 +135,10 @@ def chat_completions(
131
135
  Equivalent to the Together AI `chat/completions` API endpoint.
132
136
  For additional details, see: [https://docs.together.ai/reference/chat-completions-1](https://docs.together.ai/reference/chat-completions-1)
133
137
 
138
+ Request throttling:
139
+ Applies the rate limit set in the config (section `together.rate_limits`, key `chat`). If no rate
140
+ limit is configured, uses a default of 600 RPM.
141
+
134
142
  __Requirements:__
135
143
 
136
144
  - `pip install together`
@@ -149,28 +157,26 @@ def chat_completions(
149
157
  of the table `tbl`:
150
158
 
151
159
  >>> messages = [{'role': 'user', 'content': tbl.prompt}]
152
- ... tbl['response'] = chat_completions(messages, model='mistralai/Mixtral-8x7B-v0.1')
160
+ ... tbl.add_computed_column(response=chat_completions(messages, model='mistralai/Mixtral-8x7B-v0.1'))
153
161
  """
154
- return (
155
- _retry(_together_client().chat.completions.create)(
156
- messages=messages,
157
- model=model,
158
- max_tokens=max_tokens,
159
- stop=stop,
160
- temperature=temperature,
161
- top_p=top_p,
162
- top_k=top_k,
163
- repetition_penalty=repetition_penalty,
164
- logprobs=logprobs,
165
- echo=echo,
166
- n=n,
167
- safety_model=safety_model,
168
- response_format=response_format,
169
- tools=tools,
170
- tool_choice=tool_choice,
171
- )
172
- .dict()
162
+ result = await _together_client().chat.completions.create(
163
+ messages=messages,
164
+ model=model,
165
+ max_tokens=max_tokens,
166
+ stop=stop,
167
+ temperature=temperature,
168
+ top_p=top_p,
169
+ top_k=top_k,
170
+ repetition_penalty=repetition_penalty,
171
+ logprobs=logprobs,
172
+ echo=echo,
173
+ n=n,
174
+ safety_model=safety_model,
175
+ response_format=response_format,
176
+ tools=tools,
177
+ tool_choice=tool_choice,
173
178
  )
179
+ return result.dict()
174
180
 
175
181
 
176
182
  _embedding_dimensions_cache = {
@@ -185,14 +191,18 @@ _embedding_dimensions_cache = {
185
191
  }
186
192
 
187
193
 
188
- @pxt.udf(batch_size=32)
189
- def embeddings(input: Batch[str], *, model: str) -> Batch[pxt.Array[(None,), pxt.Float]]:
194
+ @pxt.udf(batch_size=32, resource_pool='request-rate:together:embeddings')
195
+ async def embeddings(input: Batch[str], *, model: str) -> Batch[pxt.Array[(None,), pxt.Float]]:
190
196
  """
191
197
  Query an embedding model for a given string of text.
192
198
 
193
199
  Equivalent to the Together AI `embeddings` API endpoint.
194
200
  For additional details, see: [https://docs.together.ai/reference/embeddings-2](https://docs.together.ai/reference/embeddings-2)
195
201
 
202
+ Request throttling:
203
+ Applies the rate limit set in the config (section `together.rate_limits`, key `embeddings`). If no rate
204
+ limit is configured, uses a default of 600 RPM.
205
+
196
206
  __Requirements:__
197
207
 
198
208
  - `pip install together`
@@ -208,9 +218,9 @@ def embeddings(input: Batch[str], *, model: str) -> Batch[pxt.Array[(None,), pxt
208
218
  Add a computed column that applies the model `togethercomputer/m2-bert-80M-8k-retrieval`
209
219
  to an existing Pixeltable column `tbl.text` of the table `tbl`:
210
220
 
211
- >>> tbl['response'] = embeddings(tbl.text, model='togethercomputer/m2-bert-80M-8k-retrieval')
221
+ >>> tbl.add_computed_column(response=embeddings(tbl.text, model='togethercomputer/m2-bert-80M-8k-retrieval'))
212
222
  """
213
- result = _retry(_together_client().embeddings.create)(input=input, model=model)
223
+ result = await _together_client().embeddings.create(input=input, model=model)
214
224
  return [np.array(data.embedding, dtype=np.float64) for data in result.data]
215
225
 
216
226
 
@@ -223,8 +233,8 @@ def _(model: str) -> pxt.ArrayType:
223
233
  return pxt.ArrayType((dimensions,), dtype=pxt.FloatType())
224
234
 
225
235
 
226
- @pxt.udf
227
- def image_generations(
236
+ @pxt.udf(resource_pool='request-rate:together:images')
237
+ async def image_generations(
228
238
  prompt: str,
229
239
  *,
230
240
  model: str,
@@ -240,6 +250,10 @@ def image_generations(
240
250
  Equivalent to the Together AI `images/generations` API endpoint.
241
251
  For additional details, see: [https://docs.together.ai/reference/post_images-generations](https://docs.together.ai/reference/post_images-generations)
242
252
 
253
+ Request throttling:
254
+ Applies the rate limit set in the config (section `together.rate_limits`, key `images`). If no rate
255
+ limit is configured, uses a default of 600 RPM.
256
+
243
257
  __Requirements:__
244
258
 
245
259
  - `pip install together`
@@ -257,9 +271,9 @@ def image_generations(
257
271
  Add a computed column that applies the model `stabilityai/stable-diffusion-xl-base-1.0`
258
272
  to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
259
273
 
260
- >>> tbl['response'] = image_generations(tbl.prompt, model='stabilityai/stable-diffusion-xl-base-1.0')
274
+ >>> tbl.add_computed_column(response=image_generations(tbl.prompt, model='stabilityai/stable-diffusion-xl-base-1.0'))
261
275
  """
262
- result = _retry(_together_client().images.generate)(
276
+ result = await _together_client().images.generate(
263
277
  prompt=prompt, model=model, steps=steps, seed=seed, height=height, width=width, negative_prompt=negative_prompt
264
278
  )
265
279
  if result.data[0].b64_json is not None:
@@ -52,6 +52,7 @@ class make_video(pxt.Aggregator):
52
52
  """
53
53
  Aggregator that creates a video from a sequence of images.
54
54
  """
55
+
55
56
  def __init__(self, fps: int = 25):
56
57
  """follows https://pyav.org/docs/develop/cookbook/numpy.html#generating-video"""
57
58
  self.container: Optional[av.container.OutputContainer] = None
@@ -205,7 +205,9 @@ def eval_detections(
205
205
  pred_filter = pred_classes_arr == class_idx
206
206
  gt_filter = gt_classes_arr == class_idx
207
207
  class_pred_scores = pred_scores_arr[pred_filter]
208
- tp, fp = __calculate_image_tpfp(pred_bboxes_arr[pred_filter], class_pred_scores, gt_bboxes_arr[gt_filter], min_iou)
208
+ tp, fp = __calculate_image_tpfp(
209
+ pred_bboxes_arr[pred_filter], class_pred_scores, gt_bboxes_arr[gt_filter], min_iou
210
+ )
209
211
  ordered_class_pred_scores = -np.sort(-class_pred_scores)
210
212
  result.append(
211
213
  {
@@ -235,6 +237,7 @@ class mean_ap(pxt.Aggregator):
235
237
 
236
238
  - A `dict[int, float]` mapping each label class to an average precision (AP) value for that class.
237
239
  """
240
+
238
241
  def __init__(self):
239
242
  self.class_tpfp: dict[int, list[dict]] = defaultdict(list)
240
243
 
@@ -282,22 +285,22 @@ def __create_label_colors(labels: list[Any]) -> dict[Any, str]:
282
285
  label_hash = int(hashlib.md5(str(label).encode()).hexdigest(), 16)
283
286
  hue = (label_hash % 360) / 360.0
284
287
  rgb = colorsys.hsv_to_rgb(hue, 0.7, 0.95)
285
- hex_color = '#{:02x}{:02x}{:02x}'.format(int(rgb[0]*255), int(rgb[1]*255), int(rgb[2]*255))
288
+ hex_color = '#{:02x}{:02x}{:02x}'.format(int(rgb[0] * 255), int(rgb[1] * 255), int(rgb[2] * 255))
286
289
  result[label] = hex_color
287
290
  return result
288
291
 
289
292
 
290
293
  @pxt.udf
291
294
  def draw_bounding_boxes(
292
- img: PIL.Image.Image,
293
- boxes: list[list[int]],
294
- labels: Optional[list[Any]] = None,
295
- color: Optional[str] = None,
296
- box_colors: Optional[list[str]] = None,
297
- fill: bool = False,
298
- width: int = 1,
299
- font: Optional[str] = None,
300
- font_size: Optional[int] = None,
295
+ img: PIL.Image.Image,
296
+ boxes: list[list[int]],
297
+ labels: Optional[list[Any]] = None,
298
+ color: Optional[str] = None,
299
+ box_colors: Optional[list[str]] = None,
300
+ fill: bool = False,
301
+ width: int = 1,
302
+ font: Optional[str] = None,
303
+ font_size: Optional[int] = None,
301
304
  ) -> PIL.Image.Image:
302
305
  """
303
306
  Draws bounding boxes on the given image.
@@ -14,6 +14,7 @@ from pixeltable.env import Env
14
14
  if TYPE_CHECKING:
15
15
  from whisper import Whisper # type: ignore[import-untyped]
16
16
 
17
+
17
18
  @pxt.udf
18
19
  def transcribe(
19
20
  audio: pxt.Audio,
@@ -52,7 +53,7 @@ def transcribe(
52
53
  Add a computed column that applies the model `base.en` to an existing Pixeltable column `tbl.audio`
53
54
  of the table `tbl`:
54
55
 
55
- >>> tbl['result'] = transcribe(tbl.audio, model='base.en')
56
+ >>> tbl.add_computed_column(result=transcribe(tbl.audio, model='base.en'))
56
57
  """
57
58
  Env.get().require_package('whisper')
58
59
  Env.get().require_package('torch')
pixeltable/globals.py CHANGED
@@ -20,15 +20,17 @@ from pixeltable.utils.filecache import FileCache
20
20
 
21
21
  _logger = logging.getLogger('pixeltable')
22
22
 
23
+
23
24
  def init() -> None:
24
25
  """Initializes the Pixeltable environment."""
25
26
  _ = Catalog.get()
26
27
 
28
+
27
29
  def _get_or_drop_existing_path(
28
30
  path_str: str,
29
31
  expected_obj_type: type[catalog.SchemaObject],
30
32
  expected_snapshot: bool,
31
- if_exists: catalog.IfExistsParam
33
+ if_exists: catalog.IfExistsParam,
32
34
  ) -> Optional[catalog.SchemaObject]:
33
35
  """Handle schema object path collision during creation according to the if_exists parameter.
34
36
 
@@ -53,12 +55,15 @@ def _get_or_drop_existing_path(
53
55
  raise excs.Error(f'Path `{path_str}` already exists.')
54
56
 
55
57
  existing_path = cat.paths[path]
56
- existing_path_is_snapshot = 'is_snapshot' in existing_path.get_metadata() and existing_path.get_metadata()['is_snapshot']
58
+ existing_path_is_snapshot = (
59
+ 'is_snapshot' in existing_path.get_metadata() and existing_path.get_metadata()['is_snapshot']
60
+ )
57
61
  obj_type_str = 'Snapshot' if expected_snapshot else expected_obj_type._display_name().capitalize()
58
62
  # Check if the existing path is of expected type.
59
- if (not isinstance(existing_path, expected_obj_type)
60
- or (expected_snapshot and not existing_path_is_snapshot)):
61
- raise excs.Error(f'Path `{path_str}` already exists but is not a {obj_type_str}. Cannot {if_exists.name.lower()} it.')
63
+ if not isinstance(existing_path, expected_obj_type) or (expected_snapshot and not existing_path_is_snapshot):
64
+ raise excs.Error(
65
+ f'Path `{path_str}` already exists but is not a {obj_type_str}. Cannot {if_exists.name.lower()} it.'
66
+ )
62
67
 
63
68
  # if_exists='ignore' return the handle to the existing object.
64
69
  assert isinstance(existing_path, expected_obj_type)
@@ -69,12 +74,14 @@ def _get_or_drop_existing_path(
69
74
  # unless if_exists='replace_force'.
70
75
  has_dependents = existing_path._has_dependents
71
76
  if if_exists == catalog.IfExistsParam.REPLACE and has_dependents:
72
- raise excs.Error(f"{obj_type_str} `{path_str}` already exists and has dependents. Use `if_exists='replace_force'` to replace it.")
77
+ raise excs.Error(
78
+ f"{obj_type_str} `{path_str}` already exists and has dependents. Use `if_exists='replace_force'` to replace it."
79
+ )
73
80
  else:
74
81
  assert if_exists == catalog.IfExistsParam.REPLACE_FORCE or not has_dependents
75
82
  # Drop the existing path so it can be replaced.
76
83
  # Any errors during drop will be raised.
77
- _logger.info(f"Dropping {obj_type_str} `{path_str}` to replace it.")
84
+ _logger.info(f'Dropping {obj_type_str} `{path_str}` to replace it.')
78
85
  if isinstance(existing_path, catalog.Dir):
79
86
  drop_dir(path_str, force=True)
80
87
  else:
@@ -83,6 +90,7 @@ def _get_or_drop_existing_path(
83
90
 
84
91
  return None
85
92
 
93
+
86
94
  def create_table(
87
95
  path_str: str,
88
96
  schema_or_df: Union[dict[str, Any], DataFrame],
@@ -91,7 +99,7 @@ def create_table(
91
99
  num_retained_versions: int = 10,
92
100
  comment: str = '',
93
101
  media_validation: Literal['on_read', 'on_write'] = 'on_write',
94
- if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error'
102
+ if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
95
103
  ) -> catalog.Table:
96
104
  """Create a new base table.
97
105
 
@@ -166,7 +174,9 @@ def create_table(
166
174
  df = schema_or_df
167
175
  schema = df.schema
168
176
  elif isinstance(schema_or_df, DataFrameResultSet):
169
- raise excs.Error('`schema_or_df` must be either a schema dictionary or a Pixeltable DataFrame. (Is there an extraneous call to `collect()`?)')
177
+ raise excs.Error(
178
+ '`schema_or_df` must be either a schema dictionary or a Pixeltable DataFrame. (Is there an extraneous call to `collect()`?)'
179
+ )
170
180
  else:
171
181
  raise excs.Error('`schema_or_df` must be either a schema dictionary or a Pixeltable DataFrame.')
172
182
 
@@ -182,8 +192,15 @@ def create_table(
182
192
  raise excs.Error('primary_key must be a single column name or a list of column names')
183
193
 
184
194
  tbl = catalog.InsertableTable._create(
185
- dir._id, path.name, schema, df, primary_key=primary_key, num_retained_versions=num_retained_versions,
186
- comment=comment, media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'))
195
+ dir._id,
196
+ path.name,
197
+ schema,
198
+ df,
199
+ primary_key=primary_key,
200
+ num_retained_versions=num_retained_versions,
201
+ comment=comment,
202
+ media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'),
203
+ )
187
204
  cat.paths[path] = tbl
188
205
 
189
206
  _logger.info(f'Created table `{path_str}`.')
@@ -293,17 +310,27 @@ def create_view(
293
310
  # additional columns should not be in the base table
294
311
  for col_name in additional_columns.keys():
295
312
  if col_name in [c.name for c in tbl_version_path.columns()]:
296
- raise excs.Error(f"Column {col_name!r} already exists in the base table {tbl_version_path.get_column(col_name).tbl.name}.")
313
+ raise excs.Error(
314
+ f'Column {col_name!r} already exists in the base table {tbl_version_path.get_column(col_name).tbl.name}.'
315
+ )
297
316
  if iterator is None:
298
317
  iterator_class, iterator_args = None, None
299
318
  else:
300
319
  iterator_class, iterator_args = iterator
301
320
 
302
321
  view = catalog.View._create(
303
- dir._id, path.name, base=tbl_version_path, additional_columns=additional_columns, predicate=where,
304
- is_snapshot=is_snapshot, iterator_cls=iterator_class, iterator_args=iterator_args,
305
- num_retained_versions=num_retained_versions, comment=comment,
306
- media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'))
322
+ dir._id,
323
+ path.name,
324
+ base=tbl_version_path,
325
+ additional_columns=additional_columns,
326
+ predicate=where,
327
+ is_snapshot=is_snapshot,
328
+ iterator_cls=iterator_class,
329
+ iterator_args=iterator_args,
330
+ num_retained_versions=num_retained_versions,
331
+ comment=comment,
332
+ media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'),
333
+ )
307
334
  cat.paths[path] = view
308
335
  _logger.info(f'Created view `{path_str}`.')
309
336
  FileCache.get().emit_eviction_warnings()
@@ -450,8 +477,9 @@ def move(path: str, new_path: str) -> None:
450
477
  obj._move(new_p.name, new_dir._id)
451
478
 
452
479
 
453
- def drop_table(table: Union[str, catalog.Table], force: bool = False,
454
- if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
480
+ def drop_table(
481
+ table: Union[str, catalog.Table], force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error'
482
+ ) -> None:
455
483
  """Drop a table, view, or snapshot.
456
484
 
457
485
  Args:
@@ -497,7 +525,9 @@ def drop_table(table: Union[str, catalog.Table], force: bool = False,
497
525
  else:
498
526
  raise excs.Error(f'Table `{table}` does not exist.')
499
527
  if not isinstance(tbl, catalog.Table):
500
- raise excs.Error(f'{tbl} needs to be a {catalog.Table._display_name()} but is a {type(tbl)._display_name()}')
528
+ raise excs.Error(
529
+ f'{tbl} needs to be a {catalog.Table._display_name()} but is a {type(tbl)._display_name()}'
530
+ )
501
531
  else:
502
532
  tbl = table
503
533
  tbl_path_obj = catalog.Path(tbl._path)
@@ -543,7 +573,10 @@ def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
543
573
  Catalog.get().paths.check_is_valid(path, expected=catalog.Dir)
544
574
  return [str(p) for p in Catalog.get().paths.get_children(path, child_type=catalog.Table, recursive=recursive)]
545
575
 
546
- def create_dir(path_str: str, if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error') -> Optional[catalog.Dir]:
576
+
577
+ def create_dir(
578
+ path_str: str, if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error'
579
+ ) -> Optional[catalog.Dir]:
547
580
  """Create a directory.
548
581
 
549
582
  Args:
@@ -606,10 +639,10 @@ def create_dir(path_str: str, if_exists: Literal['error', 'ignore', 'replace', '
606
639
  dir = catalog.Dir(dir_record.id, parent._id, path.name)
607
640
  cat.paths[path] = dir
608
641
  session.commit()
609
- _logger.info(f'Created directory `{path_str}`.')
610
- print(f'Created directory `{path_str}`.')
642
+ Env.get().console_logger.info(f'Created directory `{path_str}`.')
611
643
  return dir
612
644
 
645
+
613
646
  def drop_dir(path_str: str, force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
614
647
  """Remove a directory.
615
648
 
@@ -660,7 +693,8 @@ def drop_dir(path_str: str, force: bool = False, if_not_exists: Literal['error',
660
693
 
661
694
  if not isinstance(obj, catalog.Dir):
662
695
  raise excs.Error(
663
- f'{str(path)} needs to be a {catalog.Dir._display_name()} but is a {type(obj)._display_name()}')
696
+ f'{str(path)} needs to be a {catalog.Dir._display_name()} but is a {type(obj)._display_name()}'
697
+ )
664
698
 
665
699
  children = cat.paths.get_children(path, child_type=None, recursive=True)
666
700
 
@@ -721,7 +755,9 @@ def list_functions() -> Styler:
721
755
  paths = ['.'.join(f.self_path.split('.')[:-1]) for f in functions]
722
756
  names = [f.name for f in functions]
723
757
  params = [
724
- ', '.join([param_name + ': ' + str(param_type) for param_name, param_type in f.signatures[0].parameters.items()])
758
+ ', '.join(
759
+ [param_name + ': ' + str(param_type) for param_name, param_type in f.signatures[0].parameters.items()]
760
+ )
725
761
  for f in functions
726
762
  ]
727
763
  pd_df = pd.DataFrame(
@@ -772,10 +808,7 @@ def tools(*args: Union[func.Function, func.tools.Tool]) -> func.tools.Tools:
772
808
  ... pxt.tool(traffic_quote, name='traffic_conditions'),
773
809
  ... )
774
810
  """
775
- return func.tools.Tools(tools=[
776
- arg if isinstance(arg, func.tools.Tool) else tool(arg)
777
- for arg in args
778
- ])
811
+ return func.tools.Tools(tools=[arg if isinstance(arg, func.tools.Tool) else tool(arg) for arg in args])
779
812
 
780
813
 
781
814
  def tool(fn: func.Function, name: Optional[str] = None, description: Optional[str] = None) -> func.tools.Tool:
@@ -1,3 +1,3 @@
1
1
  from .base import IndexBase
2
- from .embedding_index import EmbeddingIndex
3
2
  from .btree import BtreeIndex
3
+ from .embedding_index import EmbeddingIndex
pixeltable/index/btree.py CHANGED
@@ -1,4 +1,4 @@
1
- from typing import Optional, TYPE_CHECKING
1
+ from typing import TYPE_CHECKING, Optional
2
2
 
3
3
  import sqlalchemy as sql
4
4
 
@@ -7,15 +7,18 @@ import sqlalchemy as sql
7
7
  import pixeltable.exceptions as excs
8
8
  from pixeltable import catalog, exprs
9
9
  from pixeltable.func.udf import udf
10
+
10
11
  from .base import IndexBase
11
12
 
12
13
  if TYPE_CHECKING:
13
14
  import pixeltable.exprs
14
15
 
16
+
15
17
  class BtreeIndex(IndexBase):
16
18
  """
17
19
  Interface to B-tree indices in Postgres.
18
20
  """
21
+
19
22
  MAX_STRING_LEN = 256
20
23
 
21
24
  value_expr: 'pixeltable.exprs.Expr'
@@ -25,7 +28,7 @@ class BtreeIndex(IndexBase):
25
28
  def str_filter(s: Optional[str]) -> Optional[str]:
26
29
  if s is None:
27
30
  return None
28
- return s[:BtreeIndex.MAX_STRING_LEN]
31
+ return s[: BtreeIndex.MAX_STRING_LEN]
29
32
 
30
33
  def __init__(self, c: 'catalog.Column'):
31
34
  if not c.col_type.is_scalar_type() and not c.col_type.is_media_type():
@@ -64,4 +67,3 @@ class BtreeIndex(IndexBase):
64
67
  @classmethod
65
68
  def from_dict(cls, c: 'catalog.Column', d: dict) -> 'BtreeIndex':
66
69
  return cls(c)
67
-
@@ -31,11 +31,7 @@ class EmbeddingIndex(IndexBase):
31
31
  IP = 2
32
32
  L2 = 3
33
33
 
34
- PGVECTOR_OPS = {
35
- Metric.COSINE: 'vector_cosine_ops',
36
- Metric.IP: 'vector_ip_ops',
37
- Metric.L2: 'vector_l2_ops'
38
- }
34
+ PGVECTOR_OPS = {Metric.COSINE: 'vector_cosine_ops', Metric.IP: 'vector_ip_ops', Metric.L2: 'vector_l2_ops'}
39
35
 
40
36
  metric: Metric
41
37
  value_expr: exprs.FunctionCall
@@ -97,8 +93,7 @@ class EmbeddingIndex(IndexBase):
97
93
  # contains no matching signatures.
98
94
  assert embed is not None
99
95
  raise excs.Error(
100
- f'The function `{embed.name}` is not a valid embedding: '
101
- 'it must take a single string or image parameter'
96
+ f'The function `{embed.name}` is not a valid embedding: it must take a single string or image parameter'
102
97
  )
103
98
 
104
99
  # Now validate the return types of the embedding functions.
@@ -116,7 +111,8 @@ class EmbeddingIndex(IndexBase):
116
111
 
117
112
  self.metric = self.Metric[metric.upper()]
118
113
  self.value_expr = (
119
- self.string_embed(exprs.ColumnRef(c)) if c.col_type.is_string_type()
114
+ self.string_embed(exprs.ColumnRef(c))
115
+ if c.col_type.is_string_type()
120
116
  else self.image_embed(exprs.ColumnRef(c))
121
117
  )
122
118
  assert isinstance(self.value_expr.col_type, ts.ArrayType)
@@ -138,10 +134,11 @@ class EmbeddingIndex(IndexBase):
138
134
  def create_index(self, index_name: str, index_value_col: catalog.Column, conn: sql.engine.Connection) -> None:
139
135
  """Create the index on the index value column"""
140
136
  idx = sql.Index(
141
- index_name, index_value_col.sa_col,
137
+ index_name,
138
+ index_value_col.sa_col,
142
139
  postgresql_using='hnsw',
143
140
  postgresql_with={'m': 16, 'ef_construction': 64},
144
- postgresql_ops={index_value_col.sa_col.name: self.PGVECTOR_OPS[self.metric]}
141
+ postgresql_ops={index_value_col.sa_col.name: self.PGVECTOR_OPS[self.metric]},
145
142
  )
146
143
  idx.create(bind=conn)
147
144
 
@@ -191,16 +188,20 @@ class EmbeddingIndex(IndexBase):
191
188
  return 'embedding'
192
189
 
193
190
  @classmethod
194
- def _resolve_embedding_fn(cls, embed_fn: func.Function, expected_type: ts.ColumnType.Type) -> Optional[func.Function]:
191
+ def _resolve_embedding_fn(
192
+ cls, embed_fn: func.Function, expected_type: ts.ColumnType.Type
193
+ ) -> Optional[func.Function]:
195
194
  """Find an overload resolution for `embed_fn` that matches the given type."""
196
195
  assert isinstance(embed_fn, func.Function)
197
196
  for resolved_fn in embed_fn._resolved_fns:
198
197
  # The embedding function must be a 1-ary function of the correct type. But it's ok if the function signature
199
198
  # has more than one parameter, as long as it has at most one *required* parameter.
200
199
  sig = resolved_fn.signature
201
- if (len(sig.parameters) >= 1
200
+ if (
201
+ len(sig.parameters) >= 1
202
202
  and len(sig.required_parameters) <= 1
203
- and sig.parameters_by_pos[0].col_type.type_enum == expected_type):
203
+ and sig.parameters_by_pos[0].col_type.type_enum == expected_type
204
+ ):
204
205
  return resolved_fn
205
206
  return None
206
207
 
@@ -237,7 +238,7 @@ class EmbeddingIndex(IndexBase):
237
238
  return {
238
239
  'metric': self.metric.name.lower(),
239
240
  'string_embed': None if self.string_embed is None else self.string_embed.as_dict(),
240
- 'image_embed': None if self.image_embed is None else self.image_embed.as_dict()
241
+ 'image_embed': None if self.image_embed is None else self.image_embed.as_dict(),
241
242
  }
242
243
 
243
244
  @classmethod
pixeltable/io/__init__.py CHANGED
@@ -2,7 +2,7 @@ from .external_store import ExternalStore, SyncStatus
2
2
  from .globals import create_label_studio_project, export_images_as_fo_dataset, import_json, import_rows
3
3
  from .hf_datasets import import_huggingface_dataset
4
4
  from .pandas import import_csv, import_excel, import_pandas
5
- from .parquet import import_parquet, export_parquet
5
+ from .parquet import export_parquet, import_parquet
6
6
 
7
7
  __default_dir = set(symbol for symbol in dir() if not symbol.startswith('_'))
8
8
  __removed_symbols = {'globals', 'hf_datasets', 'pandas', 'parquet'}