pixeltable 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +64 -11
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -1
- pixeltable/catalog/catalog.py +50 -27
- pixeltable/catalog/column.py +27 -11
- pixeltable/catalog/dir.py +6 -4
- pixeltable/catalog/globals.py +8 -1
- pixeltable/catalog/insertable_table.py +22 -12
- pixeltable/catalog/named_function.py +10 -6
- pixeltable/catalog/path.py +3 -2
- pixeltable/catalog/path_dict.py +8 -6
- pixeltable/catalog/schema_object.py +2 -1
- pixeltable/catalog/table.py +121 -101
- pixeltable/catalog/table_version.py +291 -142
- pixeltable/catalog/table_version_path.py +8 -5
- pixeltable/catalog/view.py +67 -26
- pixeltable/dataframe.py +102 -72
- pixeltable/env.py +20 -21
- pixeltable/exec/__init__.py +2 -2
- pixeltable/exec/aggregation_node.py +10 -4
- pixeltable/exec/cache_prefetch_node.py +5 -3
- pixeltable/exec/component_iteration_node.py +9 -8
- pixeltable/exec/data_row_batch.py +21 -10
- pixeltable/exec/exec_context.py +10 -3
- pixeltable/exec/exec_node.py +23 -12
- pixeltable/exec/expr_eval/evaluators.py +13 -7
- pixeltable/exec/expr_eval/expr_eval_node.py +24 -15
- pixeltable/exec/expr_eval/globals.py +30 -7
- pixeltable/exec/expr_eval/row_buffer.py +5 -6
- pixeltable/exec/expr_eval/schedulers.py +151 -31
- pixeltable/exec/in_memory_data_node.py +8 -7
- pixeltable/exec/row_update_node.py +15 -5
- pixeltable/exec/sql_node.py +56 -27
- pixeltable/exprs/__init__.py +2 -2
- pixeltable/exprs/arithmetic_expr.py +57 -26
- pixeltable/exprs/array_slice.py +1 -1
- pixeltable/exprs/column_property_ref.py +2 -1
- pixeltable/exprs/column_ref.py +20 -15
- pixeltable/exprs/comparison.py +6 -2
- pixeltable/exprs/compound_predicate.py +1 -3
- pixeltable/exprs/data_row.py +2 -2
- pixeltable/exprs/expr.py +101 -72
- pixeltable/exprs/expr_dict.py +2 -1
- pixeltable/exprs/expr_set.py +3 -1
- pixeltable/exprs/function_call.py +39 -41
- pixeltable/exprs/globals.py +1 -0
- pixeltable/exprs/in_predicate.py +2 -2
- pixeltable/exprs/inline_expr.py +20 -17
- pixeltable/exprs/json_mapper.py +4 -2
- pixeltable/exprs/json_path.py +12 -18
- pixeltable/exprs/literal.py +5 -9
- pixeltable/exprs/method_ref.py +1 -0
- pixeltable/exprs/object_ref.py +1 -1
- pixeltable/exprs/row_builder.py +32 -17
- pixeltable/exprs/rowid_ref.py +14 -5
- pixeltable/exprs/similarity_expr.py +11 -6
- pixeltable/exprs/sql_element_cache.py +1 -1
- pixeltable/exprs/type_cast.py +24 -9
- pixeltable/ext/__init__.py +1 -0
- pixeltable/ext/functions/__init__.py +1 -0
- pixeltable/ext/functions/whisperx.py +2 -2
- pixeltable/ext/functions/yolox.py +11 -11
- pixeltable/func/aggregate_function.py +17 -13
- pixeltable/func/callable_function.py +6 -6
- pixeltable/func/expr_template_function.py +15 -14
- pixeltable/func/function.py +16 -16
- pixeltable/func/function_registry.py +11 -8
- pixeltable/func/globals.py +4 -2
- pixeltable/func/query_template_function.py +12 -13
- pixeltable/func/signature.py +18 -9
- pixeltable/func/tools.py +10 -17
- pixeltable/func/udf.py +106 -11
- pixeltable/functions/__init__.py +21 -2
- pixeltable/functions/anthropic.py +16 -12
- pixeltable/functions/fireworks.py +63 -5
- pixeltable/functions/gemini.py +13 -3
- pixeltable/functions/globals.py +18 -6
- pixeltable/functions/huggingface.py +20 -38
- pixeltable/functions/image.py +7 -3
- pixeltable/functions/json.py +1 -0
- pixeltable/functions/llama_cpp.py +1 -4
- pixeltable/functions/mistralai.py +31 -20
- pixeltable/functions/ollama.py +4 -18
- pixeltable/functions/openai.py +201 -108
- pixeltable/functions/replicate.py +11 -10
- pixeltable/functions/string.py +70 -7
- pixeltable/functions/timestamp.py +21 -8
- pixeltable/functions/together.py +66 -52
- pixeltable/functions/video.py +1 -0
- pixeltable/functions/vision.py +14 -11
- pixeltable/functions/whisper.py +2 -1
- pixeltable/globals.py +60 -26
- pixeltable/index/__init__.py +1 -1
- pixeltable/index/btree.py +5 -3
- pixeltable/index/embedding_index.py +15 -14
- pixeltable/io/__init__.py +1 -1
- pixeltable/io/external_store.py +30 -25
- pixeltable/io/fiftyone.py +6 -14
- pixeltable/io/globals.py +33 -27
- pixeltable/io/hf_datasets.py +2 -1
- pixeltable/io/label_studio.py +77 -68
- pixeltable/io/pandas.py +33 -9
- pixeltable/io/parquet.py +9 -12
- pixeltable/iterators/__init__.py +1 -0
- pixeltable/iterators/audio.py +205 -0
- pixeltable/iterators/document.py +19 -8
- pixeltable/iterators/image.py +6 -24
- pixeltable/iterators/string.py +3 -6
- pixeltable/iterators/video.py +1 -7
- pixeltable/metadata/__init__.py +7 -1
- pixeltable/metadata/converters/convert_10.py +2 -2
- pixeltable/metadata/converters/convert_15.py +1 -5
- pixeltable/metadata/converters/convert_16.py +2 -4
- pixeltable/metadata/converters/convert_17.py +2 -4
- pixeltable/metadata/converters/convert_18.py +2 -4
- pixeltable/metadata/converters/convert_19.py +2 -5
- pixeltable/metadata/converters/convert_20.py +1 -4
- pixeltable/metadata/converters/convert_21.py +4 -6
- pixeltable/metadata/converters/convert_22.py +1 -0
- pixeltable/metadata/converters/convert_23.py +5 -5
- pixeltable/metadata/converters/convert_24.py +12 -13
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/util.py +3 -4
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +13 -2
- pixeltable/plan.py +173 -98
- pixeltable/store.py +42 -26
- pixeltable/type_system.py +62 -54
- pixeltable/utils/arrow.py +1 -2
- pixeltable/utils/coco.py +16 -17
- pixeltable/utils/code.py +1 -1
- pixeltable/utils/console_output.py +6 -3
- pixeltable/utils/description_helper.py +7 -7
- pixeltable/utils/documents.py +3 -1
- pixeltable/utils/filecache.py +12 -7
- pixeltable/utils/http_server.py +9 -8
- pixeltable/utils/media_store.py +2 -1
- pixeltable/utils/pytorch.py +11 -14
- pixeltable/utils/s3.py +1 -0
- pixeltable/utils/sql.py +1 -0
- pixeltable/utils/transactional_directory.py +2 -2
- {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/METADATA +6 -8
- pixeltable-0.3.3.dist-info/RECORD +163 -0
- pixeltable-0.3.2.dist-info/RECORD +0 -161
- {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/entry_points.txt +0 -0
pixeltable/functions/together.py
CHANGED
|
@@ -25,12 +25,13 @@ if TYPE_CHECKING:
|
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
@env.register_client('together')
|
|
28
|
-
def _(api_key: str) -> 'together.
|
|
28
|
+
def _(api_key: str) -> 'together.AsyncTogether':
|
|
29
29
|
import together
|
|
30
|
-
return together.Together(api_key=api_key)
|
|
31
30
|
|
|
31
|
+
return together.AsyncTogether(api_key=api_key)
|
|
32
32
|
|
|
33
|
-
|
|
33
|
+
|
|
34
|
+
def _together_client() -> 'together.AsyncTogether':
|
|
34
35
|
return env.Env.get().get_client('together')
|
|
35
36
|
|
|
36
37
|
|
|
@@ -39,6 +40,7 @@ T = TypeVar('T')
|
|
|
39
40
|
|
|
40
41
|
def _retry(fn: Callable[..., T]) -> Callable[..., T]:
|
|
41
42
|
import together
|
|
43
|
+
|
|
42
44
|
return tenacity.retry(
|
|
43
45
|
retry=tenacity.retry_if_exception_type(together.error.RateLimitError),
|
|
44
46
|
wait=tenacity.wait_random_exponential(multiplier=1, max=60),
|
|
@@ -46,8 +48,8 @@ def _retry(fn: Callable[..., T]) -> Callable[..., T]:
|
|
|
46
48
|
)(fn)
|
|
47
49
|
|
|
48
50
|
|
|
49
|
-
@pxt.udf
|
|
50
|
-
def completions(
|
|
51
|
+
@pxt.udf(resource_pool='request-rate:together:chat')
|
|
52
|
+
async def completions(
|
|
51
53
|
prompt: str,
|
|
52
54
|
*,
|
|
53
55
|
model: str,
|
|
@@ -68,6 +70,10 @@ def completions(
|
|
|
68
70
|
Equivalent to the Together AI `completions` API endpoint.
|
|
69
71
|
For additional details, see: [https://docs.together.ai/reference/completions-1](https://docs.together.ai/reference/completions-1)
|
|
70
72
|
|
|
73
|
+
Request throttling:
|
|
74
|
+
Applies the rate limit set in the config (section `together.rate_limits`, key `chat`). If no rate
|
|
75
|
+
limit is configured, uses a default of 600 RPM.
|
|
76
|
+
|
|
71
77
|
__Requirements:__
|
|
72
78
|
|
|
73
79
|
- `pip install together`
|
|
@@ -85,29 +91,27 @@ def completions(
|
|
|
85
91
|
Add a computed column that applies the model `mistralai/Mixtral-8x7B-v0.1` to an existing Pixeltable column `tbl.prompt`
|
|
86
92
|
of the table `tbl`:
|
|
87
93
|
|
|
88
|
-
>>> tbl
|
|
94
|
+
>>> tbl.add_computed_column(response=completions(tbl.prompt, model='mistralai/Mixtral-8x7B-v0.1'))
|
|
89
95
|
"""
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
safety_model=safety_model,
|
|
104
|
-
)
|
|
105
|
-
.dict()
|
|
96
|
+
result = await _together_client().completions.create(
|
|
97
|
+
prompt=prompt,
|
|
98
|
+
model=model,
|
|
99
|
+
max_tokens=max_tokens,
|
|
100
|
+
stop=stop,
|
|
101
|
+
temperature=temperature,
|
|
102
|
+
top_p=top_p,
|
|
103
|
+
top_k=top_k,
|
|
104
|
+
repetition_penalty=repetition_penalty,
|
|
105
|
+
logprobs=logprobs,
|
|
106
|
+
echo=echo,
|
|
107
|
+
n=n,
|
|
108
|
+
safety_model=safety_model,
|
|
106
109
|
)
|
|
110
|
+
return result.dict()
|
|
107
111
|
|
|
108
112
|
|
|
109
|
-
@pxt.udf
|
|
110
|
-
def chat_completions(
|
|
113
|
+
@pxt.udf(resource_pool='request-rate:together:chat')
|
|
114
|
+
async def chat_completions(
|
|
111
115
|
messages: list[dict[str, str]],
|
|
112
116
|
*,
|
|
113
117
|
model: str,
|
|
@@ -131,6 +135,10 @@ def chat_completions(
|
|
|
131
135
|
Equivalent to the Together AI `chat/completions` API endpoint.
|
|
132
136
|
For additional details, see: [https://docs.together.ai/reference/chat-completions-1](https://docs.together.ai/reference/chat-completions-1)
|
|
133
137
|
|
|
138
|
+
Request throttling:
|
|
139
|
+
Applies the rate limit set in the config (section `together.rate_limits`, key `chat`). If no rate
|
|
140
|
+
limit is configured, uses a default of 600 RPM.
|
|
141
|
+
|
|
134
142
|
__Requirements:__
|
|
135
143
|
|
|
136
144
|
- `pip install together`
|
|
@@ -149,28 +157,26 @@ def chat_completions(
|
|
|
149
157
|
of the table `tbl`:
|
|
150
158
|
|
|
151
159
|
>>> messages = [{'role': 'user', 'content': tbl.prompt}]
|
|
152
|
-
... tbl
|
|
160
|
+
... tbl.add_computed_column(response=chat_completions(messages, model='mistralai/Mixtral-8x7B-v0.1'))
|
|
153
161
|
"""
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
tool_choice=tool_choice,
|
|
171
|
-
)
|
|
172
|
-
.dict()
|
|
162
|
+
result = await _together_client().chat.completions.create(
|
|
163
|
+
messages=messages,
|
|
164
|
+
model=model,
|
|
165
|
+
max_tokens=max_tokens,
|
|
166
|
+
stop=stop,
|
|
167
|
+
temperature=temperature,
|
|
168
|
+
top_p=top_p,
|
|
169
|
+
top_k=top_k,
|
|
170
|
+
repetition_penalty=repetition_penalty,
|
|
171
|
+
logprobs=logprobs,
|
|
172
|
+
echo=echo,
|
|
173
|
+
n=n,
|
|
174
|
+
safety_model=safety_model,
|
|
175
|
+
response_format=response_format,
|
|
176
|
+
tools=tools,
|
|
177
|
+
tool_choice=tool_choice,
|
|
173
178
|
)
|
|
179
|
+
return result.dict()
|
|
174
180
|
|
|
175
181
|
|
|
176
182
|
_embedding_dimensions_cache = {
|
|
@@ -185,14 +191,18 @@ _embedding_dimensions_cache = {
|
|
|
185
191
|
}
|
|
186
192
|
|
|
187
193
|
|
|
188
|
-
@pxt.udf(batch_size=32)
|
|
189
|
-
def embeddings(input: Batch[str], *, model: str) -> Batch[pxt.Array[(None,), pxt.Float]]:
|
|
194
|
+
@pxt.udf(batch_size=32, resource_pool='request-rate:together:embeddings')
|
|
195
|
+
async def embeddings(input: Batch[str], *, model: str) -> Batch[pxt.Array[(None,), pxt.Float]]:
|
|
190
196
|
"""
|
|
191
197
|
Query an embedding model for a given string of text.
|
|
192
198
|
|
|
193
199
|
Equivalent to the Together AI `embeddings` API endpoint.
|
|
194
200
|
For additional details, see: [https://docs.together.ai/reference/embeddings-2](https://docs.together.ai/reference/embeddings-2)
|
|
195
201
|
|
|
202
|
+
Request throttling:
|
|
203
|
+
Applies the rate limit set in the config (section `together.rate_limits`, key `embeddings`). If no rate
|
|
204
|
+
limit is configured, uses a default of 600 RPM.
|
|
205
|
+
|
|
196
206
|
__Requirements:__
|
|
197
207
|
|
|
198
208
|
- `pip install together`
|
|
@@ -208,9 +218,9 @@ def embeddings(input: Batch[str], *, model: str) -> Batch[pxt.Array[(None,), pxt
|
|
|
208
218
|
Add a computed column that applies the model `togethercomputer/m2-bert-80M-8k-retrieval`
|
|
209
219
|
to an existing Pixeltable column `tbl.text` of the table `tbl`:
|
|
210
220
|
|
|
211
|
-
>>> tbl
|
|
221
|
+
>>> tbl.add_computed_column(response=embeddings(tbl.text, model='togethercomputer/m2-bert-80M-8k-retrieval'))
|
|
212
222
|
"""
|
|
213
|
-
result =
|
|
223
|
+
result = await _together_client().embeddings.create(input=input, model=model)
|
|
214
224
|
return [np.array(data.embedding, dtype=np.float64) for data in result.data]
|
|
215
225
|
|
|
216
226
|
|
|
@@ -223,8 +233,8 @@ def _(model: str) -> pxt.ArrayType:
|
|
|
223
233
|
return pxt.ArrayType((dimensions,), dtype=pxt.FloatType())
|
|
224
234
|
|
|
225
235
|
|
|
226
|
-
@pxt.udf
|
|
227
|
-
def image_generations(
|
|
236
|
+
@pxt.udf(resource_pool='request-rate:together:images')
|
|
237
|
+
async def image_generations(
|
|
228
238
|
prompt: str,
|
|
229
239
|
*,
|
|
230
240
|
model: str,
|
|
@@ -240,6 +250,10 @@ def image_generations(
|
|
|
240
250
|
Equivalent to the Together AI `images/generations` API endpoint.
|
|
241
251
|
For additional details, see: [https://docs.together.ai/reference/post_images-generations](https://docs.together.ai/reference/post_images-generations)
|
|
242
252
|
|
|
253
|
+
Request throttling:
|
|
254
|
+
Applies the rate limit set in the config (section `together.rate_limits`, key `images`). If no rate
|
|
255
|
+
limit is configured, uses a default of 600 RPM.
|
|
256
|
+
|
|
243
257
|
__Requirements:__
|
|
244
258
|
|
|
245
259
|
- `pip install together`
|
|
@@ -257,9 +271,9 @@ def image_generations(
|
|
|
257
271
|
Add a computed column that applies the model `stabilityai/stable-diffusion-xl-base-1.0`
|
|
258
272
|
to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
|
|
259
273
|
|
|
260
|
-
>>> tbl
|
|
274
|
+
>>> tbl.add_computed_column(response=image_generations(tbl.prompt, model='stabilityai/stable-diffusion-xl-base-1.0'))
|
|
261
275
|
"""
|
|
262
|
-
result =
|
|
276
|
+
result = await _together_client().images.generate(
|
|
263
277
|
prompt=prompt, model=model, steps=steps, seed=seed, height=height, width=width, negative_prompt=negative_prompt
|
|
264
278
|
)
|
|
265
279
|
if result.data[0].b64_json is not None:
|
pixeltable/functions/video.py
CHANGED
|
@@ -52,6 +52,7 @@ class make_video(pxt.Aggregator):
|
|
|
52
52
|
"""
|
|
53
53
|
Aggregator that creates a video from a sequence of images.
|
|
54
54
|
"""
|
|
55
|
+
|
|
55
56
|
def __init__(self, fps: int = 25):
|
|
56
57
|
"""follows https://pyav.org/docs/develop/cookbook/numpy.html#generating-video"""
|
|
57
58
|
self.container: Optional[av.container.OutputContainer] = None
|
pixeltable/functions/vision.py
CHANGED
|
@@ -205,7 +205,9 @@ def eval_detections(
|
|
|
205
205
|
pred_filter = pred_classes_arr == class_idx
|
|
206
206
|
gt_filter = gt_classes_arr == class_idx
|
|
207
207
|
class_pred_scores = pred_scores_arr[pred_filter]
|
|
208
|
-
tp, fp = __calculate_image_tpfp(
|
|
208
|
+
tp, fp = __calculate_image_tpfp(
|
|
209
|
+
pred_bboxes_arr[pred_filter], class_pred_scores, gt_bboxes_arr[gt_filter], min_iou
|
|
210
|
+
)
|
|
209
211
|
ordered_class_pred_scores = -np.sort(-class_pred_scores)
|
|
210
212
|
result.append(
|
|
211
213
|
{
|
|
@@ -235,6 +237,7 @@ class mean_ap(pxt.Aggregator):
|
|
|
235
237
|
|
|
236
238
|
- A `dict[int, float]` mapping each label class to an average precision (AP) value for that class.
|
|
237
239
|
"""
|
|
240
|
+
|
|
238
241
|
def __init__(self):
|
|
239
242
|
self.class_tpfp: dict[int, list[dict]] = defaultdict(list)
|
|
240
243
|
|
|
@@ -282,22 +285,22 @@ def __create_label_colors(labels: list[Any]) -> dict[Any, str]:
|
|
|
282
285
|
label_hash = int(hashlib.md5(str(label).encode()).hexdigest(), 16)
|
|
283
286
|
hue = (label_hash % 360) / 360.0
|
|
284
287
|
rgb = colorsys.hsv_to_rgb(hue, 0.7, 0.95)
|
|
285
|
-
hex_color = '#{:02x}{:02x}{:02x}'.format(int(rgb[0]*255), int(rgb[1]*255), int(rgb[2]*255))
|
|
288
|
+
hex_color = '#{:02x}{:02x}{:02x}'.format(int(rgb[0] * 255), int(rgb[1] * 255), int(rgb[2] * 255))
|
|
286
289
|
result[label] = hex_color
|
|
287
290
|
return result
|
|
288
291
|
|
|
289
292
|
|
|
290
293
|
@pxt.udf
|
|
291
294
|
def draw_bounding_boxes(
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
295
|
+
img: PIL.Image.Image,
|
|
296
|
+
boxes: list[list[int]],
|
|
297
|
+
labels: Optional[list[Any]] = None,
|
|
298
|
+
color: Optional[str] = None,
|
|
299
|
+
box_colors: Optional[list[str]] = None,
|
|
300
|
+
fill: bool = False,
|
|
301
|
+
width: int = 1,
|
|
302
|
+
font: Optional[str] = None,
|
|
303
|
+
font_size: Optional[int] = None,
|
|
301
304
|
) -> PIL.Image.Image:
|
|
302
305
|
"""
|
|
303
306
|
Draws bounding boxes on the given image.
|
pixeltable/functions/whisper.py
CHANGED
|
@@ -14,6 +14,7 @@ from pixeltable.env import Env
|
|
|
14
14
|
if TYPE_CHECKING:
|
|
15
15
|
from whisper import Whisper # type: ignore[import-untyped]
|
|
16
16
|
|
|
17
|
+
|
|
17
18
|
@pxt.udf
|
|
18
19
|
def transcribe(
|
|
19
20
|
audio: pxt.Audio,
|
|
@@ -52,7 +53,7 @@ def transcribe(
|
|
|
52
53
|
Add a computed column that applies the model `base.en` to an existing Pixeltable column `tbl.audio`
|
|
53
54
|
of the table `tbl`:
|
|
54
55
|
|
|
55
|
-
>>> tbl
|
|
56
|
+
>>> tbl.add_computed_column(result=transcribe(tbl.audio, model='base.en'))
|
|
56
57
|
"""
|
|
57
58
|
Env.get().require_package('whisper')
|
|
58
59
|
Env.get().require_package('torch')
|
pixeltable/globals.py
CHANGED
|
@@ -20,15 +20,17 @@ from pixeltable.utils.filecache import FileCache
|
|
|
20
20
|
|
|
21
21
|
_logger = logging.getLogger('pixeltable')
|
|
22
22
|
|
|
23
|
+
|
|
23
24
|
def init() -> None:
|
|
24
25
|
"""Initializes the Pixeltable environment."""
|
|
25
26
|
_ = Catalog.get()
|
|
26
27
|
|
|
28
|
+
|
|
27
29
|
def _get_or_drop_existing_path(
|
|
28
30
|
path_str: str,
|
|
29
31
|
expected_obj_type: type[catalog.SchemaObject],
|
|
30
32
|
expected_snapshot: bool,
|
|
31
|
-
if_exists: catalog.IfExistsParam
|
|
33
|
+
if_exists: catalog.IfExistsParam,
|
|
32
34
|
) -> Optional[catalog.SchemaObject]:
|
|
33
35
|
"""Handle schema object path collision during creation according to the if_exists parameter.
|
|
34
36
|
|
|
@@ -53,12 +55,15 @@ def _get_or_drop_existing_path(
|
|
|
53
55
|
raise excs.Error(f'Path `{path_str}` already exists.')
|
|
54
56
|
|
|
55
57
|
existing_path = cat.paths[path]
|
|
56
|
-
existing_path_is_snapshot =
|
|
58
|
+
existing_path_is_snapshot = (
|
|
59
|
+
'is_snapshot' in existing_path.get_metadata() and existing_path.get_metadata()['is_snapshot']
|
|
60
|
+
)
|
|
57
61
|
obj_type_str = 'Snapshot' if expected_snapshot else expected_obj_type._display_name().capitalize()
|
|
58
62
|
# Check if the existing path is of expected type.
|
|
59
|
-
if
|
|
60
|
-
|
|
61
|
-
|
|
63
|
+
if not isinstance(existing_path, expected_obj_type) or (expected_snapshot and not existing_path_is_snapshot):
|
|
64
|
+
raise excs.Error(
|
|
65
|
+
f'Path `{path_str}` already exists but is not a {obj_type_str}. Cannot {if_exists.name.lower()} it.'
|
|
66
|
+
)
|
|
62
67
|
|
|
63
68
|
# if_exists='ignore' return the handle to the existing object.
|
|
64
69
|
assert isinstance(existing_path, expected_obj_type)
|
|
@@ -69,12 +74,14 @@ def _get_or_drop_existing_path(
|
|
|
69
74
|
# unless if_exists='replace_force'.
|
|
70
75
|
has_dependents = existing_path._has_dependents
|
|
71
76
|
if if_exists == catalog.IfExistsParam.REPLACE and has_dependents:
|
|
72
|
-
raise excs.Error(
|
|
77
|
+
raise excs.Error(
|
|
78
|
+
f"{obj_type_str} `{path_str}` already exists and has dependents. Use `if_exists='replace_force'` to replace it."
|
|
79
|
+
)
|
|
73
80
|
else:
|
|
74
81
|
assert if_exists == catalog.IfExistsParam.REPLACE_FORCE or not has_dependents
|
|
75
82
|
# Drop the existing path so it can be replaced.
|
|
76
83
|
# Any errors during drop will be raised.
|
|
77
|
-
_logger.info(f
|
|
84
|
+
_logger.info(f'Dropping {obj_type_str} `{path_str}` to replace it.')
|
|
78
85
|
if isinstance(existing_path, catalog.Dir):
|
|
79
86
|
drop_dir(path_str, force=True)
|
|
80
87
|
else:
|
|
@@ -83,6 +90,7 @@ def _get_or_drop_existing_path(
|
|
|
83
90
|
|
|
84
91
|
return None
|
|
85
92
|
|
|
93
|
+
|
|
86
94
|
def create_table(
|
|
87
95
|
path_str: str,
|
|
88
96
|
schema_or_df: Union[dict[str, Any], DataFrame],
|
|
@@ -91,7 +99,7 @@ def create_table(
|
|
|
91
99
|
num_retained_versions: int = 10,
|
|
92
100
|
comment: str = '',
|
|
93
101
|
media_validation: Literal['on_read', 'on_write'] = 'on_write',
|
|
94
|
-
if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error'
|
|
102
|
+
if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
|
|
95
103
|
) -> catalog.Table:
|
|
96
104
|
"""Create a new base table.
|
|
97
105
|
|
|
@@ -166,7 +174,9 @@ def create_table(
|
|
|
166
174
|
df = schema_or_df
|
|
167
175
|
schema = df.schema
|
|
168
176
|
elif isinstance(schema_or_df, DataFrameResultSet):
|
|
169
|
-
raise excs.Error(
|
|
177
|
+
raise excs.Error(
|
|
178
|
+
'`schema_or_df` must be either a schema dictionary or a Pixeltable DataFrame. (Is there an extraneous call to `collect()`?)'
|
|
179
|
+
)
|
|
170
180
|
else:
|
|
171
181
|
raise excs.Error('`schema_or_df` must be either a schema dictionary or a Pixeltable DataFrame.')
|
|
172
182
|
|
|
@@ -182,8 +192,15 @@ def create_table(
|
|
|
182
192
|
raise excs.Error('primary_key must be a single column name or a list of column names')
|
|
183
193
|
|
|
184
194
|
tbl = catalog.InsertableTable._create(
|
|
185
|
-
dir._id,
|
|
186
|
-
|
|
195
|
+
dir._id,
|
|
196
|
+
path.name,
|
|
197
|
+
schema,
|
|
198
|
+
df,
|
|
199
|
+
primary_key=primary_key,
|
|
200
|
+
num_retained_versions=num_retained_versions,
|
|
201
|
+
comment=comment,
|
|
202
|
+
media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'),
|
|
203
|
+
)
|
|
187
204
|
cat.paths[path] = tbl
|
|
188
205
|
|
|
189
206
|
_logger.info(f'Created table `{path_str}`.')
|
|
@@ -293,17 +310,27 @@ def create_view(
|
|
|
293
310
|
# additional columns should not be in the base table
|
|
294
311
|
for col_name in additional_columns.keys():
|
|
295
312
|
if col_name in [c.name for c in tbl_version_path.columns()]:
|
|
296
|
-
raise excs.Error(
|
|
313
|
+
raise excs.Error(
|
|
314
|
+
f'Column {col_name!r} already exists in the base table {tbl_version_path.get_column(col_name).tbl.name}.'
|
|
315
|
+
)
|
|
297
316
|
if iterator is None:
|
|
298
317
|
iterator_class, iterator_args = None, None
|
|
299
318
|
else:
|
|
300
319
|
iterator_class, iterator_args = iterator
|
|
301
320
|
|
|
302
321
|
view = catalog.View._create(
|
|
303
|
-
dir._id,
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
322
|
+
dir._id,
|
|
323
|
+
path.name,
|
|
324
|
+
base=tbl_version_path,
|
|
325
|
+
additional_columns=additional_columns,
|
|
326
|
+
predicate=where,
|
|
327
|
+
is_snapshot=is_snapshot,
|
|
328
|
+
iterator_cls=iterator_class,
|
|
329
|
+
iterator_args=iterator_args,
|
|
330
|
+
num_retained_versions=num_retained_versions,
|
|
331
|
+
comment=comment,
|
|
332
|
+
media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'),
|
|
333
|
+
)
|
|
307
334
|
cat.paths[path] = view
|
|
308
335
|
_logger.info(f'Created view `{path_str}`.')
|
|
309
336
|
FileCache.get().emit_eviction_warnings()
|
|
@@ -450,8 +477,9 @@ def move(path: str, new_path: str) -> None:
|
|
|
450
477
|
obj._move(new_p.name, new_dir._id)
|
|
451
478
|
|
|
452
479
|
|
|
453
|
-
def drop_table(
|
|
454
|
-
if_not_exists: Literal['error', 'ignore'] = 'error'
|
|
480
|
+
def drop_table(
|
|
481
|
+
table: Union[str, catalog.Table], force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error'
|
|
482
|
+
) -> None:
|
|
455
483
|
"""Drop a table, view, or snapshot.
|
|
456
484
|
|
|
457
485
|
Args:
|
|
@@ -497,7 +525,9 @@ def drop_table(table: Union[str, catalog.Table], force: bool = False,
|
|
|
497
525
|
else:
|
|
498
526
|
raise excs.Error(f'Table `{table}` does not exist.')
|
|
499
527
|
if not isinstance(tbl, catalog.Table):
|
|
500
|
-
raise excs.Error(
|
|
528
|
+
raise excs.Error(
|
|
529
|
+
f'{tbl} needs to be a {catalog.Table._display_name()} but is a {type(tbl)._display_name()}'
|
|
530
|
+
)
|
|
501
531
|
else:
|
|
502
532
|
tbl = table
|
|
503
533
|
tbl_path_obj = catalog.Path(tbl._path)
|
|
@@ -543,7 +573,10 @@ def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
|
|
|
543
573
|
Catalog.get().paths.check_is_valid(path, expected=catalog.Dir)
|
|
544
574
|
return [str(p) for p in Catalog.get().paths.get_children(path, child_type=catalog.Table, recursive=recursive)]
|
|
545
575
|
|
|
546
|
-
|
|
576
|
+
|
|
577
|
+
def create_dir(
|
|
578
|
+
path_str: str, if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error'
|
|
579
|
+
) -> Optional[catalog.Dir]:
|
|
547
580
|
"""Create a directory.
|
|
548
581
|
|
|
549
582
|
Args:
|
|
@@ -609,6 +642,7 @@ def create_dir(path_str: str, if_exists: Literal['error', 'ignore', 'replace', '
|
|
|
609
642
|
Env.get().console_logger.info(f'Created directory `{path_str}`.')
|
|
610
643
|
return dir
|
|
611
644
|
|
|
645
|
+
|
|
612
646
|
def drop_dir(path_str: str, force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
|
|
613
647
|
"""Remove a directory.
|
|
614
648
|
|
|
@@ -659,7 +693,8 @@ def drop_dir(path_str: str, force: bool = False, if_not_exists: Literal['error',
|
|
|
659
693
|
|
|
660
694
|
if not isinstance(obj, catalog.Dir):
|
|
661
695
|
raise excs.Error(
|
|
662
|
-
f'{str(path)} needs to be a {catalog.Dir._display_name()} but is a {type(obj)._display_name()}'
|
|
696
|
+
f'{str(path)} needs to be a {catalog.Dir._display_name()} but is a {type(obj)._display_name()}'
|
|
697
|
+
)
|
|
663
698
|
|
|
664
699
|
children = cat.paths.get_children(path, child_type=None, recursive=True)
|
|
665
700
|
|
|
@@ -720,7 +755,9 @@ def list_functions() -> Styler:
|
|
|
720
755
|
paths = ['.'.join(f.self_path.split('.')[:-1]) for f in functions]
|
|
721
756
|
names = [f.name for f in functions]
|
|
722
757
|
params = [
|
|
723
|
-
', '.join(
|
|
758
|
+
', '.join(
|
|
759
|
+
[param_name + ': ' + str(param_type) for param_name, param_type in f.signatures[0].parameters.items()]
|
|
760
|
+
)
|
|
724
761
|
for f in functions
|
|
725
762
|
]
|
|
726
763
|
pd_df = pd.DataFrame(
|
|
@@ -771,10 +808,7 @@ def tools(*args: Union[func.Function, func.tools.Tool]) -> func.tools.Tools:
|
|
|
771
808
|
... pxt.tool(traffic_quote, name='traffic_conditions'),
|
|
772
809
|
... )
|
|
773
810
|
"""
|
|
774
|
-
return func.tools.Tools(tools=[
|
|
775
|
-
arg if isinstance(arg, func.tools.Tool) else tool(arg)
|
|
776
|
-
for arg in args
|
|
777
|
-
])
|
|
811
|
+
return func.tools.Tools(tools=[arg if isinstance(arg, func.tools.Tool) else tool(arg) for arg in args])
|
|
778
812
|
|
|
779
813
|
|
|
780
814
|
def tool(fn: func.Function, name: Optional[str] = None, description: Optional[str] = None) -> func.tools.Tool:
|
pixeltable/index/__init__.py
CHANGED
pixeltable/index/btree.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import
|
|
1
|
+
from typing import TYPE_CHECKING, Optional
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
@@ -7,15 +7,18 @@ import sqlalchemy as sql
|
|
|
7
7
|
import pixeltable.exceptions as excs
|
|
8
8
|
from pixeltable import catalog, exprs
|
|
9
9
|
from pixeltable.func.udf import udf
|
|
10
|
+
|
|
10
11
|
from .base import IndexBase
|
|
11
12
|
|
|
12
13
|
if TYPE_CHECKING:
|
|
13
14
|
import pixeltable.exprs
|
|
14
15
|
|
|
16
|
+
|
|
15
17
|
class BtreeIndex(IndexBase):
|
|
16
18
|
"""
|
|
17
19
|
Interface to B-tree indices in Postgres.
|
|
18
20
|
"""
|
|
21
|
+
|
|
19
22
|
MAX_STRING_LEN = 256
|
|
20
23
|
|
|
21
24
|
value_expr: 'pixeltable.exprs.Expr'
|
|
@@ -25,7 +28,7 @@ class BtreeIndex(IndexBase):
|
|
|
25
28
|
def str_filter(s: Optional[str]) -> Optional[str]:
|
|
26
29
|
if s is None:
|
|
27
30
|
return None
|
|
28
|
-
return s[:BtreeIndex.MAX_STRING_LEN]
|
|
31
|
+
return s[: BtreeIndex.MAX_STRING_LEN]
|
|
29
32
|
|
|
30
33
|
def __init__(self, c: 'catalog.Column'):
|
|
31
34
|
if not c.col_type.is_scalar_type() and not c.col_type.is_media_type():
|
|
@@ -64,4 +67,3 @@ class BtreeIndex(IndexBase):
|
|
|
64
67
|
@classmethod
|
|
65
68
|
def from_dict(cls, c: 'catalog.Column', d: dict) -> 'BtreeIndex':
|
|
66
69
|
return cls(c)
|
|
67
|
-
|
|
@@ -31,11 +31,7 @@ class EmbeddingIndex(IndexBase):
|
|
|
31
31
|
IP = 2
|
|
32
32
|
L2 = 3
|
|
33
33
|
|
|
34
|
-
PGVECTOR_OPS = {
|
|
35
|
-
Metric.COSINE: 'vector_cosine_ops',
|
|
36
|
-
Metric.IP: 'vector_ip_ops',
|
|
37
|
-
Metric.L2: 'vector_l2_ops'
|
|
38
|
-
}
|
|
34
|
+
PGVECTOR_OPS = {Metric.COSINE: 'vector_cosine_ops', Metric.IP: 'vector_ip_ops', Metric.L2: 'vector_l2_ops'}
|
|
39
35
|
|
|
40
36
|
metric: Metric
|
|
41
37
|
value_expr: exprs.FunctionCall
|
|
@@ -97,8 +93,7 @@ class EmbeddingIndex(IndexBase):
|
|
|
97
93
|
# contains no matching signatures.
|
|
98
94
|
assert embed is not None
|
|
99
95
|
raise excs.Error(
|
|
100
|
-
f'The function `{embed.name}` is not a valid embedding: '
|
|
101
|
-
'it must take a single string or image parameter'
|
|
96
|
+
f'The function `{embed.name}` is not a valid embedding: it must take a single string or image parameter'
|
|
102
97
|
)
|
|
103
98
|
|
|
104
99
|
# Now validate the return types of the embedding functions.
|
|
@@ -116,7 +111,8 @@ class EmbeddingIndex(IndexBase):
|
|
|
116
111
|
|
|
117
112
|
self.metric = self.Metric[metric.upper()]
|
|
118
113
|
self.value_expr = (
|
|
119
|
-
self.string_embed(exprs.ColumnRef(c))
|
|
114
|
+
self.string_embed(exprs.ColumnRef(c))
|
|
115
|
+
if c.col_type.is_string_type()
|
|
120
116
|
else self.image_embed(exprs.ColumnRef(c))
|
|
121
117
|
)
|
|
122
118
|
assert isinstance(self.value_expr.col_type, ts.ArrayType)
|
|
@@ -138,10 +134,11 @@ class EmbeddingIndex(IndexBase):
|
|
|
138
134
|
def create_index(self, index_name: str, index_value_col: catalog.Column, conn: sql.engine.Connection) -> None:
|
|
139
135
|
"""Create the index on the index value column"""
|
|
140
136
|
idx = sql.Index(
|
|
141
|
-
index_name,
|
|
137
|
+
index_name,
|
|
138
|
+
index_value_col.sa_col,
|
|
142
139
|
postgresql_using='hnsw',
|
|
143
140
|
postgresql_with={'m': 16, 'ef_construction': 64},
|
|
144
|
-
postgresql_ops={index_value_col.sa_col.name: self.PGVECTOR_OPS[self.metric]}
|
|
141
|
+
postgresql_ops={index_value_col.sa_col.name: self.PGVECTOR_OPS[self.metric]},
|
|
145
142
|
)
|
|
146
143
|
idx.create(bind=conn)
|
|
147
144
|
|
|
@@ -191,16 +188,20 @@ class EmbeddingIndex(IndexBase):
|
|
|
191
188
|
return 'embedding'
|
|
192
189
|
|
|
193
190
|
@classmethod
|
|
194
|
-
def _resolve_embedding_fn(
|
|
191
|
+
def _resolve_embedding_fn(
|
|
192
|
+
cls, embed_fn: func.Function, expected_type: ts.ColumnType.Type
|
|
193
|
+
) -> Optional[func.Function]:
|
|
195
194
|
"""Find an overload resolution for `embed_fn` that matches the given type."""
|
|
196
195
|
assert isinstance(embed_fn, func.Function)
|
|
197
196
|
for resolved_fn in embed_fn._resolved_fns:
|
|
198
197
|
# The embedding function must be a 1-ary function of the correct type. But it's ok if the function signature
|
|
199
198
|
# has more than one parameter, as long as it has at most one *required* parameter.
|
|
200
199
|
sig = resolved_fn.signature
|
|
201
|
-
if (
|
|
200
|
+
if (
|
|
201
|
+
len(sig.parameters) >= 1
|
|
202
202
|
and len(sig.required_parameters) <= 1
|
|
203
|
-
and sig.parameters_by_pos[0].col_type.type_enum == expected_type
|
|
203
|
+
and sig.parameters_by_pos[0].col_type.type_enum == expected_type
|
|
204
|
+
):
|
|
204
205
|
return resolved_fn
|
|
205
206
|
return None
|
|
206
207
|
|
|
@@ -237,7 +238,7 @@ class EmbeddingIndex(IndexBase):
|
|
|
237
238
|
return {
|
|
238
239
|
'metric': self.metric.name.lower(),
|
|
239
240
|
'string_embed': None if self.string_embed is None else self.string_embed.as_dict(),
|
|
240
|
-
'image_embed': None if self.image_embed is None else self.image_embed.as_dict()
|
|
241
|
+
'image_embed': None if self.image_embed is None else self.image_embed.as_dict(),
|
|
241
242
|
}
|
|
242
243
|
|
|
243
244
|
@classmethod
|
pixeltable/io/__init__.py
CHANGED
|
@@ -2,7 +2,7 @@ from .external_store import ExternalStore, SyncStatus
|
|
|
2
2
|
from .globals import create_label_studio_project, export_images_as_fo_dataset, import_json, import_rows
|
|
3
3
|
from .hf_datasets import import_huggingface_dataset
|
|
4
4
|
from .pandas import import_csv, import_excel, import_pandas
|
|
5
|
-
from .parquet import
|
|
5
|
+
from .parquet import export_parquet, import_parquet
|
|
6
6
|
|
|
7
7
|
__default_dir = set(symbol for symbol in dir() if not symbol.startswith('_'))
|
|
8
8
|
__removed_symbols = {'globals', 'hf_datasets', 'pandas', 'parquet'}
|