pixeltable 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (119) hide show
  1. pixeltable/__init__.py +53 -0
  2. pixeltable/__version__.py +3 -0
  3. pixeltable/catalog/__init__.py +13 -0
  4. pixeltable/catalog/catalog.py +159 -0
  5. pixeltable/catalog/column.py +181 -0
  6. pixeltable/catalog/dir.py +32 -0
  7. pixeltable/catalog/globals.py +33 -0
  8. pixeltable/catalog/insertable_table.py +192 -0
  9. pixeltable/catalog/named_function.py +36 -0
  10. pixeltable/catalog/path.py +58 -0
  11. pixeltable/catalog/path_dict.py +139 -0
  12. pixeltable/catalog/schema_object.py +39 -0
  13. pixeltable/catalog/table.py +695 -0
  14. pixeltable/catalog/table_version.py +1026 -0
  15. pixeltable/catalog/table_version_path.py +133 -0
  16. pixeltable/catalog/view.py +203 -0
  17. pixeltable/dataframe.py +749 -0
  18. pixeltable/env.py +466 -0
  19. pixeltable/exceptions.py +17 -0
  20. pixeltable/exec/__init__.py +10 -0
  21. pixeltable/exec/aggregation_node.py +78 -0
  22. pixeltable/exec/cache_prefetch_node.py +116 -0
  23. pixeltable/exec/component_iteration_node.py +79 -0
  24. pixeltable/exec/data_row_batch.py +94 -0
  25. pixeltable/exec/exec_context.py +22 -0
  26. pixeltable/exec/exec_node.py +61 -0
  27. pixeltable/exec/expr_eval_node.py +217 -0
  28. pixeltable/exec/in_memory_data_node.py +73 -0
  29. pixeltable/exec/media_validation_node.py +43 -0
  30. pixeltable/exec/sql_scan_node.py +226 -0
  31. pixeltable/exprs/__init__.py +25 -0
  32. pixeltable/exprs/arithmetic_expr.py +102 -0
  33. pixeltable/exprs/array_slice.py +71 -0
  34. pixeltable/exprs/column_property_ref.py +77 -0
  35. pixeltable/exprs/column_ref.py +114 -0
  36. pixeltable/exprs/comparison.py +77 -0
  37. pixeltable/exprs/compound_predicate.py +98 -0
  38. pixeltable/exprs/data_row.py +199 -0
  39. pixeltable/exprs/expr.py +594 -0
  40. pixeltable/exprs/expr_set.py +39 -0
  41. pixeltable/exprs/function_call.py +382 -0
  42. pixeltable/exprs/globals.py +69 -0
  43. pixeltable/exprs/image_member_access.py +96 -0
  44. pixeltable/exprs/in_predicate.py +96 -0
  45. pixeltable/exprs/inline_array.py +109 -0
  46. pixeltable/exprs/inline_dict.py +103 -0
  47. pixeltable/exprs/is_null.py +38 -0
  48. pixeltable/exprs/json_mapper.py +121 -0
  49. pixeltable/exprs/json_path.py +159 -0
  50. pixeltable/exprs/literal.py +66 -0
  51. pixeltable/exprs/object_ref.py +41 -0
  52. pixeltable/exprs/predicate.py +44 -0
  53. pixeltable/exprs/row_builder.py +329 -0
  54. pixeltable/exprs/rowid_ref.py +94 -0
  55. pixeltable/exprs/similarity_expr.py +65 -0
  56. pixeltable/exprs/type_cast.py +53 -0
  57. pixeltable/exprs/variable.py +45 -0
  58. pixeltable/ext/__init__.py +5 -0
  59. pixeltable/ext/functions/yolox.py +92 -0
  60. pixeltable/func/__init__.py +7 -0
  61. pixeltable/func/aggregate_function.py +197 -0
  62. pixeltable/func/callable_function.py +113 -0
  63. pixeltable/func/expr_template_function.py +99 -0
  64. pixeltable/func/function.py +141 -0
  65. pixeltable/func/function_registry.py +227 -0
  66. pixeltable/func/globals.py +46 -0
  67. pixeltable/func/nos_function.py +202 -0
  68. pixeltable/func/signature.py +162 -0
  69. pixeltable/func/udf.py +164 -0
  70. pixeltable/functions/__init__.py +95 -0
  71. pixeltable/functions/eval.py +215 -0
  72. pixeltable/functions/fireworks.py +34 -0
  73. pixeltable/functions/huggingface.py +167 -0
  74. pixeltable/functions/image.py +16 -0
  75. pixeltable/functions/openai.py +289 -0
  76. pixeltable/functions/pil/image.py +147 -0
  77. pixeltable/functions/string.py +13 -0
  78. pixeltable/functions/together.py +143 -0
  79. pixeltable/functions/util.py +52 -0
  80. pixeltable/functions/video.py +62 -0
  81. pixeltable/globals.py +425 -0
  82. pixeltable/index/__init__.py +2 -0
  83. pixeltable/index/base.py +51 -0
  84. pixeltable/index/embedding_index.py +168 -0
  85. pixeltable/io/__init__.py +3 -0
  86. pixeltable/io/hf_datasets.py +188 -0
  87. pixeltable/io/pandas.py +148 -0
  88. pixeltable/io/parquet.py +192 -0
  89. pixeltable/iterators/__init__.py +3 -0
  90. pixeltable/iterators/base.py +52 -0
  91. pixeltable/iterators/document.py +432 -0
  92. pixeltable/iterators/video.py +88 -0
  93. pixeltable/metadata/__init__.py +58 -0
  94. pixeltable/metadata/converters/convert_10.py +18 -0
  95. pixeltable/metadata/converters/convert_12.py +3 -0
  96. pixeltable/metadata/converters/convert_13.py +41 -0
  97. pixeltable/metadata/schema.py +234 -0
  98. pixeltable/plan.py +620 -0
  99. pixeltable/store.py +424 -0
  100. pixeltable/tool/create_test_db_dump.py +184 -0
  101. pixeltable/tool/create_test_video.py +81 -0
  102. pixeltable/type_system.py +846 -0
  103. pixeltable/utils/__init__.py +17 -0
  104. pixeltable/utils/arrow.py +98 -0
  105. pixeltable/utils/clip.py +18 -0
  106. pixeltable/utils/coco.py +136 -0
  107. pixeltable/utils/documents.py +69 -0
  108. pixeltable/utils/filecache.py +195 -0
  109. pixeltable/utils/help.py +11 -0
  110. pixeltable/utils/http_server.py +70 -0
  111. pixeltable/utils/media_store.py +76 -0
  112. pixeltable/utils/pytorch.py +91 -0
  113. pixeltable/utils/s3.py +13 -0
  114. pixeltable/utils/sql.py +17 -0
  115. pixeltable/utils/transactional_directory.py +35 -0
  116. pixeltable-0.0.0.dist-info/LICENSE +18 -0
  117. pixeltable-0.0.0.dist-info/METADATA +131 -0
  118. pixeltable-0.0.0.dist-info/RECORD +119 -0
  119. pixeltable-0.0.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,289 @@
1
+ import base64
2
+ import io
3
+ import pathlib
4
+ import uuid
5
+ from typing import Optional, TypeVar, Union, Callable
6
+
7
+ import PIL.Image
8
+ import numpy as np
9
+ import openai
10
+ import tenacity
11
+ from openai._types import NOT_GIVEN, NotGiven
12
+
13
+ import pixeltable as pxt
14
+ import pixeltable.type_system as ts
15
+ from pixeltable import env
16
+ from pixeltable.func import Batch
17
+
18
+
19
def openai_client() -> openai.OpenAI:
    """Fetch the 'openai' client from the pixeltable environment.

    The factory handed to `get_client` builds an `openai.OpenAI` from the API key it receives.
    """
    def _make_client(api_key):
        return openai.OpenAI(api_key=api_key)

    return env.Env.get().get_client('openai', _make_client)
21
+
22
+
23
+ # Exponential backoff decorator using tenacity.
24
+ # TODO(aaron-siegel): Right now this hardwires random exponential backoff with defaults suggested
25
+ # by OpenAI. Should we investigate making this more customizable in the future?
26
def _retry(fn: Callable) -> Callable:
    """Wrap `fn` with a tenacity retry policy and return the wrapped callable.

    Retries happen only for `openai.RateLimitError`, wait with random exponential backoff
    (multiplier=3, max=180) and stop after 20 attempts.
    """
    retry_policy = tenacity.retry(
        retry=tenacity.retry_if_exception_type(openai.RateLimitError),
        wait=tenacity.wait_random_exponential(multiplier=3, max=180),
        stop=tenacity.stop_after_attempt(20),
    )
    return retry_policy(fn)
32
+
33
+
34
+ #####################################
35
+ # Audio Endpoints
36
+
37
@pxt.udf(return_type=ts.AudioType())
@_retry
def speech(
    input: str,
    *,
    model: str,
    voice: str,
    response_format: Optional[str] = None,
    speed: Optional[float] = None
) -> str:
    """Call the OpenAI audio speech endpoint and write the result to a file in the tmp dir.

    Optional arguments left as `None` are sent as "not given" (see `_opt`).

    Returns:
        Path of the written file; its extension is `response_format`, or 'mp3' when that is unset.
    """
    client = openai_client()
    request_args = {
        'input': input,
        'model': model,
        'voice': voice,
        'response_format': _opt(response_format),
        'speed': _opt(speed),
    }
    audio = client.audio.speech.create(**request_args)
    suffix = response_format if response_format else 'mp3'
    target_path = env.Env.get().tmp_dir / f"{uuid.uuid4()}.{suffix}"
    output_filename = str(target_path)
    audio.write_to_file(output_filename)
    return output_filename
58
+
59
+
60
@pxt.udf(
    param_types=[
        ts.AudioType(),
        ts.StringType(),
        ts.StringType(nullable=True),
        ts.StringType(nullable=True),
        ts.FloatType(nullable=True),
    ]
)
@_retry
def transcriptions(
    audio: str,
    *,
    model: str,
    language: Optional[str] = None,
    prompt: Optional[str] = None,
    temperature: Optional[float] = None
) -> dict:
    """Call the OpenAI audio transcriptions endpoint for the file at path `audio`.

    Optional arguments left as `None` are sent as "not given" (see `_opt`).

    Returns:
        The response converted with `.dict()`.
    """
    audio_path = pathlib.Path(audio)
    client = openai_client()
    response = client.audio.transcriptions.create(
        file=audio_path,
        model=model,
        language=_opt(language),
        prompt=_opt(prompt),
        temperature=_opt(temperature),
    )
    return response.dict()
82
+
83
+
84
@pxt.udf(
    param_types=[
        ts.AudioType(),
        ts.StringType(),
        ts.StringType(nullable=True),
        ts.FloatType(nullable=True),
    ]
)
@_retry
def translations(
    audio: str,
    *,
    model: str,
    prompt: Optional[str] = None,
    temperature: Optional[float] = None
) -> dict:
    """Call the OpenAI audio translations endpoint for the file at path `audio`.

    Optional arguments left as `None` are sent as "not given" (see `_opt`).

    Returns:
        The response converted with `.dict()`.
    """
    audio_path = pathlib.Path(audio)
    client = openai_client()
    response = client.audio.translations.create(
        file=audio_path,
        model=model,
        prompt=_opt(prompt),
        temperature=_opt(temperature),
    )
    return response.dict()
103
+
104
+
105
+ #####################################
106
+ # Chat Endpoints
107
+
108
@pxt.udf
@_retry
def chat_completions(
    messages: list,
    *,
    model: str,
    frequency_penalty: Optional[float] = None,
    logit_bias: Optional[dict[str, int]] = None,
    logprobs: Optional[bool] = None,
    top_logprobs: Optional[int] = None,
    max_tokens: Optional[int] = None,
    n: Optional[int] = None,
    presence_penalty: Optional[float] = None,
    response_format: Optional[dict] = None,
    seed: Optional[int] = None,
    stop: Optional[list[str]] = None,
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
    tools: Optional[list[dict]] = None,
    tool_choice: Optional[dict] = None,
    user: Optional[str] = None
) -> dict:
    """Call the OpenAI chat completions endpoint and return the response converted with `.dict()`.

    `messages` and `model` are forwarded as-is; every other argument goes through `_opt`, so
    a value of `None` is sent as "not given".
    """
    optional_args = {
        'frequency_penalty': frequency_penalty,
        'logit_bias': logit_bias,
        'logprobs': logprobs,
        'top_logprobs': top_logprobs,
        'max_tokens': max_tokens,
        'n': n,
        'presence_penalty': presence_penalty,
        'response_format': response_format,
        'seed': seed,
        'stop': stop,
        'temperature': temperature,
        'top_p': top_p,
        'tools': tools,
        'tool_choice': tool_choice,
        'user': user,
    }
    client = openai_client()
    response = client.chat.completions.create(
        messages=messages,
        model=model,
        **{name: _opt(value) for name, value in optional_args.items()},
    )
    return response.dict()
150
+
151
+
152
@pxt.udf
@_retry
def vision(
    prompt: str,
    image: PIL.Image.Image,
    *,
    model: str = 'gpt-4-vision-preview'
) -> str:
    """Send `prompt` plus `image` as one user message to the chat completions endpoint.

    The image is saved as PNG in memory, base64-encoded and embedded as a data URL.

    Returns:
        The message content of the first choice in the response.
    """
    # TODO(aaron-siegel): Decompose CPU/GPU ops into separate functions
    png_buffer = io.BytesIO()
    image.save(png_buffer, format='png')
    encoded_image = base64.b64encode(png_buffer.getvalue()).decode('utf-8')

    text_part = {'type': 'text', 'text': prompt}
    image_part = {
        'type': 'image_url',
        'image_url': {'url': f'data:image/png;base64,{encoded_image}'},
    }
    user_message = {'role': 'user', 'content': [text_part, image_part]}

    response = openai_client().chat.completions.create(
        messages=[user_message],
        model=model,
    )
    return response.choices[0].message.content
179
+
180
+
181
+ #####################################
182
+ # Embeddings Endpoints
183
+
184
+ _embedding_dimensions_cache: dict[str, int] = {
185
+ 'text-embedding-ada-002': 1536,
186
+ 'text-embedding-3-small': 1536,
187
+ 'text-embedding-3-large': 3072,
188
+ }
189
+
190
+
191
@pxt.udf(batch_size=32, return_type=ts.ArrayType((None,), dtype=ts.FloatType()))
@_retry
def embeddings(
    input: Batch[str],
    *,
    model: str,
    dimensions: Optional[int] = None,
    user: Optional[str] = None
) -> Batch[np.ndarray]:
    """Call the OpenAI embeddings endpoint for a batch of strings.

    `dimensions` and `user` are sent as "not given" when `None` (see `_opt`); the encoding
    format is always 'float'.

    Returns:
        One float64 numpy array per entry in the response's `data`.
    """
    client = openai_client()
    response = client.embeddings.create(
        input=input,
        model=model,
        dimensions=_opt(dimensions),
        user=_opt(user),
        encoding_format='float',
    )
    return [np.array(item.embedding, dtype=np.float64) for item in response.data]
211
+
212
+
213
@embeddings.conditional_return_type
def _(model: str, dimensions: Optional[int] = None) -> ts.ArrayType:
    """Return type for `embeddings`: a non-nullable 1-d float array.

    The array length is `dimensions` when given, otherwise the cached size for `model`,
    otherwise unknown (`None`).
    """
    if dimensions is not None:
        length = dimensions
    elif model in _embedding_dimensions_cache:
        length = _embedding_dimensions_cache[model]
    else:
        # TODO: find some other way to retrieve a sample
        length = None
    return ts.ArrayType((length,), dtype=ts.FloatType(), nullable=False)
221
+
222
+
223
+ #####################################
224
+ # Images Endpoints
225
+
226
@pxt.udf
@_retry
def image_generations(
    prompt: str,
    *,
    model: Optional[str] = None,
    quality: Optional[str] = None,
    size: Optional[str] = None,
    style: Optional[str] = None,
    user: Optional[str] = None
) -> PIL.Image.Image:
    """Call the OpenAI image generation endpoint and return the first result as a PIL image.

    The response is requested as "b64_json"; optional arguments left as `None` are sent as
    "not given" (see `_opt`).
    """
    # TODO(aaron-siegel): Decompose CPU/GPU ops into separate functions
    client = openai_client()
    response = client.images.generate(
        prompt=prompt,
        model=_opt(model),
        quality=_opt(quality),
        size=_opt(size),
        style=_opt(style),
        user=_opt(user),
        response_format="b64_json",
    )
    decoded = base64.b64decode(response.data[0].b64_json)
    generated = PIL.Image.open(io.BytesIO(decoded))
    # load the pixel data before returning, while the in-memory buffer is at hand
    generated.load()
    return generated
252
+
253
+
254
@image_generations.conditional_return_type
def _(size: Optional[str] = None) -> ts.ImageType:
    """Return type for `image_generations`, derived from the `size` argument.

    - no `size`: a 1024x1024 image type
    - `size` of the form '<int>x<int>': an image type of that width and height
    - anything else: an image type without a size
    """
    if size is None:
        return ts.ImageType(size=(1024, 1024))
    # split at the first 'x'; an empty separator means there was none
    width_text, separator, height_text = size.partition('x')
    if not separator:
        return ts.ImageType()
    try:
        dimensions = (int(width_text), int(height_text))
    except ValueError:
        return ts.ImageType()
    return ts.ImageType(size=dimensions)
266
+
267
+
268
+ #####################################
269
+ # Moderations Endpoints
270
+
271
@pxt.udf
@_retry
def moderations(
    input: str,
    *,
    model: Optional[str] = None
) -> dict:
    """Call the OpenAI moderations endpoint and return the response converted with `.dict()`.

    `model` is sent as "not given" when `None` (see `_opt`).
    """
    client = openai_client()
    response = client.moderations.create(input=input, model=_opt(model))
    return response.dict()
283
+
284
+
285
_T = TypeVar('_T')


def _opt(arg: _T) -> Union[_T, NotGiven]:
    """Translate `None` into openai's `NOT_GIVEN`; return any other value unchanged."""
    if arg is None:
        return NOT_GIVEN
    return arg
@@ -0,0 +1,147 @@
1
+ from typing import Tuple, Optional
2
+
3
+ import PIL.Image
4
+ from PIL.Image import Dither
5
+
6
+ import pixeltable.func as func
7
+ from pixeltable.type_system import FloatType, ImageType, IntType, ArrayType, ColumnType, StringType, JsonType
8
+
9
+
10
@func.udf(
    py_fn=PIL.Image.alpha_composite,
    return_type=ImageType(),
    param_types=[ImageType(), ImageType()],
)
def alpha_composite(im1: PIL.Image.Image, im2: PIL.Image.Image) -> PIL.Image.Image:
    """Signature stub; `PIL.Image.alpha_composite` is passed to the decorator as `py_fn`."""


@func.udf(
    py_fn=PIL.Image.blend,
    return_type=ImageType(),
    param_types=[ImageType(), ImageType(), FloatType()],
)
def blend(im1: PIL.Image.Image, im2: PIL.Image.Image, alpha: float) -> PIL.Image.Image:
    """Signature stub; `PIL.Image.blend` is passed to the decorator as `py_fn`."""


@func.udf(
    py_fn=PIL.Image.composite,
    return_type=ImageType(),
    param_types=[ImageType(), ImageType(), ImageType()],
)
def composite(image1: PIL.Image.Image, image2: PIL.Image.Image, mask: PIL.Image.Image) -> PIL.Image.Image:
    """Signature stub; `PIL.Image.composite` is passed to the decorator as `py_fn`."""
22
+
23
+
24
+ # PIL.Image.Image methods
25
+
26
+ # Image.convert()
27
@func.udf(param_types=[ImageType(), StringType()])
def convert(self: PIL.Image.Image, mode: str) -> PIL.Image.Image:
    """Return the result of calling `convert(mode)` on the image."""
    converted = self.convert(mode)
    return converted
30
+
31
+
32
@convert.conditional_return_type
def _(self: PIL.Image.Image, mode: str) -> ColumnType:
    """Return type for `convert`: the input's size and nullability with the requested mode."""
    # NOTE(review): `self` is annotated as an image but is read through `.col_type`;
    # presumably it is an expression object at type-resolution time -- confirm.
    source_type = self.col_type
    assert source_type.is_image_type()
    return ImageType(size=source_type.size, mode=mode, nullable=source_type.nullable)
37
+
38
+
39
+ # Image.crop()
40
@func.udf(
    py_fn=PIL.Image.Image.crop,
    param_types=[ImageType(), ArrayType((4,), dtype=IntType())],
)
def crop(self: PIL.Image.Image, box: Tuple[int, int, int, int]) -> PIL.Image.Image:
    """Signature stub; `PIL.Image.Image.crop` is passed to the decorator as `py_fn`."""
45
+
46
@crop.conditional_return_type
def _(self: PIL.Image.Image, box: Tuple[int, int, int, int]) -> ColumnType:
    """Return type for `crop`: the input's mode and nullability, sized from `box` when possible.

    The size is computed only when `box` is a list or tuple of exactly four ints, read as
    (left, upper, right, lower). Any other `box` yields an image type without a size.
    """
    input_type = self.col_type
    assert input_type.is_image_type()
    # require exactly four ints: a shorter box would raise IndexError below and a longer
    # one would silently produce a size from its first four entries
    is_static_box = (
        isinstance(box, (list, tuple))
        and len(box) == 4
        and all(isinstance(x, int) for x in box)
    )
    if is_static_box:
        left, upper, right, lower = box
        return ImageType(size=(right - left, lower - upper), mode=input_type.mode, nullable=input_type.nullable)
    return ImageType(mode=input_type.mode, nullable=input_type.nullable)  # we can't compute the size statically
53
+
54
+ # Image.getchannel()
55
@func.udf(
    py_fn=PIL.Image.Image.getchannel,
    param_types=[ImageType(), IntType()],
)
def getchannel(self: PIL.Image.Image, channel: int) -> PIL.Image.Image:
    """Signature stub; `PIL.Image.Image.getchannel` is passed to the decorator as `py_fn`."""
58
+
59
@getchannel.conditional_return_type
def _(self: PIL.Image.Image) -> ColumnType:
    """Return type for `getchannel`: the input's size and nullability with mode 'L'."""
    source_type = self.col_type
    assert source_type.is_image_type()
    return ImageType(size=source_type.size, mode='L', nullable=source_type.nullable)
64
+
65
+
66
+ # Image.resize()
67
@func.udf(param_types=[ImageType(), ArrayType((2, ), dtype=IntType())])
def resize(self: PIL.Image.Image, size: Tuple[int, int]) -> PIL.Image.Image:
    """Return the result of calling `resize(size)` on the image."""
    resized = self.resize(size)
    return resized
70
+
71
@resize.conditional_return_type
def _(self: PIL.Image.Image, size: Tuple[int, int]) -> ColumnType:
    """Return type for `resize`: the input's mode and nullability, sized by `size` when possible.

    The size is recorded only when `size` is a list or tuple of exactly two ints. Any other
    `size` yields an image type without a size, as the `crop` return-type resolver does for
    a box it cannot evaluate.
    """
    input_type = self.col_type
    assert input_type.is_image_type()
    is_static_size = (
        isinstance(size, (list, tuple))
        and len(size) == 2
        and all(isinstance(x, int) for x in size)
    )
    if is_static_size:
        return ImageType(size=size, mode=input_type.mode, nullable=input_type.nullable)
    return ImageType(mode=input_type.mode, nullable=input_type.nullable)  # we can't compute the size statically
76
+
77
+ # Image.rotate()
78
@func.udf(param_types=[ImageType(), IntType()])
def rotate(self: PIL.Image.Image, angle: int) -> PIL.Image.Image:
    """Return the result of calling `rotate(angle)` on the image."""
    rotated = self.rotate(angle)
    return rotated
81
+
82
@func.udf(
    py_fn=PIL.Image.Image.effect_spread,
    param_types=[ImageType(), IntType()],
)
def effect_spread(self: PIL.Image.Image, distance: int) -> PIL.Image.Image:
    """Signature stub; `PIL.Image.Image.effect_spread` is passed to the decorator as `py_fn`."""


@func.udf(
    py_fn=PIL.Image.Image.transpose,
    param_types=[ImageType(), IntType()],
)
def transpose(self: PIL.Image.Image, method: int) -> PIL.Image.Image:
    """Signature stub; `PIL.Image.Image.transpose` is passed to the decorator as `py_fn`."""
89
+
90
@rotate.conditional_return_type
@effect_spread.conditional_return_type
@transpose.conditional_return_type
def _(self: PIL.Image.Image) -> ColumnType:
    """Shared return type for `rotate`, `effect_spread` and `transpose`: the input's own type."""
    unchanged_type = self.col_type
    return unchanged_type
95
+
96
@func.udf(
    py_fn=PIL.Image.Image.entropy,
    return_type=FloatType(),
    param_types=[ImageType(), ImageType(), JsonType()],
)
def entropy(self: PIL.Image.Image, mask: PIL.Image.Image, extrema: Optional[list] = None) -> float:
    """Signature stub; `PIL.Image.Image.entropy` is passed to the decorator as `py_fn`."""


@func.udf(
    py_fn=PIL.Image.Image.getbands,
    return_type=JsonType(),
    param_types=[ImageType()],
)
def getbands(self: PIL.Image.Image) -> Tuple[str]:
    """Signature stub; `PIL.Image.Image.getbands` is passed to the decorator as `py_fn`."""


@func.udf(
    py_fn=PIL.Image.Image.getbbox,
    return_type=JsonType(),
    param_types=[ImageType()],
)
def getbbox(self: PIL.Image.Image) -> Tuple[int, int, int, int]:
    """Signature stub; `PIL.Image.Image.getbbox` is passed to the decorator as `py_fn`."""


@func.udf(
    py_fn=PIL.Image.Image.getcolors,
    return_type=JsonType(),
    param_types=[ImageType(), IntType()],
)
def getcolors(self: PIL.Image.Image, maxcolors: int) -> Tuple[Tuple[int, int, int], int]:
    """Signature stub; `PIL.Image.Image.getcolors` is passed to the decorator as `py_fn`."""


@func.udf(
    py_fn=PIL.Image.Image.getextrema,
    return_type=JsonType(),
    param_types=[ImageType()],
)
def getextrema(self: PIL.Image.Image) -> Tuple[int, int]:
    """Signature stub; `PIL.Image.Image.getextrema` is passed to the decorator as `py_fn`."""


@func.udf(
    py_fn=PIL.Image.Image.getpalette,
    return_type=JsonType(),
    param_types=[ImageType(), StringType()],
)
def getpalette(self: PIL.Image.Image, mode: Optional[str] = None) -> Tuple[int]:
    """Signature stub; `PIL.Image.Image.getpalette` is passed to the decorator as `py_fn`."""
121
+
122
@func.udf(
    return_type=JsonType(),
    param_types=[ImageType(), ArrayType((2,), dtype=IntType())],
)
def getpixel(self: PIL.Image.Image, xy: tuple[int, int]) -> Tuple[int]:
    """Return the result of calling `getpixel` on the image with `xy` converted to a tuple."""
    # `xy` will be a list; `tuple(xy)` is necessary for pillow 9 compatibility
    coordinates = tuple(xy)
    return self.getpixel(coordinates)
127
+
128
@func.udf(
    py_fn=PIL.Image.Image.getprojection,
    return_type=JsonType(),
    param_types=[ImageType()],
)
def getprojection(self: PIL.Image.Image) -> Tuple[int]:
    """Signature stub; `PIL.Image.Image.getprojection` is passed to the decorator as `py_fn`."""


@func.udf(
    py_fn=PIL.Image.Image.histogram,
    return_type=JsonType(),
    param_types=[ImageType(), ImageType(), JsonType()],
)
def histogram(self: PIL.Image.Image, mask: PIL.Image.Image, extrema: Optional[list] = None) -> Tuple[int]:
    """Signature stub; `PIL.Image.Image.histogram` is passed to the decorator as `py_fn`."""
135
+
136
@func.udf(
    py_fn=PIL.Image.Image.quantize,
    return_type=ImageType(),
    param_types=[
        ImageType(),
        IntType(),
        IntType(nullable=True),
        IntType(),
        IntType(nullable=True),
        IntType(),
    ],
)
def quantize(
    self: PIL.Image.Image,
    colors: int = 256,
    method: Optional[int] = None,
    kmeans: int = 0,
    palette: Optional[int] = None,
    dither: int = Dither.FLOYDSTEINBERG,
) -> PIL.Image.Image:
    """Signature stub; `PIL.Image.Image.quantize` is passed to the decorator as `py_fn`."""


@func.udf(
    py_fn=PIL.Image.Image.reduce,
    return_type=ImageType(),
    param_types=[ImageType(), IntType(), JsonType()],
)
def reduce(self: PIL.Image.Image, factor: int, box: Optional[Tuple[int]]) -> PIL.Image.Image:
    """Signature stub; `PIL.Image.Image.reduce` is passed to the decorator as `py_fn`."""
@@ -0,0 +1,13 @@
1
+ from typing import Any
2
+
3
+ from pixeltable.type_system import StringType
4
+ import pixeltable.func as func
5
+
6
+
7
@func.udf(return_type=StringType(), param_types=[StringType()])
def str_format(format_str: str, *args: Any, **kwargs: Any) -> str:
    """Format `format_str` with `str.format`, substituting from args and kwargs:
    - {<int>} is replaced by the element of args at that position
    - {<key>} is replaced by the value stored under that key in kwargs
    """
    formatted = format_str.format(*args, **kwargs)
    return formatted
@@ -0,0 +1,143 @@
1
+ import base64
2
+ import io
3
+ from typing import Optional
4
+
5
+ import PIL.Image
6
+ import numpy as np
7
+ import together
8
+
9
+ import pixeltable as pxt
10
+ from pixeltable import env
11
+ from pixeltable.func import Batch
12
+
13
+
14
def together_client() -> together.Together:
    """Fetch the 'together' client from the pixeltable environment.

    The factory handed to `get_client` builds a `together.Together` from the API key it receives.
    """
    def _make_client(api_key):
        return together.Together(api_key=api_key)

    return env.Env.get().get_client('together', _make_client)
16
+
17
+
18
@pxt.udf
def completions(
    prompt: str,
    *,
    model: str,
    max_tokens: Optional[int] = None,
    stop: Optional[list] = None,
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
    top_k: Optional[int] = None,
    repetition_penalty: Optional[float] = None,
    logprobs: Optional[int] = None,
    echo: Optional[bool] = None,
    n: Optional[int] = None,
    safety_model: Optional[str] = None
) -> dict:
    """Call the Together completions endpoint and return the response converted with `.dict()`.

    All arguments are forwarded unchanged, including those left as `None`.
    """
    request_args = {
        'prompt': prompt,
        'model': model,
        'max_tokens': max_tokens,
        'stop': stop,
        'temperature': temperature,
        'top_p': top_p,
        'top_k': top_k,
        'repetition_penalty': repetition_penalty,
        'logprobs': logprobs,
        'echo': echo,
        'n': n,
        'safety_model': safety_model,
    }
    response = together_client().completions.create(**request_args)
    return response.dict()
48
+
49
+
50
@pxt.udf
def chat_completions(
    messages: list[dict[str, str]],
    *,
    model: str,
    max_tokens: Optional[int] = None,
    stop: Optional[list[str]] = None,
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
    top_k: Optional[int] = None,
    repetition_penalty: Optional[float] = None,
    logprobs: Optional[int] = None,
    echo: Optional[bool] = None,
    n: Optional[int] = None,
    safety_model: Optional[str] = None,
    response_format: Optional[dict] = None,
    tools: Optional[dict] = None,
    tool_choice: Optional[dict] = None
) -> dict:
    """Call the Together chat completions endpoint and return the response converted with `.dict()`.

    All arguments are forwarded unchanged, including those left as `None`.
    """
    request_args = {
        'messages': messages,
        'model': model,
        'max_tokens': max_tokens,
        'stop': stop,
        'temperature': temperature,
        'top_p': top_p,
        'top_k': top_k,
        'repetition_penalty': repetition_penalty,
        'logprobs': logprobs,
        'echo': echo,
        'n': n,
        'safety_model': safety_model,
        'response_format': response_format,
        'tools': tools,
        'tool_choice': tool_choice,
    }
    response = together_client().chat.completions.create(**request_args)
    return response.dict()
86
+
87
+
88
+ _embedding_dimensions_cache = {
89
+ 'togethercomputer/m2-bert-80M-2k-retrieval': 768,
90
+ 'togethercomputer/m2-bert-80M-8k-retrieval': 768,
91
+ 'togethercomputer/m2-bert-80M-32k-retrieval': 768,
92
+ 'WhereIsAI/UAE-Large-V1': 1024,
93
+ 'BAAI/bge-large-en-v1.5': 1024,
94
+ 'BAAI/bge-base-en-v1.5': 768,
95
+ 'sentence-transformers/msmarco-bert-base-dot-v5': 768,
96
+ 'bert-base-uncased': 768,
97
+ }
98
+
99
+
100
@pxt.udf(batch_size=32, return_type=pxt.ArrayType((None,), dtype=pxt.FloatType()))
def embeddings(input: Batch[str], *, model: str) -> Batch[np.ndarray]:
    """Call the Together embeddings endpoint for a batch of strings.

    Returns:
        One float64 numpy array per entry in the response's `data`.
    """
    client = together_client()
    response = client.embeddings.create(input=input, model=model)
    return [np.array(item.embedding, dtype=np.float64) for item in response.data]
107
+
108
+
109
@embeddings.conditional_return_type
def _(model: str) -> pxt.ArrayType:
    """Return type for `embeddings`: a 1-d float array whose length is the cached size for
    `model`, or unknown (`None`) when the model is not in the cache.
    """
    # TODO: find some other way to retrieve a sample
    # `.get` yields None for models missing from the cache, i.e. an array of unknown length
    dimensions = _embedding_dimensions_cache.get(model)
    return pxt.ArrayType((dimensions,), dtype=pxt.FloatType())
116
+
117
+
118
@pxt.udf
def image_generations(
    prompt: str,
    *,
    model: str,
    steps: Optional[int] = None,
    seed: Optional[int] = None,
    height: Optional[int] = None,
    width: Optional[int] = None,
    negative_prompt: Optional[str] = None,
) -> PIL.Image.Image:
    """Call the Together image generation endpoint and return the first result as a PIL image.

    All arguments are forwarded unchanged, including those left as `None`. The image is
    decoded from the `b64_json` field of the first entry in the response's `data`.
    """
    # TODO(aaron-siegel): Decompose CPU/GPU ops into separate functions
    request_args = {
        'prompt': prompt,
        'model': model,
        'steps': steps,
        'seed': seed,
        'height': height,
        'width': width,
        'negative_prompt': negative_prompt,
    }
    response = together_client().images.generate(**request_args)
    decoded = base64.b64decode(response.data[0].b64_json)
    generated = PIL.Image.open(io.BytesIO(decoded))
    # load the pixel data before returning, while the in-memory buffer is at hand
    generated.load()
    return generated
@@ -0,0 +1,52 @@
1
+ from typing import Tuple, List, Optional
2
+ import types
3
+ import sys
4
+
5
+ import pixeltable.func as func
6
+ import pixeltable.type_system as ts
7
+ import pixeltable.env as env
8
+
9
+
10
def create_nos_modules() -> List[types.ModuleType]:
    """Create module pixeltable.functions.nos with one submodule per task and return the submodules.

    Model infos are fetched from the environment's NOS client and sorted by task value so
    that models of the same task are adjacent. The parent module and each submodule are
    registered in `sys.modules`; every model becomes a `func.NOSFunction` attribute of its
    task's submodule, named after the model with '/' and '-' replaced by '_'.

    Returns:
        The newly created submodules, in task order.
    """
    nos_client = env.Env.get().nos_client
    model_info = [nos_client.GetModelInfo(model) for model in nos_client.ListModels()]
    model_info.sort(key=lambda info: info.task.value)

    module_name = 'pixeltable.functions.nos'
    nos_module = types.ModuleType(module_name)
    nos_module.__package__ = 'pixeltable.functions'
    sys.modules[module_name] = nos_module

    # A unique sentinel marks "no task seen yet", so the first model always opens a
    # submodule, whatever its task value is (including an empty string).
    no_task = object()
    prev_task = no_task
    new_modules: List[types.ModuleType] = []
    sub_module: Optional[types.ModuleType] = None
    submodule_name = module_name
    for info in model_info:
        if prev_task is no_task or info.task.value != prev_task:
            # we construct one submodule per task
            namespace = info.task.name.lower()
            submodule_name = f'{module_name}.{namespace}'
            sub_module = types.ModuleType(submodule_name)
            sub_module.__package__ = module_name
            setattr(nos_module, namespace, sub_module)
            new_modules.append(sub_module)
            sys.modules[submodule_name] = sub_module
            prev_task = info.task.value

        # add a Function for this model to the module
        model_id = info.name.replace("/", "_").replace("-", "_")
        pt_func = func.NOSFunction(info, f'{submodule_name}.{model_id}')
        setattr(sub_module, model_id, pt_func)

    return new_modules
42
+
43
+
44
def resolve_torch_device(device: str) -> str:
    """Resolve the device name 'auto' to a concrete torch device name.

    Any value other than 'auto' is returned unchanged. For 'auto' the result is 'cuda' if
    CUDA is available, else 'mps' if the MPS backend is available, else 'cpu'.
    """
    import torch
    if device != 'auto':
        return device
    if torch.cuda.is_available():
        return 'cuda'
    return 'mps' if torch.backends.mps.is_available() else 'cpu'
@@ -0,0 +1,62 @@
1
+ from typing import Optional
2
+ import uuid
3
+ import av
4
+ import sys
5
+
6
+ import pixeltable.env as env
7
+ import pixeltable.func as func
8
+ import pixeltable.type_system as ts
9
+
10
+
11
+ _format_defaults = { # format -> (codec, ext)
12
+ 'wav': ('pcm_s16le', 'wav'),
13
+ 'mp3': ('libmp3lame', 'mp3'),
14
+ 'flac': ('flac', 'flac'),
15
+ #'mp4': ('aac', 'm4a'),
16
+ }
17
+
18
+ # for mp4:
19
+ # - extract_audio() fails with "Application provided invalid, non monotonically increasing dts to muxer in stream 0: 1146 >= 290"
20
+ # - chatgpt suggests this can be fixed in the following manner
21
+ # for packet in container.demux(audio_stream):
22
+ # packet.pts = None # Reset the PTS and DTS to allow FFmpeg to set them automatically
23
+ # packet.dts = None
24
+ # for frame in packet.decode():
25
+ # frame.pts = None
26
+ # for packet in output_stream.encode(frame):
27
+ # output_container.mux(packet)
28
+ #
29
+ # # Flush remaining packets
30
+ # for packet in output_stream.encode():
31
+ # output_container.mux(packet)
32
+
33
+
34
# Parameter types of extract_audio(), in signature order.
_extract_audio_param_types = [
    ts.VideoType(nullable=False),  # video_path
    ts.IntType(nullable=False),  # stream_idx
    ts.StringType(nullable=False),  # format
    # codec is Optional[str] with default None in the signature, so it must be nullable
    ts.StringType(nullable=True),  # codec
]
40
@func.udf(return_type=ts.AudioType(nullable=True), param_types=_extract_audio_param_types)
def extract_audio(
    video_path: str, stream_idx: int = 0, format: str = 'wav', codec: Optional[str] = None
) -> Optional[str]:
    """Extract an audio stream from a video file, save it as a media file and return its path.

    Args:
        video_path: path of the video file to read.
        stream_idx: index into the video's audio streams.
        format: output container format; must be a key of `_format_defaults`.
        codec: codec for the output stream; when unset, the format's default codec is used.

    Returns:
        Path of the written audio file, or None if the video has no audio stream at `stream_idx`.

    Raises:
        ValueError: if `format` is not a key of `_format_defaults`.
    """
    if format not in _format_defaults:
        raise ValueError(f'extract_audio(): unsupported audio format: {format}')
    default_codec, ext = _format_defaults[format]

    with av.open(video_path) as container:
        if len(container.streams.audio) <= stream_idx:
            return None
        audio_stream = container.streams.audio[stream_idx]
        # create this in our tmp directory, so it'll get cleaned up if it's being generated as part of a query
        output_filename = str(env.Env.get().tmp_dir / f"{uuid.uuid4()}.{ext}")

        with av.open(output_filename, "w", format=format) as output_container:
            output_stream = output_container.add_stream(codec or default_codec)
            for packet in container.demux(audio_stream):
                for frame in packet.decode():
                    output_container.mux(output_stream.encode(frame))
            # flush the encoder: encoding None drains the packets it is still buffering,
            # which would otherwise be missing from the end of the output file
            output_container.mux(output_stream.encode(None))

        return output_filename