pixeltable 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (139) hide show
  1. pixeltable/__init__.py +34 -6
  2. pixeltable/catalog/__init__.py +13 -0
  3. pixeltable/catalog/catalog.py +159 -0
  4. pixeltable/catalog/column.py +200 -0
  5. pixeltable/catalog/dir.py +32 -0
  6. pixeltable/catalog/globals.py +33 -0
  7. pixeltable/catalog/insertable_table.py +191 -0
  8. pixeltable/catalog/named_function.py +36 -0
  9. pixeltable/catalog/path.py +58 -0
  10. pixeltable/catalog/path_dict.py +139 -0
  11. pixeltable/catalog/schema_object.py +39 -0
  12. pixeltable/catalog/table.py +581 -0
  13. pixeltable/catalog/table_version.py +749 -0
  14. pixeltable/catalog/table_version_path.py +133 -0
  15. pixeltable/catalog/view.py +203 -0
  16. pixeltable/client.py +520 -30
  17. pixeltable/dataframe.py +540 -349
  18. pixeltable/env.py +373 -45
  19. pixeltable/exceptions.py +12 -21
  20. pixeltable/exec/__init__.py +9 -0
  21. pixeltable/exec/aggregation_node.py +78 -0
  22. pixeltable/exec/cache_prefetch_node.py +113 -0
  23. pixeltable/exec/component_iteration_node.py +79 -0
  24. pixeltable/exec/data_row_batch.py +95 -0
  25. pixeltable/exec/exec_context.py +22 -0
  26. pixeltable/exec/exec_node.py +61 -0
  27. pixeltable/exec/expr_eval_node.py +217 -0
  28. pixeltable/exec/in_memory_data_node.py +69 -0
  29. pixeltable/exec/media_validation_node.py +43 -0
  30. pixeltable/exec/sql_scan_node.py +225 -0
  31. pixeltable/exprs/__init__.py +24 -0
  32. pixeltable/exprs/arithmetic_expr.py +102 -0
  33. pixeltable/exprs/array_slice.py +71 -0
  34. pixeltable/exprs/column_property_ref.py +77 -0
  35. pixeltable/exprs/column_ref.py +105 -0
  36. pixeltable/exprs/comparison.py +77 -0
  37. pixeltable/exprs/compound_predicate.py +98 -0
  38. pixeltable/exprs/data_row.py +187 -0
  39. pixeltable/exprs/expr.py +586 -0
  40. pixeltable/exprs/expr_set.py +39 -0
  41. pixeltable/exprs/function_call.py +380 -0
  42. pixeltable/exprs/globals.py +69 -0
  43. pixeltable/exprs/image_member_access.py +115 -0
  44. pixeltable/exprs/image_similarity_predicate.py +58 -0
  45. pixeltable/exprs/inline_array.py +107 -0
  46. pixeltable/exprs/inline_dict.py +101 -0
  47. pixeltable/exprs/is_null.py +38 -0
  48. pixeltable/exprs/json_mapper.py +121 -0
  49. pixeltable/exprs/json_path.py +159 -0
  50. pixeltable/exprs/literal.py +54 -0
  51. pixeltable/exprs/object_ref.py +41 -0
  52. pixeltable/exprs/predicate.py +44 -0
  53. pixeltable/exprs/row_builder.py +355 -0
  54. pixeltable/exprs/rowid_ref.py +94 -0
  55. pixeltable/exprs/type_cast.py +53 -0
  56. pixeltable/exprs/variable.py +45 -0
  57. pixeltable/func/__init__.py +9 -0
  58. pixeltable/func/aggregate_function.py +194 -0
  59. pixeltable/func/batched_function.py +53 -0
  60. pixeltable/func/callable_function.py +69 -0
  61. pixeltable/func/expr_template_function.py +82 -0
  62. pixeltable/func/function.py +110 -0
  63. pixeltable/func/function_registry.py +227 -0
  64. pixeltable/func/globals.py +36 -0
  65. pixeltable/func/nos_function.py +202 -0
  66. pixeltable/func/signature.py +166 -0
  67. pixeltable/func/udf.py +163 -0
  68. pixeltable/functions/__init__.py +52 -103
  69. pixeltable/functions/eval.py +216 -0
  70. pixeltable/functions/fireworks.py +61 -0
  71. pixeltable/functions/huggingface.py +120 -0
  72. pixeltable/functions/image.py +16 -0
  73. pixeltable/functions/openai.py +88 -0
  74. pixeltable/functions/pil/image.py +148 -7
  75. pixeltable/functions/string.py +13 -0
  76. pixeltable/functions/together.py +27 -0
  77. pixeltable/functions/util.py +41 -0
  78. pixeltable/functions/video.py +62 -0
  79. pixeltable/iterators/__init__.py +3 -0
  80. pixeltable/iterators/base.py +48 -0
  81. pixeltable/iterators/document.py +311 -0
  82. pixeltable/iterators/video.py +89 -0
  83. pixeltable/metadata/__init__.py +54 -0
  84. pixeltable/metadata/converters/convert_10.py +18 -0
  85. pixeltable/metadata/schema.py +211 -0
  86. pixeltable/plan.py +656 -0
  87. pixeltable/store.py +413 -182
  88. pixeltable/tests/conftest.py +143 -87
  89. pixeltable/tests/test_audio.py +65 -0
  90. pixeltable/tests/test_catalog.py +27 -0
  91. pixeltable/tests/test_client.py +14 -14
  92. pixeltable/tests/test_component_view.py +372 -0
  93. pixeltable/tests/test_dataframe.py +433 -0
  94. pixeltable/tests/test_dirs.py +78 -62
  95. pixeltable/tests/test_document.py +117 -0
  96. pixeltable/tests/test_exprs.py +591 -135
  97. pixeltable/tests/test_function.py +297 -67
  98. pixeltable/tests/test_functions.py +283 -1
  99. pixeltable/tests/test_migration.py +43 -0
  100. pixeltable/tests/test_nos.py +54 -0
  101. pixeltable/tests/test_snapshot.py +208 -0
  102. pixeltable/tests/test_table.py +1085 -262
  103. pixeltable/tests/test_transactional_directory.py +42 -0
  104. pixeltable/tests/test_types.py +5 -11
  105. pixeltable/tests/test_video.py +149 -34
  106. pixeltable/tests/test_view.py +530 -0
  107. pixeltable/tests/utils.py +186 -45
  108. pixeltable/tool/create_test_db_dump.py +149 -0
  109. pixeltable/type_system.py +490 -126
  110. pixeltable/utils/__init__.py +17 -46
  111. pixeltable/utils/clip.py +12 -15
  112. pixeltable/utils/coco.py +136 -0
  113. pixeltable/utils/documents.py +39 -0
  114. pixeltable/utils/filecache.py +195 -0
  115. pixeltable/utils/help.py +11 -0
  116. pixeltable/utils/media_store.py +76 -0
  117. pixeltable/utils/parquet.py +126 -0
  118. pixeltable/utils/pytorch.py +172 -0
  119. pixeltable/utils/s3.py +13 -0
  120. pixeltable/utils/sql.py +17 -0
  121. pixeltable/utils/transactional_directory.py +35 -0
  122. pixeltable-0.2.0.dist-info/LICENSE +18 -0
  123. pixeltable-0.2.0.dist-info/METADATA +117 -0
  124. pixeltable-0.2.0.dist-info/RECORD +125 -0
  125. {pixeltable-0.1.1.dist-info → pixeltable-0.2.0.dist-info}/WHEEL +1 -1
  126. pixeltable/catalog.py +0 -1421
  127. pixeltable/exprs.py +0 -1745
  128. pixeltable/function.py +0 -269
  129. pixeltable/functions/clip.py +0 -10
  130. pixeltable/functions/pil/__init__.py +0 -23
  131. pixeltable/functions/tf.py +0 -21
  132. pixeltable/index.py +0 -57
  133. pixeltable/tests/test_dict.py +0 -24
  134. pixeltable/tests/test_tf.py +0 -69
  135. pixeltable/tf.py +0 -33
  136. pixeltable/utils/tf.py +0 -33
  137. pixeltable/utils/video.py +0 -32
  138. pixeltable-0.1.1.dist-info/METADATA +0 -31
  139. pixeltable-0.1.1.dist-info/RECORD +0 -36
@@ -0,0 +1,120 @@
1
+ from typing import Any, Callable
2
+
3
+ import PIL.Image
4
+ import numpy as np
5
+
6
+ import pixeltable as pxt
7
+ import pixeltable.env as env
8
+ import pixeltable.type_system as ts
9
+ from pixeltable.func import Batch
10
+
11
+
12
@pxt.udf(batch_size=32, return_type=ts.ArrayType((None,), dtype=ts.FloatType()))
def sentence_transformer(
    sentences: Batch[str], *, model_id: str, normalize_embeddings: bool = False
) -> Batch[np.ndarray]:
    """Embed a batch of sentences with a SentenceTransformer model.

    Args:
        sentences: batch of input strings.
        model_id: identifier handed to the `SentenceTransformer` constructor.
        normalize_embeddings: forwarded unchanged to `encode()`.

    Returns:
        One embedding array per input sentence, in input order.
    """
    env.Env.get().require_package('sentence_transformers')
    from sentence_transformers import SentenceTransformer

    encoder = _lookup_model(model_id, SentenceTransformer)
    vectors = encoder.encode(sentences, normalize_embeddings=normalize_embeddings)
    # iterating the result along its first axis yields one row per sentence
    return list(vectors)
21
+
22
+
23
@pxt.udf
def sentence_transformer_list(sentences: list, *, model_id: str, normalize_embeddings: bool = False) -> list:
    """Embed a list of sentences and return the embeddings as plain Python lists.

    Args:
        sentences: list of input strings.
        model_id: identifier handed to the `SentenceTransformer` constructor.
        normalize_embeddings: forwarded unchanged to `encode()`.

    Returns:
        A list with one list of numbers per input sentence.
    """
    env.Env.get().require_package('sentence_transformers')
    from sentence_transformers import SentenceTransformer

    encoder = _lookup_model(model_id, SentenceTransformer)
    vectors = encoder.encode(sentences, normalize_embeddings=normalize_embeddings)
    return [row.tolist() for row in vectors]
32
+
33
+
34
@pxt.udf(batch_size=32)
def cross_encoder(sentences1: Batch[str], sentences2: Batch[str], *, model_id: str) -> Batch[float]:
    """Score sentence pairs with a CrossEncoder model.

    The i-th element of `sentences1` is paired with the i-th element of `sentences2`.

    Returns:
        The model's predictions converted to a Python list, one entry per pair.
    """
    env.Env.get().require_package('sentence_transformers')
    from sentence_transformers import CrossEncoder

    scorer = _lookup_model(model_id, CrossEncoder)
    pairs = [[left, right] for left, right in zip(sentences1, sentences2)]
    scores = scorer.predict(pairs, convert_to_numpy=True)
    return scores.tolist()
43
+
44
+
45
@pxt.udf
def cross_encoder_list(sentence1: str, sentences2: list, *, model_id: str) -> list:
    """Score one sentence against every sentence in a list with a CrossEncoder model.

    Returns:
        The model's predictions converted to a Python list, one entry per element of `sentences2`.
    """
    env.Env.get().require_package('sentence_transformers')
    from sentence_transformers import CrossEncoder

    scorer = _lookup_model(model_id, CrossEncoder)
    pairs = [[sentence1, candidate] for candidate in sentences2]
    scores = scorer.predict(pairs, convert_to_numpy=True)
    return scores.tolist()
54
+
55
+
56
@pxt.udf(batch_size=32, return_type=ts.ArrayType((None,), dtype=ts.FloatType(), nullable=False))
def clip_text(text: Batch[str], *, model_id: str) -> Batch[np.ndarray]:
    """Compute CLIP text features for a batch of strings.

    Args:
        text: batch of input strings.
        model_id: identifier passed to `CLIPModel.from_pretrained` / `CLIPProcessor.from_pretrained`.

    Returns:
        One feature array per input string, in input order.
    """
    env.Env.get().require_package('transformers')
    from transformers import CLIPModel, CLIPProcessor

    clip = _lookup_model(model_id, CLIPModel.from_pretrained)
    preprocess = _lookup_processor(model_id, CLIPProcessor.from_pretrained)

    encoded = preprocess(text=text, return_tensors='pt', padding=True, truncation=True)
    features = clip.get_text_features(**encoded).detach().numpy()
    # split the result into one row per input
    return list(features)
67
+
68
+
69
@pxt.udf(batch_size=32, return_type=ts.ArrayType((None,), dtype=ts.FloatType(), nullable=False))
def clip_image(image: Batch[PIL.Image.Image], *, model_id: str) -> Batch[np.ndarray]:
    """Compute CLIP image features for a batch of images.

    Args:
        image: batch of PIL images.
        model_id: identifier passed to `CLIPModel.from_pretrained` / `CLIPProcessor.from_pretrained`.

    Returns:
        One feature array per input image, in input order.
    """
    env.Env.get().require_package('transformers')
    from transformers import CLIPModel, CLIPProcessor

    clip = _lookup_model(model_id, CLIPModel.from_pretrained)
    preprocess = _lookup_processor(model_id, CLIPProcessor.from_pretrained)

    encoded = preprocess(images=image, return_tensors='pt', padding=True)
    features = clip.get_image_features(**encoded).detach().numpy()
    # split the result into one row per input
    return list(features)
80
+
81
+
82
def _create_detr_model(model_id: str) -> Any:
    """Load the DETR object-detection model for `model_id` at the 'no_timm' revision."""
    from transformers import DetrForObjectDetection
    return DetrForObjectDetection.from_pretrained(model_id, revision='no_timm')


def _create_detr_processor(model_id: str) -> Any:
    """Load the DETR image processor for `model_id` at the 'no_timm' revision."""
    from transformers import DetrImageProcessor
    return DetrImageProcessor.from_pretrained(model_id, revision='no_timm')


@pxt.udf(batch_size=32)
def detr_for_object_detection(image: Batch[PIL.Image.Image], *, model_id: str, threshold: float = 0.5) -> Batch[dict]:
    """Run DETR object detection on a batch of images.

    Args:
        image: batch of PIL images.
        model_id: identifier passed to the DETR `from_pretrained` loaders.
        threshold: forwarded to `post_process_object_detection`.

    Returns:
        One dict per image with the keys 'scores', 'labels', 'label_text' and 'boxes',
        each holding a list with one entry per detection.
    """
    env.Env.get().require_package('transformers')

    # The cache key includes the `create` callable. A lambda built inside this function would
    # be a new object on every call, so the cache would never hit and the model would be
    # reloaded for every batch. Module-level factories are stable and hashable.
    model = _lookup_model(model_id, _create_detr_model)
    processor = _lookup_processor(model_id, _create_detr_processor)

    inputs = processor(images=image, return_tensors='pt')
    outputs = model(**inputs)

    # NOTE(review): no target_sizes are passed here; check the transformers documentation for
    # the coordinate system of the returned boxes in that case.
    detections = processor.post_process_object_detection(outputs, threshold=threshold)
    id2label = model.config.id2label
    batch_results = []
    for detection in detections:
        label_ids = [label.item() for label in detection['labels']]
        batch_results.append({
            'scores': [score.item() for score in detection['scores']],
            'labels': label_ids,
            'label_text': [id2label[label_id] for label_id in label_ids],
            'boxes': [box.tolist() for box in detection['boxes']]
        })
    return batch_results
103
+
104
+
105
def _lookup_model(model_id: str, create: Callable) -> Any:
    """Return the cached model for (`model_id`, `create`), building it with `create(model_id)` on a miss.

    The `create` callable is part of the cache key, so it must be hashable; a callable object
    that is re-created on every call (e.g. an inline lambda) will never produce a cache hit.
    """
    cache_key = (model_id, create)  # For safety, include the `create` callable in the cache key
    if cache_key in _model_cache:
        return _model_cache[cache_key]
    _model_cache[cache_key] = create(model_id)
    return _model_cache[cache_key]
110
+
111
+
112
def _lookup_processor(model_id: str, create: Callable) -> Any:
    """Return the cached processor for (`model_id`, `create`), building it with `create(model_id)` on a miss.

    The `create` callable is part of the cache key, so it must be hashable; a callable object
    that is re-created on every call (e.g. an inline lambda) will never produce a cache hit.
    """
    cache_key = (model_id, create)  # For safety, include the `create` callable in the cache key
    if cache_key in _processor_cache:
        return _processor_cache[cache_key]
    _processor_cache[cache_key] = create(model_id)
    return _processor_cache[cache_key]
117
+
118
+
119
# Module-level caches, keyed by (model_id, create-callable) tuples.
# They are filled by _lookup_model() and _lookup_processor(); nothing in this module evicts entries.
_model_cache = {}
_processor_cache = {}
@@ -0,0 +1,16 @@
1
+ import base64
2
+
3
+ import PIL.Image
4
+
5
+ from pixeltable.type_system import ImageType, StringType
6
+ import pixeltable.func as func
7
+
8
+
9
@func.udf
def b64_encode(img: PIL.Image.Image, image_format: str = 'png') -> str:
    """Serialize `img` in `image_format` and return the bytes as a base64-encoded string.

    Args:
        img: the image to encode.
        image_format: passed as `format=` to `img.save()`; defaults to 'png'.
    """
    import io

    buffer = io.BytesIO()
    img.save(buffer, format=image_format)
    encoded = base64.b64encode(buffer.getvalue())
    return encoded.decode('utf-8')
@@ -0,0 +1,88 @@
1
+ import base64
2
+ import io
3
+ from typing import Optional
4
+
5
+ import PIL.Image
6
+ import numpy as np
7
+
8
+ import pixeltable as pxt
9
+ import pixeltable.type_system as ts
10
+ from pixeltable import env
11
+ from pixeltable.func import Batch
12
+
13
+
14
@pxt.udf
def chat_completions(
        messages: list,
        model: str,
        frequency_penalty: Optional[float] = None,
        logit_bias: Optional[dict] = None,
        max_tokens: Optional[int] = None,
        n: Optional[int] = None,
        presence_penalty: Optional[float] = None,
        response_format: Optional[dict] = None,
        seed: Optional[int] = None,
        top_p: Optional[float] = None,
        temperature: Optional[float] = None
) -> dict:
    """Call the OpenAI chat completions endpoint and return the response as a dict.

    Every optional argument that is left as None is sent as the SDK's NOT_GIVEN
    sentinel instead of None.
    """
    from openai._types import NOT_GIVEN

    def _or_not_given(value):
        # translate "argument omitted" (None) into the SDK sentinel
        return NOT_GIVEN if value is None else value

    response = env.Env.get().openai_client.chat.completions.create(
        messages=messages,
        model=model,
        frequency_penalty=_or_not_given(frequency_penalty),
        logit_bias=_or_not_given(logit_bias),
        max_tokens=_or_not_given(max_tokens),
        n=_or_not_given(n),
        presence_penalty=_or_not_given(presence_penalty),
        response_format=_or_not_given(response_format),
        seed=_or_not_given(seed),
        top_p=_or_not_given(top_p),
        temperature=_or_not_given(temperature)
    )
    return response.dict()
43
+
44
+
45
@pxt.udf
def vision(
        prompt: str,
        image: PIL.Image.Image,
        model: str = 'gpt-4-vision-preview'
) -> str:
    """Send a text prompt plus an image to an OpenAI chat model and return the reply text.

    The image is serialized as PNG and embedded in the request as a base64 data URL.

    Returns:
        The message content of the first choice in the response.
    """
    png_buffer = io.BytesIO()
    image.save(png_buffer, format='png')
    b64_encoded_image = base64.b64encode(png_buffer.getvalue()).decode('utf-8')

    text_part = {'type': 'text', 'text': prompt}
    image_part = {'type': 'image_url', 'image_url': {
        'url': f'data:image/png;base64,{b64_encoded_image}'
    }}
    messages = [{'role': 'user', 'content': [text_part, image_part]}]

    response = env.Env.get().openai_client.chat.completions.create(
        messages=messages,
        model=model
    )
    return response.choices[0].message.content
69
+
70
+
71
@pxt.udf
def moderations(input: str, model: Optional[str] = None) -> dict:
    """Call the OpenAI moderations endpoint for `input` and return the response as a dict.

    Args:
        input: the text to classify.
        model: moderation model name; when None the argument is omitted from the request
            (sent as the SDK's NOT_GIVEN sentinel) rather than forwarded as None.
    """
    from openai._types import NOT_GIVEN

    # Use the Env.get() accessor directly, as the other OpenAI functions in this module do,
    # instead of instantiating Env first.
    result = env.Env.get().openai_client.moderations.create(
        input=input,
        model=model if model is not None else NOT_GIVEN
    )
    return result.dict()
75
+
76
+
77
@pxt.udf(batch_size=32, return_type=ts.ArrayType((None,), dtype=ts.FloatType()))
def embeddings(input: Batch[str], *, model: str) -> Batch[np.ndarray]:
    """Compute OpenAI embeddings for a batch of strings.

    Args:
        input: batch of input strings.
        model: embedding model name.

    Returns:
        One float64 array per entry of the response's `data` list.
    """
    # Use the Env.get() accessor directly, as the other OpenAI functions in this module do,
    # instead of instantiating Env first.
    result = env.Env.get().openai_client.embeddings.create(
        input=input,
        model=model,
        encoding_format='float'
    )
    # a distinct local name avoids shadowing this function's own name
    vectors = [
        np.array(data.embedding, dtype=np.float64)
        for data in result.data
    ]
    return vectors
@@ -1,9 +1,150 @@
1
- from pixeltable.type_system import FloatType, ImageType
2
- from pixeltable.function import Function
1
+ from typing import Dict, Any, Tuple, Optional
3
2
 
3
+ import PIL.Image
4
4
 
5
- alpha_composite = Function(
6
- ImageType(), [ImageType(), ImageType()], module_name='PIL.Image', eval_symbol='alpha_composite')
7
- blend = Function(ImageType(), [ImageType(), ImageType(), FloatType()], module_name='PIL.Image', eval_symbol='blend')
8
- composite = Function(
9
- ImageType(), [ImageType(), ImageType(), ImageType()], module_name='PIL.Image', eval_symbol='composite')
5
+ from pixeltable.type_system import FloatType, ImageType, IntType, ArrayType, ColumnType, StringType, JsonType, BoolType
6
+ import pixeltable.func as func
7
+
8
+
9
def _caller_return_type(bound_args: Optional[Dict[str, Any]]) -> ColumnType:
    """Return type resolver: the result has the same column type as the 'self' argument.

    Falls back to a generic ImageType() when no bound arguments are supplied.
    """
    if bound_args is not None:
        return bound_args['self'].col_type
    return ImageType()
13
+
14
@func.udf(
    py_fn=PIL.Image.alpha_composite, return_type=ImageType(), param_types=[ImageType(), ImageType()])
def alpha_composite(im1: PIL.Image.Image, im2: PIL.Image.Image) -> PIL.Image.Image:
    """Signature stub for PIL.Image.alpha_composite, which is registered as `py_fn`.

    The body is empty; presumably the decorator dispatches to `py_fn` (defined outside this file).
    """
    pass
18
@func.udf(
    py_fn=PIL.Image.blend, return_type=ImageType(), param_types=[ImageType(), ImageType(), FloatType()])
def blend(im1: PIL.Image.Image, im2: PIL.Image.Image, alpha: float) -> PIL.Image.Image:
    """Signature stub for PIL.Image.blend, which is registered as `py_fn`.

    The body is empty; presumably the decorator dispatches to `py_fn` (defined outside this file).
    """
    pass
22
@func.udf(
    py_fn=PIL.Image.composite, return_type=ImageType(), param_types=[ImageType(), ImageType(), ImageType()])
def composite(image1: PIL.Image.Image, image2: PIL.Image.Image, mask: PIL.Image.Image) -> PIL.Image.Image:
    """Signature stub for PIL.Image.composite, which is registered as `py_fn`.

    The body is empty; presumably the decorator dispatches to `py_fn` (defined outside this file).
    """
    pass
26
+
27
+
28
+ # PIL.Image.Image methods
29
+
30
+ # Image.convert()
31
def _convert_return_type(bound_args: Dict[str, Any]) -> ColumnType:
    """Return type resolver for convert(): keeps the input size and takes the mode from 'mode'.

    Falls back to a generic ImageType() when no bound arguments are supplied.
    """
    if bound_args is None:
        return ImageType()
    assert 'self' in bound_args
    assert 'mode' in bound_args
    source_type = bound_args['self'].col_type
    target_mode = bound_args['mode']
    return ImageType(size=source_type.size, mode=target_mode)
@func.udf(return_type=_convert_return_type, param_types=[ImageType(), StringType()])
def convert(self: PIL.Image.Image, mode: str) -> PIL.Image.Image:
    """Return `self` converted to `mode` via PIL's Image.convert()."""
    converted = self.convert(mode)
    return converted
41
+
42
+ # Image.crop()
43
def _crop_return_type(bound_args: Dict[str, Any]) -> ColumnType:
    """Return type resolver for crop().

    When 'box' is a literal sequence of exactly four ints, the result size is computed as
    (box[2] - box[0], box[3] - box[1]) and the input's mode is kept. In every other case
    (no bound arguments, non-literal box, wrong length, non-int entries) a generic
    ImageType() is returned.
    """
    if bound_args is None:
        return ImageType()
    img_type = bound_args['self'].col_type
    box = bound_args['box']
    # Guard the length before indexing, so a malformed literal box does not raise IndexError
    # during type inference; accept tuples too, matching the Tuple annotation on crop().
    is_static_box = (
        isinstance(box, (list, tuple))
        and len(box) == 4
        and all(isinstance(x, int) for x in box)
    )
    if is_static_box:
        return ImageType(size=(box[2] - box[0], box[3] - box[1]), mode=img_type.mode)
    return ImageType()  # we can't compute the size statically
@func.udf(
    py_fn=PIL.Image.Image.crop, return_type=_crop_return_type,
    param_types=[ImageType(), ArrayType((4,), dtype=IntType())])
def crop(self: PIL.Image.Image, box: Tuple[int, int, int, int]) -> PIL.Image.Image:
    """Signature stub for PIL.Image.Image.crop, which is registered as `py_fn`.

    The body is empty; presumably the decorator dispatches to `py_fn` (defined outside this file).
    """
    pass
56
+
57
+ # Image.getchannel()
58
def _getchannel_return_type(bound_args: Dict[str, Any]) -> ColumnType:
    """Return type resolver for getchannel(): same size as the input, mode 'L'.

    Falls back to a generic ImageType() when no bound arguments are supplied.
    """
    if bound_args is not None:
        source_type = bound_args['self'].col_type
        return ImageType(size=source_type.size, mode='L')
    return ImageType()
@func.udf(
    py_fn=PIL.Image.Image.getchannel, return_type=_getchannel_return_type, param_types=[ImageType(), IntType()])
def getchannel(self: PIL.Image.Image, channel: int) -> PIL.Image.Image:
    """Signature stub for PIL.Image.Image.getchannel, which is registered as `py_fn`."""
    pass
67
+
68
+ # Image.resize()
69
def resize_return_type(bound_args: Dict[str, Any]) -> ColumnType:
    """Return type resolver for resize(): an image type whose size is the 'size' argument.

    Falls back to a generic ImageType() when no bound arguments are supplied.
    """
    if bound_args is None:
        return ImageType()
    assert 'size' in bound_args
    target_size = bound_args['size']
    return ImageType(size=target_size)
@func.udf(return_type=resize_return_type, param_types=[ImageType(), ArrayType((2, ), dtype=IntType())])
def resize(self: PIL.Image.Image, size: Tuple[int, int]) -> PIL.Image.Image:
    """Return `self` resized to `size` via PIL's Image.resize()."""
    resized = self.resize(size)
    return resized
77
+
78
+ # Image.rotate()
79
@func.udf(return_type=ImageType(), param_types=[ImageType(), IntType()])
def rotate(self: PIL.Image.Image, angle: int) -> PIL.Image.Image:
    """Return `self` rotated by `angle` via PIL's Image.rotate()."""
    rotated = self.rotate(angle)
    return rotated
82
+
83
+ # Image.transform()
84
@func.udf(
    return_type=_caller_return_type,
    param_types=[ImageType(), ArrayType((2,), dtype=IntType()), IntType()])
def transform(self: PIL.Image.Image, size: Tuple[int, int], method: int) -> PIL.Image.Image:
    """Return the result of PIL's Image.transform() called with `size` and `method`."""
    transformed = self.transform(size, method)
    return transformed
87
+
88
@func.udf(
    py_fn=PIL.Image.Image.effect_spread, return_type=_caller_return_type, param_types=[ImageType(), FloatType()])
def effect_spread(self: PIL.Image.Image, distance: float) -> PIL.Image.Image:
    """Signature stub for PIL.Image.Image.effect_spread, which is registered as `py_fn`.

    NOTE(review): `distance` is declared as a float here; confirm against the Pillow documentation.
    """
    pass
92
+
93
@func.udf(
    py_fn=PIL.Image.Image.entropy, return_type=FloatType(), param_types=[ImageType(), ImageType(), JsonType()])
def entropy(self: PIL.Image.Image, mask: PIL.Image.Image, histogram: Dict) -> float:
    """Signature stub for PIL.Image.Image.entropy, which is registered as `py_fn`.

    NOTE(review): Pillow documents the third parameter as `extrema`, not `histogram`; confirm
    that arguments are forwarded positionally, otherwise the keyword name will not match.
    """
    pass
97
+
98
@func.udf(py_fn=PIL.Image.Image.getbands, return_type=JsonType(), param_types=[ImageType()])
def getbands(self: PIL.Image.Image) -> Tuple[str]:
    """Signature stub for PIL.Image.Image.getbands, which is registered as `py_fn`; the result is typed as JSON."""
    pass
101
+
102
@func.udf(py_fn=PIL.Image.Image.getbbox, return_type=JsonType(), param_types=[ImageType()])
def getbbox(self: PIL.Image.Image) -> Tuple[int, int, int, int]:
    """Signature stub for PIL.Image.Image.getbbox, which is registered as `py_fn`; the result is typed as JSON."""
    pass
105
+
106
@func.udf(
    py_fn=PIL.Image.Image.getcolors, return_type=JsonType(), param_types=[ImageType(), IntType()])
def getcolors(self: PIL.Image.Image, maxcolors: int) -> Tuple[Tuple[int, int, int], int]:
    """Signature stub for PIL.Image.Image.getcolors, which is registered as `py_fn`; the result is typed as JSON.

    NOTE(review): the Python return annotation may not match what Pillow returns (a list of
    (count, color) entries, or None); confirm against the Pillow documentation.
    """
    pass
110
+
111
@func.udf(py_fn=PIL.Image.Image.getextrema, return_type=JsonType(), param_types=[ImageType()])
def getextrema(self: PIL.Image.Image) -> Tuple[int, int]:
    """Signature stub for PIL.Image.Image.getextrema, which is registered as `py_fn`; the result is typed as JSON."""
    pass
114
+
115
@func.udf(
    py_fn=PIL.Image.Image.getpalette, return_type=JsonType(), param_types=[ImageType(), StringType()])
def getpalette(self: PIL.Image.Image, mode: str) -> Tuple[int]:
    """Signature stub for PIL.Image.Image.getpalette, which is registered as `py_fn`; the result is typed as JSON.

    NOTE(review): Pillow documents this parameter as `rawmode`; confirm that arguments are
    forwarded positionally.
    """
    pass
119
+
120
@func.udf(
    py_fn=PIL.Image.Image.getpixel, return_type=JsonType(), param_types=[ImageType(), ArrayType((2,), dtype=IntType())])
def getpixel(self: PIL.Image.Image, xy: Tuple[int, int]) -> Tuple[int]:
    """Signature stub for PIL.Image.Image.getpixel, which is registered as `py_fn`; the result is typed as JSON."""
    pass
124
+
125
@func.udf(
    py_fn=PIL.Image.Image.getprojection, return_type=JsonType(), param_types=[ImageType()])
def getprojection(self: PIL.Image.Image) -> Tuple[int]:
    """Signature stub for PIL.Image.Image.getprojection, which is registered as `py_fn`; the result is typed as JSON."""
    pass
129
+
130
@func.udf(
    py_fn=PIL.Image.Image.histogram, return_type=JsonType(), param_types=[ImageType(), ImageType(), JsonType()])
def histogram(self: PIL.Image.Image, mask: PIL.Image.Image, histogram: Dict) -> Tuple[int]:
    """Signature stub for PIL.Image.Image.histogram, which is registered as `py_fn`; the result is typed as JSON.

    NOTE(review): Pillow documents the third parameter as `extrema`, not `histogram`; confirm
    that arguments are forwarded positionally, otherwise the keyword name will not match.
    """
    pass
134
+
135
@func.udf(
    py_fn=PIL.Image.Image.quantize, return_type=ImageType(),
    param_types=[ImageType(), IntType(), IntType(nullable=True), IntType(), IntType(nullable=True), IntType()])
def quantize(
        self: PIL.Image.Image, colors: int, method: int, kmeans: int, palette: int, dither: int) -> PIL.Image.Image:
    """Signature stub for PIL.Image.Image.quantize, which is registered as `py_fn`.

    `method` and `palette` are declared nullable in `param_types`.
    NOTE(review): Pillow documents `palette` as an image, while it is declared as an int here; confirm.
    """
    pass
141
+
142
@func.udf(
    py_fn=PIL.Image.Image.reduce, return_type=ImageType(), param_types=[ImageType(), IntType(), JsonType()])
def reduce(self: PIL.Image.Image, factor: int, filter: Tuple[int]) -> PIL.Image.Image:
    """Signature stub for PIL.Image.Image.reduce, which is registered as `py_fn`.

    NOTE(review): Pillow documents the second parameter as `box`, not `filter`; confirm that
    arguments are forwarded positionally.
    """
    pass
146
+
147
@func.udf(
    py_fn=PIL.Image.Image.transpose, return_type=_caller_return_type, param_types=[ImageType(), IntType()])
def transpose(self: PIL.Image.Image, method: int) -> PIL.Image.Image:
    """Signature stub for PIL.Image.Image.transpose, which is registered as `py_fn`.

    The return type is resolved by _caller_return_type, i.e. taken from the 'self' argument.
    """
    pass
@@ -0,0 +1,13 @@
1
+ from typing import Any
2
+
3
+ from pixeltable.type_system import StringType
4
+ import pixeltable.func as func
5
+
6
+
7
@func.udf(return_type=StringType(), param_types=[StringType()])
def str_format(format_str: str, *args: Any, **kwargs: Any) -> str:
    """Format `format_str` with str.format(), substituting from args and kwargs.

    - {<int>} is replaced by the positional argument at that index
    - {<key>} is replaced by the keyword argument with that name
    """
    formatted = format_str.format(*args, **kwargs)
    return formatted
@@ -0,0 +1,27 @@
1
+ from typing import Optional
2
+
3
+ import pixeltable as pxt
4
+
5
+
6
@pxt.udf
def completions(
        prompt: str,
        model: str,
        max_tokens: Optional[int] = None,
        repetition_penalty: Optional[float] = None,
        stop: Optional[list] = None,
        top_k: Optional[int] = None,
        top_p: Optional[float] = None,
        temperature: Optional[float] = None
) -> dict:
    """Request a completion from Together via `together.Complete.create` and return its result.

    `prompt` and `model` are passed positionally; all sampling options are forwarded
    as keyword arguments unchanged (including None values).
    """
    import together

    sampling_options = {
        'max_tokens': max_tokens,
        'repetition_penalty': repetition_penalty,
        'stop': stop,
        'top_k': top_k,
        'top_p': top_p,
        'temperature': temperature,
    }
    return together.Complete.create(prompt, model, **sampling_options)
@@ -0,0 +1,41 @@
1
+ from typing import Tuple, List, Optional
2
+ import types
3
+ import sys
4
+
5
+ import pixeltable.func as func
6
+ import pixeltable.type_system as ts
7
+ import pixeltable.env as env
8
+
9
+
10
def create_nos_modules() -> List[types.ModuleType]:
    """Create module pixeltable.functions.nos with one submodule per task and return the submodules

    The parent module and every submodule are registered in sys.modules. Each model reported
    by the NOS client becomes a NOSFunction attribute on the submodule of its task.

    Returns:
        the list of newly created task submodules, in the order they were created
    """
    models = env.Env.get().nos_client.ListModels()
    model_info = [env.Env.get().nos_client.GetModelInfo(model) for model in models]
    # sort by task so that all models of one task are adjacent; the loop below relies on this
    model_info.sort(key=lambda info: info.task.value)

    module_name = 'pixeltable.functions.nos'
    nos_module = types.ModuleType(module_name)
    nos_module.__package__ = 'pixeltable.functions'
    sys.modules[module_name] = nos_module

    prev_task = ''
    new_modules: List[types.ModuleType] = []
    sub_module: Optional[types.ModuleType] = None
    for info in model_info:
        if info.task.value != prev_task:
            # we construct one submodule per task
            namespace = info.task.name.lower()
            submodule_name = f'{module_name}.{namespace}'
            sub_module = types.ModuleType(submodule_name)
            sub_module.__package__ = module_name
            setattr(nos_module, namespace, sub_module)
            new_modules.append(sub_module)
            sys.modules[submodule_name] = sub_module
            prev_task = info.task.value

        # add a Function for this model to the module
        # '/' and '-' are replaced with '_' to derive the attribute name from the model name
        model_id = info.name.replace("/", "_").replace("-", "_")
        pt_func = func.NOSFunction(info, f'{submodule_name}.{model_id}')
        setattr(sub_module, model_id, pt_func)

    return new_modules
@@ -0,0 +1,62 @@
1
+ from typing import Optional
2
+ import uuid
3
+ import av
4
+ import sys
5
+
6
+ import pixeltable.env as env
7
+ import pixeltable.func as func
8
+ import pixeltable.type_system as ts
9
+
10
+
11
# Supported output formats for extract_audio(): maps the format name to the default codec
# and the file extension used for the generated file.
_format_defaults = {  # format -> (codec, ext)
    'wav': ('pcm_s16le', 'wav'),
    'mp3': ('libmp3lame', 'mp3'),
    'flac': ('flac', 'flac'),
    #'mp4': ('aac', 'm4a'),
}
17
+
18
+ # for mp4:
19
+ # - extract_audio() fails with "Application provided invalid, non monotonically increasing dts to muxer in stream 0: 1146 >= 290"
20
+ # - chatgpt suggests this can be fixed in the following manner
21
+ # for packet in container.demux(audio_stream):
22
+ # packet.pts = None # Reset the PTS and DTS to allow FFmpeg to set them automatically
23
+ # packet.dts = None
24
+ # for frame in packet.decode():
25
+ # frame.pts = None
26
+ # for packet in output_stream.encode(frame):
27
+ # output_container.mux(packet)
28
+ #
29
+ # # Flush remaining packets
30
+ # for packet in output_stream.encode():
31
+ # output_container.mux(packet)
32
+
33
+
34
# Pixeltable parameter types for extract_audio(), in the order (video_path, stream_idx, format, codec).
# NOTE(review): `codec` is declared non-nullable here although its Python default is None;
# confirm how the udf machinery treats an omitted or None codec.
_extract_audio_param_types = [
    ts.VideoType(nullable=False),
    ts.IntType(nullable=False),
    ts.StringType(nullable=False),
    ts.StringType(nullable=False)
]
@func.udf(return_type=ts.AudioType(nullable=True), param_types=_extract_audio_param_types)
def extract_audio(
        video_path: str, stream_idx: int = 0, format: str = 'wav', codec: Optional[str] = None
) -> Optional[str]:
    """Extract an audio stream from a video file, save it as a media file and return its path

    Args:
        video_path: path of the video file to read.
        stream_idx: index into the container's audio streams.
        format: output container format; must be a key of _format_defaults.
        codec: output codec; when not given, the default codec for `format` is used.

    Returns:
        The path of the generated audio file, or None if the video has no audio stream
        with index `stream_idx`.

    Raises:
        ValueError: if `format` is not a supported audio format.
    """
    if format not in _format_defaults:
        raise ValueError(f'extract_audio(): unsupported audio format: {format}')
    default_codec, ext = _format_defaults[format]

    with av.open(video_path) as container:
        if len(container.streams.audio) <= stream_idx:
            return None
        audio_stream = container.streams.audio[stream_idx]
        # create this in our tmp directory, so it'll get cleaned up if it's being generated as part of a query
        output_filename = str(env.Env.get().tmp_dir / f"{uuid.uuid4()}.{ext}")

        with av.open(output_filename, "w", format=format) as output_container:
            output_stream = output_container.add_stream(codec or default_codec)
            for packet in container.demux(audio_stream):
                for frame in packet.decode():
                    output_container.mux(output_stream.encode(frame))
            # Flush the encoder: calling encode() without a frame drains the packets it is
            # still buffering, which would otherwise be missing from the end of the output.
            output_container.mux(output_stream.encode())

        return output_filename
@@ -0,0 +1,3 @@
1
+ from .base import ComponentIterator
2
+ from .video import FrameIterator
3
+
@@ -0,0 +1,48 @@
1
+ from __future__ import annotations
2
+ from typing import Dict, Any, Tuple, List
3
+ from abc import abstractmethod, ABC
4
+
5
+ from pixeltable.type_system import ColumnType
6
+
7
+
8
class ComponentIterator(ABC):
    """Abstract base class for component iterators.

    Subclasses describe their inputs and outputs through the two schema classmethods and
    implement the iteration protocol (__next__), close() and set_pos().
    """

    @classmethod
    @abstractmethod
    def input_schema(cls) -> Dict[str, ColumnType]:
        """Provide the Pixeltable types of the init() parameters

        The keys need to match the names of the init() parameters. This is equivalent to the
        param_types parameter of the @udf decorator.
        """
        raise NotImplementedError

    @classmethod
    @abstractmethod
    def output_schema(cls, *args: Any, **kwargs: Any) -> Tuple[Dict[str, ColumnType], List[str]]:
        """Specify the dictionary returned by next() and a list of unstored column names

        Returns:
            a dictionary which is turned into a list of columns in the output table
            a list of unstored column names
        """
        raise NotImplementedError

    def __iter__(self) -> ComponentIterator:
        # the object is its own iterator; __next__() supplies the elements
        return self

    @abstractmethod
    def __next__(self) -> Dict[str, Any]:
        """Return the next element of the iterator as a dictionary or raise StopIteration"""
        raise NotImplementedError

    @abstractmethod
    def close(self) -> None:
        """Close the iterator and release all resources"""
        raise NotImplementedError

    @abstractmethod
    def set_pos(self, pos: int) -> None:
        """Set the iterator position to pos"""
        raise NotImplementedError