pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245) hide show
  1. pixeltable/__init__.py +83 -19
  2. pixeltable/_query.py +1444 -0
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +7 -4
  5. pixeltable/catalog/catalog.py +2394 -119
  6. pixeltable/catalog/column.py +225 -104
  7. pixeltable/catalog/dir.py +38 -9
  8. pixeltable/catalog/globals.py +53 -34
  9. pixeltable/catalog/insertable_table.py +265 -115
  10. pixeltable/catalog/path.py +80 -17
  11. pixeltable/catalog/schema_object.py +28 -43
  12. pixeltable/catalog/table.py +1270 -677
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +1270 -751
  15. pixeltable/catalog/table_version_handle.py +109 -0
  16. pixeltable/catalog/table_version_path.py +137 -42
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +251 -134
  20. pixeltable/config.py +215 -0
  21. pixeltable/env.py +736 -285
  22. pixeltable/exceptions.py +26 -2
  23. pixeltable/exec/__init__.py +7 -2
  24. pixeltable/exec/aggregation_node.py +39 -21
  25. pixeltable/exec/cache_prefetch_node.py +87 -109
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +25 -28
  29. pixeltable/exec/data_row_batch.py +11 -46
  30. pixeltable/exec/exec_context.py +26 -11
  31. pixeltable/exec/exec_node.py +35 -27
  32. pixeltable/exec/expr_eval/__init__.py +3 -0
  33. pixeltable/exec/expr_eval/evaluators.py +365 -0
  34. pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
  35. pixeltable/exec/expr_eval/globals.py +200 -0
  36. pixeltable/exec/expr_eval/row_buffer.py +74 -0
  37. pixeltable/exec/expr_eval/schedulers.py +413 -0
  38. pixeltable/exec/globals.py +35 -0
  39. pixeltable/exec/in_memory_data_node.py +35 -27
  40. pixeltable/exec/object_store_save_node.py +293 -0
  41. pixeltable/exec/row_update_node.py +44 -29
  42. pixeltable/exec/sql_node.py +414 -115
  43. pixeltable/exprs/__init__.py +8 -5
  44. pixeltable/exprs/arithmetic_expr.py +79 -45
  45. pixeltable/exprs/array_slice.py +5 -5
  46. pixeltable/exprs/column_property_ref.py +40 -26
  47. pixeltable/exprs/column_ref.py +254 -61
  48. pixeltable/exprs/comparison.py +14 -9
  49. pixeltable/exprs/compound_predicate.py +9 -10
  50. pixeltable/exprs/data_row.py +213 -72
  51. pixeltable/exprs/expr.py +270 -104
  52. pixeltable/exprs/expr_dict.py +6 -5
  53. pixeltable/exprs/expr_set.py +20 -11
  54. pixeltable/exprs/function_call.py +383 -284
  55. pixeltable/exprs/globals.py +18 -5
  56. pixeltable/exprs/in_predicate.py +7 -7
  57. pixeltable/exprs/inline_expr.py +37 -37
  58. pixeltable/exprs/is_null.py +8 -4
  59. pixeltable/exprs/json_mapper.py +120 -54
  60. pixeltable/exprs/json_path.py +90 -60
  61. pixeltable/exprs/literal.py +61 -16
  62. pixeltable/exprs/method_ref.py +7 -6
  63. pixeltable/exprs/object_ref.py +19 -8
  64. pixeltable/exprs/row_builder.py +238 -75
  65. pixeltable/exprs/rowid_ref.py +53 -15
  66. pixeltable/exprs/similarity_expr.py +65 -50
  67. pixeltable/exprs/sql_element_cache.py +5 -5
  68. pixeltable/exprs/string_op.py +107 -0
  69. pixeltable/exprs/type_cast.py +25 -13
  70. pixeltable/exprs/variable.py +2 -2
  71. pixeltable/func/__init__.py +9 -5
  72. pixeltable/func/aggregate_function.py +197 -92
  73. pixeltable/func/callable_function.py +119 -35
  74. pixeltable/func/expr_template_function.py +101 -48
  75. pixeltable/func/function.py +375 -62
  76. pixeltable/func/function_registry.py +20 -19
  77. pixeltable/func/globals.py +6 -5
  78. pixeltable/func/mcp.py +74 -0
  79. pixeltable/func/query_template_function.py +151 -35
  80. pixeltable/func/signature.py +178 -49
  81. pixeltable/func/tools.py +164 -0
  82. pixeltable/func/udf.py +176 -53
  83. pixeltable/functions/__init__.py +44 -4
  84. pixeltable/functions/anthropic.py +226 -47
  85. pixeltable/functions/audio.py +148 -11
  86. pixeltable/functions/bedrock.py +137 -0
  87. pixeltable/functions/date.py +188 -0
  88. pixeltable/functions/deepseek.py +113 -0
  89. pixeltable/functions/document.py +81 -0
  90. pixeltable/functions/fal.py +76 -0
  91. pixeltable/functions/fireworks.py +72 -20
  92. pixeltable/functions/gemini.py +249 -0
  93. pixeltable/functions/globals.py +208 -53
  94. pixeltable/functions/groq.py +108 -0
  95. pixeltable/functions/huggingface.py +1088 -95
  96. pixeltable/functions/image.py +155 -84
  97. pixeltable/functions/json.py +8 -11
  98. pixeltable/functions/llama_cpp.py +31 -19
  99. pixeltable/functions/math.py +169 -0
  100. pixeltable/functions/mistralai.py +50 -75
  101. pixeltable/functions/net.py +70 -0
  102. pixeltable/functions/ollama.py +29 -36
  103. pixeltable/functions/openai.py +548 -160
  104. pixeltable/functions/openrouter.py +143 -0
  105. pixeltable/functions/replicate.py +15 -14
  106. pixeltable/functions/reve.py +250 -0
  107. pixeltable/functions/string.py +310 -85
  108. pixeltable/functions/timestamp.py +37 -19
  109. pixeltable/functions/together.py +77 -120
  110. pixeltable/functions/twelvelabs.py +188 -0
  111. pixeltable/functions/util.py +7 -2
  112. pixeltable/functions/uuid.py +30 -0
  113. pixeltable/functions/video.py +1528 -117
  114. pixeltable/functions/vision.py +26 -26
  115. pixeltable/functions/voyageai.py +289 -0
  116. pixeltable/functions/whisper.py +19 -10
  117. pixeltable/functions/whisperx.py +179 -0
  118. pixeltable/functions/yolox.py +112 -0
  119. pixeltable/globals.py +716 -236
  120. pixeltable/index/__init__.py +3 -1
  121. pixeltable/index/base.py +17 -21
  122. pixeltable/index/btree.py +32 -22
  123. pixeltable/index/embedding_index.py +155 -92
  124. pixeltable/io/__init__.py +12 -7
  125. pixeltable/io/datarows.py +140 -0
  126. pixeltable/io/external_store.py +83 -125
  127. pixeltable/io/fiftyone.py +24 -33
  128. pixeltable/io/globals.py +47 -182
  129. pixeltable/io/hf_datasets.py +96 -127
  130. pixeltable/io/label_studio.py +171 -156
  131. pixeltable/io/lancedb.py +3 -0
  132. pixeltable/io/pandas.py +136 -115
  133. pixeltable/io/parquet.py +40 -153
  134. pixeltable/io/table_data_conduit.py +702 -0
  135. pixeltable/io/utils.py +100 -0
  136. pixeltable/iterators/__init__.py +8 -4
  137. pixeltable/iterators/audio.py +207 -0
  138. pixeltable/iterators/base.py +9 -3
  139. pixeltable/iterators/document.py +144 -87
  140. pixeltable/iterators/image.py +17 -38
  141. pixeltable/iterators/string.py +15 -12
  142. pixeltable/iterators/video.py +523 -127
  143. pixeltable/metadata/__init__.py +33 -8
  144. pixeltable/metadata/converters/convert_10.py +2 -3
  145. pixeltable/metadata/converters/convert_13.py +2 -2
  146. pixeltable/metadata/converters/convert_15.py +15 -11
  147. pixeltable/metadata/converters/convert_16.py +4 -5
  148. pixeltable/metadata/converters/convert_17.py +4 -5
  149. pixeltable/metadata/converters/convert_18.py +4 -6
  150. pixeltable/metadata/converters/convert_19.py +6 -9
  151. pixeltable/metadata/converters/convert_20.py +3 -6
  152. pixeltable/metadata/converters/convert_21.py +6 -8
  153. pixeltable/metadata/converters/convert_22.py +3 -2
  154. pixeltable/metadata/converters/convert_23.py +33 -0
  155. pixeltable/metadata/converters/convert_24.py +55 -0
  156. pixeltable/metadata/converters/convert_25.py +19 -0
  157. pixeltable/metadata/converters/convert_26.py +23 -0
  158. pixeltable/metadata/converters/convert_27.py +29 -0
  159. pixeltable/metadata/converters/convert_28.py +13 -0
  160. pixeltable/metadata/converters/convert_29.py +110 -0
  161. pixeltable/metadata/converters/convert_30.py +63 -0
  162. pixeltable/metadata/converters/convert_31.py +11 -0
  163. pixeltable/metadata/converters/convert_32.py +15 -0
  164. pixeltable/metadata/converters/convert_33.py +17 -0
  165. pixeltable/metadata/converters/convert_34.py +21 -0
  166. pixeltable/metadata/converters/convert_35.py +9 -0
  167. pixeltable/metadata/converters/convert_36.py +38 -0
  168. pixeltable/metadata/converters/convert_37.py +15 -0
  169. pixeltable/metadata/converters/convert_38.py +39 -0
  170. pixeltable/metadata/converters/convert_39.py +124 -0
  171. pixeltable/metadata/converters/convert_40.py +73 -0
  172. pixeltable/metadata/converters/convert_41.py +12 -0
  173. pixeltable/metadata/converters/convert_42.py +9 -0
  174. pixeltable/metadata/converters/convert_43.py +44 -0
  175. pixeltable/metadata/converters/util.py +44 -18
  176. pixeltable/metadata/notes.py +21 -0
  177. pixeltable/metadata/schema.py +185 -42
  178. pixeltable/metadata/utils.py +74 -0
  179. pixeltable/mypy/__init__.py +3 -0
  180. pixeltable/mypy/mypy_plugin.py +123 -0
  181. pixeltable/plan.py +616 -225
  182. pixeltable/share/__init__.py +3 -0
  183. pixeltable/share/packager.py +797 -0
  184. pixeltable/share/protocol/__init__.py +33 -0
  185. pixeltable/share/protocol/common.py +165 -0
  186. pixeltable/share/protocol/operation_types.py +33 -0
  187. pixeltable/share/protocol/replica.py +119 -0
  188. pixeltable/share/publish.py +349 -0
  189. pixeltable/store.py +398 -232
  190. pixeltable/type_system.py +730 -267
  191. pixeltable/utils/__init__.py +40 -0
  192. pixeltable/utils/arrow.py +201 -29
  193. pixeltable/utils/av.py +298 -0
  194. pixeltable/utils/azure_store.py +346 -0
  195. pixeltable/utils/coco.py +26 -27
  196. pixeltable/utils/code.py +4 -4
  197. pixeltable/utils/console_output.py +46 -0
  198. pixeltable/utils/coroutine.py +24 -0
  199. pixeltable/utils/dbms.py +92 -0
  200. pixeltable/utils/description_helper.py +11 -12
  201. pixeltable/utils/documents.py +60 -61
  202. pixeltable/utils/exception_handler.py +36 -0
  203. pixeltable/utils/filecache.py +38 -22
  204. pixeltable/utils/formatter.py +88 -51
  205. pixeltable/utils/gcs_store.py +295 -0
  206. pixeltable/utils/http.py +133 -0
  207. pixeltable/utils/http_server.py +14 -13
  208. pixeltable/utils/iceberg.py +13 -0
  209. pixeltable/utils/image.py +17 -0
  210. pixeltable/utils/lancedb.py +90 -0
  211. pixeltable/utils/local_store.py +322 -0
  212. pixeltable/utils/misc.py +5 -0
  213. pixeltable/utils/object_stores.py +573 -0
  214. pixeltable/utils/pydantic.py +60 -0
  215. pixeltable/utils/pytorch.py +20 -20
  216. pixeltable/utils/s3_store.py +527 -0
  217. pixeltable/utils/sql.py +32 -5
  218. pixeltable/utils/system.py +30 -0
  219. pixeltable/utils/transactional_directory.py +4 -3
  220. pixeltable-0.5.7.dist-info/METADATA +579 -0
  221. pixeltable-0.5.7.dist-info/RECORD +227 -0
  222. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  223. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  224. pixeltable/__version__.py +0 -3
  225. pixeltable/catalog/named_function.py +0 -36
  226. pixeltable/catalog/path_dict.py +0 -141
  227. pixeltable/dataframe.py +0 -894
  228. pixeltable/exec/expr_eval_node.py +0 -232
  229. pixeltable/ext/__init__.py +0 -14
  230. pixeltable/ext/functions/__init__.py +0 -8
  231. pixeltable/ext/functions/whisperx.py +0 -77
  232. pixeltable/ext/functions/yolox.py +0 -157
  233. pixeltable/tool/create_test_db_dump.py +0 -311
  234. pixeltable/tool/create_test_video.py +0 -81
  235. pixeltable/tool/doc_plugins/griffe.py +0 -50
  236. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  237. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  238. pixeltable/tool/embed_udf.py +0 -9
  239. pixeltable/tool/mypy_plugin.py +0 -55
  240. pixeltable/utils/media_store.py +0 -76
  241. pixeltable/utils/s3.py +0 -16
  242. pixeltable-0.2.26.dist-info/METADATA +0 -400
  243. pixeltable-0.2.26.dist-info/RECORD +0 -156
  244. pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
  245. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -1,311 +0,0 @@
1
- import datetime
2
- import json
3
- import logging
4
- import os
5
- import pathlib
6
- import subprocess
7
- import sys
8
- from typing import Any
9
- from zoneinfo import ZoneInfo
10
-
11
- import pixeltable_pgserver
12
- import toml
13
-
14
- import pixeltable as pxt
15
- import pixeltable.metadata as metadata
16
- from pixeltable.env import Env
17
- from pixeltable.func import Batch
18
- from pixeltable.io.external_store import Project
19
- from pixeltable.tool import embed_udf
20
- from pixeltable.type_system import BoolType, FloatType, ImageType, IntType, JsonType, StringType, TimestampType
21
-
22
- _logger = logging.getLogger('pixeltable')
23
-
24
-
25
- class Dumper:
26
-
27
- def __init__(self, output_dir='target', db_name='pxtdump') -> None:
28
- if sys.version_info >= (3, 10):
29
- raise RuntimeError(
30
- 'This script must be run on Python 3.9. '
31
- 'DB dumps are incompatible across versions due to issues with pickling anonymous UDFs.'
32
- )
33
-
34
- self.output_dir = pathlib.Path(output_dir)
35
- shared_home = pathlib.Path(os.environ.get('PIXELTABLE_HOME', '~/.pixeltable')).expanduser()
36
- mock_home_dir = self.output_dir / '.pixeltable'
37
- mock_home_dir.mkdir(parents=True, exist_ok=True)
38
- os.environ['PIXELTABLE_HOME'] = str(mock_home_dir)
39
- os.environ['PIXELTABLE_CONFIG'] = str(shared_home / 'config.toml')
40
- os.environ['PIXELTABLE_DB'] = db_name
41
- os.environ['PIXELTABLE_PGDATA'] = str(shared_home / 'pgdata')
42
-
43
- Env._init_env(reinit_db=True)
44
-
45
- Env.get().configure_logging(level=logging.DEBUG, to_stdout=True)
46
-
47
- def dump_db(self) -> None:
48
- md_version = metadata.VERSION
49
- dump_file = self.output_dir / f'pixeltable-v{md_version:03d}-test.dump.gz'
50
- _logger.info(f'Creating database dump at: {dump_file}')
51
- pg_package_dir = os.path.dirname(pixeltable_pgserver.__file__)
52
- pg_dump_binary = f'{pg_package_dir}/pginstall/bin/pg_dump'
53
- _logger.info(f'Using pg_dump binary at: {pg_dump_binary}')
54
- # We need the raw DB URL, without a driver qualifier. (The driver qualifier is needed by
55
- # SQLAlchemy, but command-line Postgres won't know how to interpret it.)
56
- db_url = Env.get()._db_server.get_uri(Env.get()._db_name)
57
- with open(dump_file, 'wb') as dump:
58
- pg_dump_process = subprocess.Popen(
59
- (pg_dump_binary, db_url, '-U', 'postgres', '-Fc'),
60
- stdout=subprocess.PIPE
61
- )
62
- subprocess.run(
63
- ('gzip', '-9'),
64
- stdin=pg_dump_process.stdout,
65
- stdout=dump,
66
- check=True
67
- )
68
- if pg_dump_process.poll() != 0:
69
- raise RuntimeError(f'pg_dump failed with return code {pg_dump_process.returncode}')
70
- info_file = self.output_dir / f'pixeltable-v{md_version:03d}-test-info.toml'
71
- git_sha = subprocess.check_output(('git', 'rev-parse', 'HEAD')).decode('ascii').strip()
72
- user = os.environ.get('USER', os.environ.get('USERNAME'))
73
- info_dict = {'pixeltable-dump': {
74
- 'metadata-version': md_version,
75
- 'git-sha': git_sha,
76
- 'datetime': datetime.datetime.now(tz=datetime.timezone.utc),
77
- 'user': user
78
- }}
79
- with open(info_file, 'w') as info:
80
- toml.dump(info_dict, info)
81
-
82
- # Expression types, predicate types, embedding indices, views on views
83
- def create_tables(self) -> None:
84
- schema = {
85
- 'c1': StringType(nullable=False),
86
- 'c1n': StringType(nullable=True),
87
- 'c2': IntType(nullable=False),
88
- 'c3': FloatType(nullable=False),
89
- 'c4': BoolType(nullable=False),
90
- 'c5': TimestampType(nullable=False),
91
- 'c6': JsonType(nullable=False),
92
- 'c7': JsonType(nullable=False),
93
- 'c8': ImageType(nullable=True)
94
- }
95
- t = pxt.create_table('base_table', schema, primary_key='c2')
96
-
97
- num_rows = 20
98
- d1 = {
99
- 'f1': 'test string 1',
100
- 'f2': 1,
101
- 'f3': 1.0,
102
- 'f4': True,
103
- 'f5': [1.0, 2.0, 3.0, 4.0],
104
- 'f6': {
105
- 'f7': 'test string 2',
106
- 'f8': [1.0, 2.0, 3.0, 4.0],
107
- },
108
- }
109
- d2 = [d1, d1]
110
-
111
- c1_data = [f'test string {i}' for i in range(num_rows)]
112
- c2_data = [i for i in range(num_rows)]
113
- c3_data = [float(i) for i in range(num_rows)]
114
- c4_data = [bool(i % 2) for i in range(num_rows)]
115
- c5_data = [datetime.datetime.now()] * num_rows
116
- c6_data = [
117
- {
118
- 'f1': f'test string {i}',
119
- 'f2': i,
120
- 'f3': float(i),
121
- 'f4': bool(i % 2),
122
- 'f5': [1.0, 2.0, 3.0, 4.0],
123
- 'f6': {
124
- 'f7': 'test string 2',
125
- 'f8': [1.0, 2.0, 3.0, 4.0],
126
- },
127
- }
128
- for i in range(num_rows)
129
- ]
130
- c7_data = [d2] * num_rows
131
- rows = [
132
- {
133
- 'c1': c1_data[i],
134
- 'c1n': c1_data[i] if i % 10 != 0 else None,
135
- 'c2': c2_data[i],
136
- 'c3': c3_data[i],
137
- 'c4': c4_data[i],
138
- 'c5': c5_data[i],
139
- 'c6': c6_data[i],
140
- 'c7': c7_data[i],
141
- 'c8': None
142
- }
143
- for i in range(num_rows)
144
- ]
145
-
146
- self.__add_expr_columns(t, 'base_table')
147
- t.insert(rows)
148
-
149
- pxt.create_dir('views')
150
-
151
- # simple view
152
- v = pxt.create_view('views.view', t.where(t.c2 < 50))
153
- self.__add_expr_columns(v, 'view')
154
-
155
- # snapshot
156
- _ = pxt.create_snapshot('views.snapshot', t.where(t.c2 >= 75))
157
-
158
- # view of views
159
- vv = pxt.create_view('views.view_of_views', v.where(t.c2 >= 25))
160
- self.__add_expr_columns(vv, 'view_of_views')
161
-
162
- # empty view
163
- e = pxt.create_view('views.empty_view', t.where(t.c2 == 4171780))
164
- assert e.count() == 0
165
- self.__add_expr_columns(e, 'empty_view', include_expensive_functions=True)
166
-
167
- # Add external stores
168
- from pixeltable.io.external_store import MockProject
169
- v._link_external_store(
170
- MockProject.create(
171
- v,
172
- 'project',
173
- {'int_field': pxt.IntType()},
174
- {'str_field': pxt.StringType()},
175
- {'view_test_udf': 'int_field', 'c1': 'str_field'}
176
- )
177
- )
178
- # We're just trying to test metadata here, so it's ok to link a false Label Studio project.
179
- # We include a computed image column in order to ensure the creation of a stored proxy.
180
- from pixeltable.io.label_studio import LabelStudioProject
181
- col_mapping = Project.validate_columns(
182
- v, {'str_field': pxt.StringType(), 'img_field': pxt.ImageType()}, {},
183
- {'view_function_call': 'str_field', 'base_table_image_rot': 'img_field'}
184
- )
185
- project = LabelStudioProject('ls_project_0', 4171780, media_import_method='file', col_mapping=col_mapping)
186
- v._link_external_store(project)
187
- # Sanity check that the stored proxy column did get created
188
- assert len(project.stored_proxies) == 1
189
- assert t.base_table_image_rot.col in project.stored_proxies
190
-
191
- def __add_expr_columns(self, t: pxt.Table, col_prefix: str, include_expensive_functions=False) -> None:
192
- def add_column(col_name: str, col_expr: Any, stored: bool = True) -> None:
193
- t.add_column(**{f'{col_prefix}_{col_name}': col_expr}, stored=stored)
194
-
195
- # arithmetic_expr
196
- add_column('plus', t.c2 + 6)
197
- add_column('minus', t.c2 - 5)
198
- add_column('times', t.c3 * 1.2)
199
- add_column('div', t.c3 / 1.7)
200
- add_column('mod', t.c2 % 11)
201
-
202
- # column_property_ref
203
- add_column('fileurl', t.c8.fileurl)
204
- add_column('localpath', t.c8.localpath)
205
-
206
- # comparison
207
- add_column('lt', t.c2 < t.c3)
208
- add_column('le', t.c2 <= t.c3)
209
- add_column('gt', t.c2 > t.c3)
210
- add_column('ge', t.c2 >= t.c3)
211
- add_column('ne', t.c2 != t.c3)
212
- add_column('eq', t.c2 == t.c3)
213
-
214
- # compound_predicate
215
- add_column('and', (t.c2 >= 5) & (t.c2 < 8))
216
- add_column('or', (t.c2 > 1) | t.c4)
217
- add_column('not', ~(t.c2 > 20))
218
-
219
- # function_call
220
- add_column('function_call', pxt.functions.string.format('{0} {key}', t.c1, key=t.c1)) # library function
221
- add_column('test_udf', test_udf_stored(t.c2)) # stored udf
222
- add_column('test_udf_batched', test_udf_stored_batched(t.c1, upper=False)) # batched stored udf
223
- if include_expensive_functions:
224
- # batched library function
225
- add_column('batched', pxt.functions.huggingface.clip_text(t.c1, model_id='openai/clip-vit-base-patch32'))
226
-
227
- # image_member_access
228
- add_column('image_mode', t.c8.mode)
229
- add_column('image_rot', t.c8.rotate(180), stored=False)
230
-
231
- # in_predicate
232
- add_column('isin_1', t.c1.isin(['test string 1', 'test string 2', 'test string 3']))
233
- add_column('isin_2', t.c2.isin([1, 2, 3, 4, 5]))
234
- add_column('isin_3', t.c2.isin(t.c6.f5))
235
-
236
- # inline_array, inline_list, inline_dict
237
- add_column('inline_array_1', pxt.array([[1, 2, 3], [4, 5, 6]]))
238
- add_column('inline_array_2', pxt.array([['a', 'b', 'c'], ['d', 'e', 'f']]))
239
- add_column('inline_array_exprs', pxt.array([[t.c2, t.c2 + 1], [t.c2 + 2, t.c2]]))
240
- add_column('inline_array_mixed', pxt.array([[1, t.c2], [3, t.c2]]))
241
- add_column('inline_list_1', [[1, 2, 3], [4, 5, 6]])
242
- add_column('inline_list_2', [['a', 'b', 'c'], ['d', 'e', 'f']])
243
- add_column('inline_list_exprs', [t.c1, [t.c1n, t.c2]])
244
- add_column('inline_list_mixed', [1, 'a', t.c1, [1, 'a', t.c1n], 1, 'a'])
245
- add_column('inline_dict', {'int': 22, 'dict': {'key': 'val'}, 'expr': t.c1})
246
-
247
- # is_null
248
- add_column('isnull', t.c1 == None)
249
-
250
- # json_mapper and json_path
251
- add_column('json_mapper', t.c6[3])
252
- add_column('json_path', t.c6.f1)
253
- add_column('json_path_nested', t.c6.f6.f7)
254
- add_column('json_path_star', t.c6.f5['*'])
255
- add_column('json_path_idx', t.c6.f5[3])
256
- add_column('json_path_slice', t.c6.f5[1:3:2])
257
-
258
- # literal
259
- add_column('str_const', 'str')
260
- add_column('int_const', 5)
261
- add_column('float_const', 5.0)
262
- add_column('timestamp_const_1', datetime.datetime.now())
263
- add_column('timestamp_const_2', datetime.datetime.now().astimezone(ZoneInfo('America/Anchorage')))
264
-
265
- # type_cast
266
- add_column('astype', t.c2.astype(FloatType()))
267
-
268
- # .apply
269
- add_column('c2_to_string', t.c2.apply(str))
270
- add_column('c6_to_string', t.c6.apply(json.dumps))
271
- add_column('c6_back_to_json', t[f'{col_prefix}_c6_to_string'].apply(json.loads))
272
-
273
- t.add_embedding_index(
274
- f'{col_prefix}_function_call',
275
- string_embed=pxt.functions.huggingface.clip_text.using(model_id='openai/clip-vit-base-patch32')
276
- )
277
-
278
- # query()
279
- @t.query
280
- def q1(i: int):
281
- # this breaks; TODO: why?
282
- #return t.where(t.c2 < i)
283
- return t.where(t.c2 < i).select(t.c1, t.c2)
284
- add_column('query_output', t.queries.q1(t.c2))
285
-
286
- @t.query
287
- def q2(s: str):
288
- sim = t[f'{col_prefix}_function_call'].similarity(s)
289
- return t.order_by(sim, asc=False).select(t[f'{col_prefix}_function_call']).limit(5)
290
- add_column('sim_output', t.queries.q2(t.c1))
291
-
292
-
293
- @pxt.udf(_force_stored=True)
294
- def test_udf_stored(n: int) -> int:
295
- return n + 1
296
-
297
-
298
- @pxt.udf(batch_size=4, _force_stored=True)
299
- def test_udf_stored_batched(strings: Batch[str], *, upper: bool = True) -> Batch[str]:
300
- return [string.upper() if upper else string.lower() for string in strings]
301
-
302
-
303
- def main() -> None:
304
- _logger.info("Creating pixeltable test artifact.")
305
- dumper = Dumper()
306
- dumper.create_tables()
307
- dumper.dump_db()
308
-
309
-
310
- if __name__ == "__main__":
311
- main()
@@ -1,81 +0,0 @@
1
- import av # type: ignore[import-untyped]
2
- import PIL.Image
3
- import PIL.ImageDraw
4
- import PIL.ImageFont
5
-
6
- from pathlib import Path
7
- from typing import Optional
8
- import tempfile
9
- import math
10
-
11
- def create_test_video(
12
- frame_count: int,
13
- frame_rate: float = 1.0,
14
- frame_width: int = 224,
15
- aspect_ratio: str = '16:9',
16
- frame_height: Optional[int] = None,
17
- output_path: Optional[Path] = None,
18
- font_file: str = '/Library/Fonts/Arial Unicode.ttf',
19
- ) -> Path:
20
- """
21
- Creates an .mp4 video file such as the ones in /tests/data/test_videos
22
- The video contains a frame number in each frame (for visual sanity check).
23
-
24
- Args:
25
- frame_count: Number of frames to create
26
- frame_rate: Frame rate of the video
27
- frame_width (int): Width in pixels of the video frame. Note: cost of decoding increases dramatically
28
- with frame width * frame height.
29
- aspect_ratio: Aspect ratio (width/height) of the video frames string of form 'width:height'
30
- frame_height: Height of the video frame, if given, aspect_ratio is ignored
31
- output_path: Path to save the video file
32
- font_file: Path to the font file used for text.
33
- """
34
-
35
- if output_path is None:
36
- output_path = Path(tempfile.NamedTemporaryFile(suffix='.mp4', delete=False).name)
37
-
38
- parts = [int(p) for p in aspect_ratio.split(':')]
39
- assert len(parts) == 2
40
- aspect_ratio = parts[0] / parts[1]
41
-
42
- if frame_height is None:
43
- frame_height = math.ceil(frame_width / aspect_ratio)
44
-
45
- frame_size = (frame_width, frame_height)
46
-
47
- font_size = min(frame_height, frame_width) // 4
48
- font = PIL.ImageFont.truetype(font=font_file, size=font_size)
49
- font_fill = 0xFFFFFF # white
50
- frame_color = 0xFFFFFF - font_fill # black
51
- # Create a video container
52
- container = av.open(str(output_path), mode='w')
53
-
54
- # Add a video stream
55
- stream = container.add_stream('h264', rate=frame_rate)
56
- stream.width, stream.height = frame_size
57
- stream.pix_fmt = 'yuv420p'
58
-
59
- for frame_number in range(frame_count):
60
- # Create an image with a number in it
61
- image = PIL.Image.new('RGB', frame_size, color=frame_color)
62
- draw = PIL.ImageDraw.Draw(image)
63
- # Optionally, add a font here if you have one
64
- text = str(frame_number)
65
- _, _, text_width, text_height = draw.textbbox((0, 0), text, font=font)
66
- text_position = ((frame_size[0] - text_width) // 2, (frame_size[1] - text_height) // 2)
67
- draw.text(text_position, text, font=font, fill=font_fill)
68
-
69
- # Convert the PIL image to an AVFrame
70
- frame = av.VideoFrame.from_image(image)
71
-
72
- # Encode and write the frame
73
- for packet in stream.encode(frame):
74
- container.mux(packet)
75
-
76
- # Flush and close the stream
77
- for packet in stream.encode():
78
- container.mux(packet)
79
-
80
- container.close()
81
- return output_path
@@ -1,50 +0,0 @@
1
- import ast
2
- import warnings
3
- from typing import Optional, Union
4
-
5
- import griffe
6
- import griffe.expressions
7
- from griffe import Extension, Object, ObjectNode
8
-
9
- import pixeltable as pxt
10
-
11
- logger = griffe.get_logger(__name__)
12
-
13
- class PxtGriffeExtension(Extension):
14
- """Implementation of a Pixeltable custom griffe extension."""
15
-
16
- def on_instance(self, node: Union[ast.AST, ObjectNode], obj: Object) -> None:
17
- if obj.docstring is None:
18
- # Skip over entities without a docstring
19
- return
20
-
21
- if isinstance(obj, griffe.Function):
22
- # See if the (Python) function has a @pxt.udf decorator
23
- if any(
24
- isinstance(dec.value, griffe.expressions.Expr) and dec.value.canonical_path in ['pixeltable.func.udf', 'pixeltable.udf']
25
- for dec in obj.decorators
26
- ):
27
- # Update the template
28
- self.__modify_pxt_udf(obj)
29
-
30
- def __modify_pxt_udf(self, func: griffe.Function) -> None:
31
- """
32
- Instructs the doc snippet for `func` to use the custom Pixeltable UDF jinja template, and
33
- converts all type hints to Pixeltable column type references, in accordance with the @udf
34
- decorator behavior.
35
- """
36
- func.extra['mkdocstrings']['template'] = 'udf.html.jinja'
37
- # Dynamically load the UDF reference so we can inspect the Pixeltable signature directly
38
- warnings.simplefilter("ignore")
39
- udf = griffe.dynamic_import(func.path)
40
- assert isinstance(udf, pxt.Function)
41
- # Convert the return type to a Pixeltable type reference
42
- func.returns = str(udf.signature.get_return_type())
43
- # Convert the parameter types to Pixeltable type references
44
- for griffe_param in func.parameters:
45
- assert isinstance(griffe_param.annotation, griffe.expressions.Expr)
46
- if griffe_param.name not in udf.signature.parameters:
47
- logger.warning(f'Parameter `{griffe_param.name}` not found in signature for UDF: {udf.display_name}')
48
- continue
49
- pxt_param = udf.signature.parameters[griffe_param.name]
50
- griffe_param.annotation = str(pxt_param.col_type)
@@ -1,6 +0,0 @@
1
- from pathlib import Path
2
-
3
-
4
- def get_templates_path() -> Path:
5
- """Implementation of the 'mkdocstrings.python.templates' plugin for custom jinja templates."""
6
- return Path(__file__).parent / "templates"
@@ -1,135 +0,0 @@
1
- {#- Template for Pixeltable UDFs. Cargo-culted (with modification) from _base/function.html.jinja. -#}
2
-
3
- {% block logs scoped %}
4
- {#- Logging block.
5
-
6
- This block can be used to log debug messages, deprecation messages, warnings, etc.
7
- -#}
8
- {{ log.debug("Rendering " + function.path) }}
9
- {% endblock logs %}
10
-
11
- {% import "language"|get_template as lang with context %}
12
- {#- Language module providing the `t` translation method. -#}
13
-
14
- <div class="doc doc-object doc-function">
15
- {% with obj = function, html_id = function.path %}
16
-
17
- {% if root %}
18
- {% set show_full_path = config.show_root_full_path %}
19
- {% set root_members = True %}
20
- {% elif root_members %}
21
- {% set show_full_path = config.show_root_members_full_path or config.show_object_full_path %}
22
- {% set root_members = False %}
23
- {% else %}
24
- {% set show_full_path = config.show_object_full_path %}
25
- {% endif %}
26
-
27
- {% set function_name = function.path if show_full_path else function.name %}
28
- {#- Brief or full function name depending on configuration. -#}
29
- {% set symbol_type = "udf" %}
30
- {#- Symbol type: method when parent is a class, function otherwise. -#}
31
-
32
- {% if not root or config.show_root_heading %}
33
- {% filter heading(
34
- heading_level,
35
- role="function",
36
- id=html_id,
37
- class="doc doc-heading",
38
- toc_label=(('<code class="doc-symbol doc-symbol-toc doc-symbol-' + symbol_type + '"></code>&nbsp;')|safe if config.show_symbol_type_toc else '') + function.name,
39
- ) %}
40
-
41
- {% block heading scoped %}
42
- {#- Heading block.
43
-
44
- This block renders the heading for the function.
45
- -#}
46
- {% if config.show_symbol_type_heading %}<code class="doc-symbol doc-symbol-heading doc-symbol-{{ symbol_type }}"></code>{% endif %}
47
- {% if config.separate_signature %}
48
- <span class="doc doc-object-name doc-function-name">{{ function_name }}</span>
49
- {% else %}
50
- {%+ filter highlight(language="python", inline=True) %}
51
- {{ function_name }}{% include "signature"|get_template with context %}
52
- {% endfilter %}
53
- {% endif %}
54
- {% endblock heading %}
55
-
56
- {% block labels scoped %}
57
- {#- Labels block.
58
-
59
- This block renders the labels for the function.
60
- -#}
61
- {% with labels = function.labels %}
62
- {% include "labels"|get_template with context %}
63
- {% endwith %}
64
- {% endblock labels %}
65
-
66
- {% endfilter %}
67
-
68
- {% block signature scoped %}
69
- {#- Signature block.
70
-
71
- This block renders the signature for the function.
72
- -#}
73
- {% if config.separate_signature %}
74
- {% filter format_signature(function, config.line_length, crossrefs=config.signature_crossrefs) %}
75
- {{ function.name }}
76
- {% endfilter %}
77
- {% endif %}
78
- {% endblock signature %}
79
-
80
- {% else %}
81
-
82
- {% if config.show_root_toc_entry %}
83
- {% filter heading(
84
- heading_level,
85
- role="function",
86
- id=html_id,
87
- toc_label=(('<code class="doc-symbol doc-symbol-toc doc-symbol-' + symbol_type + '"></code>&nbsp;')|safe if config.show_symbol_type_toc else '') + function.name,
88
- hidden=True,
89
- ) %}
90
- {% endfilter %}
91
- {% endif %}
92
- {% set heading_level = heading_level - 1 %}
93
- {% endif %}
94
-
95
- <div class="doc doc-contents {% if root %}first{% endif %}">
96
- {% block contents scoped %}
97
- {#- Contents block.
98
-
99
- This block renders the contents of the function.
100
- It contains other blocks that users can override.
101
- Overriding the contents block allows to rearrange the order of the blocks.
102
- -#}
103
- {% block docstring scoped %}
104
- {#- Docstring block.
105
-
106
- This block renders the docstring for the function.
107
- -#}
108
- {% with docstring_sections = function.docstring.parsed %}
109
- {% include "docstring"|get_template with context %}
110
- {% endwith %}
111
- {% endblock docstring %}
112
-
113
- {% block source scoped %}
114
- {#- Source block.
115
-
116
- This block renders the source code for the function.
117
- -#}
118
- {% if config.show_source and function.source %}
119
- <details class="quote">
120
- <summary>{{ lang.t("Source code in") }} <code>
121
- {%- if function.relative_filepath.is_absolute() -%}
122
- {{ function.relative_package_filepath }}
123
- {%- else -%}
124
- {{ function.relative_filepath }}
125
- {%- endif -%}
126
- </code></summary>
127
- {{ function.source|highlight(language="python", linestart=function.lineno, linenums=True) }}
128
- </details>
129
- {% endif %}
130
- {% endblock source %}
131
- {% endblock contents %}
132
- </div>
133
-
134
- {% endwith %}
135
- </div>
@@ -1,9 +0,0 @@
1
- import numpy as np
2
-
3
- import pixeltable as pxt
4
-
5
-
6
- # TODO This can go away once we have the ability to inline expr_udf's
7
- @pxt.expr_udf
8
- def clip_text_embed(txt: str) -> np.ndarray:
9
- return pxt.functions.huggingface.clip_text(txt, model_id='openai/clip-vit-base-patch32') # type: ignore[return-value]
@@ -1,55 +0,0 @@
1
- from typing import Callable, Optional
2
-
3
- from mypy import nodes
4
- from mypy.plugin import AnalyzeTypeContext, ClassDefContext, Plugin
5
- from mypy.plugins.common import add_method_to_class
6
- from mypy.types import AnyType, Type, TypeOfAny
7
-
8
- import pixeltable as pxt
9
-
10
-
11
- class PxtPlugin(Plugin):
12
- __UDA_FULLNAME = f'{pxt.uda.__module__}.{pxt.uda.__name__}'
13
- __TYPE_MAP = {
14
- pxt.Json: 'typing.Any',
15
- pxt.Array: 'numpy.ndarray',
16
- pxt.Image: 'PIL.Image.Image',
17
- pxt.Video: 'builtins.str',
18
- pxt.Audio: 'builtins.str',
19
- pxt.Document: 'builtins.str',
20
- }
21
- __FULLNAME_MAP = {
22
- f'{k.__module__}.{k.__name__}': v
23
- for k, v in __TYPE_MAP.items()
24
- }
25
-
26
- def get_type_analyze_hook(self, fullname: str) -> Optional[Callable[[AnalyzeTypeContext], Type]]:
27
- if fullname in self.__FULLNAME_MAP:
28
- subst_name = self.__FULLNAME_MAP[fullname]
29
- return lambda ctx: pxt_hook(ctx, subst_name)
30
- return None
31
-
32
- def get_class_decorator_hook_2(self, fullname: str) -> Optional[Callable[[ClassDefContext], bool]]:
33
- if fullname == self.__UDA_FULLNAME:
34
- return pxt_decorator_hook
35
- return None
36
-
37
- def plugin(version: str) -> type:
38
- return PxtPlugin
39
-
40
- def pxt_hook(ctx: AnalyzeTypeContext, subst_name: str) -> Type:
41
- if subst_name == 'typing.Any':
42
- return AnyType(TypeOfAny.special_form)
43
- return ctx.api.named_type(subst_name, [])
44
-
45
- def pxt_decorator_hook(ctx: ClassDefContext) -> bool:
46
- arg = nodes.Argument(nodes.Var('fn'), AnyType(TypeOfAny.special_form), None, nodes.ARG_POS)
47
- add_method_to_class(
48
- ctx.api,
49
- ctx.cls,
50
- "to_sql",
51
- args=[arg],
52
- return_type=AnyType(TypeOfAny.special_form),
53
- is_staticmethod=True,
54
- )
55
- return True