pixeltable 0.2.2__tar.gz → 0.2.3__tar.gz

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (126)
  1. {pixeltable-0.2.2 → pixeltable-0.2.3}/PKG-INFO +5 -3
  2. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exec/cache_prefetch_node.py +14 -11
  3. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exprs/data_row.py +14 -6
  4. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/iterators/document.py +1 -1
  5. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/store.py +15 -10
  6. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/tests/test_dataframe.py +7 -1
  7. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/tests/test_document.py +3 -0
  8. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/tests/test_exprs.py +2 -1
  9. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/tests/test_table.py +6 -0
  10. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/tests/test_video.py +2 -0
  11. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/tests/utils.py +6 -4
  12. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/type_system.py +1 -1
  13. {pixeltable-0.2.2 → pixeltable-0.2.3}/pyproject.toml +5 -6
  14. {pixeltable-0.2.2 → pixeltable-0.2.3}/LICENSE +0 -0
  15. {pixeltable-0.2.2 → pixeltable-0.2.3}/README.md +0 -0
  16. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/__init__.py +0 -0
  17. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/catalog/__init__.py +0 -0
  18. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/catalog/catalog.py +0 -0
  19. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/catalog/column.py +0 -0
  20. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/catalog/dir.py +0 -0
  21. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/catalog/globals.py +0 -0
  22. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/catalog/insertable_table.py +0 -0
  23. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/catalog/named_function.py +0 -0
  24. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/catalog/path.py +0 -0
  25. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/catalog/path_dict.py +0 -0
  26. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/catalog/schema_object.py +0 -0
  27. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/catalog/table.py +0 -0
  28. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/catalog/table_version.py +0 -0
  29. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/catalog/table_version_path.py +0 -0
  30. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/catalog/view.py +0 -0
  31. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/client.py +0 -0
  32. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/dataframe.py +0 -0
  33. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/env.py +0 -0
  34. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exceptions.py +0 -0
  35. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exec/__init__.py +0 -0
  36. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exec/aggregation_node.py +0 -0
  37. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exec/component_iteration_node.py +0 -0
  38. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exec/data_row_batch.py +0 -0
  39. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exec/exec_context.py +0 -0
  40. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exec/exec_node.py +0 -0
  41. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exec/expr_eval_node.py +0 -0
  42. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exec/in_memory_data_node.py +0 -0
  43. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exec/media_validation_node.py +0 -0
  44. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exec/sql_scan_node.py +0 -0
  45. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exprs/__init__.py +0 -0
  46. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exprs/arithmetic_expr.py +0 -0
  47. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exprs/array_slice.py +0 -0
  48. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exprs/column_property_ref.py +0 -0
  49. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exprs/column_ref.py +0 -0
  50. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exprs/comparison.py +0 -0
  51. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exprs/compound_predicate.py +0 -0
  52. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exprs/expr.py +0 -0
  53. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exprs/expr_set.py +0 -0
  54. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exprs/function_call.py +0 -0
  55. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exprs/globals.py +0 -0
  56. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exprs/image_member_access.py +0 -0
  57. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exprs/image_similarity_predicate.py +0 -0
  58. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exprs/inline_array.py +0 -0
  59. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exprs/inline_dict.py +0 -0
  60. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exprs/is_null.py +0 -0
  61. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exprs/json_mapper.py +0 -0
  62. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exprs/json_path.py +0 -0
  63. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exprs/literal.py +0 -0
  64. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exprs/object_ref.py +0 -0
  65. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exprs/predicate.py +0 -0
  66. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exprs/row_builder.py +0 -0
  67. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exprs/rowid_ref.py +0 -0
  68. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exprs/type_cast.py +0 -0
  69. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/exprs/variable.py +0 -0
  70. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/func/__init__.py +0 -0
  71. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/func/aggregate_function.py +0 -0
  72. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/func/batched_function.py +0 -0
  73. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/func/callable_function.py +0 -0
  74. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/func/expr_template_function.py +0 -0
  75. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/func/function.py +0 -0
  76. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/func/function_registry.py +0 -0
  77. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/func/globals.py +0 -0
  78. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/func/nos_function.py +0 -0
  79. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/func/signature.py +0 -0
  80. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/func/udf.py +0 -0
  81. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/functions/__init__.py +0 -0
  82. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/functions/eval.py +0 -0
  83. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/functions/fireworks.py +0 -0
  84. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/functions/huggingface.py +0 -0
  85. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/functions/image.py +0 -0
  86. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/functions/openai.py +0 -0
  87. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/functions/pil/image.py +0 -0
  88. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/functions/string.py +0 -0
  89. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/functions/together.py +0 -0
  90. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/functions/util.py +0 -0
  91. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/functions/video.py +0 -0
  92. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/iterators/__init__.py +0 -0
  93. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/iterators/base.py +0 -0
  94. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/iterators/video.py +0 -0
  95. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/metadata/__init__.py +0 -0
  96. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/metadata/converters/convert_10.py +0 -0
  97. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/metadata/schema.py +0 -0
  98. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/plan.py +0 -0
  99. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/tests/conftest.py +0 -0
  100. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/tests/test_audio.py +0 -0
  101. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/tests/test_catalog.py +0 -0
  102. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/tests/test_client.py +0 -0
  103. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/tests/test_component_view.py +0 -0
  104. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/tests/test_dirs.py +0 -0
  105. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/tests/test_function.py +0 -0
  106. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/tests/test_functions.py +0 -0
  107. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/tests/test_migration.py +0 -0
  108. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/tests/test_nos.py +0 -0
  109. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/tests/test_snapshot.py +0 -0
  110. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/tests/test_transactional_directory.py +0 -0
  111. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/tests/test_types.py +0 -0
  112. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/tests/test_view.py +0 -0
  113. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/tool/create_test_db_dump.py +0 -0
  114. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/tool/create_test_video.py +0 -0
  115. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/utils/__init__.py +0 -0
  116. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/utils/clip.py +0 -0
  117. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/utils/coco.py +0 -0
  118. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/utils/documents.py +0 -0
  119. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/utils/filecache.py +0 -0
  120. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/utils/help.py +0 -0
  121. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/utils/media_store.py +0 -0
  122. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/utils/parquet.py +0 -0
  123. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/utils/pytorch.py +0 -0
  124. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/utils/s3.py +0 -0
  125. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/utils/sql.py +0 -0
  126. {pixeltable-0.2.2 → pixeltable-0.2.3}/pixeltable/utils/transactional_directory.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pixeltable
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: Pixeltable: The Multimodal AI Data Plane
5
5
  Author: Marcel Kornacker
6
6
  Author-email: marcelk@gmail.com
@@ -11,19 +11,21 @@ Classifier: Programming Language :: Python :: 3.10
11
11
  Classifier: Programming Language :: Python :: 3.11
12
12
  Classifier: Programming Language :: Python :: 3.12
13
13
  Requires-Dist: av (>=10.0.0)
14
+ Requires-Dist: beautifulsoup4 (>=4.0.0,<5.0.0)
14
15
  Requires-Dist: cloudpickle (>=2.2.1,<3.0.0)
15
16
  Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
16
17
  Requires-Dist: jmespath (>=1.0.1,<2.0.0)
17
- Requires-Dist: numpy (>=1.24.1,<2.0.0)
18
+ Requires-Dist: numpy (>=1.26,<2.0)
18
19
  Requires-Dist: opencv-python-headless (>=4.7.0.68,<5.0.0.0)
19
20
  Requires-Dist: pandas (>=2.0,<3.0)
20
- Requires-Dist: pgserver (==0.0.7)
21
+ Requires-Dist: pgserver (==0.0.9)
21
22
  Requires-Dist: pgvector (>=0.2.1,<0.3.0)
22
23
  Requires-Dist: pillow (>=9.4.0,<10.0.0)
23
24
  Requires-Dist: psutil (>=5.9.5,<6.0.0)
24
25
  Requires-Dist: psycopg2-binary (>=2.9.5,<3.0.0)
25
26
  Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
26
27
  Requires-Dist: regex (>=2022.10.31,<2023.0.0)
28
+ Requires-Dist: requests (>=2.31.0,<3.0.0)
27
29
  Requires-Dist: sqlalchemy-utils (>=0.41.1,<0.42.0)
28
30
  Requires-Dist: sqlalchemy[mypy] (>=2.0.23,<3.0.0)
29
31
  Requires-Dist: tqdm (>=4.64.1,<5.0.0)
@@ -1,19 +1,21 @@
1
1
  from __future__ import annotations
2
- from typing import List, Optional, Any, Tuple, Dict
2
+
3
+ import concurrent.futures
4
+ import logging
3
5
  import threading
6
+ import urllib.parse
7
+ import urllib.request
4
8
  from collections import defaultdict
5
- from uuid import UUID
6
- import concurrent
7
- import logging
8
- import urllib
9
9
  from pathlib import Path
10
+ from typing import List, Optional, Any, Tuple, Dict
11
+ from uuid import UUID
10
12
 
11
- from .data_row_batch import DataRowBatch
12
- from .exec_node import ExecNode
13
- import pixeltable.exprs as exprs
14
- from pixeltable.utils.filecache import FileCache
15
13
  import pixeltable.env as env
16
14
  import pixeltable.exceptions as excs
15
+ import pixeltable.exprs as exprs
16
+ from pixeltable.utils.filecache import FileCache
17
+ from .data_row_batch import DataRowBatch
18
+ from .exec_node import ExecNode
17
19
 
18
20
  _logger = logging.getLogger('pixeltable')
19
21
 
@@ -81,7 +83,9 @@ class CachePrefetchNode(ExecNode):
81
83
  """Fetches a remote URL into Env.tmp_dir and returns its path"""
82
84
  url = row.file_urls[slot_idx]
83
85
  parsed = urllib.parse.urlparse(url)
84
- assert parsed.scheme != '' and parsed.scheme != 'file'
86
+ # Use len(parsed.scheme) > 1 here to ensure we're not being passed
87
+ # a Windows filename
88
+ assert len(parsed.scheme) > 1 and parsed.scheme != 'file'
85
89
  # preserve the file extension, if there is one
86
90
  extension = ''
87
91
  if parsed.path != '':
@@ -95,7 +99,6 @@ class CachePrefetchNode(ExecNode):
95
99
  if self.boto_client is None:
96
100
  self.boto_client = get_client()
97
101
  self.boto_client.download_file(parsed.netloc, parsed.path.lstrip('/'), str(tmp_path))
98
- return tmp_path
99
102
  elif parsed.scheme == 'http' or parsed.scheme == 'https':
100
103
  with urllib.request.urlopen(url) as resp, open(tmp_path, 'wb') as f:
101
104
  data = resp.read()
@@ -1,7 +1,9 @@
1
1
  from __future__ import annotations
2
- from typing import Optional, List, Any, Tuple
2
+
3
3
  import io
4
- import urllib
4
+ import urllib.parse
5
+ import urllib.request
6
+ from typing import Optional, List, Any, Tuple
5
7
 
6
8
  import PIL
7
9
  import numpy as np
@@ -104,6 +106,7 @@ class DataRow:
104
106
  assert self.file_paths[index] is not None
105
107
  if self.vals[index] is None:
106
108
  self.vals[index] = PIL.Image.open(self.file_paths[index])
109
+ self.vals[index].load()
107
110
 
108
111
  return self.vals[index]
109
112
 
@@ -137,14 +140,19 @@ class DataRow:
137
140
  if (idx in self.img_slot_idxs or idx in self.media_slot_idxs) and isinstance(val, str):
138
141
  # this is either a local file path or a URL
139
142
  parsed = urllib.parse.urlparse(val)
140
- if parsed.scheme == '' or parsed.scheme == 'file':
143
+ # Determine if this is a local file or a remote URL. If the scheme length is <= 1,
144
+ # we assume it's a local file. (This is because a Windows path will be interpreted
145
+ # by urllib as a URL with scheme equal to the drive letter.)
146
+ if len(parsed.scheme) <= 1 or parsed.scheme == 'file':
141
147
  # local file path
142
148
  assert self.file_urls[idx] is None and self.file_paths[idx] is None
143
- if parsed.scheme == '':
144
- self.file_urls[idx] = urllib.parse.urljoin('file:', urllib.request.pathname2url(parsed.path))
149
+ if len(parsed.scheme) <= 1:
150
+ self.file_urls[idx] = urllib.parse.urljoin('file:', urllib.request.pathname2url(val))
151
+ self.file_paths[idx] = val
145
152
  else:
146
153
  self.file_urls[idx] = val
147
- self.file_paths[idx] = urllib.parse.unquote(parsed.path)
154
+ # Wrap the path in a url2pathname() call to ensure proper handling on Windows.
155
+ self.file_paths[idx] = urllib.parse.unquote(urllib.request.url2pathname(parsed.path))
148
156
  else:
149
157
  # URL
150
158
  assert self.file_urls[idx] is None
@@ -61,7 +61,7 @@ class DocumentSplitter(ComponentIterator):
61
61
  import bs4
62
62
  if html_skip_tags is None:
63
63
  html_skip_tags = ['nav']
64
- with open(document, 'r') as fh:
64
+ with open(document, 'r', encoding='utf8') as fh:
65
65
  s = fh.read()
66
66
  self._doc_handle = get_document_handle(s)
67
67
  assert self._doc_handle is not None
@@ -1,24 +1,26 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import abc
4
+ import logging
3
5
  import os
4
6
  import sys
7
+ import urllib.parse
8
+ import urllib.request
5
9
  import warnings
6
10
  from typing import Optional, Dict, Any, List, Tuple, Set
7
- import logging
8
- import urllib
11
+
9
12
  import sqlalchemy as sql
10
13
  from tqdm import tqdm, TqdmWarning
11
- import abc
12
14
 
13
15
  import pixeltable.catalog as catalog
16
+ import pixeltable.env as env
17
+ from pixeltable import exprs
18
+ import pixeltable.exceptions as excs
19
+ from pixeltable.exec import ExecNode
14
20
  from pixeltable.metadata import schema
15
21
  from pixeltable.type_system import StringType
16
- from pixeltable.exec import ExecNode
17
- from pixeltable import exprs
18
- from pixeltable.utils.sql import log_stmt, log_explain
19
- import pixeltable.env as env
20
22
  from pixeltable.utils.media_store import MediaStore
21
-
23
+ from pixeltable.utils.sql import log_stmt, log_explain
22
24
 
23
25
  _logger = logging.getLogger('pixeltable')
24
26
 
@@ -121,10 +123,13 @@ class StoreBase:
121
123
  if file_url is None:
122
124
  return None
123
125
  parsed = urllib.parse.urlparse(file_url)
124
- if parsed.scheme != '' and parsed.scheme != 'file':
126
+ # We should never be passed a local file path here. The "len > 1" ensures that Windows
127
+ # file paths aren't mistaken for URLs with a single-character scheme.
128
+ assert len(parsed.scheme) > 1
129
+ if parsed.scheme != 'file':
125
130
  # remote url
126
131
  return file_url
127
- file_path = urllib.parse.unquote(parsed.path)
132
+ file_path = urllib.parse.unquote(urllib.request.url2pathname(parsed.path))
128
133
  if not file_path.startswith(pxt_tmp_dir):
129
134
  # not a tmp file
130
135
  return file_url
@@ -7,7 +7,6 @@ import bs4
7
7
  import numpy as np
8
8
  import pytest
9
9
  import requests
10
- from pycocotools.coco import COCO
11
10
 
12
11
  import pixeltable as pxt
13
12
  from pixeltable import catalog
@@ -184,6 +183,8 @@ class TestDataFrame:
184
183
  res = t.select(1.0).where(t.c2 < 10).collect()
185
184
  assert res[res.column_names()[0]] == [1.0] * 10
186
185
 
186
+ # TODO This test doesn't work on Windows due to reliance on the structure of file URLs
187
+ @pytest.mark.skip('Test is not portable')
187
188
  def test_html_media_url(self, test_client: pxt.Client) -> None:
188
189
  tab = test_client.create_table('test_html_repr', {'video': pxt.VideoType(), 'audio': pxt.AudioType()})
189
190
  status = tab.insert(video=get_video_files()[0], audio=get_audio_files()[0])
@@ -208,6 +209,7 @@ class TestDataFrame:
208
209
  def test_to_pytorch_dataset(self, all_datatypes_tbl: catalog.Table):
209
210
  """ tests all types are handled correctly in this conversion
210
211
  """
212
+ skip_test_if_not_installed('torch')
211
213
  import torch
212
214
 
213
215
  t = all_datatypes_tbl
@@ -238,6 +240,7 @@ class TestDataFrame:
238
240
  def test_to_pytorch_image_format(self, all_datatypes_tbl: catalog.Table) -> None:
239
241
  """ tests the image_format parameter is honored
240
242
  """
243
+ skip_test_if_not_installed('torch')
241
244
  import torch
242
245
  import torchvision.transforms as T
243
246
 
@@ -295,6 +298,7 @@ class TestDataFrame:
295
298
  1. compatibility with multiprocessing
296
299
  2. compatibility of all types with default collate_fn
297
300
  """
301
+ skip_test_if_not_installed('torch')
298
302
  import torch.utils.data
299
303
  @pxt.udf(param_types=[pxt.JsonType()], return_type=pxt.JsonType())
300
304
  def restrict_json_for_default_collate(obj):
@@ -352,6 +356,7 @@ class TestDataFrame:
352
356
  2. adding a row to the table invalidates the cached version
353
357
  3. changing the select list invalidates the cached version
354
358
  """
359
+ skip_test_if_not_installed('torch')
355
360
  t = all_datatypes_tbl
356
361
 
357
362
  t.drop_column('c_video') # null value video column triggers internal assertions in DataRow
@@ -383,6 +388,7 @@ class TestDataFrame:
383
388
 
384
389
  def test_to_coco(self, test_client: pxt.Client) -> None:
385
390
  skip_test_if_not_installed('nos')
391
+ from pycocotools.coco import COCO
386
392
  cl = test_client
387
393
  base_t = cl.create_table('videos', {'video': pxt.VideoType()})
388
394
  args = {'video': base_t.video, 'fps': 1}
@@ -8,6 +8,7 @@ import pytest
8
8
  import pixeltable as pxt
9
9
  from pixeltable.iterators.document import DocumentSplitter
10
10
  from pixeltable.tests.utils import get_documents, get_video_files, get_audio_files, get_image_files
11
+ from pixeltable.tests.utils import skip_test_if_not_installed
11
12
  from pixeltable.type_system import DocumentType
12
13
 
13
14
 
@@ -34,6 +35,7 @@ class TestDocument:
34
35
  assert status.num_excs == len(file_paths)
35
36
 
36
37
  def test_doc_splitter(self, test_client: pxt.Client) -> None:
38
+ skip_test_if_not_installed('tiktoken')
37
39
  file_paths = self.valid_doc_paths()
38
40
  cl = test_client
39
41
  doc_t = cl.create_table('docs', {'doc': DocumentType()})
@@ -88,6 +90,7 @@ class TestDocument:
88
90
  cl.drop_table('chunks')
89
91
 
90
92
  def test_doc_splitter_headings(self, test_client: pxt.Client) -> None:
93
+ skip_test_if_not_installed('spacy')
91
94
  file_paths = self.valid_doc_paths()
92
95
  cl = test_client
93
96
  doc_t = cl.create_table('docs', {'doc': DocumentType()})
@@ -1,5 +1,6 @@
1
1
  import json
2
2
  import urllib.parse
3
+ import urllib.request
3
4
  from typing import List, Dict
4
5
 
5
6
  import pytest
@@ -174,7 +175,7 @@ class TestExprs:
174
175
  res = img_t.select(img_t.img.fileurl).show(0).to_pandas()
175
176
  stored_urls = set(res.iloc[:, 0])
176
177
  assert len(stored_urls) == len(res)
177
- all_urls = set([urllib.parse.urljoin('file:', path) for path in get_image_files()])
178
+ all_urls = set(urllib.parse.urljoin('file:', urllib.request.pathname2url(path)) for path in get_image_files())
178
179
  assert stored_urls <= all_urls
179
180
 
180
181
  # localpath
@@ -18,6 +18,7 @@ from pixeltable.iterators import FrameIterator
18
18
  from pixeltable.tests.utils import \
19
19
  make_tbl, create_table_data, read_data_file, get_video_files, get_audio_files, get_image_files, get_documents, \
20
20
  assert_resultset_eq
21
+ from pixeltable.tests.utils import skip_test_if_not_installed
21
22
  from pixeltable.type_system import \
22
23
  StringType, IntType, FloatType, TimestampType, ImageType, VideoType, JsonType, BoolType, ArrayType, AudioType, \
23
24
  DocumentType
@@ -296,6 +297,7 @@ class TestTable:
296
297
  self.check_bad_media(test_client, rows, DocumentType(nullable=True))
297
298
 
298
299
  def test_validate_external_url(self, test_client: pxt.Client) -> None:
300
+ skip_test_if_not_installed('boto3')
299
301
  rows = [
300
302
  {'media': 's3://open-images-dataset/validation/doesnotexist.jpg', 'is_bad_media': True},
301
303
  {'media': 'https://archive.random.org/download?file=2024-01-28.bin', 'is_bad_media': True}, # 403 error
@@ -315,6 +317,7 @@ class TestTable:
315
317
  self.check_bad_media(test_client, rows, VideoType(nullable=True))
316
318
 
317
319
  def test_create_s3_image_table(self, test_client: pxt.Client) -> None:
320
+ skip_test_if_not_installed('boto3')
318
321
  cl = test_client
319
322
  tbl = cl.create_table('test', {'img': ImageType(nullable=False)})
320
323
  # this is needed because Client.reset_catalog() doesn't call TableVersion.drop(), which would
@@ -371,6 +374,7 @@ class TestTable:
371
374
  assert cache_stats.total_size == 0
372
375
 
373
376
  def test_video_url(self, test_client: pxt.Client) -> None:
377
+ skip_test_if_not_installed('boto3')
374
378
  cl = test_client
375
379
  schema = {
376
380
  'payload': IntType(nullable=False),
@@ -390,6 +394,7 @@ class TestTable:
390
394
  cap.release()
391
395
 
392
396
  def test_create_video_table(self, test_client: pxt.Client) -> None:
397
+ skip_test_if_not_installed('boto3')
393
398
  cl = test_client
394
399
  tbl = cl.create_table(
395
400
  'test_tbl',
@@ -529,6 +534,7 @@ class TestTable:
529
534
  assert 'expected ndarray((2, 3)' in str(exc_info.value)
530
535
 
531
536
  def test_query(self, test_client: pxt.Client) -> None:
537
+ skip_test_if_not_installed('boto3')
532
538
  cl = test_client
533
539
  col_names = ['c1', 'c2', 'c3', 'c4', 'c5']
534
540
  t = make_tbl(cl, 'test', col_names)
@@ -8,6 +8,7 @@ from pixeltable import catalog
8
8
  from pixeltable import exceptions as excs
9
9
  from pixeltable.iterators import FrameIterator
10
10
  from pixeltable.tests.utils import get_video_files
11
+ from pixeltable.tests.utils import skip_test_if_not_installed
11
12
  from pixeltable.type_system import VideoType, ImageType
12
13
  from pixeltable.utils.media_store import MediaStore
13
14
 
@@ -61,6 +62,7 @@ class TestVideo:
61
62
  assert MediaStore.count(view.get_id()) == view.count()
62
63
 
63
64
  def test_query(self, test_client: pxt.client) -> None:
65
+ skip_test_if_not_installed('boto3')
64
66
  video_filepaths = get_video_files()
65
67
  cl = test_client
66
68
  base_t, view_t = self.create_tbls(cl)
@@ -225,7 +225,7 @@ def read_data_file(dir_name: str, file_name: str, path_col_names: Optional[List[
225
225
  df[col_name] = df.apply(lambda r: str(abs_path / r[col_name]), axis=1)
226
226
  return df.to_dict(orient='records')
227
227
 
228
- def get_video_files(include_bad_video=False) -> List[str]:
228
+ def get_video_files(include_bad_video: bool = False) -> List[str]:
229
229
  tests_dir = os.path.dirname(__file__) # search with respect to tests/ dir
230
230
  glob_result = glob.glob(f'{tests_dir}/**/videos/*', recursive=True)
231
231
  if not include_bad_video:
@@ -239,12 +239,14 @@ def get_test_video_files() -> List[str]:
239
239
  glob_result = glob.glob(f'{tests_dir}/**/test_videos/*', recursive=True)
240
240
  return glob_result
241
241
 
242
- def get_image_files() -> List[str]:
242
+ def get_image_files(include_bad_image: bool = False) -> List[str]:
243
243
  tests_dir = os.path.dirname(__file__) # search with respect to tests/ dir
244
244
  glob_result = glob.glob(f'{tests_dir}/**/imagenette2-160/*', recursive=True)
245
+ if not include_bad_image:
246
+ glob_result = [f for f in glob_result if 'bad_image' not in f]
245
247
  return glob_result
246
248
 
247
- def get_audio_files(include_bad_audio=False) -> List[str]:
249
+ def get_audio_files(include_bad_audio: bool = False) -> List[str]:
248
250
  tests_dir = os.path.dirname(__file__)
249
251
  glob_result = glob.glob(f'{tests_dir}/**/audio/*', recursive=True)
250
252
  if not include_bad_audio:
@@ -259,7 +261,7 @@ def get_documents() -> List[str]:
259
261
  def get_sentences(n: int = 100) -> List[str]:
260
262
  tests_dir = os.path.dirname(__file__)
261
263
  path = glob.glob(f'{tests_dir}/**/jeopardy.json', recursive=True)[0]
262
- with open(path, 'r') as f:
264
+ with open(path, 'r', encoding='utf8') as f:
263
265
  questions_list = json.load(f)
264
266
  # this dataset contains \' around the questions
265
267
  return [q['question'].replace("'", '') for q in questions_list[:n]]
@@ -911,7 +911,7 @@ class DocumentType(ColumnType):
911
911
  def validate_media(self, val: Any) -> None:
912
912
  assert isinstance(val, str)
913
913
  from pixeltable.utils.documents import get_document_handle
914
- with open(val, 'r') as fh:
914
+ with open(val, 'r', encoding='utf8') as fh:
915
915
  try:
916
916
  s = fh.read()
917
917
  dh = get_document_handle(s)
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "pixeltable"
7
- version = "0.2.2"
7
+ version = "0.2.3"
8
8
  description = "Pixeltable: The Multimodal AI Data Plane"
9
9
  authors = ["Marcel Kornacker <marcelk@gmail.com>"]
10
10
  readme = "README.md"
@@ -17,7 +17,7 @@ exclude = [
17
17
 
18
18
  [tool.poetry.dependencies]
19
19
  python = ">=3.9,<4.0"
20
- numpy = "^1.24.1"
20
+ numpy = "^1.26"
21
21
  pandas = ">=2.0,<3.0"
22
22
  pillow = "^9.4.0"
23
23
  opencv-python-headless = "^4.7.0.68"
@@ -31,9 +31,11 @@ sqlalchemy = {extras = ["mypy"], version = "^2.0.23"}
31
31
  sqlalchemy-utils = "^0.41.1"
32
32
  pgvector = "^0.2.1"
33
33
  av = ">=10.0.0"
34
+ beautifulsoup4 = "^4.0.0"
35
+ requests = "^2.31.0"
34
36
  pyyaml = "^6.0.1"
35
37
  jinja2 = "^3.1.3"
36
- pgserver = "0.0.7"
38
+ pgserver = "0.0.9"
37
39
 
38
40
  [tool.poetry.group.dev]
39
41
  optional = true
@@ -61,8 +63,6 @@ mkdocs-jupyter = "^0.24"
61
63
  pycocotools = "^2.0.7"
62
64
  ipykernel = "^6.27.1"
63
65
  nbmake = "^1.4.6"
64
- bs4 = "^0.0.2"
65
- requests = "^2.31.0"
66
66
  # packages required by various optional pieces of the codebase
67
67
  torch = "^2.2"
68
68
  torchvision = "^0.17"
@@ -74,7 +74,6 @@ boto3 = "^1.17"
74
74
  spacy = "^3.0"
75
75
  en-core-web-sm = {url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl"}
76
76
  tiktoken = ">=0.3"
77
- beautifulsoup4 = "^4.0.0"
78
77
  sentence-transformers = "^2.0.0"
79
78
  transformers = "^4.20"
80
79
 
File without changes
File without changes
File without changes