pixeltable 0.1.0__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +34 -6
- pixeltable/catalog/__init__.py +13 -0
- pixeltable/catalog/catalog.py +159 -0
- pixeltable/catalog/column.py +200 -0
- pixeltable/catalog/dir.py +32 -0
- pixeltable/catalog/globals.py +33 -0
- pixeltable/catalog/insertable_table.py +191 -0
- pixeltable/catalog/named_function.py +36 -0
- pixeltable/catalog/path.py +58 -0
- pixeltable/catalog/path_dict.py +139 -0
- pixeltable/catalog/schema_object.py +39 -0
- pixeltable/catalog/table.py +581 -0
- pixeltable/catalog/table_version.py +749 -0
- pixeltable/catalog/table_version_path.py +133 -0
- pixeltable/catalog/view.py +203 -0
- pixeltable/client.py +590 -30
- pixeltable/dataframe.py +540 -349
- pixeltable/env.py +359 -45
- pixeltable/exceptions.py +12 -21
- pixeltable/exec/__init__.py +9 -0
- pixeltable/exec/aggregation_node.py +78 -0
- pixeltable/exec/cache_prefetch_node.py +116 -0
- pixeltable/exec/component_iteration_node.py +79 -0
- pixeltable/exec/data_row_batch.py +95 -0
- pixeltable/exec/exec_context.py +22 -0
- pixeltable/exec/exec_node.py +61 -0
- pixeltable/exec/expr_eval_node.py +217 -0
- pixeltable/exec/in_memory_data_node.py +69 -0
- pixeltable/exec/media_validation_node.py +43 -0
- pixeltable/exec/sql_scan_node.py +225 -0
- pixeltable/exprs/__init__.py +24 -0
- pixeltable/exprs/arithmetic_expr.py +102 -0
- pixeltable/exprs/array_slice.py +71 -0
- pixeltable/exprs/column_property_ref.py +77 -0
- pixeltable/exprs/column_ref.py +105 -0
- pixeltable/exprs/comparison.py +77 -0
- pixeltable/exprs/compound_predicate.py +98 -0
- pixeltable/exprs/data_row.py +195 -0
- pixeltable/exprs/expr.py +586 -0
- pixeltable/exprs/expr_set.py +39 -0
- pixeltable/exprs/function_call.py +380 -0
- pixeltable/exprs/globals.py +69 -0
- pixeltable/exprs/image_member_access.py +115 -0
- pixeltable/exprs/image_similarity_predicate.py +58 -0
- pixeltable/exprs/inline_array.py +107 -0
- pixeltable/exprs/inline_dict.py +101 -0
- pixeltable/exprs/is_null.py +38 -0
- pixeltable/exprs/json_mapper.py +121 -0
- pixeltable/exprs/json_path.py +159 -0
- pixeltable/exprs/literal.py +54 -0
- pixeltable/exprs/object_ref.py +41 -0
- pixeltable/exprs/predicate.py +44 -0
- pixeltable/exprs/row_builder.py +355 -0
- pixeltable/exprs/rowid_ref.py +94 -0
- pixeltable/exprs/type_cast.py +53 -0
- pixeltable/exprs/variable.py +45 -0
- pixeltable/func/__init__.py +9 -0
- pixeltable/func/aggregate_function.py +194 -0
- pixeltable/func/batched_function.py +53 -0
- pixeltable/func/callable_function.py +69 -0
- pixeltable/func/expr_template_function.py +82 -0
- pixeltable/func/function.py +110 -0
- pixeltable/func/function_registry.py +227 -0
- pixeltable/func/globals.py +36 -0
- pixeltable/func/nos_function.py +202 -0
- pixeltable/func/signature.py +166 -0
- pixeltable/func/udf.py +163 -0
- pixeltable/functions/__init__.py +52 -103
- pixeltable/functions/eval.py +216 -0
- pixeltable/functions/fireworks.py +34 -0
- pixeltable/functions/huggingface.py +120 -0
- pixeltable/functions/image.py +16 -0
- pixeltable/functions/openai.py +256 -0
- pixeltable/functions/pil/image.py +148 -7
- pixeltable/functions/string.py +13 -0
- pixeltable/functions/together.py +122 -0
- pixeltable/functions/util.py +41 -0
- pixeltable/functions/video.py +62 -0
- pixeltable/iterators/__init__.py +3 -0
- pixeltable/iterators/base.py +48 -0
- pixeltable/iterators/document.py +311 -0
- pixeltable/iterators/video.py +89 -0
- pixeltable/metadata/__init__.py +54 -0
- pixeltable/metadata/converters/convert_10.py +18 -0
- pixeltable/metadata/schema.py +211 -0
- pixeltable/plan.py +656 -0
- pixeltable/store.py +418 -182
- pixeltable/tests/conftest.py +146 -88
- pixeltable/tests/functions/test_fireworks.py +42 -0
- pixeltable/tests/functions/test_functions.py +60 -0
- pixeltable/tests/functions/test_huggingface.py +158 -0
- pixeltable/tests/functions/test_openai.py +152 -0
- pixeltable/tests/functions/test_together.py +111 -0
- pixeltable/tests/test_audio.py +65 -0
- pixeltable/tests/test_catalog.py +27 -0
- pixeltable/tests/test_client.py +14 -14
- pixeltable/tests/test_component_view.py +370 -0
- pixeltable/tests/test_dataframe.py +439 -0
- pixeltable/tests/test_dirs.py +78 -62
- pixeltable/tests/test_document.py +120 -0
- pixeltable/tests/test_exprs.py +592 -135
- pixeltable/tests/test_function.py +297 -67
- pixeltable/tests/test_migration.py +43 -0
- pixeltable/tests/test_nos.py +54 -0
- pixeltable/tests/test_snapshot.py +208 -0
- pixeltable/tests/test_table.py +1195 -263
- pixeltable/tests/test_transactional_directory.py +42 -0
- pixeltable/tests/test_types.py +5 -11
- pixeltable/tests/test_video.py +151 -34
- pixeltable/tests/test_view.py +530 -0
- pixeltable/tests/utils.py +320 -45
- pixeltable/tool/create_test_db_dump.py +149 -0
- pixeltable/tool/create_test_video.py +81 -0
- pixeltable/type_system.py +445 -124
- pixeltable/utils/__init__.py +17 -46
- pixeltable/utils/arrow.py +98 -0
- pixeltable/utils/clip.py +12 -15
- pixeltable/utils/coco.py +136 -0
- pixeltable/utils/documents.py +39 -0
- pixeltable/utils/filecache.py +195 -0
- pixeltable/utils/help.py +11 -0
- pixeltable/utils/hf_datasets.py +157 -0
- pixeltable/utils/media_store.py +76 -0
- pixeltable/utils/parquet.py +167 -0
- pixeltable/utils/pytorch.py +91 -0
- pixeltable/utils/s3.py +13 -0
- pixeltable/utils/sql.py +17 -0
- pixeltable/utils/transactional_directory.py +35 -0
- pixeltable-0.2.4.dist-info/LICENSE +18 -0
- pixeltable-0.2.4.dist-info/METADATA +127 -0
- pixeltable-0.2.4.dist-info/RECORD +132 -0
- {pixeltable-0.1.0.dist-info → pixeltable-0.2.4.dist-info}/WHEEL +1 -1
- pixeltable/catalog.py +0 -1421
- pixeltable/exprs.py +0 -1745
- pixeltable/function.py +0 -269
- pixeltable/functions/clip.py +0 -10
- pixeltable/functions/pil/__init__.py +0 -23
- pixeltable/functions/tf.py +0 -21
- pixeltable/index.py +0 -57
- pixeltable/tests/test_dict.py +0 -24
- pixeltable/tests/test_functions.py +0 -11
- pixeltable/tests/test_tf.py +0 -69
- pixeltable/tf.py +0 -33
- pixeltable/utils/tf.py +0 -33
- pixeltable/utils/video.py +0 -32
- pixeltable-0.1.0.dist-info/METADATA +0 -34
- pixeltable-0.1.0.dist-info/RECORD +0 -36
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Optional, List, Any, Dict, Tuple
|
|
3
|
+
|
|
4
|
+
import sqlalchemy as sql
|
|
5
|
+
|
|
6
|
+
from .globals import ComparisonOperator
|
|
7
|
+
from .expr import Expr
|
|
8
|
+
from .predicate import Predicate
|
|
9
|
+
from .data_row import DataRow
|
|
10
|
+
from .row_builder import RowBuilder
|
|
11
|
+
import pixeltable.catalog as catalog
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Comparison(Predicate):
    """Predicate that applies a ComparisonOperator to two operand expressions.

    Both operands live in ``self.components``: the left operand at index 0 and the
    right operand at index 1.
    """

    def __init__(self, operator: ComparisonOperator, op1: Expr, op2: Expr):
        """Record the operator and its two operands, then derive this expr's id."""
        super().__init__()
        self.operator = operator
        # order matters: [left, right]
        self.components = [op1, op2]
        self.id = self._create_id()

    def __str__(self) -> str:
        """Render as '<left> <operator> <right>'."""
        lhs, rhs = self._op1, self._op2
        return f'{lhs} {self.operator} {rhs}'

    def _equals(self, other: Comparison) -> bool:
        """Compare the operator only; the operands are not inspected here."""
        same_operator = self.operator == other.operator
        return same_operator

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        """Extend the base-class id attributes with the operator's value."""
        inherited = super()._id_attrs()
        return inherited + [('operator', self.operator.value)]

    @property
    def _op1(self) -> Expr:
        """Left operand."""
        return self.components[0]

    @property
    def _op2(self) -> Expr:
        """Right operand."""
        return self.components[1]

    def _comparator(self) -> Optional[Any]:
        """Return the two-argument function implementing self.operator.

        Returns None when self.operator matches none of the six known operators.
        """
        candidates = (
            (ComparisonOperator.LT, lambda a, b: a < b),
            (ComparisonOperator.LE, lambda a, b: a <= b),
            (ComparisonOperator.EQ, lambda a, b: a == b),
            (ComparisonOperator.NE, lambda a, b: a != b),
            (ComparisonOperator.GT, lambda a, b: a > b),
            (ComparisonOperator.GE, lambda a, b: a >= b),
        )
        for candidate, compare in candidates:
            if self.operator == candidate:
                return compare
        return None

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        """Build the SQL comparison of the two operands.

        Returns None if either operand has no SQL form (its sql_expr() returns None)
        or if the operator is not one of the six known operators.
        """
        left = self._op1.sql_expr()
        right = self._op2.sql_expr()
        if left is None or right is None:
            return None
        compare = self._comparator()
        if compare is None:
            return None
        return compare(left, right)

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """Compare the operands' values in data_row and store the result in this expr's slot.

        Nothing is written if the operator is not one of the six known operators.
        """
        compare = self._comparator()
        if compare is None:
            return
        data_row[self.slot_idx] = compare(data_row[self._op1.slot_idx], data_row[self._op2.slot_idx])

    def _as_dict(self) -> Dict:
        """Serialize: the operator's value first, followed by the base-class entries."""
        serialized = {'operator': self.operator.value}
        serialized.update(super()._as_dict())
        return serialized

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        """Rebuild an instance from the dict produced by _as_dict() and its first two components."""
        assert 'operator' in d
        comparison_op = ComparisonOperator(d['operator'])
        return cls(comparison_op, components[0], components[1])
|
|
77
|
+
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Optional, List, Any, Dict, Tuple, Callable
|
|
3
|
+
import operator
|
|
4
|
+
|
|
5
|
+
import sqlalchemy as sql
|
|
6
|
+
|
|
7
|
+
from .expr import Expr
|
|
8
|
+
from .globals import LogicalOperator
|
|
9
|
+
from .predicate import Predicate
|
|
10
|
+
from .data_row import DataRow
|
|
11
|
+
from .row_builder import RowBuilder
|
|
12
|
+
import pixeltable.catalog as catalog
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class CompoundPredicate(Predicate):
    """Predicate that combines other predicates with a LogicalOperator (AND, OR or NOT).

    The operand predicates are stored in ``self.components``.
    """

    def __init__(self, operator: LogicalOperator, operands: List[Predicate]):
        """Store the operator and its operands.

        NOT takes exactly one operand; any other operator takes at least two, and
        nested operands that use the same operator are flattened into this one.
        """
        super().__init__()
        self.operator = operator
        # operands are stored in self.components
        if self.operator == LogicalOperator.NOT:
            assert len(operands) == 1
            # note: the caller's list object is used directly, not copied
            self.components = operands
        else:
            assert len(operands) > 1
            # NOTE(review): this attribute is never read in this class; the merged
            # operands end up in self.components (presumably created by the base class)
            self.operands: List[Predicate] = []
            for candidate in operands:
                self._merge_operand(candidate)

        self.id = self._create_id()

    def __str__(self) -> str:
        """Render NOT as '~(x)'; otherwise join the parenthesized operands with the operator."""
        if self.operator == LogicalOperator.NOT:
            return f'~({self.components[0]})'
        separator = f' {self.operator} '
        parenthesized = [f'({component})' for component in self.components]
        return separator.join(parenthesized)

    @classmethod
    def make_conjunction(cls, operands: List[Predicate]) -> Optional[Predicate]:
        """AND the given predicates together.

        Returns None for an empty list and the sole element for a one-element list.
        """
        count = len(operands)
        if count == 0:
            return None
        if count == 1:
            return operands[0]
        return CompoundPredicate(LogicalOperator.AND, operands)

    def _merge_operand(self, operand: Predicate) -> None:
        """
        Merge this operand, if possible, otherwise simply record it.
        """
        mergeable = isinstance(operand, CompoundPredicate) and operand.operator == self.operator
        if not mergeable:
            self.components.append(operand)
            return
        # same operator: absorb the nested predicate's operands one by one
        for nested in operand.components:
            self._merge_operand(nested)

    def _equals(self, other: CompoundPredicate) -> bool:
        """Compare the operator only; the operands are not inspected here."""
        same_operator = self.operator == other.operator
        return same_operator

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        """Extend the base-class id attributes with the operator's value."""
        inherited = super()._id_attrs()
        return inherited + [('operator', self.operator.value)]

    def split_conjuncts(
            self, condition: Callable[[Predicate], bool]) -> Tuple[List[Predicate], Optional[Predicate]]:
        """Partition the operands of a conjunction by condition.

        Returns the operands satisfying condition, plus the conjunction of the rest
        (None if nothing is left). OR and NOT predicates defer to the base class.
        """
        if self.operator in (LogicalOperator.OR, LogicalOperator.NOT):
            return super().split_conjuncts(condition)
        # two separate passes, so condition is invoked twice per operand
        selected = [component for component in self.components if condition(component)]
        remaining = [component for component in self.components if not condition(component)]
        return (selected, self.make_conjunction(remaining))

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        """Build the SQL form of this predicate; None if any operand has no SQL form."""
        translated = [component.sql_expr() for component in self.components]
        for clause in translated:
            # identity check on purpose: '==' on a SQL clause builds another clause
            if clause is None:
                return None
        if self.operator == LogicalOperator.NOT:
            assert len(translated) == 1
            return sql.not_(translated[0])
        assert len(translated) > 1
        if self.operator == LogicalOperator.AND:
            combine = sql.and_
        else:
            combine = sql.or_
        return combine(*translated)

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """Combine the operands' values in data_row and store the result in this expr's slot."""
        if self.operator == LogicalOperator.NOT:
            data_row[self.slot_idx] = not data_row[self.components[0].slot_idx]
            return
        # start from the operator's identity element and fold the operands in;
        # operator.and_/or_ do not short-circuit, so every operand slot is read
        if self.operator == LogicalOperator.AND:
            result, combine = True, operator.and_
        else:
            result, combine = False, operator.or_
        for component in self.components:
            result = combine(result, data_row[component.slot_idx])
        data_row[self.slot_idx] = result

    def _as_dict(self) -> Dict:
        """Serialize: the operator's value first, followed by the base-class entries."""
        serialized = {'operator': self.operator.value}
        serialized.update(super()._as_dict())
        return serialized

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        """Rebuild an instance from the dict produced by _as_dict() and its components."""
        assert 'operator' in d
        logical_op = LogicalOperator(d['operator'])
        return cls(logical_op, components)
|
|
98
|
+
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import io
|
|
4
|
+
import urllib.parse
|
|
5
|
+
import urllib.request
|
|
6
|
+
from typing import Optional, List, Any, Tuple
|
|
7
|
+
|
|
8
|
+
import PIL
|
|
9
|
+
import numpy as np
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DataRow:
    """
    Encapsulates all data and execution state needed by RowBuilder and DataRowBatch:
    - state for in-memory computation
    - state for storing the data
    This is not meant to be a black-box abstraction.

    In-memory representations by column type:
    - StringType: str
    - IntType: int
    - FloatType: float
    - BoolType: bool
    - TimestampType: datetime.datetime
    - JsonType: json-serializable object
    - ArrayType: numpy.ndarray
    - ImageType: PIL.Image.Image
    - VideoType: local path if available, otherwise url
    """

    def __init__(self, size: int, img_slot_idxs: List[int], media_slot_idxs: List[int], array_slot_idxs: List[int]):
        """Create a row with `size` empty slots.

        Args:
            size: number of slots in the row
            img_slot_idxs: indices of slots that hold images
            media_slot_idxs: indices of slots that hold media other than images
            array_slot_idxs: indices of slots that hold arrays
        """
        self.vals: List[Any] = [None] * size  # either cell values or exceptions
        self.has_val = [False] * size
        self.excs: List[Optional[Exception]] = [None] * size

        # control structures that are shared across all DataRows in a batch
        self.img_slot_idxs = img_slot_idxs
        self.media_slot_idxs = media_slot_idxs  # all media types aside from image
        self.array_slot_idxs = array_slot_idxs

        # the primary key of a store row is a sequence of ints (the number is different for table vs view)
        self.pk: Optional[Tuple[int, ...]] = None

        # file_urls:
        # - stored url of file for media in vals[i]
        # - None if vals[i] is not media type
        # - not None if file_paths[i] is not None
        self.file_urls: List[Optional[str]] = [None] * size

        # file_paths:
        # - local path of media file in vals[i]; points to the file cache if file_urls[i] is remote
        # - None if vals[i] is not a media type or if there is no local file yet for file_urls[i]
        self.file_paths: List[Optional[str]] = [None] * size

    def clear(self) -> None:
        """Reset every slot, the primary key and all file urls/paths; the slot count is kept."""
        size = len(self.vals)
        self.vals = [None] * size
        self.has_val = [False] * size
        self.excs = [None] * size
        self.pk = None
        self.file_urls = [None] * size
        self.file_paths = [None] * size

    def copy(self, target: 'DataRow') -> None:
        """Create a copy of the contents of this DataRow in target

        The copy shares the cell values, but not the control structures (eg, self.has_val), because these
        need to be independently updateable.
        """
        # shallow list copies: the lists are independent, the objects inside them are shared
        target.vals = self.vals.copy()
        target.has_val = self.has_val.copy()
        target.excs = self.excs.copy()
        target.pk = self.pk
        target.file_urls = self.file_urls.copy()
        target.file_paths = self.file_paths.copy()

    def set_pk(self, pk: Tuple[int, ...]) -> None:
        """Set the primary key of this row."""
        self.pk = pk

    def has_exc(self, slot_idx: int) -> bool:
        """Return True if an exception is recorded for the slot."""
        return self.excs[slot_idx] is not None

    def get_exc(self, slot_idx: int) -> Exception:
        """Return the exception recorded for the slot; the slot must hold an exception and no value."""
        assert self.has_val[slot_idx] is False
        assert self.excs[slot_idx] is not None
        return self.excs[slot_idx]

    def set_exc(self, slot_idx: int, exc: Exception) -> None:
        """Record an exception for the slot, discarding any value and file url/path it held."""
        assert self.excs[slot_idx] is None
        self.excs[slot_idx] = exc

        if self.has_val[slot_idx]:
            # eg. during validation, where contents of file is found invalid
            self.has_val[slot_idx] = False
            self.vals[slot_idx] = None
            self.file_paths[slot_idx] = None
            self.file_urls[slot_idx] = None

    def __getitem__(self, index: object) -> Any:
        """Returns in-memory value, ie, what is needed for expr evaluation

        An image slot that has a url but no in-memory value yet is loaded from its local file
        on first access.
        """
        assert self.has_val[index], index

        if self.file_urls[index] is not None and index in self.img_slot_idxs:
            # if we need to load this from a file, it should have been materialized locally
            assert self.file_paths[index] is not None
            if self.vals[index] is None:
                # importing the PIL package alone does not load its Image submodule,
                # so import it explicitly before use
                import PIL.Image
                self.vals[index] = PIL.Image.open(self.file_paths[index])
                self.vals[index].load()

        return self.vals[index]

    def get_stored_val(self, index: object) -> Any:
        """Return the value that gets stored in the db

        - image/media slot with a url: the url
        - array slot with a value: the array serialized with numpy.save(), as bytes
        - anything else: the in-memory value
        """
        assert self.excs[index] is None
        assert self.has_val[index]

        if self.file_urls[index] is not None and (index in self.img_slot_idxs or index in self.media_slot_idxs):
            # if this is an image or other media type we want to store, we should have a url
            return self.file_urls[index]

        if self.vals[index] is not None and index in self.array_slot_idxs:
            assert isinstance(self.vals[index], np.ndarray)
            np_array = self.vals[index]
            buffer = io.BytesIO()
            np.save(buffer, np_array)
            return buffer.getvalue()

        return self.vals[index]

    def __setitem__(self, idx: object, val: Any) -> None:
        """Assign in-memory cell value

        This allows overwriting

        A str assigned to an image/media slot is interpreted as a local path, a file: url or a
        remote url, and is recorded in file_urls/file_paths. bytes assigned to an array slot are
        deserialized with numpy.load().
        """
        assert self.excs[idx] is None

        if (idx in self.img_slot_idxs or idx in self.media_slot_idxs) and isinstance(val, str):
            # this is either a local file path or a URL
            parsed = urllib.parse.urlparse(val)
            # Determine if this is a local file or a remote URL. If the scheme length is <= 1,
            # we assume it's a local file. (This is because a Windows path will be interpreted
            # by urllib as a URL with scheme equal to the drive letter.)
            if len(parsed.scheme) <= 1 or parsed.scheme == 'file':
                # local file path
                assert self.file_urls[idx] is None and self.file_paths[idx] is None
                if len(parsed.scheme) <= 1:
                    self.file_urls[idx] = urllib.parse.urljoin('file:', urllib.request.pathname2url(val))
                    self.file_paths[idx] = val
                else:
                    self.file_urls[idx] = val
                    # url2pathname() converts to the local path syntax (needed on Windows) and
                    # already percent-decodes; decoding a second time would corrupt file names
                    # that contain a literal '%'
                    self.file_paths[idx] = urllib.request.url2pathname(parsed.path)
            else:
                # URL
                assert self.file_urls[idx] is None
                self.file_urls[idx] = val

            if idx in self.media_slot_idxs:
                # non-image media: the in-memory value is the local path if we have one, else the url
                self.vals[idx] = self.file_paths[idx] if self.file_paths[idx] is not None else self.file_urls[idx]
        elif idx in self.array_slot_idxs and isinstance(val, bytes):
            self.vals[idx] = np.load(io.BytesIO(val))
        else:
            self.vals[idx] = val
        self.has_val[idx] = True

    def set_file_path(self, idx: object, path: str) -> None:
        """Augment an existing url with a local file path"""
        assert self.has_val[idx]
        assert idx in self.img_slot_idxs or idx in self.media_slot_idxs
        self.file_paths[idx] = path
        if idx in self.media_slot_idxs:
            # for non-image media the in-memory value is the path itself
            self.vals[idx] = path

    def flush_img(self, index: object, filepath: Optional[str] = None) -> None:
        """Discard the in-memory value and save it to a local file, if filepath is not None

        Does nothing if the slot has no in-memory value. If the slot has no file yet and no
        filepath is given, the slot is marked as having no value. The file is written as JPEG.
        """
        if self.vals[index] is None:
            return
        assert self.excs[index] is None
        if self.file_paths[index] is None:
            if filepath is not None:
                # we want to save this to a file
                self.file_paths[index] = filepath
                self.file_urls[index] = urllib.parse.urljoin('file:', urllib.request.pathname2url(filepath))
                self.vals[index].save(filepath, format='JPEG')
            else:
                # we discard the content of this cell
                self.has_val[index] = False
        # else: we already have a file for this image, nothing left to do
        self.vals[index] = None
|
|
195
|
+
|