pyspiral 0.2.5__cp310-abi3-macosx_11_0_arm64.whl → 0.4.0__cp310-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114):
  1. {pyspiral-0.2.5.dist-info → pyspiral-0.4.0.dist-info}/METADATA +12 -14
  2. pyspiral-0.4.0.dist-info/RECORD +98 -0
  3. {pyspiral-0.2.5.dist-info → pyspiral-0.4.0.dist-info}/WHEEL +1 -1
  4. spiral/__init__.py +6 -7
  5. spiral/_lib.abi3.so +0 -0
  6. spiral/adbc.py +21 -14
  7. spiral/api/__init__.py +15 -172
  8. spiral/api/admin.py +12 -26
  9. spiral/api/client.py +160 -0
  10. spiral/api/filesystems.py +100 -72
  11. spiral/api/organizations.py +45 -58
  12. spiral/api/projects.py +171 -134
  13. spiral/api/telemetry.py +19 -0
  14. spiral/api/types.py +20 -0
  15. spiral/api/workloads.py +32 -25
  16. spiral/{arrow.py → arrow_.py} +12 -0
  17. spiral/cli/__init__.py +2 -5
  18. spiral/cli/admin.py +7 -12
  19. spiral/cli/app.py +23 -6
  20. spiral/cli/console.py +1 -1
  21. spiral/cli/fs.py +83 -18
  22. spiral/cli/iceberg/__init__.py +7 -0
  23. spiral/cli/iceberg/namespaces.py +47 -0
  24. spiral/cli/iceberg/tables.py +60 -0
  25. spiral/cli/indexes/__init__.py +19 -0
  26. spiral/cli/login.py +14 -5
  27. spiral/cli/orgs.py +90 -0
  28. spiral/cli/printer.py +9 -1
  29. spiral/cli/projects.py +136 -0
  30. spiral/cli/state.py +2 -0
  31. spiral/cli/tables/__init__.py +121 -0
  32. spiral/cli/telemetry.py +18 -0
  33. spiral/cli/types.py +8 -10
  34. spiral/cli/{workload.py → workloads.py} +11 -11
  35. spiral/{catalog.py → client.py} +22 -21
  36. spiral/core/client/__init__.pyi +117 -0
  37. spiral/core/index/__init__.pyi +15 -0
  38. spiral/core/table/__init__.pyi +108 -0
  39. spiral/core/{manifests → table/manifests}/__init__.pyi +5 -23
  40. spiral/core/table/metastore/__init__.pyi +62 -0
  41. spiral/core/{spec → table/spec}/__init__.pyi +49 -92
  42. spiral/datetime_.py +27 -0
  43. spiral/expressions/__init__.py +40 -17
  44. spiral/expressions/base.py +5 -5
  45. spiral/expressions/list_.py +1 -1
  46. spiral/expressions/mp4.py +62 -0
  47. spiral/expressions/png.py +18 -0
  48. spiral/expressions/qoi.py +18 -0
  49. spiral/expressions/refs.py +23 -9
  50. spiral/expressions/struct.py +7 -5
  51. spiral/expressions/text.py +62 -0
  52. spiral/expressions/tiff.py +88 -88
  53. spiral/expressions/udf.py +3 -3
  54. spiral/iceberg/__init__.py +3 -0
  55. spiral/iceberg/client.py +33 -0
  56. spiral/indexes/__init__.py +5 -0
  57. spiral/indexes/client.py +137 -0
  58. spiral/indexes/index.py +34 -0
  59. spiral/indexes/scan.py +22 -0
  60. spiral/project.py +19 -110
  61. spiral/{proto → protogen}/_/scandal/__init__.py +32 -77
  62. spiral/protogen/_/spiral/table/__init__.py +22 -0
  63. spiral/protogen/substrait/__init__.py +3399 -0
  64. spiral/protogen/substrait/extensions/__init__.py +115 -0
  65. spiral/server.py +17 -0
  66. spiral/settings.py +31 -87
  67. spiral/substrait_.py +10 -6
  68. spiral/tables/__init__.py +12 -0
  69. spiral/tables/client.py +130 -0
  70. spiral/{dataset.py → tables/dataset.py} +36 -25
  71. spiral/tables/debug/manifests.py +70 -0
  72. spiral/tables/debug/metrics.py +56 -0
  73. spiral/{debug.py → tables/debug/scan.py} +6 -9
  74. spiral/tables/maintenance.py +12 -0
  75. spiral/tables/scan.py +193 -0
  76. spiral/tables/snapshot.py +78 -0
  77. spiral/tables/table.py +157 -0
  78. spiral/tables/transaction.py +52 -0
  79. pyspiral-0.2.5.dist-info/RECORD +0 -81
  80. spiral/api/tables.py +0 -94
  81. spiral/api/tokens.py +0 -56
  82. spiral/authn/authn.py +0 -89
  83. spiral/authn/device.py +0 -206
  84. spiral/authn/github_.py +0 -33
  85. spiral/authn/modal_.py +0 -18
  86. spiral/cli/org.py +0 -90
  87. spiral/cli/project.py +0 -107
  88. spiral/cli/table.py +0 -20
  89. spiral/cli/token.py +0 -27
  90. spiral/config.py +0 -26
  91. spiral/core/core/__init__.pyi +0 -53
  92. spiral/core/metastore/__init__.pyi +0 -91
  93. spiral/proto/_/spfs/__init__.py +0 -36
  94. spiral/proto/_/spiral/table/__init__.py +0 -225
  95. spiral/proto/_/spiraldb/metastore/__init__.py +0 -499
  96. spiral/proto/__init__.py +0 -0
  97. spiral/proto/scandal/__init__.py +0 -45
  98. spiral/proto/spiral/__init__.py +0 -0
  99. spiral/proto/spiral/table/__init__.py +0 -96
  100. spiral/scan_.py +0 -168
  101. spiral/table.py +0 -157
  102. {pyspiral-0.2.5.dist-info → pyspiral-0.4.0.dist-info}/entry_points.txt +0 -0
  103. /spiral/{authn/__init__.py → core/__init__.pyi} +0 -0
  104. /spiral/{core → protogen/_}/__init__.py +0 -0
  105. /spiral/{proto/_ → protogen/_/arrow}/__init__.py +0 -0
  106. /spiral/{proto/_/arrow → protogen/_/arrow/flight}/__init__.py +0 -0
  107. /spiral/{proto/_/arrow/flight → protogen/_/arrow/flight/protocol}/__init__.py +0 -0
  108. /spiral/{proto → protogen}/_/arrow/flight/protocol/sql/__init__.py +0 -0
  109. /spiral/{proto/_/arrow/flight/protocol → protogen/_/spiral}/__init__.py +0 -0
  110. /spiral/{proto → protogen/_}/substrait/__init__.py +0 -0
  111. /spiral/{proto → protogen/_}/substrait/extensions/__init__.py +0 -0
  112. /spiral/{proto/_/spiral → protogen}/__init__.py +0 -0
  113. /spiral/{proto → protogen}/util.py +0 -0
  114. /spiral/{proto/_/spiraldb → tables/debug}/__init__.py +0 -0
@@ -14,6 +14,24 @@ class ColumnGroup:
14
14
  @staticmethod
15
15
  def from_str(path: str) -> ColumnGroup: ...
16
16
 
17
+ class KeySpaceMetadata:
18
+ def __init__(
19
+ self,
20
+ *,
21
+ manifest_handle: ManifestHandle | None,
22
+ last_modified_at: int,
23
+ ): ...
24
+
25
+ manifest_handle: ManifestHandle | None
26
+ last_modified_at: int
27
+
28
+ def asof(self, asof: int) -> KeySpaceMetadata:
29
+ """Returns the metadata as of a given timestamp. Currently just filtering versioned schemas."""
30
+ ...
31
+
32
+ def apply_wal(self, wal: WriteAheadLog) -> KeySpaceMetadata:
33
+ """Applies the given WAL to the metadata."""
34
+
17
35
  class ColumnGroupMetadata:
18
36
  def __init__(
19
37
  self,
@@ -44,57 +62,36 @@ class ColumnGroupMetadata:
44
62
  def apply_wal(self, wal: WriteAheadLog) -> ColumnGroupMetadata:
45
63
  """Applies the given WAL to the metadata."""
46
64
 
47
- def __bytes__(self):
48
- """Serializes the ColumnGroupMetadata to a protobuf buffer."""
49
-
50
- @staticmethod
51
- def from_proto(buffer: bytes) -> ColumnGroupMetadata:
52
- """Deserializes a ColumnGroupMetadata from a protobuf buffer."""
53
- ...
54
-
55
65
  class LogEntry:
56
66
  ts: int
57
- operation: KeySpaceWriteOp | FragmentSetWriteOp | ConfigurationOp | SchemaEvolutionOp | SchemaBreakOp
67
+ operation: (
68
+ KeySpaceWriteOp
69
+ | ColumnGroupWriteOp
70
+ | SchemaEvolutionOp
71
+ | SchemaBreakOp
72
+ | KeySpaceCompactOp
73
+ | ColumnGroupCompactOp
74
+ )
58
75
 
59
76
  def column_group(self) -> ColumnGroup | None:
60
77
  """Returns the column group of the entry if it is associated with one."""
61
78
 
62
- def replace_timestamp(self, ts: int) -> LogEntry:
63
- """Returns a copy of the entry with the timestamp replaced."""
64
-
65
- @staticmethod
66
- def schema_break(*, column_group: ColumnGroup, removed_column_names: list[str]) -> LogEntry: ...
67
- @staticmethod
68
- def schema_evolution(*, column_group: ColumnGroup, new_schema: Schema) -> LogEntry: ...
69
- @staticmethod
70
- def ks_write(
71
- *,
72
- ks_id: str,
73
- manifest_handle: ManifestHandle,
74
- ) -> LogEntry: ...
75
- @staticmethod
76
- def fs_write(
77
- *,
78
- column_group: ColumnGroup,
79
- fs_id: str,
80
- fs_level: FragmentLevel,
81
- manifest_handle: ManifestHandle,
82
- key_span: KeySpan,
83
- key_extent: KeyExtent,
84
- column_ids: list[str],
85
- ) -> LogEntry: ...
86
-
87
79
  class FileFormat:
88
80
  def __init__(self, value: int): ...
89
81
 
90
82
  Parquet: FileFormat
91
83
  Protobuf: FileFormat
92
84
  BinaryArray: FileFormat
85
+ Vortex: FileFormat
93
86
 
94
87
  def __int__(self) -> int:
95
88
  """Returns the protobuf enum int value."""
96
89
  ...
97
90
 
91
+ def __str__(self) -> str:
92
+ """Returns the string representation of the file format."""
93
+ ...
94
+
98
95
  class FragmentLevel:
99
96
  L0: FragmentLevel
100
97
  L1: FragmentLevel
@@ -104,15 +101,9 @@ class FragmentLevel:
104
101
  ...
105
102
 
106
103
  class Key:
107
- def __init__(self, key: bytes): ...
108
-
109
104
  key: bytes
110
105
 
111
- def __add__(self, other: Key) -> Key:
112
- """Concatenates two keys.
113
-
114
- TODO(ngates): remove this function. It should not be necessary to concatenate keys."""
115
-
106
+ def __init__(self, key: bytes): ...
116
107
  def __bytes__(self): ...
117
108
  def step(self) -> Key:
118
109
  """Returns the next key in the key space."""
@@ -121,8 +112,6 @@ class Key:
121
112
  def min() -> Key: ...
122
113
  @staticmethod
123
114
  def max() -> Key: ...
124
- @staticmethod
125
- def from_array_tuple(array_tuple: tuple[pa.Array]) -> Key: ...
126
115
 
127
116
  class KeyExtent:
128
117
  """An inclusive range of keys."""
@@ -132,9 +121,6 @@ class KeyExtent:
132
121
  min: Key
133
122
  max: Key
134
123
 
135
- def to_range(self) -> KeyRange:
136
- """Turn this inclusive key extent into an exclusive key range."""
137
-
138
124
  def union(self, key_extent: KeyExtent) -> KeyExtent: ...
139
125
  def __or__(self, other: KeyExtent) -> KeyExtent: ...
140
126
  def intersection(self, key_extent: KeyExtent) -> KeyExtent | None: ...
@@ -142,30 +128,6 @@ class KeyExtent:
142
128
  def contains(self, item: Key) -> bool: ...
143
129
  def __contains__(self, item: Key) -> bool: ...
144
130
 
145
- class KeyRange:
146
- """A right-exclusive range of keys."""
147
-
148
- def __init__(self, *, begin: Key, end: Key): ...
149
-
150
- begin: Key
151
- end: Key
152
-
153
- def union(self, other: KeyRange) -> KeyRange: ...
154
- def __or__(self, other: KeyRange) -> KeyRange: ...
155
- def intersection(self, key_extent: KeyRange) -> KeyRange | None: ...
156
- def __and__(self, other: KeyRange) -> KeyRange | None: ...
157
- def contains(self, item: Key) -> bool: ...
158
- def __contains__(self, item: Key) -> bool: ...
159
- def is_disjoint(self, key_range: KeyRange) -> bool:
160
- return self.end <= key_range.begin or self.begin >= key_range.end
161
-
162
- @staticmethod
163
- def beginning_with(begin: Key) -> KeyRange: ...
164
- @staticmethod
165
- def ending_with(end: Key) -> KeyRange: ...
166
- @staticmethod
167
- def full() -> KeyRange: ...
168
-
169
131
  class KeySpan:
170
132
  """An exclusive range of keys as indexed by their position in a key space."""
171
133
 
@@ -179,26 +141,26 @@ class KeySpan:
179
141
  def union(self, other: KeySpan) -> KeySpan: ...
180
142
  def __or__(self, other: KeySpan) -> KeySpan: ...
181
143
 
182
- class KeyMap:
183
- """Displacement map."""
184
-
185
144
  class ManifestHandle:
186
- def __init__(self, id: str, format: FileFormat, file_size: int, spfs_format_metadata: bytes | None): ...
187
-
188
145
  id: str
189
146
  format: FileFormat
190
147
  file_size: int
191
- spfs_format_metadata: bytes | None
192
148
 
193
149
  class Schema:
194
150
  def to_arrow(self) -> pa.Schema:
195
151
  """Returns the Arrow schema."""
196
152
  ...
197
-
198
153
  @staticmethod
199
154
  def from_arrow(arrow: pa.Schema) -> Schema:
200
155
  """Creates a Schema from an Arrow schema."""
201
156
  ...
157
+ def __len__(self):
158
+ """Returns the number of columns in the schema."""
159
+ ...
160
+ @property
161
+ def names(self) -> list[str]:
162
+ """Returns the names of the columns in the schema."""
163
+ ...
202
164
 
203
165
  class VersionedSchema:
204
166
  ts: int
@@ -209,24 +171,28 @@ class KeySpaceWriteOp:
209
171
  ks_id: str
210
172
  manifest_handle: ManifestHandle
211
173
 
212
- class FragmentSetWriteOp:
174
+ class ColumnGroupWriteOp:
213
175
  column_group: ColumnGroup
214
- fs_id: str
215
- fs_level: FragmentLevel
176
+ level: FragmentLevel
216
177
  manifest_handle: ManifestHandle
217
178
  key_span: KeySpan
218
179
  key_extent: KeyExtent
219
180
  column_ids: list[str]
220
181
 
221
- class ConfigurationOp:
222
- column_group: ColumnGroup
223
-
224
182
  class SchemaEvolutionOp:
225
183
  column_group: ColumnGroup
226
184
 
227
185
  class SchemaBreakOp:
228
186
  column_group: ColumnGroup
229
187
 
188
+ class KeySpaceCompactOp:
189
+ ks_ids: list[str]
190
+ moved_ks_ids: list[str]
191
+
192
+ class ColumnGroupCompactOp:
193
+ column_group: ColumnGroup
194
+ fragment_ids: list[int]
195
+
230
196
  class WriteAheadLog:
231
197
  def __init__(
232
198
  self,
@@ -237,7 +203,6 @@ class WriteAheadLog:
237
203
 
238
204
  entries: list[LogEntry]
239
205
  truncated_up_to: int
240
- ks_manifest_handle: ManifestHandle | None
241
206
 
242
207
  @property
243
208
  def last_modified_at(self) -> int:
@@ -247,11 +212,3 @@ class WriteAheadLog:
247
212
  self, asof: int | None = None, since: int | None = None, column_group: ColumnGroup | None = None
248
213
  ) -> WriteAheadLog:
249
214
  """Filters the WAL to entries by the given parameters."""
250
-
251
- def __bytes__(self):
252
- """Serializes the ColumnGroupMetadata to a protobuf buffer."""
253
-
254
- @staticmethod
255
- def from_proto(buffer: bytes) -> WriteAheadLog:
256
- """Deserializes a WriteAheadLog from a protobuf buffer."""
257
- ...
spiral/datetime_.py ADDED
@@ -0,0 +1,27 @@
1
import warnings
from datetime import datetime, timedelta, timezone, tzinfo

# The reference instant (timestamp 0, expressed in UTC) that microsecond timestamps are measured from.
_THE_EPOCH = datetime.fromtimestamp(0, tz=timezone.utc)


def local_tz() -> tzinfo:
    """Determine this machine's local timezone.

    Returns:
        The tzinfo attached to the current local time. Note that this reflects the
        UTC offset in effect *now*; it is not a DST-aware zone for arbitrary dates.

    Raises:
        ValueError: If the local timezone cannot be determined.
    """
    tz = datetime.now().astimezone().tzinfo
    if tz is None:
        raise ValueError("Could not determine this machine's local timezone.")
    return tz


def timestamp_micros(instant: datetime) -> int:
    """The number of microseconds between the epoch and the given instant.

    Args:
        instant: The datetime to convert. A timezone-naive value is assumed to be
            local time, and a warning is emitted when that assumption is made.

    Returns:
        Whole microseconds elapsed since the epoch (negative for earlier instants).
    """
    if instant.tzinfo is None:
        warnings.warn("assuming timezone-naive datetime is local time", stacklevel=2)
        try:
            # Resolve the local offset that applies at `instant` itself. Stamping the value
            # with the *current* offset is off by the DST delta whenever `instant` falls in
            # the other half of the year.
            instant = instant.astimezone()
        except (OverflowError, OSError, ValueError):
            # The platform could not convert this instant (e.g. far out of range);
            # fall back to the offset currently in effect.
            instant = instant.replace(tzinfo=local_tz())
    return (instant - _THE_EPOCH) // timedelta(microseconds=1)


def from_timestamp_micros(ts: int) -> datetime:
    """Convert a timestamp in microseconds to a datetime.

    Args:
        ts: Microseconds since the epoch; must be non-negative.

    Returns:
        The corresponding timezone-aware datetime in UTC.

    Raises:
        ValueError: If `ts` is negative.
    """
    if ts < 0:
        raise ValueError("Timestamp must be non-negative")
    return _THE_EPOCH + timedelta(microseconds=ts)
@@ -1,20 +1,25 @@
1
1
  import builtins
2
2
  import functools
3
3
  import operator
4
+ import warnings
4
5
  from typing import Any
5
6
 
6
7
  import pyarrow as pa
7
8
 
8
- from spiral import _lib, arrow
9
+ from spiral import _lib, arrow_
9
10
 
10
11
  from . import http as http
11
12
  from . import io as io
12
13
  from . import list_ as list
14
+ from . import mp4 as mp4
15
+ from . import png as png
16
+ from . import qoi as qoi
13
17
  from . import refs as refs
14
18
  from . import str_ as str
15
19
  from . import struct as struct
20
+ from . import text as text
16
21
  from . import tiff as tiff
17
- from .base import Expr, ExprLike
22
+ from .base import Expr, ExprLike, NativeExpr
18
23
 
19
24
  __all__ = [
20
25
  "Expr",
@@ -42,6 +47,7 @@ __all__ = [
42
47
  "not_",
43
48
  "or_",
44
49
  "pack",
50
+ "keyed",
45
51
  "ref",
46
52
  "refs",
47
53
  "scalar",
@@ -50,8 +56,11 @@ __all__ = [
50
56
  "struct",
51
57
  "subtract",
52
58
  "tiff",
53
- "var",
54
59
  "xor",
60
+ "png",
61
+ "qoi",
62
+ "mp4",
63
+ "text",
55
64
  ]
56
65
 
57
66
  # Inline some of the struct expressions since they're so common
@@ -67,13 +76,15 @@ def lift(expr: ExprLike) -> Expr:
67
76
  # Convert an ExprLike into an Expr.
68
77
  if isinstance(expr, Expr):
69
78
  return expr
79
+ if isinstance(expr, NativeExpr):
80
+ return Expr(expr)
70
81
 
71
82
  if isinstance(expr, dict):
72
83
  # NOTE: we assume this is a struct expression. We could be smarter and be context aware to determine if
73
84
  # this is in fact a struct scalar, but the user can always create one of those manually.
74
85
 
75
86
  # First we un-nest any dot-separated field names
76
- expr: dict = arrow.nest_structs(expr)
87
+ expr: dict = arrow_.nest_structs(expr)
77
88
 
78
89
  return pack({k: lift(v) for k, v in expr.items()})
79
90
 
@@ -88,18 +99,30 @@ def lift(expr: ExprLike) -> Expr:
88
99
 
89
100
  # If the value is struct-like, we un-nest any dot-separated field names
90
101
  if isinstance(expr, pa.StructArray | pa.StructScalar):
91
- return lift(arrow.nest_structs(expr))
102
+ # TODO(marko): Figure out what to do with nullable struct arrays when unpacking them.
103
+ # We need to merge struct validity into the child validity?
104
+ if isinstance(expr, pa.StructArray) and expr.null_count != 0:
105
+ # raise ValueError("lift: cannot lift a struct array with nulls.")
106
+ warnings.warn("found a struct array with nulls", stacklevel=2)
107
+ if isinstance(expr, pa.StructScalar) and not expr.is_valid():
108
+ # raise ValueError("lift: cannot lift a struct scalar with nulls.")
109
+ warnings.warn("found a struct scalar with nulls", stacklevel=2)
110
+ return lift(arrow_.nest_structs(expr))
92
111
 
93
112
  if isinstance(expr, pa.Array):
94
- return Expr(_lib.spql.expr.array_lit(expr))
113
+ return Expr(_lib.expr.array_lit(expr))
95
114
 
96
115
  # Otherwise, assume it's a scalar.
97
116
  return scalar(expr)
98
117
 
99
118
 
100
- def var(name: builtins.str) -> Expr:
101
- """Create a variable expression."""
102
- return Expr(_lib.spql.expr.var(name))
119
+ def key(name: builtins.str) -> Expr:
120
+ """Create a variable expression referencing a key column.
121
+
122
+ Args:
123
+ name: variable name
124
+ """
125
+ return Expr(_lib.expr.keyed(name))
103
126
 
104
127
 
105
128
  def keyed(name: builtins.str, dtype: pa.DataType) -> Expr:
@@ -112,20 +135,21 @@ def keyed(name: builtins.str, dtype: pa.DataType) -> Expr:
112
135
  name: variable name
113
136
  dtype: must match dtype of the column in the key table.
114
137
  """
115
- return Expr(_lib.spql.expr.keyed(f"#{name}", dtype))
138
+ return Expr(_lib.expr.keyed(name, dtype))
116
139
 
117
140
 
118
141
  def scalar(value: Any) -> Expr:
119
142
  """Create a scalar expression."""
120
143
  if not isinstance(value, pa.Scalar):
121
144
  value = pa.scalar(value)
122
- return Expr(_lib.spql.expr.scalar(value))
145
+ # TODO(marko): Use Vortex scalar instead of passing as array.
146
+ return Expr(_lib.expr.scalar(pa.array([value.as_py()], type=value.type)))
123
147
 
124
148
 
125
149
  def cast(expr: ExprLike, dtype: pa.DataType) -> Expr:
126
150
  """Cast an expression into another PyArrow DataType."""
127
151
  expr = lift(expr)
128
- return Expr(_lib.spql.expr.cast(expr.__expr__, dtype))
152
+ return Expr(_lib.expr.cast(expr.__expr__, dtype))
129
153
 
130
154
 
131
155
  def and_(expr: ExprLike, *exprs: ExprLike) -> Expr:
@@ -182,19 +206,18 @@ def negate(expr: ExprLike) -> Expr:
182
206
  def not_(expr: ExprLike) -> Expr:
183
207
  """Negate the given expression."""
184
208
  expr = lift(expr)
185
- return Expr(_lib.spql.expr.unary("not", expr.__expr__))
209
+ return Expr(_lib.expr.not_(expr.__expr__))
186
210
 
187
211
 
188
212
  def is_null(expr: ExprLike) -> Expr:
189
213
  """Check if the given expression is null."""
190
214
  expr = lift(expr)
191
- return Expr(_lib.spql.expr.unary("is_null", expr.__expr__))
215
+ return Expr(_lib.expr.is_null(expr.__expr__))
192
216
 
193
217
 
194
218
  def is_not_null(expr: ExprLike) -> Expr:
195
- """Check if the given expression is null."""
196
- expr = lift(expr)
197
- return Expr(_lib.spql.expr.unary("is_not_null", expr.__expr__))
219
+ """Check if the given expression is not null."""
220
+ return not_(is_null(expr))
198
221
 
199
222
 
200
223
  def add(lhs: ExprLike, rhs: ExprLike) -> Expr:
@@ -5,7 +5,7 @@ import pyarrow as pa
5
5
 
6
6
  from spiral import _lib
7
7
 
8
- NativeExpr: TypeAlias = _lib.spql.expr.Expr
8
+ NativeExpr: TypeAlias = _lib.expr.Expr
9
9
 
10
10
 
11
11
  class Expr:
@@ -92,13 +92,13 @@ class Expr:
92
92
  return self._binary("mod", other)
93
93
 
94
94
  def __neg__(self):
95
- return Expr(_lib.spql.expr.unary("neg", self.__expr__))
95
+ return Expr(_lib.expr.unary("neg", self.__expr__))
96
96
 
97
97
  def in_(self, other: "ExprLike") -> "Expr":
98
98
  from spiral import expressions as se
99
99
 
100
100
  other = se.lift(other)
101
- return Expr(_lib.spql.expr.list.contains(other.__expr__, self.__expr__))
101
+ return Expr(_lib.expr.list.contains(other.__expr__, self.__expr__))
102
102
 
103
103
  def contains(self, other: "ExprLike") -> "Expr":
104
104
  from spiral import expressions as se
@@ -107,7 +107,7 @@ class Expr:
107
107
 
108
108
  def cast(self, dtype: pa.DataType) -> "Expr":
109
109
  """Cast the expression result to a different data type."""
110
- return Expr(_lib.spql.expr.cast(self.__expr__, dtype))
110
+ return Expr(_lib.expr.cast(self.__expr__, dtype))
111
111
 
112
112
  def select(self, *paths: str, exclude: list[str] = None) -> "Expr":
113
113
  """Select fields from a struct-like expression.
@@ -142,7 +142,7 @@ class Expr:
142
142
  from spiral import expressions as se
143
143
 
144
144
  rhs = se.lift(rhs)
145
- return Expr(_lib.spql.expr.binary(op, self.__expr__, rhs.__expr__))
145
+ return Expr(_lib.expr.binary(op, self.__expr__, rhs.__expr__))
146
146
 
147
147
 
148
148
  ScalarLike: TypeAlias = bool | int | float | str | list | datetime.datetime | None
@@ -29,7 +29,7 @@ def element_at(expr: ExprLike, index: ExprLike) -> Expr:
29
29
 
30
30
  expr = lift(expr)
31
31
  index = lift(index)
32
- return Expr(_lib.spql.expr.list.element_at(expr.__expr__, index.__expr__))
32
+ return Expr(_lib.expr.list.element_at(expr.__expr__, index.__expr__))
33
33
 
34
34
 
35
35
  def of(*expr: ExprLike) -> Expr:
@@ -0,0 +1,62 @@
1
+ import pyarrow as pa
2
+
3
+ from spiral.expressions.base import Expr, ExprLike
4
+
5
+ _MP4_RES_DTYPE: pa.DataType = pa.struct(
6
+ [
7
+ pa.field("pixels", pa.large_binary()),
8
+ pa.field("height", pa.uint32()),
9
+ pa.field("width", pa.uint32()),
10
+ pa.field("frames", pa.uint32()),
11
+ ]
12
+ )
13
+
14
+
15
+ # TODO(marko): Support optional range and crop.
16
+ # IMPORTANT: Frames is currently broken and defaults to full.
17
def read(expr: ExprLike | str, frames: ExprLike | str, crop: ExprLike | str):
    """
    Decode referenced `MP4` cells into raw frames. Requires `ffmpeg`.

    Args:
        expr: The referenced `Mp4` bytes.
            A str is assumed to be the `se.keyed` expression.
        frames: The range of frames to read. Each element must be a list of two uint32,
            frame start and frame end, or null / empty list to read all frames.
            A str is assumed to be the `se.keyed` expression.
        crop: The crop of the frames to read. Each element must be a list of four uint32,
            x, y, width, height or null / empty list to read full frames.
            A str is assumed to be the `se.keyed` expression.

    Returns:
        An array where each element is a decoded cropped video with fields:
            pixels: RGB8 bytes, frames * width * height * 3.
            width: Width of the image with type `pa.uint32()`.
            height: Height of the image with type `pa.uint32()`.
            frames: Number of frames with type `pa.uint32()`.
    """
    from spiral import _lib
    from spiral.expressions import keyed, lift

    # A bare string names a key column: wrap it in a typed `keyed` expression before lifting.
    if isinstance(expr, str):
        ref_fields = pa.struct([("id", pa.string()), ("begin", pa.uint64()), ("end", pa.uint64())])
        expr = keyed(expr, pa.struct([("__ref__", ref_fields)]))
    frames = keyed(frames, pa.list_(pa.uint32())) if isinstance(frames, str) else frames
    crop = keyed(crop, pa.list_(pa.uint32())) if isinstance(crop, str) else crop

    # Normalise all three inputs to Expr, in the same order as they are passed to the native call.
    video, frame_range, crop_box = lift(expr), lift(frames), lift(crop)

    native = _lib.expr.video.read(video.__expr__, frame_range.__expr__, crop_box.__expr__, format="mp4")
    return Expr(native)
@@ -0,0 +1,18 @@
1
+ from spiral.expressions.base import Expr, ExprLike
2
+
3
+
4
def encode(expr: ExprLike) -> Expr:
    """Build an expression that encodes its input as PNG images.

    Args:
        expr: The expression to encode.
            Expects a struct with `pixels`, `width`, `height`, `channels`, `channel_bit_depth` fields.

    Returns:
        The encoded PNG images.
    """
    from spiral import _lib
    from spiral.expressions import lift

    # Lift first so plain values and dicts are accepted, then hand the native node to the encoder.
    native_input = lift(expr).__expr__
    encoded = _lib.expr.img.encode(native_input, format="png")
    return Expr(encoded)
@@ -0,0 +1,18 @@
1
+ from spiral.expressions.base import Expr, ExprLike
2
+
3
+
4
def encode(expr: ExprLike) -> Expr:
    """Build an expression that encodes its input as QOI images.

    Args:
        expr: The expression to encode.
            Expects a struct with `pixels`, `width`, `height`, `channels`, `channel_bit_depth` fields.

    Returns:
        The encoded QOI images.
    """
    from spiral import _lib
    from spiral.expressions import lift

    # Lift first so plain values and dicts are accepted, then hand the native node to the encoder.
    native_input = lift(expr).__expr__
    encoded = _lib.expr.img.encode(native_input, format="qoi")
    return Expr(encoded)
@@ -1,10 +1,7 @@
1
- from typing import TYPE_CHECKING
1
+ import pyarrow as pa
2
2
 
3
3
  from spiral.expressions.base import Expr, ExprLike
4
4
 
5
- if TYPE_CHECKING:
6
- from spiral import Table
7
-
8
5
 
9
6
  def ref(expr: ExprLike, field: str | None = None) -> Expr:
10
7
  """Store binary values as references. This expression can only be used on write.
@@ -22,23 +19,40 @@ def ref(expr: ExprLike, field: str | None = None) -> Expr:
22
19
  from spiral.expressions import lift
23
20
 
24
21
  expr = lift(expr)
25
- return Expr(_lib.spql.expr.ref(expr.__expr__, field))
22
+ return Expr(_lib.expr.refs.ref(expr.__expr__, field))
26
23
 
27
24
 
28
- def deref(expr: ExprLike, field: str | None = None, table: "Table" = None) -> Expr:
25
+ def deref(expr: ExprLike | str, field: str | None = None) -> Expr:
29
26
  """De-reference referenced values.
30
27
 
31
28
  See `ref` for more information on Spiral's reference values. This expression is used to de-reference referenced
32
29
  column back into their original form, e.g. binary.
33
30
 
34
31
  Args:
35
- expr: The expression to de-reference.
32
+ expr: The expression to de-reference. A str is assumed to be the `se.keyed` expression.
36
33
  field: If the expr evaluates into struct, the field name of that struct that should be de-referenced.
37
34
  If `None`, the expr must evaluate into a reference type.
38
- table (optional): The table to de-reference from, if not available in input expression.
39
35
  """
40
36
  from spiral import _lib
37
+ from spiral.expressions import keyed, lift
38
+
39
+ if isinstance(expr, str):
40
+ expr = keyed(
41
+ expr,
42
+ pa.struct([("__ref__", pa.struct([("id", pa.string()), ("begin", pa.uint64()), ("end", pa.uint64())]))]),
43
+ )
44
+
45
+ expr = lift(expr)
46
+ return Expr(_lib.expr.refs.deref(expr.__expr__, field=field))
47
+
48
+
49
+ def nbytes(expr: ExprLike) -> Expr:
50
+ """Return the number of bytes in a reference.
51
+
52
+ Args:
53
+ expr: The ref expression to get the number of bytes from.
54
+ """
41
55
  from spiral.expressions import lift
42
56
 
43
57
  expr = lift(expr)
44
- return Expr(_lib.spql.expr.deref(expr.__expr__, field, table._table if table is not None else None))
58
+ return expr["__ref__"]["end"] - expr["__ref__"]["begin"]
@@ -12,10 +12,10 @@ def getitem(expr: ExprLike, field: str) -> Expr:
12
12
  from spiral import expressions as se
13
13
 
14
14
  expr = se.lift(expr)
15
- return Expr(_lib.spql.expr.struct.getitem(expr.__expr__, field))
15
+ return Expr(_lib.expr.struct.getitem(expr.__expr__, field))
16
16
 
17
17
 
18
- def pack(fields: dict[str, ExprLike]) -> Expr:
18
+ def pack(fields: dict[str, ExprLike], *, nullable: bool = False) -> Expr:
19
19
  """Assemble a new struct from the given named fields.
20
20
 
21
21
  Args:
@@ -23,7 +23,9 @@ def pack(fields: dict[str, ExprLike]) -> Expr:
23
23
  """
24
24
  from spiral import expressions as se
25
25
 
26
- return Expr(_lib.spql.expr.struct.pack(list(fields.keys()), [se.lift(expr).__expr__ for expr in fields.values()]))
26
+ return Expr(
27
+ _lib.expr.struct.pack(list(fields.keys()), [se.lift(expr).__expr__ for expr in fields.values()], nullable)
28
+ )
27
29
 
28
30
 
29
31
  def merge(*structs: "ExprLike") -> Expr:
@@ -40,7 +42,7 @@ def merge(*structs: "ExprLike") -> Expr:
40
42
 
41
43
  if len(structs) == 1:
42
44
  return se.lift(structs[0])
43
- return Expr(_lib.spql.expr.struct.merge([se.lift(struct).__expr__ for struct in structs]))
45
+ return Expr(_lib.expr.struct.merge([se.lift(struct).__expr__ for struct in structs]))
44
46
 
45
47
 
46
48
  def select(expr: ExprLike, names: list[str] = None, exclude: list[str] = None) -> Expr:
@@ -54,4 +56,4 @@ def select(expr: ExprLike, names: list[str] = None, exclude: list[str] = None) -
54
56
  from spiral import expressions as se
55
57
 
56
58
  expr = se.lift(expr)
57
- return Expr(_lib.spql.expr.struct.select(expr.__expr__, names, exclude))
59
+ return Expr(_lib.expr.struct.select(expr.__expr__, names, exclude))