pyspiral 0.3.1__cp310-abi3-macosx_11_0_arm64.whl → 0.4.0__cp310-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. {pyspiral-0.3.1.dist-info → pyspiral-0.4.0.dist-info}/METADATA +9 -13
  2. pyspiral-0.4.0.dist-info/RECORD +98 -0
  3. {pyspiral-0.3.1.dist-info → pyspiral-0.4.0.dist-info}/WHEEL +1 -1
  4. spiral/__init__.py +6 -9
  5. spiral/_lib.abi3.so +0 -0
  6. spiral/adbc.py +21 -14
  7. spiral/api/__init__.py +14 -175
  8. spiral/api/admin.py +12 -26
  9. spiral/api/client.py +160 -0
  10. spiral/api/filesystems.py +100 -72
  11. spiral/api/organizations.py +45 -58
  12. spiral/api/projects.py +171 -134
  13. spiral/api/telemetry.py +19 -0
  14. spiral/api/types.py +20 -0
  15. spiral/api/workloads.py +32 -25
  16. spiral/{arrow.py → arrow_.py} +12 -0
  17. spiral/cli/__init__.py +2 -5
  18. spiral/cli/admin.py +7 -12
  19. spiral/cli/app.py +23 -6
  20. spiral/cli/console.py +1 -1
  21. spiral/cli/fs.py +82 -17
  22. spiral/cli/iceberg/__init__.py +7 -0
  23. spiral/cli/iceberg/namespaces.py +47 -0
  24. spiral/cli/iceberg/tables.py +60 -0
  25. spiral/cli/indexes/__init__.py +19 -0
  26. spiral/cli/login.py +14 -5
  27. spiral/cli/orgs.py +90 -0
  28. spiral/cli/printer.py +9 -1
  29. spiral/cli/projects.py +136 -0
  30. spiral/cli/state.py +2 -0
  31. spiral/cli/tables/__init__.py +121 -0
  32. spiral/cli/telemetry.py +18 -0
  33. spiral/cli/types.py +8 -10
  34. spiral/cli/{workload.py → workloads.py} +11 -11
  35. spiral/{catalog.py → client.py} +23 -37
  36. spiral/core/client/__init__.pyi +117 -0
  37. spiral/core/index/__init__.pyi +15 -0
  38. spiral/core/{core → table}/__init__.pyi +44 -17
  39. spiral/core/{manifests → table/manifests}/__init__.pyi +5 -23
  40. spiral/core/table/metastore/__init__.pyi +62 -0
  41. spiral/core/{spec → table/spec}/__init__.pyi +41 -66
  42. spiral/datetime_.py +27 -0
  43. spiral/expressions/__init__.py +26 -18
  44. spiral/expressions/base.py +5 -5
  45. spiral/expressions/list_.py +1 -1
  46. spiral/expressions/mp4.py +2 -9
  47. spiral/expressions/png.py +1 -1
  48. spiral/expressions/qoi.py +1 -1
  49. spiral/expressions/refs.py +3 -9
  50. spiral/expressions/struct.py +7 -5
  51. spiral/expressions/text.py +62 -0
  52. spiral/expressions/udf.py +3 -3
  53. spiral/iceberg/__init__.py +3 -0
  54. spiral/iceberg/client.py +33 -0
  55. spiral/indexes/__init__.py +5 -0
  56. spiral/indexes/client.py +137 -0
  57. spiral/indexes/index.py +34 -0
  58. spiral/indexes/scan.py +22 -0
  59. spiral/project.py +19 -110
  60. spiral/{proto → protogen}/_/scandal/__init__.py +23 -135
  61. spiral/protogen/_/spiral/table/__init__.py +22 -0
  62. spiral/protogen/substrait/__init__.py +3399 -0
  63. spiral/protogen/substrait/extensions/__init__.py +115 -0
  64. spiral/server.py +17 -0
  65. spiral/settings.py +29 -91
  66. spiral/substrait_.py +9 -5
  67. spiral/tables/__init__.py +12 -0
  68. spiral/tables/client.py +130 -0
  69. spiral/{dataset.py → tables/dataset.py} +9 -199
  70. spiral/tables/debug/manifests.py +70 -0
  71. spiral/tables/debug/metrics.py +56 -0
  72. spiral/{debug.py → tables/debug/scan.py} +6 -9
  73. spiral/{maintenance.py → tables/maintenance.py} +1 -1
  74. spiral/{scan_.py → tables/scan.py} +63 -89
  75. spiral/tables/snapshot.py +78 -0
  76. spiral/{table.py → tables/table.py} +59 -73
  77. spiral/{txn.py → tables/transaction.py} +7 -3
  78. pyspiral-0.3.1.dist-info/RECORD +0 -85
  79. spiral/api/tables.py +0 -91
  80. spiral/api/tokens.py +0 -56
  81. spiral/authn/authn.py +0 -89
  82. spiral/authn/device.py +0 -206
  83. spiral/authn/github_.py +0 -33
  84. spiral/authn/modal_.py +0 -18
  85. spiral/cli/org.py +0 -90
  86. spiral/cli/project.py +0 -109
  87. spiral/cli/table.py +0 -20
  88. spiral/cli/token.py +0 -27
  89. spiral/core/metastore/__init__.pyi +0 -91
  90. spiral/proto/_/spfs/__init__.py +0 -36
  91. spiral/proto/_/spiral/table/__init__.py +0 -276
  92. spiral/proto/_/spiraldb/metastore/__init__.py +0 -499
  93. spiral/proto/__init__.py +0 -0
  94. spiral/proto/scandal/__init__.py +0 -45
  95. spiral/proto/spiral/__init__.py +0 -0
  96. spiral/proto/spiral/table/__init__.py +0 -96
  97. {pyspiral-0.3.1.dist-info → pyspiral-0.4.0.dist-info}/entry_points.txt +0 -0
  98. /spiral/{authn/__init__.py → core/__init__.pyi} +0 -0
  99. /spiral/{core → protogen/_}/__init__.py +0 -0
  100. /spiral/{proto/_ → protogen/_/arrow}/__init__.py +0 -0
  101. /spiral/{proto/_/arrow → protogen/_/arrow/flight}/__init__.py +0 -0
  102. /spiral/{proto/_/arrow/flight → protogen/_/arrow/flight/protocol}/__init__.py +0 -0
  103. /spiral/{proto → protogen}/_/arrow/flight/protocol/sql/__init__.py +0 -0
  104. /spiral/{proto/_/arrow/flight/protocol → protogen/_/spiral}/__init__.py +0 -0
  105. /spiral/{proto → protogen/_}/substrait/__init__.py +0 -0
  106. /spiral/{proto → protogen/_}/substrait/extensions/__init__.py +0 -0
  107. /spiral/{proto/_/spiral → protogen}/__init__.py +0 -0
  108. /spiral/{proto → protogen}/util.py +0 -0
  109. /spiral/{proto/_/spiraldb → tables/debug}/__init__.py +0 -0
@@ -14,6 +14,24 @@ class ColumnGroup:
14
14
  @staticmethod
15
15
  def from_str(path: str) -> ColumnGroup: ...
16
16
 
17
+ class KeySpaceMetadata:
18
+ def __init__(
19
+ self,
20
+ *,
21
+ manifest_handle: ManifestHandle | None,
22
+ last_modified_at: int,
23
+ ): ...
24
+
25
+ manifest_handle: ManifestHandle | None
26
+ last_modified_at: int
27
+
28
+ def asof(self, asof: int) -> KeySpaceMetadata:
29
+ """Returns the metadata as of a given timestamp. Currently just filtering versioned schemas."""
30
+ ...
31
+
32
+ def apply_wal(self, wal: WriteAheadLog) -> KeySpaceMetadata:
33
+ """Applies the given WAL to the metadata."""
34
+
17
35
  class ColumnGroupMetadata:
18
36
  def __init__(
19
37
  self,
@@ -44,17 +62,16 @@ class ColumnGroupMetadata:
44
62
  def apply_wal(self, wal: WriteAheadLog) -> ColumnGroupMetadata:
45
63
  """Applies the given WAL to the metadata."""
46
64
 
47
- def __bytes__(self):
48
- """Serializes the ColumnGroupMetadata to a protobuf buffer."""
49
-
50
- @staticmethod
51
- def from_proto(buffer: bytes) -> ColumnGroupMetadata:
52
- """Deserializes a ColumnGroupMetadata from a protobuf buffer."""
53
- ...
54
-
55
65
  class LogEntry:
56
66
  ts: int
57
- operation: KeySpaceWriteOp | FragmentSetWriteOp | ConfigurationOp | SchemaEvolutionOp | SchemaBreakOp
67
+ operation: (
68
+ KeySpaceWriteOp
69
+ | ColumnGroupWriteOp
70
+ | SchemaEvolutionOp
71
+ | SchemaBreakOp
72
+ | KeySpaceCompactOp
73
+ | ColumnGroupCompactOp
74
+ )
58
75
 
59
76
  def column_group(self) -> ColumnGroup | None:
60
77
  """Returns the column group of the entry if it is associated with one."""
@@ -71,6 +88,10 @@ class FileFormat:
71
88
  """Returns the protobuf enum int value."""
72
89
  ...
73
90
 
91
+ def __str__(self) -> str:
92
+ """Returns the string representation of the file format."""
93
+ ...
94
+
74
95
  class FragmentLevel:
75
96
  L0: FragmentLevel
76
97
  L1: FragmentLevel
@@ -80,15 +101,9 @@ class FragmentLevel:
80
101
  ...
81
102
 
82
103
  class Key:
83
- def __init__(self, key: bytes): ...
84
-
85
104
  key: bytes
86
105
 
87
- def __add__(self, other: Key) -> Key:
88
- """Concatenates two keys.
89
-
90
- TODO(ngates): remove this function. It should not be necessary to concatenate keys."""
91
-
106
+ def __init__(self, key: bytes): ...
92
107
  def __bytes__(self): ...
93
108
  def step(self) -> Key:
94
109
  """Returns the next key in the key space."""
@@ -97,8 +112,6 @@ class Key:
97
112
  def min() -> Key: ...
98
113
  @staticmethod
99
114
  def max() -> Key: ...
100
- @staticmethod
101
- def from_array_tuple(array_tuple: tuple[pa.Array]) -> Key: ...
102
115
 
103
116
  class KeyExtent:
104
117
  """An inclusive range of keys."""
@@ -108,9 +121,6 @@ class KeyExtent:
108
121
  min: Key
109
122
  max: Key
110
123
 
111
- def to_range(self) -> KeyRange:
112
- """Turn this inclusive key extent into an exclusive key range."""
113
-
114
124
  def union(self, key_extent: KeyExtent) -> KeyExtent: ...
115
125
  def __or__(self, other: KeyExtent) -> KeyExtent: ...
116
126
  def intersection(self, key_extent: KeyExtent) -> KeyExtent | None: ...
@@ -118,30 +128,6 @@ class KeyExtent:
118
128
  def contains(self, item: Key) -> bool: ...
119
129
  def __contains__(self, item: Key) -> bool: ...
120
130
 
121
- class KeyRange:
122
- """A right-exclusive range of keys."""
123
-
124
- def __init__(self, *, begin: Key, end: Key): ...
125
-
126
- begin: Key
127
- end: Key
128
-
129
- def union(self, other: KeyRange) -> KeyRange: ...
130
- def __or__(self, other: KeyRange) -> KeyRange: ...
131
- def intersection(self, key_extent: KeyRange) -> KeyRange | None: ...
132
- def __and__(self, other: KeyRange) -> KeyRange | None: ...
133
- def contains(self, item: Key) -> bool: ...
134
- def __contains__(self, item: Key) -> bool: ...
135
- def is_disjoint(self, key_range: KeyRange) -> bool:
136
- return self.end <= key_range.begin or self.begin >= key_range.end
137
-
138
- @staticmethod
139
- def beginning_with(begin: Key) -> KeyRange: ...
140
- @staticmethod
141
- def ending_with(end: Key) -> KeyRange: ...
142
- @staticmethod
143
- def full() -> KeyRange: ...
144
-
145
131
  class KeySpan:
146
132
  """An exclusive range of keys as indexed by their position in a key space."""
147
133
 
@@ -155,16 +141,10 @@ class KeySpan:
155
141
  def union(self, other: KeySpan) -> KeySpan: ...
156
142
  def __or__(self, other: KeySpan) -> KeySpan: ...
157
143
 
158
- class KeyMap:
159
- """Displacement map."""
160
-
161
144
  class ManifestHandle:
162
- def __init__(self, id: str, format: FileFormat, file_size: int, spfs_format_metadata: bytes | None): ...
163
-
164
145
  id: str
165
146
  format: FileFormat
166
147
  file_size: int
167
- spfs_format_metadata: bytes | None
168
148
 
169
149
  class Schema:
170
150
  def to_arrow(self) -> pa.Schema:
@@ -191,24 +171,28 @@ class KeySpaceWriteOp:
191
171
  ks_id: str
192
172
  manifest_handle: ManifestHandle
193
173
 
194
- class FragmentSetWriteOp:
174
+ class ColumnGroupWriteOp:
195
175
  column_group: ColumnGroup
196
- fs_id: str
197
- fs_level: FragmentLevel
176
+ level: FragmentLevel
198
177
  manifest_handle: ManifestHandle
199
178
  key_span: KeySpan
200
179
  key_extent: KeyExtent
201
180
  column_ids: list[str]
202
181
 
203
- class ConfigurationOp:
204
- column_group: ColumnGroup
205
-
206
182
  class SchemaEvolutionOp:
207
183
  column_group: ColumnGroup
208
184
 
209
185
  class SchemaBreakOp:
210
186
  column_group: ColumnGroup
211
187
 
188
+ class KeySpaceCompactOp:
189
+ ks_ids: list[str]
190
+ moved_ks_ids: list[str]
191
+
192
+ class ColumnGroupCompactOp:
193
+ column_group: ColumnGroup
194
+ fragment_ids: list[int]
195
+
212
196
  class WriteAheadLog:
213
197
  def __init__(
214
198
  self,
@@ -219,7 +203,6 @@ class WriteAheadLog:
219
203
 
220
204
  entries: list[LogEntry]
221
205
  truncated_up_to: int
222
- ks_manifest_handle: ManifestHandle | None
223
206
 
224
207
  @property
225
208
  def last_modified_at(self) -> int:
@@ -229,11 +212,3 @@ class WriteAheadLog:
229
212
  self, asof: int | None = None, since: int | None = None, column_group: ColumnGroup | None = None
230
213
  ) -> WriteAheadLog:
231
214
  """Filters the WAL to entries by the given parameters."""
232
-
233
- def __bytes__(self):
234
- """Serializes the ColumnGroupMetadata to a protobuf buffer."""
235
-
236
- @staticmethod
237
- def from_proto(buffer: bytes) -> WriteAheadLog:
238
- """Deserializes a WriteAheadLog from a protobuf buffer."""
239
- ...
spiral/datetime_.py ADDED
@@ -0,0 +1,27 @@
import warnings
from datetime import datetime, timedelta, timezone, tzinfo

# The instant every microsecond timestamp in this module is measured from (Unix time 0, in UTC).
_THE_EPOCH = datetime.fromtimestamp(0, tz=timezone.utc)


def local_tz() -> tzinfo:
    """Determine this machine's local timezone.

    Returns:
        The tzinfo attached to `datetime.now().astimezone()`.

    Raises:
        ValueError: If that datetime carries no tzinfo.
    """
    tz = datetime.now().astimezone().tzinfo
    if tz is None:
        raise ValueError("Could not determine this machine's local timezone.")
    return tz


def timestamp_micros(instant: datetime) -> int:
    """The number of microseconds between the epoch and the given instant.

    Args:
        instant: The datetime to convert. A timezone-naive value is interpreted as local time
            and a warning is emitted.

    Returns:
        Whole microseconds since the epoch (floored); negative for instants before it.
    """
    if instant.tzinfo is None:
        warnings.warn("assuming timezone-naive datetime is local time", stacklevel=2)
        try:
            # Resolve the local UTC offset that applies at `instant` itself. Stamping the value with
            # local_tz() would apply the offset in effect *now*, which is wrong for any naive
            # datetime that lies on the other side of a DST transition.
            instant = instant.astimezone()
        except (OSError, OverflowError, ValueError):
            # The platform could not resolve local time for this instant; fall back to the
            # current local offset rather than failing.
            instant = instant.replace(tzinfo=local_tz())
    return (instant - _THE_EPOCH) // timedelta(microseconds=1)


def from_timestamp_micros(ts: int) -> datetime:
    """Convert a timestamp in microseconds to a datetime.

    Args:
        ts: Microseconds since the epoch; must be non-negative.

    Returns:
        A timezone-aware datetime in UTC.

    Raises:
        ValueError: If `ts` is negative.
    """
    if ts < 0:
        raise ValueError("Timestamp must be non-negative")
    return _THE_EPOCH + timedelta(microseconds=ts)
@@ -1,11 +1,12 @@
1
1
  import builtins
2
2
  import functools
3
3
  import operator
4
+ import warnings
4
5
  from typing import Any
5
6
 
6
7
  import pyarrow as pa
7
8
 
8
- from spiral import _lib, arrow
9
+ from spiral import _lib, arrow_
9
10
 
10
11
  from . import http as http
11
12
  from . import io as io
@@ -16,8 +17,9 @@ from . import qoi as qoi
16
17
  from . import refs as refs
17
18
  from . import str_ as str
18
19
  from . import struct as struct
20
+ from . import text as text
19
21
  from . import tiff as tiff
20
- from .base import Expr, ExprLike
22
+ from .base import Expr, ExprLike, NativeExpr
21
23
 
22
24
  __all__ = [
23
25
  "Expr",
@@ -54,11 +56,11 @@ __all__ = [
54
56
  "struct",
55
57
  "subtract",
56
58
  "tiff",
57
- "var",
58
59
  "xor",
59
60
  "png",
60
61
  "qoi",
61
62
  "mp4",
63
+ "text",
62
64
  ]
63
65
 
64
66
  # Inline some of the struct expressions since they're so common
@@ -74,13 +76,15 @@ def lift(expr: ExprLike) -> Expr:
74
76
  # Convert an ExprLike into an Expr.
75
77
  if isinstance(expr, Expr):
76
78
  return expr
79
+ if isinstance(expr, NativeExpr):
80
+ return Expr(expr)
77
81
 
78
82
  if isinstance(expr, dict):
79
83
  # NOTE: we assume this is a struct expression. We could be smarter and be context aware to determine if
80
84
  # this is in fact a struct scalar, but the user can always create one of those manually.
81
85
 
82
86
  # First we un-nest any dot-separated field names
83
- expr: dict = arrow.nest_structs(expr)
87
+ expr: dict = arrow_.nest_structs(expr)
84
88
 
85
89
  return pack({k: lift(v) for k, v in expr.items()})
86
90
 
@@ -95,14 +99,18 @@ def lift(expr: ExprLike) -> Expr:
95
99
 
96
100
  # If the value is struct-like, we un-nest any dot-separated field names
97
101
  if isinstance(expr, pa.StructArray | pa.StructScalar):
102
+ # TODO(marko): Figure out what to do with nullable struct arrays when unpacking them.
103
+ # We need to merge struct validity into the child validity?
98
104
  if isinstance(expr, pa.StructArray) and expr.null_count != 0:
99
- raise ValueError("lift: cannot lift a struct array with nulls.")
100
- if isinstance(expr, pa.StructArray) and not expr.is_valid():
101
- raise ValueError("lift: cannot lift a struct scalar with nulls.")
102
- return lift(arrow.nest_structs(expr))
105
+ # raise ValueError("lift: cannot lift a struct array with nulls.")
106
+ warnings.warn("found a struct array with nulls", stacklevel=2)
107
+ if isinstance(expr, pa.StructScalar) and not expr.is_valid():
108
+ # raise ValueError("lift: cannot lift a struct scalar with nulls.")
109
+ warnings.warn("found a struct scalar with nulls", stacklevel=2)
110
+ return lift(arrow_.nest_structs(expr))
103
111
 
104
112
  if isinstance(expr, pa.Array):
105
- return Expr(_lib.spql.expr.array_lit(expr))
113
+ return Expr(_lib.expr.array_lit(expr))
106
114
 
107
115
  # Otherwise, assume it's a scalar.
108
116
  return scalar(expr)
@@ -114,7 +122,7 @@ def key(name: builtins.str) -> Expr:
114
122
  Args:
115
123
  name: variable name
116
124
  """
117
- return Expr(_lib.spql.expr.keyed(name))
125
+ return Expr(_lib.expr.keyed(name))
118
126
 
119
127
 
120
128
  def keyed(name: builtins.str, dtype: pa.DataType) -> Expr:
@@ -127,20 +135,21 @@ def keyed(name: builtins.str, dtype: pa.DataType) -> Expr:
127
135
  name: variable name
128
136
  dtype: must match dtype of the column in the key table.
129
137
  """
130
- return Expr(_lib.spql.expr.keyed(name, dtype))
138
+ return Expr(_lib.expr.keyed(name, dtype))
131
139
 
132
140
 
133
141
  def scalar(value: Any) -> Expr:
134
142
  """Create a scalar expression."""
135
143
  if not isinstance(value, pa.Scalar):
136
144
  value = pa.scalar(value)
137
- return Expr(_lib.spql.expr.scalar(value))
145
+ # TODO(marko): Use Vortex scalar instead of passing as array.
146
+ return Expr(_lib.expr.scalar(pa.array([value.as_py()], type=value.type)))
138
147
 
139
148
 
140
149
  def cast(expr: ExprLike, dtype: pa.DataType) -> Expr:
141
150
  """Cast an expression into another PyArrow DataType."""
142
151
  expr = lift(expr)
143
- return Expr(_lib.spql.expr.cast(expr.__expr__, dtype))
152
+ return Expr(_lib.expr.cast(expr.__expr__, dtype))
144
153
 
145
154
 
146
155
  def and_(expr: ExprLike, *exprs: ExprLike) -> Expr:
@@ -197,19 +206,18 @@ def negate(expr: ExprLike) -> Expr:
197
206
  def not_(expr: ExprLike) -> Expr:
198
207
  """Negate the given expression."""
199
208
  expr = lift(expr)
200
- return Expr(_lib.spql.expr.unary("not", expr.__expr__))
209
+ return Expr(_lib.expr.not_(expr.__expr__))
201
210
 
202
211
 
203
212
  def is_null(expr: ExprLike) -> Expr:
204
213
  """Check if the given expression is null."""
205
214
  expr = lift(expr)
206
- return Expr(_lib.spql.expr.unary("is_null", expr.__expr__))
215
+ return Expr(_lib.expr.is_null(expr.__expr__))
207
216
 
208
217
 
209
218
  def is_not_null(expr: ExprLike) -> Expr:
210
- """Check if the given expression is null."""
211
- expr = lift(expr)
212
- return Expr(_lib.spql.expr.unary("is_not_null", expr.__expr__))
219
+ """Check if the given expression is not null."""
220
+ return not_(is_null(expr))
213
221
 
214
222
 
215
223
  def add(lhs: ExprLike, rhs: ExprLike) -> Expr:
@@ -5,7 +5,7 @@ import pyarrow as pa
5
5
 
6
6
  from spiral import _lib
7
7
 
8
- NativeExpr: TypeAlias = _lib.spql.expr.Expr
8
+ NativeExpr: TypeAlias = _lib.expr.Expr
9
9
 
10
10
 
11
11
  class Expr:
@@ -92,13 +92,13 @@ class Expr:
92
92
  return self._binary("mod", other)
93
93
 
94
94
  def __neg__(self):
95
- return Expr(_lib.spql.expr.unary("neg", self.__expr__))
95
+ return Expr(_lib.expr.unary("neg", self.__expr__))
96
96
 
97
97
  def in_(self, other: "ExprLike") -> "Expr":
98
98
  from spiral import expressions as se
99
99
 
100
100
  other = se.lift(other)
101
- return Expr(_lib.spql.expr.list.contains(other.__expr__, self.__expr__))
101
+ return Expr(_lib.expr.list.contains(other.__expr__, self.__expr__))
102
102
 
103
103
  def contains(self, other: "ExprLike") -> "Expr":
104
104
  from spiral import expressions as se
@@ -107,7 +107,7 @@ class Expr:
107
107
 
108
108
  def cast(self, dtype: pa.DataType) -> "Expr":
109
109
  """Cast the expression result to a different data type."""
110
- return Expr(_lib.spql.expr.cast(self.__expr__, dtype))
110
+ return Expr(_lib.expr.cast(self.__expr__, dtype))
111
111
 
112
112
  def select(self, *paths: str, exclude: list[str] = None) -> "Expr":
113
113
  """Select fields from a struct-like expression.
@@ -142,7 +142,7 @@ class Expr:
142
142
  from spiral import expressions as se
143
143
 
144
144
  rhs = se.lift(rhs)
145
- return Expr(_lib.spql.expr.binary(op, self.__expr__, rhs.__expr__))
145
+ return Expr(_lib.expr.binary(op, self.__expr__, rhs.__expr__))
146
146
 
147
147
 
148
148
  ScalarLike: TypeAlias = bool | int | float | str | list | datetime.datetime | None
@@ -29,7 +29,7 @@ def element_at(expr: ExprLike, index: ExprLike) -> Expr:
29
29
 
30
30
  expr = lift(expr)
31
31
  index = lift(index)
32
- return Expr(_lib.spql.expr.list.element_at(expr.__expr__, index.__expr__))
32
+ return Expr(_lib.expr.list.element_at(expr.__expr__, index.__expr__))
33
33
 
34
34
 
35
35
  def of(*expr: ExprLike) -> Expr:
spiral/expressions/mp4.py CHANGED
@@ -1,12 +1,7 @@
1
- from typing import TYPE_CHECKING
2
-
3
1
  import pyarrow as pa
4
2
 
5
3
  from spiral.expressions.base import Expr, ExprLike
6
4
 
7
- if TYPE_CHECKING:
8
- from spiral import Table
9
-
10
5
  _MP4_RES_DTYPE: pa.DataType = pa.struct(
11
6
  [
12
7
  pa.field("pixels", pa.large_binary()),
@@ -19,7 +14,7 @@ _MP4_RES_DTYPE: pa.DataType = pa.struct(
19
14
 
20
15
  # TODO(marko): Support optional range and crop.
21
16
  # IMPORTANT: Frames is currently broken and defaults to full.
22
- def read(expr: ExprLike | str, frames: ExprLike | str, crop: ExprLike | str, *, table: "Table" = None):
17
+ def read(expr: ExprLike | str, frames: ExprLike | str, crop: ExprLike | str):
23
18
  """
24
19
  Read referenced cell in a `MP4` format. Requires `ffmpeg`.
25
20
 
@@ -32,7 +27,6 @@ def read(expr: ExprLike | str, frames: ExprLike | str, crop: ExprLike | str, *,
32
27
  crop: The crop of the frames to read. Each element must be a list of four uint32,
33
28
  x, y, width, height or null / empty list to read full frames.
34
29
  A str is assumed to be the `se.keyed` expression.
35
- table (optional): The table to de-reference from, if not available in input expression.
36
30
 
37
31
  Returns:
38
32
  An array where each element is a decoded cropped video with fields:
@@ -59,11 +53,10 @@ def read(expr: ExprLike | str, frames: ExprLike | str, crop: ExprLike | str, *,
59
53
  crop = lift(crop)
60
54
 
61
55
  return Expr(
62
- _lib.spql.expr.video.read(
56
+ _lib.expr.video.read(
63
57
  expr.__expr__,
64
58
  frames.__expr__,
65
59
  crop.__expr__,
66
60
  format="mp4",
67
- table=table._table if table is not None else None,
68
61
  )
69
62
  )
spiral/expressions/png.py CHANGED
@@ -15,4 +15,4 @@ def encode(expr: ExprLike) -> Expr:
15
15
  from spiral.expressions import lift
16
16
 
17
17
  expr = lift(expr)
18
- return Expr(_lib.spql.expr.img.encode(expr.__expr__, format="png"))
18
+ return Expr(_lib.expr.img.encode(expr.__expr__, format="png"))
spiral/expressions/qoi.py CHANGED
@@ -15,4 +15,4 @@ def encode(expr: ExprLike) -> Expr:
15
15
  from spiral.expressions import lift
16
16
 
17
17
  expr = lift(expr)
18
- return Expr(_lib.spql.expr.img.encode(expr.__expr__, format="qoi"))
18
+ return Expr(_lib.expr.img.encode(expr.__expr__, format="qoi"))
@@ -1,12 +1,7 @@
1
- from typing import TYPE_CHECKING
2
-
3
1
  import pyarrow as pa
4
2
 
5
3
  from spiral.expressions.base import Expr, ExprLike
6
4
 
7
- if TYPE_CHECKING:
8
- from spiral import Table
9
-
10
5
 
11
6
  def ref(expr: ExprLike, field: str | None = None) -> Expr:
12
7
  """Store binary values as references. This expression can only be used on write.
@@ -24,10 +19,10 @@ def ref(expr: ExprLike, field: str | None = None) -> Expr:
24
19
  from spiral.expressions import lift
25
20
 
26
21
  expr = lift(expr)
27
- return Expr(_lib.spql.expr.ref(expr.__expr__, field))
22
+ return Expr(_lib.expr.refs.ref(expr.__expr__, field))
28
23
 
29
24
 
30
- def deref(expr: ExprLike | str, field: str | None = None, *, table: "Table" = None) -> Expr:
25
+ def deref(expr: ExprLike | str, field: str | None = None) -> Expr:
31
26
  """De-reference referenced values.
32
27
 
33
28
  See `ref` for more information on Spiral's reference values. This expression is used to de-reference referenced
@@ -37,7 +32,6 @@ def deref(expr: ExprLike | str, field: str | None = None, *, table: "Table" = No
37
32
  expr: The expression to de-reference. A str is assumed to be the `se.keyed` expression.
38
33
  field: If the expr evaluates into struct, the field name of that struct that should be de-referenced.
39
34
  If `None`, the expr must evaluate into a reference type.
40
- table (optional): The table to de-reference from, if not available in input expression.
41
35
  """
42
36
  from spiral import _lib
43
37
  from spiral.expressions import keyed, lift
@@ -49,7 +43,7 @@ def deref(expr: ExprLike | str, field: str | None = None, *, table: "Table" = No
49
43
  )
50
44
 
51
45
  expr = lift(expr)
52
- return Expr(_lib.spql.expr.deref(expr.__expr__, field=field, table=table._table if table is not None else None))
46
+ return Expr(_lib.expr.refs.deref(expr.__expr__, field=field))
53
47
 
54
48
 
55
49
  def nbytes(expr: ExprLike) -> Expr:
@@ -12,10 +12,10 @@ def getitem(expr: ExprLike, field: str) -> Expr:
12
12
  from spiral import expressions as se
13
13
 
14
14
  expr = se.lift(expr)
15
- return Expr(_lib.spql.expr.struct.getitem(expr.__expr__, field))
15
+ return Expr(_lib.expr.struct.getitem(expr.__expr__, field))
16
16
 
17
17
 
18
- def pack(fields: dict[str, ExprLike]) -> Expr:
18
+ def pack(fields: dict[str, ExprLike], *, nullable: bool = False) -> Expr:
19
19
  """Assemble a new struct from the given named fields.
20
20
 
21
21
  Args:
@@ -23,7 +23,9 @@ def pack(fields: dict[str, ExprLike]) -> Expr:
23
23
  """
24
24
  from spiral import expressions as se
25
25
 
26
- return Expr(_lib.spql.expr.struct.pack(list(fields.keys()), [se.lift(expr).__expr__ for expr in fields.values()]))
26
+ return Expr(
27
+ _lib.expr.struct.pack(list(fields.keys()), [se.lift(expr).__expr__ for expr in fields.values()], nullable)
28
+ )
27
29
 
28
30
 
29
31
  def merge(*structs: "ExprLike") -> Expr:
@@ -40,7 +42,7 @@ def merge(*structs: "ExprLike") -> Expr:
40
42
 
41
43
  if len(structs) == 1:
42
44
  return se.lift(structs[0])
43
- return Expr(_lib.spql.expr.struct.merge([se.lift(struct).__expr__ for struct in structs]))
45
+ return Expr(_lib.expr.struct.merge([se.lift(struct).__expr__ for struct in structs]))
44
46
 
45
47
 
46
48
  def select(expr: ExprLike, names: list[str] = None, exclude: list[str] = None) -> Expr:
@@ -54,4 +56,4 @@ def select(expr: ExprLike, names: list[str] = None, exclude: list[str] = None) -
54
56
  from spiral import expressions as se
55
57
 
56
58
  expr = se.lift(expr)
57
- return Expr(_lib.spql.expr.struct.select(expr.__expr__, names, exclude))
59
+ return Expr(_lib.expr.struct.select(expr.__expr__, names, exclude))
@@ -0,0 +1,62 @@
1
+ from spiral.expressions.base import Expr, ExprLike
2
+
3
+
4
def field(expr: ExprLike, field_name: str | None = None, tokenizer: str | None = None) -> Expr:
    """Configure a column for text indexing.

    Args:
        expr: An input column. The expression must either evaluate to a UTF-8,
            or, if a `field_name` is provided, to a struct with a field of that name.
        field_name: If provided, the expression must evaluate to a struct with a field of that name.
            The given field will be indexed.
        tokenizer: If provided, the text will be tokenized using the given tokenizer.
            It is forwarded in both the plain-column and the struct-field case.

    Returns:
        An expression that can be used to construct a text index.
    """
    from spiral import _lib
    from spiral.expressions import getitem, lift, merge, pack

    expr = lift(expr)
    if field_name is None:
        return Expr(_lib.expr.text.field(expr.__expr__, tokenizer))

    # Configure only the named child. The tokenizer must be passed here as well, otherwise a
    # caller-supplied tokenizer is silently ignored whenever `field_name` is given.
    child = _lib.expr.text.field(getitem(expr, field_name).__expr__, tokenizer)
    # Merge the configured child back with the original struct.
    # NOTE(review): presumably the later struct wins for `field_name` in merge — confirm.
    return merge(
        expr,
        pack({field_name: child}),
    )
29
+
30
+
31
def find(expr: ExprLike, term: str) -> Expr:
    """Build a text-search expression that looks for `term` in an index field.

    Args:
        expr: An index field.
        term: The term to search for.

    Returns:
        An expression that can be used in ranking for text search.
    """
    from spiral import _lib
    from spiral.expressions import lift

    index_field = lift(expr)
    native = _lib.expr.text.find(index_field.__expr__, term)
    return Expr(native)
46
+
47
+
48
def boost(expr: ExprLike, factor: float) -> Expr:
    """Scale the relevance of a ranking expression by `factor`.

    Args:
        expr: Rank by expression.
        factor: The factor by which to boost the relevance.

    Returns:
        An expression that can be used in ranking for text search.
    """
    from spiral import _lib
    from spiral.expressions import lift

    ranking = lift(expr)
    native = _lib.expr.text.boost(ranking.__expr__, factor)
    return Expr(native)
spiral/expressions/udf.py CHANGED
@@ -25,7 +25,7 @@ class UDF(BaseUDF):
25
25
  """A User-Defined Function (UDF)."""
26
26
 
27
27
  def __init__(self, name: str):
28
- super().__init__(_lib.spql.expr.udf.create(name, return_type=self.return_type, invoke=self.invoke))
28
+ super().__init__(_lib.expr.udf.create(name, return_type=self.return_type, invoke=self.invoke))
29
29
 
30
30
  @abc.abstractmethod
31
31
  def invoke(self, *input_args: pa.Array) -> pa.Array: ...
@@ -35,10 +35,10 @@ class RefUDF(BaseUDF):
35
35
  """A UDF over a single ref cell, and therefore can access the file object."""
36
36
 
37
37
  def __init__(self, name: str):
38
- super().__init__(_lib.spql.expr.udf.create(name, return_type=self.return_type, invoke=self.invoke, scope="ref"))
38
+ super().__init__(_lib.expr.udf.create(name, return_type=self.return_type, invoke=self.invoke, scope="ref"))
39
39
 
40
40
  @abc.abstractmethod
41
- def invoke(self, fp, *input_args: pa.Array) -> pa.Array:
41
+ def invoke(self, fp: _lib.FileObject, *input_args: pa.Array) -> pa.Array:
42
42
  """Invoke the UDF with the given arguments.
43
43
 
44
44
  NOTE: The first argument is always the ref cell. All array input args will be sliced to the appropriate row.
@@ -0,0 +1,3 @@
1
+ from spiral.iceberg.client import Iceberg
2
+
3
+ __all__ = ["Iceberg"]
@@ -0,0 +1,33 @@
1
+ from typing import TYPE_CHECKING
2
+
3
+ if TYPE_CHECKING:
4
+ from pyiceberg.catalog import Catalog
5
+
6
+ from spiral.client import Spiral
7
+
8
+
9
+ class Iceberg:
10
+ """
11
+ Apache Iceberg is a powerful open-source table format designed for high-performance data lakes.
12
+ Iceberg brings reliability, scalability, and advanced features like time travel, schema evolution,
13
+ and ACID transactions to your warehouse.
14
+ """
15
+
16
+ def __init__(self, spiral: "Spiral", *, project_id: str | None = None):
17
+ self._spiral = spiral
18
+ self._project_id = project_id
19
+
20
+ self._api = self._spiral.config.api
21
+
22
+ def catalog(self) -> "Catalog":
23
+ """Open the Iceberg catalog."""
24
+ from pyiceberg.catalog import load_catalog
25
+
26
+ return load_catalog(
27
+ "default",
28
+ **{
29
+ "type": "rest",
30
+ "uri": self._spiral.config.spiraldb.uri + "/iceberg",
31
+ "token": self._spiral.config.authn.token().expose_secret(),
32
+ },
33
+ )
@@ -0,0 +1,5 @@
1
+ from spiral.indexes.client import Indexes
2
+ from spiral.indexes.index import TextIndex
3
+ from spiral.indexes.scan import SearchScan
4
+
5
+ __all__ = ["Indexes", "SearchScan", "TextIndex"]