pyspiral 0.2.5__cp310-abi3-macosx_11_0_arm64.whl → 0.3.1__cp310-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spiral/substrait_.py CHANGED
@@ -251,7 +251,7 @@ class SubstraitConverter:
251
251
 
252
252
  if field_name in self.key_names:
253
253
  # This is a key column, so we need to select it from the scope.
254
- return se.var(field_name)
254
+ return se.key(field_name)
255
255
 
256
256
  scope = se.getitem(scope, field_name)
257
257
  scope_type = scope_type.field(ref.field).type
spiral/table.py CHANGED
@@ -3,11 +3,13 @@ from typing import TYPE_CHECKING, Literal
3
3
 
4
4
  import pyarrow as pa
5
5
 
6
- from spiral import expressions as se
7
- from spiral.config import FILE_FORMAT, Config
8
6
  from spiral.core.core import Table as CoreTable
9
- from spiral.core.core import flush_wal, write
7
+ from spiral.core.core import TableMaintenance, TableTransaction
8
+ from spiral.core.spec import Schema
10
9
  from spiral.expressions.base import Expr, ExprLike
10
+ from spiral.maintenance import Maintenance
11
+ from spiral.settings import FILE_FORMAT
12
+ from spiral.txn import Transaction
11
13
 
12
14
  if TYPE_CHECKING:
13
15
  import duckdb
@@ -53,7 +55,7 @@ class Table(Expr):
53
55
  from spiral import expressions as se
54
56
 
55
57
  if item in self._key_columns:
56
- return se.var(name=item)
58
+ return se.key(name=item)
57
59
 
58
60
  return super().__getitem__(item)
59
61
 
@@ -72,7 +74,7 @@ class Table(Expr):
72
74
 
73
75
  from spiral import expressions as se
74
76
 
75
- return se.merge(se.pack({key: se.var(key) for key in key_paths}), super().select(*other_paths, exclude=exclude))
77
+ return se.merge(se.pack({key: se.key(key) for key in key_paths}), super().select(*other_paths, exclude=exclude))
76
78
 
77
79
  @property
78
80
  def key_schema(self) -> pa.Schema:
@@ -80,7 +82,7 @@ class Table(Expr):
80
82
  return self._key_schema
81
83
 
82
84
  @property
83
- def schema(self) -> pa.Schema:
85
+ def schema(self) -> Schema:
84
86
  """Returns the FULL schema of the table.
85
87
 
86
88
  NOTE: This can be expensive for large tables.
@@ -111,8 +113,6 @@ class Table(Expr):
111
113
  where: ExprLike | None = None,
112
114
  asof: datetime | int | str = None,
113
115
  exclude_keys: bool = False,
114
- # TODO(marko): Support config.
115
- # config: Config | None = None,
116
116
  ) -> "Scan":
117
117
  """Reads the table. If projections are not provided, the entire table is read.
118
118
 
@@ -128,7 +128,6 @@ class Table(Expr):
128
128
  where=where,
129
129
  asof=asof,
130
130
  exclude_keys=exclude_keys,
131
- # config=config,
132
131
  )
133
132
 
134
133
  # NOTE: "vortex" is valid format. We don't want that visible in the API docs.
@@ -137,21 +136,36 @@ class Table(Expr):
137
136
  expr: ExprLike,
138
137
  *,
139
138
  format: Literal["parquet"] | None = None,
140
- # TODO(joe): support group_by, and config
141
- config: Config | None = None,
139
+ partition_size_bytes: int | None = None,
142
140
  ) -> None:
143
141
  """Write an item to the table inside a single transaction.
144
142
 
145
143
  :param expr: The expression to write. Must evaluate to a struct array.
146
144
  :param format: the format to write the data in. Defaults to "parquet".
147
- :param config: The configuration to use for this write.
145
+ :param partition_size_bytes: The maximum partition size in bytes.
148
146
  """
149
- write(
150
- self._table,
151
- se.lift(expr).__expr__,
152
- format=format or FILE_FORMAT,
153
- partition_size=config.partition_file_min_size if config else None,
154
- )
155
- # Flush the WAL if configured.
156
- if config is not None and config.flush_wal_on_write:
157
- flush_wal(self._table, manifest_format=format or FILE_FORMAT)
147
+ format = format or FILE_FORMAT
148
+
149
+ with self.txn(format=format) as txn:
150
+ txn.write(
151
+ expr,
152
+ partition_size_bytes=partition_size_bytes,
153
+ )
154
+
155
+ # NOTE: "vortex" is valid format. We don't want that visible in the API docs.
156
+ def txn(self, format: Literal["parquet"] | None = None) -> Transaction:
157
+ """Begins a new transaction. The transaction must be committed for writes to become visible.
158
+
159
+ IMPORTANT: While a transaction can be used to atomically write data to the table,
160
+ it is important that the primary key columns are unique within the transaction.
161
+
162
+ :param format: The format to use for the transaction. Defaults to "parquet".
163
+ """
164
+ return Transaction(TableTransaction(self._table.metastore, format or FILE_FORMAT))
165
+
166
+ def maintenance(self, format: Literal["parquet"] | None = None) -> Maintenance:
167
+ """Maintenance tasks for the table.
168
+
169
+ :param format: The format to use. Defaults to "parquet".
170
+ """
171
+ return Maintenance(TableMaintenance(self._table.metastore, format or FILE_FORMAT))
spiral/txn.py ADDED
@@ -0,0 +1,48 @@
1
+ from spiral.core.core import TableTransaction
2
+ from spiral.expressions import ExprLike
3
+
4
+
5
+ class Transaction:
6
+ """Spiral table transaction.
7
+
8
+ IMPORTANT: While a transaction can be used to atomically write data to the table,
9
+ it is important that the primary key columns are unique within the transaction.
10
+ """
11
+
12
+ def __init__(self, transaction: TableTransaction):
13
+ self._transaction = transaction
14
+
15
+ @property
16
+ def status(self) -> str:
17
+ """The status of the transaction."""
18
+ return self._transaction.status
19
+
20
+ def __enter__(self):
21
+ return self
22
+
23
+ def __exit__(self, exc_type, exc_value, traceback):
24
+ if exc_type is None:
25
+ self._transaction.commit()
26
+ else:
27
+ self._transaction.abort()
28
+
29
+ def write(self, expr: ExprLike, *, partition_size_bytes: int | None = None):
30
+ """Write an item to the table as part of this transaction.
31
+
32
+ :param expr: The expression to write. Must evaluate to a struct array.
33
+ :param partition_size_bytes: The maximum partition size in bytes.
34
+ If not provided, the default partition size is used.
35
+ """
36
+ from spiral import expressions as se
37
+
38
+ expr = se.lift(expr)
39
+
40
+ self._transaction.write(expr.__expr__, partition_size_bytes=partition_size_bytes)
41
+
42
+ def commit(self):
43
+ """Commit the transaction."""
44
+ self._transaction.commit()
45
+
46
+ def abort(self):
47
+ """Abort the transaction."""
48
+ self._transaction.abort()
spiral/config.py DELETED
@@ -1,26 +0,0 @@
1
- import os
2
-
3
- from pydantic_settings import BaseSettings, SettingsConfigDict
4
-
5
- FILE_FORMAT = os.environ.get("SPIRAL_FILE_FORMAT", "parquet")
6
-
7
-
8
- class Config(BaseSettings):
9
- model_config = SettingsConfigDict(
10
- env_nested_delimiter="__",
11
- env_prefix="SPIRAL_CORE__",
12
- frozen=True,
13
- )
14
-
15
- partition_file_min_size: int = 256 * 1024 * 1024 # 256MB
16
- flush_wal_on_write: bool = False
17
-
18
- # TODO(marko): Support config. Unused after migration to Rust.
19
- # #: Defaults to ThreadPoolExecutor's default (based on os.cpu_count().
20
- # scan_num_threads: int | None = 61 # 61 is used by Golang and Tokio, for some reason...
21
- #
22
- # #: The duration of WAL that is preserved to allow for txn conflict resolution.
23
- # transaction_window: int = 0 if DEV else timedelta(days=1).total_seconds()
24
- #
25
- # #: Truncation length of string statistics.
26
- # string_truncation_length: int = 1024