pyspiral 0.6.10__cp312-abi3-macosx_11_0_arm64.whl → 0.6.12__cp312-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyspiral might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pyspiral
3
- Version: 0.6.10
3
+ Version: 0.6.12
4
4
  Classifier: Intended Audience :: Science/Research
5
5
  Classifier: Operating System :: OS Independent
6
6
  Classifier: Programming Language :: Python
@@ -1,8 +1,8 @@
1
- pyspiral-0.6.10.dist-info/METADATA,sha256=WpZleQ1yllvMlxL6zo5Ypw5Bzlebi10H7XxHM8EnstY,1843
2
- pyspiral-0.6.10.dist-info/WHEEL,sha256=KQvxBiy7GLcML6Ad3w_ZPrgSvER1uXd7aYb6wy6b44Y,103
3
- pyspiral-0.6.10.dist-info/entry_points.txt,sha256=uft7u-a6g40NLt4Q6BleWbK4NY0M8nZuYPpP8DV0EOk,45
1
+ pyspiral-0.6.12.dist-info/METADATA,sha256=ANXjtdzd8s_zdWLd-mTm0X07pWbjlpkjQP8X5yP4qpY,1843
2
+ pyspiral-0.6.12.dist-info/WHEEL,sha256=KQvxBiy7GLcML6Ad3w_ZPrgSvER1uXd7aYb6wy6b44Y,103
3
+ pyspiral-0.6.12.dist-info/entry_points.txt,sha256=uft7u-a6g40NLt4Q6BleWbK4NY0M8nZuYPpP8DV0EOk,45
4
4
  spiral/__init__.py,sha256=n4JNLrO3wyw_k_U_JKyNiGON0wEpfvqxDhDdB2P6dhM,1007
5
- spiral/_lib.abi3.so,sha256=ptsuCD4f03_iVGiwI6y5VTZThQxOvPtOW-6xBMfMHSI,66386640
5
+ spiral/_lib.abi3.so,sha256=ws3Kwbb_S3lcrGFbBHeYwfNVPWHi0uSPjJyzIS1r5W8,70093168
6
6
  spiral/adbc.py,sha256=7IxfWIeQN-fh0W5OdN_PP2x3pzQYg6ZUOLsHg3jktqw,14842
7
7
  spiral/api/__init__.py,sha256=ULBlVq3PnfNOO6T5naE_ULmmii-83--qTuN2PpAUQN0,2241
8
8
  spiral/api/admin.py,sha256=A1iVR1XYJSObZivPAD5UzmPuMgupXc9kaHNYYa_kwfs,585
@@ -24,52 +24,56 @@ spiral/cli/app.py,sha256=smzGj5a2RwhM9RQChmlEeKZLN4Fk60-bP7Lm5_Is1Rw,2760
24
24
  spiral/cli/console.py,sha256=6JHbAQV6MFWz3P-VzqPOjhHpkIQagsCdzTMvmuDKMkU,2580
25
25
  spiral/cli/fs.py,sha256=vaPcSc2YghhHeipxNitIdsHaBhFwlwkvPFqYsFSN9P0,2927
26
26
  spiral/cli/iceberg.py,sha256=Q14tcGcn1LixbFCYP0GhfYwFFXTmmi8tqBPYwalJEyE,3248
27
- spiral/cli/key_spaces.py,sha256=x3IFRP5d47pKiAHeWExYMOBaT2TwxbWjVM01SUqKrwI,2943
27
+ spiral/cli/key_spaces.py,sha256=TF1tbRnrjemp4aMAbLc7o4_jPChIumaQGPuvfW0sR5o,2945
28
28
  spiral/cli/login.py,sha256=2tw6uN5rEpiMMAmjQSB3-JUPf3C0Wc1eTGCDxhYtJps,731
29
29
  spiral/cli/orgs.py,sha256=fmOuLxpeIFfKqePRi292Gv9k-EF5pPn_tbKd2BLl2Ig,2869
30
30
  spiral/cli/printer.py,sha256=aosc763hDFgoXJGkiANmNyO3kAsecAS1JWgjEhn8GCM,1784
31
31
  spiral/cli/projects.py,sha256=1M1nGrBT-t0aY9RV5Cnmzy7YrhIvmHwdkpa3y9j8rG8,5756
32
32
  spiral/cli/state.py,sha256=10wTIVQ0SJkY67Z6-KQ1LFlt3aVIPmZhoHFdTwp4kNA,130
33
- spiral/cli/tables.py,sha256=fFte_wMNcB0V-fmfSXfSbtV4UlAi-Xw5nYDJ0b62CGk,6360
33
+ spiral/cli/tables.py,sha256=qm3izcysElJrQlerNZdfx5RWSVXtyVfkP3o_H51ltFw,6366
34
34
  spiral/cli/telemetry.py,sha256=Uxo1Q1FkKJ6n6QNGOUmL3j_pRRWRx0qWIhoP-U9BuR0,589
35
35
  spiral/cli/text.py,sha256=DlWGe4JrkdERAiqyITNpk91Wqb63Re99rNYlIFsIamc,4031
36
36
  spiral/cli/types.py,sha256=XYzo1GgX7dBBItoBSrHI4vO5C2lLmS2sktb-2GnGH3E,1362
37
37
  spiral/cli/workloads.py,sha256=2_SLfQTFN6y73R9H0i9dk8VIOVagKxSxOpHXC56yptY,2015
38
- spiral/client.py,sha256=N4sQLxtQ6GYCnj00hm4VX1vUVUqzQdHhl_KfQwp-1LQ,6345
38
+ spiral/client.py,sha256=pw6vB85oLVbBudc_HRzmLCItcecsTjNM5SMu_kVOMCo,6568
39
39
  spiral/core/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
40
  spiral/core/_tools/__init__.pyi,sha256=b2KLfTOQ67pjfbYt07o0IGiTu5o2bZw69lllV8v0Dps,143
41
41
  spiral/core/authn/__init__.pyi,sha256=z_GWyIS62fuiYQrYO8hzw4W8oGaiciqS1u5qtAt54VY,769
42
- spiral/core/client/__init__.pyi,sha256=1HK3SOMT1QKmD5Hai58ZFjiEZK0QzyYtP84hse8SBEI,6666
43
- spiral/core/table/__init__.pyi,sha256=QqG_pMlPhMtXG-56dXyQjOWVKMugPP0nnYnvYaY0Q10,3288
42
+ spiral/core/client/__init__.pyi,sha256=ajF8XaxThnTdsPpw1k3pPLEurIaDg9yeXqwIRJNlJTY,6665
43
+ spiral/core/expr/__init__.pyi,sha256=3HSKjkotiEkxBvGBALXEBIie0JiyI9bCpehwA3nMQkU,571
44
+ spiral/core/expr/images/__init__.pyi,sha256=wnE_wZXq7a4iqTg3SVm-ssxGw1WQZyk5dGOPaP4Btko,73
45
+ spiral/core/expr/list_/__init__.pyi,sha256=Q_9c87eIQfZbqlaw_rq3fvs93YEsW7K5VYk6VZ4g6mU,126
46
+ spiral/core/expr/refs/__init__.pyi,sha256=nZZP3l_Z6bLx6V8lTcH3Jgo--xwfADOU2XdTAvM5IMk,127
47
+ spiral/core/expr/str_/__init__.pyi,sha256=Bm6fZK-d4fNbJuuBhVoWMACXUbQQ2SjlhgrOpdOHIPM,86
48
+ spiral/core/expr/struct_/__init__.pyi,sha256=MXckd98eV_x3X0RhEWvlkA3DcDXRtLs5pNnTQkc09nE,296
49
+ spiral/core/expr/text/__init__.pyi,sha256=ed83n1xcsGY7_QDhMmJGnSQ20UrJFXcdv1AveSEcS1c,175
50
+ spiral/core/expr/udf/__init__.pyi,sha256=zsZs081KVhY3-1JidqTkWMW81Qd_ScoTGZvasIhIK-4,358
51
+ spiral/core/expr/video/__init__.pyi,sha256=nQJEcSsigZuRpMjkI_O4EEtMK_n2zRvorcL_KEeD5vU,95
52
+ spiral/core/table/__init__.pyi,sha256=HN4ag8E1QDF_VgekJZqjhuQLhorU3ivjIOBHai2OEVc,3672
44
53
  spiral/core/table/manifests/__init__.pyi,sha256=eVfDpmhYSjafIvvALqAkZe5baN3Y1HpKpxYEbjwd4gQ,1043
45
54
  spiral/core/table/metastore/__init__.pyi,sha256=rc3u9MwEKRvL2kxOc8lBorddFRnM8o_o1frqtae86a4,1697
46
- spiral/core/table/spec/__init__.pyi,sha256=OFYJXPXix7gskYJIMog7IniZslEPJ0xvL-sUSFDPbXs,5643
47
- spiral/dataloader.py,sha256=FFZhIflQPEygXe-xBLifQnnxANi4CFooaHRm4i-EGHo,10335
55
+ spiral/core/table/spec/__init__.pyi,sha256=PgacM_fZmkHuplj7IbYrj5KfFI3-VPYnyuzI2w7A70Y,5717
56
+ spiral/dataloader.py,sha256=2haLoI6KLrzXfPozAgEa-eCOSDsNldJ1qwCmFpNMyTQ,10281
48
57
  spiral/dataset.py,sha256=PMLoXnXuEUciP6-NXqTmQLXu0UIH7OcC4-iZtY_iuO8,7973
49
58
  spiral/datetime_.py,sha256=elXaUWtZuuLVcu9E0aXnvYRPB9XWqZbLDToozQYQYjU,950
50
59
  spiral/debug/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
60
  spiral/debug/manifests.py,sha256=7f1O3ba9mrA5nXpOF9cEIQuUAteP5wiBkFy_diQJ7No,3216
52
61
  spiral/debug/metrics.py,sha256=XdRDcjggtsLNGCAjam6IxG9072pz_d2C8iLApNRFUtk,2044
53
62
  spiral/debug/scan.py,sha256=UEm_aRnql5pwDPTpZgakMLNjlzkKL4RurBFFqH_BLAQ,9526
54
- spiral/expressions/__init__.py,sha256=KhwFjVKoFgx1S6hkVcE8aZjoHY_1N-BgQ2rGEZfPQvM,7957
55
- spiral/expressions/base.py,sha256=915gpvZZCTRCO5q93pwwmhf-R6C23LQsyDt4Q2dHk9s,5290
56
- spiral/expressions/http.py,sha256=begUydWoFHEqjeLkATvI_v66Ez6_rR-OQBWO5cHbb9c,2742
57
- spiral/expressions/io.py,sha256=gJ2a0FKMmdxarWKENulPRwH7KDvSJTIh_OUxX306xAM,3045
63
+ spiral/expressions/__init__.py,sha256=UNxK5qQNrl-BuHsjKcWDj35w5lJviLkGFUQj8OhLID0,7919
64
+ spiral/expressions/base.py,sha256=PvhJkcUSsPSIaxirHVzM9zlqyBXiaiia1HXohXdOmL4,5377
58
65
  spiral/expressions/list_.py,sha256=MMt5lf5H1M3O-x6N_PvqOLGq9NOk6Ukv0fPWwPC_uy4,1809
59
- spiral/expressions/mp4.py,sha256=_xGVnkygddzxP9a8OACJ8_KXnejuVbYCVKBCXBQ798Y,2151
60
- spiral/expressions/png.py,sha256=KO8X0OmMzUFwpg2I_j0JTyldPzVXDWIMzjWMWDV9vIY,506
61
- spiral/expressions/qoi.py,sha256=gvIbb6fXb_Bb080sn9wkpbGGrPs2UEcTXCfuv4-kcYQ,506
62
- spiral/expressions/refs.py,sha256=omeHBQ5o6N4xgZ3x5Xz7IRrWwYBBtQY8DYK0NNAxeGo,2109
66
+ spiral/expressions/s3.py,sha256=D-kuLifIEY314Q8rB2-ZP8U-IT0FywtbJDMuyusBKiQ,414
63
67
  spiral/expressions/str_.py,sha256=tY8RXW3JWvr1-bEfCZtk5FAf11wKJnXPuA9EoeJ9tA4,1265
64
68
  spiral/expressions/struct.py,sha256=pGAnCDh6AK0BK1XfZ1qG4ce4ranIQEE1HQsgmzBcfwQ,2038
65
69
  spiral/expressions/text.py,sha256=-02gBWYoyNQ3qQ1--9HTa8IryUDojYQVIp8C7rgnOWQ,1893
66
- spiral/expressions/tiff.py,sha256=fQwIn0kLFBM2Y3YYIHmTgb_EIRHKT2fNc77nioDQQw4,8044
67
- spiral/expressions/udf.py,sha256=yb9MIcrFftpNDxgBF228cvdv6TY-hEFikYz2fq_nzWo,1353
70
+ spiral/expressions/tiff.py,sha256=4dngO97bT1QY0By7-PxOQVmSwQC3PQAiixVhLJ-4HMQ,7986
71
+ spiral/expressions/udf.py,sha256=yvZCuGK9S9Sa9I18h-apUxsDni2B7E9WEqPrxHBjUWE,1657
68
72
  spiral/grpc_.py,sha256=f3czdP1Mxme42Y5--a5ogYq1TTiWn-J_MlGjwJ2mWwM,1015
69
73
  spiral/iceberg.py,sha256=JGq62Qnf296r9_hRAoH85GQq45-uSBjwXWw_CvPi6G4,930
70
74
  spiral/iterable_dataset.py,sha256=Eekg9ad8tcwXcloHWReBbvCSr5ZappRHn2ldKTvwqS0,4622
71
75
  spiral/key_space_index.py,sha256=NAB_nONEjpMYbse8suz42w7Qb5OPHuKN9h9CT2NJe08,1460
72
- spiral/project.py,sha256=CO_Pn6vPqaonNvRdCNRFcBWr4TqO2AsAUTH5xawIeCE,7283
76
+ spiral/project.py,sha256=VsokZgS0TqIel7UAXMyoBToxn-l_D3ivGwc41x7HLF0,7277
73
77
  spiral/protogen/_/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
74
78
  spiral/protogen/_/arrow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
75
79
  spiral/protogen/_/arrow/flight/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -87,16 +91,16 @@ spiral/protogen/_/substrait/extensions/__init__.py,sha256=nhnEnho70GAT8WPj2xtwJU
87
91
  spiral/protogen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
88
92
  spiral/protogen/util.py,sha256=smnvVo6nYH3FfDm9jqhNLaXz4bbTBaQezHQDCTvZyiQ,1486
89
93
  spiral/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
90
- spiral/scan.py,sha256=4PUlI_DHbO1WTttLia6DinhGtOWsCiqek4ZljoEiRZc,10523
94
+ spiral/scan.py,sha256=fXZq0NL9YApt-UwkGpaT0ETn-rK-1_tltq7nqsImZI4,11199
91
95
  spiral/server.py,sha256=ztBmB5lBnUz-smQxR_tC8AI5SOhz17wH0MI3GuzDUdM,600
92
96
  spiral/settings.py,sha256=JRQSwjJyNaCqQdQLxiqB_O_LZRQXMLyshJBrI2LZHwM,3113
93
97
  spiral/snapshot.py,sha256=cTobi5jtiANxalGA-isokQHblNmXGtuUvgUGGNVybsI,1555
94
98
  spiral/streaming_/__init__.py,sha256=s7MlW2ERsuZmZGExLFL6RcZon2e0tNBocBg5ANgki7k,61
95
99
  spiral/streaming_/reader.py,sha256=tl_lC9xgh1-QFhsZn4xQT7It3PVTzHCEUT2BG2dWBRQ,4166
96
- spiral/streaming_/stream.py,sha256=nXnygiuCxi1D3PhaxV8Ujif4J9ly_OczA7CZ3W4WN2w,5913
100
+ spiral/streaming_/stream.py,sha256=DM1hBDHnWm1ZFKZ-hZ4zxeSXITcUI6kWzwdJZvywI8o,5915
97
101
  spiral/substrait_.py,sha256=AKeOD4KIXvz2J4TYxnIneOiHddtBIyOhuNxVO_uH0eg,12592
98
- spiral/table.py,sha256=G05b6M0uVmT5ew5GxuzsVB4rQzg25W3zGMTftL07pJU,11026
102
+ spiral/table.py,sha256=dwQr1EAACbfxG8fISFqRrUEAE2P2y6xsx0vFK9Gwyfc,11662
99
103
  spiral/text_index.py,sha256=FQ9rgIEGLSJryS9lFdMhKtPFey18BXoWbPXyvZPJJ04,442
100
- spiral/transaction.py,sha256=h6YdAwOYX6qq-tXYV4i9yhy1Nq1tIfRphY_fk7Q_yLQ,1854
104
+ spiral/transaction.py,sha256=M_Tf-TijVBluuInWk6XSFNCR2dKN4S9EdsHM3QD20ng,2948
101
105
  spiral/types_.py,sha256=W_jyO7F6rpPiH69jhgSgV7OxQZbOlb1Ho3InpKUP6Eo,155
102
- pyspiral-0.6.10.dist-info/RECORD,,
106
+ pyspiral-0.6.12.dist-info/RECORD,,
spiral/_lib.abi3.so CHANGED
Binary file
spiral/cli/key_spaces.py CHANGED
@@ -64,7 +64,7 @@ def show(
64
64
  """Show index partitions."""
65
65
  index_id = get_index_id(project, name)
66
66
  index = state.spiral.key_space_index(index_id)
67
- shards = state.spiral._ops().compute_shards(index.core)
67
+ shards = state.spiral.internal.compute_shards(index.core)
68
68
 
69
69
  rich_table = rich.table.Table("Begin", "End", "Cardinality", title=f"Index {index.name} Partitions")
70
70
  for partition in shards:
spiral/cli/tables.py CHANGED
@@ -130,7 +130,7 @@ def flush(
130
130
  keep_latest_s = int(duration.total_seconds()) if duration is not None else None
131
131
 
132
132
  identifier, t = get_table(project, table, dataset)
133
- state.spiral._ops().flush_wal(t.core, keep_latest_s=keep_latest_s) # pyright: ignore[reportPrivateUsage]
133
+ state.spiral.internal.flush_wal(t.core, keep_latest_s=keep_latest_s) # pyright: ignore[reportPrivateUsage]
134
134
  CONSOLE.print(f"Flushed WAL for table {identifier} in project {project}.")
135
135
 
136
136
 
@@ -143,10 +143,10 @@ def manifests(
143
143
  _, t = get_table(project, table, dataset)
144
144
  s = t.snapshot()
145
145
 
146
- key_space_state = state.spiral._ops().key_space_state(s.core) # pyright: ignore[reportPrivateUsage]
146
+ key_space_state = state.spiral.internal.key_space_state(s.core) # pyright: ignore[reportPrivateUsage]
147
147
  key_space_manifest = key_space_state.manifest
148
148
 
149
- column_groups_states = state.spiral._ops().column_groups_states(s.core, key_space_state) # pyright: ignore[reportPrivateUsage]
149
+ column_groups_states = state.spiral.internal.column_groups_states(s.core, key_space_state) # pyright: ignore[reportPrivateUsage]
150
150
  display_manifests(key_space_manifest, [(x.column_group, x.manifest) for x in column_groups_states])
151
151
 
152
152
 
spiral/client.py CHANGED
@@ -6,11 +6,11 @@ import pyarrow as pa
6
6
 
7
7
  from spiral.api import SpiralAPI
8
8
  from spiral.api.projects import CreateProjectRequest, CreateProjectResponse
9
- from spiral.core.client import Operations
9
+ from spiral.core.client import Internal
10
10
  from spiral.core.client import Spiral as CoreSpiral
11
11
  from spiral.datetime_ import timestamp_micros
12
12
  from spiral.expressions import ExprLike
13
- from spiral.scan import Scan
13
+ from spiral.scan import Scan, ScanState
14
14
  from spiral.settings import Settings, settings
15
15
 
16
16
  if TYPE_CHECKING:
@@ -35,9 +35,13 @@ class Spiral:
35
35
  return self._config.api
36
36
 
37
37
  @property
38
- def _core(self) -> CoreSpiral:
38
+ def core(self) -> CoreSpiral:
39
39
  return self._config.core
40
40
 
41
+ @property
42
+ def internal(self) -> Internal:
43
+ return self.core.internal(format=settings().file_format)
44
+
41
45
  @property
42
46
  def organization(self) -> str:
43
47
  if self._org is None:
@@ -79,19 +83,19 @@ class Spiral:
79
83
  """Open a table using an ID."""
80
84
  from spiral.table import Table
81
85
 
82
- return Table(self, self._core.table(table_id))
86
+ return Table(self, self.core.table(table_id))
83
87
 
84
88
  def text_index(self, index_id: str) -> "TextIndex":
85
89
  """Open a text index using an ID."""
86
90
  from spiral.text_index import TextIndex
87
91
 
88
- return TextIndex(self._core.text_index(index_id))
92
+ return TextIndex(self.core.text_index(index_id))
89
93
 
90
94
  def key_space_index(self, index_id: str) -> "KeySpaceIndex":
91
95
  """Open a key space index using an ID."""
92
96
  from spiral.key_space_index import KeySpaceIndex
93
97
 
94
- return KeySpaceIndex(self._core.key_space_index(index_id))
98
+ return KeySpaceIndex(self.core.key_space_index(index_id))
95
99
 
96
100
  def scan(
97
101
  self,
@@ -117,13 +121,21 @@ class Spiral:
117
121
  where = se.lift(where)
118
122
 
119
123
  return Scan(
120
- self._core.scan(
124
+ self.core.scan(
121
125
  projection.__expr__,
122
126
  filter=where.__expr__ if where else None,
123
127
  asof=asof,
124
128
  ),
125
129
  )
126
130
 
131
+ def load_scan(self, scan_state: ScanState) -> Scan:
132
+ """Load a scan from a serialized scan state.
133
+
134
+ Args:
135
+ scan_state: The serialized scan state.
136
+ """
137
+ return Scan(self.core.load_scan(scan_state.core))
138
+
127
139
  # TODO(marko): This should be query, and search should be query + scan.
128
140
  def search(
129
141
  self,
@@ -155,17 +167,13 @@ class Spiral:
155
167
  freshness_window = timedelta(seconds=0)
156
168
  freshness_window_s = int(freshness_window.total_seconds())
157
169
 
158
- return self._core.search(
170
+ return self.core.search(
159
171
  top_k=top_k,
160
172
  rank_by=rank_by.__expr__,
161
173
  filters=filters.__expr__ if filters else None,
162
174
  freshness_window_s=freshness_window_s,
163
175
  )
164
176
 
165
- def _ops(self) -> Operations:
166
- """Access maintenance operations."""
167
- return self._core._ops(format=settings().file_format)
168
-
169
177
  @property
170
178
  def iceberg(self) -> "Iceberg":
171
179
  """
@@ -3,7 +3,7 @@ from typing import Any, Literal
3
3
  import pyarrow as pa
4
4
  from spiral.api.types import DatasetName, IndexName, ProjectId, RootUri, TableName
5
5
  from spiral.core.authn import Authn
6
- from spiral.core.table import ColumnGroupState, KeyRange, KeySpaceState, Scan, Snapshot, Table, Transaction
6
+ from spiral.core.table import ColumnGroupState, KeyRange, KeySpaceState, Scan, ScanState, Snapshot, Table, Transaction
7
7
  from spiral.core.table.spec import ColumnGroup, Schema
8
8
  from spiral.expressions import Expr
9
9
 
@@ -30,6 +30,10 @@ class Spiral:
30
30
  """Construct a table scan."""
31
31
  ...
32
32
 
33
+ def load_scan(self, scan_state: ScanState) -> Scan:
34
+ """Load a scan from a serialized scan state."""
35
+ ...
36
+
33
37
  def transaction(self, table: Table, format: str | None = None, retries: int | None = 3) -> Transaction:
34
38
  """Being a table transaction."""
35
39
  ...
@@ -100,12 +104,8 @@ class Spiral:
100
104
  """Create a new key space index in the specified project."""
101
105
  ...
102
106
 
103
- def _ops(self, *, format: str | None = None) -> Operations:
104
- """Access maintenance operations.
105
-
106
- IMPORTANT: This API is internal and is currently exposed for development & testing.
107
- Maintenance operations are run by SpiralDB.
108
- """
107
+ def internal(self, *, format: str | None = None) -> Internal:
108
+ """Internal client APIs. It can change without notice."""
109
109
  ...
110
110
 
111
111
  class TextIndex:
@@ -158,7 +158,7 @@ class ShuffleConfig:
158
158
  max_batch_size: int | None = None,
159
159
  ): ...
160
160
 
161
- class Operations:
161
+ class Internal:
162
162
  def flush_wal(self, table: Table, *, keep_latest_s: int | None = None) -> None:
163
163
  """
164
164
  Flush the write-ahead log of the table.
@@ -0,0 +1,15 @@
1
+ from pyarrow import Array, DataType, Scalar
2
+
3
+ class Expr:
4
+ """Low level expression class."""
5
+
6
+ def aux(name: str, data_type: DataType) -> Expr: ...
7
+
8
+ # Array is correct (there is no ArrayData), see the table here:
9
+ # https://arrow.apache.org/rust/arrow_pyarrow/index.html
10
+ def scalar(array: Array[Scalar[DataType]]) -> Expr: ...
11
+ def not_(expr: Expr) -> Expr: ...
12
+ def is_null(expr: Expr) -> Expr: ...
13
+ def binary(op: str, expr: Expr, Expr: Expr) -> Expr: ...
14
+ def cast(_expr: Expr, _data_type: DataType) -> Expr: ...
15
+ def array_lit(array: Array[Scalar[DataType]]) -> Expr: ...
@@ -0,0 +1,3 @@
1
+ from .. import Expr
2
+
3
+ def encode_(images: Expr, format: str) -> Expr: ...
@@ -0,0 +1,4 @@
1
+ from .. import Expr
2
+
3
+ def contains(list: Expr, expr: Expr) -> Expr: ...
4
+ def element_at(list: Expr, element: Expr) -> Expr: ...
@@ -0,0 +1,4 @@
1
+ from .. import Expr
2
+
3
+ def ref(expr: Expr, field: str | None) -> Expr: ...
4
+ def deref(expr: Expr, field: str | None) -> Expr: ...
@@ -0,0 +1,3 @@
1
+ from .. import Expr
2
+
3
+ def substr(expr: Expr, begin: int, end: int | None) -> Expr: ...
@@ -0,0 +1,6 @@
1
+ from .. import Expr
2
+
3
+ def getitem(expr: Expr, item: str) -> Expr: ...
4
+ def select(expr: Expr, including: list[str] | None = None, excluding: list[str] | None = None) -> Expr: ...
5
+ def pack(names: list[str], children: list[str], nullable: bool) -> Expr: ...
6
+ def merge(names: list[Expr]) -> Expr: ...
@@ -0,0 +1,5 @@
1
+ from .. import Expr
2
+
3
+ def field(expr: Expr, tokeneizer: str | None) -> Expr: ...
4
+ def find(expr: Expr, term: str) -> Expr: ...
5
+ def boost(expr: Expr, factor: float) -> Expr: ...
@@ -0,0 +1,14 @@
1
+ from collections.abc import Callable
2
+
3
+ from pyarrow import Array, DataType, Scalar
4
+
5
+ from .. import Expr
6
+
7
+ class UDF:
8
+ def __call__(self, args: list[Expr]) -> Expr: ...
9
+
10
+ def create(
11
+ name: str,
12
+ return_type: Callable[[tuple[DataType, ...]], DataType],
13
+ invoke: Callable[[tuple[Array[Scalar[DataType]], ...]], Array[Scalar[DataType]]],
14
+ ) -> UDF: ...
@@ -0,0 +1,3 @@
1
+ from .. import Expr
2
+
3
+ def read(expr: Expr, ranges: Expr, crops: Expr, format: str) -> Expr: ...
@@ -5,7 +5,7 @@ from spiral.core.client import Shard, ShuffleConfig
5
5
 
6
6
  from .manifests import FragmentManifest
7
7
  from .metastore import PyMetastore
8
- from .spec import ColumnGroup, Key, Schema, WriteAheadLog
8
+ from .spec import ColumnGroup, Key, Operation, Schema, WriteAheadLog
9
9
 
10
10
  class KeyRange:
11
11
  """A right-exclusive range of keys."""
@@ -52,6 +52,11 @@ class Snapshot:
52
52
  table: Table
53
53
  wal: WriteAheadLog
54
54
 
55
+ class ScanState:
56
+ def to_json(self) -> str: ...
57
+ @staticmethod
58
+ def from_json(json: str) -> ScanState: ...
59
+
55
60
  class Scan:
56
61
  def key_schema(self) -> Schema: ...
57
62
  def schema(self) -> Schema: ...
@@ -62,6 +67,7 @@ class Scan:
62
67
  def column_groups(self) -> list[ColumnGroup]: ...
63
68
  def column_group_state(self, column_group: ColumnGroup) -> ColumnGroupState: ...
64
69
  def key_space_state(self, table_id: str) -> KeySpaceState: ...
70
+ def scan_state(self) -> ScanState: ...
65
71
  def to_record_batches(
66
72
  self,
67
73
  key_table: pa.Table | pa.RecordBatch | None = None,
@@ -95,7 +101,10 @@ class Transaction:
95
101
  status: str
96
102
 
97
103
  def write(self, table: pa.RecordBatchReader, *, partition_size_bytes: int | None = None): ...
104
+ def writeback(self, scan: Scan, *, partition_size_bytes: int | None = None, batch_readahead: int | None = None): ...
98
105
  def drop_columns(self, column_paths: list[str]): ...
106
+ def take(self) -> list[Operation]: ...
107
+ def include(self, ops: list[Operation]): ...
99
108
  def commit(self): ...
100
109
  def abort(self): ...
101
110
  def metrics(self) -> dict[str, Any]: ...
@@ -62,6 +62,10 @@ class ColumnGroupMetadata:
62
62
  def apply_wal(self, wal: WriteAheadLog) -> ColumnGroupMetadata:
63
63
  """Applies the given WAL to the metadata."""
64
64
 
65
+ class Operation:
66
+ # Base class for all operations in the WAL.
67
+ ...
68
+
65
69
  class LogEntry:
66
70
  ts: int
67
71
  operation: (
spiral/dataloader.py CHANGED
@@ -88,22 +88,24 @@ class SpiralDataLoader:
88
88
  - map_workers for parallel post-processing (tokenization, decoding, etc.)
89
89
  - Built-in checkpoint support via skip_samples
90
90
  - Explicit shard-based architecture for distributed training
91
- """
92
91
 
93
- # Example usage:
94
- #
95
- # Simple usage:
96
- # loader = SpiralDataLoader(scan, batch_size=32)
97
- # for batch in loader:
98
- # train_step(batch)
99
- #
100
- # With parallel transforms:
101
- # loader = SpiralDataLoader(
102
- # scan,
103
- # batch_size=32,
104
- # transform_fn=tokenize_batch,
105
- # map_workers=4,
106
- # )
92
+ Simple usage:
93
+ ```python
94
+ loader = SpiralDataLoader(scan, batch_size=32)
95
+ for batch in loader:
96
+ train_step(batch)
97
+ ```
98
+
99
+ With parallel transforms:
100
+ ```python
101
+ loader = SpiralDataLoader(
102
+ scan,
103
+ batch_size=32,
104
+ transform_fn=tokenize_batch,
105
+ map_workers=4,
106
+ )
107
+ ```
108
+ """
107
109
 
108
110
  def __init__(
109
111
  self,
@@ -220,16 +222,21 @@ class SpiralDataLoader:
220
222
 
221
223
  Returns:
222
224
  Dictionary containing samples_yielded, seed, and shards.
225
+
226
+ Example checkpoint:
227
+ ```python
228
+ loader = SpiralDataLoader(scan, batch_size=32, seed=42)
229
+ for i, batch in enumerate(loader):
230
+ if i == 10:
231
+ checkpoint = loader.state_dict()
232
+ break
233
+ ```
234
+
235
+ Example resume:
236
+ ```python
237
+ loader = SpiralDataLoader.from_state_dict(scan, checkpoint, batch_size=32)
238
+ ```
223
239
  """
224
- # Example usage:
225
- # loader = SpiralDataLoader(scan, batch_size=32, seed=42)
226
- # for i, batch in enumerate(loader):
227
- # if i == 10:
228
- # checkpoint = loader.state_dict()
229
- # break
230
- #
231
- # # Resume later with exact same shards
232
- # loader = SpiralDataLoader.from_state_dict(scan, checkpoint, batch_size=32)
233
240
  return {
234
241
  "samples_yielded": self._samples_yielded,
235
242
  "seed": self.seed,
@@ -257,20 +264,22 @@ class SpiralDataLoader:
257
264
 
258
265
  Returns:
259
266
  New SpiralDataLoader instance configured to resume from the checkpoint.
267
+
268
+ Save checkpoint during training:
269
+ ```python
270
+ loader = scan.to_distributed_data_loader(scan, batch_size=32, seed=42)
271
+ checkpoint = loader.state_dict()
272
+ ```
273
+
274
+ Resume later using the same shards from checkpoint:
275
+ ```python
276
+ resumed_loader = SpiralDataLoader.from_state_dict(
277
+ scan,
278
+ checkpoint,
279
+ batch_size=32,
280
+ transform_fn=my_transform,
281
+ )
260
282
  """
261
- # Example usage:
262
- #
263
- # Save checkpoint during training:
264
- # loader = scan.to_distributed_data_loader(scan, batch_size=32, seed=42)
265
- # checkpoint = loader.state_dict()
266
- #
267
- # Resume later using the same shards from checkpoint:
268
- # resumed_loader = SpiralDataLoader.from_state_dict(
269
- # scan,
270
- # checkpoint,
271
- # batch_size=32,
272
- # transform_fn=my_transform,
273
- # )
274
283
 
275
284
  # Extract resume parameters from state
276
285
  seed = state.get("seed", 42)
@@ -8,31 +8,22 @@ import pyarrow as pa
8
8
 
9
9
  from spiral import _lib, arrow_
10
10
 
11
- from . import http as http
12
- from . import io as io
13
11
  from . import list_ as list
14
- from . import mp4 as mp4
15
- from . import png as png
16
- from . import qoi as qoi
17
- from . import refs as refs
18
12
  from . import str_ as str
19
13
  from . import struct as struct
20
14
  from . import text as text
21
- from . import tiff as tiff
22
15
  from .base import Expr, ExprLike, NativeExpr
16
+ from .udf import UDF
23
17
 
24
18
  __all__ = [
25
19
  "Expr",
26
20
  "add",
27
21
  "and_",
28
- "deref",
29
22
  "divide",
30
23
  "eq",
31
24
  "getitem",
32
25
  "gt",
33
26
  "gte",
34
- "http",
35
- "io",
36
27
  "is_not_null",
37
28
  "is_null",
38
29
  "lift",
@@ -48,19 +39,15 @@ __all__ = [
48
39
  "or_",
49
40
  "pack",
50
41
  "aux",
51
- "ref",
52
- "refs",
53
42
  "scalar",
54
43
  "select",
55
44
  "str",
56
45
  "struct",
57
46
  "subtract",
58
- "tiff",
59
47
  "xor",
60
- "png",
61
- "qoi",
62
- "mp4",
63
48
  "text",
49
+ "s3",
50
+ "UDF",
64
51
  ]
65
52
 
66
53
  # Inline some of the struct expressions since they're so common
@@ -68,8 +55,6 @@ getitem = struct.getitem
68
55
  merge = struct.merge
69
56
  pack = struct.pack
70
57
  select = struct.select
71
- ref = refs.ref
72
- deref = refs.deref
73
58
 
74
59
 
75
60
  def lift(expr: ExprLike) -> Expr:
@@ -127,9 +112,17 @@ def evaluate(expr: ExprLike) -> pa.RecordBatchReader:
127
112
  return pa.RecordBatchReader.from_batches(expr.schema, [expr])
128
113
  if isinstance(expr, pa.StructArray):
129
114
  return pa.Table.from_struct_array(expr).to_reader()
115
+
130
116
  if isinstance(expr, pa.ChunkedArray):
131
- # TODO(marko): We shouldn't need to combine chunks here._
132
- return evaluate(expr.combine_chunks())
117
+ if not pa.types.is_struct(expr.type):
118
+ raise ValueError("Arrow chunked array must be a struct type.")
119
+
120
+ def _iter_batches():
121
+ for chunk in expr.chunks:
122
+ yield pa.RecordBatch.from_struct_array(chunk)
123
+
124
+ return pa.RecordBatchReader.from_batches(pa.schema(expr.type.fields), _iter_batches())
125
+
133
126
  if isinstance(expr, pa.Array):
134
127
  raise ValueError("Arrow array must be a struct array.")
135
128
 
@@ -1,6 +1,5 @@
1
- import builtins
2
1
  import datetime
3
- from typing import TypeAlias
2
+ from typing import TypeAlias, Union
4
3
 
5
4
  import pyarrow as pa
6
5
 
@@ -153,5 +152,11 @@ class Expr:
153
152
 
154
153
 
155
154
  ScalarLike: TypeAlias = bool | int | float | str | list["ScalarLike"] | datetime.datetime | None
156
- ArrowLike: TypeAlias = pa.Array | pa.ChunkedArray | pa.Scalar | pa.RecordBatch | pa.Table
157
- ExprLike: TypeAlias = Expr | dict[str, "ExprLike"] | builtins.list | ArrowLike | ScalarLike
155
+ ArrowLike: TypeAlias = Union[
156
+ pa.RecordBatch,
157
+ "pa.Array[pa.Scalar[pa.DataType]]",
158
+ "pa.ChunkedArray[pa.Scalar[pa.DataType]]",
159
+ "pa.Scalar[pa.DataType]",
160
+ pa.Table,
161
+ ]
162
+ ExprLike: TypeAlias = Expr | dict[str, "ExprLike"] | list["ExprLike"] | ArrowLike | ScalarLike
@@ -0,0 +1,18 @@
1
+ from spiral import _lib
2
+ from spiral.expressions.base import Expr, ExprLike
3
+
4
+
5
+ def get(expr: ExprLike) -> Expr:
6
+ """Read data from object storage by the object's URL.
7
+
8
+ Args:
9
+ expr: URLs of the data that needs to be read from object storage.
10
+ """
11
+ from spiral import expressions as se
12
+
13
+ expr = se.lift(expr)
14
+ return Expr(
15
+ _lib.expr.s3.get(
16
+ expr.__expr__,
17
+ )
18
+ )