pyspiral 0.6.11__cp312-abi3-manylinux_2_28_aarch64.whl → 0.6.13__cp312-abi3-manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyspiral might be problematic. Click here for more details.

Files changed (41)
  1. {pyspiral-0.6.11.dist-info → pyspiral-0.6.13.dist-info}/METADATA +8 -5
  2. {pyspiral-0.6.11.dist-info → pyspiral-0.6.13.dist-info}/RECORD +36 -30
  3. spiral/__init__.py +7 -0
  4. spiral/_lib.abi3.so +0 -0
  5. spiral/cli/iceberg.py +1 -1
  6. spiral/cli/key_spaces.py +15 -1
  7. spiral/cli/tables.py +3 -3
  8. spiral/client.py +12 -11
  9. spiral/core/client/__init__.pyi +8 -8
  10. spiral/core/expr/__init__.pyi +15 -0
  11. spiral/core/expr/images/__init__.pyi +3 -0
  12. spiral/core/expr/list_/__init__.pyi +4 -0
  13. spiral/core/expr/refs/__init__.pyi +4 -0
  14. spiral/core/expr/str_/__init__.pyi +3 -0
  15. spiral/core/expr/struct_/__init__.pyi +6 -0
  16. spiral/core/expr/text/__init__.pyi +5 -0
  17. spiral/core/expr/udf/__init__.pyi +14 -0
  18. spiral/core/expr/video/__init__.pyi +3 -0
  19. spiral/core/table/__init__.pyi +19 -1
  20. spiral/core/table/spec/__init__.pyi +6 -0
  21. spiral/dataloader.py +52 -38
  22. spiral/enrichment.py +153 -0
  23. spiral/expressions/__init__.py +15 -19
  24. spiral/expressions/base.py +9 -4
  25. spiral/expressions/http.py +10 -80
  26. spiral/expressions/s3.py +15 -0
  27. spiral/expressions/tiff.py +2 -3
  28. spiral/expressions/udf.py +38 -24
  29. spiral/project.py +6 -6
  30. spiral/scan.py +76 -33
  31. spiral/settings.py +9 -6
  32. spiral/streaming_/stream.py +1 -1
  33. spiral/table.py +41 -9
  34. spiral/transaction.py +42 -0
  35. spiral/expressions/io.py +0 -100
  36. spiral/expressions/mp4.py +0 -62
  37. spiral/expressions/png.py +0 -18
  38. spiral/expressions/qoi.py +0 -18
  39. spiral/expressions/refs.py +0 -58
  40. {pyspiral-0.6.11.dist-info → pyspiral-0.6.13.dist-info}/WHEEL +0 -0
  41. {pyspiral-0.6.11.dist-info → pyspiral-0.6.13.dist-info}/entry_points.txt +0 -0
{pyspiral-0.6.11.dist-info → pyspiral-0.6.13.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pyspiral
3
- Version: 0.6.11
3
+ Version: 0.6.13
4
4
  Classifier: Intended Audience :: Science/Research
5
5
  Classifier: Operating System :: OS Independent
6
6
  Classifier: Programming Language :: Python
@@ -31,15 +31,18 @@ Requires-Dist: typer>=0.16
31
31
  Requires-Dist: xxhash>=3.4.1
32
32
  Requires-Dist: polars>=1.31.0 ; extra == 'polars'
33
33
  Requires-Dist: duckdb>=1.3.2 ; extra == 'duckdb'
34
- Requires-Dist: datasets>=4.0.0 ; extra == 'datasets'
35
- Requires-Dist: pyiceberg>=0.9.1 ; extra == 'pyiceberg'
34
+ Requires-Dist: pyiceberg[s3fs]>=0.9.1 ; extra == 'iceberg'
35
+ Requires-Dist: datasets>=4.0.0 ; extra == 'huggingface'
36
36
  Requires-Dist: mosaicml-streaming>=0.13.0 ; extra == 'streaming'
37
37
  Requires-Dist: vortex-data>=0.52.1 ; extra == 'streaming'
38
+ Requires-Dist: dask>=2025.10.0 ; extra == 'dask'
39
+ Requires-Dist: distributed>=2025.10.0 ; extra == 'dask'
38
40
  Provides-Extra: polars
39
41
  Provides-Extra: duckdb
40
- Provides-Extra: datasets
41
- Provides-Extra: pyiceberg
42
+ Provides-Extra: iceberg
43
+ Provides-Extra: huggingface
42
44
  Provides-Extra: streaming
45
+ Provides-Extra: dask
43
46
  Summary: Python client for Spiral.
44
47
  Home-Page: https://spiraldb.com
45
48
  Author-email: SpiralDB <hello@spiraldb.com>
{pyspiral-0.6.11.dist-info → pyspiral-0.6.13.dist-info}/RECORD CHANGED
@@ -1,8 +1,8 @@
1
- pyspiral-0.6.11.dist-info/METADATA,sha256=_Km6gYKZorM7zvU5kOO6lUSY39ei8orw_8vz30yF1Gk,1843
2
- pyspiral-0.6.11.dist-info/WHEEL,sha256=I5JYpyYzeAl2SOerY_wvkm-HJti0rDQc6zMeJs35MpM,108
3
- pyspiral-0.6.11.dist-info/entry_points.txt,sha256=uft7u-a6g40NLt4Q6BleWbK4NY0M8nZuYPpP8DV0EOk,45
4
- spiral/__init__.py,sha256=n4JNLrO3wyw_k_U_JKyNiGON0wEpfvqxDhDdB2P6dhM,1007
5
- spiral/_lib.abi3.so,sha256=lNiMj4FtAFVhmeisqNtLg9DUA3in95UYoct6lnagkC4,57478712
1
+ pyspiral-0.6.13.dist-info/METADATA,sha256=AvViHjB1v9OqYTKONYf_DbfCB0HktAy-numkhQhuq20,1977
2
+ pyspiral-0.6.13.dist-info/WHEEL,sha256=I5JYpyYzeAl2SOerY_wvkm-HJti0rDQc6zMeJs35MpM,108
3
+ pyspiral-0.6.13.dist-info/entry_points.txt,sha256=uft7u-a6g40NLt4Q6BleWbK4NY0M8nZuYPpP8DV0EOk,45
4
+ spiral/__init__.py,sha256=gAysTwG_oEeKVMdCOfOzDhl0bM2miiK8Ds2vvUihBWw,1153
5
+ spiral/_lib.abi3.so,sha256=x-kkHeRPwdf73BSVaVwxA6K71a0mtTXCV_omc2t0j7g,61221816
6
6
  spiral/adbc.py,sha256=7IxfWIeQN-fh0W5OdN_PP2x3pzQYg6ZUOLsHg3jktqw,14842
7
7
  spiral/api/__init__.py,sha256=ULBlVq3PnfNOO6T5naE_ULmmii-83--qTuN2PpAUQN0,2241
8
8
  spiral/api/admin.py,sha256=A1iVR1XYJSObZivPAD5UzmPuMgupXc9kaHNYYa_kwfs,585
@@ -23,53 +23,59 @@ spiral/cli/admin.py,sha256=-ubYqs8nKjnQStbQ68jpWx_9xh0TsaxI0wM1Hfko8_U,319
23
23
  spiral/cli/app.py,sha256=smzGj5a2RwhM9RQChmlEeKZLN4Fk60-bP7Lm5_Is1Rw,2760
24
24
  spiral/cli/console.py,sha256=6JHbAQV6MFWz3P-VzqPOjhHpkIQagsCdzTMvmuDKMkU,2580
25
25
  spiral/cli/fs.py,sha256=vaPcSc2YghhHeipxNitIdsHaBhFwlwkvPFqYsFSN9P0,2927
26
- spiral/cli/iceberg.py,sha256=Q14tcGcn1LixbFCYP0GhfYwFFXTmmi8tqBPYwalJEyE,3248
27
- spiral/cli/key_spaces.py,sha256=x3IFRP5d47pKiAHeWExYMOBaT2TwxbWjVM01SUqKrwI,2943
26
+ spiral/cli/iceberg.py,sha256=wdMyl0j821MLnXNZ6Kwm65ogh98C-pjMJm3Y6YqlnTI,3249
27
+ spiral/cli/key_spaces.py,sha256=Xaw7WH-Qw_j6AxisdIoKfjAgVRXLM9qBFzuCTjPAFLI,3516
28
28
  spiral/cli/login.py,sha256=2tw6uN5rEpiMMAmjQSB3-JUPf3C0Wc1eTGCDxhYtJps,731
29
29
  spiral/cli/orgs.py,sha256=fmOuLxpeIFfKqePRi292Gv9k-EF5pPn_tbKd2BLl2Ig,2869
30
30
  spiral/cli/printer.py,sha256=aosc763hDFgoXJGkiANmNyO3kAsecAS1JWgjEhn8GCM,1784
31
31
  spiral/cli/projects.py,sha256=1M1nGrBT-t0aY9RV5Cnmzy7YrhIvmHwdkpa3y9j8rG8,5756
32
32
  spiral/cli/state.py,sha256=10wTIVQ0SJkY67Z6-KQ1LFlt3aVIPmZhoHFdTwp4kNA,130
33
- spiral/cli/tables.py,sha256=fFte_wMNcB0V-fmfSXfSbtV4UlAi-Xw5nYDJ0b62CGk,6360
33
+ spiral/cli/tables.py,sha256=qm3izcysElJrQlerNZdfx5RWSVXtyVfkP3o_H51ltFw,6366
34
34
  spiral/cli/telemetry.py,sha256=Uxo1Q1FkKJ6n6QNGOUmL3j_pRRWRx0qWIhoP-U9BuR0,589
35
35
  spiral/cli/text.py,sha256=DlWGe4JrkdERAiqyITNpk91Wqb63Re99rNYlIFsIamc,4031
36
36
  spiral/cli/types.py,sha256=XYzo1GgX7dBBItoBSrHI4vO5C2lLmS2sktb-2GnGH3E,1362
37
37
  spiral/cli/workloads.py,sha256=2_SLfQTFN6y73R9H0i9dk8VIOVagKxSxOpHXC56yptY,2015
38
- spiral/client.py,sha256=N4sQLxtQ6GYCnj00hm4VX1vUVUqzQdHhl_KfQwp-1LQ,6345
38
+ spiral/client.py,sha256=zMp-xXGL4R1Py_rYrC5o3jFLam1oA74azi50dvMP-_o,6329
39
39
  spiral/core/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
40
  spiral/core/_tools/__init__.pyi,sha256=b2KLfTOQ67pjfbYt07o0IGiTu5o2bZw69lllV8v0Dps,143
41
41
  spiral/core/authn/__init__.pyi,sha256=z_GWyIS62fuiYQrYO8hzw4W8oGaiciqS1u5qtAt54VY,769
42
- spiral/core/client/__init__.pyi,sha256=1HK3SOMT1QKmD5Hai58ZFjiEZK0QzyYtP84hse8SBEI,6666
43
- spiral/core/table/__init__.pyi,sha256=QqG_pMlPhMtXG-56dXyQjOWVKMugPP0nnYnvYaY0Q10,3288
42
+ spiral/core/client/__init__.pyi,sha256=ajF8XaxThnTdsPpw1k3pPLEurIaDg9yeXqwIRJNlJTY,6665
43
+ spiral/core/expr/__init__.pyi,sha256=3HSKjkotiEkxBvGBALXEBIie0JiyI9bCpehwA3nMQkU,571
44
+ spiral/core/expr/images/__init__.pyi,sha256=wnE_wZXq7a4iqTg3SVm-ssxGw1WQZyk5dGOPaP4Btko,73
45
+ spiral/core/expr/list_/__init__.pyi,sha256=Q_9c87eIQfZbqlaw_rq3fvs93YEsW7K5VYk6VZ4g6mU,126
46
+ spiral/core/expr/refs/__init__.pyi,sha256=nZZP3l_Z6bLx6V8lTcH3Jgo--xwfADOU2XdTAvM5IMk,127
47
+ spiral/core/expr/str_/__init__.pyi,sha256=Bm6fZK-d4fNbJuuBhVoWMACXUbQQ2SjlhgrOpdOHIPM,86
48
+ spiral/core/expr/struct_/__init__.pyi,sha256=MXckd98eV_x3X0RhEWvlkA3DcDXRtLs5pNnTQkc09nE,296
49
+ spiral/core/expr/text/__init__.pyi,sha256=ed83n1xcsGY7_QDhMmJGnSQ20UrJFXcdv1AveSEcS1c,175
50
+ spiral/core/expr/udf/__init__.pyi,sha256=zsZs081KVhY3-1JidqTkWMW81Qd_ScoTGZvasIhIK-4,358
51
+ spiral/core/expr/video/__init__.pyi,sha256=nQJEcSsigZuRpMjkI_O4EEtMK_n2zRvorcL_KEeD5vU,95
52
+ spiral/core/table/__init__.pyi,sha256=YBL12_JPTWz2mNbqlDqbT1exxVJYzwfXdHCi6Z37JxA,3841
44
53
  spiral/core/table/manifests/__init__.pyi,sha256=eVfDpmhYSjafIvvALqAkZe5baN3Y1HpKpxYEbjwd4gQ,1043
45
54
  spiral/core/table/metastore/__init__.pyi,sha256=rc3u9MwEKRvL2kxOc8lBorddFRnM8o_o1frqtae86a4,1697
46
- spiral/core/table/spec/__init__.pyi,sha256=OFYJXPXix7gskYJIMog7IniZslEPJ0xvL-sUSFDPbXs,5643
47
- spiral/dataloader.py,sha256=FFZhIflQPEygXe-xBLifQnnxANi4CFooaHRm4i-EGHo,10335
55
+ spiral/core/table/spec/__init__.pyi,sha256=twzX4vFmgBxInZWq_nyP6DR9OQjjOVrbZMn97kndeS8,5808
56
+ spiral/dataloader.py,sha256=W9siY4BF4p_rwTTSS4KgsaQsPLxxza6XmQhrdBzzMJ8,10592
48
57
  spiral/dataset.py,sha256=PMLoXnXuEUciP6-NXqTmQLXu0UIH7OcC4-iZtY_iuO8,7973
49
58
  spiral/datetime_.py,sha256=elXaUWtZuuLVcu9E0aXnvYRPB9XWqZbLDToozQYQYjU,950
50
59
  spiral/debug/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
60
  spiral/debug/manifests.py,sha256=7f1O3ba9mrA5nXpOF9cEIQuUAteP5wiBkFy_diQJ7No,3216
52
61
  spiral/debug/metrics.py,sha256=XdRDcjggtsLNGCAjam6IxG9072pz_d2C8iLApNRFUtk,2044
53
62
  spiral/debug/scan.py,sha256=UEm_aRnql5pwDPTpZgakMLNjlzkKL4RurBFFqH_BLAQ,9526
54
- spiral/expressions/__init__.py,sha256=KhwFjVKoFgx1S6hkVcE8aZjoHY_1N-BgQ2rGEZfPQvM,7957
55
- spiral/expressions/base.py,sha256=915gpvZZCTRCO5q93pwwmhf-R6C23LQsyDt4Q2dHk9s,5290
56
- spiral/expressions/http.py,sha256=begUydWoFHEqjeLkATvI_v66Ez6_rR-OQBWO5cHbb9c,2742
57
- spiral/expressions/io.py,sha256=gJ2a0FKMmdxarWKENulPRwH7KDvSJTIh_OUxX306xAM,3045
63
+ spiral/enrichment.py,sha256=e2yzNWTTG73uEkLTc4ccTNRQ94cBtM04eGzlJ2-kBOI,5851
64
+ spiral/expressions/__init__.py,sha256=Fp7Xx3exh9KJad92tgd_TGGIpYLQTHqWjW-pexzQibU,7981
65
+ spiral/expressions/base.py,sha256=PvhJkcUSsPSIaxirHVzM9zlqyBXiaiia1HXohXdOmL4,5377
66
+ spiral/expressions/http.py,sha256=WfHVLqz_LjBr78mN3ARBRQqgBrkao7-S73JxjC4Xwvo,356
58
67
  spiral/expressions/list_.py,sha256=MMt5lf5H1M3O-x6N_PvqOLGq9NOk6Ukv0fPWwPC_uy4,1809
59
- spiral/expressions/mp4.py,sha256=_xGVnkygddzxP9a8OACJ8_KXnejuVbYCVKBCXBQ798Y,2151
60
- spiral/expressions/png.py,sha256=KO8X0OmMzUFwpg2I_j0JTyldPzVXDWIMzjWMWDV9vIY,506
61
- spiral/expressions/qoi.py,sha256=gvIbb6fXb_Bb080sn9wkpbGGrPs2UEcTXCfuv4-kcYQ,506
62
- spiral/expressions/refs.py,sha256=omeHBQ5o6N4xgZ3x5Xz7IRrWwYBBtQY8DYK0NNAxeGo,2109
68
+ spiral/expressions/s3.py,sha256=bkd0HANerNKlOblp2z7JJOSWjF9Bw9lZe1A-KTrUEgk,378
63
69
  spiral/expressions/str_.py,sha256=tY8RXW3JWvr1-bEfCZtk5FAf11wKJnXPuA9EoeJ9tA4,1265
64
70
  spiral/expressions/struct.py,sha256=pGAnCDh6AK0BK1XfZ1qG4ce4ranIQEE1HQsgmzBcfwQ,2038
65
71
  spiral/expressions/text.py,sha256=-02gBWYoyNQ3qQ1--9HTa8IryUDojYQVIp8C7rgnOWQ,1893
66
- spiral/expressions/tiff.py,sha256=fQwIn0kLFBM2Y3YYIHmTgb_EIRHKT2fNc77nioDQQw4,8044
67
- spiral/expressions/udf.py,sha256=yb9MIcrFftpNDxgBF228cvdv6TY-hEFikYz2fq_nzWo,1353
72
+ spiral/expressions/tiff.py,sha256=4dngO97bT1QY0By7-PxOQVmSwQC3PQAiixVhLJ-4HMQ,7986
73
+ spiral/expressions/udf.py,sha256=XOxa7Kocb4Cg4q_qFvRT6hVnVzi22CQenqrvS-TL-VY,1936
68
74
  spiral/grpc_.py,sha256=f3czdP1Mxme42Y5--a5ogYq1TTiWn-J_MlGjwJ2mWwM,1015
69
75
  spiral/iceberg.py,sha256=JGq62Qnf296r9_hRAoH85GQq45-uSBjwXWw_CvPi6G4,930
70
76
  spiral/iterable_dataset.py,sha256=Eekg9ad8tcwXcloHWReBbvCSr5ZappRHn2ldKTvwqS0,4622
71
77
  spiral/key_space_index.py,sha256=NAB_nONEjpMYbse8suz42w7Qb5OPHuKN9h9CT2NJe08,1460
72
- spiral/project.py,sha256=CO_Pn6vPqaonNvRdCNRFcBWr4TqO2AsAUTH5xawIeCE,7283
78
+ spiral/project.py,sha256=VsokZgS0TqIel7UAXMyoBToxn-l_D3ivGwc41x7HLF0,7277
73
79
  spiral/protogen/_/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
74
80
  spiral/protogen/_/arrow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
75
81
  spiral/protogen/_/arrow/flight/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -87,16 +93,16 @@ spiral/protogen/_/substrait/extensions/__init__.py,sha256=nhnEnho70GAT8WPj2xtwJU
87
93
  spiral/protogen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
88
94
  spiral/protogen/util.py,sha256=smnvVo6nYH3FfDm9jqhNLaXz4bbTBaQezHQDCTvZyiQ,1486
89
95
  spiral/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
90
- spiral/scan.py,sha256=4PUlI_DHbO1WTttLia6DinhGtOWsCiqek4ZljoEiRZc,10523
96
+ spiral/scan.py,sha256=csbk5ePbU-RlEVIF7isccF2zRBB8L8ZY_HEpalMjgLY,12340
91
97
  spiral/server.py,sha256=ztBmB5lBnUz-smQxR_tC8AI5SOhz17wH0MI3GuzDUdM,600
92
- spiral/settings.py,sha256=JRQSwjJyNaCqQdQLxiqB_O_LZRQXMLyshJBrI2LZHwM,3113
98
+ spiral/settings.py,sha256=sUhMMBCXaPvUYztN_gztD9TjeUYJwVeEcJrq4FLy6M0,3232
93
99
  spiral/snapshot.py,sha256=cTobi5jtiANxalGA-isokQHblNmXGtuUvgUGGNVybsI,1555
94
100
  spiral/streaming_/__init__.py,sha256=s7MlW2ERsuZmZGExLFL6RcZon2e0tNBocBg5ANgki7k,61
95
101
  spiral/streaming_/reader.py,sha256=tl_lC9xgh1-QFhsZn4xQT7It3PVTzHCEUT2BG2dWBRQ,4166
96
- spiral/streaming_/stream.py,sha256=nXnygiuCxi1D3PhaxV8Ujif4J9ly_OczA7CZ3W4WN2w,5913
102
+ spiral/streaming_/stream.py,sha256=DM1hBDHnWm1ZFKZ-hZ4zxeSXITcUI6kWzwdJZvywI8o,5915
97
103
  spiral/substrait_.py,sha256=AKeOD4KIXvz2J4TYxnIneOiHddtBIyOhuNxVO_uH0eg,12592
98
- spiral/table.py,sha256=G05b6M0uVmT5ew5GxuzsVB4rQzg25W3zGMTftL07pJU,11026
104
+ spiral/table.py,sha256=prjDBcm6Qerdq3ypXzfbXb7ngAcO0j-Z9aTeZvzKoqs,12209
99
105
  spiral/text_index.py,sha256=FQ9rgIEGLSJryS9lFdMhKtPFey18BXoWbPXyvZPJJ04,442
100
- spiral/transaction.py,sha256=h6YdAwOYX6qq-tXYV4i9yhy1Nq1tIfRphY_fk7Q_yLQ,1854
106
+ spiral/transaction.py,sha256=hQm6DfCklMDpIYJ9qA2wR45cCuUPGCiJy1tHGE3AsEY,3418
101
107
  spiral/types_.py,sha256=W_jyO7F6rpPiH69jhgSgV7OxQZbOlb1Ho3InpKUP6Eo,155
102
- pyspiral-0.6.11.dist-info/RECORD,,
108
+ pyspiral-0.6.13.dist-info/RECORD,,
spiral/__init__.py CHANGED
@@ -1,14 +1,18 @@
1
1
  """Python client for Spiral"""
2
2
 
3
+ import importlib
4
+
3
5
  # This is here to make sure we load the native extension first
4
6
  from spiral import _lib
5
7
 
6
8
  # Eagerly import the Spiral library
7
9
  assert _lib, "Spiral library"
8
10
 
11
+
9
12
  from spiral.client import Spiral # noqa: E402
10
13
  from spiral.core.client import Shard, ShuffleConfig # noqa: E402
11
14
  from spiral.dataloader import SpiralDataLoader, World # noqa: E402
15
+ from spiral.enrichment import Enrichment # noqa: E402
12
16
  from spiral.iceberg import Iceberg # noqa: E402
13
17
  from spiral.key_space_index import KeySpaceIndex # noqa: E402
14
18
  from spiral.project import Project # noqa: E402
@@ -24,6 +28,7 @@ __all__ = [
24
28
  "Table",
25
29
  "Snapshot",
26
30
  "Transaction",
31
+ "Enrichment",
27
32
  "Scan",
28
33
  "Shard",
29
34
  "ShuffleConfig",
@@ -33,3 +38,5 @@ __all__ = [
33
38
  "World",
34
39
  "Iceberg",
35
40
  ]
41
+
42
+ __version__ = importlib.metadata.version("pyspiral")
spiral/_lib.abi3.so CHANGED
Binary file
spiral/cli/iceberg.py CHANGED
@@ -8,7 +8,7 @@ from typer import Argument
8
8
  from spiral.cli import CONSOLE, ERR_CONSOLE, AsyncTyper, state
9
9
  from spiral.cli.types import ProjectArg
10
10
 
11
- app = AsyncTyper(short_help="Apache Iceberg Catalog")
11
+ app = AsyncTyper(short_help="Apache Iceberg Catalog.")
12
12
 
13
13
 
14
14
  @app.command(help="List namespaces.")
spiral/cli/key_spaces.py CHANGED
@@ -64,7 +64,7 @@ def show(
64
64
  """Show index partitions."""
65
65
  index_id = get_index_id(project, name)
66
66
  index = state.spiral.key_space_index(index_id)
67
- shards = state.spiral._ops().compute_shards(index.core)
67
+ shards = state.spiral.internal.compute_shards(index.core)
68
68
 
69
69
  rich_table = rich.table.Table("Begin", "End", "Cardinality", title=f"Index {index.name} Partitions")
70
70
  for partition in shards:
@@ -87,3 +87,17 @@ def sync(
87
87
  index_id = get_index_id(project, name)
88
88
  response = state.spiral.api.key_space_indexes.sync_index(index_id, SyncIndexRequest(resources=resources))
89
89
  CONSOLE.print(f"Triggered sync job {response.worker_id} for index {index_id}.")
90
+
91
+
92
+ # TODO(marko): This will be removed.
93
+ @app.command(help="Run a sync and wait for it to complete.")
94
+ def sync_local(
95
+ project: ProjectArg,
96
+ name: Annotated[str | None, Option(help="Index name.")] = None,
97
+ ):
98
+ """Run a sync and wait for it to complete."""
99
+ index_id = get_index_id(project, name)
100
+ index = state.spiral.key_space_index(index_id)
101
+ snapshot = state.spiral.table(index.table_id).snapshot()
102
+ state.spiral.internal.update_key_space_index(index.core, snapshot.core)
103
+ CONSOLE.print(f"Index {index.name} is up to date as-of {snapshot.asof}.")
spiral/cli/tables.py CHANGED
@@ -130,7 +130,7 @@ def flush(
130
130
  keep_latest_s = int(duration.total_seconds()) if duration is not None else None
131
131
 
132
132
  identifier, t = get_table(project, table, dataset)
133
- state.spiral._ops().flush_wal(t.core, keep_latest_s=keep_latest_s) # pyright: ignore[reportPrivateUsage]
133
+ state.spiral.internal.flush_wal(t.core, keep_latest_s=keep_latest_s) # pyright: ignore[reportPrivateUsage]
134
134
  CONSOLE.print(f"Flushed WAL for table {identifier} in project {project}.")
135
135
 
136
136
 
@@ -143,10 +143,10 @@ def manifests(
143
143
  _, t = get_table(project, table, dataset)
144
144
  s = t.snapshot()
145
145
 
146
- key_space_state = state.spiral._ops().key_space_state(s.core) # pyright: ignore[reportPrivateUsage]
146
+ key_space_state = state.spiral.internal.key_space_state(s.core) # pyright: ignore[reportPrivateUsage]
147
147
  key_space_manifest = key_space_state.manifest
148
148
 
149
- column_groups_states = state.spiral._ops().column_groups_states(s.core, key_space_state) # pyright: ignore[reportPrivateUsage]
149
+ column_groups_states = state.spiral.internal.column_groups_states(s.core, key_space_state) # pyright: ignore[reportPrivateUsage]
150
150
  display_manifests(key_space_manifest, [(x.column_group, x.manifest) for x in column_groups_states])
151
151
 
152
152
 
spiral/client.py CHANGED
@@ -6,7 +6,7 @@ import pyarrow as pa
6
6
 
7
7
  from spiral.api import SpiralAPI
8
8
  from spiral.api.projects import CreateProjectRequest, CreateProjectResponse
9
- from spiral.core.client import Operations
9
+ from spiral.core.client import Internal
10
10
  from spiral.core.client import Spiral as CoreSpiral
11
11
  from spiral.datetime_ import timestamp_micros
12
12
  from spiral.expressions import ExprLike
@@ -35,9 +35,13 @@ class Spiral:
35
35
  return self._config.api
36
36
 
37
37
  @property
38
- def _core(self) -> CoreSpiral:
38
+ def core(self) -> CoreSpiral:
39
39
  return self._config.core
40
40
 
41
+ @property
42
+ def internal(self) -> Internal:
43
+ return self.core.internal(format=settings().file_format)
44
+
41
45
  @property
42
46
  def organization(self) -> str:
43
47
  if self._org is None:
@@ -79,19 +83,19 @@ class Spiral:
79
83
  """Open a table using an ID."""
80
84
  from spiral.table import Table
81
85
 
82
- return Table(self, self._core.table(table_id))
86
+ return Table(self, self.core.table(table_id))
83
87
 
84
88
  def text_index(self, index_id: str) -> "TextIndex":
85
89
  """Open a text index using an ID."""
86
90
  from spiral.text_index import TextIndex
87
91
 
88
- return TextIndex(self._core.text_index(index_id))
92
+ return TextIndex(self.core.text_index(index_id))
89
93
 
90
94
  def key_space_index(self, index_id: str) -> "KeySpaceIndex":
91
95
  """Open a key space index using an ID."""
92
96
  from spiral.key_space_index import KeySpaceIndex
93
97
 
94
- return KeySpaceIndex(self._core.key_space_index(index_id))
98
+ return KeySpaceIndex(self.core.key_space_index(index_id))
95
99
 
96
100
  def scan(
97
101
  self,
@@ -117,7 +121,8 @@ class Spiral:
117
121
  where = se.lift(where)
118
122
 
119
123
  return Scan(
120
- self._core.scan(
124
+ self,
125
+ self.core.scan(
121
126
  projection.__expr__,
122
127
  filter=where.__expr__ if where else None,
123
128
  asof=asof,
@@ -155,17 +160,13 @@ class Spiral:
155
160
  freshness_window = timedelta(seconds=0)
156
161
  freshness_window_s = int(freshness_window.total_seconds())
157
162
 
158
- return self._core.search(
163
+ return self.core.search(
159
164
  top_k=top_k,
160
165
  rank_by=rank_by.__expr__,
161
166
  filters=filters.__expr__ if filters else None,
162
167
  freshness_window_s=freshness_window_s,
163
168
  )
164
169
 
165
- def _ops(self) -> Operations:
166
- """Access maintenance operations."""
167
- return self._core._ops(format=settings().file_format)
168
-
169
170
  @property
170
171
  def iceberg(self) -> "Iceberg":
171
172
  """
spiral/core/client/__init__.pyi CHANGED
@@ -3,7 +3,7 @@ from typing import Any, Literal
3
3
  import pyarrow as pa
4
4
  from spiral.api.types import DatasetName, IndexName, ProjectId, RootUri, TableName
5
5
  from spiral.core.authn import Authn
6
- from spiral.core.table import ColumnGroupState, KeyRange, KeySpaceState, Scan, Snapshot, Table, Transaction
6
+ from spiral.core.table import ColumnGroupState, KeyRange, KeySpaceState, Scan, ScanState, Snapshot, Table, Transaction
7
7
  from spiral.core.table.spec import ColumnGroup, Schema
8
8
  from spiral.expressions import Expr
9
9
 
@@ -30,6 +30,10 @@ class Spiral:
30
30
  """Construct a table scan."""
31
31
  ...
32
32
 
33
+ def load_scan(self, scan_state: ScanState) -> Scan:
34
+ """Load a scan from a serialized scan state."""
35
+ ...
36
+
33
37
  def transaction(self, table: Table, format: str | None = None, retries: int | None = 3) -> Transaction:
34
38
  """Being a table transaction."""
35
39
  ...
@@ -100,12 +104,8 @@ class Spiral:
100
104
  """Create a new key space index in the specified project."""
101
105
  ...
102
106
 
103
- def _ops(self, *, format: str | None = None) -> Operations:
104
- """Access maintenance operations.
105
-
106
- IMPORTANT: This API is internal and is currently exposed for development & testing.
107
- Maintenance operations are run by SpiralDB.
108
- """
107
+ def internal(self, *, format: str | None = None) -> Internal:
108
+ """Internal client APIs. It can change without notice."""
109
109
  ...
110
110
 
111
111
  class TextIndex:
@@ -158,7 +158,7 @@ class ShuffleConfig:
158
158
  max_batch_size: int | None = None,
159
159
  ): ...
160
160
 
161
- class Operations:
161
+ class Internal:
162
162
  def flush_wal(self, table: Table, *, keep_latest_s: int | None = None) -> None:
163
163
  """
164
164
  Flush the write-ahead log of the table.
spiral/core/expr/__init__.pyi ADDED
@@ -0,0 +1,15 @@
1
+ from pyarrow import Array, DataType, Scalar
2
+
3
+ class Expr:
4
+ """Low level expression class."""
5
+
6
+ def aux(name: str, data_type: DataType) -> Expr: ...
7
+
8
+ # Array is correct (there is no ArrayData), see the table here:
9
+ # https://arrow.apache.org/rust/arrow_pyarrow/index.html
10
+ def scalar(array: Array[Scalar[DataType]]) -> Expr: ...
11
+ def not_(expr: Expr) -> Expr: ...
12
+ def is_null(expr: Expr) -> Expr: ...
13
+ def binary(op: str, expr: Expr, Expr: Expr) -> Expr: ...
14
+ def cast(_expr: Expr, _data_type: DataType) -> Expr: ...
15
+ def array_lit(array: Array[Scalar[DataType]]) -> Expr: ...
spiral/core/expr/images/__init__.pyi ADDED
@@ -0,0 +1,3 @@
1
+ from .. import Expr
2
+
3
+ def encode_(images: Expr, format: str) -> Expr: ...
spiral/core/expr/list_/__init__.pyi ADDED
@@ -0,0 +1,4 @@
1
+ from .. import Expr
2
+
3
+ def contains(list: Expr, expr: Expr) -> Expr: ...
4
+ def element_at(list: Expr, element: Expr) -> Expr: ...
spiral/core/expr/refs/__init__.pyi ADDED
@@ -0,0 +1,4 @@
1
+ from .. import Expr
2
+
3
+ def ref(expr: Expr, field: str | None) -> Expr: ...
4
+ def deref(expr: Expr, field: str | None) -> Expr: ...
spiral/core/expr/str_/__init__.pyi ADDED
@@ -0,0 +1,3 @@
1
+ from .. import Expr
2
+
3
+ def substr(expr: Expr, begin: int, end: int | None) -> Expr: ...
spiral/core/expr/struct_/__init__.pyi ADDED
@@ -0,0 +1,6 @@
1
+ from .. import Expr
2
+
3
+ def getitem(expr: Expr, item: str) -> Expr: ...
4
+ def select(expr: Expr, including: list[str] | None = None, excluding: list[str] | None = None) -> Expr: ...
5
+ def pack(names: list[str], children: list[str], nullable: bool) -> Expr: ...
6
+ def merge(names: list[Expr]) -> Expr: ...
spiral/core/expr/text/__init__.pyi ADDED
@@ -0,0 +1,5 @@
1
+ from .. import Expr
2
+
3
+ def field(expr: Expr, tokeneizer: str | None) -> Expr: ...
4
+ def find(expr: Expr, term: str) -> Expr: ...
5
+ def boost(expr: Expr, factor: float) -> Expr: ...
spiral/core/expr/udf/__init__.pyi ADDED
@@ -0,0 +1,14 @@
1
+ from collections.abc import Callable
2
+
3
+ from pyarrow import Array, DataType, Scalar
4
+
5
+ from .. import Expr
6
+
7
+ class UDF:
8
+ def __call__(self, args: list[Expr]) -> Expr: ...
9
+
10
+ def create(
11
+ name: str,
12
+ return_type: Callable[[tuple[DataType, ...]], DataType],
13
+ invoke: Callable[[tuple[Array[Scalar[DataType]], ...]], Array[Scalar[DataType]]],
14
+ ) -> UDF: ...
spiral/core/expr/video/__init__.pyi ADDED
@@ -0,0 +1,3 @@
1
+ from .. import Expr
2
+
3
+ def read(expr: Expr, ranges: Expr, crops: Expr, format: str) -> Expr: ...
spiral/core/table/__init__.pyi CHANGED
@@ -5,7 +5,7 @@ from spiral.core.client import Shard, ShuffleConfig
5
5
 
6
6
  from .manifests import FragmentManifest
7
7
  from .metastore import PyMetastore
8
- from .spec import ColumnGroup, Key, Schema, WriteAheadLog
8
+ from .spec import ColumnGroup, Key, Operation, Schema, WriteAheadLog
9
9
 
10
10
  class KeyRange:
11
11
  """A right-exclusive range of keys."""
@@ -52,6 +52,11 @@ class Snapshot:
52
52
  table: Table
53
53
  wal: WriteAheadLog
54
54
 
55
+ class ScanState:
56
+ def to_json(self) -> str: ...
57
+ @staticmethod
58
+ def from_json(json: str) -> ScanState: ...
59
+
55
60
  class Scan:
56
61
  def key_schema(self) -> Schema: ...
57
62
  def schema(self) -> Schema: ...
@@ -62,8 +67,10 @@ class Scan:
62
67
  def column_groups(self) -> list[ColumnGroup]: ...
63
68
  def column_group_state(self, column_group: ColumnGroup) -> ColumnGroupState: ...
64
69
  def key_space_state(self, table_id: str) -> KeySpaceState: ...
70
+ def scan_state(self) -> ScanState: ...
65
71
  def to_record_batches(
66
72
  self,
73
+ key_range: KeyRange | None = None,
67
74
  key_table: pa.Table | pa.RecordBatch | None = None,
68
75
  batch_readahead: int | None = None,
69
76
  ) -> pa.RecordBatchReader: ...
@@ -95,7 +102,18 @@ class Transaction:
95
102
  status: str
96
103
 
97
104
  def write(self, table: pa.RecordBatchReader, *, partition_size_bytes: int | None = None): ...
105
+ def writeback(
106
+ self,
107
+ scan: Scan,
108
+ *,
109
+ key_range: KeyRange | None = None,
110
+ partition_size_bytes: int | None = None,
111
+ batch_readahead: int | None = None,
112
+ ): ...
98
113
  def drop_columns(self, column_paths: list[str]): ...
114
+ def take(self) -> list[Operation]: ...
115
+ def include(self, ops: list[Operation]): ...
99
116
  def commit(self): ...
100
117
  def abort(self): ...
118
+ def is_empty(self) -> bool: ...
101
119
  def metrics(self) -> dict[str, Any]: ...
spiral/core/table/spec/__init__.pyi CHANGED
@@ -62,6 +62,12 @@ class ColumnGroupMetadata:
62
62
  def apply_wal(self, wal: WriteAheadLog) -> ColumnGroupMetadata:
63
63
  """Applies the given WAL to the metadata."""
64
64
 
65
+ class Operation:
66
+ # Base class for all operations in the WAL.
67
+ def to_json(self) -> str: ...
68
+ @staticmethod
69
+ def from_json(json: str) -> Operation: ...
70
+
65
71
  class LogEntry:
66
72
  ts: int
67
73
  operation: (
spiral/dataloader.py CHANGED
@@ -88,22 +88,24 @@ class SpiralDataLoader:
88
88
  - map_workers for parallel post-processing (tokenization, decoding, etc.)
89
89
  - Built-in checkpoint support via skip_samples
90
90
  - Explicit shard-based architecture for distributed training
91
- """
92
91
 
93
- # Example usage:
94
- #
95
- # Simple usage:
96
- # loader = SpiralDataLoader(scan, batch_size=32)
97
- # for batch in loader:
98
- # train_step(batch)
99
- #
100
- # With parallel transforms:
101
- # loader = SpiralDataLoader(
102
- # scan,
103
- # batch_size=32,
104
- # transform_fn=tokenize_batch,
105
- # map_workers=4,
106
- # )
92
+ Simple usage:
93
+ ```python
94
+ loader = SpiralDataLoader(scan, batch_size=32)
95
+ for batch in loader:
96
+ train_step(batch)
97
+ ```
98
+
99
+ With parallel transforms:
100
+ ```python
101
+ loader = SpiralDataLoader(
102
+ scan,
103
+ batch_size=32,
104
+ transform_fn=tokenize_batch,
105
+ map_workers=4,
106
+ )
107
+ ```
108
+ """
107
109
 
108
110
  def __init__(
109
111
  self,
@@ -119,6 +121,7 @@ class SpiralDataLoader:
119
121
  # TODO(os): accept vortex arrays here instead of Arrow
120
122
  transform_fn: Callable[[pa.RecordBatch], Any] | None = None,
121
123
  map_workers: int = 0,
124
+ infinite: bool = False,
122
125
  ):
123
126
  """Initialize SpiralDataLoader.
124
127
 
@@ -143,6 +146,9 @@ class SpiralDataLoader:
143
146
  map_workers: Number of worker processes for parallel transform_fn
144
147
  application. 0 means single-process (no parallelism). Use this for
145
148
  CPU-bound transforms like tokenization or audio decoding.
149
+ infinite: Whether to cycle through the dataset infinitely. If True,
150
+ the dataloader will repeat the dataset indefinitely. If False,
151
+ the dataloader will stop after going through the dataset once.
146
152
  """
147
153
  self.scan = scan
148
154
  self.shards = shards if shards is not None else scan.shards()
@@ -155,6 +161,7 @@ class SpiralDataLoader:
155
161
  self.batch_readahead = batch_readahead
156
162
  self.transform_fn = transform_fn
157
163
  self.map_workers = map_workers
164
+ self.infinite = infinite
158
165
 
159
166
  self._samples_yielded = 0
160
167
 
@@ -174,7 +181,7 @@ class SpiralDataLoader:
174
181
  shuffle=shuffle,
175
182
  max_batch_size=self.batch_size,
176
183
  batch_readahead=self.batch_readahead,
177
- infinite=False,
184
+ infinite=self.infinite,
178
185
  )
179
186
 
180
187
  if self.skip_samples > 0:
@@ -220,16 +227,21 @@ class SpiralDataLoader:
220
227
 
221
228
  Returns:
222
229
  Dictionary containing samples_yielded, seed, and shards.
230
+
231
+ Example checkpoint:
232
+ ```python
233
+ loader = SpiralDataLoader(scan, batch_size=32, seed=42)
234
+ for i, batch in enumerate(loader):
235
+ if i == 10:
236
+ checkpoint = loader.state_dict()
237
+ break
238
+ ```
239
+
240
+ Example resume:
241
+ ```python
242
+ loader = SpiralDataLoader.from_state_dict(scan, checkpoint, batch_size=32)
243
+ ```
223
244
  """
224
- # Example usage:
225
- # loader = SpiralDataLoader(scan, batch_size=32, seed=42)
226
- # for i, batch in enumerate(loader):
227
- # if i == 10:
228
- # checkpoint = loader.state_dict()
229
- # break
230
- #
231
- # # Resume later with exact same shards
232
- # loader = SpiralDataLoader.from_state_dict(scan, checkpoint, batch_size=32)
233
245
  return {
234
246
  "samples_yielded": self._samples_yielded,
235
247
  "seed": self.seed,
@@ -257,20 +269,22 @@ class SpiralDataLoader:
257
269
 
258
270
  Returns:
259
271
  New SpiralDataLoader instance configured to resume from the checkpoint.
272
+
273
+ Save checkpoint during training:
274
+ ```python
275
+ loader = scan.to_distributed_data_loader(scan, batch_size=32, seed=42)
276
+ checkpoint = loader.state_dict()
277
+ ```
278
+
279
+ Resume later using the same shards from checkpoint:
280
+ ```python
281
+ resumed_loader = SpiralDataLoader.from_state_dict(
282
+ scan,
283
+ checkpoint,
284
+ batch_size=32,
285
+ transform_fn=my_transform,
286
+ )
260
287
  """
261
- # Example usage:
262
- #
263
- # Save checkpoint during training:
264
- # loader = scan.to_distributed_data_loader(scan, batch_size=32, seed=42)
265
- # checkpoint = loader.state_dict()
266
- #
267
- # Resume later using the same shards from checkpoint:
268
- # resumed_loader = SpiralDataLoader.from_state_dict(
269
- # scan,
270
- # checkpoint,
271
- # batch_size=32,
272
- # transform_fn=my_transform,
273
- # )
274
288
 
275
289
  # Extract resume parameters from state
276
290
  seed = state.get("seed", 42)