pyspiral 0.6.11__cp312-abi3-manylinux_2_28_aarch64.whl → 0.6.13__cp312-abi3-manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyspiral might be problematic. Click here for more details.
- {pyspiral-0.6.11.dist-info → pyspiral-0.6.13.dist-info}/METADATA +8 -5
- {pyspiral-0.6.11.dist-info → pyspiral-0.6.13.dist-info}/RECORD +36 -30
- spiral/__init__.py +7 -0
- spiral/_lib.abi3.so +0 -0
- spiral/cli/iceberg.py +1 -1
- spiral/cli/key_spaces.py +15 -1
- spiral/cli/tables.py +3 -3
- spiral/client.py +12 -11
- spiral/core/client/__init__.pyi +8 -8
- spiral/core/expr/__init__.pyi +15 -0
- spiral/core/expr/images/__init__.pyi +3 -0
- spiral/core/expr/list_/__init__.pyi +4 -0
- spiral/core/expr/refs/__init__.pyi +4 -0
- spiral/core/expr/str_/__init__.pyi +3 -0
- spiral/core/expr/struct_/__init__.pyi +6 -0
- spiral/core/expr/text/__init__.pyi +5 -0
- spiral/core/expr/udf/__init__.pyi +14 -0
- spiral/core/expr/video/__init__.pyi +3 -0
- spiral/core/table/__init__.pyi +19 -1
- spiral/core/table/spec/__init__.pyi +6 -0
- spiral/dataloader.py +52 -38
- spiral/enrichment.py +153 -0
- spiral/expressions/__init__.py +15 -19
- spiral/expressions/base.py +9 -4
- spiral/expressions/http.py +10 -80
- spiral/expressions/s3.py +15 -0
- spiral/expressions/tiff.py +2 -3
- spiral/expressions/udf.py +38 -24
- spiral/project.py +6 -6
- spiral/scan.py +76 -33
- spiral/settings.py +9 -6
- spiral/streaming_/stream.py +1 -1
- spiral/table.py +41 -9
- spiral/transaction.py +42 -0
- spiral/expressions/io.py +0 -100
- spiral/expressions/mp4.py +0 -62
- spiral/expressions/png.py +0 -18
- spiral/expressions/qoi.py +0 -18
- spiral/expressions/refs.py +0 -58
- {pyspiral-0.6.11.dist-info → pyspiral-0.6.13.dist-info}/WHEEL +0 -0
- {pyspiral-0.6.11.dist-info → pyspiral-0.6.13.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pyspiral
|
|
3
|
-
Version: 0.6.11
|
|
3
|
+
Version: 0.6.13
|
|
4
4
|
Classifier: Intended Audience :: Science/Research
|
|
5
5
|
Classifier: Operating System :: OS Independent
|
|
6
6
|
Classifier: Programming Language :: Python
|
|
@@ -31,15 +31,18 @@ Requires-Dist: typer>=0.16
|
|
|
31
31
|
Requires-Dist: xxhash>=3.4.1
|
|
32
32
|
Requires-Dist: polars>=1.31.0 ; extra == 'polars'
|
|
33
33
|
Requires-Dist: duckdb>=1.3.2 ; extra == 'duckdb'
|
|
34
|
-
Requires-Dist:
|
|
35
|
-
Requires-Dist:
|
|
34
|
+
Requires-Dist: pyiceberg[s3fs]>=0.9.1 ; extra == 'iceberg'
|
|
35
|
+
Requires-Dist: datasets>=4.0.0 ; extra == 'huggingface'
|
|
36
36
|
Requires-Dist: mosaicml-streaming>=0.13.0 ; extra == 'streaming'
|
|
37
37
|
Requires-Dist: vortex-data>=0.52.1 ; extra == 'streaming'
|
|
38
|
+
Requires-Dist: dask>=2025.10.0 ; extra == 'dask'
|
|
39
|
+
Requires-Dist: distributed>=2025.10.0 ; extra == 'dask'
|
|
38
40
|
Provides-Extra: polars
|
|
39
41
|
Provides-Extra: duckdb
|
|
40
|
-
Provides-Extra:
|
|
41
|
-
Provides-Extra:
|
|
42
|
+
Provides-Extra: iceberg
|
|
43
|
+
Provides-Extra: huggingface
|
|
42
44
|
Provides-Extra: streaming
|
|
45
|
+
Provides-Extra: dask
|
|
43
46
|
Summary: Python client for Spiral.
|
|
44
47
|
Home-Page: https://spiraldb.com
|
|
45
48
|
Author-email: SpiralDB <hello@spiraldb.com>
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
pyspiral-0.6.
|
|
2
|
-
pyspiral-0.6.
|
|
3
|
-
pyspiral-0.6.
|
|
4
|
-
spiral/__init__.py,sha256=
|
|
5
|
-
spiral/_lib.abi3.so,sha256=
|
|
1
|
+
pyspiral-0.6.13.dist-info/METADATA,sha256=AvViHjB1v9OqYTKONYf_DbfCB0HktAy-numkhQhuq20,1977
|
|
2
|
+
pyspiral-0.6.13.dist-info/WHEEL,sha256=I5JYpyYzeAl2SOerY_wvkm-HJti0rDQc6zMeJs35MpM,108
|
|
3
|
+
pyspiral-0.6.13.dist-info/entry_points.txt,sha256=uft7u-a6g40NLt4Q6BleWbK4NY0M8nZuYPpP8DV0EOk,45
|
|
4
|
+
spiral/__init__.py,sha256=gAysTwG_oEeKVMdCOfOzDhl0bM2miiK8Ds2vvUihBWw,1153
|
|
5
|
+
spiral/_lib.abi3.so,sha256=x-kkHeRPwdf73BSVaVwxA6K71a0mtTXCV_omc2t0j7g,61221816
|
|
6
6
|
spiral/adbc.py,sha256=7IxfWIeQN-fh0W5OdN_PP2x3pzQYg6ZUOLsHg3jktqw,14842
|
|
7
7
|
spiral/api/__init__.py,sha256=ULBlVq3PnfNOO6T5naE_ULmmii-83--qTuN2PpAUQN0,2241
|
|
8
8
|
spiral/api/admin.py,sha256=A1iVR1XYJSObZivPAD5UzmPuMgupXc9kaHNYYa_kwfs,585
|
|
@@ -23,53 +23,59 @@ spiral/cli/admin.py,sha256=-ubYqs8nKjnQStbQ68jpWx_9xh0TsaxI0wM1Hfko8_U,319
|
|
|
23
23
|
spiral/cli/app.py,sha256=smzGj5a2RwhM9RQChmlEeKZLN4Fk60-bP7Lm5_Is1Rw,2760
|
|
24
24
|
spiral/cli/console.py,sha256=6JHbAQV6MFWz3P-VzqPOjhHpkIQagsCdzTMvmuDKMkU,2580
|
|
25
25
|
spiral/cli/fs.py,sha256=vaPcSc2YghhHeipxNitIdsHaBhFwlwkvPFqYsFSN9P0,2927
|
|
26
|
-
spiral/cli/iceberg.py,sha256=
|
|
27
|
-
spiral/cli/key_spaces.py,sha256=
|
|
26
|
+
spiral/cli/iceberg.py,sha256=wdMyl0j821MLnXNZ6Kwm65ogh98C-pjMJm3Y6YqlnTI,3249
|
|
27
|
+
spiral/cli/key_spaces.py,sha256=Xaw7WH-Qw_j6AxisdIoKfjAgVRXLM9qBFzuCTjPAFLI,3516
|
|
28
28
|
spiral/cli/login.py,sha256=2tw6uN5rEpiMMAmjQSB3-JUPf3C0Wc1eTGCDxhYtJps,731
|
|
29
29
|
spiral/cli/orgs.py,sha256=fmOuLxpeIFfKqePRi292Gv9k-EF5pPn_tbKd2BLl2Ig,2869
|
|
30
30
|
spiral/cli/printer.py,sha256=aosc763hDFgoXJGkiANmNyO3kAsecAS1JWgjEhn8GCM,1784
|
|
31
31
|
spiral/cli/projects.py,sha256=1M1nGrBT-t0aY9RV5Cnmzy7YrhIvmHwdkpa3y9j8rG8,5756
|
|
32
32
|
spiral/cli/state.py,sha256=10wTIVQ0SJkY67Z6-KQ1LFlt3aVIPmZhoHFdTwp4kNA,130
|
|
33
|
-
spiral/cli/tables.py,sha256=
|
|
33
|
+
spiral/cli/tables.py,sha256=qm3izcysElJrQlerNZdfx5RWSVXtyVfkP3o_H51ltFw,6366
|
|
34
34
|
spiral/cli/telemetry.py,sha256=Uxo1Q1FkKJ6n6QNGOUmL3j_pRRWRx0qWIhoP-U9BuR0,589
|
|
35
35
|
spiral/cli/text.py,sha256=DlWGe4JrkdERAiqyITNpk91Wqb63Re99rNYlIFsIamc,4031
|
|
36
36
|
spiral/cli/types.py,sha256=XYzo1GgX7dBBItoBSrHI4vO5C2lLmS2sktb-2GnGH3E,1362
|
|
37
37
|
spiral/cli/workloads.py,sha256=2_SLfQTFN6y73R9H0i9dk8VIOVagKxSxOpHXC56yptY,2015
|
|
38
|
-
spiral/client.py,sha256=
|
|
38
|
+
spiral/client.py,sha256=zMp-xXGL4R1Py_rYrC5o3jFLam1oA74azi50dvMP-_o,6329
|
|
39
39
|
spiral/core/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
40
40
|
spiral/core/_tools/__init__.pyi,sha256=b2KLfTOQ67pjfbYt07o0IGiTu5o2bZw69lllV8v0Dps,143
|
|
41
41
|
spiral/core/authn/__init__.pyi,sha256=z_GWyIS62fuiYQrYO8hzw4W8oGaiciqS1u5qtAt54VY,769
|
|
42
|
-
spiral/core/client/__init__.pyi,sha256=
|
|
43
|
-
spiral/core/
|
|
42
|
+
spiral/core/client/__init__.pyi,sha256=ajF8XaxThnTdsPpw1k3pPLEurIaDg9yeXqwIRJNlJTY,6665
|
|
43
|
+
spiral/core/expr/__init__.pyi,sha256=3HSKjkotiEkxBvGBALXEBIie0JiyI9bCpehwA3nMQkU,571
|
|
44
|
+
spiral/core/expr/images/__init__.pyi,sha256=wnE_wZXq7a4iqTg3SVm-ssxGw1WQZyk5dGOPaP4Btko,73
|
|
45
|
+
spiral/core/expr/list_/__init__.pyi,sha256=Q_9c87eIQfZbqlaw_rq3fvs93YEsW7K5VYk6VZ4g6mU,126
|
|
46
|
+
spiral/core/expr/refs/__init__.pyi,sha256=nZZP3l_Z6bLx6V8lTcH3Jgo--xwfADOU2XdTAvM5IMk,127
|
|
47
|
+
spiral/core/expr/str_/__init__.pyi,sha256=Bm6fZK-d4fNbJuuBhVoWMACXUbQQ2SjlhgrOpdOHIPM,86
|
|
48
|
+
spiral/core/expr/struct_/__init__.pyi,sha256=MXckd98eV_x3X0RhEWvlkA3DcDXRtLs5pNnTQkc09nE,296
|
|
49
|
+
spiral/core/expr/text/__init__.pyi,sha256=ed83n1xcsGY7_QDhMmJGnSQ20UrJFXcdv1AveSEcS1c,175
|
|
50
|
+
spiral/core/expr/udf/__init__.pyi,sha256=zsZs081KVhY3-1JidqTkWMW81Qd_ScoTGZvasIhIK-4,358
|
|
51
|
+
spiral/core/expr/video/__init__.pyi,sha256=nQJEcSsigZuRpMjkI_O4EEtMK_n2zRvorcL_KEeD5vU,95
|
|
52
|
+
spiral/core/table/__init__.pyi,sha256=YBL12_JPTWz2mNbqlDqbT1exxVJYzwfXdHCi6Z37JxA,3841
|
|
44
53
|
spiral/core/table/manifests/__init__.pyi,sha256=eVfDpmhYSjafIvvALqAkZe5baN3Y1HpKpxYEbjwd4gQ,1043
|
|
45
54
|
spiral/core/table/metastore/__init__.pyi,sha256=rc3u9MwEKRvL2kxOc8lBorddFRnM8o_o1frqtae86a4,1697
|
|
46
|
-
spiral/core/table/spec/__init__.pyi,sha256=
|
|
47
|
-
spiral/dataloader.py,sha256=
|
|
55
|
+
spiral/core/table/spec/__init__.pyi,sha256=twzX4vFmgBxInZWq_nyP6DR9OQjjOVrbZMn97kndeS8,5808
|
|
56
|
+
spiral/dataloader.py,sha256=W9siY4BF4p_rwTTSS4KgsaQsPLxxza6XmQhrdBzzMJ8,10592
|
|
48
57
|
spiral/dataset.py,sha256=PMLoXnXuEUciP6-NXqTmQLXu0UIH7OcC4-iZtY_iuO8,7973
|
|
49
58
|
spiral/datetime_.py,sha256=elXaUWtZuuLVcu9E0aXnvYRPB9XWqZbLDToozQYQYjU,950
|
|
50
59
|
spiral/debug/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
51
60
|
spiral/debug/manifests.py,sha256=7f1O3ba9mrA5nXpOF9cEIQuUAteP5wiBkFy_diQJ7No,3216
|
|
52
61
|
spiral/debug/metrics.py,sha256=XdRDcjggtsLNGCAjam6IxG9072pz_d2C8iLApNRFUtk,2044
|
|
53
62
|
spiral/debug/scan.py,sha256=UEm_aRnql5pwDPTpZgakMLNjlzkKL4RurBFFqH_BLAQ,9526
|
|
54
|
-
spiral/
|
|
55
|
-
spiral/expressions/
|
|
56
|
-
spiral/expressions/
|
|
57
|
-
spiral/expressions/
|
|
63
|
+
spiral/enrichment.py,sha256=e2yzNWTTG73uEkLTc4ccTNRQ94cBtM04eGzlJ2-kBOI,5851
|
|
64
|
+
spiral/expressions/__init__.py,sha256=Fp7Xx3exh9KJad92tgd_TGGIpYLQTHqWjW-pexzQibU,7981
|
|
65
|
+
spiral/expressions/base.py,sha256=PvhJkcUSsPSIaxirHVzM9zlqyBXiaiia1HXohXdOmL4,5377
|
|
66
|
+
spiral/expressions/http.py,sha256=WfHVLqz_LjBr78mN3ARBRQqgBrkao7-S73JxjC4Xwvo,356
|
|
58
67
|
spiral/expressions/list_.py,sha256=MMt5lf5H1M3O-x6N_PvqOLGq9NOk6Ukv0fPWwPC_uy4,1809
|
|
59
|
-
spiral/expressions/
|
|
60
|
-
spiral/expressions/png.py,sha256=KO8X0OmMzUFwpg2I_j0JTyldPzVXDWIMzjWMWDV9vIY,506
|
|
61
|
-
spiral/expressions/qoi.py,sha256=gvIbb6fXb_Bb080sn9wkpbGGrPs2UEcTXCfuv4-kcYQ,506
|
|
62
|
-
spiral/expressions/refs.py,sha256=omeHBQ5o6N4xgZ3x5Xz7IRrWwYBBtQY8DYK0NNAxeGo,2109
|
|
68
|
+
spiral/expressions/s3.py,sha256=bkd0HANerNKlOblp2z7JJOSWjF9Bw9lZe1A-KTrUEgk,378
|
|
63
69
|
spiral/expressions/str_.py,sha256=tY8RXW3JWvr1-bEfCZtk5FAf11wKJnXPuA9EoeJ9tA4,1265
|
|
64
70
|
spiral/expressions/struct.py,sha256=pGAnCDh6AK0BK1XfZ1qG4ce4ranIQEE1HQsgmzBcfwQ,2038
|
|
65
71
|
spiral/expressions/text.py,sha256=-02gBWYoyNQ3qQ1--9HTa8IryUDojYQVIp8C7rgnOWQ,1893
|
|
66
|
-
spiral/expressions/tiff.py,sha256=
|
|
67
|
-
spiral/expressions/udf.py,sha256=
|
|
72
|
+
spiral/expressions/tiff.py,sha256=4dngO97bT1QY0By7-PxOQVmSwQC3PQAiixVhLJ-4HMQ,7986
|
|
73
|
+
spiral/expressions/udf.py,sha256=XOxa7Kocb4Cg4q_qFvRT6hVnVzi22CQenqrvS-TL-VY,1936
|
|
68
74
|
spiral/grpc_.py,sha256=f3czdP1Mxme42Y5--a5ogYq1TTiWn-J_MlGjwJ2mWwM,1015
|
|
69
75
|
spiral/iceberg.py,sha256=JGq62Qnf296r9_hRAoH85GQq45-uSBjwXWw_CvPi6G4,930
|
|
70
76
|
spiral/iterable_dataset.py,sha256=Eekg9ad8tcwXcloHWReBbvCSr5ZappRHn2ldKTvwqS0,4622
|
|
71
77
|
spiral/key_space_index.py,sha256=NAB_nONEjpMYbse8suz42w7Qb5OPHuKN9h9CT2NJe08,1460
|
|
72
|
-
spiral/project.py,sha256=
|
|
78
|
+
spiral/project.py,sha256=VsokZgS0TqIel7UAXMyoBToxn-l_D3ivGwc41x7HLF0,7277
|
|
73
79
|
spiral/protogen/_/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
74
80
|
spiral/protogen/_/arrow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
75
81
|
spiral/protogen/_/arrow/flight/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -87,16 +93,16 @@ spiral/protogen/_/substrait/extensions/__init__.py,sha256=nhnEnho70GAT8WPj2xtwJU
|
|
|
87
93
|
spiral/protogen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
88
94
|
spiral/protogen/util.py,sha256=smnvVo6nYH3FfDm9jqhNLaXz4bbTBaQezHQDCTvZyiQ,1486
|
|
89
95
|
spiral/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
90
|
-
spiral/scan.py,sha256=
|
|
96
|
+
spiral/scan.py,sha256=csbk5ePbU-RlEVIF7isccF2zRBB8L8ZY_HEpalMjgLY,12340
|
|
91
97
|
spiral/server.py,sha256=ztBmB5lBnUz-smQxR_tC8AI5SOhz17wH0MI3GuzDUdM,600
|
|
92
|
-
spiral/settings.py,sha256=
|
|
98
|
+
spiral/settings.py,sha256=sUhMMBCXaPvUYztN_gztD9TjeUYJwVeEcJrq4FLy6M0,3232
|
|
93
99
|
spiral/snapshot.py,sha256=cTobi5jtiANxalGA-isokQHblNmXGtuUvgUGGNVybsI,1555
|
|
94
100
|
spiral/streaming_/__init__.py,sha256=s7MlW2ERsuZmZGExLFL6RcZon2e0tNBocBg5ANgki7k,61
|
|
95
101
|
spiral/streaming_/reader.py,sha256=tl_lC9xgh1-QFhsZn4xQT7It3PVTzHCEUT2BG2dWBRQ,4166
|
|
96
|
-
spiral/streaming_/stream.py,sha256=
|
|
102
|
+
spiral/streaming_/stream.py,sha256=DM1hBDHnWm1ZFKZ-hZ4zxeSXITcUI6kWzwdJZvywI8o,5915
|
|
97
103
|
spiral/substrait_.py,sha256=AKeOD4KIXvz2J4TYxnIneOiHddtBIyOhuNxVO_uH0eg,12592
|
|
98
|
-
spiral/table.py,sha256=
|
|
104
|
+
spiral/table.py,sha256=prjDBcm6Qerdq3ypXzfbXb7ngAcO0j-Z9aTeZvzKoqs,12209
|
|
99
105
|
spiral/text_index.py,sha256=FQ9rgIEGLSJryS9lFdMhKtPFey18BXoWbPXyvZPJJ04,442
|
|
100
|
-
spiral/transaction.py,sha256=
|
|
106
|
+
spiral/transaction.py,sha256=hQm6DfCklMDpIYJ9qA2wR45cCuUPGCiJy1tHGE3AsEY,3418
|
|
101
107
|
spiral/types_.py,sha256=W_jyO7F6rpPiH69jhgSgV7OxQZbOlb1Ho3InpKUP6Eo,155
|
|
102
|
-
pyspiral-0.6.11.dist-info/RECORD,,
|
|
108
|
+
pyspiral-0.6.13.dist-info/RECORD,,
|
spiral/__init__.py
CHANGED
|
@@ -1,14 +1,18 @@
|
|
|
1
1
|
"""Python client for Spiral"""
|
|
2
2
|
|
|
3
|
+
import importlib
|
|
4
|
+
|
|
3
5
|
# This is here to make sure we load the native extension first
|
|
4
6
|
from spiral import _lib
|
|
5
7
|
|
|
6
8
|
# Eagerly import the Spiral library
|
|
7
9
|
assert _lib, "Spiral library"
|
|
8
10
|
|
|
11
|
+
|
|
9
12
|
from spiral.client import Spiral # noqa: E402
|
|
10
13
|
from spiral.core.client import Shard, ShuffleConfig # noqa: E402
|
|
11
14
|
from spiral.dataloader import SpiralDataLoader, World # noqa: E402
|
|
15
|
+
from spiral.enrichment import Enrichment # noqa: E402
|
|
12
16
|
from spiral.iceberg import Iceberg # noqa: E402
|
|
13
17
|
from spiral.key_space_index import KeySpaceIndex # noqa: E402
|
|
14
18
|
from spiral.project import Project # noqa: E402
|
|
@@ -24,6 +28,7 @@ __all__ = [
|
|
|
24
28
|
"Table",
|
|
25
29
|
"Snapshot",
|
|
26
30
|
"Transaction",
|
|
31
|
+
"Enrichment",
|
|
27
32
|
"Scan",
|
|
28
33
|
"Shard",
|
|
29
34
|
"ShuffleConfig",
|
|
@@ -33,3 +38,5 @@ __all__ = [
|
|
|
33
38
|
"World",
|
|
34
39
|
"Iceberg",
|
|
35
40
|
]
|
|
41
|
+
|
|
42
|
+
__version__ = importlib.metadata.version("pyspiral")
|
spiral/_lib.abi3.so
CHANGED
|
Binary file
|
spiral/cli/iceberg.py
CHANGED
|
@@ -8,7 +8,7 @@ from typer import Argument
|
|
|
8
8
|
from spiral.cli import CONSOLE, ERR_CONSOLE, AsyncTyper, state
|
|
9
9
|
from spiral.cli.types import ProjectArg
|
|
10
10
|
|
|
11
|
-
app = AsyncTyper(short_help="Apache Iceberg Catalog")
|
|
11
|
+
app = AsyncTyper(short_help="Apache Iceberg Catalog.")
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
@app.command(help="List namespaces.")
|
spiral/cli/key_spaces.py
CHANGED
|
@@ -64,7 +64,7 @@ def show(
|
|
|
64
64
|
"""Show index partitions."""
|
|
65
65
|
index_id = get_index_id(project, name)
|
|
66
66
|
index = state.spiral.key_space_index(index_id)
|
|
67
|
-
shards = state.spiral.
|
|
67
|
+
shards = state.spiral.internal.compute_shards(index.core)
|
|
68
68
|
|
|
69
69
|
rich_table = rich.table.Table("Begin", "End", "Cardinality", title=f"Index {index.name} Partitions")
|
|
70
70
|
for partition in shards:
|
|
@@ -87,3 +87,17 @@ def sync(
|
|
|
87
87
|
index_id = get_index_id(project, name)
|
|
88
88
|
response = state.spiral.api.key_space_indexes.sync_index(index_id, SyncIndexRequest(resources=resources))
|
|
89
89
|
CONSOLE.print(f"Triggered sync job {response.worker_id} for index {index_id}.")
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
# TODO(marko): This will be removed.
|
|
93
|
+
@app.command(help="Run a sync and wait for it to complete.")
|
|
94
|
+
def sync_local(
|
|
95
|
+
project: ProjectArg,
|
|
96
|
+
name: Annotated[str | None, Option(help="Index name.")] = None,
|
|
97
|
+
):
|
|
98
|
+
"""Run a sync and wait for it to complete."""
|
|
99
|
+
index_id = get_index_id(project, name)
|
|
100
|
+
index = state.spiral.key_space_index(index_id)
|
|
101
|
+
snapshot = state.spiral.table(index.table_id).snapshot()
|
|
102
|
+
state.spiral.internal.update_key_space_index(index.core, snapshot.core)
|
|
103
|
+
CONSOLE.print(f"Index {index.name} is up to date as-of {snapshot.asof}.")
|
spiral/cli/tables.py
CHANGED
|
@@ -130,7 +130,7 @@ def flush(
|
|
|
130
130
|
keep_latest_s = int(duration.total_seconds()) if duration is not None else None
|
|
131
131
|
|
|
132
132
|
identifier, t = get_table(project, table, dataset)
|
|
133
|
-
state.spiral.
|
|
133
|
+
state.spiral.internal.flush_wal(t.core, keep_latest_s=keep_latest_s) # pyright: ignore[reportPrivateUsage]
|
|
134
134
|
CONSOLE.print(f"Flushed WAL for table {identifier} in project {project}.")
|
|
135
135
|
|
|
136
136
|
|
|
@@ -143,10 +143,10 @@ def manifests(
|
|
|
143
143
|
_, t = get_table(project, table, dataset)
|
|
144
144
|
s = t.snapshot()
|
|
145
145
|
|
|
146
|
-
key_space_state = state.spiral.
|
|
146
|
+
key_space_state = state.spiral.internal.key_space_state(s.core) # pyright: ignore[reportPrivateUsage]
|
|
147
147
|
key_space_manifest = key_space_state.manifest
|
|
148
148
|
|
|
149
|
-
column_groups_states = state.spiral.
|
|
149
|
+
column_groups_states = state.spiral.internal.column_groups_states(s.core, key_space_state) # pyright: ignore[reportPrivateUsage]
|
|
150
150
|
display_manifests(key_space_manifest, [(x.column_group, x.manifest) for x in column_groups_states])
|
|
151
151
|
|
|
152
152
|
|
spiral/client.py
CHANGED
|
@@ -6,7 +6,7 @@ import pyarrow as pa
|
|
|
6
6
|
|
|
7
7
|
from spiral.api import SpiralAPI
|
|
8
8
|
from spiral.api.projects import CreateProjectRequest, CreateProjectResponse
|
|
9
|
-
from spiral.core.client import
|
|
9
|
+
from spiral.core.client import Internal
|
|
10
10
|
from spiral.core.client import Spiral as CoreSpiral
|
|
11
11
|
from spiral.datetime_ import timestamp_micros
|
|
12
12
|
from spiral.expressions import ExprLike
|
|
@@ -35,9 +35,13 @@ class Spiral:
|
|
|
35
35
|
return self._config.api
|
|
36
36
|
|
|
37
37
|
@property
|
|
38
|
-
def
|
|
38
|
+
def core(self) -> CoreSpiral:
|
|
39
39
|
return self._config.core
|
|
40
40
|
|
|
41
|
+
@property
|
|
42
|
+
def internal(self) -> Internal:
|
|
43
|
+
return self.core.internal(format=settings().file_format)
|
|
44
|
+
|
|
41
45
|
@property
|
|
42
46
|
def organization(self) -> str:
|
|
43
47
|
if self._org is None:
|
|
@@ -79,19 +83,19 @@ class Spiral:
|
|
|
79
83
|
"""Open a table using an ID."""
|
|
80
84
|
from spiral.table import Table
|
|
81
85
|
|
|
82
|
-
return Table(self, self.
|
|
86
|
+
return Table(self, self.core.table(table_id))
|
|
83
87
|
|
|
84
88
|
def text_index(self, index_id: str) -> "TextIndex":
|
|
85
89
|
"""Open a text index using an ID."""
|
|
86
90
|
from spiral.text_index import TextIndex
|
|
87
91
|
|
|
88
|
-
return TextIndex(self.
|
|
92
|
+
return TextIndex(self.core.text_index(index_id))
|
|
89
93
|
|
|
90
94
|
def key_space_index(self, index_id: str) -> "KeySpaceIndex":
|
|
91
95
|
"""Open a key space index using an ID."""
|
|
92
96
|
from spiral.key_space_index import KeySpaceIndex
|
|
93
97
|
|
|
94
|
-
return KeySpaceIndex(self.
|
|
98
|
+
return KeySpaceIndex(self.core.key_space_index(index_id))
|
|
95
99
|
|
|
96
100
|
def scan(
|
|
97
101
|
self,
|
|
@@ -117,7 +121,8 @@ class Spiral:
|
|
|
117
121
|
where = se.lift(where)
|
|
118
122
|
|
|
119
123
|
return Scan(
|
|
120
|
-
self
|
|
124
|
+
self,
|
|
125
|
+
self.core.scan(
|
|
121
126
|
projection.__expr__,
|
|
122
127
|
filter=where.__expr__ if where else None,
|
|
123
128
|
asof=asof,
|
|
@@ -155,17 +160,13 @@ class Spiral:
|
|
|
155
160
|
freshness_window = timedelta(seconds=0)
|
|
156
161
|
freshness_window_s = int(freshness_window.total_seconds())
|
|
157
162
|
|
|
158
|
-
return self.
|
|
163
|
+
return self.core.search(
|
|
159
164
|
top_k=top_k,
|
|
160
165
|
rank_by=rank_by.__expr__,
|
|
161
166
|
filters=filters.__expr__ if filters else None,
|
|
162
167
|
freshness_window_s=freshness_window_s,
|
|
163
168
|
)
|
|
164
169
|
|
|
165
|
-
def _ops(self) -> Operations:
|
|
166
|
-
"""Access maintenance operations."""
|
|
167
|
-
return self._core._ops(format=settings().file_format)
|
|
168
|
-
|
|
169
170
|
@property
|
|
170
171
|
def iceberg(self) -> "Iceberg":
|
|
171
172
|
"""
|
spiral/core/client/__init__.pyi
CHANGED
|
@@ -3,7 +3,7 @@ from typing import Any, Literal
|
|
|
3
3
|
import pyarrow as pa
|
|
4
4
|
from spiral.api.types import DatasetName, IndexName, ProjectId, RootUri, TableName
|
|
5
5
|
from spiral.core.authn import Authn
|
|
6
|
-
from spiral.core.table import ColumnGroupState, KeyRange, KeySpaceState, Scan, Snapshot, Table, Transaction
|
|
6
|
+
from spiral.core.table import ColumnGroupState, KeyRange, KeySpaceState, Scan, ScanState, Snapshot, Table, Transaction
|
|
7
7
|
from spiral.core.table.spec import ColumnGroup, Schema
|
|
8
8
|
from spiral.expressions import Expr
|
|
9
9
|
|
|
@@ -30,6 +30,10 @@ class Spiral:
|
|
|
30
30
|
"""Construct a table scan."""
|
|
31
31
|
...
|
|
32
32
|
|
|
33
|
+
def load_scan(self, scan_state: ScanState) -> Scan:
|
|
34
|
+
"""Load a scan from a serialized scan state."""
|
|
35
|
+
...
|
|
36
|
+
|
|
33
37
|
def transaction(self, table: Table, format: str | None = None, retries: int | None = 3) -> Transaction:
|
|
34
38
|
"""Begin a table transaction."""
|
|
35
39
|
...
|
|
@@ -100,12 +104,8 @@ class Spiral:
|
|
|
100
104
|
"""Create a new key space index in the specified project."""
|
|
101
105
|
...
|
|
102
106
|
|
|
103
|
-
def
|
|
104
|
-
"""
|
|
105
|
-
|
|
106
|
-
IMPORTANT: This API is internal and is currently exposed for development & testing.
|
|
107
|
-
Maintenance operations are run by SpiralDB.
|
|
108
|
-
"""
|
|
107
|
+
def internal(self, *, format: str | None = None) -> Internal:
|
|
108
|
+
"""Internal client APIs. It can change without notice."""
|
|
109
109
|
...
|
|
110
110
|
|
|
111
111
|
class TextIndex:
|
|
@@ -158,7 +158,7 @@ class ShuffleConfig:
|
|
|
158
158
|
max_batch_size: int | None = None,
|
|
159
159
|
): ...
|
|
160
160
|
|
|
161
|
-
class
|
|
161
|
+
class Internal:
|
|
162
162
|
def flush_wal(self, table: Table, *, keep_latest_s: int | None = None) -> None:
|
|
163
163
|
"""
|
|
164
164
|
Flush the write-ahead log of the table.
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from pyarrow import Array, DataType, Scalar
|
|
2
|
+
|
|
3
|
+
class Expr:
|
|
4
|
+
"""Low level expression class."""
|
|
5
|
+
|
|
6
|
+
def aux(name: str, data_type: DataType) -> Expr: ...
|
|
7
|
+
|
|
8
|
+
# Array is correct (there is no ArrayData), see the table here:
|
|
9
|
+
# https://arrow.apache.org/rust/arrow_pyarrow/index.html
|
|
10
|
+
def scalar(array: Array[Scalar[DataType]]) -> Expr: ...
|
|
11
|
+
def not_(expr: Expr) -> Expr: ...
|
|
12
|
+
def is_null(expr: Expr) -> Expr: ...
|
|
13
|
+
def binary(op: str, expr: Expr, Expr: Expr) -> Expr: ...
|
|
14
|
+
def cast(_expr: Expr, _data_type: DataType) -> Expr: ...
|
|
15
|
+
def array_lit(array: Array[Scalar[DataType]]) -> Expr: ...
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
from .. import Expr
|
|
2
|
+
|
|
3
|
+
def getitem(expr: Expr, item: str) -> Expr: ...
|
|
4
|
+
def select(expr: Expr, including: list[str] | None = None, excluding: list[str] | None = None) -> Expr: ...
|
|
5
|
+
def pack(names: list[str], children: list[str], nullable: bool) -> Expr: ...
|
|
6
|
+
def merge(names: list[Expr]) -> Expr: ...
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from collections.abc import Callable
|
|
2
|
+
|
|
3
|
+
from pyarrow import Array, DataType, Scalar
|
|
4
|
+
|
|
5
|
+
from .. import Expr
|
|
6
|
+
|
|
7
|
+
class UDF:
|
|
8
|
+
def __call__(self, args: list[Expr]) -> Expr: ...
|
|
9
|
+
|
|
10
|
+
def create(
|
|
11
|
+
name: str,
|
|
12
|
+
return_type: Callable[[tuple[DataType, ...]], DataType],
|
|
13
|
+
invoke: Callable[[tuple[Array[Scalar[DataType]], ...]], Array[Scalar[DataType]]],
|
|
14
|
+
) -> UDF: ...
|
spiral/core/table/__init__.pyi
CHANGED
|
@@ -5,7 +5,7 @@ from spiral.core.client import Shard, ShuffleConfig
|
|
|
5
5
|
|
|
6
6
|
from .manifests import FragmentManifest
|
|
7
7
|
from .metastore import PyMetastore
|
|
8
|
-
from .spec import ColumnGroup, Key, Schema, WriteAheadLog
|
|
8
|
+
from .spec import ColumnGroup, Key, Operation, Schema, WriteAheadLog
|
|
9
9
|
|
|
10
10
|
class KeyRange:
|
|
11
11
|
"""A right-exclusive range of keys."""
|
|
@@ -52,6 +52,11 @@ class Snapshot:
|
|
|
52
52
|
table: Table
|
|
53
53
|
wal: WriteAheadLog
|
|
54
54
|
|
|
55
|
+
class ScanState:
|
|
56
|
+
def to_json(self) -> str: ...
|
|
57
|
+
@staticmethod
|
|
58
|
+
def from_json(json: str) -> ScanState: ...
|
|
59
|
+
|
|
55
60
|
class Scan:
|
|
56
61
|
def key_schema(self) -> Schema: ...
|
|
57
62
|
def schema(self) -> Schema: ...
|
|
@@ -62,8 +67,10 @@ class Scan:
|
|
|
62
67
|
def column_groups(self) -> list[ColumnGroup]: ...
|
|
63
68
|
def column_group_state(self, column_group: ColumnGroup) -> ColumnGroupState: ...
|
|
64
69
|
def key_space_state(self, table_id: str) -> KeySpaceState: ...
|
|
70
|
+
def scan_state(self) -> ScanState: ...
|
|
65
71
|
def to_record_batches(
|
|
66
72
|
self,
|
|
73
|
+
key_range: KeyRange | None = None,
|
|
67
74
|
key_table: pa.Table | pa.RecordBatch | None = None,
|
|
68
75
|
batch_readahead: int | None = None,
|
|
69
76
|
) -> pa.RecordBatchReader: ...
|
|
@@ -95,7 +102,18 @@ class Transaction:
|
|
|
95
102
|
status: str
|
|
96
103
|
|
|
97
104
|
def write(self, table: pa.RecordBatchReader, *, partition_size_bytes: int | None = None): ...
|
|
105
|
+
def writeback(
|
|
106
|
+
self,
|
|
107
|
+
scan: Scan,
|
|
108
|
+
*,
|
|
109
|
+
key_range: KeyRange | None = None,
|
|
110
|
+
partition_size_bytes: int | None = None,
|
|
111
|
+
batch_readahead: int | None = None,
|
|
112
|
+
): ...
|
|
98
113
|
def drop_columns(self, column_paths: list[str]): ...
|
|
114
|
+
def take(self) -> list[Operation]: ...
|
|
115
|
+
def include(self, ops: list[Operation]): ...
|
|
99
116
|
def commit(self): ...
|
|
100
117
|
def abort(self): ...
|
|
118
|
+
def is_empty(self) -> bool: ...
|
|
101
119
|
def metrics(self) -> dict[str, Any]: ...
|
|
@@ -62,6 +62,12 @@ class ColumnGroupMetadata:
|
|
|
62
62
|
def apply_wal(self, wal: WriteAheadLog) -> ColumnGroupMetadata:
|
|
63
63
|
"""Applies the given WAL to the metadata."""
|
|
64
64
|
|
|
65
|
+
class Operation:
|
|
66
|
+
# Base class for all operations in the WAL.
|
|
67
|
+
def to_json(self) -> str: ...
|
|
68
|
+
@staticmethod
|
|
69
|
+
def from_json(json: str) -> Operation: ...
|
|
70
|
+
|
|
65
71
|
class LogEntry:
|
|
66
72
|
ts: int
|
|
67
73
|
operation: (
|
spiral/dataloader.py
CHANGED
|
@@ -88,22 +88,24 @@ class SpiralDataLoader:
|
|
|
88
88
|
- map_workers for parallel post-processing (tokenization, decoding, etc.)
|
|
89
89
|
- Built-in checkpoint support via skip_samples
|
|
90
90
|
- Explicit shard-based architecture for distributed training
|
|
91
|
-
"""
|
|
92
91
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
92
|
+
Simple usage:
|
|
93
|
+
```python
|
|
94
|
+
loader = SpiralDataLoader(scan, batch_size=32)
|
|
95
|
+
for batch in loader:
|
|
96
|
+
train_step(batch)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
With parallel transforms:
|
|
100
|
+
```python
|
|
101
|
+
loader = SpiralDataLoader(
|
|
102
|
+
scan,
|
|
103
|
+
batch_size=32,
|
|
104
|
+
transform_fn=tokenize_batch,
|
|
105
|
+
map_workers=4,
|
|
106
|
+
)
|
|
107
|
+
```
|
|
108
|
+
"""
|
|
107
109
|
|
|
108
110
|
def __init__(
|
|
109
111
|
self,
|
|
@@ -119,6 +121,7 @@ class SpiralDataLoader:
|
|
|
119
121
|
# TODO(os): accept vortex arrays here instead of Arrow
|
|
120
122
|
transform_fn: Callable[[pa.RecordBatch], Any] | None = None,
|
|
121
123
|
map_workers: int = 0,
|
|
124
|
+
infinite: bool = False,
|
|
122
125
|
):
|
|
123
126
|
"""Initialize SpiralDataLoader.
|
|
124
127
|
|
|
@@ -143,6 +146,9 @@ class SpiralDataLoader:
|
|
|
143
146
|
map_workers: Number of worker processes for parallel transform_fn
|
|
144
147
|
application. 0 means single-process (no parallelism). Use this for
|
|
145
148
|
CPU-bound transforms like tokenization or audio decoding.
|
|
149
|
+
infinite: Whether to cycle through the dataset infinitely. If True,
|
|
150
|
+
the dataloader will repeat the dataset indefinitely. If False,
|
|
151
|
+
the dataloader will stop after going through the dataset once.
|
|
146
152
|
"""
|
|
147
153
|
self.scan = scan
|
|
148
154
|
self.shards = shards if shards is not None else scan.shards()
|
|
@@ -155,6 +161,7 @@ class SpiralDataLoader:
|
|
|
155
161
|
self.batch_readahead = batch_readahead
|
|
156
162
|
self.transform_fn = transform_fn
|
|
157
163
|
self.map_workers = map_workers
|
|
164
|
+
self.infinite = infinite
|
|
158
165
|
|
|
159
166
|
self._samples_yielded = 0
|
|
160
167
|
|
|
@@ -174,7 +181,7 @@ class SpiralDataLoader:
|
|
|
174
181
|
shuffle=shuffle,
|
|
175
182
|
max_batch_size=self.batch_size,
|
|
176
183
|
batch_readahead=self.batch_readahead,
|
|
177
|
-
infinite=
|
|
184
|
+
infinite=self.infinite,
|
|
178
185
|
)
|
|
179
186
|
|
|
180
187
|
if self.skip_samples > 0:
|
|
@@ -220,16 +227,21 @@ class SpiralDataLoader:
|
|
|
220
227
|
|
|
221
228
|
Returns:
|
|
222
229
|
Dictionary containing samples_yielded, seed, and shards.
|
|
230
|
+
|
|
231
|
+
Example checkpoint:
|
|
232
|
+
```python
|
|
233
|
+
loader = SpiralDataLoader(scan, batch_size=32, seed=42)
|
|
234
|
+
for i, batch in enumerate(loader):
|
|
235
|
+
if i == 10:
|
|
236
|
+
checkpoint = loader.state_dict()
|
|
237
|
+
break
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
Example resume:
|
|
241
|
+
```python
|
|
242
|
+
loader = SpiralDataLoader.from_state_dict(scan, checkpoint, batch_size=32)
|
|
243
|
+
```
|
|
223
244
|
"""
|
|
224
|
-
# Example usage:
|
|
225
|
-
# loader = SpiralDataLoader(scan, batch_size=32, seed=42)
|
|
226
|
-
# for i, batch in enumerate(loader):
|
|
227
|
-
# if i == 10:
|
|
228
|
-
# checkpoint = loader.state_dict()
|
|
229
|
-
# break
|
|
230
|
-
#
|
|
231
|
-
# # Resume later with exact same shards
|
|
232
|
-
# loader = SpiralDataLoader.from_state_dict(scan, checkpoint, batch_size=32)
|
|
233
245
|
return {
|
|
234
246
|
"samples_yielded": self._samples_yielded,
|
|
235
247
|
"seed": self.seed,
|
|
@@ -257,20 +269,22 @@ class SpiralDataLoader:
|
|
|
257
269
|
|
|
258
270
|
Returns:
|
|
259
271
|
New SpiralDataLoader instance configured to resume from the checkpoint.
|
|
272
|
+
|
|
273
|
+
Save checkpoint during training:
|
|
274
|
+
```python
|
|
275
|
+
loader = scan.to_distributed_data_loader(scan, batch_size=32, seed=42)
|
|
276
|
+
checkpoint = loader.state_dict()
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
Resume later using the same shards from checkpoint:
|
|
280
|
+
```python
|
|
281
|
+
resumed_loader = SpiralDataLoader.from_state_dict(
|
|
282
|
+
scan,
|
|
283
|
+
checkpoint,
|
|
284
|
+
batch_size=32,
|
|
285
|
+
transform_fn=my_transform,
|
|
286
|
+
)
|
|
260
287
|
"""
|
|
261
|
-
# Example usage:
|
|
262
|
-
#
|
|
263
|
-
# Save checkpoint during training:
|
|
264
|
-
# loader = scan.to_distributed_data_loader(scan, batch_size=32, seed=42)
|
|
265
|
-
# checkpoint = loader.state_dict()
|
|
266
|
-
#
|
|
267
|
-
# Resume later using the same shards from checkpoint:
|
|
268
|
-
# resumed_loader = SpiralDataLoader.from_state_dict(
|
|
269
|
-
# scan,
|
|
270
|
-
# checkpoint,
|
|
271
|
-
# batch_size=32,
|
|
272
|
-
# transform_fn=my_transform,
|
|
273
|
-
# )
|
|
274
288
|
|
|
275
289
|
# Extract resume parameters from state
|
|
276
290
|
seed = state.get("seed", 42)
|