pyspiral 0.6.11__cp312-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl → 0.6.12__cp312-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyspiral might be problematic. Click here for more details.
- {pyspiral-0.6.11.dist-info → pyspiral-0.6.12.dist-info}/METADATA +1 -1
- {pyspiral-0.6.11.dist-info → pyspiral-0.6.12.dist-info}/RECORD +31 -27
- spiral/_lib.abi3.so +0 -0
- spiral/cli/key_spaces.py +1 -1
- spiral/cli/tables.py +3 -3
- spiral/client.py +20 -12
- spiral/core/client/__init__.pyi +8 -8
- spiral/core/expr/__init__.pyi +15 -0
- spiral/core/expr/images/__init__.pyi +3 -0
- spiral/core/expr/list_/__init__.pyi +4 -0
- spiral/core/expr/refs/__init__.pyi +4 -0
- spiral/core/expr/str_/__init__.pyi +3 -0
- spiral/core/expr/struct_/__init__.pyi +6 -0
- spiral/core/expr/text/__init__.pyi +5 -0
- spiral/core/expr/udf/__init__.pyi +14 -0
- spiral/core/expr/video/__init__.pyi +3 -0
- spiral/core/table/__init__.pyi +10 -1
- spiral/core/table/spec/__init__.pyi +4 -0
- spiral/dataloader.py +46 -37
- spiral/expressions/__init__.py +13 -20
- spiral/expressions/base.py +9 -4
- spiral/expressions/s3.py +18 -0
- spiral/expressions/tiff.py +2 -3
- spiral/expressions/udf.py +34 -25
- spiral/project.py +6 -6
- spiral/scan.py +28 -0
- spiral/streaming_/stream.py +1 -1
- spiral/table.py +25 -5
- spiral/transaction.py +27 -0
- spiral/expressions/http.py +0 -86
- spiral/expressions/io.py +0 -100
- spiral/expressions/mp4.py +0 -62
- spiral/expressions/png.py +0 -18
- spiral/expressions/qoi.py +0 -18
- spiral/expressions/refs.py +0 -58
- {pyspiral-0.6.11.dist-info → pyspiral-0.6.12.dist-info}/WHEEL +0 -0
- {pyspiral-0.6.11.dist-info → pyspiral-0.6.12.dist-info}/entry_points.txt +0 -0
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
pyspiral-0.6.
|
|
2
|
-
pyspiral-0.6.
|
|
3
|
-
pyspiral-0.6.
|
|
1
|
+
pyspiral-0.6.12.dist-info/METADATA,sha256=ANXjtdzd8s_zdWLd-mTm0X07pWbjlpkjQP8X5yP4qpY,1843
|
|
2
|
+
pyspiral-0.6.12.dist-info/WHEEL,sha256=0ecHyBdkJfSXYIVmWsPh7S-4h4fSrB4FlXhlnIu9c_A,130
|
|
3
|
+
pyspiral-0.6.12.dist-info/entry_points.txt,sha256=uft7u-a6g40NLt4Q6BleWbK4NY0M8nZuYPpP8DV0EOk,45
|
|
4
4
|
spiral/__init__.py,sha256=n4JNLrO3wyw_k_U_JKyNiGON0wEpfvqxDhDdB2P6dhM,1007
|
|
5
|
-
spiral/_lib.abi3.so,sha256=
|
|
5
|
+
spiral/_lib.abi3.so,sha256=XaAqnKj8sXWK4OjxsLtR-hAI2hAw523pL2aTyBrF_Is,61055320
|
|
6
6
|
spiral/adbc.py,sha256=7IxfWIeQN-fh0W5OdN_PP2x3pzQYg6ZUOLsHg3jktqw,14842
|
|
7
7
|
spiral/api/__init__.py,sha256=ULBlVq3PnfNOO6T5naE_ULmmii-83--qTuN2PpAUQN0,2241
|
|
8
8
|
spiral/api/admin.py,sha256=A1iVR1XYJSObZivPAD5UzmPuMgupXc9kaHNYYa_kwfs,585
|
|
@@ -24,52 +24,56 @@ spiral/cli/app.py,sha256=smzGj5a2RwhM9RQChmlEeKZLN4Fk60-bP7Lm5_Is1Rw,2760
|
|
|
24
24
|
spiral/cli/console.py,sha256=6JHbAQV6MFWz3P-VzqPOjhHpkIQagsCdzTMvmuDKMkU,2580
|
|
25
25
|
spiral/cli/fs.py,sha256=vaPcSc2YghhHeipxNitIdsHaBhFwlwkvPFqYsFSN9P0,2927
|
|
26
26
|
spiral/cli/iceberg.py,sha256=Q14tcGcn1LixbFCYP0GhfYwFFXTmmi8tqBPYwalJEyE,3248
|
|
27
|
-
spiral/cli/key_spaces.py,sha256=
|
|
27
|
+
spiral/cli/key_spaces.py,sha256=TF1tbRnrjemp4aMAbLc7o4_jPChIumaQGPuvfW0sR5o,2945
|
|
28
28
|
spiral/cli/login.py,sha256=2tw6uN5rEpiMMAmjQSB3-JUPf3C0Wc1eTGCDxhYtJps,731
|
|
29
29
|
spiral/cli/orgs.py,sha256=fmOuLxpeIFfKqePRi292Gv9k-EF5pPn_tbKd2BLl2Ig,2869
|
|
30
30
|
spiral/cli/printer.py,sha256=aosc763hDFgoXJGkiANmNyO3kAsecAS1JWgjEhn8GCM,1784
|
|
31
31
|
spiral/cli/projects.py,sha256=1M1nGrBT-t0aY9RV5Cnmzy7YrhIvmHwdkpa3y9j8rG8,5756
|
|
32
32
|
spiral/cli/state.py,sha256=10wTIVQ0SJkY67Z6-KQ1LFlt3aVIPmZhoHFdTwp4kNA,130
|
|
33
|
-
spiral/cli/tables.py,sha256=
|
|
33
|
+
spiral/cli/tables.py,sha256=qm3izcysElJrQlerNZdfx5RWSVXtyVfkP3o_H51ltFw,6366
|
|
34
34
|
spiral/cli/telemetry.py,sha256=Uxo1Q1FkKJ6n6QNGOUmL3j_pRRWRx0qWIhoP-U9BuR0,589
|
|
35
35
|
spiral/cli/text.py,sha256=DlWGe4JrkdERAiqyITNpk91Wqb63Re99rNYlIFsIamc,4031
|
|
36
36
|
spiral/cli/types.py,sha256=XYzo1GgX7dBBItoBSrHI4vO5C2lLmS2sktb-2GnGH3E,1362
|
|
37
37
|
spiral/cli/workloads.py,sha256=2_SLfQTFN6y73R9H0i9dk8VIOVagKxSxOpHXC56yptY,2015
|
|
38
|
-
spiral/client.py,sha256=
|
|
38
|
+
spiral/client.py,sha256=pw6vB85oLVbBudc_HRzmLCItcecsTjNM5SMu_kVOMCo,6568
|
|
39
39
|
spiral/core/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
40
40
|
spiral/core/_tools/__init__.pyi,sha256=b2KLfTOQ67pjfbYt07o0IGiTu5o2bZw69lllV8v0Dps,143
|
|
41
41
|
spiral/core/authn/__init__.pyi,sha256=z_GWyIS62fuiYQrYO8hzw4W8oGaiciqS1u5qtAt54VY,769
|
|
42
|
-
spiral/core/client/__init__.pyi,sha256=
|
|
43
|
-
spiral/core/
|
|
42
|
+
spiral/core/client/__init__.pyi,sha256=ajF8XaxThnTdsPpw1k3pPLEurIaDg9yeXqwIRJNlJTY,6665
|
|
43
|
+
spiral/core/expr/__init__.pyi,sha256=3HSKjkotiEkxBvGBALXEBIie0JiyI9bCpehwA3nMQkU,571
|
|
44
|
+
spiral/core/expr/images/__init__.pyi,sha256=wnE_wZXq7a4iqTg3SVm-ssxGw1WQZyk5dGOPaP4Btko,73
|
|
45
|
+
spiral/core/expr/list_/__init__.pyi,sha256=Q_9c87eIQfZbqlaw_rq3fvs93YEsW7K5VYk6VZ4g6mU,126
|
|
46
|
+
spiral/core/expr/refs/__init__.pyi,sha256=nZZP3l_Z6bLx6V8lTcH3Jgo--xwfADOU2XdTAvM5IMk,127
|
|
47
|
+
spiral/core/expr/str_/__init__.pyi,sha256=Bm6fZK-d4fNbJuuBhVoWMACXUbQQ2SjlhgrOpdOHIPM,86
|
|
48
|
+
spiral/core/expr/struct_/__init__.pyi,sha256=MXckd98eV_x3X0RhEWvlkA3DcDXRtLs5pNnTQkc09nE,296
|
|
49
|
+
spiral/core/expr/text/__init__.pyi,sha256=ed83n1xcsGY7_QDhMmJGnSQ20UrJFXcdv1AveSEcS1c,175
|
|
50
|
+
spiral/core/expr/udf/__init__.pyi,sha256=zsZs081KVhY3-1JidqTkWMW81Qd_ScoTGZvasIhIK-4,358
|
|
51
|
+
spiral/core/expr/video/__init__.pyi,sha256=nQJEcSsigZuRpMjkI_O4EEtMK_n2zRvorcL_KEeD5vU,95
|
|
52
|
+
spiral/core/table/__init__.pyi,sha256=HN4ag8E1QDF_VgekJZqjhuQLhorU3ivjIOBHai2OEVc,3672
|
|
44
53
|
spiral/core/table/manifests/__init__.pyi,sha256=eVfDpmhYSjafIvvALqAkZe5baN3Y1HpKpxYEbjwd4gQ,1043
|
|
45
54
|
spiral/core/table/metastore/__init__.pyi,sha256=rc3u9MwEKRvL2kxOc8lBorddFRnM8o_o1frqtae86a4,1697
|
|
46
|
-
spiral/core/table/spec/__init__.pyi,sha256=
|
|
47
|
-
spiral/dataloader.py,sha256=
|
|
55
|
+
spiral/core/table/spec/__init__.pyi,sha256=PgacM_fZmkHuplj7IbYrj5KfFI3-VPYnyuzI2w7A70Y,5717
|
|
56
|
+
spiral/dataloader.py,sha256=2haLoI6KLrzXfPozAgEa-eCOSDsNldJ1qwCmFpNMyTQ,10281
|
|
48
57
|
spiral/dataset.py,sha256=PMLoXnXuEUciP6-NXqTmQLXu0UIH7OcC4-iZtY_iuO8,7973
|
|
49
58
|
spiral/datetime_.py,sha256=elXaUWtZuuLVcu9E0aXnvYRPB9XWqZbLDToozQYQYjU,950
|
|
50
59
|
spiral/debug/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
51
60
|
spiral/debug/manifests.py,sha256=7f1O3ba9mrA5nXpOF9cEIQuUAteP5wiBkFy_diQJ7No,3216
|
|
52
61
|
spiral/debug/metrics.py,sha256=XdRDcjggtsLNGCAjam6IxG9072pz_d2C8iLApNRFUtk,2044
|
|
53
62
|
spiral/debug/scan.py,sha256=UEm_aRnql5pwDPTpZgakMLNjlzkKL4RurBFFqH_BLAQ,9526
|
|
54
|
-
spiral/expressions/__init__.py,sha256=
|
|
55
|
-
spiral/expressions/base.py,sha256=
|
|
56
|
-
spiral/expressions/http.py,sha256=begUydWoFHEqjeLkATvI_v66Ez6_rR-OQBWO5cHbb9c,2742
|
|
57
|
-
spiral/expressions/io.py,sha256=gJ2a0FKMmdxarWKENulPRwH7KDvSJTIh_OUxX306xAM,3045
|
|
63
|
+
spiral/expressions/__init__.py,sha256=UNxK5qQNrl-BuHsjKcWDj35w5lJviLkGFUQj8OhLID0,7919
|
|
64
|
+
spiral/expressions/base.py,sha256=PvhJkcUSsPSIaxirHVzM9zlqyBXiaiia1HXohXdOmL4,5377
|
|
58
65
|
spiral/expressions/list_.py,sha256=MMt5lf5H1M3O-x6N_PvqOLGq9NOk6Ukv0fPWwPC_uy4,1809
|
|
59
|
-
spiral/expressions/
|
|
60
|
-
spiral/expressions/png.py,sha256=KO8X0OmMzUFwpg2I_j0JTyldPzVXDWIMzjWMWDV9vIY,506
|
|
61
|
-
spiral/expressions/qoi.py,sha256=gvIbb6fXb_Bb080sn9wkpbGGrPs2UEcTXCfuv4-kcYQ,506
|
|
62
|
-
spiral/expressions/refs.py,sha256=omeHBQ5o6N4xgZ3x5Xz7IRrWwYBBtQY8DYK0NNAxeGo,2109
|
|
66
|
+
spiral/expressions/s3.py,sha256=D-kuLifIEY314Q8rB2-ZP8U-IT0FywtbJDMuyusBKiQ,414
|
|
63
67
|
spiral/expressions/str_.py,sha256=tY8RXW3JWvr1-bEfCZtk5FAf11wKJnXPuA9EoeJ9tA4,1265
|
|
64
68
|
spiral/expressions/struct.py,sha256=pGAnCDh6AK0BK1XfZ1qG4ce4ranIQEE1HQsgmzBcfwQ,2038
|
|
65
69
|
spiral/expressions/text.py,sha256=-02gBWYoyNQ3qQ1--9HTa8IryUDojYQVIp8C7rgnOWQ,1893
|
|
66
|
-
spiral/expressions/tiff.py,sha256=
|
|
67
|
-
spiral/expressions/udf.py,sha256=
|
|
70
|
+
spiral/expressions/tiff.py,sha256=4dngO97bT1QY0By7-PxOQVmSwQC3PQAiixVhLJ-4HMQ,7986
|
|
71
|
+
spiral/expressions/udf.py,sha256=yvZCuGK9S9Sa9I18h-apUxsDni2B7E9WEqPrxHBjUWE,1657
|
|
68
72
|
spiral/grpc_.py,sha256=f3czdP1Mxme42Y5--a5ogYq1TTiWn-J_MlGjwJ2mWwM,1015
|
|
69
73
|
spiral/iceberg.py,sha256=JGq62Qnf296r9_hRAoH85GQq45-uSBjwXWw_CvPi6G4,930
|
|
70
74
|
spiral/iterable_dataset.py,sha256=Eekg9ad8tcwXcloHWReBbvCSr5ZappRHn2ldKTvwqS0,4622
|
|
71
75
|
spiral/key_space_index.py,sha256=NAB_nONEjpMYbse8suz42w7Qb5OPHuKN9h9CT2NJe08,1460
|
|
72
|
-
spiral/project.py,sha256=
|
|
76
|
+
spiral/project.py,sha256=VsokZgS0TqIel7UAXMyoBToxn-l_D3ivGwc41x7HLF0,7277
|
|
73
77
|
spiral/protogen/_/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
74
78
|
spiral/protogen/_/arrow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
75
79
|
spiral/protogen/_/arrow/flight/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -87,16 +91,16 @@ spiral/protogen/_/substrait/extensions/__init__.py,sha256=nhnEnho70GAT8WPj2xtwJU
|
|
|
87
91
|
spiral/protogen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
88
92
|
spiral/protogen/util.py,sha256=smnvVo6nYH3FfDm9jqhNLaXz4bbTBaQezHQDCTvZyiQ,1486
|
|
89
93
|
spiral/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
90
|
-
spiral/scan.py,sha256=
|
|
94
|
+
spiral/scan.py,sha256=fXZq0NL9YApt-UwkGpaT0ETn-rK-1_tltq7nqsImZI4,11199
|
|
91
95
|
spiral/server.py,sha256=ztBmB5lBnUz-smQxR_tC8AI5SOhz17wH0MI3GuzDUdM,600
|
|
92
96
|
spiral/settings.py,sha256=JRQSwjJyNaCqQdQLxiqB_O_LZRQXMLyshJBrI2LZHwM,3113
|
|
93
97
|
spiral/snapshot.py,sha256=cTobi5jtiANxalGA-isokQHblNmXGtuUvgUGGNVybsI,1555
|
|
94
98
|
spiral/streaming_/__init__.py,sha256=s7MlW2ERsuZmZGExLFL6RcZon2e0tNBocBg5ANgki7k,61
|
|
95
99
|
spiral/streaming_/reader.py,sha256=tl_lC9xgh1-QFhsZn4xQT7It3PVTzHCEUT2BG2dWBRQ,4166
|
|
96
|
-
spiral/streaming_/stream.py,sha256=
|
|
100
|
+
spiral/streaming_/stream.py,sha256=DM1hBDHnWm1ZFKZ-hZ4zxeSXITcUI6kWzwdJZvywI8o,5915
|
|
97
101
|
spiral/substrait_.py,sha256=AKeOD4KIXvz2J4TYxnIneOiHddtBIyOhuNxVO_uH0eg,12592
|
|
98
|
-
spiral/table.py,sha256=
|
|
102
|
+
spiral/table.py,sha256=dwQr1EAACbfxG8fISFqRrUEAE2P2y6xsx0vFK9Gwyfc,11662
|
|
99
103
|
spiral/text_index.py,sha256=FQ9rgIEGLSJryS9lFdMhKtPFey18BXoWbPXyvZPJJ04,442
|
|
100
|
-
spiral/transaction.py,sha256=
|
|
104
|
+
spiral/transaction.py,sha256=M_Tf-TijVBluuInWk6XSFNCR2dKN4S9EdsHM3QD20ng,2948
|
|
101
105
|
spiral/types_.py,sha256=W_jyO7F6rpPiH69jhgSgV7OxQZbOlb1Ho3InpKUP6Eo,155
|
|
102
|
-
pyspiral-0.6.
|
|
106
|
+
pyspiral-0.6.12.dist-info/RECORD,,
|
spiral/_lib.abi3.so
CHANGED
|
Binary file
|
spiral/cli/key_spaces.py
CHANGED
|
@@ -64,7 +64,7 @@ def show(
|
|
|
64
64
|
"""Show index partitions."""
|
|
65
65
|
index_id = get_index_id(project, name)
|
|
66
66
|
index = state.spiral.key_space_index(index_id)
|
|
67
|
-
shards = state.spiral.
|
|
67
|
+
shards = state.spiral.internal.compute_shards(index.core)
|
|
68
68
|
|
|
69
69
|
rich_table = rich.table.Table("Begin", "End", "Cardinality", title=f"Index {index.name} Partitions")
|
|
70
70
|
for partition in shards:
|
spiral/cli/tables.py
CHANGED
|
@@ -130,7 +130,7 @@ def flush(
|
|
|
130
130
|
keep_latest_s = int(duration.total_seconds()) if duration is not None else None
|
|
131
131
|
|
|
132
132
|
identifier, t = get_table(project, table, dataset)
|
|
133
|
-
state.spiral.
|
|
133
|
+
state.spiral.internal.flush_wal(t.core, keep_latest_s=keep_latest_s) # pyright: ignore[reportPrivateUsage]
|
|
134
134
|
CONSOLE.print(f"Flushed WAL for table {identifier} in project {project}.")
|
|
135
135
|
|
|
136
136
|
|
|
@@ -143,10 +143,10 @@ def manifests(
|
|
|
143
143
|
_, t = get_table(project, table, dataset)
|
|
144
144
|
s = t.snapshot()
|
|
145
145
|
|
|
146
|
-
key_space_state = state.spiral.
|
|
146
|
+
key_space_state = state.spiral.internal.key_space_state(s.core) # pyright: ignore[reportPrivateUsage]
|
|
147
147
|
key_space_manifest = key_space_state.manifest
|
|
148
148
|
|
|
149
|
-
column_groups_states = state.spiral.
|
|
149
|
+
column_groups_states = state.spiral.internal.column_groups_states(s.core, key_space_state) # pyright: ignore[reportPrivateUsage]
|
|
150
150
|
display_manifests(key_space_manifest, [(x.column_group, x.manifest) for x in column_groups_states])
|
|
151
151
|
|
|
152
152
|
|
spiral/client.py
CHANGED
|
@@ -6,11 +6,11 @@ import pyarrow as pa
|
|
|
6
6
|
|
|
7
7
|
from spiral.api import SpiralAPI
|
|
8
8
|
from spiral.api.projects import CreateProjectRequest, CreateProjectResponse
|
|
9
|
-
from spiral.core.client import
|
|
9
|
+
from spiral.core.client import Internal
|
|
10
10
|
from spiral.core.client import Spiral as CoreSpiral
|
|
11
11
|
from spiral.datetime_ import timestamp_micros
|
|
12
12
|
from spiral.expressions import ExprLike
|
|
13
|
-
from spiral.scan import Scan
|
|
13
|
+
from spiral.scan import Scan, ScanState
|
|
14
14
|
from spiral.settings import Settings, settings
|
|
15
15
|
|
|
16
16
|
if TYPE_CHECKING:
|
|
@@ -35,9 +35,13 @@ class Spiral:
|
|
|
35
35
|
return self._config.api
|
|
36
36
|
|
|
37
37
|
@property
|
|
38
|
-
def
|
|
38
|
+
def core(self) -> CoreSpiral:
|
|
39
39
|
return self._config.core
|
|
40
40
|
|
|
41
|
+
@property
|
|
42
|
+
def internal(self) -> Internal:
|
|
43
|
+
return self.core.internal(format=settings().file_format)
|
|
44
|
+
|
|
41
45
|
@property
|
|
42
46
|
def organization(self) -> str:
|
|
43
47
|
if self._org is None:
|
|
@@ -79,19 +83,19 @@ class Spiral:
|
|
|
79
83
|
"""Open a table using an ID."""
|
|
80
84
|
from spiral.table import Table
|
|
81
85
|
|
|
82
|
-
return Table(self, self.
|
|
86
|
+
return Table(self, self.core.table(table_id))
|
|
83
87
|
|
|
84
88
|
def text_index(self, index_id: str) -> "TextIndex":
|
|
85
89
|
"""Open a text index using an ID."""
|
|
86
90
|
from spiral.text_index import TextIndex
|
|
87
91
|
|
|
88
|
-
return TextIndex(self.
|
|
92
|
+
return TextIndex(self.core.text_index(index_id))
|
|
89
93
|
|
|
90
94
|
def key_space_index(self, index_id: str) -> "KeySpaceIndex":
|
|
91
95
|
"""Open a key space index using an ID."""
|
|
92
96
|
from spiral.key_space_index import KeySpaceIndex
|
|
93
97
|
|
|
94
|
-
return KeySpaceIndex(self.
|
|
98
|
+
return KeySpaceIndex(self.core.key_space_index(index_id))
|
|
95
99
|
|
|
96
100
|
def scan(
|
|
97
101
|
self,
|
|
@@ -117,13 +121,21 @@ class Spiral:
|
|
|
117
121
|
where = se.lift(where)
|
|
118
122
|
|
|
119
123
|
return Scan(
|
|
120
|
-
self.
|
|
124
|
+
self.core.scan(
|
|
121
125
|
projection.__expr__,
|
|
122
126
|
filter=where.__expr__ if where else None,
|
|
123
127
|
asof=asof,
|
|
124
128
|
),
|
|
125
129
|
)
|
|
126
130
|
|
|
131
|
+
def load_scan(self, scan_state: ScanState) -> Scan:
|
|
132
|
+
"""Load a scan from a serialized scan state.
|
|
133
|
+
|
|
134
|
+
Args:
|
|
135
|
+
scan_state: The serialized scan state.
|
|
136
|
+
"""
|
|
137
|
+
return Scan(self.core.load_scan(scan_state.core))
|
|
138
|
+
|
|
127
139
|
# TODO(marko): This should be query, and search should be query + scan.
|
|
128
140
|
def search(
|
|
129
141
|
self,
|
|
@@ -155,17 +167,13 @@ class Spiral:
|
|
|
155
167
|
freshness_window = timedelta(seconds=0)
|
|
156
168
|
freshness_window_s = int(freshness_window.total_seconds())
|
|
157
169
|
|
|
158
|
-
return self.
|
|
170
|
+
return self.core.search(
|
|
159
171
|
top_k=top_k,
|
|
160
172
|
rank_by=rank_by.__expr__,
|
|
161
173
|
filters=filters.__expr__ if filters else None,
|
|
162
174
|
freshness_window_s=freshness_window_s,
|
|
163
175
|
)
|
|
164
176
|
|
|
165
|
-
def _ops(self) -> Operations:
|
|
166
|
-
"""Access maintenance operations."""
|
|
167
|
-
return self._core._ops(format=settings().file_format)
|
|
168
|
-
|
|
169
177
|
@property
|
|
170
178
|
def iceberg(self) -> "Iceberg":
|
|
171
179
|
"""
|
spiral/core/client/__init__.pyi
CHANGED
|
@@ -3,7 +3,7 @@ from typing import Any, Literal
|
|
|
3
3
|
import pyarrow as pa
|
|
4
4
|
from spiral.api.types import DatasetName, IndexName, ProjectId, RootUri, TableName
|
|
5
5
|
from spiral.core.authn import Authn
|
|
6
|
-
from spiral.core.table import ColumnGroupState, KeyRange, KeySpaceState, Scan, Snapshot, Table, Transaction
|
|
6
|
+
from spiral.core.table import ColumnGroupState, KeyRange, KeySpaceState, Scan, ScanState, Snapshot, Table, Transaction
|
|
7
7
|
from spiral.core.table.spec import ColumnGroup, Schema
|
|
8
8
|
from spiral.expressions import Expr
|
|
9
9
|
|
|
@@ -30,6 +30,10 @@ class Spiral:
|
|
|
30
30
|
"""Construct a table scan."""
|
|
31
31
|
...
|
|
32
32
|
|
|
33
|
+
def load_scan(self, scan_state: ScanState) -> Scan:
|
|
34
|
+
"""Load a scan from a serialized scan state."""
|
|
35
|
+
...
|
|
36
|
+
|
|
33
37
|
def transaction(self, table: Table, format: str | None = None, retries: int | None = 3) -> Transaction:
|
|
34
38
|
"""Being a table transaction."""
|
|
35
39
|
...
|
|
@@ -100,12 +104,8 @@ class Spiral:
|
|
|
100
104
|
"""Create a new key space index in the specified project."""
|
|
101
105
|
...
|
|
102
106
|
|
|
103
|
-
def
|
|
104
|
-
"""
|
|
105
|
-
|
|
106
|
-
IMPORTANT: This API is internal and is currently exposed for development & testing.
|
|
107
|
-
Maintenance operations are run by SpiralDB.
|
|
108
|
-
"""
|
|
107
|
+
def internal(self, *, format: str | None = None) -> Internal:
|
|
108
|
+
"""Internal client APIs. It can change without notice."""
|
|
109
109
|
...
|
|
110
110
|
|
|
111
111
|
class TextIndex:
|
|
@@ -158,7 +158,7 @@ class ShuffleConfig:
|
|
|
158
158
|
max_batch_size: int | None = None,
|
|
159
159
|
): ...
|
|
160
160
|
|
|
161
|
-
class
|
|
161
|
+
class Internal:
|
|
162
162
|
def flush_wal(self, table: Table, *, keep_latest_s: int | None = None) -> None:
|
|
163
163
|
"""
|
|
164
164
|
Flush the write-ahead log of the table.
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from pyarrow import Array, DataType, Scalar
|
|
2
|
+
|
|
3
|
+
class Expr:
|
|
4
|
+
"""Low level expression class."""
|
|
5
|
+
|
|
6
|
+
def aux(name: str, data_type: DataType) -> Expr: ...
|
|
7
|
+
|
|
8
|
+
# Array is correct (there is no ArrayData), see the table here:
|
|
9
|
+
# https://arrow.apache.org/rust/arrow_pyarrow/index.html
|
|
10
|
+
def scalar(array: Array[Scalar[DataType]]) -> Expr: ...
|
|
11
|
+
def not_(expr: Expr) -> Expr: ...
|
|
12
|
+
def is_null(expr: Expr) -> Expr: ...
|
|
13
|
+
def binary(op: str, expr: Expr, Expr: Expr) -> Expr: ...
|
|
14
|
+
def cast(_expr: Expr, _data_type: DataType) -> Expr: ...
|
|
15
|
+
def array_lit(array: Array[Scalar[DataType]]) -> Expr: ...
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
from .. import Expr
|
|
2
|
+
|
|
3
|
+
def getitem(expr: Expr, item: str) -> Expr: ...
|
|
4
|
+
def select(expr: Expr, including: list[str] | None = None, excluding: list[str] | None = None) -> Expr: ...
|
|
5
|
+
def pack(names: list[str], children: list[str], nullable: bool) -> Expr: ...
|
|
6
|
+
def merge(names: list[Expr]) -> Expr: ...
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from collections.abc import Callable
|
|
2
|
+
|
|
3
|
+
from pyarrow import Array, DataType, Scalar
|
|
4
|
+
|
|
5
|
+
from .. import Expr
|
|
6
|
+
|
|
7
|
+
class UDF:
|
|
8
|
+
def __call__(self, args: list[Expr]) -> Expr: ...
|
|
9
|
+
|
|
10
|
+
def create(
|
|
11
|
+
name: str,
|
|
12
|
+
return_type: Callable[[tuple[DataType, ...]], DataType],
|
|
13
|
+
invoke: Callable[[tuple[Array[Scalar[DataType]], ...]], Array[Scalar[DataType]]],
|
|
14
|
+
) -> UDF: ...
|
spiral/core/table/__init__.pyi
CHANGED
|
@@ -5,7 +5,7 @@ from spiral.core.client import Shard, ShuffleConfig
|
|
|
5
5
|
|
|
6
6
|
from .manifests import FragmentManifest
|
|
7
7
|
from .metastore import PyMetastore
|
|
8
|
-
from .spec import ColumnGroup, Key, Schema, WriteAheadLog
|
|
8
|
+
from .spec import ColumnGroup, Key, Operation, Schema, WriteAheadLog
|
|
9
9
|
|
|
10
10
|
class KeyRange:
|
|
11
11
|
"""A right-exclusive range of keys."""
|
|
@@ -52,6 +52,11 @@ class Snapshot:
|
|
|
52
52
|
table: Table
|
|
53
53
|
wal: WriteAheadLog
|
|
54
54
|
|
|
55
|
+
class ScanState:
|
|
56
|
+
def to_json(self) -> str: ...
|
|
57
|
+
@staticmethod
|
|
58
|
+
def from_json(json: str) -> ScanState: ...
|
|
59
|
+
|
|
55
60
|
class Scan:
|
|
56
61
|
def key_schema(self) -> Schema: ...
|
|
57
62
|
def schema(self) -> Schema: ...
|
|
@@ -62,6 +67,7 @@ class Scan:
|
|
|
62
67
|
def column_groups(self) -> list[ColumnGroup]: ...
|
|
63
68
|
def column_group_state(self, column_group: ColumnGroup) -> ColumnGroupState: ...
|
|
64
69
|
def key_space_state(self, table_id: str) -> KeySpaceState: ...
|
|
70
|
+
def scan_state(self) -> ScanState: ...
|
|
65
71
|
def to_record_batches(
|
|
66
72
|
self,
|
|
67
73
|
key_table: pa.Table | pa.RecordBatch | None = None,
|
|
@@ -95,7 +101,10 @@ class Transaction:
|
|
|
95
101
|
status: str
|
|
96
102
|
|
|
97
103
|
def write(self, table: pa.RecordBatchReader, *, partition_size_bytes: int | None = None): ...
|
|
104
|
+
def writeback(self, scan: Scan, *, partition_size_bytes: int | None = None, batch_readahead: int | None = None): ...
|
|
98
105
|
def drop_columns(self, column_paths: list[str]): ...
|
|
106
|
+
def take(self) -> list[Operation]: ...
|
|
107
|
+
def include(self, ops: list[Operation]): ...
|
|
99
108
|
def commit(self): ...
|
|
100
109
|
def abort(self): ...
|
|
101
110
|
def metrics(self) -> dict[str, Any]: ...
|
|
@@ -62,6 +62,10 @@ class ColumnGroupMetadata:
|
|
|
62
62
|
def apply_wal(self, wal: WriteAheadLog) -> ColumnGroupMetadata:
|
|
63
63
|
"""Applies the given WAL to the metadata."""
|
|
64
64
|
|
|
65
|
+
class Operation:
|
|
66
|
+
# Base class for all operations in the WAL.
|
|
67
|
+
...
|
|
68
|
+
|
|
65
69
|
class LogEntry:
|
|
66
70
|
ts: int
|
|
67
71
|
operation: (
|
spiral/dataloader.py
CHANGED
|
@@ -88,22 +88,24 @@ class SpiralDataLoader:
|
|
|
88
88
|
- map_workers for parallel post-processing (tokenization, decoding, etc.)
|
|
89
89
|
- Built-in checkpoint support via skip_samples
|
|
90
90
|
- Explicit shard-based architecture for distributed training
|
|
91
|
-
"""
|
|
92
91
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
92
|
+
Simple usage:
|
|
93
|
+
```python
|
|
94
|
+
loader = SpiralDataLoader(scan, batch_size=32)
|
|
95
|
+
for batch in loader:
|
|
96
|
+
train_step(batch)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
With parallel transforms:
|
|
100
|
+
```python
|
|
101
|
+
loader = SpiralDataLoader(
|
|
102
|
+
scan,
|
|
103
|
+
batch_size=32,
|
|
104
|
+
transform_fn=tokenize_batch,
|
|
105
|
+
map_workers=4,
|
|
106
|
+
)
|
|
107
|
+
```
|
|
108
|
+
"""
|
|
107
109
|
|
|
108
110
|
def __init__(
|
|
109
111
|
self,
|
|
@@ -220,16 +222,21 @@ class SpiralDataLoader:
|
|
|
220
222
|
|
|
221
223
|
Returns:
|
|
222
224
|
Dictionary containing samples_yielded, seed, and shards.
|
|
225
|
+
|
|
226
|
+
Example checkpoint:
|
|
227
|
+
```python
|
|
228
|
+
loader = SpiralDataLoader(scan, batch_size=32, seed=42)
|
|
229
|
+
for i, batch in enumerate(loader):
|
|
230
|
+
if i == 10:
|
|
231
|
+
checkpoint = loader.state_dict()
|
|
232
|
+
break
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
Example resume:
|
|
236
|
+
```python
|
|
237
|
+
loader = SpiralDataLoader.from_state_dict(scan, checkpoint, batch_size=32)
|
|
238
|
+
```
|
|
223
239
|
"""
|
|
224
|
-
# Example usage:
|
|
225
|
-
# loader = SpiralDataLoader(scan, batch_size=32, seed=42)
|
|
226
|
-
# for i, batch in enumerate(loader):
|
|
227
|
-
# if i == 10:
|
|
228
|
-
# checkpoint = loader.state_dict()
|
|
229
|
-
# break
|
|
230
|
-
#
|
|
231
|
-
# # Resume later with exact same shards
|
|
232
|
-
# loader = SpiralDataLoader.from_state_dict(scan, checkpoint, batch_size=32)
|
|
233
240
|
return {
|
|
234
241
|
"samples_yielded": self._samples_yielded,
|
|
235
242
|
"seed": self.seed,
|
|
@@ -257,20 +264,22 @@ class SpiralDataLoader:
|
|
|
257
264
|
|
|
258
265
|
Returns:
|
|
259
266
|
New SpiralDataLoader instance configured to resume from the checkpoint.
|
|
267
|
+
|
|
268
|
+
Save checkpoint during training:
|
|
269
|
+
```python
|
|
270
|
+
loader = scan.to_distributed_data_loader(scan, batch_size=32, seed=42)
|
|
271
|
+
checkpoint = loader.state_dict()
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
Resume later using the same shards from checkpoint:
|
|
275
|
+
```python
|
|
276
|
+
resumed_loader = SpiralDataLoader.from_state_dict(
|
|
277
|
+
scan,
|
|
278
|
+
checkpoint,
|
|
279
|
+
batch_size=32,
|
|
280
|
+
transform_fn=my_transform,
|
|
281
|
+
)
|
|
260
282
|
"""
|
|
261
|
-
# Example usage:
|
|
262
|
-
#
|
|
263
|
-
# Save checkpoint during training:
|
|
264
|
-
# loader = scan.to_distributed_data_loader(scan, batch_size=32, seed=42)
|
|
265
|
-
# checkpoint = loader.state_dict()
|
|
266
|
-
#
|
|
267
|
-
# Resume later using the same shards from checkpoint:
|
|
268
|
-
# resumed_loader = SpiralDataLoader.from_state_dict(
|
|
269
|
-
# scan,
|
|
270
|
-
# checkpoint,
|
|
271
|
-
# batch_size=32,
|
|
272
|
-
# transform_fn=my_transform,
|
|
273
|
-
# )
|
|
274
283
|
|
|
275
284
|
# Extract resume parameters from state
|
|
276
285
|
seed = state.get("seed", 42)
|
spiral/expressions/__init__.py
CHANGED
|
@@ -8,31 +8,22 @@ import pyarrow as pa
|
|
|
8
8
|
|
|
9
9
|
from spiral import _lib, arrow_
|
|
10
10
|
|
|
11
|
-
from . import http as http
|
|
12
|
-
from . import io as io
|
|
13
11
|
from . import list_ as list
|
|
14
|
-
from . import mp4 as mp4
|
|
15
|
-
from . import png as png
|
|
16
|
-
from . import qoi as qoi
|
|
17
|
-
from . import refs as refs
|
|
18
12
|
from . import str_ as str
|
|
19
13
|
from . import struct as struct
|
|
20
14
|
from . import text as text
|
|
21
|
-
from . import tiff as tiff
|
|
22
15
|
from .base import Expr, ExprLike, NativeExpr
|
|
16
|
+
from .udf import UDF
|
|
23
17
|
|
|
24
18
|
__all__ = [
|
|
25
19
|
"Expr",
|
|
26
20
|
"add",
|
|
27
21
|
"and_",
|
|
28
|
-
"deref",
|
|
29
22
|
"divide",
|
|
30
23
|
"eq",
|
|
31
24
|
"getitem",
|
|
32
25
|
"gt",
|
|
33
26
|
"gte",
|
|
34
|
-
"http",
|
|
35
|
-
"io",
|
|
36
27
|
"is_not_null",
|
|
37
28
|
"is_null",
|
|
38
29
|
"lift",
|
|
@@ -48,19 +39,15 @@ __all__ = [
|
|
|
48
39
|
"or_",
|
|
49
40
|
"pack",
|
|
50
41
|
"aux",
|
|
51
|
-
"ref",
|
|
52
|
-
"refs",
|
|
53
42
|
"scalar",
|
|
54
43
|
"select",
|
|
55
44
|
"str",
|
|
56
45
|
"struct",
|
|
57
46
|
"subtract",
|
|
58
|
-
"tiff",
|
|
59
47
|
"xor",
|
|
60
|
-
"png",
|
|
61
|
-
"qoi",
|
|
62
|
-
"mp4",
|
|
63
48
|
"text",
|
|
49
|
+
"s3",
|
|
50
|
+
"UDF",
|
|
64
51
|
]
|
|
65
52
|
|
|
66
53
|
# Inline some of the struct expressions since they're so common
|
|
@@ -68,8 +55,6 @@ getitem = struct.getitem
|
|
|
68
55
|
merge = struct.merge
|
|
69
56
|
pack = struct.pack
|
|
70
57
|
select = struct.select
|
|
71
|
-
ref = refs.ref
|
|
72
|
-
deref = refs.deref
|
|
73
58
|
|
|
74
59
|
|
|
75
60
|
def lift(expr: ExprLike) -> Expr:
|
|
@@ -127,9 +112,17 @@ def evaluate(expr: ExprLike) -> pa.RecordBatchReader:
|
|
|
127
112
|
return pa.RecordBatchReader.from_batches(expr.schema, [expr])
|
|
128
113
|
if isinstance(expr, pa.StructArray):
|
|
129
114
|
return pa.Table.from_struct_array(expr).to_reader()
|
|
115
|
+
|
|
130
116
|
if isinstance(expr, pa.ChunkedArray):
|
|
131
|
-
|
|
132
|
-
|
|
117
|
+
if not pa.types.is_struct(expr.type):
|
|
118
|
+
raise ValueError("Arrow chunked array must be a struct type.")
|
|
119
|
+
|
|
120
|
+
def _iter_batches():
|
|
121
|
+
for chunk in expr.chunks:
|
|
122
|
+
yield pa.RecordBatch.from_struct_array(chunk)
|
|
123
|
+
|
|
124
|
+
return pa.RecordBatchReader.from_batches(pa.schema(expr.type.fields), _iter_batches())
|
|
125
|
+
|
|
133
126
|
if isinstance(expr, pa.Array):
|
|
134
127
|
raise ValueError("Arrow array must be a struct array.")
|
|
135
128
|
|
spiral/expressions/base.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
|
-
import builtins
|
|
2
1
|
import datetime
|
|
3
|
-
from typing import TypeAlias
|
|
2
|
+
from typing import TypeAlias, Union
|
|
4
3
|
|
|
5
4
|
import pyarrow as pa
|
|
6
5
|
|
|
@@ -153,5 +152,11 @@ class Expr:
|
|
|
153
152
|
|
|
154
153
|
|
|
155
154
|
ScalarLike: TypeAlias = bool | int | float | str | list["ScalarLike"] | datetime.datetime | None
|
|
156
|
-
ArrowLike: TypeAlias =
|
|
157
|
-
|
|
155
|
+
ArrowLike: TypeAlias = Union[
|
|
156
|
+
pa.RecordBatch,
|
|
157
|
+
"pa.Array[pa.Scalar[pa.DataType]]",
|
|
158
|
+
"pa.ChunkedArray[pa.Scalar[pa.DataType]]",
|
|
159
|
+
"pa.Scalar[pa.DataType]",
|
|
160
|
+
pa.Table,
|
|
161
|
+
]
|
|
162
|
+
ExprLike: TypeAlias = Expr | dict[str, "ExprLike"] | list["ExprLike"] | ArrowLike | ScalarLike
|
spiral/expressions/s3.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from spiral import _lib
|
|
2
|
+
from spiral.expressions.base import Expr, ExprLike
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def get(expr: ExprLike) -> Expr:
|
|
6
|
+
"""Read data from object storage by the object's URL.
|
|
7
|
+
|
|
8
|
+
Args:
|
|
9
|
+
expr: URLs of the data that needs to be read from object storage.
|
|
10
|
+
"""
|
|
11
|
+
from spiral import expressions as se
|
|
12
|
+
|
|
13
|
+
expr = se.lift(expr)
|
|
14
|
+
return Expr(
|
|
15
|
+
_lib.expr.s3.get(
|
|
16
|
+
expr.__expr__,
|
|
17
|
+
)
|
|
18
|
+
)
|